gnss-sdr/src/algorithms/tracking/libs/cudahelpers/helper_cuda.h

1256 lines
36 KiB
C++

/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
////////////////////////////////////////////////////////////////////////////////
// These are CUDA Helper functions for initialization and error checking
#ifndef HELPER_CUDA_H
#define HELPER_CUDA_H
#pragma once
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <helper_string.h>
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
// Note, it is required that your SDK sample to include the proper header files, please
// refer the CUDA examples for examples of the needed CUDA headers, which may change depending
// on which CUDA functions are used.
// CUDA Runtime error messages
#ifdef __DRIVER_TYPES_H__
static const char *_cudaGetErrorEnum(cudaError_t error)
{
switch (error)
{
case cudaSuccess:
return "cudaSuccess";
case cudaErrorMissingConfiguration:
return "cudaErrorMissingConfiguration";
case cudaErrorMemoryAllocation:
return "cudaErrorMemoryAllocation";
case cudaErrorInitializationError:
return "cudaErrorInitializationError";
case cudaErrorLaunchFailure:
return "cudaErrorLaunchFailure";
case cudaErrorPriorLaunchFailure:
return "cudaErrorPriorLaunchFailure";
case cudaErrorLaunchTimeout:
return "cudaErrorLaunchTimeout";
case cudaErrorLaunchOutOfResources:
return "cudaErrorLaunchOutOfResources";
case cudaErrorInvalidDeviceFunction:
return "cudaErrorInvalidDeviceFunction";
case cudaErrorInvalidConfiguration:
return "cudaErrorInvalidConfiguration";
case cudaErrorInvalidDevice:
return "cudaErrorInvalidDevice";
case cudaErrorInvalidValue:
return "cudaErrorInvalidValue";
case cudaErrorInvalidPitchValue:
return "cudaErrorInvalidPitchValue";
case cudaErrorInvalidSymbol:
return "cudaErrorInvalidSymbol";
case cudaErrorMapBufferObjectFailed:
return "cudaErrorMapBufferObjectFailed";
case cudaErrorUnmapBufferObjectFailed:
return "cudaErrorUnmapBufferObjectFailed";
case cudaErrorInvalidHostPointer:
return "cudaErrorInvalidHostPointer";
case cudaErrorInvalidDevicePointer:
return "cudaErrorInvalidDevicePointer";
case cudaErrorInvalidTexture:
return "cudaErrorInvalidTexture";
case cudaErrorInvalidTextureBinding:
return "cudaErrorInvalidTextureBinding";
case cudaErrorInvalidChannelDescriptor:
return "cudaErrorInvalidChannelDescriptor";
case cudaErrorInvalidMemcpyDirection:
return "cudaErrorInvalidMemcpyDirection";
case cudaErrorAddressOfConstant:
return "cudaErrorAddressOfConstant";
case cudaErrorTextureFetchFailed:
return "cudaErrorTextureFetchFailed";
case cudaErrorTextureNotBound:
return "cudaErrorTextureNotBound";
case cudaErrorSynchronizationError:
return "cudaErrorSynchronizationError";
case cudaErrorInvalidFilterSetting:
return "cudaErrorInvalidFilterSetting";
case cudaErrorInvalidNormSetting:
return "cudaErrorInvalidNormSetting";
case cudaErrorMixedDeviceExecution:
return "cudaErrorMixedDeviceExecution";
case cudaErrorCudartUnloading:
return "cudaErrorCudartUnloading";
case cudaErrorUnknown:
return "cudaErrorUnknown";
case cudaErrorNotYetImplemented:
return "cudaErrorNotYetImplemented";
case cudaErrorMemoryValueTooLarge:
return "cudaErrorMemoryValueTooLarge";
case cudaErrorInvalidResourceHandle:
return "cudaErrorInvalidResourceHandle";
case cudaErrorNotReady:
return "cudaErrorNotReady";
case cudaErrorInsufficientDriver:
return "cudaErrorInsufficientDriver";
case cudaErrorSetOnActiveProcess:
return "cudaErrorSetOnActiveProcess";
case cudaErrorInvalidSurface:
return "cudaErrorInvalidSurface";
case cudaErrorNoDevice:
return "cudaErrorNoDevice";
case cudaErrorECCUncorrectable:
return "cudaErrorECCUncorrectable";
case cudaErrorSharedObjectSymbolNotFound:
return "cudaErrorSharedObjectSymbolNotFound";
case cudaErrorSharedObjectInitFailed:
return "cudaErrorSharedObjectInitFailed";
case cudaErrorUnsupportedLimit:
return "cudaErrorUnsupportedLimit";
case cudaErrorDuplicateVariableName:
return "cudaErrorDuplicateVariableName";
case cudaErrorDuplicateTextureName:
return "cudaErrorDuplicateTextureName";
case cudaErrorDuplicateSurfaceName:
return "cudaErrorDuplicateSurfaceName";
case cudaErrorDevicesUnavailable:
return "cudaErrorDevicesUnavailable";
case cudaErrorInvalidKernelImage:
return "cudaErrorInvalidKernelImage";
case cudaErrorNoKernelImageForDevice:
return "cudaErrorNoKernelImageForDevice";
case cudaErrorIncompatibleDriverContext:
return "cudaErrorIncompatibleDriverContext";
case cudaErrorPeerAccessAlreadyEnabled:
return "cudaErrorPeerAccessAlreadyEnabled";
case cudaErrorPeerAccessNotEnabled:
return "cudaErrorPeerAccessNotEnabled";
case cudaErrorDeviceAlreadyInUse:
return "cudaErrorDeviceAlreadyInUse";
case cudaErrorProfilerDisabled:
return "cudaErrorProfilerDisabled";
case cudaErrorProfilerNotInitialized:
return "cudaErrorProfilerNotInitialized";
case cudaErrorProfilerAlreadyStarted:
return "cudaErrorProfilerAlreadyStarted";
case cudaErrorProfilerAlreadyStopped:
return "cudaErrorProfilerAlreadyStopped";
/* Since CUDA 4.0*/
case cudaErrorAssert:
return "cudaErrorAssert";
case cudaErrorTooManyPeers:
return "cudaErrorTooManyPeers";
case cudaErrorHostMemoryAlreadyRegistered:
return "cudaErrorHostMemoryAlreadyRegistered";
case cudaErrorHostMemoryNotRegistered:
return "cudaErrorHostMemoryNotRegistered";
/* Since CUDA 5.0 */
case cudaErrorOperatingSystem:
return "cudaErrorOperatingSystem";
case cudaErrorPeerAccessUnsupported:
return "cudaErrorPeerAccessUnsupported";
case cudaErrorLaunchMaxDepthExceeded:
return "cudaErrorLaunchMaxDepthExceeded";
case cudaErrorLaunchFileScopedTex:
return "cudaErrorLaunchFileScopedTex";
case cudaErrorLaunchFileScopedSurf:
return "cudaErrorLaunchFileScopedSurf";
case cudaErrorSyncDepthExceeded:
return "cudaErrorSyncDepthExceeded";
case cudaErrorLaunchPendingCountExceeded:
return "cudaErrorLaunchPendingCountExceeded";
case cudaErrorNotPermitted:
return "cudaErrorNotPermitted";
case cudaErrorNotSupported:
return "cudaErrorNotSupported";
/* Since CUDA 6.0 */
case cudaErrorHardwareStackError:
return "cudaErrorHardwareStackError";
case cudaErrorIllegalInstruction:
return "cudaErrorIllegalInstruction";
case cudaErrorMisalignedAddress:
return "cudaErrorMisalignedAddress";
case cudaErrorInvalidAddressSpace:
return "cudaErrorInvalidAddressSpace";
case cudaErrorInvalidPc:
return "cudaErrorInvalidPc";
case cudaErrorIllegalAddress:
return "cudaErrorIllegalAddress";
/* Since CUDA 6.5*/
case cudaErrorInvalidPtx:
return "cudaErrorInvalidPtx";
case cudaErrorInvalidGraphicsContext:
return "cudaErrorInvalidGraphicsContext";
case cudaErrorStartupFailure:
return "cudaErrorStartupFailure";
case cudaErrorApiFailureBase:
return "cudaErrorApiFailureBase";
}
return "<unknown>";
}
#endif
#ifdef __cuda_cuda_h__
// CUDA Driver API errors
static const char *_cudaGetErrorEnum(CUresult error)
{
switch (error)
{
case CUDA_SUCCESS:
return "CUDA_SUCCESS";
case CUDA_ERROR_INVALID_VALUE:
return "CUDA_ERROR_INVALID_VALUE";
case CUDA_ERROR_OUT_OF_MEMORY:
return "CUDA_ERROR_OUT_OF_MEMORY";
case CUDA_ERROR_NOT_INITIALIZED:
return "CUDA_ERROR_NOT_INITIALIZED";
case CUDA_ERROR_DEINITIALIZED:
return "CUDA_ERROR_DEINITIALIZED";
case CUDA_ERROR_PROFILER_DISABLED:
return "CUDA_ERROR_PROFILER_DISABLED";
case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
return "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
case CUDA_ERROR_PROFILER_ALREADY_STARTED:
return "CUDA_ERROR_PROFILER_ALREADY_STARTED";
case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
return "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
case CUDA_ERROR_NO_DEVICE:
return "CUDA_ERROR_NO_DEVICE";
case CUDA_ERROR_INVALID_DEVICE:
return "CUDA_ERROR_INVALID_DEVICE";
case CUDA_ERROR_INVALID_IMAGE:
return "CUDA_ERROR_INVALID_IMAGE";
case CUDA_ERROR_INVALID_CONTEXT:
return "CUDA_ERROR_INVALID_CONTEXT";
case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
case CUDA_ERROR_MAP_FAILED:
return "CUDA_ERROR_MAP_FAILED";
case CUDA_ERROR_UNMAP_FAILED:
return "CUDA_ERROR_UNMAP_FAILED";
case CUDA_ERROR_ARRAY_IS_MAPPED:
return "CUDA_ERROR_ARRAY_IS_MAPPED";
case CUDA_ERROR_ALREADY_MAPPED:
return "CUDA_ERROR_ALREADY_MAPPED";
case CUDA_ERROR_NO_BINARY_FOR_GPU:
return "CUDA_ERROR_NO_BINARY_FOR_GPU";
case CUDA_ERROR_ALREADY_ACQUIRED:
return "CUDA_ERROR_ALREADY_ACQUIRED";
case CUDA_ERROR_NOT_MAPPED:
return "CUDA_ERROR_NOT_MAPPED";
case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
return "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
case CUDA_ERROR_ECC_UNCORRECTABLE:
return "CUDA_ERROR_ECC_UNCORRECTABLE";
case CUDA_ERROR_UNSUPPORTED_LIMIT:
return "CUDA_ERROR_UNSUPPORTED_LIMIT";
case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
case CUDA_ERROR_INVALID_PTX:
return "CUDA_ERROR_INVALID_PTX";
case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT";
case CUDA_ERROR_INVALID_SOURCE:
return "CUDA_ERROR_INVALID_SOURCE";
case CUDA_ERROR_FILE_NOT_FOUND:
return "CUDA_ERROR_FILE_NOT_FOUND";
case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
case CUDA_ERROR_OPERATING_SYSTEM:
return "CUDA_ERROR_OPERATING_SYSTEM";
case CUDA_ERROR_INVALID_HANDLE:
return "CUDA_ERROR_INVALID_HANDLE";
case CUDA_ERROR_NOT_FOUND:
return "CUDA_ERROR_NOT_FOUND";
case CUDA_ERROR_NOT_READY:
return "CUDA_ERROR_NOT_READY";
case CUDA_ERROR_ILLEGAL_ADDRESS:
return "CUDA_ERROR_ILLEGAL_ADDRESS";
case CUDA_ERROR_LAUNCH_FAILED:
return "CUDA_ERROR_LAUNCH_FAILED";
case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
case CUDA_ERROR_LAUNCH_TIMEOUT:
return "CUDA_ERROR_LAUNCH_TIMEOUT";
case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING:
return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING";
case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
case CUDA_ERROR_CONTEXT_IS_DESTROYED:
return "CUDA_ERROR_CONTEXT_IS_DESTROYED";
case CUDA_ERROR_ASSERT:
return "CUDA_ERROR_ASSERT";
case CUDA_ERROR_TOO_MANY_PEERS:
return "CUDA_ERROR_TOO_MANY_PEERS";
case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
case CUDA_ERROR_HARDWARE_STACK_ERROR:
return "CUDA_ERROR_HARDWARE_STACK_ERROR";
case CUDA_ERROR_ILLEGAL_INSTRUCTION:
return "CUDA_ERROR_ILLEGAL_INSTRUCTION";
case CUDA_ERROR_MISALIGNED_ADDRESS:
return "CUDA_ERROR_MISALIGNED_ADDRESS";
case CUDA_ERROR_INVALID_ADDRESS_SPACE:
return "CUDA_ERROR_INVALID_ADDRESS_SPACE";
case CUDA_ERROR_INVALID_PC:
return "CUDA_ERROR_INVALID_PC";
case CUDA_ERROR_NOT_PERMITTED:
return "CUDA_ERROR_NOT_PERMITTED";
case CUDA_ERROR_NOT_SUPPORTED:
return "CUDA_ERROR_NOT_SUPPORTED";
case CUDA_ERROR_UNKNOWN:
return "CUDA_ERROR_UNKNOWN";
}
return "<unknown>";
}
#endif
#ifdef CUBLAS_API_H_
// cuBLAS API errors
static const char *_cudaGetErrorEnum(cublasStatus_t error)
{
switch (error)
{
case CUBLAS_STATUS_SUCCESS:
return "CUBLAS_STATUS_SUCCESS";
case CUBLAS_STATUS_NOT_INITIALIZED:
return "CUBLAS_STATUS_NOT_INITIALIZED";
case CUBLAS_STATUS_ALLOC_FAILED:
return "CUBLAS_STATUS_ALLOC_FAILED";
case CUBLAS_STATUS_INVALID_VALUE:
return "CUBLAS_STATUS_INVALID_VALUE";
case CUBLAS_STATUS_ARCH_MISMATCH:
return "CUBLAS_STATUS_ARCH_MISMATCH";
case CUBLAS_STATUS_MAPPING_ERROR:
return "CUBLAS_STATUS_MAPPING_ERROR";
case CUBLAS_STATUS_EXECUTION_FAILED:
return "CUBLAS_STATUS_EXECUTION_FAILED";
case CUBLAS_STATUS_INTERNAL_ERROR:
return "CUBLAS_STATUS_INTERNAL_ERROR";
}
return "<unknown>";
}
#endif
#ifdef _CUFFT_H_
// cuFFT API errors
static const char *_cudaGetErrorEnum(cufftResult error)
{
switch (error)
{
case CUFFT_SUCCESS:
return "CUFFT_SUCCESS";
case CUFFT_INVALID_PLAN:
return "CUFFT_INVALID_PLAN";
case CUFFT_ALLOC_FAILED:
return "CUFFT_ALLOC_FAILED";
case CUFFT_INVALID_TYPE:
return "CUFFT_INVALID_TYPE";
case CUFFT_INVALID_VALUE:
return "CUFFT_INVALID_VALUE";
case CUFFT_INTERNAL_ERROR:
return "CUFFT_INTERNAL_ERROR";
case CUFFT_EXEC_FAILED:
return "CUFFT_EXEC_FAILED";
case CUFFT_SETUP_FAILED:
return "CUFFT_SETUP_FAILED";
case CUFFT_INVALID_SIZE:
return "CUFFT_INVALID_SIZE";
case CUFFT_UNALIGNED_DATA:
return "CUFFT_UNALIGNED_DATA";
case CUFFT_INCOMPLETE_PARAMETER_LIST:
return "CUFFT_INCOMPLETE_PARAMETER_LIST";
case CUFFT_INVALID_DEVICE:
return "CUFFT_INVALID_DEVICE";
case CUFFT_PARSE_ERROR:
return "CUFFT_PARSE_ERROR";
case CUFFT_NO_WORKSPACE:
return "CUFFT_NO_WORKSPACE";
case CUFFT_NOT_IMPLEMENTED:
return "CUFFT_NOT_IMPLEMENTED";
case CUFFT_LICENSE_ERROR:
return "CUFFT_LICENSE_ERROR";
}
return "<unknown>";
}
#endif
#ifdef CUSPARSEAPI
// cuSPARSE API errors
static const char *_cudaGetErrorEnum(cusparseStatus_t error)
{
switch (error)
{
case CUSPARSE_STATUS_SUCCESS:
return "CUSPARSE_STATUS_SUCCESS";
case CUSPARSE_STATUS_NOT_INITIALIZED:
return "CUSPARSE_STATUS_NOT_INITIALIZED";
case CUSPARSE_STATUS_ALLOC_FAILED:
return "CUSPARSE_STATUS_ALLOC_FAILED";
case CUSPARSE_STATUS_INVALID_VALUE:
return "CUSPARSE_STATUS_INVALID_VALUE";
case CUSPARSE_STATUS_ARCH_MISMATCH:
return "CUSPARSE_STATUS_ARCH_MISMATCH";
case CUSPARSE_STATUS_MAPPING_ERROR:
return "CUSPARSE_STATUS_MAPPING_ERROR";
case CUSPARSE_STATUS_EXECUTION_FAILED:
return "CUSPARSE_STATUS_EXECUTION_FAILED";
case CUSPARSE_STATUS_INTERNAL_ERROR:
return "CUSPARSE_STATUS_INTERNAL_ERROR";
case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
}
return "<unknown>";
}
#endif
#ifdef CUSOLVER_COMMON_H_
//cuSOLVER API errors
static const char *_cudaGetErrorEnum(cusolverStatus_t error)
{
switch(error)
{
case CUSOLVER_STATUS_SUCCESS:
return "CUSOLVER_STATUS_SUCCESS";
case CUSOLVER_STATUS_NOT_INITIALIZED:
return "CUSOLVER_STATUS_NOT_INITIALIZED";
case CUSOLVER_STATUS_ALLOC_FAILED:
return "CUSOLVER_STATUS_ALLOC_FAILED";
case CUSOLVER_STATUS_INVALID_VALUE:
return "CUSOLVER_STATUS_INVALID_VALUE";
case CUSOLVER_STATUS_ARCH_MISMATCH:
return "CUSOLVER_STATUS_ARCH_MISMATCH";
case CUSOLVER_STATUS_MAPPING_ERROR:
return "CUSOLVER_STATUS_MAPPING_ERROR";
case CUSOLVER_STATUS_EXECUTION_FAILED:
return "CUSOLVER_STATUS_EXECUTION_FAILED";
case CUSOLVER_STATUS_INTERNAL_ERROR:
return "CUSOLVER_STATUS_INTERNAL_ERROR";
case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
case CUSOLVER_STATUS_NOT_SUPPORTED :
return "CUSOLVER_STATUS_NOT_SUPPORTED ";
case CUSOLVER_STATUS_ZERO_PIVOT:
return "CUSOLVER_STATUS_ZERO_PIVOT";
case CUSOLVER_STATUS_INVALID_LICENSE:
return "CUSOLVER_STATUS_INVALID_LICENSE";
}
return "<unknown>";
}
#endif
#ifdef CURAND_H_
// cuRAND API errors
static const char *_cudaGetErrorEnum(curandStatus_t error)
{
switch (error)
{
case CURAND_STATUS_SUCCESS:
return "CURAND_STATUS_SUCCESS";
case CURAND_STATUS_VERSION_MISMATCH:
return "CURAND_STATUS_VERSION_MISMATCH";
case CURAND_STATUS_NOT_INITIALIZED:
return "CURAND_STATUS_NOT_INITIALIZED";
case CURAND_STATUS_ALLOCATION_FAILED:
return "CURAND_STATUS_ALLOCATION_FAILED";
case CURAND_STATUS_TYPE_ERROR:
return "CURAND_STATUS_TYPE_ERROR";
case CURAND_STATUS_OUT_OF_RANGE:
return "CURAND_STATUS_OUT_OF_RANGE";
case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
case CURAND_STATUS_LAUNCH_FAILURE:
return "CURAND_STATUS_LAUNCH_FAILURE";
case CURAND_STATUS_PREEXISTING_FAILURE:
return "CURAND_STATUS_PREEXISTING_FAILURE";
case CURAND_STATUS_INITIALIZATION_FAILED:
return "CURAND_STATUS_INITIALIZATION_FAILED";
case CURAND_STATUS_ARCH_MISMATCH:
return "CURAND_STATUS_ARCH_MISMATCH";
case CURAND_STATUS_INTERNAL_ERROR:
return "CURAND_STATUS_INTERNAL_ERROR";
}
return "<unknown>";
}
#endif
#ifdef NV_NPPIDEFS_H
// NPP API errors
static const char *_cudaGetErrorEnum(NppStatus error)
{
switch (error)
{
case NPP_NOT_SUPPORTED_MODE_ERROR:
return "NPP_NOT_SUPPORTED_MODE_ERROR";
case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
case NPP_RESIZE_NO_OPERATION_ERROR:
return "NPP_RESIZE_NO_OPERATION_ERROR";
case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
case NPP_BAD_ARG_ERROR:
return "NPP_BAD_ARGUMENT_ERROR";
case NPP_COEFF_ERROR:
return "NPP_COEFFICIENT_ERROR";
case NPP_RECT_ERROR:
return "NPP_RECTANGLE_ERROR";
case NPP_QUAD_ERROR:
return "NPP_QUADRANGLE_ERROR";
case NPP_MEM_ALLOC_ERR:
return "NPP_MEMORY_ALLOCATION_ERROR";
case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
case NPP_INVALID_INPUT:
return "NPP_INVALID_INPUT";
case NPP_POINTER_ERROR:
return "NPP_POINTER_ERROR";
case NPP_WARNING:
return "NPP_WARNING";
case NPP_ODD_ROI_WARNING:
return "NPP_ODD_ROI_WARNING";
#else
// These are for CUDA 5.5 or higher
case NPP_BAD_ARGUMENT_ERROR:
return "NPP_BAD_ARGUMENT_ERROR";
case NPP_COEFFICIENT_ERROR:
return "NPP_COEFFICIENT_ERROR";
case NPP_RECTANGLE_ERROR:
return "NPP_RECTANGLE_ERROR";
case NPP_QUADRANGLE_ERROR:
return "NPP_QUADRANGLE_ERROR";
case NPP_MEMORY_ALLOCATION_ERR:
return "NPP_MEMORY_ALLOCATION_ERROR";
case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
case NPP_INVALID_HOST_POINTER_ERROR:
return "NPP_INVALID_HOST_POINTER_ERROR";
case NPP_INVALID_DEVICE_POINTER_ERROR:
return "NPP_INVALID_DEVICE_POINTER_ERROR";
#endif
case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
case NPP_TEXTURE_BIND_ERROR:
return "NPP_TEXTURE_BIND_ERROR";
case NPP_WRONG_INTERSECTION_ROI_ERROR:
return "NPP_WRONG_INTERSECTION_ROI_ERROR";
case NPP_NOT_EVEN_STEP_ERROR:
return "NPP_NOT_EVEN_STEP_ERROR";
case NPP_INTERPOLATION_ERROR:
return "NPP_INTERPOLATION_ERROR";
case NPP_RESIZE_FACTOR_ERROR:
return "NPP_RESIZE_FACTOR_ERROR";
case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
case NPP_MEMFREE_ERR:
return "NPP_MEMFREE_ERR";
case NPP_MEMSET_ERR:
return "NPP_MEMSET_ERR";
case NPP_MEMCPY_ERR:
return "NPP_MEMCPY_ERROR";
case NPP_MIRROR_FLIP_ERR:
return "NPP_MIRROR_FLIP_ERR";
#else
case NPP_MEMFREE_ERROR:
return "NPP_MEMFREE_ERROR";
case NPP_MEMSET_ERROR:
return "NPP_MEMSET_ERROR";
case NPP_MEMCPY_ERROR:
return "NPP_MEMCPY_ERROR";
case NPP_MIRROR_FLIP_ERROR:
return "NPP_MIRROR_FLIP_ERROR";
#endif
case NPP_ALIGNMENT_ERROR:
return "NPP_ALIGNMENT_ERROR";
case NPP_STEP_ERROR:
return "NPP_STEP_ERROR";
case NPP_SIZE_ERROR:
return "NPP_SIZE_ERROR";
case NPP_NULL_POINTER_ERROR:
return "NPP_NULL_POINTER_ERROR";
case NPP_CUDA_KERNEL_EXECUTION_ERROR:
return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
case NPP_NOT_IMPLEMENTED_ERROR:
return "NPP_NOT_IMPLEMENTED_ERROR";
case NPP_ERROR:
return "NPP_ERROR";
case NPP_SUCCESS:
return "NPP_SUCCESS";
case NPP_WRONG_INTERSECTION_QUAD_WARNING:
return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
case NPP_MISALIGNED_DST_ROI_WARNING:
return "NPP_MISALIGNED_DST_ROI_WARNING";
case NPP_AFFINE_QUAD_INCORRECT_WARNING:
return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
case NPP_DOUBLE_SIZE_WARNING:
return "NPP_DOUBLE_SIZE_WARNING";
case NPP_WRONG_INTERSECTION_ROI_WARNING:
return "NPP_WRONG_INTERSECTION_ROI_WARNING";
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
/* These are 6.0 or higher */
case NPP_LUT_PALETTE_BITSIZE_ERROR:
return "NPP_LUT_PALETTE_BITSIZE_ERROR";
case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
case NPP_QUALITY_INDEX_ERROR:
return "NPP_QUALITY_INDEX_ERROR";
case NPP_CHANNEL_ORDER_ERROR:
return "NPP_CHANNEL_ORDER_ERROR";
case NPP_ZERO_MASK_VALUE_ERROR:
return "NPP_ZERO_MASK_VALUE_ERROR";
case NPP_NUMBER_OF_CHANNELS_ERROR:
return "NPP_NUMBER_OF_CHANNELS_ERROR";
case NPP_COI_ERROR:
return "NPP_COI_ERROR";
case NPP_DIVISOR_ERROR:
return "NPP_DIVISOR_ERROR";
case NPP_CHANNEL_ERROR:
return "NPP_CHANNEL_ERROR";
case NPP_STRIDE_ERROR:
return "NPP_STRIDE_ERROR";
case NPP_ANCHOR_ERROR:
return "NPP_ANCHOR_ERROR";
case NPP_MASK_SIZE_ERROR:
return "NPP_MASK_SIZE_ERROR";
case NPP_MOMENT_00_ZERO_ERROR:
return "NPP_MOMENT_00_ZERO_ERROR";
case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
case NPP_THRESHOLD_ERROR:
return "NPP_THRESHOLD_ERROR";
case NPP_CONTEXT_MATCH_ERROR:
return "NPP_CONTEXT_MATCH_ERROR";
case NPP_FFT_FLAG_ERROR:
return "NPP_FFT_FLAG_ERROR";
case NPP_FFT_ORDER_ERROR:
return "NPP_FFT_ORDER_ERROR";
case NPP_SCALE_RANGE_ERROR:
return "NPP_SCALE_RANGE_ERROR";
case NPP_DATA_TYPE_ERROR:
return "NPP_DATA_TYPE_ERROR";
case NPP_OUT_OFF_RANGE_ERROR:
return "NPP_OUT_OFF_RANGE_ERROR";
case NPP_DIVIDE_BY_ZERO_ERROR:
return "NPP_DIVIDE_BY_ZERO_ERROR";
case NPP_RANGE_ERROR:
return "NPP_RANGE_ERROR";
case NPP_NO_MEMORY_ERROR:
return "NPP_NO_MEMORY_ERROR";
case NPP_ERROR_RESERVED:
return "NPP_ERROR_RESERVED";
case NPP_NO_OPERATION_WARNING:
return "NPP_NO_OPERATION_WARNING";
case NPP_DIVIDE_BY_ZERO_WARNING:
return "NPP_DIVIDE_BY_ZERO_WARNING";
#endif
}
return "<unknown>";
}
#endif
#ifdef __DRIVER_TYPES_H__
#ifndef DEVICE_RESET
#define DEVICE_RESET cudaDeviceReset();
#endif
#else
#ifndef DEVICE_RESET
#define DEVICE_RESET
#endif
#endif
template< typename T >
void check(T result, char const *const func, const char *const file, int const line)
{
if (result)
{
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
DEVICE_RESET
// Make sure we call CUDA Device Reset before exiting
exit(EXIT_FAILURE);
}
}
#ifdef __DRIVER_TYPES_H__
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
// This will output the proper error string when calling cudaGetLastError
#define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__)
inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
{
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err)
{
fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
file, line, errorMessage, (int)err, cudaGetErrorString(err));
DEVICE_RESET
exit(EXIT_FAILURE);
}
}
#endif
#ifndef MAX
#define MAX(a,b) (a > b ? a : b)
#endif
// Float To Int conversion
inline int ftoi(float value)
{
return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
}
// Beginning of GPU Architecture definitions
inline int _ConvertSMVer2Cores(int major, int minor)
{
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
typedef struct
{
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
int Cores;
} sSMtoCores;
sSMtoCores nGpuArchCoresPerSM[] =
{
{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
{ 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
{ 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
{ 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
{ 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
{ 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
{ 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
{ -1, -1 }
};
int index = 0;
while (nGpuArchCoresPerSM[index].SM != -1)
{
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
{
return nGpuArchCoresPerSM[index].Cores;
}
index++;
}
// If we don't find the values, we default use the previous one to run properly
printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
return nGpuArchCoresPerSM[index-1].Cores;
}
// end of GPU Architecture definitions
#ifdef __CUDA_RUNTIME_H__
// General GPU Device CUDA Initialization
inline int gpuDeviceInit(int devID)
{
int device_count;
checkCudaErrors(cudaGetDeviceCount(&device_count));
if (device_count == 0)
{
fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
if (devID < 0)
{
devID = 0;
}
if (devID > device_count-1)
{
fprintf(stderr, "\n");
fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count);
fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID);
fprintf(stderr, "\n");
return -devID;
}
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
if (deviceProp.computeMode == cudaComputeModeProhibited)
{
fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
return -1;
}
if (deviceProp.major < 1)
{
fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
exit(EXIT_FAILURE);
}
checkCudaErrors(cudaSetDevice(devID));
printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name);
return devID;
}
// This function returns the best GPU (with maximum GFLOPS)
inline int gpuGetMaxGflopsDeviceId()
{
int current_device = 0, sm_per_multiproc = 0;
int max_perf_device = 0;
int device_count = 0, best_SM_arch = 0;
int devices_prohibited = 0;
unsigned long long max_compute_perf = 0;
cudaDeviceProp deviceProp;
cudaGetDeviceCount(&device_count);
checkCudaErrors(cudaGetDeviceCount(&device_count));
if (device_count == 0)
{
fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
// Find the best major SM Architecture GPU device
while (current_device < device_count)
{
cudaGetDeviceProperties(&deviceProp, current_device);
// If this GPU is not running on Compute Mode prohibited, then we can add it to the list
if (deviceProp.computeMode != cudaComputeModeProhibited)
{
if (deviceProp.major > 0 && deviceProp.major < 9999)
{
best_SM_arch = MAX(best_SM_arch, deviceProp.major);
}
}
else
{
devices_prohibited++;
}
current_device++;
}
if (devices_prohibited == device_count)
{
fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n");
exit(EXIT_FAILURE);
}
// Find the best CUDA capable GPU device
current_device = 0;
while (current_device < device_count)
{
cudaGetDeviceProperties(&deviceProp, current_device);
// If this GPU is not running on Compute Mode prohibited, then we can add it to the list
if (deviceProp.computeMode != cudaComputeModeProhibited)
{
if (deviceProp.major == 9999 && deviceProp.minor == 9999)
{
sm_per_multiproc = 1;
}
else
{
sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
}
unsigned long long compute_perf = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
if (compute_perf > max_compute_perf)
{
// If we find GPU with SM major > 2, search only these
if (best_SM_arch > 2)
{
// If our device==dest_SM_arch, choose this, or else pass
if (deviceProp.major == best_SM_arch)
{
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
}
else
{
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
}
}
++current_device;
}
return max_perf_device;
}
// Initialization code to find the best CUDA Device
inline int findCudaDevice(int argc, const char **argv)
{
cudaDeviceProp deviceProp;
int devID = 0;
// If the command-line has a device number specified, use it
if (checkCmdLineFlag(argc, argv, "device"))
{
devID = getCmdLineArgumentInt(argc, argv, "device=");
if (devID < 0)
{
printf("Invalid command line parameter\n ");
exit(EXIT_FAILURE);
}
else
{
devID = gpuDeviceInit(devID);
if (devID < 0)
{
printf("exiting...\n");
exit(EXIT_FAILURE);
}
}
}
else
{
// Otherwise pick the device with highest Gflops/s
devID = gpuGetMaxGflopsDeviceId();
checkCudaErrors(cudaSetDevice(devID));
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor);
}
return devID;
}
// General check for CUDA GPU SM Capabilities
inline bool checkCudaCapabilities(int major_version, int minor_version)
{
cudaDeviceProp deviceProp;
deviceProp.major = 0;
deviceProp.minor = 0;
int dev;
checkCudaErrors(cudaGetDevice(&dev));
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
if ((deviceProp.major > major_version) ||
(deviceProp.major == major_version && deviceProp.minor >= minor_version))
{
printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor);
return true;
}
else
{
printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
return false;
}
}
#endif
// end of CUDA Helper Functions
#endif