Removing cudahelpers library and usage by a copyright issue. It does not

affect functionality.
This commit is contained in:
Carles Fernandez 2015-09-10 17:46:38 +02:00
parent a6608c47a2
commit a84b4baef0
13 changed files with 75 additions and 5796 deletions

View File

@ -1,151 +0,0 @@
/*
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
/* CUda UTility Library */
#ifndef _EXCEPTION_H_
#define _EXCEPTION_H_
// includes, system
#include <exception>
#include <stdexcept>
#include <iostream>
#include <stdlib.h>
//! Exception wrapper.
//! @param Std_Exception Exception out of namespace std for easy typing.
template<class Std_Exception>
class Exception : public Std_Exception
{
public:
//! @brief Static construction interface
//! @return Alwayss throws ( Located_Exception<Exception>)
//! @param file file in which the Exception occurs
//! @param line line in which the Exception occurs
//! @param detailed details on the code fragment causing the Exception
static void throw_it(const char *file,
const int line,
const char *detailed = "-");
//! Static construction interface
//! @return Alwayss throws ( Located_Exception<Exception>)
//! @param file file in which the Exception occurs
//! @param line line in which the Exception occurs
//! @param detailed details on the code fragment causing the Exception
static void throw_it(const char *file,
const int line,
const std::string &detailed);
//! Destructor
virtual ~Exception() throw();
private:
//! Constructor, default (private)
Exception();
//! Constructor, standard
//! @param str string returned by what()
Exception(const std::string &str);
};
////////////////////////////////////////////////////////////////////////////////
//! Exception handler function for arbitrary exceptions
//! @param ex exception to handle
////////////////////////////////////////////////////////////////////////////////
template<class Exception_Typ>
inline void
handleException(const Exception_Typ &ex)
{
std::cerr << ex.what() << std::endl;
exit(EXIT_FAILURE);
}
//! Convenience macros
//! Exception caused by dynamic program behavior, e.g. file does not exist
#define RUNTIME_EXCEPTION( msg) \
Exception<std::runtime_error>::throw_it( __FILE__, __LINE__, msg)
//! Logic exception in program, e.g. an assert failed
#define LOGIC_EXCEPTION( msg) \
Exception<std::logic_error>::throw_it( __FILE__, __LINE__, msg)
//! Out of range exception
#define RANGE_EXCEPTION( msg) \
Exception<std::range_error>::throw_it( __FILE__, __LINE__, msg)
////////////////////////////////////////////////////////////////////////////////
//! Implementation
// includes, system
#include <sstream>
////////////////////////////////////////////////////////////////////////////////
//! Static construction interface.
//! @param Exception causing code fragment (file and line) and detailed infos.
////////////////////////////////////////////////////////////////////////////////
/*static*/ template<class Std_Exception>
void
Exception<Std_Exception>::
throw_it(const char *file, const int line, const char *detailed)
{
std::stringstream s;
// Quiet heavy-weight but exceptions are not for
// performance / release versions
s << "Exception in file '" << file << "' in line " << line << "\n"
<< "Detailed description: " << detailed << "\n";
throw Exception(s.str());
}
////////////////////////////////////////////////////////////////////////////////
//! Static construction interface.
//! @param Exception causing code fragment (file and line) and detailed infos.
////////////////////////////////////////////////////////////////////////////////
/*static*/ template<class Std_Exception>
void
Exception<Std_Exception>::
throw_it(const char *file, const int line, const std::string &msg)
{
throw_it(file, line, msg.c_str());
}
////////////////////////////////////////////////////////////////////////////////
//! Constructor, default (private).
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::Exception() :
Std_Exception("Unknown Exception.\n")
{ }
////////////////////////////////////////////////////////////////////////////////
//! Constructor, standard (private).
//! String returned by what().
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::Exception(const std::string &s) :
Std_Exception(s)
{ }
////////////////////////////////////////////////////////////////////////////////
//! Destructor
////////////////////////////////////////////////////////////////////////////////
template<class Std_Exception>
Exception<Std_Exception>::~Exception() throw() { }
// functions, exported
#endif // #ifndef _EXCEPTION_H_

File diff suppressed because it is too large Load Diff

View File

@ -1,517 +0,0 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// Helper functions for CUDA Driver API error handling (make sure that CUDA_H is included in your projects)
#ifndef HELPER_CUDA_DRVAPI_H
#define HELPER_CUDA_DRVAPI_H
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <helper_string.h>
#include <drvapi_error_string.h>
#ifndef MAX
#define MAX(a,b) (a > b ? a : b)
#endif
#ifndef HELPER_CUDA_H
inline int ftoi(float value)
{
return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
}
#endif
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
////////////////////////////////////////////////////////////////////////////////
// These are CUDA Helper functions
// add a level of protection to the CUDA SDK samples, let's force samples to explicitly include CUDA.H
#ifdef __cuda_cuda_h__
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
#ifndef checkCudaErrors
#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
// These are the inline versions for all of the SDK helper functions
inline void __checkCudaErrors(CUresult err, const char *file, const int line)
{
if (CUDA_SUCCESS != err)
{
fprintf(stderr, "checkCudaErrors() Driver API error = %04d \"%s\" from file <%s>, line %i.\n",
err, getCudaDrvErrorString(err), file, line);
exit(EXIT_FAILURE);
}
}
#endif
#ifdef getLastCudaDrvErrorMsg
#undef getLastCudaDrvErrorMsg
#endif
#define getLastCudaDrvErrorMsg(msg) __getLastCudaDrvErrorMsg (msg, __FILE__, __LINE__)
inline void __getLastCudaDrvErrorMsg(const char *msg, const char *file, const int line)
{
CUresult err = cuCtxSynchronize();
if (CUDA_SUCCESS != err)
{
fprintf(stderr, "getLastCudaDrvErrorMsg -> %s", msg);
fprintf(stderr, "getLastCudaDrvErrorMsg -> cuCtxSynchronize API error = %04d \"%s\" in file <%s>, line %i.\n",
err, getCudaDrvErrorString(err), file, line);
exit(EXIT_FAILURE);
}
}
// This function wraps the CUDA Driver API into a template function
template <class T>
inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
{
CUresult error_result = cuDeviceGetAttribute(attribute, device_attribute, device);
if (error_result != CUDA_SUCCESS)
{
printf("cuDeviceGetAttribute returned %d\n-> %s\n", (int)error_result, getCudaDrvErrorString(error_result));
exit(EXIT_SUCCESS);
}
}
#endif
// Beginning of GPU Architecture definitions
inline int _ConvertSMVer2CoresDRV(int major, int minor)
{
// Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
typedef struct
{
int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
int Cores;
} sSMtoCores;
sSMtoCores nGpuArchCoresPerSM[] =
{
{ 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
{ 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
{ 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
{ 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
{ 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
{ 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
{ 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
{ 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
{ -1, -1 }
};
int index = 0;
while (nGpuArchCoresPerSM[index].SM != -1)
{
if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
{
return nGpuArchCoresPerSM[index].Cores;
}
index++;
}
// If we don't find the values, we default use the previous one to run properly
printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
return nGpuArchCoresPerSM[index-1].Cores;
}
// end of GPU Architecture definitions
#ifdef __cuda_cuda_h__
// General GPU Device CUDA Initialization
inline int gpuDeviceInitDRV(int ARGC, const char **ARGV)
{
int cuDevice = 0;
int deviceCount = 0;
CUresult err = cuInit(0);
if (CUDA_SUCCESS == err)
{
checkCudaErrors(cuDeviceGetCount(&deviceCount));
}
if (deviceCount == 0)
{
fprintf(stderr, "cudaDeviceInit error: no devices supporting CUDA\n");
exit(EXIT_FAILURE);
}
int dev = 0;
dev = getCmdLineArgumentInt(ARGC, (const char **) ARGV, "device=");
if (dev < 0)
{
dev = 0;
}
if (dev > deviceCount-1)
{
fprintf(stderr, "\n");
fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
fprintf(stderr, ">> cudaDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
fprintf(stderr, "\n");
return -dev;
}
checkCudaErrors(cuDeviceGet(&cuDevice, dev));
char name[100];
cuDeviceGetName(name, 100, cuDevice);
int computeMode;
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev);
if (computeMode == CU_COMPUTEMODE_PROHIBITED)
{
fprintf(stderr, "Error: device is running in <CU_COMPUTEMODE_PROHIBITED>, no threads can use this CUDA Device.\n");
return -1;
}
if (checkCmdLineFlag(ARGC, (const char **) ARGV, "quiet") == false)
{
printf("gpuDeviceInitDRV() Using CUDA Device [%d]: %s\n", dev, name);
}
return dev;
}
// This function returns the best GPU based on performance
inline int gpuGetMaxGflopsDeviceIdDRV()
{
CUdevice current_device = 0;
CUdevice max_perf_device = 0;
int device_count = 0;
int sm_per_multiproc = 0;
unsigned long long max_compute_perf = 0;
int best_SM_arch = 0;
int major = 0;
int minor = 0;
int multiProcessorCount;
int clockRate;
int devices_prohibited = 0;
cuInit(0);
checkCudaErrors(cuDeviceGetCount(&device_count));
if (device_count == 0)
{
fprintf(stderr, "gpuGetMaxGflopsDeviceIdDRV error: no devices supporting CUDA\n");
exit(EXIT_FAILURE);
}
// Find the best major SM Architecture GPU device
while (current_device < device_count)
{
checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
if (major > 0 && major < 9999)
{
best_SM_arch = MAX(best_SM_arch, major);
}
current_device++;
}
// Find the best CUDA capable GPU device
current_device = 0;
while (current_device < device_count)
{
checkCudaErrors(cuDeviceGetAttribute(&multiProcessorCount,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
current_device));
checkCudaErrors(cuDeviceGetAttribute(&clockRate,
CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
current_device));
checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
int computeMode;
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
if (computeMode != CU_COMPUTEMODE_PROHIBITED)
{
if (major == 9999 && minor == 9999)
{
sm_per_multiproc = 1;
}
else
{
sm_per_multiproc = _ConvertSMVer2CoresDRV(major, minor);
}
unsigned long long compute_perf = (unsigned long long) (multiProcessorCount * sm_per_multiproc * clockRate);
if (compute_perf > max_compute_perf)
{
// If we find GPU with SM major > 2, search only these
if (best_SM_arch > 2)
{
// If our device==dest_SM_arch, choose this, or else pass
if (major == best_SM_arch)
{
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
}
else
{
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
}
}
else
{
devices_prohibited++;
}
++current_device;
}
if (devices_prohibited == device_count)
{
fprintf(stderr, "gpuGetMaxGflopsDeviceIdDRV error: all devices have compute mode prohibited.\n");
exit(EXIT_FAILURE);
}
return max_perf_device;
}
// This function returns the best Graphics GPU based on performance
inline int gpuGetMaxGflopsGLDeviceIdDRV()
{
CUdevice current_device = 0, max_perf_device = 0;
int device_count = 0, sm_per_multiproc = 0;
int max_compute_perf = 0, best_SM_arch = 0;
int major = 0, minor = 0, multiProcessorCount, clockRate;
int bTCC = 0;
int devices_prohibited = 0;
char deviceName[256];
cuInit(0);
checkCudaErrors(cuDeviceGetCount(&device_count));
if (device_count == 0)
{
fprintf(stderr, "gpuGetMaxGflopsGLDeviceIdDRV error: no devices supporting CUDA\n");
exit(EXIT_FAILURE);
}
// Find the best major SM Architecture GPU device that are graphics devices
while (current_device < device_count)
{
checkCudaErrors(cuDeviceGetName(deviceName, 256, current_device));
checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
#if CUDA_VERSION >= 3020
checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device));
#else
// Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
if (deviceName[0] == 'T')
{
bTCC = 1;
}
#endif
int computeMode;
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
if (computeMode != CU_COMPUTEMODE_PROHIBITED)
{
if (!bTCC)
{
if (major > 0 && major < 9999)
{
best_SM_arch = MAX(best_SM_arch, major);
}
}
}
else
{
devices_prohibited++;
}
current_device++;
}
if (devices_prohibited == device_count)
{
fprintf(stderr, "gpuGetMaxGflopsGLDeviceIdDRV error: all devices have compute mode prohibited.\n");
exit(EXIT_FAILURE);
}
// Find the best CUDA capable GPU device
current_device = 0;
while (current_device < device_count)
{
checkCudaErrors(cuDeviceGetAttribute(&multiProcessorCount,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
current_device));
checkCudaErrors(cuDeviceGetAttribute(&clockRate,
CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
current_device));
checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
#if CUDA_VERSION >= 3020
checkCudaErrors(cuDeviceGetAttribute(&bTCC, CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device));
#else
// Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
if (deviceName[0] == 'T')
{
bTCC = 1;
}
#endif
int computeMode;
getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
if (computeMode != CU_COMPUTEMODE_PROHIBITED)
{
if (major == 9999 && minor == 9999)
{
sm_per_multiproc = 1;
}
else
{
sm_per_multiproc = _ConvertSMVer2CoresDRV(major, minor);
}
// If this is a Tesla based GPU and SM 2.0, and TCC is disabled, this is a contendor
if (!bTCC) // Is this GPU running the TCC driver? If so we pass on this
{
int compute_perf = multiProcessorCount * sm_per_multiproc * clockRate;
if (compute_perf > max_compute_perf)
{
// If we find GPU with SM major > 2, search only these
if (best_SM_arch > 2)
{
// If our device = dest_SM_arch, then we pick this one
if (major == best_SM_arch)
{
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
}
else
{
max_compute_perf = compute_perf;
max_perf_device = current_device;
}
}
}
}
++current_device;
}
return max_perf_device;
}
// General initialization call to pick the best CUDA Device
inline CUdevice findCudaDeviceDRV(int argc, const char **argv)
{
CUdevice cuDevice;
int devID = 0;
// If the command-line has a device number specified, use it
if (checkCmdLineFlag(argc, (const char **)argv, "device"))
{
devID = gpuDeviceInitDRV(argc, argv);
if (devID < 0)
{
printf("exiting...\n");
exit(EXIT_SUCCESS);
}
}
else
{
// Otherwise pick the device with highest Gflops/s
char name[100];
devID = gpuGetMaxGflopsDeviceIdDRV();
checkCudaErrors(cuDeviceGet(&cuDevice, devID));
cuDeviceGetName(name, 100, cuDevice);
printf("> Using CUDA Device [%d]: %s\n", devID, name);
}
cuDeviceGet(&cuDevice, devID);
return cuDevice;
}
// This function will pick the best CUDA device available with OpenGL interop
inline CUdevice findCudaGLDeviceDRV(int argc, const char **argv)
{
CUdevice cuDevice;
int devID = 0;
// If the command-line has a device number specified, use it
if (checkCmdLineFlag(argc, (const char **)argv, "device"))
{
devID = gpuDeviceInitDRV(argc, (const char **)argv);
if (devID < 0)
{
printf("no CUDA capable devices found, exiting...\n");
exit(EXIT_SUCCESS);
}
}
else
{
char name[100];
// Otherwise pick the device with highest Gflops/s
devID = gpuGetMaxGflopsGLDeviceIdDRV();
checkCudaErrors(cuDeviceGet(&cuDevice, devID));
cuDeviceGetName(name, 100, cuDevice);
printf("> Using CUDA/GL Device [%d]: %s\n", devID, name);
}
return devID;
}
// General check for CUDA GPU SM Capabilities
inline bool checkCudaCapabilitiesDRV(int major_version, int minor_version, int devID)
{
CUdevice cuDevice;
char name[256];
int major = 0, minor = 0;
checkCudaErrors(cuDeviceGet(&cuDevice, devID));
checkCudaErrors(cuDeviceGetName(name, 100, cuDevice));
checkCudaErrors(cuDeviceComputeCapability(&major, &minor, devID));
if ((major > major_version) ||
(major == major_version && minor >= minor_version))
{
printf("> Device %d: <%16s >, Compute SM %d.%d detected\n", devID, name, major, minor);
return true;
}
else
{
printf("No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
return false;
}
}
#endif
// end of CUDA Helper Functions
#endif

View File

@ -1,165 +0,0 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#ifndef HELPER_CUDA_GL_H
#define HELPER_CUDA_GL_H
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// includes, graphics
#if defined (__APPLE__) || defined(MACOSX)
#include <OpenGL/gl.h>
#include <OpenGL/glu.h>
#else
#include <GL/gl.h>
#include <GL/glu.h>
#endif
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
#ifdef __DRIVER_TYPES_H__
#ifndef DEVICE_RESET
#define DEVICE_RESET cudaDeviceReset()
#endif
#else
#ifndef DEVICE_RESET
#define DEVICE_RESET
#endif
#endif
#ifdef __CUDA_GL_INTEROP_H__
////////////////////////////////////////////////////////////////////////////////
// These are CUDA OpenGL Helper functions
inline int gpuGLDeviceInit(int ARGC, const char **ARGV)
{
int deviceCount;
checkCudaErrors(cudaGetDeviceCount(&deviceCount));
if (deviceCount == 0)
{
fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
exit(EXIT_FAILURE);
}
int dev = 0;
dev = getCmdLineArgumentInt(ARGC, ARGV, "device=");
if (dev < 0)
{
dev = 0;
}
if (dev > deviceCount-1)
{
fprintf(stderr, "\n");
fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
fprintf(stderr, ">> gpuGLDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
fprintf(stderr, "\n");
return -dev;
}
cudaDeviceProp deviceProp;
checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
if (deviceProp.computeMode == cudaComputeModeProhibited)
{
fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
return -1;
}
if (deviceProp.major < 1)
{
fprintf(stderr, "Error: device does not support CUDA.\n");
exit(EXIT_FAILURE);
}
if (checkCmdLineFlag(ARGC, ARGV, "quiet") == false)
{
fprintf(stderr, "Using device %d: %s\n", dev, deviceProp.name);
}
checkCudaErrors(cudaGLSetGLDevice(dev));
return dev;
}
// This function will pick the best CUDA device available with OpenGL interop
inline int findCudaGLDevice(int argc, const char **argv)
{
int devID = 0;
// If the command-line has a device number specified, use it
if (checkCmdLineFlag(argc, (const char **)argv, "device"))
{
devID = gpuGLDeviceInit(argc, (const char **)argv);
if (devID < 0)
{
printf("no CUDA capable devices found, exiting...\n");
DEVICE_RESET
exit(EXIT_SUCCESS);
}
}
else
{
// Otherwise pick the device with highest Gflops/s
devID = gpuGetMaxGflopsDeviceId();
cudaGLSetGLDevice(devID);
}
return devID;
}
////////////////////////////////////////////////////////////////////////////
//! Check for OpenGL error
//! @return bool if no GL error has been encountered, otherwise 0
//! @param file __FILE__ macro
//! @param line __LINE__ macro
//! @note The GL error is listed on stderr
//! @note This function should be used via the CHECK_ERROR_GL() macro
////////////////////////////////////////////////////////////////////////////
inline bool
sdkCheckErrorGL(const char *file, const int line)
{
bool ret_val = true;
// check for error
GLenum gl_error = glGetError();
if (gl_error != GL_NO_ERROR)
{
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
char tmpStr[512];
// NOTE: "%s(%i) : " allows Visual Studio to directly jump to the file at the right line
// when the user double clicks on the error line in the Output pane. Like any compile error.
sprintf_s(tmpStr, 255, "\n%s(%i) : GL Error : %s\n\n", file, line, gluErrorString(gl_error));
fprintf(stderr, "%s", tmpStr);
#endif
fprintf(stderr, "GL Error in file '%s' in line %d :\n", file, line);
fprintf(stderr, "%s\n", gluErrorString(gl_error));
ret_val = false;
}
return ret_val;
}
#define SDK_CHECK_ERROR_GL() \
if( false == sdkCheckErrorGL( __FILE__, __LINE__)) { \
DEVICE_RESET \
exit(EXIT_FAILURE); \
}
#endif
#endif

View File

@ -1,42 +0,0 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// These are helper functions for the SDK samples (string parsing, timers, image helpers, etc)
#ifndef HELPER_FUNCTIONS_H
#define HELPER_FUNCTIONS_H
#ifdef WIN32
#pragma warning(disable:4996)
#endif
// includes, project
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <assert.h>
#include <exception.h>
#include <math.h>
#include <fstream>
#include <vector>
#include <iostream>
#include <algorithm>
// includes, timer, string parsing, image helpers
#include <helper_timer.h> // helper functions for timers
#include <helper_string.h> // helper functions for string parsing
#include <helper_image.h> // helper functions for image compare, dump, data comparisons
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
#endif // HELPER_FUNCTIONS_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,516 +0,0 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// These are helper functions for the SDK samples (string parsing, timers, etc)
#ifndef STRING_HELPER_H
#define STRING_HELPER_H
#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <string>
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#ifndef _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_DEPRECATE
#endif
#ifndef STRCASECMP
#define STRCASECMP _stricmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP _strnicmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
#endif
#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result != 0)
#endif
#ifndef SSCANF
#define SSCANF sscanf_s
#endif
#ifndef SPRINTF
#define SPRINTF sprintf_s
#endif
#else // Linux Includes
#include <string.h>
#include <strings.h>
#ifndef STRCASECMP
#define STRCASECMP strcasecmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP strncasecmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
#endif
#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result == NULL)
#endif
#ifndef SSCANF
#define SSCANF sscanf
#endif
#ifndef SPRINTF
#define SPRINTF sprintf
#endif
#endif
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
// CUDA Utility Helper Functions
inline int stringRemoveDelimiter(char delimiter, const char *string)
{
int string_start = 0;
while (string[string_start] == delimiter)
{
string_start++;
}
if (string_start >= (int)strlen(string)-1)
{
return 0;
}
return string_start;
}
inline int getFileExtension(char *filename, char **extension)
{
int string_length = (int)strlen(filename);
while (filename[string_length--] != '.')
{
if (string_length == 0)
break;
}
if (string_length > 0) string_length += 2;
if (string_length == 0)
*extension = NULL;
else
*extension = &filename[string_length];
return string_length;
}
inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
{
bool bFound = false;
if (argc >= 1)
{
for (int i=1; i < argc; i++)
{
int string_start = stringRemoveDelimiter('-', argv[i]);
const char *string_argv = &argv[i][string_start];
const char *equal_pos = strchr(string_argv, '=');
int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
int length = (int)strlen(string_ref);
if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length))
{
bFound = true;
continue;
}
}
}
return bFound;
}
// This function wraps the CUDA Driver API into a template function
template <class T>
inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value)
{
bool bFound = false;
if (argc >= 1)
{
for (int i=1; i < argc; i++)
{
int string_start = stringRemoveDelimiter('-', argv[i]);
const char *string_argv = &argv[i][string_start];
int length = (int)strlen(string_ref);
if (!STRNCASECMP(string_argv, string_ref, length))
{
if (length+1 <= (int)strlen(string_argv))
{
int auto_inc = (string_argv[length] == '=') ? 1 : 0;
*value = (T)atoi(&string_argv[length + auto_inc]);
}
bFound = true;
i=argc;
}
}
}
return bFound;
}
inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
{
bool bFound = false;
int value = -1;
if (argc >= 1)
{
for (int i=1; i < argc; i++)
{
int string_start = stringRemoveDelimiter('-', argv[i]);
const char *string_argv = &argv[i][string_start];
int length = (int)strlen(string_ref);
if (!STRNCASECMP(string_argv, string_ref, length))
{
if (length+1 <= (int)strlen(string_argv))
{
int auto_inc = (string_argv[length] == '=') ? 1 : 0;
value = atoi(&string_argv[length + auto_inc]);
}
else
{
value = 0;
}
bFound = true;
continue;
}
}
}
if (bFound)
{
return value;
}
else
{
return 0;
}
}
inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref)
{
bool bFound = false;
float value = -1;
if (argc >= 1)
{
for (int i=1; i < argc; i++)
{
int string_start = stringRemoveDelimiter('-', argv[i]);
const char *string_argv = &argv[i][string_start];
int length = (int)strlen(string_ref);
if (!STRNCASECMP(string_argv, string_ref, length))
{
if (length+1 <= (int)strlen(string_argv))
{
int auto_inc = (string_argv[length] == '=') ? 1 : 0;
value = (float)atof(&string_argv[length + auto_inc]);
}
else
{
value = 0.f;
}
bFound = true;
continue;
}
}
}
if (bFound)
{
return value;
}
else
{
return 0;
}
}
inline bool getCmdLineArgumentString(const int argc, const char **argv,
const char *string_ref, char **string_retval)
{
bool bFound = false;
if (argc >= 1)
{
for (int i=1; i < argc; i++)
{
int string_start = stringRemoveDelimiter('-', argv[i]);
char *string_argv = (char *)&argv[i][string_start];
int length = (int)strlen(string_ref);
if (!STRNCASECMP(string_argv, string_ref, length))
{
*string_retval = &string_argv[length+1];
bFound = true;
continue;
}
}
}
if (!bFound)
{
*string_retval = NULL;
}
return bFound;
}
//////////////////////////////////////////////////////////////////////////////
//! Find the path for a file assuming that
//! files are found in the searchPath.
//!
//! @return the path if succeeded, otherwise 0
//! @param filename name of the file
//! @param executable_path optional absolute path of the executable
//////////////////////////////////////////////////////////////////////////////
inline char *sdkFindFilePath(const char *filename, const char *executable_path)
{
// <executable_name> defines a variable that is replaced with the name of the executable
// Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files)
// The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc
const char *searchPath[] =
{
"./", // same dir
"./common/", // "/common/" subdir
"./common/data/", // "/common/data/" subdir
"./data/", // "/data/" subdir
"./src/", // "/src/" subdir
"./src/<executable_name>/data/", // "/src/<executable_name>/data/" subdir
"./inc/", // "/inc/" subdir
"./0_Simple/", // "/0_Simple/" subdir
"./1_Utilities/", // "/1_Utilities/" subdir
"./2_Graphics/", // "/2_Graphics/" subdir
"./3_Imaging/", // "/3_Imaging/" subdir
"./4_Finance/", // "/4_Finance/" subdir
"./5_Simulations/", // "/5_Simulations/" subdir
"./6_Advanced/", // "/6_Advanced/" subdir
"./7_CUDALibraries/", // "/7_CUDALibraries/" subdir
"./8_Android/", // "/8_Android/" subdir
"./samples/", // "/samples/" subdir
"../", // up 1 in tree
"../common/", // up 1 in tree, "/common/" subdir
"../common/data/", // up 1 in tree, "/common/data/" subdir
"../data/", // up 1 in tree, "/data/" subdir
"../src/", // up 1 in tree, "/src/" subdir
"../inc/", // up 1 in tree, "/inc/" subdir
"../0_Simple/<executable_name>/data/", // up 1 in tree, "/0_Simple/<executable_name>/" subdir
"../1_Utilities/<executable_name>/data/", // up 1 in tree, "/1_Utilities/<executable_name>/" subdir
"../2_Graphics/<executable_name>/data/", // up 1 in tree, "/2_Graphics/<executable_name>/" subdir
"../3_Imaging/<executable_name>/data/", // up 1 in tree, "/3_Imaging/<executable_name>/" subdir
"../4_Finance/<executable_name>/data/", // up 1 in tree, "/4_Finance/<executable_name>/" subdir
"../5_Simulations/<executable_name>/data/", // up 1 in tree, "/5_Simulations/<executable_name>/" subdir
"../6_Advanced/<executable_name>/data/", // up 1 in tree, "/6_Advanced/<executable_name>/" subdir
"../7_CUDALibraries/<executable_name>/data/",// up 1 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../8_Android/<executable_name>/data/", // up 1 in tree, "/8_Android/<executable_name>/" subdir
"../samples/<executable_name>/data/", // up 1 in tree, "/samples/<executable_name>/" subdir
"../../", // up 2 in tree
"../../common/", // up 2 in tree, "/common/" subdir
"../../common/data/", // up 2 in tree, "/common/data/" subdir
"../../data/", // up 2 in tree, "/data/" subdir
"../../src/", // up 2 in tree, "/src/" subdir
"../../inc/", // up 2 in tree, "/inc/" subdir
"../../sandbox/<executable_name>/data/", // up 2 in tree, "/sandbox/<executable_name>/" subdir
"../../0_Simple/<executable_name>/data/", // up 2 in tree, "/0_Simple/<executable_name>/" subdir
"../../1_Utilities/<executable_name>/data/", // up 2 in tree, "/1_Utilities/<executable_name>/" subdir
"../../2_Graphics/<executable_name>/data/", // up 2 in tree, "/2_Graphics/<executable_name>/" subdir
"../../3_Imaging/<executable_name>/data/", // up 2 in tree, "/3_Imaging/<executable_name>/" subdir
"../../4_Finance/<executable_name>/data/", // up 2 in tree, "/4_Finance/<executable_name>/" subdir
"../../5_Simulations/<executable_name>/data/", // up 2 in tree, "/5_Simulations/<executable_name>/" subdir
"../../6_Advanced/<executable_name>/data/", // up 2 in tree, "/6_Advanced/<executable_name>/" subdir
"../../7_CUDALibraries/<executable_name>/data/", // up 2 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../../8_Android/<executable_name>/data/", // up 2 in tree, "/8_Android/<executable_name>/" subdir
"../../samples/<executable_name>/data/", // up 2 in tree, "/samples/<executable_name>/" subdir
"../../../", // up 3 in tree
"../../../src/<executable_name>/", // up 3 in tree, "/src/<executable_name>/" subdir
"../../../src/<executable_name>/data/", // up 3 in tree, "/src/<executable_name>/data/" subdir
"../../../src/<executable_name>/src/", // up 3 in tree, "/src/<executable_name>/src/" subdir
"../../../src/<executable_name>/inc/", // up 3 in tree, "/src/<executable_name>/inc/" subdir
"../../../sandbox/<executable_name>/", // up 3 in tree, "/sandbox/<executable_name>/" subdir
"../../../sandbox/<executable_name>/data/", // up 3 in tree, "/sandbox/<executable_name>/data/" subdir
"../../../sandbox/<executable_name>/src/", // up 3 in tree, "/sandbox/<executable_name>/src/" subdir
"../../../sandbox/<executable_name>/inc/", // up 3 in tree, "/sandbox/<executable_name>/inc/" subdir
"../../../0_Simple/<executable_name>/data/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir
"../../../1_Utilities/<executable_name>/data/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
"../../../2_Graphics/<executable_name>/data/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
"../../../3_Imaging/<executable_name>/data/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
"../../../4_Finance/<executable_name>/data/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir
"../../../5_Simulations/<executable_name>/data/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
"../../../6_Advanced/<executable_name>/data/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
"../../../7_CUDALibraries/<executable_name>/data/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../../../8_Android/<executable_name>/data/", // up 3 in tree, "/8_Android/<executable_name>/" subdir
"../../../0_Simple/<executable_name>/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir
"../../../1_Utilities/<executable_name>/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
"../../../2_Graphics/<executable_name>/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
"../../../3_Imaging/<executable_name>/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
"../../../4_Finance/<executable_name>/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir
"../../../5_Simulations/<executable_name>/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
"../../../6_Advanced/<executable_name>/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
"../../../7_CUDALibraries/<executable_name>/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../../../8_Android/<executable_name>/", // up 3 in tree, "/8_Android/<executable_name>/" subdir
"../../../samples/<executable_name>/data/", // up 3 in tree, "/samples/<executable_name>/" subdir
"../../../common/", // up 3 in tree, "../../../common/" subdir
"../../../common/data/", // up 3 in tree, "../../../common/data/" subdir
"../../../data/", // up 3 in tree, "../../../data/" subdir
"../../../../", // up 4 in tree
"../../../../src/<executable_name>/", // up 4 in tree, "/src/<executable_name>/" subdir
"../../../../src/<executable_name>/data/", // up 4 in tree, "/src/<executable_name>/data/" subdir
"../../../../src/<executable_name>/src/", // up 4 in tree, "/src/<executable_name>/src/" subdir
"../../../../src/<executable_name>/inc/", // up 4 in tree, "/src/<executable_name>/inc/" subdir
"../../../../sandbox/<executable_name>/", // up 4 in tree, "/sandbox/<executable_name>/" subdir
"../../../../sandbox/<executable_name>/data/", // up 4 in tree, "/sandbox/<executable_name>/data/" subdir
"../../../../sandbox/<executable_name>/src/", // up 4 in tree, "/sandbox/<executable_name>/src/" subdir
"../../../../sandbox/<executable_name>/inc/", // up 4 in tree, "/sandbox/<executable_name>/inc/" subdir
"../../../../0_Simple/<executable_name>/data/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir
"../../../../1_Utilities/<executable_name>/data/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
"../../../../2_Graphics/<executable_name>/data/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
"../../../../3_Imaging/<executable_name>/data/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
"../../../../4_Finance/<executable_name>/data/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir
"../../../../5_Simulations/<executable_name>/data/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
"../../../../6_Advanced/<executable_name>/data/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
"../../../../7_CUDALibraries/<executable_name>/data/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../../../../8_Android/<executable_name>/data/", // up 4 in tree, "/8_Android/<executable_name>/" subdir
"../../../../0_Simple/<executable_name>/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir
"../../../../1_Utilities/<executable_name>/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
"../../../../2_Graphics/<executable_name>/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
"../../../../3_Imaging/<executable_name>/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
"../../../../4_Finance/<executable_name>/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir
"../../../../5_Simulations/<executable_name>/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
"../../../../6_Advanced/<executable_name>/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
"../../../../7_CUDALibraries/<executable_name>/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../../../../8_Android/<executable_name>/", // up 4 in tree, "/8_Android/<executable_name>/" subdir
"../../../../samples/<executable_name>/data/", // up 4 in tree, "/samples/<executable_name>/" subdir
"../../../../common/", // up 4 in tree, "../../../common/" subdir
"../../../../common/data/", // up 4 in tree, "../../../common/data/" subdir
"../../../../data/", // up 4 in tree, "../../../data/" subdir
"../../../../../", // up 5 in tree
"../../../../../src/<executable_name>/", // up 5 in tree, "/src/<executable_name>/" subdir
"../../../../../src/<executable_name>/data/", // up 5 in tree, "/src/<executable_name>/data/" subdir
"../../../../../src/<executable_name>/src/", // up 5 in tree, "/src/<executable_name>/src/" subdir
"../../../../../src/<executable_name>/inc/", // up 5 in tree, "/src/<executable_name>/inc/" subdir
"../../../../../sandbox/<executable_name>/", // up 5 in tree, "/sandbox/<executable_name>/" subdir
"../../../../../sandbox/<executable_name>/data/", // up 5 in tree, "/sandbox/<executable_name>/data/" subdir
"../../../../../sandbox/<executable_name>/src/", // up 5 in tree, "/sandbox/<executable_name>/src/" subdir
"../../../../../sandbox/<executable_name>/inc/", // up 5 in tree, "/sandbox/<executable_name>/inc/" subdir
"../../../../../0_Simple/<executable_name>/data/", // up 5 in tree, "/0_Simple/<executable_name>/" subdir
"../../../../../1_Utilities/<executable_name>/data/", // up 5 in tree, "/1_Utilities/<executable_name>/" subdir
"../../../../../2_Graphics/<executable_name>/data/", // up 5 in tree, "/2_Graphics/<executable_name>/" subdir
"../../../../../3_Imaging/<executable_name>/data/", // up 5 in tree, "/3_Imaging/<executable_name>/" subdir
"../../../../../4_Finance/<executable_name>/data/", // up 5 in tree, "/4_Finance/<executable_name>/" subdir
"../../../../../5_Simulations/<executable_name>/data/",// up 5 in tree, "/5_Simulations/<executable_name>/" subdir
"../../../../../6_Advanced/<executable_name>/data/", // up 5 in tree, "/6_Advanced/<executable_name>/" subdir
"../../../../../7_CUDALibraries/<executable_name>/data/", // up 5 in tree, "/7_CUDALibraries/<executable_name>/" subdir
"../../../../../8_Android/<executable_name>/data/", // up 5 in tree, "/8_Android/<executable_name>/" subdir
"../../../../../samples/<executable_name>/data/", // up 5 in tree, "/samples/<executable_name>/" subdir
"../../../../../common/", // up 5 in tree, "../../../common/" subdir
"../../../../../common/data/", // up 5 in tree, "../../../common/data/" subdir
};
// Extract the executable name
std::string executable_name;
if (executable_path != 0)
{
executable_name = std::string(executable_path);
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// Windows path delimiter
size_t delimiter_pos = executable_name.find_last_of('\\');
executable_name.erase(0, delimiter_pos + 1);
if (executable_name.rfind(".exe") != std::string::npos)
{
// we strip .exe, only if the .exe is found
executable_name.resize(executable_name.size() - 4);
}
#else
// Linux & OSX path delimiter
size_t delimiter_pos = executable_name.find_last_of('/');
executable_name.erase(0,delimiter_pos+1);
#endif
}
// Loop over all search paths and return the first hit
for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i)
{
std::string path(searchPath[i]);
size_t executable_name_pos = path.find("<executable_name>");
// If there is executable_name variable in the searchPath
// replace it with the value
if (executable_name_pos != std::string::npos)
{
if (executable_path != 0)
{
path.replace(executable_name_pos, strlen("<executable_name>"), executable_name);
}
else
{
// Skip this path entry if no executable argument is given
continue;
}
}
#ifdef _DEBUG
printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str());
#endif
// Test if the file exists
path.append(filename);
FILE *fp;
FOPEN(fp, path.c_str(), "rb");
if (fp != NULL)
{
fclose(fp);
// File found
// returning an allocated array here for backwards compatibility reasons
char *file_path = (char *) malloc(path.length() + 1);
STRCPY(file_path, path.length() + 1, path.c_str());
return file_path;
}
if (fp)
{
fclose(fp);
}
}
// File not found
return 0;
}
#endif

View File

@ -1,499 +0,0 @@
/**
* Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
// Helper Timing Functions
#ifndef HELPER_TIMER_H
#define HELPER_TIMER_H
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
// includes, system
#include <vector>
// includes, project
#include <exception.h>
// Definition of the StopWatch Interface, this is used if we don't want to use the CUT functions
// But rather in a self contained class interface
class StopWatchInterface
{
public:
StopWatchInterface() {};
virtual ~StopWatchInterface() {};
public:
//! Start time measurement
virtual void start() = 0;
//! Stop time measurement
virtual void stop() = 0;
//! Reset time counters to zero
virtual void reset() = 0;
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned, otherwise the
//! time between the last start() and stop call is returned
virtual float getTime() = 0;
//! Mean time to date based on the number of times the stopwatch has been
//! _stopped_ (ie finished sessions) and the current total time
virtual float getAverageTime() = 0;
};
//////////////////////////////////////////////////////////////////
// Begin Stopwatch timer class definitions for all OS platforms //
//////////////////////////////////////////////////////////////////
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// includes, system
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
#undef min
#undef max
//! Windows specific implementation of StopWatch
class StopWatchWin : public StopWatchInterface
{
public:
//! Constructor, default
StopWatchWin() :
start_time(), end_time(),
diff_time(0.0f), total_time(0.0f),
running(false), clock_sessions(0), freq(0), freq_set(false)
{
if (! freq_set)
{
// helper variable
LARGE_INTEGER temp;
// get the tick frequency from the OS
QueryPerformanceFrequency((LARGE_INTEGER *) &temp);
// convert to type in which it is needed
freq = ((double) temp.QuadPart) / 1000.0;
// rememeber query
freq_set = true;
}
};
// Destructor
~StopWatchWin() { };
public:
//! Start time measurement
inline void start();
//! Stop time measurement
inline void stop();
//! Reset time counters to zero
inline void reset();
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned, otherwise the
//! time between the last start() and stop call is returned
inline float getTime();
//! Mean time to date based on the number of times the stopwatch has been
//! _stopped_ (ie finished sessions) and the current total time
inline float getAverageTime();
private:
// member variables
//! Start of measurement
LARGE_INTEGER start_time;
//! End of measurement
LARGE_INTEGER end_time;
//! Time difference between the last start and stop
float diff_time;
//! TOTAL time difference between starts and stops
float total_time;
//! flag if the stop watch is running
bool running;
//! Number of times clock has been started
//! and stopped to allow averaging
int clock_sessions;
//! tick frequency
double freq;
//! flag if the frequency has been set
bool freq_set;
};
// functions, inlined
////////////////////////////////////////////////////////////////////////////////
//! Start time measurement
////////////////////////////////////////////////////////////////////////////////
inline void
StopWatchWin::start()
{
QueryPerformanceCounter((LARGE_INTEGER *) &start_time);
running = true;
}
////////////////////////////////////////////////////////////////////////////////
//! Stop time measurement and increment add to the current diff_time summation
//! variable. Also increment the number of times this clock has been run.
////////////////////////////////////////////////////////////////////////////////
inline void
StopWatchWin::stop()
{
QueryPerformanceCounter((LARGE_INTEGER *) &end_time);
diff_time = (float)
(((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
total_time += diff_time;
clock_sessions++;
running = false;
}
////////////////////////////////////////////////////////////////////////////////
//! Reset the timer to 0. Does not change the timer running state but does
//! recapture this point in time as the current start time if it is running.
////////////////////////////////////////////////////////////////////////////////
inline void
StopWatchWin::reset()
{
diff_time = 0;
total_time = 0;
clock_sessions = 0;
if (running)
{
QueryPerformanceCounter((LARGE_INTEGER *) &start_time);
}
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned added to the
//! current diff_time sum, otherwise the current summed time difference alone
//! is returned.
////////////////////////////////////////////////////////////////////////////////
inline float
StopWatchWin::getTime()
{
// Return the TOTAL time to date
float retval = total_time;
if (running)
{
LARGE_INTEGER temp;
QueryPerformanceCounter((LARGE_INTEGER *) &temp);
retval += (float)
(((double)(temp.QuadPart - start_time.QuadPart)) / freq);
}
return retval;
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. for a single run based on the total number of COMPLETED runs
//! and the total time.
////////////////////////////////////////////////////////////////////////////////
inline float
StopWatchWin::getAverageTime()
{
return (clock_sessions > 0) ? (total_time/clock_sessions) : 0.0f;
}
#else
// Declarations for Stopwatch on Linux and Mac OSX
// includes, system
#include <ctime>
#include <sys/time.h>
//! Windows specific implementation of StopWatch
class StopWatchLinux : public StopWatchInterface
{
public:
//! Constructor, default
StopWatchLinux() :
start_time(), diff_time(0.0), total_time(0.0),
running(false), clock_sessions(0)
{ };
// Destructor
virtual ~StopWatchLinux()
{ };
public:
//! Start time measurement
inline void start();
//! Stop time measurement
inline void stop();
//! Reset time counters to zero
inline void reset();
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned, otherwise the
//! time between the last start() and stop call is returned
inline float getTime();
//! Mean time to date based on the number of times the stopwatch has been
//! _stopped_ (ie finished sessions) and the current total time
inline float getAverageTime();
private:
// helper functions
//! Get difference between start time and current time
inline float getDiffTime();
private:
// member variables
//! Start of measurement
struct timeval start_time;
//! Time difference between the last start and stop
float diff_time;
//! TOTAL time difference between starts and stops
float total_time;
//! flag if the stop watch is running
bool running;
//! Number of times clock has been started
//! and stopped to allow averaging
int clock_sessions;
};
// functions, inlined
////////////////////////////////////////////////////////////////////////////////
//! Start time measurement
////////////////////////////////////////////////////////////////////////////////
inline void
StopWatchLinux::start()
{
gettimeofday(&start_time, 0);
running = true;
}
////////////////////////////////////////////////////////////////////////////////
//! Stop time measurement and increment add to the current diff_time summation
//! variable. Also increment the number of times this clock has been run.
////////////////////////////////////////////////////////////////////////////////
inline void
StopWatchLinux::stop()
{
diff_time = getDiffTime();
total_time += diff_time;
running = false;
clock_sessions++;
}
////////////////////////////////////////////////////////////////////////////////
//! Reset the timer to 0. Does not change the timer running state but does
//! recapture this point in time as the current start time if it is running.
////////////////////////////////////////////////////////////////////////////////
inline void
StopWatchLinux::reset()
{
diff_time = 0;
total_time = 0;
clock_sessions = 0;
if (running)
{
gettimeofday(&start_time, 0);
}
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. after start. If the stop watch is still running (i.e. there
//! was no call to stop()) then the elapsed time is returned added to the
//! current diff_time sum, otherwise the current summed time difference alone
//! is returned.
////////////////////////////////////////////////////////////////////////////////
inline float
StopWatchLinux::getTime()
{
// Return the TOTAL time to date
float retval = total_time;
if (running)
{
retval += getDiffTime();
}
return retval;
}
////////////////////////////////////////////////////////////////////////////////
//! Time in msec. for a single run based on the total number of COMPLETED runs
//! and the total time.
////////////////////////////////////////////////////////////////////////////////
inline float
StopWatchLinux::getAverageTime()
{
return (clock_sessions > 0) ? (total_time/clock_sessions) : 0.0f;
}
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
inline float
StopWatchLinux::getDiffTime()
{
struct timeval t_time;
gettimeofday(&t_time, 0);
// time difference in milli-seconds
return (float)(1000.0 * (t_time.tv_sec - start_time.tv_sec)
+ (0.001 * (t_time.tv_usec - start_time.tv_usec)));
}
#endif // WIN32
////////////////////////////////////////////////////////////////////////////////
//! Timer functionality exported
////////////////////////////////////////////////////////////////////////////////
//! Create a new timer
//! @return true if a time has been created, otherwise false
//! @param name of the new timer, 0 if the creation failed
////////////////////////////////////////////////////////////////////////////////
inline bool
sdkCreateTimer(StopWatchInterface **timer_interface)
{
//printf("sdkCreateTimer called object %08x\n", (void *)*timer_interface);
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
*timer_interface = (StopWatchInterface *)new StopWatchWin();
#else
*timer_interface = (StopWatchInterface *)new StopWatchLinux();
#endif
return (*timer_interface != NULL) ? true : false;
}
////////////////////////////////////////////////////////////////////////////////
//! Delete a timer
//! @return true if a time has been deleted, otherwise false
//! @param name of the timer to delete
////////////////////////////////////////////////////////////////////////////////
inline bool
sdkDeleteTimer(StopWatchInterface **timer_interface)
{
//printf("sdkDeleteTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
{
delete *timer_interface;
*timer_interface = NULL;
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Start the time with name \a name
//! @param name name of the timer to start
////////////////////////////////////////////////////////////////////////////////
inline bool
sdkStartTimer(StopWatchInterface **timer_interface)
{
//printf("sdkStartTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
{
(*timer_interface)->start();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Stop the time with name \a name. Does not reset.
//! @param name name of the timer to stop
////////////////////////////////////////////////////////////////////////////////
inline bool
sdkStopTimer(StopWatchInterface **timer_interface)
{
// printf("sdkStopTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
{
(*timer_interface)->stop();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Resets the timer's counter.
//! @param name name of the timer to reset.
////////////////////////////////////////////////////////////////////////////////
inline bool
sdkResetTimer(StopWatchInterface **timer_interface)
{
// printf("sdkResetTimer called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
{
(*timer_interface)->reset();
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
//! Return the average time for timer execution as the total time
//! for the timer dividied by the number of completed (stopped) runs the timer
//! has made.
//! Excludes the current running time if the timer is currently running.
//! @param name name of the timer to return the time of
////////////////////////////////////////////////////////////////////////////////
inline float
sdkGetAverageTimerValue(StopWatchInterface **timer_interface)
{
// printf("sdkGetAverageTimerValue called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
{
return (*timer_interface)->getAverageTime();
}
else
{
return 0.0f;
}
}
////////////////////////////////////////////////////////////////////////////////
//! Total execution time for the timer over all runs since the last reset
//! or timer creation.
//! @param name name of the timer to obtain the value of.
////////////////////////////////////////////////////////////////////////////////
inline float
sdkGetTimerValue(StopWatchInterface **timer_interface)
{
// printf("sdkGetTimerValue called object %08x\n", (void *)*timer_interface);
if (*timer_interface)
{
return (*timer_interface)->getTime();
}
else
{
return 0.0f;
}
}
#endif // HELPER_TIMER_H

View File

@ -19,9 +19,7 @@
if(ENABLE_CUDA)
set(OPT_TRACKING_BLOCKS ${OPT_TRACKING_BLOCKS} gps_l1_ca_dll_pll_tracking_gpu_cc.cc)
set(OPT_TRACKING_INCLUDES ${OPT_TRACKING_INCLUDES}
${CUDA_INCLUDE_DIRS}
${CMAKE_SOURCE_DIR}/src/algorithms/libs/cudahelpers)
set(OPT_TRACKING_INCLUDES ${OPT_TRACKING_INCLUDES} ${CUDA_INCLUDE_DIRS})
set(OPT_TRACKING_LIBRARIES ${OPT_TRACKING_LIBRARIES} ${CUDA_LIBRARIES})
endif(ENABLE_CUDA)

View File

@ -47,11 +47,9 @@
#include "lock_detectors.h"
#include "GPS_L1_CA.h"
#include "control_message_factory.h"
#include <volk/volk.h> //volk_alignement
// includes
#include <volk/volk.h> // volk_alignment
#include <cuda_profiler_api.h>
#include <helper_functions.h> // helper for shared functions common to CUDA Samples
#include <helper_cuda.h> // helper functions for CUDA error checking and initialization
/*!
* \todo Include in definition header file
@ -131,24 +129,24 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
multicorrelator_gpu->init_cuda_integrated_resampler(0, NULL, 2 * d_vector_length , GPS_L1_CA_CODE_LENGTH_CHIPS , N_CORRELATORS);
// Get space for the resampled early / prompt / late local replicas
checkCudaErrors(cudaHostAlloc((void**)&d_local_code_shift_chips, N_CORRELATORS * sizeof(float), cudaHostAllocMapped ));
cudaHostAlloc((void**)&d_local_code_shift_chips, N_CORRELATORS * sizeof(float), cudaHostAllocMapped );
//allocate host memory
//pinned memory mode - use special function to get OS-pinned memory
checkCudaErrors(cudaHostAlloc((void**)&in_gpu, 2 * d_vector_length * sizeof(gr_complex), cudaHostAllocMapped ));
cudaHostAlloc((void**)&in_gpu, 2 * d_vector_length * sizeof(gr_complex), cudaHostAllocMapped );
//old local codes vector
//checkCudaErrors(cudaHostAlloc((void**)&d_local_codes_gpu, (V_LEN * sizeof(gr_complex))*N_CORRELATORS, cudaHostAllocWriteCombined ));
// (cudaHostAlloc((void**)&d_local_codes_gpu, (V_LEN * sizeof(gr_complex))*N_CORRELATORS, cudaHostAllocWriteCombined ));
//new integrated shifts
//checkCudaErrors(cudaHostAlloc((void**)&d_local_codes_gpu, (2 * d_vector_length * sizeof(gr_complex)), cudaHostAllocWriteCombined ));
// (cudaHostAlloc((void**)&d_local_codes_gpu, (2 * d_vector_length * sizeof(gr_complex)), cudaHostAllocWriteCombined ));
// correlator outputs (scalar)
checkCudaErrors(cudaHostAlloc((void**)&d_corr_outs_gpu ,sizeof(gr_complex)*N_CORRELATORS, cudaHostAllocWriteCombined ));
cudaHostAlloc((void**)&d_corr_outs_gpu ,sizeof(gr_complex)*N_CORRELATORS, cudaHostAllocWriteCombined );
//map to EPL pointers
d_Early = &d_corr_outs_gpu[0];
d_Prompt = &d_corr_outs_gpu[1];
d_Prompt = &d_corr_outs_gpu[1];
d_Late = &d_corr_outs_gpu[2];
//--- Perform initializations ------------------------------
@ -181,7 +179,6 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
systemName["G"] = std::string("GPS");
systemName["S"] = std::string("SBAS");
set_relative_rate(1.0/((double)d_vector_length*2));
d_channel_internal_queue = 0;
@ -303,10 +300,10 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
gr_vector_const_void_star &input_items, gr_vector_void_star &output_items)
{
// process vars
float carr_error_hz=0.0;
float carr_error_filt_hz=0.0;
float code_error_chips=0.0;
float code_error_filt_chips=0.0;
float carr_error_hz = 0.0;
float carr_error_filt_hz = 0.0;
float code_error_chips = 0.0;
float code_error_filt_chips = 0.0;
// Block input data and block output stream pointers
const gr_complex* in = (gr_complex*) input_items[0];
@ -339,20 +336,20 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
// UPDATE NCO COMMAND
float phase_step_rad = static_cast<float>(GPS_TWO_PI) * d_carrier_doppler_hz / static_cast<float>(d_fs_in);
//code resampler on GPU (new)
//code resampler on GPU (new)
float code_phase_step_chips = static_cast<float>(d_code_freq_chips) / static_cast<float>(d_fs_in);
float rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);
cudaProfilerStart();
multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda(
d_corr_outs_gpu,
in,
d_rem_carr_phase_rad,
phase_step_rad,
code_phase_step_chips,
rem_code_phase_chips,
d_current_prn_length_samples,
3);
d_corr_outs_gpu,
in,
d_rem_carr_phase_rad,
phase_step_rad,
code_phase_step_chips,
rem_code_phase_chips,
d_current_prn_length_samples,
3);
cudaProfilerStop();
// ################## PLL ##########################################################

View File

@ -22,15 +22,11 @@ if(ENABLE_CUDA)
# set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --gpu-architecture sm_30)
list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_30,code=sm_30; -std=c++11;-O3; -use_fast_math -default-stream per-thread")
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
CUDA_INCLUDE_DIRECTORIES(
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/../../libs/cudahelpers
)
CUDA_INCLUDE_DIRECTORIES( ${CMAKE_CURRENT_SOURCE_DIR})
set(LIB_TYPE STATIC) #set the lib type
CUDA_ADD_LIBRARY(CUDA_CORRELATOR_LIB ${LIB_TYPE} cuda_multicorrelator.h cuda_multicorrelator.cu)
set(OPT_TRACKING_LIBRARIES ${OPT_TRACKING_LIBRARIES} CUDA_CORRELATOR_LIB)
set(OPT_TRACKING_INCLUDES ${OPT_TRACKING_INCLUDES} ${CUDA_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}/../../libs/cudahelpers)
set(OPT_TRACKING_INCLUDES ${OPT_TRACKING_INCLUDES} ${CUDA_INCLUDE_DIRS} )
endif(ENABLE_CUDA)

View File

@ -49,9 +49,6 @@
// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>
// helper functions and utilities to work with CUDA
#include <helper_cuda.h>
#include <helper_functions.h>
#define ACCUM_N 256
@ -224,7 +221,6 @@ __global__ void scalarProdGPUCPXxN(
//int vectorBase = IMUL(elementN, vec);
//int vectorEnd = vectorBase + elementN;
////////////////////////////////////////////////////////////////////////
// Each accumulator cycles through vectors with
// stride equal to number of total number of accumulators ACCUM_N
@ -392,28 +388,28 @@ bool cuda_multicorrelator::init_cuda(const int argc, const char **argv, int sign
// printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
// }
//checkCudaErrors(cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
// (cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
// ALLOCATE GPU MEMORY FOR INPUT/OUTPUT and INTERNAL vectors
size_t size = signal_length_samples * sizeof(GPU_Complex);
checkCudaErrors(cudaMalloc((void **)&d_sig_in, size));
//checkCudaErrors(cudaMalloc((void **)&d_nco_in, size));
checkCudaErrors(cudaMalloc((void **)&d_sig_doppler_wiped, size));
cudaMalloc((void **)&d_sig_in, size);
// (cudaMalloc((void **)&d_nco_in, size));
cudaMalloc((void **)&d_sig_doppler_wiped, size);
// old version: all local codes are independent vectors
//checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, size*n_correlators));
// (cudaMalloc((void **)&d_local_codes_in, size*n_correlators));
// new version: only one vector with extra samples to shift the local code for the correlator set
// Required: The last correlator tap in d_shifts_samples has the largest sample shift
size_t size_local_code_bytes = local_codes_length_samples * sizeof(GPU_Complex);
checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, size_local_code_bytes));
checkCudaErrors(cudaMalloc((void **)&d_shifts_samples, sizeof(int)*n_correlators));
cudaMalloc((void **)&d_local_codes_in, size_local_code_bytes);
cudaMalloc((void **)&d_shifts_samples, sizeof(int)*n_correlators);
//scalars
checkCudaErrors(cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators));
cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators);
// Launch the Vector Add CUDA Kernel
threadsPerBlock = 256;
@ -481,30 +477,30 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(
// printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
// }
//checkCudaErrors(cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
// (cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
// ALLOCATE GPU MEMORY FOR INPUT/OUTPUT and INTERNAL vectors
size_t size = signal_length_samples * sizeof(GPU_Complex);
checkCudaErrors(cudaMalloc((void **)&d_sig_in, size));
checkCudaErrors(cudaMemset(d_sig_in,0,size));
cudaMalloc((void **)&d_sig_in, size);
cudaMemset(d_sig_in,0,size);
//checkCudaErrors(cudaMalloc((void **)&d_nco_in, size));
checkCudaErrors(cudaMalloc((void **)&d_sig_doppler_wiped, size));
checkCudaErrors(cudaMemset(d_sig_doppler_wiped,0,size));
// (cudaMalloc((void **)&d_nco_in, size));
cudaMalloc((void **)&d_sig_doppler_wiped, size);
cudaMemset(d_sig_doppler_wiped,0,size);
checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>)*code_length_chips));
checkCudaErrors(cudaMemset(d_local_codes_in,0,sizeof(std::complex<float>)*code_length_chips));
cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>)*code_length_chips);
cudaMemset(d_local_codes_in,0,sizeof(std::complex<float>)*code_length_chips);
d_code_length_chips=code_length_chips;
checkCudaErrors(cudaMalloc((void **)&d_shifts_chips, sizeof(float)*n_correlators));
checkCudaErrors(cudaMemset(d_shifts_chips,0,sizeof(float)*n_correlators));
cudaMalloc((void **)&d_shifts_chips, sizeof(float)*n_correlators);
cudaMemset(d_shifts_chips,0,sizeof(float)*n_correlators);
//scalars
checkCudaErrors(cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators));
checkCudaErrors(cudaMemset(d_corr_out,0,sizeof(std::complex<float>)*n_correlators));
cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators);
cudaMemset(d_corr_out,0,sizeof(std::complex<float>)*n_correlators);
// Launch the Vector Add CUDA Kernel
threadsPerBlock = 256;
@ -523,12 +519,12 @@ bool cuda_multicorrelator::set_local_code_and_taps(
)
{
// local code CPU -> GPU copy memory
checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex)*code_length_chips, cudaMemcpyHostToDevice,stream1));
cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex)*code_length_chips, cudaMemcpyHostToDevice,stream1);
d_code_length_chips=(float)code_length_chips;
// Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!)
checkCudaErrors(cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float)*n_correlators,
cudaMemcpyHostToDevice,stream1));
cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float)*n_correlators,
cudaMemcpyHostToDevice,stream1);
return true;
}
@ -550,40 +546,40 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(
// input signal CPU -> GPU copy memory
checkCudaErrors(cudaMemcpyAsync(d_sig_in, sig_in, memSize,
cudaMemcpyHostToDevice, stream1));
cudaMemcpyAsync(d_sig_in, sig_in, memSize,
cudaMemcpyHostToDevice, stream1);
//***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! ****
//checkCudaErrors(cudaMemcpyAsync(d_nco_in, nco_in, memSize,
// (cudaMemcpyAsync(d_nco_in, nco_in, memSize,
// cudaMemcpyHostToDevice, stream1));
// old version: all local codes are independent vectors
//checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize*n_correlators,
// (cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize*n_correlators,
// cudaMemcpyHostToDevice, stream2));
// new version: only one vector with extra samples to shift the local code for the correlator set
// Required: The last correlator tap in d_shifts_samples has the largest sample shift
// local code CPU -> GPU copy memory
checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize+sizeof(std::complex<float>)*shifts_samples[n_correlators-1],
cudaMemcpyHostToDevice, stream2));
cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize+sizeof(std::complex<float>)*shifts_samples[n_correlators-1],
cudaMemcpyHostToDevice, stream2);
// Correlator shifts vector CPU -> GPU copy memory
checkCudaErrors(cudaMemcpyAsync(d_shifts_samples, shifts_samples, sizeof(int)*n_correlators,
cudaMemcpyHostToDevice, stream2));
cudaMemcpyAsync(d_shifts_samples, shifts_samples, sizeof(int)*n_correlators,
cudaMemcpyHostToDevice, stream2);
//Launch carrier wipe-off kernel here, while local codes are being copied to GPU!
checkCudaErrors(cudaStreamSynchronize(stream1));
cudaStreamSynchronize(stream1);
CUDA_32fc_Doppler_wipeoff<<<blocksPerGrid, threadsPerBlock,0, stream1>>>(d_sig_doppler_wiped, d_sig_in,rem_carrier_phase_in_rad,phase_step_rad, signal_length_samples);
//printf("CUDA kernel launch with %d blocks of %d threads\n", blocksPerGrid, threadsPerBlock);
//wait for Doppler wipeoff end...
checkCudaErrors(cudaStreamSynchronize(stream1));
checkCudaErrors(cudaStreamSynchronize(stream2));
//checkCudaErrors(cudaDeviceSynchronize());
cudaStreamSynchronize(stream1);
cudaStreamSynchronize(stream2);
// (cudaDeviceSynchronize());
//old
// scalarProdGPUCPXxN<<<blocksPerGrid, threadsPerBlock,0 ,stream2>>>(
@ -604,15 +600,15 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(
n_correlators,
signal_length_samples
);
checkCudaErrors(cudaGetLastError());
cudaGetLastError();
//wait for correlators end...
checkCudaErrors(cudaStreamSynchronize(stream2));
cudaStreamSynchronize(stream2);
// Copy the device result vector in device memory to the host result vector
// in host memory.
//scalar products (correlators outputs)
checkCudaErrors(cudaMemcpy(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
cudaMemcpyDeviceToHost));
cudaMemcpy(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
cudaMemcpyDeviceToHost);
return true;
}
@ -629,19 +625,19 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
size_t memSize = signal_length_samples * sizeof(std::complex<float>);
// input signal CPU -> GPU copy memory
checkCudaErrors(cudaMemcpyAsync(d_sig_in, sig_in, memSize,
cudaMemcpyHostToDevice, stream2));
cudaMemcpyAsync(d_sig_in, sig_in, memSize,
cudaMemcpyHostToDevice, stream2);
//***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! ****
//Launch carrier wipe-off kernel here, while local codes are being copied to GPU!
checkCudaErrors(cudaStreamSynchronize(stream2));
cudaStreamSynchronize(stream2);
CUDA_32fc_Doppler_wipeoff<<<blocksPerGrid, threadsPerBlock,0, stream2>>>(d_sig_doppler_wiped, d_sig_in,rem_carrier_phase_in_rad,phase_step_rad, signal_length_samples);
//wait for Doppler wipeoff end...
checkCudaErrors(cudaStreamSynchronize(stream1));
checkCudaErrors(cudaStreamSynchronize(stream2));
cudaStreamSynchronize(stream1);
cudaStreamSynchronize(stream2);
//launch the multitap correlator with integrated local code resampler!
@ -657,16 +653,16 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
signal_length_samples
);
checkCudaErrors(cudaGetLastError());
cudaGetLastError();
//wait for correlators end...
checkCudaErrors(cudaStreamSynchronize(stream1));
cudaStreamSynchronize(stream1);
// Copy the device result vector in device memory to the host result vector
// in host memory.
//scalar products (correlators outputs)
checkCudaErrors(cudaMemcpyAsync(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
cudaMemcpyDeviceToHost,stream1));
checkCudaErrors(cudaStreamSynchronize(stream1));
cudaMemcpyAsync(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
cudaMemcpyDeviceToHost,stream1);
cudaStreamSynchronize(stream1);
return true;
}
@ -708,7 +704,7 @@ bool cuda_multicorrelator::free_cuda()
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
//checkCudaErrors(cudaDeviceReset());
// (cudaDeviceReset());
return true;
}