Remove the cudahelpers library and its usage because of a copyright issue. This does not

affect functionality.
2025-11-25 11:34:53 +00:00 · 2015-09-10 17:46:38 +02:00
parent a6608c47a2
commit a84b4baef0
13 changed files with 75 additions and 5796 deletions
--- a/src/algorithms/libs/cudahelpers/exception.h
+++ b/src/algorithms/libs/cudahelpers/exception.h
@@ -1,151 +0,0 @@
-/*
-* Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
-*
-* Please refer to the NVIDIA end user license agreement (EULA) associated
-* with this source code for terms and conditions that govern your use of
-* this software. Any use, reproduction, disclosure, or distribution of
-* this software and related documentation outside the terms of the EULA
-* is strictly prohibited.
-*
-*/
-
-/* CUda UTility Library */
-#ifndef _EXCEPTION_H_
-#define _EXCEPTION_H_
-
-// includes, system
-#include <exception>
-#include <stdexcept>
-#include <iostream>
-#include <stdlib.h>
-
-//! Exception wrapper.
-//! @param Std_Exception Exception out of namespace std for easy typing.
-template<class Std_Exception>
-class Exception : public Std_Exception
-{
-    public:
-
-        //! @brief Static construction interface
-        //! @return Alwayss throws ( Located_Exception<Exception>)
-        //! @param file file in which the Exception occurs
-        //! @param line line in which the Exception occurs
-        //! @param detailed details on the code fragment causing the Exception
-        static void throw_it(const char *file,
-                             const int line,
-                             const char *detailed = "-");
-
-        //! Static construction interface
-        //! @return Alwayss throws ( Located_Exception<Exception>)
-        //! @param file file in which the Exception occurs
-        //! @param line line in which the Exception occurs
-        //! @param detailed details on the code fragment causing the Exception
-        static void throw_it(const char *file,
-                             const int line,
-                             const std::string &detailed);
-
-        //! Destructor
-        virtual ~Exception() throw();
-
-    private:
-
-        //! Constructor, default (private)
-        Exception();
-
-        //! Constructor, standard
-        //! @param str string returned by what()
-        Exception(const std::string &str);
-
-};
-
-////////////////////////////////////////////////////////////////////////////////
-//! Exception handler function for arbitrary exceptions
-//! @param ex exception to handle
-////////////////////////////////////////////////////////////////////////////////
-template<class Exception_Typ>
-inline void
-handleException(const Exception_Typ &ex)
-{
-    std::cerr << ex.what() << std::endl;
-
-    exit(EXIT_FAILURE);
-}
-
-//! Convenience macros
-
-//! Exception caused by dynamic program behavior, e.g. file does not exist
-#define RUNTIME_EXCEPTION( msg) \
-    Exception<std::runtime_error>::throw_it( __FILE__, __LINE__, msg)
-
-//! Logic exception in program, e.g. an assert failed
-#define LOGIC_EXCEPTION( msg) \
-    Exception<std::logic_error>::throw_it( __FILE__, __LINE__, msg)
-
-//! Out of range exception
-#define RANGE_EXCEPTION( msg) \
-    Exception<std::range_error>::throw_it( __FILE__, __LINE__, msg)
-
-////////////////////////////////////////////////////////////////////////////////
-//! Implementation
-
-// includes, system
-#include <sstream>
-
-////////////////////////////////////////////////////////////////////////////////
-//! Static construction interface.
-//! @param  Exception causing code fragment (file and line) and detailed infos.
-////////////////////////////////////////////////////////////////////////////////
-/*static*/ template<class Std_Exception>
-void
-Exception<Std_Exception>::
-throw_it(const char *file, const int line, const char *detailed)
-{
-    std::stringstream s;
-
-    // Quiet heavy-weight but exceptions are not for
-    // performance / release versions
-    s << "Exception in file '" << file << "' in line " << line << "\n"
-      << "Detailed description: " << detailed << "\n";
-
-    throw Exception(s.str());
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Static construction interface.
-//! @param  Exception causing code fragment (file and line) and detailed infos.
-////////////////////////////////////////////////////////////////////////////////
-/*static*/ template<class Std_Exception>
-void
-Exception<Std_Exception>::
-throw_it(const char *file, const int line, const std::string &msg)
-{
-    throw_it(file, line, msg.c_str());
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Constructor, default (private).
-////////////////////////////////////////////////////////////////////////////////
-template<class Std_Exception>
-Exception<Std_Exception>::Exception() :
-    Std_Exception("Unknown Exception.\n")
-{ }
-
-////////////////////////////////////////////////////////////////////////////////
-//! Constructor, standard (private).
-//! String returned by what().
-////////////////////////////////////////////////////////////////////////////////
-template<class Std_Exception>
-Exception<Std_Exception>::Exception(const std::string &s) :
-    Std_Exception(s)
-{ }
-
-////////////////////////////////////////////////////////////////////////////////
-//! Destructor
-////////////////////////////////////////////////////////////////////////////////
-template<class Std_Exception>
-Exception<Std_Exception>::~Exception() throw() { }
-
-// functions, exported
-
-#endif // #ifndef _EXCEPTION_H_
-
--- a/src/algorithms/libs/cudahelpers/helper_cuda.h
+++ b/src/algorithms/libs/cudahelpers/helper_cuda.h
--- a/src/algorithms/libs/cudahelpers/helper_cuda_drvapi.h
+++ b/src/algorithms/libs/cudahelpers/helper_cuda_drvapi.h
@@ -1,517 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-// Helper functions for CUDA Driver API error handling (make sure that CUDA_H is included in your projects)
-#ifndef HELPER_CUDA_DRVAPI_H
-#define HELPER_CUDA_DRVAPI_H
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <helper_string.h>
-#include <drvapi_error_string.h>
-
-#ifndef MAX
-#define MAX(a,b) (a > b ? a : b)
-#endif
-
-#ifndef HELPER_CUDA_H
-inline int ftoi(float value)
-{
-    return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
-}
-#endif
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-////////////////////////////////////////////////////////////////////////////////
-// These are CUDA Helper functions
-
-// add a level of protection to the CUDA SDK samples, let's force samples to explicitly include CUDA.H
-#ifdef  __cuda_cuda_h__
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#ifndef checkCudaErrors
-#define checkCudaErrors(err)  __checkCudaErrors (err, __FILE__, __LINE__)
-
-// These are the inline versions for all of the SDK helper functions
-inline void __checkCudaErrors(CUresult err, const char *file, const int line)
-{
-    if (CUDA_SUCCESS != err)
-    {
-        fprintf(stderr, "checkCudaErrors() Driver API error = %04d \"%s\" from file <%s>, line %i.\n",
-                err, getCudaDrvErrorString(err), file, line);
-        exit(EXIT_FAILURE);
-    }
-}
-#endif
-
-#ifdef getLastCudaDrvErrorMsg
-#undef getLastCudaDrvErrorMsg
-#endif
-
-#define getLastCudaDrvErrorMsg(msg)           __getLastCudaDrvErrorMsg  (msg, __FILE__, __LINE__)
-
-inline void __getLastCudaDrvErrorMsg(const char *msg, const char *file, const int line)
-{
-    CUresult err = cuCtxSynchronize();
-
-    if (CUDA_SUCCESS != err)
-    {
-        fprintf(stderr, "getLastCudaDrvErrorMsg -> %s", msg);
-        fprintf(stderr, "getLastCudaDrvErrorMsg -> cuCtxSynchronize API error = %04d \"%s\" in file <%s>, line %i.\n",
-                err, getCudaDrvErrorString(err), file, line);
-        exit(EXIT_FAILURE);
-    }
-}
-
-// This function wraps the CUDA Driver API into a template function
-template <class T>
-inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
-{
-    CUresult error_result = cuDeviceGetAttribute(attribute, device_attribute, device);
-
-    if (error_result != CUDA_SUCCESS)
-    {
-        printf("cuDeviceGetAttribute returned %d\n-> %s\n", (int)error_result, getCudaDrvErrorString(error_result));
-        exit(EXIT_SUCCESS);
-    }
-}
-#endif
-
-// Beginning of GPU Architecture definitions
-inline int _ConvertSMVer2CoresDRV(int major, int minor)
-{
-    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
-    typedef struct
-    {
-        int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
-        int Cores;
-    } sSMtoCores;
-
-    sSMtoCores nGpuArchCoresPerSM[] =
-    {
-        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
-        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
-        { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
-        { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
-        { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
-        { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
-        { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
-        { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
-        {   -1, -1 }
-    };
-
-    int index = 0;
-
-    while (nGpuArchCoresPerSM[index].SM != -1)
-    {
-        if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
-        {
-            return nGpuArchCoresPerSM[index].Cores;
-        }
-
-        index++;
-    }
-
-    // If we don't find the values, we default use the previous one to run properly
-    printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
-    return nGpuArchCoresPerSM[index-1].Cores;
-}
-// end of GPU Architecture definitions
-
-#ifdef __cuda_cuda_h__
-// General GPU Device CUDA Initialization
-inline int gpuDeviceInitDRV(int ARGC, const char **ARGV)
-{
-    int cuDevice = 0;
-    int deviceCount = 0;
-    CUresult err = cuInit(0);
-
-    if (CUDA_SUCCESS == err)
-    {
-        checkCudaErrors(cuDeviceGetCount(&deviceCount));
-    }
-
-    if (deviceCount == 0)
-    {
-        fprintf(stderr, "cudaDeviceInit error: no devices supporting CUDA\n");
-        exit(EXIT_FAILURE);
-    }
-
-    int dev = 0;
-    dev = getCmdLineArgumentInt(ARGC, (const char **) ARGV, "device=");
-
-    if (dev < 0)
-    {
-        dev = 0;
-    }
-
-    if (dev > deviceCount-1)
-    {
-        fprintf(stderr, "\n");
-        fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
-        fprintf(stderr, ">> cudaDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
-        fprintf(stderr, "\n");
-        return -dev;
-    }
-
-    checkCudaErrors(cuDeviceGet(&cuDevice, dev));
-    char name[100];
-    cuDeviceGetName(name, 100, cuDevice);
-
-    int computeMode;
-    getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, dev);
-
-    if (computeMode == CU_COMPUTEMODE_PROHIBITED)
-    {
-        fprintf(stderr, "Error: device is running in <CU_COMPUTEMODE_PROHIBITED>, no threads can use this CUDA Device.\n");
-        return -1;
-    }
-
-    if (checkCmdLineFlag(ARGC, (const char **) ARGV, "quiet") == false)
-    {
-        printf("gpuDeviceInitDRV() Using CUDA Device [%d]: %s\n", dev, name);
-    }
-
-    return dev;
-}
-
-// This function returns the best GPU based on performance
-inline int gpuGetMaxGflopsDeviceIdDRV()
-{
-    CUdevice current_device  = 0;
-    CUdevice max_perf_device = 0;
-    int device_count     = 0;
-    int sm_per_multiproc = 0;
-    unsigned long long max_compute_perf = 0;
-    int best_SM_arch = 0;
-    int major = 0;
-    int minor = 0;
-    int multiProcessorCount;
-    int clockRate;
-    int devices_prohibited = 0;
-
-    cuInit(0);
-    checkCudaErrors(cuDeviceGetCount(&device_count));
-
-    if (device_count == 0)
-    {
-        fprintf(stderr, "gpuGetMaxGflopsDeviceIdDRV error: no devices supporting CUDA\n");
-        exit(EXIT_FAILURE);
-    }
-
-    // Find the best major SM Architecture GPU device
-    while (current_device < device_count)
-    {
-        checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
-
-        if (major > 0 && major < 9999)
-        {
-            best_SM_arch = MAX(best_SM_arch, major);
-        }
-
-        current_device++;
-    }
-
-    // Find the best CUDA capable GPU device
-    current_device = 0;
-
-    while (current_device < device_count)
-    {
-        checkCudaErrors(cuDeviceGetAttribute(&multiProcessorCount,
-                                             CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
-                                             current_device));
-        checkCudaErrors(cuDeviceGetAttribute(&clockRate,
-                                             CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
-                                             current_device));
-        checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
-
-        int computeMode;
-        getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
-
-        if (computeMode != CU_COMPUTEMODE_PROHIBITED)
-        {
-            if (major == 9999 && minor == 9999)
-            {
-                sm_per_multiproc = 1;
-            }
-            else
-            {
-                sm_per_multiproc = _ConvertSMVer2CoresDRV(major, minor);
-            }
-
-            unsigned long long compute_perf = (unsigned long long) (multiProcessorCount * sm_per_multiproc * clockRate);
-
-            if (compute_perf  > max_compute_perf)
-            {
-                // If we find GPU with SM major > 2, search only these
-                if (best_SM_arch > 2)
-                {
-                    // If our device==dest_SM_arch, choose this, or else pass
-                    if (major == best_SM_arch)
-                    {
-                        max_compute_perf  = compute_perf;
-                        max_perf_device   = current_device;
-                    }
-                }
-                else
-                {
-                    max_compute_perf  = compute_perf;
-                    max_perf_device   = current_device;
-                }
-            }
-        }
-        else
-        {
-            devices_prohibited++;
-        }
-
-        ++current_device;
-    }
-
-    if (devices_prohibited == device_count)
-    {    
-        fprintf(stderr, "gpuGetMaxGflopsDeviceIdDRV error: all devices have compute mode prohibited.\n");
-        exit(EXIT_FAILURE);
-    }    
-
-    return max_perf_device;
-}
-
-// This function returns the best Graphics GPU based on performance
-inline int gpuGetMaxGflopsGLDeviceIdDRV()
-{
-    CUdevice current_device = 0, max_perf_device = 0;
-    int device_count     = 0, sm_per_multiproc = 0;
-    int max_compute_perf = 0, best_SM_arch     = 0;
-    int major = 0, minor = 0, multiProcessorCount, clockRate;
-    int bTCC = 0;
-    int devices_prohibited = 0;
-    char deviceName[256];
-
-    cuInit(0);
-    checkCudaErrors(cuDeviceGetCount(&device_count));
-
-    if (device_count == 0)
-    {
-        fprintf(stderr, "gpuGetMaxGflopsGLDeviceIdDRV error: no devices supporting CUDA\n");
-        exit(EXIT_FAILURE);
-    }
-
-    // Find the best major SM Architecture GPU device that are graphics devices
-    while (current_device < device_count)
-    {
-        checkCudaErrors(cuDeviceGetName(deviceName, 256, current_device));
-        checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
-
-#if CUDA_VERSION >= 3020
-        checkCudaErrors(cuDeviceGetAttribute(&bTCC,  CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device));
-#else
-
-        // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
-        if (deviceName[0] == 'T')
-        {
-            bTCC = 1;
-        }
-
-#endif
-
-        int computeMode;
-        getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
-
-        if (computeMode != CU_COMPUTEMODE_PROHIBITED)
-        {
-            if (!bTCC)
-            {
-                if (major > 0 && major < 9999)
-                {
-                    best_SM_arch = MAX(best_SM_arch, major);
-                }
-            }
-        }
-        else
-        {
-            devices_prohibited++;
-        }
-
-        current_device++;
-    }
-
-    if (devices_prohibited == device_count)
-    {
-        fprintf(stderr, "gpuGetMaxGflopsGLDeviceIdDRV error: all devices have compute mode prohibited.\n");
-        exit(EXIT_FAILURE);
-    }
-
-    // Find the best CUDA capable GPU device
-    current_device = 0;
-
-    while (current_device < device_count)
-    {
-        checkCudaErrors(cuDeviceGetAttribute(&multiProcessorCount,
-                                             CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
-                                             current_device));
-        checkCudaErrors(cuDeviceGetAttribute(&clockRate,
-                                             CU_DEVICE_ATTRIBUTE_CLOCK_RATE,
-                                             current_device));
-        checkCudaErrors(cuDeviceComputeCapability(&major, &minor, current_device));
-
-#if CUDA_VERSION >= 3020
-        checkCudaErrors(cuDeviceGetAttribute(&bTCC,  CU_DEVICE_ATTRIBUTE_TCC_DRIVER, current_device));
-#else
-
-        // Assume a Tesla GPU is running in TCC if we are running CUDA 3.1
-        if (deviceName[0] == 'T')
-        {
-            bTCC = 1;
-        }
-
-#endif
-
-        int computeMode;
-        getCudaAttribute<int>(&computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, current_device);
-
-        if (computeMode != CU_COMPUTEMODE_PROHIBITED)
-        {
-            if (major == 9999 && minor == 9999)
-            {
-                sm_per_multiproc = 1;
-            }
-            else
-            {
-                sm_per_multiproc = _ConvertSMVer2CoresDRV(major, minor);
-            }
-
-            // If this is a Tesla based GPU and SM 2.0, and TCC is disabled, this is a contendor
-            if (!bTCC)   // Is this GPU running the TCC driver?  If so we pass on this
-            {
-                int compute_perf  = multiProcessorCount * sm_per_multiproc * clockRate;
-
-                if (compute_perf  > max_compute_perf)
-                {
-                    // If we find GPU with SM major > 2, search only these
-                    if (best_SM_arch > 2)
-                    {
-                        // If our device = dest_SM_arch, then we pick this one
-                        if (major == best_SM_arch)
-                        {
-                            max_compute_perf  = compute_perf;
-                            max_perf_device   = current_device;
-                        }
-                    }
-                    else
-                    {
-                        max_compute_perf  = compute_perf;
-                        max_perf_device   = current_device;
-                    }
-                }
-            }
-        }
-
-        ++current_device;
-    }
-
-    return max_perf_device;
-}
-
-// General initialization call to pick the best CUDA Device
-inline CUdevice findCudaDeviceDRV(int argc, const char **argv)
-{
-    CUdevice cuDevice;
-    int devID = 0;
-
-    // If the command-line has a device number specified, use it
-    if (checkCmdLineFlag(argc, (const char **)argv, "device"))
-    {
-        devID = gpuDeviceInitDRV(argc, argv);
-
-        if (devID < 0)
-        {
-            printf("exiting...\n");
-            exit(EXIT_SUCCESS);
-        }
-    }
-    else
-    {
-        // Otherwise pick the device with highest Gflops/s
-        char name[100];
-        devID = gpuGetMaxGflopsDeviceIdDRV();
-        checkCudaErrors(cuDeviceGet(&cuDevice, devID));
-        cuDeviceGetName(name, 100, cuDevice);
-        printf("> Using CUDA Device [%d]: %s\n", devID, name);
-    }
-
-    cuDeviceGet(&cuDevice, devID);
-
-    return cuDevice;
-}
-
-// This function will pick the best CUDA device available with OpenGL interop
-inline CUdevice findCudaGLDeviceDRV(int argc, const char **argv)
-{
-    CUdevice cuDevice;
-    int devID = 0;
-
-    // If the command-line has a device number specified, use it
-    if (checkCmdLineFlag(argc, (const char **)argv, "device"))
-    {
-        devID = gpuDeviceInitDRV(argc, (const char **)argv);
-
-        if (devID < 0)
-        {
-            printf("no CUDA capable devices found, exiting...\n");
-            exit(EXIT_SUCCESS);
-        }
-    }
-    else
-    {
-        char name[100];
-        // Otherwise pick the device with highest Gflops/s
-        devID = gpuGetMaxGflopsGLDeviceIdDRV();
-        checkCudaErrors(cuDeviceGet(&cuDevice, devID));
-        cuDeviceGetName(name, 100, cuDevice);
-        printf("> Using CUDA/GL Device [%d]: %s\n", devID, name);
-    }
-
-    return devID;
-}
-
-// General check for CUDA GPU SM Capabilities
-inline bool checkCudaCapabilitiesDRV(int major_version, int minor_version, int devID)
-{
-    CUdevice cuDevice;
-    char name[256];
-    int major = 0, minor = 0;
-
-    checkCudaErrors(cuDeviceGet(&cuDevice, devID));
-    checkCudaErrors(cuDeviceGetName(name, 100, cuDevice));
-    checkCudaErrors(cuDeviceComputeCapability(&major, &minor, devID));
-
-    if ((major > major_version) ||
-        (major == major_version && minor >= minor_version))
-    {
-        printf("> Device %d: <%16s >, Compute SM %d.%d detected\n", devID, name, major, minor);
-        return true;
-    }
-    else
-    {
-        printf("No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
-        return false;
-    }
-}
-#endif
-
-// end of CUDA Helper Functions
-
-#endif
--- a/src/algorithms/libs/cudahelpers/helper_cuda_gl.h
+++ b/src/algorithms/libs/cudahelpers/helper_cuda_gl.h
@@ -1,165 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-#ifndef HELPER_CUDA_GL_H
-#define HELPER_CUDA_GL_H
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-// includes, graphics
-#if defined (__APPLE__) || defined(MACOSX)
-#include <OpenGL/gl.h>
-#include <OpenGL/glu.h>
-#else
-#include <GL/gl.h>
-#include <GL/glu.h>
-#endif
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-#ifdef __DRIVER_TYPES_H__
-#ifndef DEVICE_RESET
-#define DEVICE_RESET cudaDeviceReset()
-#endif
-#else
-#ifndef DEVICE_RESET
-#define DEVICE_RESET
-#endif
-#endif
-
-#ifdef __CUDA_GL_INTEROP_H__
-////////////////////////////////////////////////////////////////////////////////
-// These are CUDA OpenGL Helper functions
-
-inline int gpuGLDeviceInit(int ARGC, const char **ARGV)
-{
-    int deviceCount;
-    checkCudaErrors(cudaGetDeviceCount(&deviceCount));
-
-    if (deviceCount == 0)
-    {
-        fprintf(stderr, "CUDA error: no devices supporting CUDA.\n");
-        exit(EXIT_FAILURE);
-    }
-
-    int dev = 0;
-    dev = getCmdLineArgumentInt(ARGC, ARGV, "device=");
-
-    if (dev < 0)
-    {
-        dev = 0;
-    }
-
-    if (dev > deviceCount-1)
-    {
-        fprintf(stderr, "\n");
-        fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", deviceCount);
-        fprintf(stderr, ">> gpuGLDeviceInit (-device=%d) is not a valid GPU device. <<\n", dev);
-        fprintf(stderr, "\n");
-        return -dev;
-    }
-
-    cudaDeviceProp deviceProp;
-    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
-
-    if (deviceProp.computeMode == cudaComputeModeProhibited)
-    {
-        fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
-        return -1;
-    }
-
-    if (deviceProp.major < 1)
-    {
-        fprintf(stderr, "Error: device does not support CUDA.\n");
-        exit(EXIT_FAILURE);
-    }
-
-    if (checkCmdLineFlag(ARGC, ARGV, "quiet") == false)
-    {
-        fprintf(stderr, "Using device %d: %s\n", dev, deviceProp.name);
-    }
-
-    checkCudaErrors(cudaGLSetGLDevice(dev));
-    return dev;
-}
-
-// This function will pick the best CUDA device available with OpenGL interop
-inline int findCudaGLDevice(int argc, const char **argv)
-{
-    int devID = 0;
-
-    // If the command-line has a device number specified, use it
-    if (checkCmdLineFlag(argc, (const char **)argv, "device"))
-    {
-        devID = gpuGLDeviceInit(argc, (const char **)argv);
-
-        if (devID < 0)
-        {
-            printf("no CUDA capable devices found, exiting...\n");
-            DEVICE_RESET
-            exit(EXIT_SUCCESS);
-        }
-    }
-    else
-    {
-        // Otherwise pick the device with highest Gflops/s
-        devID = gpuGetMaxGflopsDeviceId();
-        cudaGLSetGLDevice(devID);
-    }
-
-    return devID;
-}
-
-////////////////////////////////////////////////////////////////////////////
-//! Check for OpenGL error
-//! @return bool if no GL error has been encountered, otherwise 0
-//! @param file  __FILE__ macro
-//! @param line  __LINE__ macro
-//! @note The GL error is listed on stderr
-//! @note This function should be used via the CHECK_ERROR_GL() macro
-////////////////////////////////////////////////////////////////////////////
-inline bool
-sdkCheckErrorGL(const char *file, const int line)
-{
-    bool ret_val = true;
-
-    // check for error
-    GLenum gl_error = glGetError();
-
-    if (gl_error != GL_NO_ERROR)
-    {
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-        char tmpStr[512];
-        // NOTE: "%s(%i) : " allows Visual Studio to directly jump to the file at the right line
-        // when the user double clicks on the error line in the Output pane. Like any compile error.
-        sprintf_s(tmpStr, 255, "\n%s(%i) : GL Error : %s\n\n", file, line, gluErrorString(gl_error));
-        fprintf(stderr, "%s", tmpStr);
-#endif
-        fprintf(stderr, "GL Error in file '%s' in line %d :\n", file, line);
-        fprintf(stderr, "%s\n", gluErrorString(gl_error));
-        ret_val = false;
-    }
-
-    return ret_val;
-}
-
-#define SDK_CHECK_ERROR_GL()                                              \
-    if( false == sdkCheckErrorGL( __FILE__, __LINE__)) {                  \
-        DEVICE_RESET                                                      \
-        exit(EXIT_FAILURE);                                               \
-    }
-#endif
-
-#endif
--- a/src/algorithms/libs/cudahelpers/helper_functions.h
+++ b/src/algorithms/libs/cudahelpers/helper_functions.h
@@ -1,42 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-// These are helper functions for the SDK samples (string parsing, timers, image helpers, etc)
-#ifndef HELPER_FUNCTIONS_H
-#define HELPER_FUNCTIONS_H
-
-#ifdef WIN32
-#pragma warning(disable:4996)
-#endif
-
-// includes, project
-#include <stdio.h>
-#include <stdlib.h>
-#include <string>
-#include <assert.h>
-#include <exception.h>
-#include <math.h>
-
-#include <fstream>
-#include <vector>
-#include <iostream>
-#include <algorithm>
-
-// includes, timer, string parsing, image helpers
-#include <helper_timer.h>   // helper functions for timers
-#include <helper_string.h>  // helper functions for string parsing
-#include <helper_image.h>   // helper functions for image compare, dump, data comparisons
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-#endif //  HELPER_FUNCTIONS_H
--- a/src/algorithms/libs/cudahelpers/helper_image.h
+++ b/src/algorithms/libs/cudahelpers/helper_image.h
--- a/src/algorithms/libs/cudahelpers/helper_math.h
+++ b/src/algorithms/libs/cudahelpers/helper_math.h
--- a/src/algorithms/libs/cudahelpers/helper_string.h
+++ b/src/algorithms/libs/cudahelpers/helper_string.h
@@ -1,516 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-// These are helper functions for the SDK samples (string parsing, timers, etc)
-#ifndef STRING_HELPER_H
-#define STRING_HELPER_H
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <fstream>
-#include <string>
-
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-#ifndef _CRT_SECURE_NO_DEPRECATE
-#define _CRT_SECURE_NO_DEPRECATE
-#endif
-#ifndef STRCASECMP
-#define STRCASECMP  _stricmp
-#endif
-#ifndef STRNCASECMP
-#define STRNCASECMP _strnicmp
-#endif
-#ifndef STRCPY
-#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
-#endif
-
-#ifndef FOPEN
-#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
-#endif
-#ifndef FOPEN_FAIL
-#define FOPEN_FAIL(result) (result != 0)
-#endif
-#ifndef SSCANF
-#define SSCANF sscanf_s
-#endif
-#ifndef SPRINTF
-#define SPRINTF sprintf_s
-#endif
-#else // Linux Includes
-#include <string.h>
-#include <strings.h>
-
-#ifndef STRCASECMP
-#define STRCASECMP  strcasecmp
-#endif
-#ifndef STRNCASECMP
-#define STRNCASECMP strncasecmp
-#endif
-#ifndef STRCPY
-#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
-#endif
-
-#ifndef FOPEN
-#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
-#endif
-#ifndef FOPEN_FAIL
-#define FOPEN_FAIL(result) (result == NULL)
-#endif
-#ifndef SSCANF
-#define SSCANF sscanf
-#endif
-#ifndef SPRINTF
-#define SPRINTF sprintf
-#endif
-#endif
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-// CUDA Utility Helper Functions
-inline int stringRemoveDelimiter(char delimiter, const char *string)
-{
-    int string_start = 0;
-
-    while (string[string_start] == delimiter)
-    {
-        string_start++;
-    }
-
-    if (string_start >= (int)strlen(string)-1)
-    {
-        return 0;
-    }
-
-    return string_start;
-}
-
-inline int getFileExtension(char *filename, char **extension)
-{
-    int string_length = (int)strlen(filename);
-
-    while (filename[string_length--] != '.')
-    {
-        if (string_length == 0)
-            break;
-    }
-
-    if (string_length > 0) string_length += 2;
-
-    if (string_length == 0)
-        *extension = NULL;
-    else
-        *extension = &filename[string_length];
-
-    return string_length;
-}
-
-
-inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
-{
-    bool bFound = false;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            const char *string_argv = &argv[i][string_start];
-
-            const char *equal_pos = strchr(string_argv, '=');
-            int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
-
-            int length = (int)strlen(string_ref);
-
-            if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length))
-            {
-                bFound = true;
-                continue;
-            }
-        }
-    }
-
-    return bFound;
-}
-
-// This function wraps the CUDA Driver API into a template function
-template <class T>
-inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value)
-{
-    bool bFound = false;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            const char *string_argv = &argv[i][string_start];
-            int length = (int)strlen(string_ref);
-
-            if (!STRNCASECMP(string_argv, string_ref, length))
-            {
-                if (length+1 <= (int)strlen(string_argv))
-                {
-                    int auto_inc = (string_argv[length] == '=') ? 1 : 0;
-                    *value = (T)atoi(&string_argv[length + auto_inc]);
-                }
-
-                bFound = true;
-                i=argc;
-            }
-        }
-    }
-
-    return bFound;
-}
-
-inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
-{
-    bool bFound = false;
-    int value = -1;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            const char *string_argv = &argv[i][string_start];
-            int length = (int)strlen(string_ref);
-
-            if (!STRNCASECMP(string_argv, string_ref, length))
-            {
-                if (length+1 <= (int)strlen(string_argv))
-                {
-                    int auto_inc = (string_argv[length] == '=') ? 1 : 0;
-                    value = atoi(&string_argv[length + auto_inc]);
-                }
-                else
-                {
-                    value = 0;
-                }
-
-                bFound = true;
-                continue;
-            }
-        }
-    }
-
-    if (bFound)
-    {
-        return value;
-    }
-    else
-    {
-        return 0;
-    }
-}
-
-inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref)
-{
-    bool bFound = false;
-    float value = -1;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            const char *string_argv = &argv[i][string_start];
-            int length = (int)strlen(string_ref);
-
-            if (!STRNCASECMP(string_argv, string_ref, length))
-            {
-                if (length+1 <= (int)strlen(string_argv))
-                {
-                    int auto_inc = (string_argv[length] == '=') ? 1 : 0;
-                    value = (float)atof(&string_argv[length + auto_inc]);
-                }
-                else
-                {
-                    value = 0.f;
-                }
-
-                bFound = true;
-                continue;
-            }
-        }
-    }
-
-    if (bFound)
-    {
-        return value;
-    }
-    else
-    {
-        return 0;
-    }
-}
-
-inline bool getCmdLineArgumentString(const int argc, const char **argv,
-                                     const char *string_ref, char **string_retval)
-{
-    bool bFound = false;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            char *string_argv = (char *)&argv[i][string_start];
-            int length = (int)strlen(string_ref);
-
-            if (!STRNCASECMP(string_argv, string_ref, length))
-            {
-                *string_retval = &string_argv[length+1];
-                bFound = true;
-                continue;
-            }
-        }
-    }
-
-    if (!bFound)
-    {
-        *string_retval = NULL;
-    }
-
-    return bFound;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//! Find the path for a file assuming that
-//! files are found in the searchPath.
-//!
-//! @return the path if succeeded, otherwise 0
-//! @param filename         name of the file
-//! @param executable_path  optional absolute path of the executable
-//////////////////////////////////////////////////////////////////////////////
-inline char *sdkFindFilePath(const char *filename, const char *executable_path)
-{
-    // <executable_name> defines a variable that is replaced with the name of the executable
-
-    // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files)
-    // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc
-    const char *searchPath[] =
-    {
-        "./",                                       // same dir
-        "./common/",                                // "/common/" subdir
-        "./common/data/",                           // "/common/data/" subdir
-        "./data/",                                  // "/data/" subdir
-        "./src/",                                   // "/src/" subdir
-        "./src/<executable_name>/data/",            // "/src/<executable_name>/data/" subdir
-        "./inc/",                                   // "/inc/" subdir
-        "./0_Simple/",                              // "/0_Simple/" subdir
-        "./1_Utilities/",                           // "/1_Utilities/" subdir
-        "./2_Graphics/",                            // "/2_Graphics/" subdir
-        "./3_Imaging/",                             // "/3_Imaging/" subdir
-        "./4_Finance/",                             // "/4_Finance/" subdir
-        "./5_Simulations/",                         // "/5_Simulations/" subdir
-        "./6_Advanced/",                            // "/6_Advanced/" subdir
-        "./7_CUDALibraries/",                       // "/7_CUDALibraries/" subdir
-        "./8_Android/",                             // "/8_Android/" subdir
-        "./samples/",                               // "/samples/" subdir
-
-        "../",                                      // up 1 in tree
-        "../common/",                               // up 1 in tree, "/common/" subdir
-        "../common/data/",                          // up 1 in tree, "/common/data/" subdir
-        "../data/",                                 // up 1 in tree, "/data/" subdir
-        "../src/",                                  // up 1 in tree, "/src/" subdir
-        "../inc/",                                  // up 1 in tree, "/inc/" subdir
-
-        "../0_Simple/<executable_name>/data/",       // up 1 in tree, "/0_Simple/<executable_name>/" subdir
-        "../1_Utilities/<executable_name>/data/",    // up 1 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../2_Graphics/<executable_name>/data/",     // up 1 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../3_Imaging/<executable_name>/data/",      // up 1 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../4_Finance/<executable_name>/data/",      // up 1 in tree, "/4_Finance/<executable_name>/" subdir
-        "../5_Simulations/<executable_name>/data/",  // up 1 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../6_Advanced/<executable_name>/data/",     // up 1 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../7_CUDALibraries/<executable_name>/data/",// up 1 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../8_Android/<executable_name>/data/",      // up 1 in tree, "/8_Android/<executable_name>/" subdir
-        "../samples/<executable_name>/data/",        // up 1 in tree, "/samples/<executable_name>/" subdir
-        "../../",                                        // up 2 in tree
-        "../../common/",                                 // up 2 in tree, "/common/" subdir
-        "../../common/data/",                            // up 2 in tree, "/common/data/" subdir
-        "../../data/",                                   // up 2 in tree, "/data/" subdir
-        "../../src/",                                    // up 2 in tree, "/src/" subdir
-        "../../inc/",                                    // up 2 in tree, "/inc/" subdir
-        "../../sandbox/<executable_name>/data/",         // up 2 in tree, "/sandbox/<executable_name>/" subdir
-        "../../0_Simple/<executable_name>/data/",        // up 2 in tree, "/0_Simple/<executable_name>/" subdir
-        "../../1_Utilities/<executable_name>/data/",     // up 2 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../../2_Graphics/<executable_name>/data/",      // up 2 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../../3_Imaging/<executable_name>/data/",       // up 2 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../../4_Finance/<executable_name>/data/",       // up 2 in tree, "/4_Finance/<executable_name>/" subdir
-        "../../5_Simulations/<executable_name>/data/",   // up 2 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../../6_Advanced/<executable_name>/data/",      // up 2 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../../7_CUDALibraries/<executable_name>/data/", // up 2 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../../8_Android/<executable_name>/data/",       // up 2 in tree, "/8_Android/<executable_name>/" subdir
-        "../../samples/<executable_name>/data/",         // up 2 in tree, "/samples/<executable_name>/" subdir
-        "../../../",                                        // up 3 in tree
-        "../../../src/<executable_name>/",                  // up 3 in tree, "/src/<executable_name>/" subdir
-        "../../../src/<executable_name>/data/",             // up 3 in tree, "/src/<executable_name>/data/" subdir
-        "../../../src/<executable_name>/src/",              // up 3 in tree, "/src/<executable_name>/src/" subdir
-        "../../../src/<executable_name>/inc/",              // up 3 in tree, "/src/<executable_name>/inc/" subdir
-        "../../../sandbox/<executable_name>/",              // up 3 in tree, "/sandbox/<executable_name>/" subdir
-        "../../../sandbox/<executable_name>/data/",         // up 3 in tree, "/sandbox/<executable_name>/data/" subdir
-        "../../../sandbox/<executable_name>/src/",          // up 3 in tree, "/sandbox/<executable_name>/src/" subdir
-        "../../../sandbox/<executable_name>/inc/",          // up 3 in tree, "/sandbox/<executable_name>/inc/" subdir
-        "../../../0_Simple/<executable_name>/data/",        // up 3 in tree, "/0_Simple/<executable_name>/" subdir
-        "../../../1_Utilities/<executable_name>/data/",     // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../../../2_Graphics/<executable_name>/data/",      // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../../../3_Imaging/<executable_name>/data/",       // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../../../4_Finance/<executable_name>/data/",       // up 3 in tree, "/4_Finance/<executable_name>/" subdir
-        "../../../5_Simulations/<executable_name>/data/",   // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../../../6_Advanced/<executable_name>/data/",      // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../../../7_CUDALibraries/<executable_name>/data/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../../../8_Android/<executable_name>/data/",       // up 3 in tree, "/8_Android/<executable_name>/" subdir
-        "../../../0_Simple/<executable_name>/",        // up 3 in tree, "/0_Simple/<executable_name>/" subdir
-        "../../../1_Utilities/<executable_name>/",     // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../../../2_Graphics/<executable_name>/",      // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../../../3_Imaging/<executable_name>/",       // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../../../4_Finance/<executable_name>/",       // up 3 in tree, "/4_Finance/<executable_name>/" subdir
-        "../../../5_Simulations/<executable_name>/",   // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../../../6_Advanced/<executable_name>/",      // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../../../7_CUDALibraries/<executable_name>/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../../../8_Android/<executable_name>/",       // up 3 in tree, "/8_Android/<executable_name>/" subdir
-        "../../../samples/<executable_name>/data/",         // up 3 in tree, "/samples/<executable_name>/" subdir
-        "../../../common/",                                 // up 3 in tree, "../../../common/" subdir
-        "../../../common/data/",                            // up 3 in tree, "../../../common/data/" subdir
-        "../../../data/",                                   // up 3 in tree, "../../../data/" subdir
-        "../../../../",                                // up 4 in tree
-        "../../../../src/<executable_name>/",          // up 4 in tree, "/src/<executable_name>/" subdir
-        "../../../../src/<executable_name>/data/",     // up 4 in tree, "/src/<executable_name>/data/" subdir
-        "../../../../src/<executable_name>/src/",      // up 4 in tree, "/src/<executable_name>/src/" subdir
-        "../../../../src/<executable_name>/inc/",      // up 4 in tree, "/src/<executable_name>/inc/" subdir
-        "../../../../sandbox/<executable_name>/",      // up 4 in tree, "/sandbox/<executable_name>/" subdir
-        "../../../../sandbox/<executable_name>/data/", // up 4 in tree, "/sandbox/<executable_name>/data/" subdir
-        "../../../../sandbox/<executable_name>/src/",  // up 4 in tree, "/sandbox/<executable_name>/src/" subdir
-        "../../../../sandbox/<executable_name>/inc/",   // up 4 in tree, "/sandbox/<executable_name>/inc/" subdir
-        "../../../../0_Simple/<executable_name>/data/",     // up 4 in tree, "/0_Simple/<executable_name>/" subdir
-        "../../../../1_Utilities/<executable_name>/data/",  // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../../../../2_Graphics/<executable_name>/data/",   // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../../../../3_Imaging/<executable_name>/data/",    // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../../../../4_Finance/<executable_name>/data/",    // up 4 in tree, "/4_Finance/<executable_name>/" subdir
-        "../../../../5_Simulations/<executable_name>/data/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../../../../6_Advanced/<executable_name>/data/",   // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../../../../7_CUDALibraries/<executable_name>/data/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../../../../8_Android/<executable_name>/data/",    // up 4 in tree, "/8_Android/<executable_name>/" subdir
-        "../../../../0_Simple/<executable_name>/",     // up 4 in tree, "/0_Simple/<executable_name>/" subdir
-        "../../../../1_Utilities/<executable_name>/",  // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../../../../2_Graphics/<executable_name>/",   // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../../../../3_Imaging/<executable_name>/",    // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../../../../4_Finance/<executable_name>/",    // up 4 in tree, "/4_Finance/<executable_name>/" subdir
-        "../../../../5_Simulations/<executable_name>/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../../../../6_Advanced/<executable_name>/",   // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../../../../7_CUDALibraries/<executable_name>/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../../../../8_Android/<executable_name>/",    // up 4 in tree, "/8_Android/<executable_name>/" subdir
-        "../../../../samples/<executable_name>/data/",      // up 4 in tree, "/samples/<executable_name>/" subdir
-        "../../../../common/",                              // up 4 in tree, "../../../common/" subdir
-        "../../../../common/data/",                         // up 4 in tree, "../../../common/data/" subdir
-        "../../../../data/",                                // up 4 in tree, "../../../data/" subdir
-        "../../../../../",                                // up 5 in tree
-        "../../../../../src/<executable_name>/",          // up 5 in tree, "/src/<executable_name>/" subdir
-        "../../../../../src/<executable_name>/data/",     // up 5 in tree, "/src/<executable_name>/data/" subdir
-        "../../../../../src/<executable_name>/src/",      // up 5 in tree, "/src/<executable_name>/src/" subdir
-        "../../../../../src/<executable_name>/inc/",      // up 5 in tree, "/src/<executable_name>/inc/" subdir
-        "../../../../../sandbox/<executable_name>/",      // up 5 in tree, "/sandbox/<executable_name>/" subdir
-        "../../../../../sandbox/<executable_name>/data/", // up 5 in tree, "/sandbox/<executable_name>/data/" subdir
-        "../../../../../sandbox/<executable_name>/src/",  // up 5 in tree, "/sandbox/<executable_name>/src/" subdir
-        "../../../../../sandbox/<executable_name>/inc/",   // up 5 in tree, "/sandbox/<executable_name>/inc/" subdir
-        "../../../../../0_Simple/<executable_name>/data/",     // up 5 in tree, "/0_Simple/<executable_name>/" subdir
-        "../../../../../1_Utilities/<executable_name>/data/",  // up 5 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../../../../../2_Graphics/<executable_name>/data/",   // up 5 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../../../../../3_Imaging/<executable_name>/data/",    // up 5 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../../../../../4_Finance/<executable_name>/data/",    // up 5 in tree, "/4_Finance/<executable_name>/" subdir
-        "../../../../../5_Simulations/<executable_name>/data/",// up 5 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../../../../../6_Advanced/<executable_name>/data/",   // up 5 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../../../../../7_CUDALibraries/<executable_name>/data/", // up 5 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../../../../../8_Android/<executable_name>/data/",    // up 5 in tree, "/8_Android/<executable_name>/" subdir
-        "../../../../../samples/<executable_name>/data/",      // up 5 in tree, "/samples/<executable_name>/" subdir
-        "../../../../../common/",                         // up 5 in tree, "../../../common/" subdir
-        "../../../../../common/data/",                    // up 5 in tree, "../../../common/data/" subdir
-    };
-
-    // Extract the executable name
-    std::string executable_name;
-
-    if (executable_path != 0)
-    {
-        executable_name = std::string(executable_path);
-
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-        // Windows path delimiter
-        size_t delimiter_pos = executable_name.find_last_of('\\');
-        executable_name.erase(0, delimiter_pos + 1);
-
-        if (executable_name.rfind(".exe") != std::string::npos)
-        {
-            // we strip .exe, only if the .exe is found
-            executable_name.resize(executable_name.size() - 4);
-        }
-
-#else
-        // Linux & OSX path delimiter
-        size_t delimiter_pos = executable_name.find_last_of('/');
-        executable_name.erase(0,delimiter_pos+1);
-#endif
-    }
-
-    // Loop over all search paths and return the first hit
-    for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i)
-    {
-        std::string path(searchPath[i]);
-        size_t executable_name_pos = path.find("<executable_name>");
-
-        // If there is executable_name variable in the searchPath
-        // replace it with the value
-        if (executable_name_pos != std::string::npos)
-        {
-            if (executable_path != 0)
-            {
-                path.replace(executable_name_pos, strlen("<executable_name>"), executable_name);
-            }
-            else
-            {
-                // Skip this path entry if no executable argument is given
-                continue;
-            }
-        }
-
-#ifdef _DEBUG
-        printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str());
-#endif
-
-        // Test if the file exists
-        path.append(filename);
-        FILE *fp;
-        FOPEN(fp, path.c_str(), "rb");
-
-        if (fp != NULL)
-        {
-            fclose(fp);
-            // File found
-            // returning an allocated array here for backwards compatibility reasons
-            char *file_path = (char *) malloc(path.length() + 1);
-            STRCPY(file_path, path.length() + 1, path.c_str());
-            return file_path;
-        }
-
-        if (fp)
-        {
-            fclose(fp);
-        }
-    }
-
-    // File not found
-    return 0;
-}
-
-#endif
--- a/src/algorithms/libs/cudahelpers/helper_timer.h
+++ b/src/algorithms/libs/cudahelpers/helper_timer.h
@@ -1,499 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-// Helper Timing Functions
-#ifndef HELPER_TIMER_H
-#define HELPER_TIMER_H
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-// includes, system
-#include <vector>
-
-// includes, project
-#include <exception.h>
-
-// Definition of the StopWatch Interface, this is used if we don't want to use the CUT functions
-// But rather in a self contained class interface
-class StopWatchInterface
-{
-    public:
-        StopWatchInterface() {};
-        virtual ~StopWatchInterface() {};
-
-    public:
-        //! Start time measurement
-        virtual void start() = 0;
-
-        //! Stop time measurement
-        virtual void stop() = 0;
-
-        //! Reset time counters to zero
-        virtual void reset() = 0;
-
-        //! Time in msec. after start. If the stop watch is still running (i.e. there
-        //! was no call to stop()) then the elapsed time is returned, otherwise the
-        //! time between the last start() and stop call is returned
-        virtual float getTime() = 0;
-
-        //! Mean time to date based on the number of times the stopwatch has been
-        //! _stopped_ (ie finished sessions) and the current total time
-        virtual float getAverageTime() = 0;
-};
-
-
-//////////////////////////////////////////////////////////////////
-// Begin Stopwatch timer class definitions for all OS platforms //
-//////////////////////////////////////////////////////////////////
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-// includes, system
-#define WINDOWS_LEAN_AND_MEAN
-#include <windows.h>
-#undef min
-#undef max
-
-//! Windows specific implementation of StopWatch
-class StopWatchWin : public StopWatchInterface
-{
-    public:
-        //! Constructor, default
-        StopWatchWin() :
-            start_time(),     end_time(),
-            diff_time(0.0f),  total_time(0.0f),
-            running(false), clock_sessions(0), freq(0), freq_set(false)
-        {
-            if (! freq_set)
-            {
-                // helper variable
-                LARGE_INTEGER temp;
-
-                // get the tick frequency from the OS
-                QueryPerformanceFrequency((LARGE_INTEGER *) &temp);
-
-                // convert to type in which it is needed
-                freq = ((double) temp.QuadPart) / 1000.0;
-
-                // rememeber query
-                freq_set = true;
-            }
-        };
-
-        // Destructor
-        ~StopWatchWin() { };
-
-    public:
-        //! Start time measurement
-        inline void start();
-
-        //! Stop time measurement
-        inline void stop();
-
-        //! Reset time counters to zero
-        inline void reset();
-
-        //! Time in msec. after start. If the stop watch is still running (i.e. there
-        //! was no call to stop()) then the elapsed time is returned, otherwise the
-        //! time between the last start() and stop call is returned
-        inline float getTime();
-
-        //! Mean time to date based on the number of times the stopwatch has been
-        //! _stopped_ (ie finished sessions) and the current total time
-        inline float getAverageTime();
-
-    private:
-        // member variables
-
-        //! Start of measurement
-        LARGE_INTEGER  start_time;
-        //! End of measurement
-        LARGE_INTEGER  end_time;
-
-        //! Time difference between the last start and stop
-        float  diff_time;
-
-        //! TOTAL time difference between starts and stops
-        float  total_time;
-
-        //! flag if the stop watch is running
-        bool running;
-
-        //! Number of times clock has been started
-        //! and stopped to allow averaging
-        int clock_sessions;
-
-        //! tick frequency
-        double  freq;
-
-        //! flag if the frequency has been set
-        bool  freq_set;
-};
-
-// functions, inlined
-
-////////////////////////////////////////////////////////////////////////////////
-//! Start time measurement
-////////////////////////////////////////////////////////////////////////////////
-inline void
-StopWatchWin::start()
-{
-    QueryPerformanceCounter((LARGE_INTEGER *) &start_time);
-    running = true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Stop time measurement and increment add to the current diff_time summation
-//! variable. Also increment the number of times this clock has been run.
-////////////////////////////////////////////////////////////////////////////////
-inline void
-StopWatchWin::stop()
-{
-    QueryPerformanceCounter((LARGE_INTEGER *) &end_time);
-    diff_time = (float)
-                (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
-
-    total_time += diff_time;
-    clock_sessions++;
-    running = false;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Reset the timer to 0. Does not change the timer running state but does
-//! recapture this point in time as the current start time if it is running.
-////////////////////////////////////////////////////////////////////////////////
-inline void
-StopWatchWin::reset()
-{
-    diff_time = 0;
-    total_time = 0;
-    clock_sessions = 0;
-
-    if (running)
-    {
-        QueryPerformanceCounter((LARGE_INTEGER *) &start_time);
-    }
-}
-
-
-////////////////////////////////////////////////////////////////////////////////
-//! Time in msec. after start. If the stop watch is still running (i.e. there
-//! was no call to stop()) then the elapsed time is returned added to the
-//! current diff_time sum, otherwise the current summed time difference alone
-//! is returned.
-////////////////////////////////////////////////////////////////////////////////
-inline float
-StopWatchWin::getTime()
-{
-    // Return the TOTAL time to date
-    float retval = total_time;
-
-    if (running)
-    {
-        LARGE_INTEGER temp;
-        QueryPerformanceCounter((LARGE_INTEGER *) &temp);
-        retval += (float)
-                  (((double)(temp.QuadPart - start_time.QuadPart)) / freq);
-    }
-
-    return retval;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Time in msec. for a single run based on the total number of COMPLETED runs
-//! and the total time.
-////////////////////////////////////////////////////////////////////////////////
-inline float
-StopWatchWin::getAverageTime()
-{
-    return (clock_sessions > 0) ? (total_time/clock_sessions) : 0.0f;
-}
-#else
-// Declarations for Stopwatch on Linux and Mac OSX
-// includes, system
-#include <ctime>
-#include <sys/time.h>
-
-//! Windows specific implementation of StopWatch
-class StopWatchLinux : public StopWatchInterface
-{
-    public:
-        //! Constructor, default
-        StopWatchLinux() :
-            start_time(), diff_time(0.0), total_time(0.0),
-            running(false), clock_sessions(0)
-        { };
-
-        // Destructor
-        virtual ~StopWatchLinux()
-        { };
-
-    public:
-        //! Start time measurement
-        inline void start();
-
-        //! Stop time measurement
-        inline void stop();
-
-        //! Reset time counters to zero
-        inline void reset();
-
-        //! Total time in msec. accumulated over all completed start()/stop() runs.
-        //! If the stop watch is still running (i.e. there was no call to stop())
-        //! the time elapsed since the last start() is added to that total
-        inline float getTime();
-
-        //! Mean time to date based on the number of times the stopwatch has been
-        //! _stopped_ (ie finished sessions) and the current total time
-        inline float getAverageTime();
-
-    private:
-
-        // helper functions
-
-        //! Get difference between start time and current time
-        inline float getDiffTime();
-
-    private:
-
-        // member variables
-
-        //! Start of measurement
-        struct timeval  start_time;
-
-        //! Time difference between the last start and stop
-        float  diff_time;
-
-        //! TOTAL time difference between starts and stops
-        float  total_time;
-
-        //! flag if the stop watch is running
-        bool running;
-
-        //! Number of times clock has been started
-        //! and stopped to allow averaging
-        int clock_sessions;
-};
-
-// functions, inlined
-
-////////////////////////////////////////////////////////////////////////////////
-//! Start time measurement
-////////////////////////////////////////////////////////////////////////////////
-inline void
-StopWatchLinux::start()
-{
-    gettimeofday(&start_time, 0);
-    running = true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Stop time measurement and add the elapsed time to the total_time summation
-//! variable. Also increment the number of times this clock has been run.
-////////////////////////////////////////////////////////////////////////////////
-inline void
-StopWatchLinux::stop()
-{
-    diff_time = getDiffTime();
-    total_time += diff_time;
-    running = false;
-    clock_sessions++;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Reset the timer to 0. Does not change the timer running state but does
-//! recapture this point in time as the current start time if it is running.
-////////////////////////////////////////////////////////////////////////////////
-inline void
-StopWatchLinux::reset()
-{
-    diff_time = 0;
-    total_time = 0;
-    clock_sessions = 0;
-
-    if (running)
-    {
-        gettimeofday(&start_time, 0);
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Time in msec. after start. If the stop watch is still running (i.e. there
-//! was no call to stop()) then the elapsed time is returned added to the
-//! current diff_time sum, otherwise the current summed time difference alone
-//! is returned.
-////////////////////////////////////////////////////////////////////////////////
-inline float
-StopWatchLinux::getTime()
-{
-    // Return the TOTAL time to date
-    float retval = total_time;
-
-    if (running)
-    {
-        retval += getDiffTime();
-    }
-
-    return retval;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Time in msec. for a single run based on the total number of COMPLETED runs
-//! and the total time.
-////////////////////////////////////////////////////////////////////////////////
-inline float
-StopWatchLinux::getAverageTime()
-{
-    return (clock_sessions > 0) ? (total_time/clock_sessions) : 0.0f;
-}
-////////////////////////////////////////////////////////////////////////////////
-//! Time in msec. elapsed between start_time and the current time
-////////////////////////////////////////////////////////////////////////////////
-inline float
-StopWatchLinux::getDiffTime()
-{
-    struct timeval t_time;
-    gettimeofday(&t_time, 0);
-
-    // time difference in milli-seconds
-    return (float)(1000.0 * (t_time.tv_sec - start_time.tv_sec)
-                   + (0.001 * (t_time.tv_usec - start_time.tv_usec)));
-}
-#endif // WIN32
-
-////////////////////////////////////////////////////////////////////////////////
-//! Timer functionality exported
-
-////////////////////////////////////////////////////////////////////////////////
-//! Create a new timer
-//! @return true if a timer has been created, otherwise false
-//! @param  timer_interface  receives the new timer (NULL if the creation failed)
-////////////////////////////////////////////////////////////////////////////////
-inline bool
-sdkCreateTimer(StopWatchInterface **timer_interface)
-{
-    //printf("sdkCreateTimer called object %08x\n", (void *)*timer_interface);
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-    *timer_interface = (StopWatchInterface *)new StopWatchWin();
-#else
-    *timer_interface = (StopWatchInterface *)new StopWatchLinux();
-#endif
-    return (*timer_interface != NULL) ? true : false;
-}
-
-
-////////////////////////////////////////////////////////////////////////////////
-//! Delete a timer
-//! @return always true
-//! @param  timer_interface  timer to delete; the pointer is reset to NULL
-////////////////////////////////////////////////////////////////////////////////
-inline bool
-sdkDeleteTimer(StopWatchInterface **timer_interface)
-{
-    //printf("sdkDeleteTimer called object %08x\n", (void *)*timer_interface);
-    if (*timer_interface)
-    {
-        delete *timer_interface;
-        *timer_interface = NULL;
-    }
-
-    return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Start the timer \a timer_interface
-//! @param timer_interface  timer to start
-////////////////////////////////////////////////////////////////////////////////
-inline bool
-sdkStartTimer(StopWatchInterface **timer_interface)
-{
-    //printf("sdkStartTimer called object %08x\n", (void *)*timer_interface);
-    if (*timer_interface)
-    {
-        (*timer_interface)->start();
-    }
-
-    return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Stop the timer \a timer_interface. Does not reset.
-//! @param timer_interface  timer to stop
-////////////////////////////////////////////////////////////////////////////////
-inline bool
-sdkStopTimer(StopWatchInterface **timer_interface)
-{
-    // printf("sdkStopTimer called object %08x\n", (void *)*timer_interface);
-    if (*timer_interface)
-    {
-        (*timer_interface)->stop();
-    }
-
-    return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Resets the timer's counter.
-//! @param timer_interface  timer to reset.
-////////////////////////////////////////////////////////////////////////////////
-inline bool
-sdkResetTimer(StopWatchInterface **timer_interface)
-{
-    // printf("sdkResetTimer called object %08x\n", (void *)*timer_interface);
-    if (*timer_interface)
-    {
-        (*timer_interface)->reset();
-    }
-
-    return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Return the average time for timer execution as the total time
-//! for the timer divided by the number of completed (stopped) runs the timer
-//! has made.
-//! Excludes the current running time if the timer is currently running.
-//! @param timer_interface  timer to return the average time of
-////////////////////////////////////////////////////////////////////////////////
-inline float
-sdkGetAverageTimerValue(StopWatchInterface **timer_interface)
-{
-    //  printf("sdkGetAverageTimerValue called object %08x\n", (void *)*timer_interface);
-    if (*timer_interface)
-    {
-        return (*timer_interface)->getAverageTime();
-    }
-    else
-    {
-        return 0.0f;
-    }
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! Total execution time for the timer over all runs since the last reset
-//! or timer creation.
-//! @param timer_interface  timer to obtain the value of.
-////////////////////////////////////////////////////////////////////////////////
-inline float
-sdkGetTimerValue(StopWatchInterface **timer_interface)
-{
-    // printf("sdkGetTimerValue called object %08x\n", (void *)*timer_interface);
-    if (*timer_interface)
-    {
-        return (*timer_interface)->getTime();
-    }
-    else
-    {
-        return 0.0f;
-    }
-}
-
-#endif // HELPER_TIMER_H
--- a/src/algorithms/tracking/gnuradio_blocks/CMakeLists.txt
+++ b/src/algorithms/tracking/gnuradio_blocks/CMakeLists.txt
@@ -19,9 +19,7 @@

 if(ENABLE_CUDA)
     set(OPT_TRACKING_BLOCKS ${OPT_TRACKING_BLOCKS} gps_l1_ca_dll_pll_tracking_gpu_cc.cc)
-     set(OPT_TRACKING_INCLUDES ${OPT_TRACKING_INCLUDES}  
-                               ${CUDA_INCLUDE_DIRS}
-                               ${CMAKE_SOURCE_DIR}/src/algorithms/libs/cudahelpers)
+     set(OPT_TRACKING_INCLUDES ${OPT_TRACKING_INCLUDES} ${CUDA_INCLUDE_DIRS})
     set(OPT_TRACKING_LIBRARIES ${OPT_TRACKING_LIBRARIES} ${CUDA_LIBRARIES}) 
 endif(ENABLE_CUDA)

--- a/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc
+++ b/src/algorithms/tracking/gnuradio_blocks/gps_l1_ca_dll_pll_tracking_gpu_cc.cc
@@ -47,11 +47,9 @@
 #include "lock_detectors.h"
 #include "GPS_L1_CA.h"
 #include "control_message_factory.h"
-#include <volk/volk.h> //volk_alignement
-// includes
+#include <volk/volk.h> // volk_alignment
 #include <cuda_profiler_api.h>
-#include <helper_functions.h>  // helper for shared functions common to CUDA Samples
-#include <helper_cuda.h>       // helper functions for CUDA error checking and initialization
+

 /*!
 * \todo Include in definition header file
@@ -131,24 +129,24 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
    multicorrelator_gpu->init_cuda_integrated_resampler(0, NULL, 2 * d_vector_length , GPS_L1_CA_CODE_LENGTH_CHIPS , N_CORRELATORS);

    // Get space for the resampled early / prompt / late local replicas
-    checkCudaErrors(cudaHostAlloc((void**)&d_local_code_shift_chips, N_CORRELATORS * sizeof(float),  cudaHostAllocMapped ));
-
+    cudaHostAlloc((void**)&d_local_code_shift_chips, N_CORRELATORS * sizeof(float),  cudaHostAllocMapped );

    //allocate host memory
    //pinned memory mode - use special function to get OS-pinned memory
-    checkCudaErrors(cudaHostAlloc((void**)&in_gpu, 2 * d_vector_length  * sizeof(gr_complex),  cudaHostAllocMapped ));
+    cudaHostAlloc((void**)&in_gpu, 2 * d_vector_length  * sizeof(gr_complex),  cudaHostAllocMapped );

    //old local codes vector
-    //checkCudaErrors(cudaHostAlloc((void**)&d_local_codes_gpu, (V_LEN * sizeof(gr_complex))*N_CORRELATORS, cudaHostAllocWriteCombined ));
+    // (cudaHostAlloc((void**)&d_local_codes_gpu, (V_LEN * sizeof(gr_complex))*N_CORRELATORS, cudaHostAllocWriteCombined ));

    //new integrated shifts
-    //checkCudaErrors(cudaHostAlloc((void**)&d_local_codes_gpu, (2 * d_vector_length * sizeof(gr_complex)), cudaHostAllocWriteCombined ));
+    // (cudaHostAlloc((void**)&d_local_codes_gpu, (2 * d_vector_length * sizeof(gr_complex)), cudaHostAllocWriteCombined ));

    // correlator outputs (scalar)
-    checkCudaErrors(cudaHostAlloc((void**)&d_corr_outs_gpu ,sizeof(gr_complex)*N_CORRELATORS,  cudaHostAllocWriteCombined ));
+    cudaHostAlloc((void**)&d_corr_outs_gpu ,sizeof(gr_complex)*N_CORRELATORS,  cudaHostAllocWriteCombined );
+
    //map to EPL pointers
    d_Early = &d_corr_outs_gpu[0];
-    d_Prompt =  &d_corr_outs_gpu[1];
+    d_Prompt = &d_corr_outs_gpu[1];
    d_Late = &d_corr_outs_gpu[2];

    //--- Perform initializations ------------------------------
@@ -181,7 +179,6 @@ Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc(
    systemName["G"] = std::string("GPS");
    systemName["S"] = std::string("SBAS");

-
    set_relative_rate(1.0/((double)d_vector_length*2));

    d_channel_internal_queue = 0;
@@ -303,10 +300,10 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
        gr_vector_const_void_star &input_items, gr_vector_void_star &output_items)
 {
    // process vars
-    float carr_error_hz=0.0;
-    float carr_error_filt_hz=0.0;
-    float code_error_chips=0.0;
-    float code_error_filt_chips=0.0;
+    float carr_error_hz = 0.0;
+    float carr_error_filt_hz = 0.0;
+    float code_error_chips = 0.0;
+    float code_error_filt_chips = 0.0;

    // Block input data and block output stream pointers
    const gr_complex* in = (gr_complex*) input_items[0];
@@ -339,20 +336,20 @@ int Gps_L1_Ca_Dll_Pll_Tracking_GPU_cc::general_work (int noutput_items, gr_vecto
            // UPDATE NCO COMMAND
            float phase_step_rad = static_cast<float>(GPS_TWO_PI) * d_carrier_doppler_hz / static_cast<float>(d_fs_in);

-            //code resampler on GPU (new)
+        	//code resampler on GPU (new)
            float code_phase_step_chips = static_cast<float>(d_code_freq_chips) / static_cast<float>(d_fs_in);
            float rem_code_phase_chips = d_rem_code_phase_samples * (d_code_freq_chips / d_fs_in);

            cudaProfilerStart();
            multicorrelator_gpu->Carrier_wipeoff_multicorrelator_resampler_cuda(
-    				d_corr_outs_gpu,
-    				in,
-    				d_rem_carr_phase_rad,
-    				phase_step_rad,
-    				code_phase_step_chips,
-    				rem_code_phase_chips,
-    				d_current_prn_length_samples,
-    				3);
+                    d_corr_outs_gpu,
+                    in,
+                    d_rem_carr_phase_rad,
+                    phase_step_rad,
+                    code_phase_step_chips,
+                    rem_code_phase_chips,
+                    d_current_prn_length_samples,
+                    3);
            cudaProfilerStop();

            // ################## PLL ##########################################################
--- a/src/algorithms/tracking/libs/CMakeLists.txt
+++ b/src/algorithms/tracking/libs/CMakeLists.txt
@@ -22,15 +22,11 @@ if(ENABLE_CUDA)
    # set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --gpu-architecture sm_30)
    list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_30,code=sm_30; -std=c++11;-O3; -use_fast_math -default-stream per-thread")
    set(CUDA_PROPAGATE_HOST_FLAGS OFF)
-    CUDA_INCLUDE_DIRECTORIES(
-        ${CMAKE_CURRENT_SOURCE_DIR}
-        ${CMAKE_CURRENT_SOURCE_DIR}/../../libs/cudahelpers
-        )
-
+    CUDA_INCLUDE_DIRECTORIES( ${CMAKE_CURRENT_SOURCE_DIR})
    set(LIB_TYPE STATIC) #set the lib type
    CUDA_ADD_LIBRARY(CUDA_CORRELATOR_LIB ${LIB_TYPE} cuda_multicorrelator.h cuda_multicorrelator.cu)
    set(OPT_TRACKING_LIBRARIES ${OPT_TRACKING_LIBRARIES} CUDA_CORRELATOR_LIB)
-    set(OPT_TRACKING_INCLUDES ${OPT_TRACKING_INCLUDES} ${CUDA_INCLUDE_DIRS} ${CMAKE_CURRENT_SOURCE_DIR}/../../libs/cudahelpers)
+    set(OPT_TRACKING_INCLUDES ${OPT_TRACKING_INCLUDES} ${CUDA_INCLUDE_DIRS} )
 endif(ENABLE_CUDA)


--- a/src/algorithms/tracking/libs/cuda_multicorrelator.cu
+++ b/src/algorithms/tracking/libs/cuda_multicorrelator.cu
@@ -49,9 +49,6 @@
 // For the CUDA runtime routines (prefixed with "cuda_")
 #include <cuda_runtime.h>

-// helper functions and utilities to work with CUDA
-#include <helper_cuda.h>
-#include <helper_functions.h>

 #define ACCUM_N 256

@@ -224,7 +221,6 @@ __global__ void scalarProdGPUCPXxN(
        //int vectorBase = IMUL(elementN, vec);
        //int vectorEnd  = vectorBase + elementN;

-
        ////////////////////////////////////////////////////////////////////////
        // Each accumulator cycles through vectors with
        // stride equal to number of total number of accumulators ACCUM_N
@@ -392,28 +388,28 @@ bool cuda_multicorrelator::init_cuda(const int argc, const char **argv, int sign
 //    	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
 //    }

-	//checkCudaErrors(cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
+	// (cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));


    // ALLOCATE GPU MEMORY FOR INPUT/OUTPUT and INTERNAL vectors

    size_t size = signal_length_samples * sizeof(GPU_Complex);

-	checkCudaErrors(cudaMalloc((void **)&d_sig_in, size));
-	//checkCudaErrors(cudaMalloc((void **)&d_nco_in, size));
-	checkCudaErrors(cudaMalloc((void **)&d_sig_doppler_wiped, size));
+	cudaMalloc((void **)&d_sig_in, size);
+	// (cudaMalloc((void **)&d_nco_in, size));
+	cudaMalloc((void **)&d_sig_doppler_wiped, size);

 	// old version: all local codes are independent vectors
-	//checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, size*n_correlators));
+	// (cudaMalloc((void **)&d_local_codes_in, size*n_correlators));

 	// new version: only one vector with extra samples to shift the local code for the correlator set
 	// Required: The last correlator tap in d_shifts_samples has the largest sample shift
    size_t size_local_code_bytes = local_codes_length_samples * sizeof(GPU_Complex);
-	checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, size_local_code_bytes));
-	checkCudaErrors(cudaMalloc((void **)&d_shifts_samples, sizeof(int)*n_correlators));
+	cudaMalloc((void **)&d_local_codes_in, size_local_code_bytes);
+	cudaMalloc((void **)&d_shifts_samples, sizeof(int)*n_correlators);

 	//scalars
-	checkCudaErrors(cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators));
+	cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators);

    // Launch the Vector Add CUDA Kernel
 	threadsPerBlock = 256;
@@ -481,30 +477,30 @@ bool cuda_multicorrelator::init_cuda_integrated_resampler(
 //    	    printf("multiProcessorCount= %i \n",prop.multiProcessorCount);
 //    }

-	//checkCudaErrors(cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));
+	// (cudaFuncSetCacheConfig(CUDA_32fc_x2_multiply_x2_dot_prod_32fc_, cudaFuncCachePreferShared));

    // ALLOCATE GPU MEMORY FOR INPUT/OUTPUT and INTERNAL vectors

    size_t size = signal_length_samples * sizeof(GPU_Complex);

-	checkCudaErrors(cudaMalloc((void **)&d_sig_in, size));
-	checkCudaErrors(cudaMemset(d_sig_in,0,size));
+	cudaMalloc((void **)&d_sig_in, size);
+	cudaMemset(d_sig_in,0,size);

-	//checkCudaErrors(cudaMalloc((void **)&d_nco_in, size));
-	checkCudaErrors(cudaMalloc((void **)&d_sig_doppler_wiped, size));
-	checkCudaErrors(cudaMemset(d_sig_doppler_wiped,0,size));
+	// (cudaMalloc((void **)&d_nco_in, size));
+	cudaMalloc((void **)&d_sig_doppler_wiped, size);
+	cudaMemset(d_sig_doppler_wiped,0,size);

-	checkCudaErrors(cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>)*code_length_chips));
-	checkCudaErrors(cudaMemset(d_local_codes_in,0,sizeof(std::complex<float>)*code_length_chips));
+	cudaMalloc((void **)&d_local_codes_in, sizeof(std::complex<float>)*code_length_chips);
+	cudaMemset(d_local_codes_in,0,sizeof(std::complex<float>)*code_length_chips);

    d_code_length_chips=code_length_chips;

-	checkCudaErrors(cudaMalloc((void **)&d_shifts_chips, sizeof(float)*n_correlators));
-	checkCudaErrors(cudaMemset(d_shifts_chips,0,sizeof(float)*n_correlators));
+	cudaMalloc((void **)&d_shifts_chips, sizeof(float)*n_correlators);
+	cudaMemset(d_shifts_chips,0,sizeof(float)*n_correlators);

 	//scalars
-	checkCudaErrors(cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators));
-	checkCudaErrors(cudaMemset(d_corr_out,0,sizeof(std::complex<float>)*n_correlators));
+	cudaMalloc((void **)&d_corr_out, sizeof(std::complex<float>)*n_correlators);
+	cudaMemset(d_corr_out,0,sizeof(std::complex<float>)*n_correlators);

    // Launch the Vector Add CUDA Kernel
 	threadsPerBlock = 256;
@@ -523,12 +519,12 @@ bool cuda_multicorrelator::set_local_code_and_taps(
 		)
 {
    // local code CPU -> GPU copy memory
-    checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex)*code_length_chips, cudaMemcpyHostToDevice,stream1));
+    cudaMemcpyAsync(d_local_codes_in, local_codes_in, sizeof(GPU_Complex)*code_length_chips, cudaMemcpyHostToDevice,stream1);
    d_code_length_chips=(float)code_length_chips;

    // Correlator shifts vector CPU -> GPU copy memory (fractional chip shifts are allowed!)
-    checkCudaErrors(cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float)*n_correlators,
-                                    cudaMemcpyHostToDevice,stream1));
+    cudaMemcpyAsync(d_shifts_chips, shifts_chips, sizeof(float)*n_correlators,
+                                    cudaMemcpyHostToDevice,stream1);

 	return true;
 }
@@ -550,40 +546,40 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(

 	// input signal CPU -> GPU copy memory

-    checkCudaErrors(cudaMemcpyAsync(d_sig_in, sig_in, memSize,
-                                    cudaMemcpyHostToDevice, stream1));
+    cudaMemcpyAsync(d_sig_in, sig_in, memSize,
+                                    cudaMemcpyHostToDevice, stream1);

    //***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! ****
-    //checkCudaErrors(cudaMemcpyAsync(d_nco_in, nco_in, memSize,
+    // (cudaMemcpyAsync(d_nco_in, nco_in, memSize,
    //                                cudaMemcpyHostToDevice, stream1));


 	// old version: all local codes are independent vectors
-    //checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize*n_correlators,
+    // (cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize*n_correlators,
    //                                cudaMemcpyHostToDevice, stream2));

 	// new version: only one vector with extra samples to shift the local code for the correlator set
 	// Required: The last correlator tap in d_shifts_samples has the largest sample shift

    // local code CPU -> GPU copy memory
-    checkCudaErrors(cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize+sizeof(std::complex<float>)*shifts_samples[n_correlators-1],
-                                    cudaMemcpyHostToDevice, stream2));
+    cudaMemcpyAsync(d_local_codes_in, local_codes_in, memSize+sizeof(std::complex<float>)*shifts_samples[n_correlators-1],
+                                    cudaMemcpyHostToDevice, stream2);
    // Correlator shifts vector CPU -> GPU copy memory
-    checkCudaErrors(cudaMemcpyAsync(d_shifts_samples, shifts_samples, sizeof(int)*n_correlators,
-                                    cudaMemcpyHostToDevice, stream2));
+    cudaMemcpyAsync(d_shifts_samples, shifts_samples, sizeof(int)*n_correlators,
+                                    cudaMemcpyHostToDevice, stream2);


    //Launch carrier wipe-off kernel here, while local codes are being copied to GPU!
-    checkCudaErrors(cudaStreamSynchronize(stream1));
+    cudaStreamSynchronize(stream1);
    CUDA_32fc_Doppler_wipeoff<<<blocksPerGrid, threadsPerBlock,0, stream1>>>(d_sig_doppler_wiped, d_sig_in,rem_carrier_phase_in_rad,phase_step_rad, signal_length_samples);


    //printf("CUDA kernel launch with %d blocks of %d threads\n", blocksPerGrid, threadsPerBlock);

    //wait for Doppler wipeoff end...
-    checkCudaErrors(cudaStreamSynchronize(stream1));
-    checkCudaErrors(cudaStreamSynchronize(stream2));
-    //checkCudaErrors(cudaDeviceSynchronize());
+    cudaStreamSynchronize(stream1);
+    cudaStreamSynchronize(stream2);
+    // (cudaDeviceSynchronize());

    //old
 //    scalarProdGPUCPXxN<<<blocksPerGrid, threadsPerBlock,0 ,stream2>>>(
@@ -604,15 +600,15 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_cuda(
 			n_correlators,
 			signal_length_samples
 		);
-    checkCudaErrors(cudaGetLastError());
+    cudaGetLastError();
    //wait for correlators end...
-    checkCudaErrors(cudaStreamSynchronize(stream2));
+    cudaStreamSynchronize(stream2);
    // Copy the device result vector in device memory to the host result vector
    // in host memory.

    //scalar products (correlators outputs)
-    checkCudaErrors(cudaMemcpy(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
-            cudaMemcpyDeviceToHost));
+    cudaMemcpy(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
+            cudaMemcpyDeviceToHost);
    return true;
 }

@@ -629,19 +625,19 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(

 	size_t memSize = signal_length_samples * sizeof(std::complex<float>);
 	// input signal CPU -> GPU copy memory
-    checkCudaErrors(cudaMemcpyAsync(d_sig_in, sig_in, memSize,
-                                    cudaMemcpyHostToDevice, stream2));
+    cudaMemcpyAsync(d_sig_in, sig_in, memSize,
+                                    cudaMemcpyHostToDevice, stream2);

    //***** NOTICE: NCO is computed on-the-fly, not need to copy NCO into GPU! ****

    //Launch carrier wipe-off kernel here, while local codes are being copied to GPU!
-    checkCudaErrors(cudaStreamSynchronize(stream2));
+    cudaStreamSynchronize(stream2);

    CUDA_32fc_Doppler_wipeoff<<<blocksPerGrid, threadsPerBlock,0, stream2>>>(d_sig_doppler_wiped, d_sig_in,rem_carrier_phase_in_rad,phase_step_rad, signal_length_samples);

    //wait for Doppler wipeoff end...
-    checkCudaErrors(cudaStreamSynchronize(stream1));
-    checkCudaErrors(cudaStreamSynchronize(stream2));
+    cudaStreamSynchronize(stream1);
+    cudaStreamSynchronize(stream2);

    //launch the multitap correlator with integrated local code resampler!

@@ -657,16 +653,16 @@ bool cuda_multicorrelator::Carrier_wipeoff_multicorrelator_resampler_cuda(
 			signal_length_samples
 		);

-    checkCudaErrors(cudaGetLastError());
+    cudaGetLastError();
    //wait for correlators end...
-    checkCudaErrors(cudaStreamSynchronize(stream1));
+    cudaStreamSynchronize(stream1);
    // Copy the device result vector in device memory to the host result vector
    // in host memory.

    //scalar products (correlators outputs)
-    checkCudaErrors(cudaMemcpyAsync(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
-            cudaMemcpyDeviceToHost,stream1));
-    checkCudaErrors(cudaStreamSynchronize(stream1));
+    cudaMemcpyAsync(corr_out, d_corr_out, sizeof(std::complex<float>)*n_correlators,
+            cudaMemcpyDeviceToHost,stream1);
+    cudaStreamSynchronize(stream1);
    return true;
 }

@@ -708,7 +704,7 @@ bool cuda_multicorrelator::free_cuda()
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
-	//checkCudaErrors(cudaDeviceReset());
+	// (cudaDeviceReset());
 	return true;
 }