// Workaround so that the nvcc compiler accepts fftw.h: map __float128 onto
// double. The type is not used anyway, so the substitution only has to make
// the header parse.
// NOTE(review): presumably this must stay ahead of the fftw include to have
// any effect -- confirm against the include order in the full file.
#define __float128 double
...
...
@@ -33,6 +34,12 @@ protected:
bioem_Probability*pProb_device;
bioem_map*pConvMap_device[2];
mycomplex_t*pRefMapsFFT;
mycomplex_t*pConvMapFFT;
mycuComplex_t*pFFTtmp2;
myfloat_t*pFFTtmp;
cufftHandleplan;
intGPUAlgo;//GPU Algorithm to use, 0: parallelize over maps, 1: as 0 but work split in multiple kernels (better), 2: also parallelize over shifts (best)
intGPUAsync;//Run GPU Asynchronously, do the convolutions on the host in parallel.
intGPUWorkload;//Percentage of workload to perform on GPU. Default 100. Rest is done on processor in parallel.