// GPU configuration options (presumably set from environment/CLI — confirm against caller).
int GPUAlgo;       // GPU algorithm to use: 0 = parallelize over maps,
                   // 1 = as 0 but work split in multiple kernels (better),
                   // 2 = also parallelize over shifts (best)
int GPUAsync;      // Run GPU asynchronously; do the convolutions on the host in parallel
int GPUDualStream; // Use two streams to improve parallelism
int GPUWorkload;   // Percentage of workload to perform on GPU (default 100);
                   // the rest is done on the processor in parallel
int maxRef;        // NOTE(review): likely a cap on the number of reference maps — confirm
intselectCudaDevice();
intdeviceInitialized;
cudaStream_tcudaStream[PIPELINE_LVL+1];// Streams are used for both
// PIPELINE and MULTISTREAM control
cudaEvent_tcudaEvent[PIPELINE_LVL+1];
cudaEvent_tcudaFFTEvent[MULTISTREAM_LVL];
bioem_RefMap*gpumap;
bioem_Probability*pProb_host;
bioem_ProbabilitypProb_device;
void*pProb_memory;
mycomplex_t*pRefMapsFFT;
mycomplex_t*pConvMapFFT;
mycomplex_t*pConvMapFFT_Host;
mycuComplex_t*pFFTtmp2[MULTISTREAM_LVL];
myfloat_t*pFFTtmp[MULTISTREAM_LVL];
cufftHandleplan[SPLIT_MAPS_LVL][MULTISTREAM_LVL];
myparam5_t*pTmp_comp_params;
myblockGPU_t*pTmp_comp_blocks;
intNcomp_blocks;
bool*initialized_const;// In order to make sure Constoadd is initialized to
// the first value
myfloat_t*sum,*sumsquare;
// GPU configuration options (second class in this chunk — same meanings as the
// identically-named members above; verify both copies stay in sync).
int GPUAsync;      // Run GPU asynchronously; do the convolutions on the host in
                   // parallel
int GPUDualStream; // Use two streams to improve parallelism
int GPUWorkload;   // Percentage of workload to perform on GPU (default 100)