// bioem_cuda_internal.h - internal declaration of the bioem_cuda class.
#ifndef BIOEM_CUDA_INTERNAL_H
#define BIOEM_CUDA_INTERNAL_H

#include <cuda.h>

// Hack so that the nvcc compiler accepts fftw3.h: __float128 is temporarily
// aliased to double for the duration of the include. __float128 is not used anyway.
#define __float128 double
#include <fftw3.h>
#undef __float128

// Project header; expected to provide bioem and the bioem_* types used below.
#include "bioem_cuda.h"

class bioem_cuda : public bioem
{
public:
16
17
18
19
20
	bioem_cuda();
	virtual ~bioem_cuda();

	virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map, const int startMap = 0);

qon's avatar
qon committed
21
22
23
24
25
protected:
	virtual int deviceInit();
	virtual int deviceStartRun();
	virtual int deviceFinishRun();
	int deviceExit();
26

qon's avatar
qon committed
27
	int deviceInitialized;
28

qon's avatar
qon committed
29
	cudaStream_t cudaStream;
30
	cudaEvent_t cudaEvent[2];
qon's avatar
qon committed
31
32
33
	bioem_RefMap_Mod* pRefMap_device_Mod;
	bioem_RefMap* pRefMap_device;
	bioem_Probability* pProb_device;
34
	bioem_map* pConvMap_device[2];
35

36
37
38
	int GPUAlgo;		//GPU Algorithm to use, 0: parallelize over maps, 1: as 0 but work split in multiple kernels (better), 2: also parallelize over shifts (best)
	int GPUAsync;		//Run GPU Asynchronously, do the convolutions on the host in parallel.
	int GPUWorkload;	//Percentage of workload to perform on GPU. Default 100. Rest is done on processor in parallel.
39

40
	int maxRef;
qon's avatar
qon committed
41
42
43
};

#endif