bioem_cuda_internal.h 2.43 KB
Newer Older
1
2
3
4
5
/* ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   < BioEM software for Bayesian inference of Electron Microscopy images>
   Copyright (C) 2016 Pilar Cossio, David Rohr, Fabio Baruffa, Markus Rampp,
        Volker Lindenstruth and Gerhard Hummer.
   Max Planck Institute of Biophysics, Frankfurt, Germany.
   Frankfurt Institute for Advanced Studies, Goethe University Frankfurt, Germany.
   Max Planck Computing and Data Facility, Garching, Germany.

   Released under the GNU Public License, v3.
   See license statement for terms of distribution.

   ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/

qon's avatar
qon committed
14
15
16
17
#ifndef BIOEM_CUDA_INTERNAL_H
#define BIOEM_CUDA_INTERNAL_H

#include <cuda.h>
18
#include <cufft.h>
qon's avatar
qon committed
19

20
21
22
23
24
// Hack to make the nvcc compiler accept fftw3.h; __float128 is not used anyway.
#define __float128 double
#include <fftw3.h>
#undef __float128

qon's avatar
qon committed
25
26
27
28
29
#include "bioem_cuda.h"

class bioem_cuda : public bioem
{
public:
30
31
32
	bioem_cuda();
	virtual ~bioem_cuda();

33
	virtual int compareRefMaps(int iOrient, int iConv, myfloat_t amp, myfloat_t pha, myfloat_t env, const myfloat_t* conv_map, mycomplex_t* localmultFFT, myfloat_t sumC, myfloat_t sumsquareC, const int startMap = 0);
34
35
	virtual void* malloc_device_host(size_t size);
	virtual void free_device_host(void* ptr);
Luka Stanisic's avatar
Luka Stanisic committed
36
	virtual void rebalance(int workload); //Rebalance GPUWorkload
37

qon's avatar
qon committed
38
39
40
41
42
protected:
	virtual int deviceInit();
	virtual int deviceStartRun();
	virtual int deviceFinishRun();
	int deviceExit();
David Rohr's avatar
David Rohr committed
43
44
45
	
private:
	int selectCudaDevice();
46

qon's avatar
qon committed
47
	int deviceInitialized;
48

49
50
	cudaStream_t cudaStream[3];
	cudaEvent_t cudaEvent[3];
51
	cudaEvent_t cudaFFTEvent[2];
qon's avatar
qon committed
52
	bioem_RefMap_Mod* pRefMap_device_Mod;
53
	bioem_RefMap* gpumap;
David Rohr's avatar
David Rohr committed
54
	bioem_Probability* pProb_host;
55
	bioem_Probability pProb_device;
David Rohr's avatar
David Rohr committed
56
	void* pProb_memory;
57
	myfloat_t* pConvMap_device[2];
58

59
60
	mycomplex_t* pRefMapsFFT;
	mycomplex_t* pConvMapFFT;
61
	mycomplex_t* pConvMapFFT_Host;
62
63
64
	mycuComplex_t* pFFTtmp2[2];
	myfloat_t* pFFTtmp[2];
	cufftHandle plan[2][2];
65

66
67
	myfloat_t *maps, *sum, *sumsquare;

68
69
	int GPUAlgo;		//GPU Algorithm to use, 0: parallelize over maps, 1: as 0 but work split in multiple kernels (better), 2: also parallelize over shifts (best)
	int GPUAsync;		//Run GPU Asynchronously, do the convolutions on the host in parallel.
70
	int GPUDualStream;	//Use two streams to improve paralelism
71
	int GPUWorkload;	//Percentage of workload to perform on GPU. Default 100. Rest is done on processor in parallel.
72

73
	int maxRef;
qon's avatar
qon committed
74
75
76
};

#endif
77