Commit c9754f9e authored by qon's avatar qon
Browse files

Change all line endings in files to unix style

parent 27a1ff28
This diff is collapsed.
#define BIOEM_GPUCODE #define BIOEM_GPUCODE
#if defined(_WIN32) #if defined(_WIN32)
#include <windows.h> #include <windows.h>
#endif #endif
#include <iostream> #include <iostream>
using namespace std; using namespace std;
#include "bioem_cuda_internal.h" #include "bioem_cuda_internal.h"
#include "bioem_algorithm.h" #include "bioem_algorithm.h"
//__constant__ bioem_map gConvMap; //__constant__ bioem_map gConvMap;
static inline void checkCudaErrors(cudaError_t error) static inline void checkCudaErrors(cudaError_t error)
{ {
if (error != cudaSuccess) if (error != cudaSuccess)
{ {
printf("CUDA Error %d / %s\n", error, cudaGetErrorString(error)); printf("CUDA Error %d / %s\n", error, cudaGetErrorString(error));
exit(1); exit(1);
} }
} }
bioem_cuda::bioem_cuda() bioem_cuda::bioem_cuda()
{ {
deviceInitialized = 0; deviceInitialized = 0;
GPUAlgo = getenv("GPUALGO") == NULL ? 2 : atoi(getenv("GPUALGO")); GPUAlgo = getenv("GPUALGO") == NULL ? 2 : atoi(getenv("GPUALGO"));
GPUAsync = getenv("GPUASYNC") == NULL ? 0 : atoi(getenv("GPUASYNC")); GPUAsync = getenv("GPUASYNC") == NULL ? 0 : atoi(getenv("GPUASYNC"));
} }
bioem_cuda::~bioem_cuda() bioem_cuda::~bioem_cuda()
{ {
deviceExit(); deviceExit();
} }
__global__ void compareRefMap_kernel(const int iOrient, const int iConv, const bioem_map* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod* RefMap, const int cent_x, const int cent_y) __global__ void compareRefMap_kernel(const int iOrient, const int iConv, const bioem_map* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod* RefMap, const int cent_x, const int cent_y)
{ {
const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX; const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX;
if (iRefMap < RefMap->ntotRefMap) if (iRefMap < RefMap->ntotRefMap)
{ {
compareRefMap<0>(iRefMap,iOrient,iConv,*pMap, pProb, param, *RefMap, cent_x, cent_y); compareRefMap<0>(iRefMap,iOrient,iConv,*pMap, pProb, param, *RefMap, cent_x, cent_y);
} }
} }
__global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const bioem_map* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod* RefMap) __global__ void compareRefMapShifted_kernel(const int iOrient, const int iConv, const bioem_map* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap_Mod* RefMap)
{ {
const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX; const int iRefMap = myBlockIdxX * myBlockDimX + myThreadIdxX;
if (iRefMap < RefMap->ntotRefMap) if (iRefMap < RefMap->ntotRefMap)
{ {
compareRefMapShifted<1>(iRefMap,iOrient,iConv,*pMap, pProb, param, *RefMap); compareRefMapShifted<1>(iRefMap,iOrient,iConv,*pMap, pProb, param, *RefMap);
} }
} }
__global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iConv, const bioem_map* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap* RefMap, const int blockoffset, const int nShifts, const int nShiftBits) __global__ void compareRefMapLoopShifts_kernel(const int iOrient, const int iConv, const bioem_map* pMap, bioem_Probability* pProb, const bioem_param_device param, const bioem_RefMap* RefMap, const int blockoffset, const int nShifts, const int nShiftBits)
{ {
const size_t myid = (myBlockIdxX + blockoffset) * myBlockDimX + myThreadIdxX; const size_t myid = (myBlockIdxX + blockoffset) * myBlockDimX + myThreadIdxX;
const int iRefMap = myid >> (nShiftBits << 1); const int iRefMap = myid >> (nShiftBits << 1);
const int myRef = myThreadIdxX >> (nShiftBits << 1); const int myRef = myThreadIdxX >> (nShiftBits << 1);
const int myShiftIdx = (myid >> nShiftBits) & (nShifts - 1); const int myShiftIdx = (myid >> nShiftBits) & (nShifts - 1);
const int myShiftIdy = myid & (nShifts - 1); const int myShiftIdy = myid & (nShifts - 1);
const int myShift = myid & (nShifts * nShifts - 1); const int myShift = myid & (nShifts * nShifts - 1);
const int cent_x = myShiftIdx * param.GridSpaceCenter - param.maxDisplaceCenter; const int cent_x = myShiftIdx * param.GridSpaceCenter - param.maxDisplaceCenter;
const int cent_y = myShiftIdy * param.GridSpaceCenter - param.maxDisplaceCenter; const int cent_y = myShiftIdy * param.GridSpaceCenter - param.maxDisplaceCenter;
compareRefMap<2>(iRefMap,iOrient,iConv,*pMap, pProb, param, *RefMap, cent_x, cent_y, myShift, nShifts * nShifts, myRef); compareRefMap<2>(iRefMap,iOrient,iConv,*pMap, pProb, param, *RefMap, cent_x, cent_y, myShift, nShifts * nShifts, myRef);
} }
template <class T> static inline T divup(T num, T divider) {return((num + divider - 1) / divider);} template <class T> static inline T divup(T num, T divider) {return((num + divider - 1) / divider);}
static inline bool IsPowerOf2(int x) {return ((x > 0) && ((x & (x - 1)) == 0));} static inline bool IsPowerOf2(int x) {return ((x > 0) && ((x & (x - 1)) == 0));}
#if defined(_WIN32) #if defined(_WIN32)
static inline int ilog2 (int value) static inline int ilog2 (int value)
{ {
DWORD index; DWORD index;
_BitScanReverse (&index, value); _BitScanReverse (&index, value);
return(value); return(value);
} }
#else #else
static inline int ilog2(int value) {return 31 - __builtin_clz(value);} static inline int ilog2(int value) {return 31 - __builtin_clz(value);}
#endif #endif
int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map) int bioem_cuda::compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map)
{ {
//cudaMemcpyToSymbolAsync(&gConvMap, &conv_map, sizeof(bioem_map), 0, cudaMemcpyHostToDevice, cudaStream); //cudaMemcpyToSymbolAsync(&gConvMap, &conv_map, sizeof(bioem_map), 0, cudaMemcpyHostToDevice, cudaStream);
if (GPUAsync) if (GPUAsync)
{ {
checkCudaErrors(cudaEventSynchronize(cudaEvent[iConv & 1])); checkCudaErrors(cudaEventSynchronize(cudaEvent[iConv & 1]));
} }
checkCudaErrors(cudaMemcpyAsync(pConvMap_device[iConv & 1], &conv_map, sizeof(bioem_map), cudaMemcpyHostToDevice, cudaStream)); checkCudaErrors(cudaMemcpyAsync(pConvMap_device[iConv & 1], &conv_map, sizeof(bioem_map), cudaMemcpyHostToDevice, cudaStream));
if (GPUAlgo == 2) //Loop over shifts if (GPUAlgo == 2) //Loop over shifts
{ {
const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1; const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1;
if (!IsPowerOf2(nShifts)) if (!IsPowerOf2(nShifts))
{ {
cout << "Invalid number of displacements, no power of two\n"; cout << "Invalid number of displacements, no power of two\n";
exit(1); exit(1);
} }
if (CUDA_THREAD_COUNT % (nShifts * nShifts)) if (CUDA_THREAD_COUNT % (nShifts * nShifts))
{ {
cout << "CUDA Thread count (" << CUDA_THREAD_COUNT << ") is no multiple of number of shifts (" << (nShifts * nShifts) << ")\n"; cout << "CUDA Thread count (" << CUDA_THREAD_COUNT << ") is no multiple of number of shifts (" << (nShifts * nShifts) << ")\n";
exit(1); exit(1);
} }
if (nShifts > CUDA_MAX_SHIFT_REDUCE) if (nShifts > CUDA_MAX_SHIFT_REDUCE)
{ {
cout << "Too many displacements for CUDA reduction\n"; cout << "Too many displacements for CUDA reduction\n";
exit(1); exit(1);
} }
const int nShiftBits = ilog2(nShifts); const int nShiftBits = ilog2(nShifts);
size_t totalBlocks = divup((size_t) RefMap.ntotRefMap * (size_t) nShifts * (size_t) nShifts, (size_t) CUDA_THREAD_COUNT); size_t totalBlocks = divup((size_t) RefMap.ntotRefMap * (size_t) nShifts * (size_t) nShifts, (size_t) CUDA_THREAD_COUNT);
size_t nBlocks = CUDA_BLOCK_COUNT; size_t nBlocks = CUDA_BLOCK_COUNT;
for (size_t i = 0;i < totalBlocks;i += nBlocks) for (size_t i = 0;i < totalBlocks;i += nBlocks)
{ {
compareRefMapLoopShifts_kernel <<<min(nBlocks, totalBlocks - i), CUDA_THREAD_COUNT, (CUDA_THREAD_COUNT * 2 + CUDA_THREAD_COUNT / (nShifts * nShifts) * 4) * sizeof(myfloat_t), cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device, i, nShifts, nShiftBits); compareRefMapLoopShifts_kernel <<<min(nBlocks, totalBlocks - i), CUDA_THREAD_COUNT, (CUDA_THREAD_COUNT * 2 + CUDA_THREAD_COUNT / (nShifts * nShifts) * 4) * sizeof(myfloat_t), cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device, i, nShifts, nShiftBits);
} }
} }
else if (GPUAlgo == 1) //Split shifts in multiple kernels else if (GPUAlgo == 1) //Split shifts in multiple kernels
{ {
for (int cent_x = -param.param_device.maxDisplaceCenter; cent_x <= param.param_device.maxDisplaceCenter; cent_x=cent_x+param.param_device.GridSpaceCenter) for (int cent_x = -param.param_device.maxDisplaceCenter; cent_x <= param.param_device.maxDisplaceCenter; cent_x=cent_x+param.param_device.GridSpaceCenter)
{ {
for (int cent_y = -param.param_device.maxDisplaceCenter; cent_y <= param.param_device.maxDisplaceCenter; cent_y=cent_y+param.param_device.GridSpaceCenter) for (int cent_y = -param.param_device.maxDisplaceCenter; cent_y <= param.param_device.maxDisplaceCenter; cent_y=cent_y+param.param_device.GridSpaceCenter)
{ {
compareRefMap_kernel <<<divup(RefMap.ntotRefMap, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, cent_x, cent_y); compareRefMap_kernel <<<divup(RefMap.ntotRefMap, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod, cent_x, cent_y);
} }
} }
} }
else if (GPUAlgo == 0) //All shifts in one kernel else if (GPUAlgo == 0) //All shifts in one kernel
{ {
compareRefMapShifted_kernel <<<divup(RefMap.ntotRefMap, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod); compareRefMapShifted_kernel <<<divup(RefMap.ntotRefMap, CUDA_THREAD_COUNT), CUDA_THREAD_COUNT, 0, cudaStream>>> (iProjectionOut, iConv, pConvMap_device[iConv & 1], pProb_device, param.param_device, pRefMap_device_Mod);
} }
else else
{ {
cout << "Invalid GPU Algorithm selected\n"; cout << "Invalid GPU Algorithm selected\n";
exit(1); exit(1);
} }
if (GPUAsync) if (GPUAsync)
{ {
checkCudaErrors(cudaEventRecord(cudaEvent[iConv & 1], cudaStream)); checkCudaErrors(cudaEventRecord(cudaEvent[iConv & 1], cudaStream));
} }
else else
{ {
checkCudaErrors(cudaStreamSynchronize(cudaStream)); checkCudaErrors(cudaStreamSynchronize(cudaStream));
} }
return(0); return(0);
} }
int bioem_cuda::deviceInit() int bioem_cuda::deviceInit()
{ {
deviceExit(); deviceExit();
checkCudaErrors(cudaStreamCreate(&cudaStream)); checkCudaErrors(cudaStreamCreate(&cudaStream));
checkCudaErrors(cudaMalloc(&pRefMap_device, sizeof(bioem_RefMap))); checkCudaErrors(cudaMalloc(&pRefMap_device, sizeof(bioem_RefMap)));
checkCudaErrors(cudaMalloc(&pProb_device, sizeof(bioem_Probability) * RefMap.ntotRefMap)); checkCudaErrors(cudaMalloc(&pProb_device, sizeof(bioem_Probability) * RefMap.ntotRefMap));
for (int i = 0;i < 2;i++) for (int i = 0;i < 2;i++)
{ {
checkCudaErrors(cudaEventCreate(&cudaEvent[i])); checkCudaErrors(cudaEventCreate(&cudaEvent[i]));
checkCudaErrors(cudaMalloc(&pConvMap_device[i], sizeof(bioem_map))); checkCudaErrors(cudaMalloc(&pConvMap_device[i], sizeof(bioem_map)));
} }
pRefMap_device_Mod = (bioem_RefMap_Mod*) pRefMap_device; pRefMap_device_Mod = (bioem_RefMap_Mod*) pRefMap_device;
if (GPUAlgo == 0 || GPUAlgo == 1) if (GPUAlgo == 0 || GPUAlgo == 1)
{ {
bioem_RefMap_Mod* RefMapGPU = new bioem_RefMap_Mod(RefMap); bioem_RefMap_Mod* RefMapGPU = new bioem_RefMap_Mod(RefMap);
cudaMemcpy(pRefMap_device_Mod, RefMapGPU, sizeof(bioem_RefMap), cudaMemcpyHostToDevice); cudaMemcpy(pRefMap_device_Mod, RefMapGPU, sizeof(bioem_RefMap), cudaMemcpyHostToDevice);
delete RefMapGPU; delete RefMapGPU;
} }
else else
{ {
cudaMemcpy(pRefMap_device, &RefMap, sizeof(bioem_RefMap), cudaMemcpyHostToDevice); cudaMemcpy(pRefMap_device, &RefMap, sizeof(bioem_RefMap), cudaMemcpyHostToDevice);
} }
deviceInitialized = 1; deviceInitialized = 1;
return(0); return(0);
} }
int bioem_cuda::deviceExit() int bioem_cuda::deviceExit()
{ {
if (deviceInitialized == 0) return(0); if (deviceInitialized == 0) return(0);
cudaStreamDestroy(cudaStream); cudaStreamDestroy(cudaStream);
cudaFree(pRefMap_device); cudaFree(pRefMap_device);
cudaFree(pProb_device); cudaFree(pProb_device);
for (int i = 0;i < 2;i++) for (int i = 0;i < 2;i++)
{ {
cudaEventDestroy(cudaEvent[i]); cudaEventDestroy(cudaEvent[i]);
cudaFree(pConvMap_device); cudaFree(pConvMap_device);
} }
cudaThreadExit(); cudaThreadExit();
deviceInitialized = 0; deviceInitialized = 0;
return(0); return(0);
} }
int bioem_cuda::deviceStartRun() int bioem_cuda::deviceStartRun()
{ {
cudaMemcpy(pProb_device, pProb, sizeof(bioem_Probability) * RefMap.ntotRefMap, cudaMemcpyHostToDevice); cudaMemcpy(pProb_device, pProb, sizeof(bioem_Probability) * RefMap.ntotRefMap, cudaMemcpyHostToDevice);
return(0); return(0);
} }
int bioem_cuda::deviceFinishRun() int bioem_cuda::deviceFinishRun()
{ {
if (GPUAsync) cudaStreamSynchronize(cudaStream); if (GPUAsync) cudaStreamSynchronize(cudaStream);
cudaMemcpy(pProb, pProb_device, sizeof(bioem_Probability) * RefMap.ntotRefMap, cudaMemcpyDeviceToHost); cudaMemcpy(pProb, pProb_device, sizeof(bioem_Probability) * RefMap.ntotRefMap, cudaMemcpyDeviceToHost);
return(0); return(0);
} }
bioem* bioem_cuda_create() bioem* bioem_cuda_create()
{ {
return new bioem_cuda; return new bioem_cuda;
} }
#ifndef BIOEM_H #ifndef BIOEM_H
#define BIOEM_H #define BIOEM_H
#include <fstream> #include <fstream>
#include <iostream> #include <iostream>
#include <complex> #include <complex>
#include "defs.h" #include "defs.h"
#include "bioem.h" #include "bioem.h"
#include "model.h" #include "model.h"
#include "map.h" #include "map.h"
#include "param.h" #include "param.h"
class bioem class bioem
{ {
public: public:
bioem(); bioem();
virtual ~bioem(); virtual ~bioem();
int configure(int ac, char* av[]); int configure(int ac, char* av[]);
int precalculate(); // Is it better to pass directly the input File names? int precalculate(); // Is it better to pass directly the input File names?
int dopreCalCrossCorrelation(int iRefMap, int iRefMapLocal); int dopreCalCrossCorrelation(int iRefMap, int iRefMapLocal);
int run(); int run();
int doProjections(int iMap); int doProjections(int iMap);
int createConvolutedProjectionMap(int iOreint,int iMap, mycomplex_t* lproj,bioem_map& Mapconv); int createConvolutedProjectionMap(int iOreint,int iMap, mycomplex_t* lproj,bioem_map& Mapconv);
virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map); virtual int compareRefMaps(int iProjectionOut, int iConv, const bioem_map& conv_map);
int createProjection(int iMap, mycomplex_t* map); int createProjection(int iMap, mycomplex_t* map);
int calcross_cor(bioem_map& localmap,myfloat_t& sum,myfloat_t& sumsquare); int calcross_cor(bioem_map& localmap,myfloat_t& sum,myfloat_t& sumsquare);
bioem_Probability* pProb; bioem_Probability* pProb;
protected: protected:
virtual int deviceInit(); virtual int deviceInit();
virtual int deviceStartRun(); virtual int deviceStartRun();
virtual int deviceFinishRun(); virtual int deviceFinishRun();
bioem_param param; bioem_param param;
bioem_model Model; bioem_model Model;
bioem_RefMap RefMap; bioem_RefMap RefMap;
int nReferenceMaps; //Maps in memory at a time int nReferenceMaps; //Maps in memory at a time
int nReferenceMapsTotal; //Maps in total int nReferenceMapsTotal; //Maps in total
int nProjectionMaps; //Maps in memory at a time int nProjectionMaps; //Maps in memory at a time
int nProjectionMapsTotal; //Maps in total int nProjectionMapsTotal; //Maps in total
}; };
#endif #endif
#ifndef BIOEM_DEFS_H #ifndef BIOEM_DEFS_H
#define BIOEM_DEFS_H #define BIOEM_DEFS_H
typedef float myfloat_t; typedef float myfloat_t;
typedef double mycomplex_t[2]; typedef double mycomplex_t[2];
#define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 //Possible set to 4 for GPU #define BIOEM_FLOAT_3_PHYSICAL_SIZE 3 //Possible set to 4 for GPU
#define BIOEM_MAP_SIZE_X 224 #define BIOEM_MAP_SIZE_X 224
#define BIOEM_MAP_SIZE_Y 224 #define BIOEM_MAP_SIZE_Y 224
#define BIOEM_MODEL_SIZE 120000 #define BIOEM_MODEL_SIZE 120000
#define BIOEM_MAX_MAPS 12000 #define BIOEM_MAX_MAPS 12000
#define MAX_REF_CTF 200 #define MAX_REF_CTF 200
#define MAX_ORIENT 20000 #define MAX_ORIENT 20000
struct myfloat3_t struct myfloat3_t
{ {
myfloat_t pos[BIOEM_FLOAT_3_PHYSICAL_SIZE]; myfloat_t pos[BIOEM_FLOAT_3_PHYSICAL_SIZE];
}; };
#ifdef BIOEM_GPUCODE #ifdef BIOEM_GPUCODE
#define myThreadIdxX threadIdx.x #define myThreadIdxX threadIdx.x
#define myThreadIdxY threadIdx.y #define myThreadIdxY threadIdx.y
#define myBlockDimX blockDim.x #define myBlockDimX blockDim.x
#define myBlockDimY blockDim.y #define myBlockDimY blockDim.y
#define myBlockIdxX blockIdx.x #define myBlockIdxX blockIdx.x
#define myBlockIdxY blockIdx.y #define myBlockIdxY blockIdx.y
#else #else
#define __device__ #define __device__
#define __host__ #define __host__
#define myThreadIdxX 0 #define myThreadIdxX 0
#define myThreadIdxY 0 #define myThreadIdxY 0
#define myBlockDimX 1 #define myBlockDimX 1
#define myBlockDimY 1 #define myBlockDimY 1
#define myBlockIdxX 0 #define myBlockIdxX 0
#define myBlockIdxY 0 #define myBlockIdxY 0
#endif #endif
#define CUDA_THREAD_COUNT 256 #define CUDA_THREAD_COUNT 256
#define CUDA_BLOCK_COUNT 1024 * 16 #define CUDA_BLOCK_COUNT 1024 * 16
#define CUDA_MAX_SHIFT_REDUCE 1024 #define CUDA_MAX_SHIFT_REDUCE 1024
#endif #endif
#ifndef BIOEM_MAP_H #ifndef BIOEM_MAP_H
#define BIOEM_MAP_H #define BIOEM_MAP_H
#include "defs.h" #include "defs.h"
#include <complex> #include <complex>
#include <math.h> #include <math.h>
class bioem_param; class bioem_param;
class bioem_map class bioem_map
{ {
public: public:
myfloat_t points[BIOEM_MAP_SIZE_X][BIOEM_MAP_SIZE_Y]; myfloat_t points[BIOEM_MAP_SIZE_X][BIOEM_MAP_SIZE_Y];
}; };
class bioem_map_forFFT class bioem_map_forFFT
{ {
public: public:
mycomplex_t cpoints[2*BIOEM_MAP_SIZE_X*2*BIOEM_MAP_SIZE_Y]; mycomplex_t cpoints[2*BIOEM_MAP_SIZE_X*2*BIOEM_MAP_SIZE_Y];
}; };
class bioem_RefMap class bioem_RefMap
{ {
public: public:
int readRefMaps(); int readRefMaps();
const char* filemap; const char* filemap;
int ntotRefMap; int ntotRefMap;
bioem_map Ref[BIOEM_MAX_MAPS]; bioem_map Ref[BIOEM_MAX_MAPS];
myfloat_t sum_RefMap[BIOEM_MAX_MAPS]; myfloat_t sum_RefMap[BIOEM_MAX_MAPS];
myfloat_t sumsquare_RefMap[BIOEM_MAX_MAPS]; myfloat_t sumsquare_RefMap[BIOEM_MAX_MAPS];
myfloat_t ForLogProbfromRef[BIOEM_MAX_MAPS];