Commit 6e7cc545 authored by David Rohr
Browse files

Implement alignment optimizations; currently disabled because they do not work correctly on GPU

parent a476152e
...@@ -116,7 +116,7 @@ __global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* re ...@@ -116,7 +116,7 @@ __global__ void multComplexMap(const mycomplex_t* convmap, const mycomplex_t* re
{ {
if (myBlockIdxX >= NumberMaps) return; if (myBlockIdxX >= NumberMaps) return;
const mycomplex_t* myin = &refmap[myBlockIdxX * MapSize + Offset]; const mycomplex_t* myin = &refmap[myBlockIdxX * MapSize + Offset];
mycuComplex_t* myout = &out[(myBlockIdxX * MapSize)]; mycuComplex_t* myout = &out[myBlockIdxX * MapSize];
for(int i = myThreadIdxX; i < NumberPixelsTotal; i += myBlockDimX) for(int i = myThreadIdxX; i < NumberPixelsTotal; i += myBlockDimX)
{ {
myout[i].x = convmap[i][0] * myin[i][0] + convmap[i][1] * myin[i][1]; myout[i].x = convmap[i][0] * myin[i][0] + convmap[i][1] * myin[i][1];
...@@ -339,7 +339,7 @@ int bioem_cuda::deviceStartRun() ...@@ -339,7 +339,7 @@ int bioem_cuda::deviceStartRun()
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
{ {
int n[2] = {param.param_device.NumberPixels, param.param_device.NumberPixels}; int n[2] = {param.param_device.NumberPixels, param.param_device.NumberPixels};
if (cufftPlanMany(&plan[i], 2, n, NULL, 1, 0, NULL, 1, 0, MY_CUFFT_C2R, i ? (maxRef % CUDA_FFTS_AT_ONCE) : CUDA_FFTS_AT_ONCE) != CUFFT_SUCCESS) if (cufftPlanMany(&plan[i], 2, n, NULL, 1, param.FFTMapSize, NULL, 1, 0, MY_CUFFT_C2R, i ? (maxRef % CUDA_FFTS_AT_ONCE) : CUDA_FFTS_AT_ONCE) != CUFFT_SUCCESS)
{ {
cout << "Error planning CUFFT\n"; cout << "Error planning CUFFT\n";
exit(1); exit(1);
......
...@@ -33,6 +33,7 @@ public: ...@@ -33,6 +33,7 @@ public:
bioem_param_device param_device; bioem_param_device param_device;
int FFTMapSize; int FFTMapSize;
int Alignment;
mycomplex_t* refCTF; mycomplex_t* refCTF;
myfloat3_t* CtfParam; myfloat3_t* CtfParam;
......
...@@ -180,8 +180,6 @@ int bioem_param::readParameters() ...@@ -180,8 +180,6 @@ int bioem_param::readParameters()
} }
input.close(); input.close();
param_device.NumberFFTPixels1D = param_device.NumberPixels / 2 + 1;
FFTMapSize = param_device.NumberPixels * param_device.NumberFFTPixels1D;
cout << " +++++++++++++++++++++++++++++++++++++++++ \n"; cout << " +++++++++++++++++++++++++++++++++++++++++ \n";
cout << "Preparing FFTs\n"; cout << "Preparing FFTs\n";
...@@ -189,6 +187,7 @@ int bioem_param::readParameters() ...@@ -189,6 +187,7 @@ int bioem_param::readParameters()
mycomplex_t *tmp_map, *tmp_map2; mycomplex_t *tmp_map, *tmp_map2;
tmp_map = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberPixels); tmp_map = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberPixels);
tmp_map2 = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberPixels); tmp_map2 = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberPixels);
Alignment = 64;
fft_plan_c2c_forward = myfftw_plan_dft_2d(param_device.NumberPixels, param_device.NumberPixels, tmp_map, tmp_map2, FFTW_FORWARD, FFTW_MEASURE | FFTW_DESTROY_INPUT); fft_plan_c2c_forward = myfftw_plan_dft_2d(param_device.NumberPixels, param_device.NumberPixels, tmp_map, tmp_map2, FFTW_FORWARD, FFTW_MEASURE | FFTW_DESTROY_INPUT);
fft_plan_c2c_backward = myfftw_plan_dft_2d(param_device.NumberPixels, param_device.NumberPixels, tmp_map, tmp_map2, FFTW_BACKWARD, FFTW_MEASURE | FFTW_DESTROY_INPUT); fft_plan_c2c_backward = myfftw_plan_dft_2d(param_device.NumberPixels, param_device.NumberPixels, tmp_map, tmp_map2, FFTW_BACKWARD, FFTW_MEASURE | FFTW_DESTROY_INPUT);
...@@ -204,6 +203,17 @@ int bioem_param::readParameters() ...@@ -204,6 +203,17 @@ int bioem_param::readParameters()
myfftw_free(tmp_map); myfftw_free(tmp_map);
myfftw_free(tmp_map2); myfftw_free(tmp_map2);
fft_plans_created = 1; fft_plans_created = 1;
param_device.NumberFFTPixels1D = param_device.NumberPixels / 2 + 1;
FFTMapSize = param_device.NumberPixels * param_device.NumberFFTPixels1D;
//Custom alignment currently does not work on GPU
/*if (FFTMapSize % Alignment)
{
FFTMapSize += Alignment - FFTMapSize % Alignment;
}
cout << "Using MAP Size " << FFTMapSize << " (Alignment " << Alignment << ", Unaligned Size " << param_device.NumberPixels * param_device.NumberFFTPixels1D << ")\n";*/
cout << " +++++++++++++++++++++++++++++++++++++++++ \n"; cout << " +++++++++++++++++++++++++++++++++++++++++ \n";
return(0); return(0);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment