Commit b1e9df04 authored by David Rohr's avatar David Rohr
Browse files

Add option to do multiple projections in parallel

parent 4dc6c762
...@@ -81,6 +81,7 @@ bioem::bioem() ...@@ -81,6 +81,7 @@ bioem::bioem()
{ {
FFTAlgo = getenv("FFTALGO") == NULL ? 1 : atoi(getenv("FFTALGO")); FFTAlgo = getenv("FFTALGO") == NULL ? 1 : atoi(getenv("FFTALGO"));
DebugOutput = getenv("BIOEM_DEBUG_OUTPUT") == NULL ? 2 : atoi(getenv("BIOEM_DEBUG_OUTPUT")); DebugOutput = getenv("BIOEM_DEBUG_OUTPUT") == NULL ? 2 : atoi(getenv("BIOEM_DEBUG_OUTPUT"));
nProjectionsAtOnce = getenv("BIOEM_PROJECTIONS_AT_ONCE") == NULL ? 1 : atoi(getenv("BIOEM_PROJECTIONS_AT_ONCE"));
} }
bioem::~bioem() bioem::~bioem()
...@@ -361,13 +362,14 @@ int bioem::run() ...@@ -361,13 +362,14 @@ int bioem::run()
// ******************************** MAIN CYCLE ****************************************** // ******************************** MAIN CYCLE ******************************************
mycomplex_t* proj_mapFFT; mycomplex_t* proj_mapsFFT;
myfloat_t* conv_map = NULL; myfloat_t* conv_map = NULL;
mycomplex_t* conv_mapFFT; mycomplex_t* conv_mapFFT;
myfloat_t sumCONV, sumsquareCONV; myfloat_t sumCONV, sumsquareCONV;
//allocating fftw_complex vector //allocating fftw_complex vector
proj_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D); const int ProjMapSize = (param.FFTMapSize + 64) & ~63; //Make sure this is properly aligned for fftw..., Actually this should be ensureb by using FFTMapSize, but it is not due to a bug in CUFFT which cannot handle padding properly
proj_mapsFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * ProjMapSize * nProjectionsAtOnce);
conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D); conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
if (!FFTAlgo) conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels); if (!FFTAlgo) conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
...@@ -379,47 +381,60 @@ int bioem::run() ...@@ -379,47 +381,60 @@ int bioem::run()
int iOrientEnd = (int) ((long long int) (mpi_rank + 1) * param.nTotGridAngles / mpi_size); int iOrientEnd = (int) ((long long int) (mpi_rank + 1) * param.nTotGridAngles / mpi_size);
if (iOrientEnd > param.nTotGridAngles) iOrientEnd = param.nTotGridAngles; if (iOrientEnd > param.nTotGridAngles) iOrientEnd = param.nTotGridAngles;
for (int iOrient = iOrientStart; iOrient < iOrientEnd; iOrient++) for (int iOrientAtOnce = iOrientStart; iOrientAtOnce < iOrientEnd; iOrientAtOnce += nProjectionsAtOnce)
{ {
// *************************************************************************************** // ***************************************************************************************
// ***** Creating Projection for given orientation and transforming to Fourier space ***** // ***** Creating Projection for given orientation and transforming to Fourier space *****
if (DebugOutput >= 1) timer2.ResetStart(); if (DebugOutput >= 1) timer2.ResetStart();
if (DebugOutput >= 2) timer.ResetStart(); if (DebugOutput >= 2) timer.ResetStart();
createProjection(iOrient, proj_mapFFT); int iTmpEnd = std::min(iOrientEnd, iOrientAtOnce + nProjectionsAtOnce);
if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrient, timer.GetCurrentElapsedTime(), mpi_rank); #pragma omp parallel for
for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++)
{
createProjection(iOrient, &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize]);
}
if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrientAtOnce, timer.GetCurrentElapsedTime(), mpi_rank);
// *************************************************************************************** for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++)
// ***** **** Internal Loop over convolutions **** *****
for (int iConv = 0; iConv < param.nTotCTFs; iConv++)
{ {
// *** Calculating convolutions of projection map and crosscorrelations *** mycomplex_t* proj_mapFFT = &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize];
// ***************************************************************************************
// ***** **** Internal Loop over convolutions **** *****
for (int iConv = 0; iConv < param.nTotCTFs; iConv++)
{
// *** Calculating convolutions of projection map and crosscorrelations ***
if (DebugOutput >= 2) timer.ResetStart(); if (DebugOutput >= 2) timer.ResetStart();
createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, timer.GetCurrentElapsedTime(), mpi_rank); if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, timer.GetCurrentElapsedTime(), mpi_rank);
// *************************************************************************************** // ***************************************************************************************
// *** Comparing each calculated convoluted map with all experimental maps *** // *** Comparing each calculated convoluted map with all experimental maps ***
if (DebugOutput >= 2) timer.ResetStart(); if (DebugOutput >= 2) timer.ResetStart();
compareRefMaps(iOrient, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); compareRefMaps(iOrient, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
if (DebugOutput >= 2) if (DebugOutput >= 2)
{
const double compTime = timer.GetCurrentElapsedTime();
const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1;
const double nFlops = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 5. + 25.) / compTime;
const double nGBs = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / compTime;
const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) * (double) sizeof(myfloat_t) / compTime;
printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank);
}
}
if (DebugOutput >= 1)
{ {
const double compTime = timer.GetCurrentElapsedTime(); printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, timer2.GetCurrentElapsedTime(), mpi_rank);
const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1; timer2.ResetStart();
const double nFlops = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 5. + 25.) / compTime;
const double nGBs = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / compTime;
const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) * (double) sizeof(myfloat_t) / compTime;
printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank);
} }
} }
if (DebugOutput >= 1) printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, timer2.GetCurrentElapsedTime(), mpi_rank);
} }
//deallocating fftw_complex vector //deallocating fftw_complex vector
myfftw_free(proj_mapFFT); myfftw_free(proj_mapsFFT);
myfftw_free(conv_mapFFT); myfftw_free(conv_mapFFT);
if (!FFTAlgo) myfftw_free(conv_map); if (!FFTAlgo) myfftw_free(conv_map);
...@@ -626,7 +641,7 @@ int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, myc ...@@ -626,7 +641,7 @@ int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, myc
if (FFTAlgo) if (FFTAlgo)
{ {
//With FFT Algorithm //With FFT Algorithm
#pragma omp parallel for #pragma omp parallel for schedule(dynamic, 1)
for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++) for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{ {
const int num = omp_get_thread_num(); const int num = omp_get_thread_num();
...@@ -636,7 +651,7 @@ int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, myc ...@@ -636,7 +651,7 @@ int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, myc
else else
{ {
//Without FFT Algorithm //Without FFT Algorithm
#pragma omp parallel for #pragma omp parallel for schedule(dynamic, 1)
for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++) for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{ {
compareRefMapShifted < -1 > (iRefMap, iOrient, iConv, conv_map, pProb, param.param_device, RefMap); compareRefMapShifted < -1 > (iRefMap, iOrient, iConv, conv_map, pProb, param.param_device, RefMap);
...@@ -716,52 +731,52 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT) ...@@ -716,52 +731,52 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
int i, j; int i, j;
//********** Projection with radius *************** //********** Projection with radius ***************
if(param.doaaradius){ if(param.doaaradius)
{
int irad; int irad;
myfloat_t dist, rad2; myfloat_t dist, rad2;
for(int n = 0; n < Model.nPointsModel; n++) for(int n = 0; n < Model.nPointsModel; n++)
{ {
//Getting Centers of Sphere //Getting Centers of Sphere
i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
//Getting the radius //Getting the radius
irad=int( Model.points[n].radius / param.pixelSize ); irad=int( Model.points[n].radius / param.pixelSize );
rad2= Model.points[n].radius * Model.points[n].radius; rad2= Model.points[n].radius * Model.points[n].radius;
//Projecting over the radius //Projecting over the radius
for(int ii= i - irad; ii < i + irad; ii++) for(int ii= i - irad; ii < i + irad; ii++)
{ {
for(int jj = j - irad; jj < j + irad; jj++) for(int jj = j - irad; jj < j + irad; jj++)
{ {
dist= ( (myfloat_t) (ii-i)*(ii-i)+(jj-j)*(jj-j) ) * param.pixelSize ; dist= ( (myfloat_t) (ii-i)*(ii-i)+(jj-j)*(jj-j) ) * param.pixelSize ;
if( dist < rad2 ) if( dist < rad2 )
{ {
localproj[ii * param.param_device.NumberPixels + jj] += param.pixelSize * 2 * sqrt( rad2 - dist ) * Model.points[n].density localproj[ii * param.param_device.NumberPixels + jj] += param.pixelSize * 2 * sqrt( rad2 - dist ) * Model.points[n].density
/ Model.NormDen * 3 / (4 * M_PI * rad2 * Model.points[n].radius); / Model.NormDen * 3 / (4 * M_PI * rad2 * Model.points[n].radius);
} }
} }
} }
} }
} }
else else
{ {
// ************ Simple Projection over the Z axis******************** // ************ Simple Projection over the Z axis********************
for(int n = 0; n < Model.nPointsModel; n++) for(int n = 0; n < Model.nPointsModel; n++)
{ {
//Getting pixel that represents coordinates & shifting the start at to Numpix/2,Numpix/2 ) //Getting pixel that represents coordinates & shifting the start at to Numpix/2,Numpix/2 )
i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || j >= param.param_device.NumberPixels) if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || j >= param.param_device.NumberPixels)
{ {
if (DebugOutput >= 3) cout << "Model Point out of map: " << i << ", " << j << "\n"; if (DebugOutput >= 3) cout << "Model Point out of map: " << i << ", " << j << "\n";
continue; continue;
} }
localproj[i * param.param_device.NumberPixels + j] += Model.points[n].density / Model.NormDen; localproj[i * param.param_device.NumberPixels + j] += Model.points[n].density / Model.NormDen;
} }
} }
// **** Output Just to check**** // **** Output Just to check****
......
...@@ -51,8 +51,9 @@ protected: ...@@ -51,8 +51,9 @@ protected:
int nProjectionMaps; //Maps in memory at a time int nProjectionMaps; //Maps in memory at a time
int nProjectionMapsTotal; //Maps in total int nProjectionMapsTotal; //Maps in total
int FFTAlgo; int FFTAlgo; //Use the FFT Algorithm (Default 1)
int DebugOutput; int DebugOutput; //Debug Output Level (Default 2)
int nProjectionsAtOnce; //Number of projections to do at once via OpenMP (Default 1)
}; };
#endif #endif
...@@ -277,7 +277,6 @@ int bioem_param::readParameters(const char* fileinput) ...@@ -277,7 +277,6 @@ int bioem_param::readParameters(const char* fileinput)
exit(1); exit(1);
} }
param_device.NumberFFTPixels1D = param_device.NumberPixels / 2 + 1; param_device.NumberFFTPixels1D = param_device.NumberPixels / 2 + 1;
FFTMapSize = param_device.NumberPixels * param_device.NumberFFTPixels1D; FFTMapSize = param_device.NumberPixels * param_device.NumberFFTPixels1D;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment