Commit b1e9df04 authored by David Rohr's avatar David Rohr
Browse files

Add option to do multiple projections in parallel

parent 4dc6c762
......@@ -81,6 +81,7 @@ bioem::bioem()
{
FFTAlgo = getenv("FFTALGO") == NULL ? 1 : atoi(getenv("FFTALGO"));
DebugOutput = getenv("BIOEM_DEBUG_OUTPUT") == NULL ? 2 : atoi(getenv("BIOEM_DEBUG_OUTPUT"));
nProjectionsAtOnce = getenv("BIOEM_PROJECTIONS_AT_ONCE") == NULL ? 1 : atoi(getenv("BIOEM_PROJECTIONS_AT_ONCE"));
}
bioem::~bioem()
......@@ -361,13 +362,14 @@ int bioem::run()
// ******************************** MAIN CYCLE ******************************************
mycomplex_t* proj_mapFFT;
mycomplex_t* proj_mapsFFT;
myfloat_t* conv_map = NULL;
mycomplex_t* conv_mapFFT;
myfloat_t sumCONV, sumsquareCONV;
//allocating fftw_complex vector
proj_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
const int ProjMapSize = (param.FFTMapSize + 64) & ~63; //Make sure this is properly aligned for fftw..., Actually this should be ensureb by using FFTMapSize, but it is not due to a bug in CUFFT which cannot handle padding properly
proj_mapsFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * ProjMapSize * nProjectionsAtOnce);
conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
if (!FFTAlgo) conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
......@@ -379,15 +381,23 @@ int bioem::run()
int iOrientEnd = (int) ((long long int) (mpi_rank + 1) * param.nTotGridAngles / mpi_size);
if (iOrientEnd > param.nTotGridAngles) iOrientEnd = param.nTotGridAngles;
for (int iOrient = iOrientStart; iOrient < iOrientEnd; iOrient++)
for (int iOrientAtOnce = iOrientStart; iOrientAtOnce < iOrientEnd; iOrientAtOnce += nProjectionsAtOnce)
{
// ***************************************************************************************
// ***** Creating Projection for given orientation and transforming to Fourier space *****
if (DebugOutput >= 1) timer2.ResetStart();
if (DebugOutput >= 2) timer.ResetStart();
createProjection(iOrient, proj_mapFFT);
if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrient, timer.GetCurrentElapsedTime(), mpi_rank);
int iTmpEnd = std::min(iOrientEnd, iOrientAtOnce + nProjectionsAtOnce);
#pragma omp parallel for
for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++)
{
createProjection(iOrient, &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize]);
}
if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrientAtOnce, timer.GetCurrentElapsedTime(), mpi_rank);
for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++)
{
mycomplex_t* proj_mapFFT = &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize];
// ***************************************************************************************
// ***** **** Internal Loop over convolutions **** *****
for (int iConv = 0; iConv < param.nTotCTFs; iConv++)
......@@ -416,10 +426,15 @@ int bioem::run()
printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank);
}
}
if (DebugOutput >= 1) printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, timer2.GetCurrentElapsedTime(), mpi_rank);
if (DebugOutput >= 1)
{
printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, timer2.GetCurrentElapsedTime(), mpi_rank);
timer2.ResetStart();
}
}
}
//deallocating fftw_complex vector
myfftw_free(proj_mapFFT);
myfftw_free(proj_mapsFFT);
myfftw_free(conv_mapFFT);
if (!FFTAlgo) myfftw_free(conv_map);
......@@ -626,7 +641,7 @@ int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, myc
if (FFTAlgo)
{
//With FFT Algorithm
#pragma omp parallel for
#pragma omp parallel for schedule(dynamic, 1)
for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{
const int num = omp_get_thread_num();
......@@ -636,7 +651,7 @@ int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, myc
else
{
//Without FFT Algorithm
#pragma omp parallel for
#pragma omp parallel for schedule(dynamic, 1)
for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{
compareRefMapShifted < -1 > (iRefMap, iOrient, iConv, conv_map, pProb, param.param_device, RefMap);
......@@ -716,8 +731,8 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
int i, j;
//********** Projection with radius ***************
if(param.doaaradius){
if(param.doaaradius)
{
int irad;
myfloat_t dist, rad2;
......
......@@ -51,8 +51,9 @@ protected:
int nProjectionMaps; //Maps in memory at a time
int nProjectionMapsTotal; //Maps in total
int FFTAlgo;
int DebugOutput;
int FFTAlgo; //Use the FFT Algorithm (Default 1)
int DebugOutput; //Debug Output Level (Default 2)
int nProjectionsAtOnce; //Number of projections to do at once via OpenMP (Default 1)
};
#endif
......@@ -277,7 +277,6 @@ int bioem_param::readParameters(const char* fileinput)
exit(1);
}
param_device.NumberFFTPixels1D = param_device.NumberPixels / 2 + 1;
FFTMapSize = param_device.NumberPixels * param_device.NumberFFTPixels1D;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment