Commit b1e9df04 authored by David Rohr's avatar David Rohr
Browse files

Add option to do multiple projections in parallel

parent 4dc6c762
......@@ -81,6 +81,7 @@ bioem::bioem()
{
FFTAlgo = getenv("FFTALGO") == NULL ? 1 : atoi(getenv("FFTALGO"));
DebugOutput = getenv("BIOEM_DEBUG_OUTPUT") == NULL ? 2 : atoi(getenv("BIOEM_DEBUG_OUTPUT"));
nProjectionsAtOnce = getenv("BIOEM_PROJECTIONS_AT_ONCE") == NULL ? 1 : atoi(getenv("BIOEM_PROJECTIONS_AT_ONCE"));
}
bioem::~bioem()
......@@ -361,13 +362,14 @@ int bioem::run()
// ******************************** MAIN CYCLE ******************************************
mycomplex_t* proj_mapFFT;
mycomplex_t* proj_mapsFFT;
myfloat_t* conv_map = NULL;
mycomplex_t* conv_mapFFT;
myfloat_t sumCONV, sumsquareCONV;
//allocating fftw_complex vector
proj_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
const int ProjMapSize = (param.FFTMapSize + 64) & ~63; //Make sure this is properly aligned for fftw..., Actually this should be ensureb by using FFTMapSize, but it is not due to a bug in CUFFT which cannot handle padding properly
proj_mapsFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * ProjMapSize * nProjectionsAtOnce);
conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
if (!FFTAlgo) conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
......@@ -379,47 +381,60 @@ int bioem::run()
int iOrientEnd = (int) ((long long int) (mpi_rank + 1) * param.nTotGridAngles / mpi_size);
if (iOrientEnd > param.nTotGridAngles) iOrientEnd = param.nTotGridAngles;
for (int iOrient = iOrientStart; iOrient < iOrientEnd; iOrient++)
for (int iOrientAtOnce = iOrientStart; iOrientAtOnce < iOrientEnd; iOrientAtOnce += nProjectionsAtOnce)
{
// ***************************************************************************************
// ***** Creating Projection for given orientation and transforming to Fourier space *****
if (DebugOutput >= 1) timer2.ResetStart();
if (DebugOutput >= 2) timer.ResetStart();
createProjection(iOrient, proj_mapFFT);
if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrient, timer.GetCurrentElapsedTime(), mpi_rank);
int iTmpEnd = std::min(iOrientEnd, iOrientAtOnce + nProjectionsAtOnce);
#pragma omp parallel for
for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++)
{
createProjection(iOrient, &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize]);
}
if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrientAtOnce, timer.GetCurrentElapsedTime(), mpi_rank);
// ***************************************************************************************
// ***** **** Internal Loop over convolutions **** *****
for (int iConv = 0; iConv < param.nTotCTFs; iConv++)
for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++)
{
// *** Calculating convolutions of projection map and crosscorrelations ***
mycomplex_t* proj_mapFFT = &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize];
// ***************************************************************************************
// ***** **** Internal Loop over convolutions **** *****
for (int iConv = 0; iConv < param.nTotCTFs; iConv++)
{
// *** Calculating convolutions of projection map and crosscorrelations ***
if (DebugOutput >= 2) timer.ResetStart();
createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, timer.GetCurrentElapsedTime(), mpi_rank);
if (DebugOutput >= 2) timer.ResetStart();
createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, timer.GetCurrentElapsedTime(), mpi_rank);
// ***************************************************************************************
// *** Comparing each calculated convoluted map with all experimental maps ***
if (DebugOutput >= 2) timer.ResetStart();
compareRefMaps(iOrient, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
// ***************************************************************************************
// *** Comparing each calculated convoluted map with all experimental maps ***
if (DebugOutput >= 2) timer.ResetStart();
compareRefMaps(iOrient, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
if (DebugOutput >= 2)
if (DebugOutput >= 2)
{
const double compTime = timer.GetCurrentElapsedTime();
const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1;
const double nFlops = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 5. + 25.) / compTime;
const double nGBs = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / compTime;
const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) * (double) sizeof(myfloat_t) / compTime;
printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank);
}
}
if (DebugOutput >= 1)
{
const double compTime = timer.GetCurrentElapsedTime();
const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1;
const double nFlops = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 5. + 25.) / compTime;
const double nGBs = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / compTime;
const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) * (double) sizeof(myfloat_t) / compTime;
printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank);
printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, timer2.GetCurrentElapsedTime(), mpi_rank);
timer2.ResetStart();
}
}
if (DebugOutput >= 1) printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, timer2.GetCurrentElapsedTime(), mpi_rank);
}
//deallocating fftw_complex vector
myfftw_free(proj_mapFFT);
myfftw_free(proj_mapsFFT);
myfftw_free(conv_mapFFT);
if (!FFTAlgo) myfftw_free(conv_map);
......@@ -626,7 +641,7 @@ int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, myc
if (FFTAlgo)
{
//With FFT Algorithm
#pragma omp parallel for
#pragma omp parallel for schedule(dynamic, 1)
for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{
const int num = omp_get_thread_num();
......@@ -636,7 +651,7 @@ int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, myc
else
{
//Without FFT Algorithm
#pragma omp parallel for
#pragma omp parallel for schedule(dynamic, 1)
for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{
compareRefMapShifted < -1 > (iRefMap, iOrient, iConv, conv_map, pProb, param.param_device, RefMap);
......@@ -716,52 +731,52 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
int i, j;
//********** Projection with radius ***************
if(param.doaaradius){
int irad;
myfloat_t dist, rad2;
if(param.doaaradius)
{
int irad;
myfloat_t dist, rad2;
for(int n = 0; n < Model.nPointsModel; n++)
{
//Getting Centers of Sphere
i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
for(int n = 0; n < Model.nPointsModel; n++)
{
//Getting Centers of Sphere
i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
//Getting the radius
irad=int( Model.points[n].radius / param.pixelSize );
rad2= Model.points[n].radius * Model.points[n].radius;
rad2= Model.points[n].radius * Model.points[n].radius;
//Projecting over the radius
for(int ii= i - irad; ii < i + irad; ii++)
//Projecting over the radius
for(int ii= i - irad; ii < i + irad; ii++)
{
for(int jj = j - irad; jj < j + irad; jj++)
{
dist= ( (myfloat_t) (ii-i)*(ii-i)+(jj-j)*(jj-j) ) * param.pixelSize ;
if( dist < rad2 )
dist= ( (myfloat_t) (ii-i)*(ii-i)+(jj-j)*(jj-j) ) * param.pixelSize ;
if( dist < rad2 )
{
localproj[ii * param.param_device.NumberPixels + jj] += param.pixelSize * 2 * sqrt( rad2 - dist ) * Model.points[n].density
/ Model.NormDen * 3 / (4 * M_PI * rad2 * Model.points[n].radius);
localproj[ii * param.param_device.NumberPixels + jj] += param.pixelSize * 2 * sqrt( rad2 - dist ) * Model.points[n].density
/ Model.NormDen * 3 / (4 * M_PI * rad2 * Model.points[n].radius);
}
}
}
}
}
}
}
else
{
// ************ Simple Projection over the Z axis********************
// ************ Simple Projection over the Z axis********************
for(int n = 0; n < Model.nPointsModel; n++)
{
//Getting pixel that represents coordinates & shifting the start at to Numpix/2,Numpix/2 )
i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
//Getting pixel that represents coordinates & shifting the start at to Numpix/2,Numpix/2 )
i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f);
if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || j >= param.param_device.NumberPixels)
{
if (DebugOutput >= 3) cout << "Model Point out of map: " << i << ", " << j << "\n";
continue;
}
if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || j >= param.param_device.NumberPixels)
{
if (DebugOutput >= 3) cout << "Model Point out of map: " << i << ", " << j << "\n";
continue;
}
localproj[i * param.param_device.NumberPixels + j] += Model.points[n].density / Model.NormDen;
}
localproj[i * param.param_device.NumberPixels + j] += Model.points[n].density / Model.NormDen;
}
}
// **** Output Just to check****
......
......@@ -51,8 +51,9 @@ protected:
int nProjectionMaps; //Maps in memory at a time
int nProjectionMapsTotal; //Maps in total
int FFTAlgo;
int DebugOutput;
int FFTAlgo; //Use the FFT Algorithm (Default 1)
int DebugOutput; //Debug Output Level (Default 2)
int nProjectionsAtOnce; //Number of projections to do at once via OpenMP (Default 1)
};
#endif
......@@ -277,7 +277,6 @@ int bioem_param::readParameters(const char* fileinput)
exit(1);
}
param_device.NumberFFTPixels1D = param_device.NumberPixels / 2 + 1;
FFTMapSize = param_device.NumberPixels * param_device.NumberFFTPixels1D;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment