diff --git a/bioem.cpp b/bioem.cpp index 086c8a895de8235408d4e2f1f45920ec9b191e18..c5688393aa40b87aaf9164e4c97f0c9f937999dc 100644 --- a/bioem.cpp +++ b/bioem.cpp @@ -81,6 +81,7 @@ bioem::bioem() { FFTAlgo = getenv("FFTALGO") == NULL ? 1 : atoi(getenv("FFTALGO")); DebugOutput = getenv("BIOEM_DEBUG_OUTPUT") == NULL ? 2 : atoi(getenv("BIOEM_DEBUG_OUTPUT")); + nProjectionsAtOnce = getenv("BIOEM_PROJECTIONS_AT_ONCE") == NULL ? 1 : atoi(getenv("BIOEM_PROJECTIONS_AT_ONCE")); } bioem::~bioem() @@ -361,13 +362,14 @@ int bioem::run() // ******************************** MAIN CYCLE ****************************************** - mycomplex_t* proj_mapFFT; + mycomplex_t* proj_mapsFFT; myfloat_t* conv_map = NULL; mycomplex_t* conv_mapFFT; myfloat_t sumCONV, sumsquareCONV; //allocating fftw_complex vector - proj_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D); + const int ProjMapSize = (param.FFTMapSize + 64) & ~63; //Make sure this is properly aligned for fftw..., Actually this should be ensureb by using FFTMapSize, but it is not due to a bug in CUFFT which cannot handle padding properly + proj_mapsFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * ProjMapSize * nProjectionsAtOnce); conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D); if (!FFTAlgo) conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels); @@ -379,47 +381,60 @@ int bioem::run() int iOrientEnd = (int) ((long long int) (mpi_rank + 1) * param.nTotGridAngles / mpi_size); if (iOrientEnd > param.nTotGridAngles) iOrientEnd = param.nTotGridAngles; - for (int iOrient = iOrientStart; iOrient < iOrientEnd; iOrient++) + for (int iOrientAtOnce = iOrientStart; iOrientAtOnce < iOrientEnd; iOrientAtOnce += nProjectionsAtOnce) { // *************************************************************************************** // ***** Creating Projection for given orientation and transforming to Fourier space ***** if (DebugOutput >= 1) timer2.ResetStart(); if (DebugOutput >= 2) timer.ResetStart(); - createProjection(iOrient, proj_mapFFT); - if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrient, timer.GetCurrentElapsedTime(), mpi_rank); + int iTmpEnd = std::min(iOrientEnd, iOrientAtOnce + nProjectionsAtOnce); +#pragma omp parallel for + for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++) + { + createProjection(iOrient, &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize]); + } + if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrientAtOnce, timer.GetCurrentElapsedTime(), mpi_rank); - // *************************************************************************************** - // ***** **** Internal Loop over convolutions **** ***** - for (int iConv = 0; iConv < param.nTotCTFs; iConv++) + for (int iOrient = iOrientAtOnce; iOrient < iTmpEnd;iOrient++) { - // *** Calculating convolutions of projection map and crosscorrelations *** + mycomplex_t* proj_mapFFT = &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize]; + // *************************************************************************************** + // ***** **** Internal Loop over convolutions **** ***** + for (int iConv = 0; iConv < param.nTotCTFs; iConv++) + { + // *** Calculating convolutions of projection map and crosscorrelations *** - if (DebugOutput >= 2) timer.ResetStart(); - createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); - if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, timer.GetCurrentElapsedTime(), mpi_rank); + if (DebugOutput >= 2) timer.ResetStart(); + createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); + if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, timer.GetCurrentElapsedTime(), mpi_rank); - // *************************************************************************************** - // *** Comparing each calculated convoluted map with all experimental maps *** - if (DebugOutput >= 2) timer.ResetStart(); - compareRefMaps(iOrient, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); + // *************************************************************************************** + // *** Comparing each calculated convoluted map with all experimental maps *** + if (DebugOutput >= 2) timer.ResetStart(); + compareRefMaps(iOrient, iConv, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); - if (DebugOutput >= 2) + if (DebugOutput >= 2) + { + const double compTime = timer.GetCurrentElapsedTime(); + const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1; + const double nFlops = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts * + (((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 5. + 25.) / compTime; + const double nGBs = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts * + (((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / compTime; + const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) * (double) sizeof(myfloat_t) / compTime; + + printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank); + } + } + if (DebugOutput >= 1) { - const double compTime = timer.GetCurrentElapsedTime(); - const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1; - const double nFlops = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts * - (((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 5. + 25.) / compTime; - const double nGBs = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts * - (((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / compTime; - const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) * (double) sizeof(myfloat_t) / compTime; - - printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank); + printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, timer2.GetCurrentElapsedTime(), mpi_rank); + timer2.ResetStart(); } } - if (DebugOutput >= 1) printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, timer2.GetCurrentElapsedTime(), mpi_rank); } //deallocating fftw_complex vector - myfftw_free(proj_mapFFT); + myfftw_free(proj_mapsFFT); myfftw_free(conv_mapFFT); if (!FFTAlgo) myfftw_free(conv_map); @@ -626,7 +641,7 @@ int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, myc if (FFTAlgo) { //With FFT Algorithm - #pragma omp parallel for + #pragma omp parallel for schedule(dynamic, 1) for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++) { const int num = omp_get_thread_num(); @@ -636,7 +651,7 @@ int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, myc else { //Without FFT Algorithm - #pragma omp parallel for + #pragma omp parallel for schedule(dynamic, 1) for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++) { compareRefMapShifted < -1 > (iRefMap, iOrient, iConv, conv_map, pProb, param.param_device, RefMap); @@ -716,52 +731,52 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT) int i, j; //********** Projection with radius *************** - if(param.doaaradius){ - - int irad; - myfloat_t dist, rad2; + if(param.doaaradius) + { + int irad; + myfloat_t dist, rad2; - for(int n = 0; n < Model.nPointsModel; n++) - { - //Getting Centers of Sphere - i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); - j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); + for(int n = 0; n < Model.nPointsModel; n++) + { + //Getting Centers of Sphere + i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); + j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); //Getting the radius irad=int( Model.points[n].radius / param.pixelSize ); - rad2= Model.points[n].radius * Model.points[n].radius; + rad2= Model.points[n].radius * Model.points[n].radius; - //Projecting over the radius - for(int ii= i - irad; ii < i + irad; ii++) + //Projecting over the radius + for(int ii= i - irad; ii < i + irad; ii++) { for(int jj = j - irad; jj < j + irad; jj++) { - dist= ( (myfloat_t) (ii-i)*(ii-i)+(jj-j)*(jj-j) ) * param.pixelSize ; - if( dist < rad2 ) + dist= ( (myfloat_t) (ii-i)*(ii-i)+(jj-j)*(jj-j) ) * param.pixelSize ; + if( dist < rad2 ) { - localproj[ii * param.param_device.NumberPixels + jj] += param.pixelSize * 2 * sqrt( rad2 - dist ) * Model.points[n].density - / Model.NormDen * 3 / (4 * M_PI * rad2 * Model.points[n].radius); + localproj[ii * param.param_device.NumberPixels + jj] += param.pixelSize * 2 * sqrt( rad2 - dist ) * Model.points[n].density + / Model.NormDen * 3 / (4 * M_PI * rad2 * Model.points[n].radius); } - } - } + } + } } } else { - // ************ Simple Projection over the Z axis******************** + // ************ Simple Projection over the Z axis******************** for(int n = 0; n < Model.nPointsModel; n++) { - //Getting pixel that represents coordinates & shifting the start at to Numpix/2,Numpix/2 ) - i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); - j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); + //Getting pixel that represents coordinates & shifting the start at to Numpix/2,Numpix/2 ) + i = floor(RotatedPointsModel[n].pos[0] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); + j = floor(RotatedPointsModel[n].pos[1] / param.pixelSize + (myfloat_t) param.param_device.NumberPixels / 2.0f + 0.5f); - if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || j >= param.param_device.NumberPixels) - { - if (DebugOutput >= 3) cout << "Model Point out of map: " << i << ", " << j << "\n"; - continue; - } + if (i < 0 || j < 0 || i >= param.param_device.NumberPixels || j >= param.param_device.NumberPixels) + { + if (DebugOutput >= 3) cout << "Model Point out of map: " << i << ", " << j << "\n"; + continue; + } - localproj[i * param.param_device.NumberPixels + j] += Model.points[n].density / Model.NormDen; - } + localproj[i * param.param_device.NumberPixels + j] += Model.points[n].density / Model.NormDen; + } } // **** Output Just to check**** diff --git a/include/bioem.h b/include/bioem.h index 7f673ddbb4c9e39f7164c40fba0113d90ffdc09b..4f4f12a41f3c76fafce9a534fefe5a16ec9b40a7 100644 --- a/include/bioem.h +++ b/include/bioem.h @@ -51,8 +51,9 @@ protected: int nProjectionMaps; //Maps in memory at a time int nProjectionMapsTotal; //Maps in total - int FFTAlgo; - int DebugOutput; + int FFTAlgo; //Use the FFT Algorithm (Default 1) + int DebugOutput; //Debug Output Level (Default 2) + int nProjectionsAtOnce; //Number of projections to do at once via OpenMP (Default 1) }; #endif diff --git a/param.cpp b/param.cpp index 55d9dc085ce4d87be4587a453d5fb486659f7191..079ea66dab1100bea4d3ba85a7e5c697687a6ec3 100644 --- a/param.cpp +++ b/param.cpp @@ -277,7 +277,6 @@ int bioem_param::readParameters(const char* fileinput) exit(1); } - param_device.NumberFFTPixels1D = param_device.NumberPixels / 2 + 1; FFTMapSize = param_device.NumberPixels * param_device.NumberFFTPixels1D;