Commit 6a821c9b authored by David Rohr's avatar David Rohr
Browse files

improve debug output, use openmp during initialization where applicable, move...

improve debug output, use openmp during initialization where applicable, move fft scrach spaces to param class
parent f4d2e710
......@@ -222,21 +222,34 @@ int bioem::configure(int ac, char* av[])
RefMap.readRefMaps(param, mapfile.c_str());
}
HighResTimer timer;
#ifdef WITH_MPI
MPI_Bcast(&param, sizeof(param), MPI_BYTE, 0, MPI_COMM_WORLD);
//refCtf, CtfParam, angles automatically filled by precalculare function below
MPI_Bcast(&Model, sizeof(Model), MPI_BYTE, 0, MPI_COMM_WORLD);
if (mpi_rank != 0) Model.points = (bioem_model::bioem_model_point*) mallocchk(sizeof(bioem_model::bioem_model_point) * Model.nPointsModel);
MPI_Bcast(Model.points, sizeof(bioem_model::bioem_model_point) * Model.nPointsModel, MPI_BYTE, 0, MPI_COMM_WORLD);
MPI_Bcast(&RefMap, sizeof(RefMap), MPI_BYTE, 0, MPI_COMM_WORLD);
if (mpi_rank != 0) RefMap.maps = (myfloat_t*) mallocchk(RefMap.refMapSize * sizeof(myfloat_t) * RefMap.ntotRefMap);
MPI_Bcast(RefMap.maps, RefMap.refMapSize * sizeof(myfloat_t) * RefMap.ntotRefMap, MPI_BYTE, 0, MPI_COMM_WORLD);
#endif
if (mpi_size > 1)
{
if (DebugOutput >= 2 && mpi_rank == 0) timer.ResetStart();
MPI_Bcast(&param, sizeof(param), MPI_BYTE, 0, MPI_COMM_WORLD);
//refCtf, CtfParam, angles automatically filled by precalculare function below
MPI_Bcast(&Model, sizeof(Model), MPI_BYTE, 0, MPI_COMM_WORLD);
if (mpi_rank != 0) Model.points = (bioem_model::bioem_model_point*) mallocchk(sizeof(bioem_model::bioem_model_point) * Model.nPointsModel);
MPI_Bcast(Model.points, sizeof(bioem_model::bioem_model_point) * Model.nPointsModel, MPI_BYTE, 0, MPI_COMM_WORLD);
MPI_Bcast(&RefMap, sizeof(RefMap), MPI_BYTE, 0, MPI_COMM_WORLD);
if (mpi_rank != 0) RefMap.maps = (myfloat_t*) mallocchk(RefMap.refMapSize * sizeof(myfloat_t) * RefMap.ntotRefMap);
MPI_Bcast(RefMap.maps, RefMap.refMapSize * sizeof(myfloat_t) * RefMap.ntotRefMap, MPI_BYTE, 0, MPI_COMM_WORLD);
if (DebugOutput >= 2 && mpi_rank == 0) printf("MPI Broadcast Data %f\n", timer.GetCurrentElapsedTime());
}
#endif
// ****************** Precalculating Necessary Stuff *********************
if (DebugOutput >= 2 && mpi_rank == 0) timer.ResetStart();
param.PrepareFFTs();
if (DebugOutput >= 2 && mpi_rank == 0)
{
printf("Time Prepare FFTs %f\n", timer.GetCurrentElapsedTime());
timer.ResetStart();
}
precalculate();
if (getenv("BIOEM_DEBUG_BREAK"))
......@@ -246,9 +259,20 @@ int bioem::configure(int ac, char* av[])
if (param.nTotCTFs > cut) param.nTotCTFs = cut;
}
if (DebugOutput >= 2 && mpi_rank == 0)
{
printf("Time Precalculate %f\n", timer.GetCurrentElapsedTime());
timer.ResetStart();
}
pProb.init(RefMap.ntotRefMap, param.nTotGridAngles, *this);
if (DebugOutput >= 2 && mpi_rank == 0)
{
printf("Time Init Probabilities %f\n", timer.GetCurrentElapsedTime());
timer.ResetStart();
}
deviceInit();
if (DebugOutput >= 2 && mpi_rank == 0) printf("Time Device Init %f\n", timer.GetCurrentElapsedTime());
return(0);
}
......@@ -265,15 +289,28 @@ int bioem::precalculate()
// **************************************************************************************
// **Precalculating Routine of Orientation grids, Map crosscorrelations and CTF Kernels**
// **************************************************************************************
HighResTimer timer;
// Generating Grids of orientations
if (DebugOutput >= 3) timer.ResetStart();
param.CalculateGridsParam();
if (DebugOutput >= 3)
{
printf("\tTime Precalculate Grids Param: %f\n", timer.GetCurrentElapsedTime());
timer.ResetStart();
}
// Precalculating CTF Kernels stored in class Param
param.CalculateRefCTF();
if (DebugOutput >= 3)
{
printf("\tTime Precalculate CTFs: %f\n", timer.GetCurrentElapsedTime());
timer.ResetStart();
}
//Precalculate Maps
RefMap.precalculate(param, *this);
if (DebugOutput >= 3) printf("\tTime Precalculate Maps: %f\n", timer.GetCurrentElapsedTime());
return(0);
}
......@@ -315,20 +352,6 @@ int bioem::run()
}
// **************************************************************************************
deviceStartRun();
{
const int count = omp_get_max_threads();
localCCT = new mycomplex_t*[count];
lCC = new myfloat_t*[count];
#pragma omp parallel
{
#pragma omp critical
{
const int i = omp_get_thread_num();
localCCT[i] = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
lCC[i] = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
}
}
}
// ******************************** MAIN CYCLE ******************************************
......@@ -395,125 +418,118 @@ int bioem::run()
delete[] conv_map;
deviceFinishRun();
{
const int count = omp_get_max_threads();
for (int i = 0;i < count;i++)
{
myfftw_free(localCCT[i]);
myfftw_free(lCC[i]);
}
delete[] localCCT;
delete[] lCC;
}
// ************* Writing Out Probabilities ***************
// *** Angular Probability ***
#ifdef WITH_MPI
if (DebugOutput >= 2 && mpi_rank == 0) timer.ResetStart();
//Reduce Constant and summarize probabilities
if (mpi_size > 1)
{
myfloat_t* tmp1 = new myfloat_t[RefMap.ntotRefMap];
myfloat_t* tmp2 = new myfloat_t[RefMap.ntotRefMap];
myfloat_t* tmp3 = new myfloat_t[RefMap.ntotRefMap];
for (int i = 0;i < RefMap.ntotRefMap;i++)
if (DebugOutput >= 1 && mpi_rank == 0) timer.ResetStart();
//Reduce Constant and summarize probabilities
{
tmp1[i] = pProb.getProbMap(i).Constoadd;
}
MPI_Allreduce(tmp1, tmp2, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
for (int i = 0;i < RefMap.ntotRefMap;i++)
{
bioem_Probability_map& pProbMap = pProb.getProbMap(i);
tmp1[i] = pProbMap.Total * exp(pProbMap.Constoadd - tmp2[i]);
}
MPI_Reduce(tmp1, tmp3, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
//Find MaxProb
MPI_Status mpistatus;
{
int* tmpi1 = new int[RefMap.ntotRefMap];
int* tmpi2 = new int[RefMap.ntotRefMap];
myfloat_t* tmp1 = new myfloat_t[RefMap.ntotRefMap];
myfloat_t* tmp2 = new myfloat_t[RefMap.ntotRefMap];
myfloat_t* tmp3 = new myfloat_t[RefMap.ntotRefMap];
for (int i = 0;i < RefMap.ntotRefMap;i++)
{
bioem_Probability_map& pProbMap = pProb.getProbMap(i);
tmpi1[i] = tmp2[i] <= pProbMap.Constoadd ? mpi_rank : -1;
tmp1[i] = pProb.getProbMap(i).Constoadd;
}
MPI_Allreduce(tmpi1, tmpi2, RefMap.ntotRefMap, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
MPI_Allreduce(tmp1, tmp2, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
for (int i = 0;i < RefMap.ntotRefMap;i++)
{
if (tmpi2[i] == -1)
bioem_Probability_map& pProbMap = pProb.getProbMap(i);
tmp1[i] = pProbMap.Total * exp(pProbMap.Constoadd - tmp2[i]);
}
MPI_Reduce(tmp1, tmp3, RefMap.ntotRefMap, MY_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
//Find MaxProb
MPI_Status mpistatus;
{
int* tmpi1 = new int[RefMap.ntotRefMap];
int* tmpi2 = new int[RefMap.ntotRefMap];
for (int i = 0;i < RefMap.ntotRefMap;i++)
{
if (mpi_rank == 0) printf("Error: Could not find highest probability\n");
bioem_Probability_map& pProbMap = pProb.getProbMap(i);
tmpi1[i] = tmp2[i] <= pProbMap.Constoadd ? mpi_rank : -1;
}
else if (tmpi2[i] != 0) //Skip if rank 0 already has highest probability
MPI_Allreduce(tmpi1, tmpi2, RefMap.ntotRefMap, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
for (int i = 0;i < RefMap.ntotRefMap;i++)
{
if (mpi_rank == 0)
if (tmpi2[i] == -1)
{
MPI_Recv(&pProb.getProbMap(i).max, sizeof(pProb.getProbMap(i).max), MPI_BYTE, tmpi2[i], i, MPI_COMM_WORLD, &mpistatus);
if (mpi_rank == 0) printf("Error: Could not find highest probability\n");
}
else if (mpi_rank == tmpi2[i])
else if (tmpi2[i] != 0) //Skip if rank 0 already has highest probability
{
MPI_Send(&pProb.getProbMap(i).max, sizeof(pProb.getProbMap(i).max), MPI_BYTE, 0, i, MPI_COMM_WORLD);
if (mpi_rank == 0)
{
MPI_Recv(&pProb.getProbMap(i).max, sizeof(pProb.getProbMap(i).max), MPI_BYTE, tmpi2[i], i, MPI_COMM_WORLD, &mpistatus);
}
else if (mpi_rank == tmpi2[i])
{
MPI_Send(&pProb.getProbMap(i).max, sizeof(pProb.getProbMap(i).max), MPI_BYTE, 0, i, MPI_COMM_WORLD);
}
}
}
delete[] tmpi1;
delete[] tmpi2;
}
delete[] tmpi1;
delete[] tmpi2;
}
if (mpi_rank == 0)
{
for (int i = 0;i < RefMap.ntotRefMap;i++)
if (mpi_rank == 0)
{
bioem_Probability_map& pProbMap = pProb.getProbMap(i);
pProbMap.Total = tmp3[i];
pProbMap.Constoadd = tmp2[i];
for (int i = 0;i < RefMap.ntotRefMap;i++)
{
bioem_Probability_map& pProbMap = pProb.getProbMap(i);
pProbMap.Total = tmp3[i];
pProbMap.Constoadd = tmp2[i];
}
}
}
delete[] tmp1;
delete[] tmp2;
delete[] tmp3;
if (DebugOutput >= 2 && mpi_rank == 0) printf("Time MPI Reduction: %f\n", timer.GetCurrentElapsedTime());
}
//Angle Reduction and Probability summation for individual angles
if (param.param_device.writeAngles)
{
const int count = RefMap.ntotRefMap * param.nTotGridAngles;
myfloat_t* tmp1 = new myfloat_t[count];
myfloat_t* tmp2 = new myfloat_t[count];
myfloat_t* tmp3 = new myfloat_t[count];
for (int i = 0;i < RefMap.ntotRefMap;i++)
{
tmp1[i] = pProb.getProbMap(i).Constoadd;
delete[] tmp1;
delete[] tmp2;
delete[] tmp3;
if (DebugOutput >= 1 && mpi_rank == 0 && mpi_size > 1) printf("Time MPI Reduction: %f\n", timer.GetCurrentElapsedTime());
}
MPI_Allreduce(tmp1, tmp2, count, MY_MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
for (int i = 0;i < RefMap.ntotRefMap;i++)
//Angle Reduction and Probability summation for individual angles
if (param.param_device.writeAngles)
{
for (int j = 0;j < param.nTotGridAngles;j++)
const int count = RefMap.ntotRefMap * param.nTotGridAngles;
myfloat_t* tmp1 = new myfloat_t[count];
myfloat_t* tmp2 = new myfloat_t[count];
myfloat_t* tmp3 = new myfloat_t[count];
for (int i = 0;i < RefMap.ntotRefMap;i++)
{
bioem_Probability_angle& pProbAngle = pProb.getProbAngle(i, j);
tmp1[i * param.nTotGridAngles + j] = pProbAngle.forAngles * exp(pProbAngle.ConstAngle - tmp2[i * param.nTotGridAngles + j]);
tmp1[i] = pProb.getProbMap(i).Constoadd;
}
}
MPI_Reduce(tmp1, tmp3, count, MY_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
if (mpi_rank == 0)
{
MPI_Allreduce(tmp1, tmp2, count, MY_MPI_FLOAT, MPI_MAX, MPI_COMM_WORLD);
for (int i = 0;i < RefMap.ntotRefMap;i++)
{
for (int j = 0;j < param.nTotGridAngles;j++)
{
bioem_Probability_angle& pProbAngle = pProb.getProbAngle(i, j);
pProbAngle.forAngles = tmp3[i * param.nTotGridAngles + j];
pProbAngle.ConstAngle = tmp2[i * param.nTotGridAngles + j];
tmp1[i * param.nTotGridAngles + j] = pProbAngle.forAngles * exp(pProbAngle.ConstAngle - tmp2[i * param.nTotGridAngles + j]);
}
}
MPI_Reduce(tmp1, tmp3, count, MY_MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
if (mpi_rank == 0)
{
for (int i = 0;i < RefMap.ntotRefMap;i++)
{
for (int j = 0;j < param.nTotGridAngles;j++)
{
bioem_Probability_angle& pProbAngle = pProb.getProbAngle(i, j);
pProbAngle.forAngles = tmp3[i * param.nTotGridAngles + j];
pProbAngle.ConstAngle = tmp2[i * param.nTotGridAngles + j];
}
}
}
delete[] tmp1;
delete[] tmp2;
delete[] tmp3;
}
delete[] tmp1;
delete[] tmp2;
delete[] tmp3;
}
#endif
......@@ -582,7 +598,7 @@ int bioem::compareRefMaps(int iOrient, int iConv, const myfloat_t* conv_map, myc
for (int iRefMap = startMap; iRefMap < RefMap.ntotRefMap; iRefMap ++)
{
const int num = omp_get_thread_num();
calculateCCFFT(iRefMap, iOrient, iConv, sumC, sumsquareC, localmultFFT, localCCT[num], lCC[num]);
calculateCCFFT(iRefMap, iOrient, iConv, sumC, sumsquareC, localmultFFT, param.fft_scratch_complex[num], param.fft_scratch_real[num]);
}
}
else
......@@ -628,7 +644,7 @@ int bioem::createProjection(int iMap, mycomplex_t* mapFFT)
myfloat_t alpha, gam, beta;
myfloat_t* localproj;
localproj = lCC[omp_get_thread_num()];
localproj = param.fft_scratch_real[omp_get_thread_num()];
memset(localproj, 0, param.param_device.NumberPixels * param.param_device.NumberPixels * sizeof(*localproj));
alpha = param.angles[iMap].pos[0];
......@@ -722,7 +738,7 @@ int bioem::createConvolutedProjectionMap(int iMap, int iConv, mycomplex_t* lproj
cuda_custom_timeslot("Convolution", 1);
mycomplex_t* tmp = localCCT[omp_get_thread_num()];
mycomplex_t* tmp = param.fft_scratch_complex[omp_get_thread_num()];
// **** Multiplying FFTmap with corresponding kernel ****
const mycomplex_t* refCTF = &param.refCTF[iConv * param.FFTMapSize];
......
......@@ -53,9 +53,6 @@ protected:
int FFTAlgo;
int DebugOutput;
mycomplex_t** localCCT;
myfloat_t** lCC;
};
#endif
......@@ -64,7 +64,7 @@ public:
myfloat3_t* angles;
int nTotGridAngles;
int nTotCTFs;
bool printModel;
int printModelOrientation;
int printModelConvolution;
......@@ -72,6 +72,9 @@ public:
int fft_plans_created;
myfftw_plan fft_plan_c2c_forward, fft_plan_c2c_backward, fft_plan_r2c_forward, fft_plan_c2r_backward;
mycomplex_t** fft_scratch_complex;
myfloat_t** fft_scratch_real;
bool dumpMap, loadMap;
private:
......
......@@ -15,6 +15,10 @@
#include <math.h>
#include <fftw3.h>
#ifdef WITH_OPENMP
#include <omp.h>
#endif
#include "map.h"
#include "param.h"
#include "bioem.h"
......@@ -203,17 +207,15 @@ int bioem_RefMap::PreCalculateMapsFFT(bioem_param& param)
// ********** Routine that pre-calculates Kernels for Convolution **********************
// ************************************************************************************
myfloat_t* localMap;
localMap = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
RefMapsFFT = new mycomplex_t[ntotRefMap * param.FFTMapSize];
mycomplex_t* localout;
localout = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
#pragma omp parallel for
for (int iRefMap = 0; iRefMap < ntotRefMap ; iRefMap++)
{
const int num = omp_get_thread_num();
myfloat_t* localMap = param.fft_scratch_real[num];
mycomplex_t* localout = param.fft_scratch_complex[num];
//Assigning localMap values to padded Map
for(int i = 0; i < param.param_device.NumberPixels; i++)
{
......@@ -226,7 +228,7 @@ int bioem_RefMap::PreCalculateMapsFFT(bioem_param& param)
//Calling FFT_Forward
myfftw_execute_dft_r2c(param.fft_plan_r2c_forward, localMap, localout);
// Normalizing and saving the Reference CTFs
//Saving the Reference CTFs (RefMap array possibly has incorrect alignment, so we copy here. Stupid but in fact does not matter.)
mycomplex_t* RefMap = &RefMapsFFT[iRefMap * param.FFTMapSize];
for(int i = 0; i < param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D ; i++ )
{
......@@ -235,9 +237,6 @@ int bioem_RefMap::PreCalculateMapsFFT(bioem_param& param)
}
}
myfftw_free(localMap);
myfftw_free(localout);
return(0);
}
......@@ -250,11 +249,11 @@ int bioem_RefMap::precalculate(bioem_param& param, bioem& bio)
sum_RefMap = (myfloat_t*) mallocchk(sizeof(myfloat_t) * ntotRefMap);
sumsquare_RefMap = (myfloat_t*) mallocchk(sizeof(myfloat_t) * ntotRefMap);
myfloat_t sum, sumsquare;
//Precalculating cross-correlations of maps
#pragma omp parallel for
for (int iRefMap = 0; iRefMap < ntotRefMap ; iRefMap++)
{
myfloat_t sum, sumsquare;
bio.calcross_cor(getmap(iRefMap), sum, sumsquare);
//Storing Crosscorrelations in Map class
sum_RefMap[iRefMap] = sum;
......
......@@ -15,6 +15,10 @@
#include <math.h>
#include <fftw3.h>
#ifdef WITH_OPENMP
#include <omp.h>
#endif
#include "param.h"
#include "map.h"
......@@ -295,6 +299,20 @@ void bioem_param::PrepareFFTs()
myfftw_free(tmp_map);
myfftw_free(tmp_map2);
const int count = omp_get_max_threads();
fft_scratch_complex = new mycomplex_t*[count];
fft_scratch_real = new myfloat_t*[count];
#pragma omp parallel
{
#pragma omp critical
{
const int i = omp_get_thread_num();
fft_scratch_complex[i] = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param_device.NumberPixels * param_device.NumberFFTPixels1D);
fft_scratch_real[i] = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * param_device.NumberPixels * param_device.NumberPixels);
}
}
fft_plans_created = 1;
}
......@@ -302,6 +320,15 @@ void bioem_param::releaseFFTPlans()
{
if (fft_plans_created)
{
const int count = omp_get_max_threads();
for (int i = 0;i < count;i++)
{
myfftw_free(fft_scratch_complex[i]);
myfftw_free(fft_scratch_real[i]);
}
delete[] fft_scratch_complex;
delete[] fft_scratch_real;
myfftw_destroy_plan(fft_plan_c2c_forward);
myfftw_destroy_plan(fft_plan_c2c_backward);
myfftw_destroy_plan(fft_plan_r2c_forward);
......@@ -343,7 +370,6 @@ int bioem_param::CalculateGridsParam() //TO DO FOR QUATERNIONS
}
nTotGridAngles = n;
// ********** Calculating normalized volumen element *********
param_device.volu = grid_alpha * grid_alpha * cos_grid_beta * (myfloat_t) param_device.GridSpaceCenter * pixelSize * (myfloat_t) param_device.GridSpaceCenter * pixelSize
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment