Commit a53afc0b authored by Luka Stanisic's avatar Luka Stanisic

additional statistical summary of the execution together with the improved overall output

parent bad8337a
......@@ -103,7 +103,7 @@ ostream& operator<<(ostream& os, const vector<T>& v)
bioem::bioem()
{
FFTAlgo = getenv("FFTALGO") == NULL ? 1 : atoi(getenv("FFTALGO"));
DebugOutput = getenv("BIOEM_DEBUG_OUTPUT") == NULL ? 2 : atoi(getenv("BIOEM_DEBUG_OUTPUT"));
DebugOutput = getenv("BIOEM_DEBUG_OUTPUT") == NULL ? 0 : atoi(getenv("BIOEM_DEBUG_OUTPUT"));
nProjectionsAtOnce = getenv("BIOEM_PROJECTIONS_AT_ONCE") == NULL ? 1 : atoi(getenv("BIOEM_PROJECTIONS_AT_ONCE"));
Autotuning = false;
}
......@@ -562,6 +562,9 @@ int bioem::run()
int iOrientEnd = (int) ((long long int) (mpi_rank + 1) * param.nTotGridAngles / mpi_size);
if (iOrientEnd > param.nTotGridAngles) iOrientEnd = param.nTotGridAngles;
/* Vectors for computing statistic on different parts of the code */
TimeStat ts((iOrientEnd - iOrientStart), param.nTotCTFs);
if (DebugOutput >= 1) ts.InitTimeStat(4);
// **************************Loop Over orientations***************************************
......@@ -569,8 +572,11 @@ int bioem::run()
{
// ***************************************************************************************
// ***** Creating Projection for given orientation and transforming to Fourier space *****
if (DebugOutput >= 1) timer2.ResetStart();
if (DebugOutput >= 2) timer.ResetStart();
if (DebugOutput >= 1)
{
timer2.ResetStart();
timer.ResetStart();
}
int iTmpEnd = std::min(iOrientEnd, iOrientAtOnce + nProjectionsAtOnce);
// **************************Parallel orientations for projections at once***************
......@@ -580,8 +586,12 @@ int bioem::run()
{
createProjection(iOrient, &proj_mapsFFT[(iOrient - iOrientAtOnce) * ProjMapSize]);
}
if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrientAtOnce, timer.GetCurrentElapsedTime(), mpi_rank);
if (DebugOutput >= 1)
{
ts.time = timer.GetCurrentElapsedTime();
ts.Add(TS_PROJECTION);
if (DebugOutput >= 2) printf("\tTime Projection %d: %f (rank %d)\n", iOrientAtOnce, ts.time, mpi_rank);
}
/* Recalibrate if needed */
if (Autotuning && ((iOrientAtOnce - iOrientStart) % RECALIB_FACTOR == 0) && ((iOrientEnd - iOrientAtOnce) > RECALIB_FACTOR) && (iOrientAtOnce != iOrientStart))
{
......@@ -599,52 +609,69 @@ int bioem::run()
for (int iConv = 0; iConv < param.nTotCTFs; iConv++)
{
// *** Calculating convolutions of projection map and crosscorrelations ***
if (DebugOutput >= 2) timer.ResetStart();
if (DebugOutput >= 1) timer.ResetStart();
createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, timer.GetCurrentElapsedTime(), mpi_rank);
if (DebugOutput >= 1)
{
ts.time = timer.GetCurrentElapsedTime();
ts.Add(TS_CONVOLUTION);
if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, ts.time, mpi_rank);
}
if ((DebugOutput >= 2) || (Autotuning && aut.Needed(iConv))) timer.ResetStart();
myfloat_t amp,pha,env;
if ((DebugOutput >= 1) || (Autotuning && aut.Needed(iConv))) timer.ResetStart();
myfloat_t amp,pha,env;
amp=param.CtfParam[iConv].pos[0];
pha=param.CtfParam[iConv].pos[1];
env=param.CtfParam[iConv].pos[2];
amp=param.CtfParam[iConv].pos[0];
pha=param.CtfParam[iConv].pos[1];
env=param.CtfParam[iConv].pos[2];
// ******************Internal loop over Reference images CUDA or OpenMP******************
// *** Comparing each calculated convoluted map with all experimental maps ***
compareRefMaps(iOrient, iConv, amp, pha, env, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
double compTime=0.;
ts.time = 0.;
if (DebugOutput >= 1)
{
ts.time = timer.GetCurrentElapsedTime();
ts.Add(TS_COMPARISON);
}
if (DebugOutput >= 2)
{
compTime = timer.GetCurrentElapsedTime();
const int nShifts = 2 * param.param_device.maxDisplaceCenter / param.param_device.GridSpaceCenter + 1;
const double nFlops = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 5. + 25.) / compTime;
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 5. + 25.) / ts.time;
const double nGBs = (double) RefMap.ntotRefMap * (double) nShifts * (double) nShifts *
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / compTime;
const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) * (double) sizeof(myfloat_t) / compTime;
(((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * ((double) param.param_device.NumberPixels - (double) param.param_device.maxDisplaceCenter / 2.) * 2. + 8.) * (double) sizeof(myfloat_t) / ts.time;
const double nGBs2 = (double) RefMap.ntotRefMap * ((double) param.param_device.NumberPixels * (double) param.param_device.NumberPixels + 8.) * (double) sizeof(myfloat_t) / ts.time;
if (Autotuning) printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s, with GPU workload %d%%) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., aut.Workload(), mpi_rank);
else printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank);
if (Autotuning) printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s, with GPU workload %d%%) (rank %d)\n", iOrient, iConv, ts.time, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., aut.Workload(), mpi_rank);
else printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, ts.time, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank);
}
if (Autotuning && aut.Needed(iConv))
{
if (compTime == 0.) compTime = timer.GetCurrentElapsedTime();
aut.Tune(compTime);
if (aut.Finished() && DebugOutput >= 2) printf("\t\tOptimal GPU workload %d%% (rank %d)\n", aut.Workload(), mpi_rank);
if (ts.time == 0.) ts.time = timer.GetCurrentElapsedTime();
aut.Tune(ts.time);
if (aut.Finished() && DebugOutput >= 1) printf("\tOptimal GPU workload %d%% (rank %d)\n", aut.Workload(), mpi_rank);
rebalanceWrapper(aut.Workload());
}
}
if (DebugOutput >= 1)
{
printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, timer2.GetCurrentElapsedTime(), mpi_rank);
ts.time = timer2.GetCurrentElapsedTime();
ts.Add(TS_TPROJECTION);
printf("\tTotal time for projection %d: %f (rank %d)\n", iOrient, ts.time, mpi_rank);
timer2.ResetStart();
}
}
}
/* Statistical summary on different parts of the code */
if (DebugOutput >= 1)
{
ts.PrintTimeStat(mpi_rank);
ts.EmptyTimeStat();
}
//deallocating fftw_complex vector
myfftw_free(proj_mapsFFT);
myfftw_free(conv_mapFFT);
......
......@@ -11,6 +11,15 @@
#ifndef TIMER_H
#define TIMER_H
#include <stdio.h>
#include <string>
#include <numeric>
#include <vector>
#include <algorithm>
#include <cmath>
using namespace std;
class HighResTimer {
public:
......@@ -30,6 +39,40 @@ private:
double ElapsedTime;
double StartTime;
int running;
};
};
/* Structure for saving a vector of timings */
typedef struct _TimeLog {
vector<double> vec;
double sum;
double stdev;
string name;
}TimeLog;
enum TS_NAMES{TS_TPROJECTION, TS_PROJECTION, TS_CONVOLUTION, TS_COMPARISON};
/* Structure for saving timings of different parts of code and doing basic statistics on them */
class TimeStat {
public:
TimeStat(int Angles, int CTFs) : time(0),tl(NULL) {angles = Angles; ctfs = CTFs;};
~TimeStat() {EmptyTimeStat();};
void InitTimeLog(int log, int size, string s);
void InitTimeStat(int nlogs);
void EmptyTimeStat();
void inline Add(int log) {tl[log].vec.push_back(time);};
void ComputeTimeStat();
void PrintTimeStat(int mpi_rank);
/* Variable for storing times during the execution */
double time;
private:
TimeLog* tl;
int total_logs;
int angles;
int ctfs;
};
#endif
......@@ -352,7 +352,7 @@ void bioem_Probability::init(size_t maps, size_t angles, size_t cc, bioem& bio)
nAngles = angles;
nCC = cc;
ptr = bio.malloc_device_host(get_size(maps, angles, cc, bio.param.param_device.writeAngles, bio.param.param_device.writeCC));
cout << "Allocation #Maps " << maps << " #Angles " << angles << " #cross.cor " << cc << "\n";
if (bio.DebugOutput >= 1) cout << "Allocation #Maps " << maps << " #Angles " << angles << " #cross.cor " << cc << "\n";
//<< " == " << get_size(maps, angles, cc, bio.param.param_device.writeAngles, bio.param.param_device.writeCC)<< "\n";
set_pointers();
}
......
......@@ -90,4 +90,59 @@ double HighResTimer::GetFrequency()
#endif
}
double HighResTimer::Frequency = HighResTimer::GetFrequency();
\ No newline at end of file
double HighResTimer::Frequency = HighResTimer::GetFrequency();
void TimeStat::InitTimeLog(int log, int size, string s)
{
tl[log].vec.reserve(size);
tl[log].name = s;
tl[log].sum = 0.;
tl[log].stdev = 0.;
}
void TimeStat::InitTimeStat(int nlogs)
{
total_logs = nlogs;
tl = new TimeLog[total_logs];
InitTimeLog(TS_TPROJECTION, angles, "Total time of projection");
InitTimeLog(TS_PROJECTION, angles, "Projection");
InitTimeLog(TS_CONVOLUTION, angles * ctfs, "Convolution");
InitTimeLog(TS_COMPARISON, angles * ctfs, "Comparison");
}
void TimeStat::EmptyTimeStat()
{
if (tl == NULL) return;
delete [ ] tl;
tl = NULL;
time = 0.;
}
void TimeStat::ComputeTimeStat()
{
double mean, sq_sum;
vector<double> diff;
for (int i = 0; i < total_logs; i++)
{
tl[i].sum = std::accumulate(tl[i].vec.begin(), tl[i].vec.end(), 0.0);
mean = tl[i].sum / tl[i].vec.size();
diff.resize(tl[i].vec.size());
std::transform(tl[i].vec.begin(), tl[i].vec.end(), diff.begin(), std::bind2nd(std::minus<double>(), mean));
sq_sum = std::inner_product(diff.begin(), diff.end(), diff.begin(), 0.0);
tl[i].stdev = std::sqrt(sq_sum / tl[i].vec.size());
}
}
void TimeStat::PrintTimeStat(int mpi_rank)
{
ComputeTimeStat();
for (int i = 0; i < total_logs; i++)
{
printf("SUMMARY -> %s: Total %f sec; Mean %f sec; Std.Dev. %f (rank %d)\n", tl[i].name.c_str(), tl[i].sum, tl[i].sum / tl[i].vec.size(), tl[i].stdev, mpi_rank);
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment