Commit 254d53db authored by Luka Stanisic's avatar Luka Stanisic

nvtx+summary: improving nvtx by tracing initialization, which also fixes...

nvtx+summary: improving nvtx by tracing initialization, which also fixes potential bogus first measurement of Projection for the summary
parent a53afc0b
......@@ -53,7 +53,7 @@
// Table of 32-bit color values.
// NOTE(review): presumably the palette used for the custom profiling time slots — confirm against cuda_custom_timeslot.
const uint32_t colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff };
// Number of entries in colors[], derived at compile time from the array size.
const int num_colors = sizeof(colors)/sizeof(colors[0]);
// NOTE(review): the two enum lines below are the before/after versions of the same declaration
// as rendered by the diff; only the second one (which adds COLOR_INIT) belongs in the resulting file.
enum myColor { COLOR_PROJECTION, COLOR_CONVOLUTION, COLOR_COMPARISON, COLOR_WORKLOAD };
// New version: adds COLOR_INIT, the value passed when opening the "Initialization" time slot.
enum myColor { COLOR_PROJECTION, COLOR_CONVOLUTION, COLOR_COMPARISON, COLOR_WORKLOAD, COLOR_INIT };
// Projection number is stored in category attribute
// Convolution number is stored in payload attribute
......@@ -474,7 +474,7 @@ int bioem::run()
// **** If we want to control the number of threads -> omp_set_num_threads(XX); ******
// ****************** Declaring class of Probability Pointer *************************
cuda_custom_timeslot("Initialization", -1, -1, COLOR_INIT);
if (mpi_rank == 0) printf("\tInitializing Probabilities\n");
// Controls for MPI
......@@ -542,7 +542,8 @@ int bioem::run()
proj_mapsFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * ProjMapSize * nProjectionsAtOnce);
conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
if (!FFTAlgo) conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
cuda_custom_timeslot_end; //Ending initialization
HighResTimer timer, timer2;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment