Commit 254d53db authored by Luka Stanisic's avatar Luka Stanisic
Browse files

nvtx+summary: improving nvtx by tracing initialization, which also fixes...

nvtx+summary: improving nvtx by tracing initialization, which also fixes a potentially bogus first measurement of Projection for the summary
parent a53afc0b
...@@ -53,7 +53,7 @@ ...@@ -53,7 +53,7 @@
const uint32_t colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff }; const uint32_t colors[] = { 0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff };
const int num_colors = sizeof(colors)/sizeof(colors[0]); const int num_colors = sizeof(colors)/sizeof(colors[0]);
enum myColor { COLOR_PROJECTION, COLOR_CONVOLUTION, COLOR_COMPARISON, COLOR_WORKLOAD }; enum myColor { COLOR_PROJECTION, COLOR_CONVOLUTION, COLOR_COMPARISON, COLOR_WORKLOAD, COLOR_INIT };
// Projection number is stored in category attribute // Projection number is stored in category attribute
// Convolution number is stored in payload attribute // Convolution number is stored in payload attribute
...@@ -474,7 +474,7 @@ int bioem::run() ...@@ -474,7 +474,7 @@ int bioem::run()
// **** If we want to control the number of threads -> omp_set_num_threads(XX); ****** // **** If we want to control the number of threads -> omp_set_num_threads(XX); ******
// ****************** Declaring class of Probability Pointer ************************* // ****************** Declaring class of Probability Pointer *************************
cuda_custom_timeslot("Initialization", -1, -1, COLOR_INIT);
if (mpi_rank == 0) printf("\tInitializing Probabilities\n"); if (mpi_rank == 0) printf("\tInitializing Probabilities\n");
// Controls for MPI // Controls for MPI
...@@ -543,6 +543,7 @@ int bioem::run() ...@@ -543,6 +543,7 @@ int bioem::run()
conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D); conv_mapFFT = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
if (!FFTAlgo) conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels); if (!FFTAlgo) conv_map = (myfloat_t*) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
cuda_custom_timeslot_end; //Ending initialization
HighResTimer timer, timer2; HighResTimer timer, timer2;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment