Commit a0474f31 authored by Luka Stanisic
Browse files

second algorithm for autotuning

parent 7a5cfaf6
...@@ -570,7 +570,6 @@ int bioem::run() ...@@ -570,7 +570,6 @@ int bioem::run()
createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV); createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, timer.GetCurrentElapsedTime(), mpi_rank); if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, timer.GetCurrentElapsedTime(), mpi_rank);
if (Autotuning && !stopTuning) timer.ResetStart(); if (Autotuning && !stopTuning) timer.ResetStart();
if (DebugOutput >= 2) timer.ResetStart(); if (DebugOutput >= 2) timer.ResetStart();
myfloat_t amp,pha,env; myfloat_t amp,pha,env;
...@@ -597,11 +596,11 @@ int bioem::run() ...@@ -597,11 +596,11 @@ int bioem::run()
printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank); printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank);
} }
if (Autotuning && !stopTuning && (iConv % 5 == 4)) if (Autotuning == 1 && !stopTuning && (iConv % 5 == 4))
{ {
if (compTime == 0.) compTime = timer.GetCurrentElapsedTime(); if (compTime == 0.) compTime = timer.GetCurrentElapsedTime();
if (best_time==0 || compTime < best_time) if (best_time == 0 || compTime < best_time)
{ {
best_time = compTime; best_time = compTime;
best_workload = workload; best_workload = workload;
...@@ -613,10 +612,29 @@ int bioem::run() ...@@ -613,10 +612,29 @@ int bioem::run()
stopTuning=true; stopTuning=true;
workload=best_workload; workload=best_workload;
} }
rebalance(workload);
}
if (Autotuning == 2 && !stopTuning && (iConv == 3 || iConv == 7))
{
if (compTime == 0.) compTime = timer.GetCurrentElapsedTime();
deviceFinishRun(); if (iConv == 3)
{
best_time = compTime;
workload = 1;
}
else if (iConv == 7)
{
workload = (int) 100 * ( compTime / (best_time+compTime) );
if (DebugOutput >= 1)
{
printf("\t\tComparison on GPU only time: %.6f\n", best_time);
printf("\t\tComparison on CPU only time: %.6f\n", compTime);
printf("\t\tOptimal GPU workload: %d%%\n", workload);
}
stopTuning=true;
}
rebalance(workload); rebalance(workload);
deviceStartRun();
} }
} }
if (DebugOutput >= 1) if (DebugOutput >= 1)
......
...@@ -689,15 +689,19 @@ void bioem_cuda::free_device_host(void* ptr) ...@@ -689,15 +689,19 @@ void bioem_cuda::free_device_host(void* ptr)
void bioem_cuda::rebalance(int workload) void bioem_cuda::rebalance(int workload)
{ {
if ((workload < 0) || (workload > 100) || (workload > GPUWorkload)) return; if ((workload < 0) || (workload > 100) || (workload == GPUWorkload)) return;
if (DebugOutput >= 1) deviceFinishRun();
{
printf("\t\tSetting GPU workload to %d%%\n", workload); if (DebugOutput >= 2)
} {
printf("\t\tSetting GPU workload to %d%%\n", workload);
}
GPUWorkload = workload; GPUWorkload = workload;
maxRef = (size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100; maxRef = (size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100;
deviceStartRun();
} }
bioem* bioem_cuda_create() bioem* bioem_cuda_create()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment