Commit a0474f31 authored by Luka Stanisic

second algorithm for autotuning

parent 7a5cfaf6
......@@ -570,7 +570,6 @@ int bioem::run()
createConvolutedProjectionMap(iOrient, iConv, proj_mapFFT, conv_map, conv_mapFFT, sumCONV, sumsquareCONV);
if (DebugOutput >= 2) printf("\t\tTime Convolution %d %d: %f (rank %d)\n", iOrient, iConv, timer.GetCurrentElapsedTime(), mpi_rank);
if (Autotuning && !stopTuning) timer.ResetStart();
if (DebugOutput >= 2) timer.ResetStart();
myfloat_t amp,pha,env;
......@@ -597,11 +596,11 @@ int bioem::run()
printf("\t\tTime Comparison %d %d: %f sec (%f GFlops, %f GB/s (cached), %f GB/s) (rank %d)\n", iOrient, iConv, compTime, nFlops / 1000000000., nGBs / 1000000000., nGBs2 / 1000000000., mpi_rank);
}
if (Autotuning && !stopTuning && (iConv % 5 == 4))
if (Autotuning == 1 && !stopTuning && (iConv % 5 == 4))
{
if (compTime == 0.) compTime = timer.GetCurrentElapsedTime();
if (best_time==0 || compTime < best_time)
if (best_time == 0 || compTime < best_time)
{
best_time = compTime;
best_workload = workload;
......@@ -613,10 +612,29 @@ int bioem::run()
stopTuning=true;
workload=best_workload;
}
rebalance(workload);
}
if (Autotuning == 2 && !stopTuning && (iConv == 3 || iConv == 7))
{
if (compTime == 0.) compTime = timer.GetCurrentElapsedTime();
deviceFinishRun();
if (iConv == 3)
{
best_time = compTime;
workload = 1;
}
else if (iConv == 7)
{
workload = (int) 100 * ( compTime / (best_time+compTime) );
if (DebugOutput >= 1)
{
printf("\t\tComparison on GPU only time: %.6f\n", best_time);
printf("\t\tComparison on CPU only time: %.6f\n", compTime);
printf("\t\tOptimal GPU workload: %d%%\n", workload);
}
stopTuning=true;
}
rebalance(workload);
deviceStartRun();
}
}
if (DebugOutput >= 1)
......
......@@ -689,15 +689,19 @@ void bioem_cuda::free_device_host(void* ptr)
/**
 * Change the share of reference maps that is handled by the GPU.
 *
 * @param workload  Requested GPU share in percent. Requests outside [0, 100]
 *                  and requests equal to the current GPUWorkload are ignored.
 *
 * On an accepted request this stores the new value in GPUWorkload and
 * recomputes maxRef as RefMap.ntotRefMap * GPUWorkload / 100 (integer
 * division).
 */
void bioem_cuda::rebalance(int workload)
{
	// Reject only out-of-range or unchanged requests. The share must be able
	// to move in both directions: a tuner that has lowered it needs to be able
	// to raise it again, which a "workload > GPUWorkload" check would forbid.
	if ((workload < 0) || (workload > 100) || (workload == GPUWorkload)) return;

	// The change is bracketed by deviceFinishRun()/deviceStartRun().
	// NOTE(review): presumably so that no device run is in flight while maxRef
	// changes - confirm against the implementation of those two methods.
	deviceFinishRun();

	// Logged once, and only after the request has been accepted.
	if (DebugOutput >= 2)
	{
		printf("\t\tSetting GPU workload to %d%%\n", workload);
	}

	GPUWorkload = workload;
	// Both operands are cast first so the product is evaluated in size_t.
	maxRef = (size_t) RefMap.ntotRefMap * (size_t) GPUWorkload / 100;

	deviceStartRun();
}
bioem* bioem_cuda_create()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment