Commit f4d2e710 authored by David Rohr
Browse files

NUMA-aware allocation of FFT scratch space

parent 2f7652fb
......@@ -319,10 +319,14 @@ int bioem::run()
const int count = omp_get_max_threads();
localCCT = new mycomplex_t*[count];
lCC = new myfloat_t*[count];
for (int i = 0;i < count;i++)
#pragma omp parallel
{
localCCT[i] = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
lCC[i] = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
#pragma omp critical
{
const int i = omp_get_thread_num();
localCCT[i] = (mycomplex_t *) myfftw_malloc(sizeof(mycomplex_t) * param.param_device.NumberPixels * param.param_device.NumberFFTPixels1D);
lCC[i] = (myfloat_t *) myfftw_malloc(sizeof(myfloat_t) * param.param_device.NumberPixels * param.param_device.NumberPixels);
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment