Commit 9ee752c2 authored by Philipp Arras's avatar Philipp Arras
Browse files

Add atomics

parent 7668801b
Pipeline #104511 passed with stage
in 6 minutes and 24 seconds
......@@ -54,7 +54,8 @@ void vis2dirty(const ducc0::mav<double, 2> &uvw,
// Gridding
size_t supp{kernel.support()};
ducc0::mav<complex<T>, 2> grid{{nx_padded, ny_padded}};
ducc0::mav<T, 2> gridre{{nx_padded, ny_padded}};
ducc0::mav<T, 2> gridim{{nx_padded, ny_padded}};
////////////////////////////////////////////////////////////////////////////
sycl::queue q{sycl::default_selector()};
......@@ -62,20 +63,23 @@ void vis2dirty(const ducc0::mav<double, 2> &uvw,
MR_assert(uvw.contiguous());
MR_assert(freq.contiguous());
MR_assert(vis.contiguous());
MR_assert(grid.contiguous());
MR_assert(gridre.contiguous());
MR_assert(gridim.contiguous());
sycl::buffer<double, 2> bufuvw{uvw.cdata(), sycl::range<2>(nrow, 3)};
sycl::buffer<double, 1> buffreq{freq.cdata(), sycl::range<1>(freq.size())};
// QUESTION If replace vdata -> cdata, the computation silently fails
sycl::buffer<complex<T>, 1> bufvis{ vis.cdata(),
sycl::range<1>(nrow*nchan)};
sycl::buffer<complex<T>, 2> bufgrid{grid.vdata(),
sycl::range<2>(nx_padded, ny_padded)};
sycl::buffer<T, 2> bufgridre{gridre.vdata(),
sycl::range<2>(nx_padded, ny_padded)};
sycl::buffer<T, 2> bufgridim{gridim.vdata(),
sycl::range<2>(nx_padded, ny_padded)};
q.submit([&](sycl::handler &cgh){
auto accuvw{bufuvw.get_access<sycl::access::mode::read>(cgh)};
auto accfreq{buffreq.get_access<sycl::access::mode::read>(cgh)};
auto accvis{bufvis.template get_access<sycl::access::mode::read>(cgh)};
// FIXME Use atomics somehow
auto accgrid{bufgrid.template get_access<sycl::access::mode::write>(cgh)};
auto accgridre{bufgridre.template get_access<sycl::access::mode::read_write>(cgh)};
auto accgridim{bufgridim.template get_access<sycl::access::mode::read_write>(cgh)};
cgh.parallel_for(sycl::range<1>(nrow*nchan), [=](sycl::item<1> item){
const size_t i{item.get_linear_id()};
......@@ -100,8 +104,10 @@ void vis2dirty(const ducc0::mav<double, 2> &uvw,
for (size_t a=0, indx=xle2; a<supp; ++a, indx = (indx+1==nx_padded)? 0 : indx+1) {
auto kernelx = kernel.eval(a - ratposx + xle);
for (size_t b=0, indy=yle2; b<supp; ++b, indy = (indy+1==ny_padded)? 0 : indy+1) {
// FIXME Use atomics somehow
accgrid[indx][indy] = accgrid[indx][indy] + accvis[i] * kernelx*kernely[b];
sycl::atomic_ref<T, sycl::memory_order::relaxed, sycl::memory_scope::device> re(accgridre[indx][indy]);
sycl::atomic_ref<T, sycl::memory_order::relaxed, sycl::memory_scope::device> im(accgridim[indx][indy]);
re += (accvis[i]*kernelx*kernely[b]).real();
im += (accvis[i]*kernelx*kernely[b]).imag();
}
}
// /Accumulate from relevant patch
......@@ -111,6 +117,15 @@ void vis2dirty(const ducc0::mav<double, 2> &uvw,
} // /Device buffer scope
////////////////////////////////////////////////////////////////////////////
// FIXME The grid is now allocated twice
ducc0::mav<complex<T>, 2> grid{{nx_padded, ny_padded}};
for (size_t i=0; i<nx_padded; ++i){
for (size_t j=0; j<ny_padded; ++j){
const complex<T> val{gridre.c(i, j), gridim.c(i, j)};
grid.v(i, j) = val;
}
}
// FFT
{
ducc0::fmav<complex<T>> fgrid{grid};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment