Commit 6a6b4293 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

faster real FFTs

parent 82d33ce9
......@@ -131,7 +131,14 @@ template<typename T, size_t ndim> class mav
};
template<typename T, size_t ndim> using const_mav = mav<const T, ndim>;
template<typename T, size_t ndim> const_mav<T, ndim> cmav (const mav<T, ndim> &mav)
{ return const_mav<T, ndim>(mav.data(), mav.shape()); }
template<typename T, size_t ndim> const_mav<T, ndim> nullmav()
{
array<size_t,ndim> shp;
shp.fill(0);
return const_mav<T, ndim>(nullptr, shp);
}
template<typename T, size_t ndim> class tmpStorage
{
private:
......@@ -148,7 +155,8 @@ template<typename T, size_t ndim> class tmpStorage
public:
tmpStorage(const array<size_t,ndim> &shp)
: d(prod(shp)), mav_(d.data(), shp) {}
mav<T,ndim> &getMav() { return mav_; };
mav<T,ndim> &getMav() { return mav_; }
const_mav<T,ndim> getCmav() { return cmav(mav_); }
void fill(const T & val)
{ std::fill(d.begin(), d.end(), val); }
};
......@@ -1157,8 +1165,6 @@ template<typename T, typename T2> void apply_wcorr(const GridderConfig<T> &gconf
template<typename T, typename Serv> void x2dirty(
const GridderConfig<T> &gconf, const Serv &srv, const mav<T,2> &dirty, size_t verbosity=0)
{
auto nx_dirty=gconf.Nxdirty();
auto ny_dirty=gconf.Nydirty();
auto nu=gconf.Nu();
auto nv=gconf.Nv();
......@@ -1171,21 +1177,16 @@ template<typename T, typename Serv> void x2dirty(
auto grid=grid_.getMav();
grid_.fill(0.);
x2grid_c(gconf, srv, grid);
tmpStorage<complex<T>,2> cdirty_({nx_dirty,ny_dirty});
auto cdirty=cdirty_.getMav();
// FIXME: this could use symmetries to accelerate the FFT
gconf.grid2dirty_c(grid, cdirty);
for (size_t i=0; i<nx_dirty; ++i)
for (size_t j=0; j<ny_dirty; ++j)
dirty(i,j) = cdirty(i,j).real();
tmpStorage<T,2> rgrid_({nu, nv});
auto rgrid=rgrid_.getMav();
complex2hartley(cmav(grid), rgrid, gconf.Nthreads());
gconf.grid2dirty(rgrid, dirty);
}
template<typename T, typename Serv> void dirty2x(
const GridderConfig<T> &gconf, const const_mav<T,2> &dirty,
const Serv &srv, size_t verbosity=0)
{
auto nx_dirty=gconf.Nxdirty();
auto ny_dirty=gconf.Nydirty();
auto nu=gconf.Nu();
auto nv=gconf.Nv();
size_t nvis = srv.Nvis();
......@@ -1198,17 +1199,13 @@ template<typename T, typename Serv> void dirty2x(
for (size_t i=0; i<nvis; ++i)
srv.setVis(i, 0.);
tmpStorage<complex<T>,2> cdirty_({nx_dirty,ny_dirty});
auto cdirty=cdirty_.getMav();
for (size_t i=0; i<nx_dirty; ++i)
for (size_t j=0; j<ny_dirty; ++j)
cdirty(i,j) = dirty(i,j);
tmpStorage<complex<T>,2> grid_({nu,nv});
tmpStorage<T,2> grid_({nu,nv});
auto grid=grid_.getMav();
// FIXME: this could use symmetries to accelerate the FFT
gconf.dirty2grid_c(cdirty, grid);
grid2x_c(gconf, const_mav<complex<T>,2>(grid.data(),{nu,nv}), srv);
gconf.dirty2grid(dirty, grid);
tmpStorage<complex<T>,2> grid2_({nu,nv});
auto grid2=grid2_.getMav();
hartley2complex(cmav(grid), grid2, gconf.Nthreads());
grid2x_c(gconf, cmav(grid2), srv);
}
template<typename T, typename Serv> void wstack_common(
......@@ -1320,7 +1317,7 @@ template<typename T, typename Serv> void x2dirty_wstack(
idx_loc[i] = srv.getIdx(ipart);
}
grid.fill(0);
vis2grid_c(srv.getBaselines(), gconf, const_mav<uint32_t,1>(idx_loc), const_mav<complex<T>,1>(vis_loc), grid, const_mav<T,1>(nullptr,{0}), wcur, T(dw));
vis2grid_c(srv.getBaselines(), gconf, cmav(idx_loc), cmav(vis_loc), grid, nullmav<T,1>(), wcur, T(dw));
gconf.grid2dirty_c(grid, tdirty);
gconf.apply_wscreen(tdirty, tdirty, wcur, false);
#pragma omp parallel for num_threads(nthreads)
......@@ -1459,7 +1456,7 @@ template<typename T, typename Serv> void dirty2x_wstack(
tdirty2(i,j) = tdirty(i,j);
gconf.apply_wscreen(tdirty2, tdirty2, wcur, true);
gconf.dirty2grid_c(tdirty2, grid);
grid2vis_c(srv.getBaselines(), gconf, const_mav<uint32_t,1>(idx_loc), const_mav<complex<T>,2>(grid), vis_loc, const_mav<T,1>(nullptr,{0}), wcur, T(dw));
grid2vis_c(srv.getBaselines(), gconf, cmav(idx_loc), cmav(grid), vis_loc, const_mav<T,1>(nullptr,{0}), wcur, T(dw));
#pragma omp parallel for num_threads(nthreads)
for (size_t i=0; i<nvis_plane[iw]; ++i)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment