
Commit 11e3210f authored by Martin Reinecke

add nifty_gridder

parent 9c95b942
#ifndef GRIDDER_CXX_H
#define GRIDDER_CXX_H
/*
* This file is part of nifty_gridder.
*
* nifty_gridder is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* nifty_gridder is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with nifty_gridder; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Copyright (C) 2019 Max-Planck-Society
Author: Martin Reinecke */
#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cmath>
#include <vector>
#include <array>
#include <complex>
#include <cstdint>
#include "mr_util/error_handling.h"
#include "mr_util/fft.h"
#include "mr_util/threading.h"
#include "mr_util/useful_macros.h"
#include "mr_util/mav.h"
#include "mr_util/gl_integrator.h"
#if defined(__GNUC__)
#define ALIGNED(align) __attribute__ ((aligned(align)))
#else
#define ALIGNED(align)
#endif
namespace gridder {
namespace detail {
using namespace std;
using namespace mr;
template<size_t ndim> void checkShape
(const array<size_t, ndim> &shp1, const array<size_t, ndim> &shp2)
{
for (size_t i=0; i<ndim; ++i)
MR_assert(shp1[i]==shp2[i], "shape mismatch");
}
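// Fractional part of v, mapped into [0,1) (also for negative v).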
template<typename T> inline T fmod1 (T v)
{ return v-floor(v); }
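// Owning n-dimensional scratch array: a vector<T> plus a mav view of it.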
template<typename T, size_t ndim> class tmpStorage
{
private:
vector<T> d;
mav<T,ndim> mav_;
static size_t prod(const array<size_t,ndim> &shp)
{
size_t res=1;
for (auto v: shp) res*=v;
return res;
}
public:
tmpStorage(const array<size_t,ndim> &shp)
: d(prod(shp)), mav_(d.data(), shp) {}
mav<T,ndim> &getMav() { return mav_; }
const_mav<T,ndim> getCmav() { return cmav(mav_); }
void fill(const T & val)
{ std::fill(d.begin(), d.end(), val); }
};
//
// Start of real gridder functionality
//
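/* Conversion between a complex Fourier grid g and its real Hartley
   representation h. complex2hartley extracts the symmetrized combination
     h(k) = 0.5*(Re g(k) + Im g(k) + Re g(-k) - Im g(-k)),
   with indices taken modulo the grid dimensions; hartley2complex
   reconstructs the Hermitian-symmetric complex grid from the same two
   mirrored Hartley samples. */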
template<typename T> void complex2hartley
(const const_mav<complex<T>, 2> &grid, const mav<T,2> &grid2, size_t nthreads)
{
checkShape(grid.shape(), grid2.shape());
size_t nu=grid.shape(0), nv=grid.shape(1);
execStatic(nu, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto u=rng.lo; u<rng.hi; ++u)
{
size_t xu = (u==0) ? 0 : nu-u;
for (size_t v=0; v<nv; ++v)
{
size_t xv = (v==0) ? 0 : nv-v;
grid2(u,v) += T(0.5)*(grid( u, v).real()+grid( u, v).imag()+
grid(xu,xv).real()-grid(xu,xv).imag());
}
}
});
}
template<typename T> void hartley2complex
(const const_mav<T,2> &grid, const mav<complex<T>,2> &grid2, size_t nthreads)
{
checkShape(grid.shape(), grid2.shape());
size_t nu=grid.shape(0), nv=grid.shape(1);
execStatic(nu, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto u=rng.lo; u<rng.hi; ++u)
{
size_t xu = (u==0) ? 0 : nu-u;
for (size_t v=0; v<nv; ++v)
{
size_t xv = (v==0) ? 0 : nv-v;
T v1 = T(0.5)*grid( u, v);
T v2 = T(0.5)*grid(xu,xv);
grid2(u,v) = std::complex<T>(v1+v2, v1-v2);
}
}
});
}
template<typename T> void hartley2_2D(const const_mav<T,2> &in,
const mav<T,2> &out, size_t nthreads)
{
checkShape(in.shape(), out.shape());
size_t nu=in.shape(0), nv=in.shape(1);
ptrdiff_t sz=ptrdiff_t(sizeof(T));
stride_t stri{sz*in.stride(0), sz*in.stride(1)};
stride_t stro{sz*out.stride(0), sz*out.stride(1)};
auto d_i = in.data();
auto ptmp = out.data();
r2r_separable_hartley({nu, nv}, stri, stro, {0,1}, d_i, ptmp, T(1),
nthreads);
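// r2r_separable_hartley computes a product of 1-D Hartley transforms,
// i.e. cas(ux)*cas(vy) kernels; the true 2-D Hartley transform uses
// cas(ux+vy). The loop below converts between the two via the standard
// four-point combination of the samples at (i,j), (-i,j), (i,-j), (-i,-j).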
execStatic((nu+1)/2-1, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo+1; i<rng.hi+1; ++i)
for(size_t j=1; j<(nv+1)/2; ++j)
{
T a = ptmp[i*nv+j];
T b = ptmp[(nu-i)*nv+j];
T c = ptmp[i*nv+nv-j];
T d = ptmp[(nu-i)*nv+nv-j];
ptmp[i*nv+j] = T(0.5)*(a+b+c-d);
ptmp[(nu-i)*nv+j] = T(0.5)*(a+b+d-c);
ptmp[i*nv+nv-j] = T(0.5)*(a+c+d-b);
ptmp[(nu-i)*nv+nv-j] = T(0.5)*(b+c+d-a);
}
});
}
class ES_Kernel
{
private:
static constexpr double pi = 3.141592653589793238462643383279502884197;
double beta;
int p;
vector<double> x, wgt, psi;
size_t supp;
public:
ES_Kernel(size_t supp_, double ofactor, size_t nthreads)
: beta(get_beta(supp_,ofactor)*supp_), p(int(1.5*supp_+2)), supp(supp_)
{
GL_Integrator integ(2*p,nthreads);
x = integ.coordsSymmetric();
wgt = integ.weightsSymmetric();
psi=x;
for (auto &v:psi)
v=operator()(v);
}
ES_Kernel(size_t supp_, size_t nthreads)
: ES_Kernel(supp_, 2., nthreads){}
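/* The "exponential of semicircle" kernel of Barnett et al. 2018:
   psi(v) = exp(beta*(sqrt(1-v^2)-1)) for v in [-1,1], normalized so
   that psi(0)=1. */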
double operator()(double v) const { return exp(beta*(sqrt(1.-v*v)-1.)); }
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
double corfac(double v) const
{
double tmp=0;
for (int i=0; i<p; ++i)
tmp += wgt[i]*psi[i]*cos(pi*supp*v*x[i]);
return 2./(supp*tmp);
}
static double get_beta(size_t supp, double ofactor=2)
{
MR_assert((supp>=2) && (supp<=15), "unsupported support size");
if (ofactor>=2)
{
static const vector<double> opt_beta {-1, 0.14, 1.70, 2.08, 2.205, 2.26,
2.29, 2.307, 2.316, 2.3265, 2.3324, 2.282, 2.294, 2.304, 2.3138, 2.317};
MR_assert(supp<opt_beta.size(), "bad support size");
return opt_beta[supp];
}
if (ofactor>=1.2)
{
// empirical, but pretty accurate approximation
static const array<double,16> betacorr{0,0,-0.51,-0.21,-0.1,-0.05,-0.025,-0.0125,0,0,0,0,0,0,0,0};
auto x0 = 1./(2*ofactor);
auto bcstrength=1.+(x0-0.25)*2.5;
return 2.32+bcstrength*betacorr[supp]+(0.25-x0)*3.1;
}
MR_fail("oversampling factor is too small");
}
static size_t get_supp(double epsilon, double ofactor=2)
{
double epssq = epsilon*epsilon;
if (ofactor>=2)
{
static const vector<double> maxmaperr { 1e8, 0.19, 2.98e-3, 5.98e-5,
1.11e-6, 2.01e-8, 3.55e-10, 5.31e-12, 8.81e-14, 1.34e-15, 2.17e-17,
2.12e-19, 2.88e-21, 3.92e-23, 8.21e-25, 7.13e-27 };
for (size_t i=2; i<maxmaperr.size(); ++i)
if (epssq>maxmaperr[i]) return i;
MR_fail("requested epsilon too small - minimum is 1e-13");
}
if (ofactor>=1.2)
{
for (size_t w=2; w<16; ++w)
{
auto estimate = 12*exp(-2.*w*ofactor); // empirical, not very good approximation
if (epssq>estimate) return w;
}
MR_fail("requested epsilon too small");
}
MR_fail("oversampling factor is too small");
}
};
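/* Minimal usage sketch (illustrative values only): pick the kernel support
   for a requested accuracy and oversampling factor, then build the kernel.

     double epsilon = 1e-7, ofactor = 2.0;
     size_t supp = ES_Kernel::get_supp(epsilon, ofactor);
     ES_Kernel kernel(supp, ofactor, 1);
     double val = kernel(0.5);         // kernel value at half its support
     double cf  = kernel.corfac(0.1);  // grid correction at x/n = 0.1
*/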
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
vector<double> correction_factors(size_t n, double ofactor, size_t nval, size_t supp,
size_t nthreads)
{
ES_Kernel kernel(supp, ofactor, nthreads);
vector<double> res(nval);
double xn = 1./n;
execStatic(nval, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
res[i] = kernel.corfac(i*xn);
});
return res;
}
using idx_t = uint32_t;
struct RowChan
{
idx_t row, chan;
};
struct UVW
{
double u, v, w;
UVW() {}
UVW(double u_, double v_, double w_) : u(u_), v(v_), w(w_) {}
UVW operator* (double fct) const
{ return UVW(u*fct, v*fct, w*fct); }
void Flip() { u=-u; v=-v; w=-w; }
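// Ensure w>=0 by negating the full coordinate if necessary (allowed
// because V(-u,-v,-w) = conj(V(u,v,w)) for a real-valued sky); returns
// whether the coordinate was flipped.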
bool FixW()
{
bool flip = w<0;
if (flip) Flip();
return flip;
}
};
class Baselines
{
protected:
vector<UVW> coord;
vector<double> f_over_c;
idx_t nrows, nchan;
idx_t shift, mask;
public:
template<typename T> Baselines(const const_mav<T,2> &coord_,
const const_mav<T,1> &freq, bool negate_v=false)
{
constexpr double speedOfLight = 299792458.;
MR_assert(coord_.shape(1)==3, "dimension mismatch");
auto hugeval = size_t(~(idx_t(0)));
MR_assert(coord_.shape(0)<hugeval, "too many entries in MS");
MR_assert(coord_.shape(1)<hugeval, "too many entries in MS");
MR_assert(coord_.size()<hugeval, "too many entries in MS");
nrows = coord_.shape(0);
nchan = freq.shape(0);
shift=0;
while((idx_t(1)<<shift)<nchan) ++shift;
mask=(idx_t(1)<<shift)-1;
MR_assert(nrows*(mask+1)<hugeval, "too many entries in MS");
f_over_c.resize(nchan);
for (size_t i=0; i<nchan; ++i)
{
MR_assert(freq(i)>0, "non-positive channel frequency encountered");
f_over_c[i] = freq(i)/speedOfLight;
}
coord.resize(nrows);
if (negate_v)
for (size_t i=0; i<coord.size(); ++i)
coord[i] = UVW(coord_(i,0), -coord_(i,1), coord_(i,2));
else
for (size_t i=0; i<coord.size(); ++i)
coord[i] = UVW(coord_(i,0), coord_(i,1), coord_(i,2));
}
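/* A (row,channel) pair is packed into a single idx_t: the channel occupies
   the low `shift` bits (the smallest power of two >= nchan), the row the
   remaining high bits. E.g. for nchan=100, shift=7 and getIdx(r,c) returns
   (r<<7) | c; getRowChan() inverts this. */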
RowChan getRowChan(idx_t index) const
{ return RowChan{index>>shift, index&mask}; }
UVW effectiveCoord(const RowChan &rc) const
{ return coord[rc.row]*f_over_c[rc.chan]; }
UVW effectiveCoord(idx_t index) const
{ return effectiveCoord(getRowChan(index)); }
size_t Nrows() const { return nrows; }
size_t Nchannels() const { return nchan; }
idx_t getIdx(idx_t irow, idx_t ichan) const
{ return ichan+(irow<<shift); }
};
class GridderConfig
{
protected:
size_t nx_dirty, ny_dirty, nu, nv;
double ofactor, eps, psx, psy;
size_t supp, nsafe;
double beta;
size_t nthreads;
double ushift, vshift;
int maxiu0, maxiv0;
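/* Phase screen for the w-term: returns exp(+-2*pi*i*w*(n-1)) with
   n = sqrt(1-l^2-m^2); the arguments x and y are the already-squared
   sky coordinates l^2 and m^2, and the sign of the phase is flipped
   for the adjoint direction. */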
template<typename T> complex<T> wscreen(T x, T y, T w, bool adjoint) const
{
constexpr T pi = T(3.141592653589793238462643383279502884197);
T tmp = 1-x-y;
if (tmp<=0) return 1; // no phase factor beyond the horizon
T nm1 = (-x-y)/(sqrt(tmp)+1); // more accurate form of sqrt(1-x-y)-1
T phase = 2*pi*w*nm1;
if (adjoint) phase *= -1;
return complex<T>(cos(phase), sin(phase));
}
public:
GridderConfig(size_t nxdirty, size_t nydirty, size_t nu_, size_t nv_,
double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_)
: nx_dirty(nxdirty), ny_dirty(nydirty), nu(nu_), nv(nv_),
ofactor(min(double(nu)/nxdirty, double(nv)/nydirty)),
eps(epsilon),
psx(pixsize_x), psy(pixsize_y),
supp(ES_Kernel::get_supp(epsilon, ofactor)), nsafe((supp+1)/2),
beta(ES_Kernel::get_beta(supp, ofactor)*supp),
nthreads(nthreads_),
ushift(supp*(-0.5)+1+nu), vshift(supp*(-0.5)+1+nv),
maxiu0((nu+nsafe)-supp), maxiv0((nv+nsafe)-supp)
{
MR_assert(nu>=2*nsafe, "nu too small");
MR_assert(nv>=2*nsafe, "nv too small");
MR_assert((nx_dirty&1)==0, "nx_dirty must be even");
MR_assert((ny_dirty&1)==0, "ny_dirty must be even");
MR_assert((nu&1)==0, "nu must be even");
MR_assert((nv&1)==0, "nv must be even");
MR_assert(epsilon>0, "epsilon must be positive");
MR_assert(pixsize_x>0, "pixsize_x must be positive");
MR_assert(pixsize_y>0, "pixsize_y must be positive");
MR_assert(ofactor>=1.2, "oversampling factor smaller than 1.2");
}
GridderConfig(size_t nxdirty, size_t nydirty,
double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_)
: GridderConfig(nxdirty, nydirty, max<size_t>(30,2*nxdirty),
max<size_t>(30,2*nydirty), epsilon, pixsize_x,
pixsize_y, nthreads_) {}
size_t Nxdirty() const { return nx_dirty; }
size_t Nydirty() const { return ny_dirty; }
double Epsilon() const { return eps; }
double Pixsize_x() const { return psx; }
double Pixsize_y() const { return psy; }
size_t Nu() const { return nu; }
size_t Nv() const { return nv; }
size_t Supp() const { return supp; }
size_t Nsafe() const { return nsafe; }
double Beta() const { return beta; }
size_t Nthreads() const { return nthreads; }
double Ofactor() const{ return ofactor; }
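/* Extract the dirty image from the oversampled Hartley grid: indices are
   wrapped FFT-style around the grid, and each pixel is multiplied by the
   precomputed correction factors in u and v, undoing the taper introduced
   by the gridding kernel. */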
template<typename T> void grid2dirty_post(const mav<T,2> &tmav,
const mav<T,2> &dirty) const
{
checkShape(dirty.shape(), {nx_dirty,ny_dirty});
auto cfu = correction_factors(nu, ofactor, nx_dirty/2+1, supp, nthreads);
auto cfv = correction_factors(nv, ofactor, ny_dirty/2+1, supp, nthreads);
execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
{
int icfu = abs(int(nx_dirty/2)-int(i));
for (size_t j=0; j<ny_dirty; ++j)
{
int icfv = abs(int(ny_dirty/2)-int(j));
size_t i2 = nu-nx_dirty/2+i;
if (i2>=nu) i2-=nu;
size_t j2 = nv-ny_dirty/2+j;
if (j2>=nv) j2-=nv;
// FIXME: for some reason g++ warns about double-to-float conversion
// here, even though there is an explicit cast...
dirty(i,j) = tmav(i2,j2)*T(cfu[icfu]*cfv[icfv]);
}
}
});
}
template<typename T> void grid2dirty_post2(
const mav<complex<T>,2> &tmav, const mav<T,2> &dirty, T w) const
{
checkShape(dirty.shape(), {nx_dirty,ny_dirty});
double x0 = -0.5*nx_dirty*psx,
y0 = -0.5*ny_dirty*psy;
execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
{
T fx = T(x0+i*psx);
fx *= fx;
for (size_t j=0; j<=ny_dirty/2; ++j)
{
T fy = T(y0+j*psy);
auto ws = wscreen(fx, fy*fy, w, true);
size_t ix = nu-nx_dirty/2+i;
if (ix>=nu) ix-=nu;
size_t jx = nv-ny_dirty/2+j;
if (jx>=nv) jx-=nv;
dirty(i,j) += (tmav(ix,jx)*ws).real(); // lower left
size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
size_t ix2 = nu-nx_dirty/2+i2;
if (ix2>=nu) ix2-=nu;
size_t jx2 = nv-ny_dirty/2+j2;
if (jx2>=nv) jx2-=nv;
if ((i>0)&&(i<i2))
{
dirty(i2,j) += (tmav(ix2,jx)*ws).real(); // lower right
if ((j>0)&&(j<j2))
dirty(i2,j2) += (tmav(ix2,jx2)*ws).real(); // upper right
}
if ((j>0)&&(j<j2))
dirty(i,j2) += (tmav(ix,jx2)*ws).real(); // upper left
}
}
});
}
template<typename T> void grid2dirty(const const_mav<T,2> &grid,
const mav<T,2> &dirty) const
{
checkShape(grid.shape(), {nu,nv});
tmpStorage<T,2> tmpdat({nu,nv});
auto tmav = tmpdat.getMav();
hartley2_2D<T>(grid, tmav, nthreads);
grid2dirty_post(tmav, dirty);
}
template<typename T> void grid2dirty_c_overwrite_wscreen_add
(const mav<complex<T>,2> &grid, const mav<T,2> &dirty, T w) const
{
checkShape(grid.shape(), {nu,nv});
constexpr auto sc = ptrdiff_t(sizeof(complex<T>));
c2c({nu,nv},{grid.stride(0)*sc,grid.stride(1)*sc},
{grid.stride(0)*sc, grid.stride(1)*sc}, {0,1}, BACKWARD,
grid.data(), grid.data(), T(1), nthreads);
grid2dirty_post2(grid, dirty, w);
}
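/* Adjoint of grid2dirty_post: scatter the dirty image into the zero-padded
   oversampled grid (with FFT-style index wrapping) while applying the same
   kernel correction factors. */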
template<typename T> void dirty2grid_pre(const const_mav<T,2> &dirty,
const mav<T,2> &grid) const
{
checkShape(dirty.shape(), {nx_dirty, ny_dirty});
checkShape(grid.shape(), {nu, nv});
auto cfu = correction_factors(nu, ofactor, nx_dirty/2+1, supp, nthreads);
auto cfv = correction_factors(nv, ofactor, ny_dirty/2+1, supp, nthreads);
grid.fill(0);
execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
{
int icfu = abs(int(nx_dirty/2)-int(i));
for (size_t j=0; j<ny_dirty; ++j)
{
int icfv = abs(int(ny_dirty/2)-int(j));
size_t i2 = nu-nx_dirty/2+i;
if (i2>=nu) i2-=nu;
size_t j2 = nv-ny_dirty/2+j;
if (j2>=nv) j2-=nv;
grid(i2,j2) = dirty(i,j)*T(cfu[icfu]*cfv[icfv]);
}
}
});
}
template<typename T> void dirty2grid_pre2(const const_mav<T,2> &dirty,
const mav<complex<T>,2> &grid, T w) const
{
checkShape(dirty.shape(), {nx_dirty, ny_dirty});
checkShape(grid.shape(), {nu, nv});
grid.fill(0);
double x0 = -0.5*nx_dirty*psx,
y0 = -0.5*ny_dirty*psy;
execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
{
T fx = T(x0+i*psx);
fx *= fx;
for (size_t j=0; j<=ny_dirty/2; ++j)
{
T fy = T(y0+j*psy);
auto ws = wscreen(fx, fy*fy, w, false);
size_t ix = nu-nx_dirty/2+i;
if (ix>=nu) ix-=nu;
size_t jx = nv-ny_dirty/2+j;
if (jx>=nv) jx-=nv;
grid(ix,jx) = dirty(i,j)*ws; // lower left
size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
size_t ix2 = nu-nx_dirty/2+i2;
if (ix2>=nu) ix2-=nu;
size_t jx2 = nv-ny_dirty/2+j2;
if (jx2>=nv) jx2-=nv;
if ((i>0)&&(i<i2))
{
grid(ix2,jx) = dirty(i2,j)*ws; // lower right
if ((j>0)&&(j<j2))
grid(ix2,jx2) = dirty(i2,j2)*ws; // upper right
}
if ((j>0)&&(j<j2))
grid(ix,jx2) = dirty(i,j2)*ws; // upper left
}
}
});
}
template<typename T> void dirty2grid(const const_mav<T,2> &dirty,
const mav<T,2> &grid) const
{
dirty2grid_pre(dirty, grid);
hartley2_2D<T>(cmav(grid), grid, nthreads);
}
template<typename T> void dirty2grid_c_wscreen(const const_mav<T,2> &dirty,
const mav<complex<T>,2> &grid, T w) const
{
dirty2grid_pre2(dirty, grid, w);
constexpr auto sc = ptrdiff_t(sizeof(complex<T>));
stride_t strides{grid.stride(0)*sc,grid.stride(1)*sc};
c2c({nu,nv}, strides, strides, {0,1}, FORWARD,
grid.data(), grid.data(), T(1), nthreads);
}
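/* Map a (u,v) baseline coordinate to grid coordinates: u_in*psx is the
   coordinate in units of the grid period, fmod1 wraps it into [0,1), and
   multiplying by nu gives the (fractional) pixel position u. iu0 is the
   first grid cell touched by the kernel support, clamped to maxiu0 so that
   the whole support plus the safety border fits inside the grid. */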
void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
{
u=fmod1(u_in*psx)*nu;
iu0 = min(int(u+ushift)-int(nu), maxiu0);
v=fmod1(v_in*psy)*nv;
iv0 = min(int(v+vshift)-int(nv), maxiv0);
}
template<typename T> void apply_wscreen(const const_mav<complex<T>,2> &dirty,
mav<complex<T>,2> &dirty2, double w, bool adjoint) const
{
checkShape(dirty.shape(), {nx_dirty, ny_dirty});
checkShape(dirty2.shape(), {nx_dirty, ny_dirty});
double x0 = -0.5*nx_dirty*psx,
y0 = -0.5*ny_dirty*psy;
execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
{
T fx = T(x0+i*psx);
fx *= fx;
for (size_t j=0; j<=ny_dirty/2; ++j)
{
T fy = T(y0+j*psy);
auto ws = wscreen(fx, fy*fy, T(w), adjoint);
dirty2(i,j) = dirty(i,j)*ws; // lower left
size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
if ((i>0)&&(i<i2))
{
dirty2(i2,j) = dirty(i2,j)*ws; // lower right
if ((j>0)&&(j<j2))
dirty2(i2,j2) = dirty(i2,j2)*ws; // upper right
}
if ((j>0)&&(j<j2))
dirty2(i,j2) = dirty(i,j2)*ws; // upper left
}
}
});
}
};
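/* Minimal usage sketch (illustrative values only), assuming a 512x512
   dirty image, 1e-7 requested accuracy, 1e-5 rad pixels and 4 threads:

     GridderConfig gconf(512, 512, 1e-7, 1e-5, 1e-5, 4);
     size_t supp = gconf.Supp(); // kernel support chosen for this accuracy
*/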
constexpr int logsquare=4; // log2 of the tile edge length used when sorting visibilities into u/v buckets