Commit c32c14c7 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

intermediate

parent c3266a8b
#ifndef GRIDDER_CXX_H
#define GRIDDER_CXX_H
/*
* This file is part of nifty_gridder.
*
* nifty_gridder is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* nifty_gridder is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with nifty_gridder; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Copyright (C) 2019 Max-Planck-Society
Author: Martin Reinecke */
#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cmath>
#include <vector>
#include "pocketfft_hdronly.h"
#include <array>
template<typename T, size_t ndim, bool c_contiguous> class mav
{
static_assert((ndim>0) && (ndim<3), "only supports 1D and 2D arrays");
private:
T *d;
std::array<size_t, ndim> shp;
std::array<ptrdiff_t, ndim> str;
public:
mav(T *d_, const std::array<size_t,ndim> &shp_,
const std::array<ptrdiff_t,ndim> &str_)
: d(d_), shp(shp_), str(str_)
{ static_assert(!c_contiguous, "this type does not accept strides"); }
mav(T *d_, const std::array<size_t,ndim> &shp_)
: d(d_), shp(shp_)
{
static_assert(c_contiguous, "this type requires strides");
str[ndim-1]=1;
for (size_t d=2; d<=ndim; ++d)
str[ndim-d] = str[ndim-d+1]*shp[ndim-d+1];
}
operator mav<const T, ndim, true>() const
{
static_assert(c_contiguous);
return mav<const T, ndim, true>(d,shp);
}
operator mav<const T, ndim, false>() const
{ return mav<const T, ndim, false>(d,shp, str); }
T &operator[](size_t i)
{ return operator()(i); }
const T &operator[](size_t i) const
{ return operator()(i); }
T &operator()(size_t i)
{
static_assert(ndim==1, "ndim must be 1");
return c_contiguous ? d[i] : d[str[0]*i];
}
const T &operator()(size_t i) const
{
static_assert(ndim==1, "ndim must be 1");
return c_contiguous ? d[i] : d[str[0]*i];
}
T &operator()(size_t i, size_t j)
{
static_assert(ndim==2, "ndim must be 2");
return c_contiguous ? d[str[0]*i + j] : d[str[0]*i + str[1]*j];
}
const T &operator()(size_t i, size_t j) const
{
static_assert(ndim==2, "ndim must be 2");
return c_contiguous ? d[str[0]*i + j] : d[str[0]*i + str[1]*j];
}
size_t shape(size_t i) const { return shp[i]; }
const std::array<size_t,ndim> &shape() const { return shp; }
size_t size() const
{
size_t res=1;
for (auto v: shp) res*=v;
return res;
}
size_t stride(size_t i) const { return str[i]; }
T *data()
{
// static_assert(c_contiguous, "type is not C contiguous");
return d;
}
const T *data() const
{
// static_assert(c_contiguous, "type is not C contiguous");
return d;
}
};
template<typename T, size_t ndim> using cmav = mav<T,ndim,true>;
template<typename T, size_t ndim> using smav = mav<T,ndim,false>;
//
// basic utilities
//
void myassert(bool cond, const char *msg)
{
if (cond) return;
throw std::runtime_error(msg);
}
template<size_t ndim> void checkShape
(const std::array<size_t, ndim> &shp1, const std::array<size_t, ndim> &shp2)
{
for (size_t i=0; i<ndim; ++i)
myassert(shp1[i]==shp2[i], "shape mismatch");
}
/*! Returns the remainder of the division \a v1/v2.
The result is non-negative.
\a v1 can be positive or negative; \a v2 must be positive. */
template<typename T> inline T fmodulo (T v1, T v2)
{
if (v1>=0)
return (v1<v2) ? v1 : fmod(v1,v2);
T tmp=fmod(v1,v2)+v2;
return (tmp==v2) ? T(0) : tmp;
}
static size_t nthreads = 1;
constexpr auto set_nthreads_DS = R"""(
Specifies the number of threads to be used by the module
Parameters
==========
nthreads: int
the number of threads to be used. Must be >=1.
)""";
void set_nthreads(size_t nthreads_)
{
myassert(nthreads_>=1, "nthreads must be >= 1");
nthreads = nthreads_;
}
constexpr auto get_nthreads_DS = R"""(
Returns the number of threads used by the module
Returns
=======
int : the number of threads used by the module
)""";
size_t get_nthreads()
{ return nthreads; }
//
// Utilities for Gauss-Legendre quadrature
//
static inline double one_minus_x2 (double x)
{ return (fabs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; }
void legendre_prep(int n, std::vector<double> &x, std::vector<double> &w)
{
constexpr double pi = 3.141592653589793238462643383279502884197;
constexpr double eps = 3e-14;
int m = (n+1)>>1;
x.resize(m);
w.resize(m);
double t0 = 1 - (1-1./n) / (8.*n*n);
double t1 = 1./(4.*n+2.);
#pragma omp parallel num_threads(nthreads)
{
int i;
#pragma omp for schedule(dynamic,100)
for (i=1; i<=m; ++i)
{
double x0 = cos(pi * ((i<<2)-1) * t1) * t0;
int dobreak=0;
int j=0;
double dpdx;
while(1)
{
double P_1 = 1.0;
double P0 = x0;
double dx, x1;
for (int k=2; k<=n; k++)
{
double P_2 = P_1;
P_1 = P0;
// P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k;
P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2);
}
dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0);
/* Newton step */
x1 = x0 - P0/dpdx;
dx = x0-x1;
x0 = x1;
if (dobreak) break;
if (abs(dx)<=eps) dobreak=1;
myassert(++j<100, "convergence problem");
}
x[m-i] = x0;
w[m-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx);
}
} // end of parallel region
}
//
// Start of real gridder functionality
//
size_t get_supp(double epsilon)
{
static const std::vector<double> maxmaperr { 1e8, 0.32, 0.021, 6.2e-4,
1.08e-5, 1.25e-7, 8.25e-10, 5.70e-12, 1.22e-13, 2.48e-15, 4.82e-17,
6.74e-19, 5.41e-21, 4.41e-23, 7.88e-25, 3.9e-26 };
double epssq = epsilon*epsilon;
for (size_t i=1; i<maxmaperr.size(); ++i)
if (epssq>maxmaperr[i]) return i;
throw std::runtime_error("requested epsilon too small - minimum is 2e-13");
}
template<typename T> void complex2hartley
(const smav<const std::complex<T>, 2> &grid, smav<T,2> &grid2)
{
myassert(grid.shape()==grid2.shape(), "shape mismatch");
size_t nu=grid.shape(0), nv=grid.shape(1);
#pragma omp parallel for num_threads(nthreads)
for (size_t u=0; u<nu; ++u)
{
size_t xu = (u==0) ? 0 : nu-u;
for (size_t v=0; v<nv; ++v)
{
size_t xv = (v==0) ? 0 : nv-v;
grid2(u,v) += T(0.5)*(grid( u, v).real()+grid( u, v).imag()+
grid(xu,xv).real()-grid(xu,xv).imag());
}
}
}
template<typename T> void hartley2complex
(const smav<const T,2> &grid, smav<std::complex<T>,2> &grid2)
{
myassert(grid.shape()==grid2.shape(), "shape mismatch");
size_t nu=grid.shape(0), nv=grid.shape(1);
#pragma omp parallel for num_threads(nthreads)
for (size_t u=0; u<nu; ++u)
{
size_t xu = (u==0) ? 0 : nu-u;
for (size_t v=0; v<nv; ++v)
{
size_t xv = (v==0) ? 0 : nv-v;
T v1 = T(0.5)*grid( u, v);
T v2 = T(0.5)*grid(xu,xv);
grid2(u,v) = std::complex<T>(v1+v2, v1-v2);
}
}
}
template<typename T> void hartley2_2D(const smav<const T,2> &in, smav<T,2> &out)
{
myassert(in.shape()==out.shape(), "shape mismatch");
size_t nu=in.shape(0), nv=in.shape(1), sz=sizeof(T);
pocketfft::stride_t str{ptrdiff_t(sz*nv), ptrdiff_t(sz)};
auto d_i = in.data();
auto ptmp = out.data();
pocketfft::r2r_separable_hartley({nu, nv}, str, str, {0,1}, d_i, ptmp, T(1),
nthreads);
#pragma omp parallel for num_threads(nthreads)
for(size_t i=1; i<(nu+1)/2; ++i)
for(size_t j=1; j<(nv+1)/2; ++j)
{
T a = ptmp[i*nv+j];
T b = ptmp[(nu-i)*nv+j];
T c = ptmp[i*nv+nv-j];
T d = ptmp[(nu-i)*nv+nv-j];
ptmp[i*nv+j] = T(0.5)*(a+b+c-d);
ptmp[(nu-i)*nv+j] = T(0.5)*(a+b+d-c);
ptmp[i*nv+nv-j] = T(0.5)*(a+c+d-b);
ptmp[(nu-i)*nv+nv-j] = T(0.5)*(b+c+d-a);
}
}
template<typename T> class EC_Kernel
{
protected:
size_t supp;
T beta, emb_;
public:
EC_Kernel(size_t supp_)
: supp(supp_), beta(2.3*supp_), emb_(exp(-beta)) {}
T operator()(T v) const { return exp(beta*(sqrt(T(1)-v*v)-T(1))); }
T val_no_emb(T v) const { return exp(beta*sqrt(T(1)-v*v)); }
T emb() const { return emb_; }
};
template<typename T> class EC_Kernel_with_correction: public EC_Kernel<T>
{
protected:
static constexpr T pi = T(3.141592653589793238462643383279502884197L);
int p;
std::vector<T> x, wgt, psi;
using EC_Kernel<T>::supp;
public:
using EC_Kernel<T>::operator();
EC_Kernel_with_correction(size_t supp_)
: EC_Kernel<T>(supp_), p(int(1.5*supp_+2))
{
legendre_prep(2*p,x,wgt);
psi=x;
for (auto &v:psi)
v=operator()(v);
}
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
T corfac(T v) const
{
T tmp=0;
for (int i=0; i<p; ++i)
tmp += wgt[i]*psi[i]*cos(pi*supp*v*x[i]);
return T(1)/(supp*tmp);
}
};
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
std::vector<double> correction_factors (size_t n, size_t nval, size_t supp)
{
EC_Kernel_with_correction<double> kernel(supp);
std::vector<double> res(nval);
double xn = 1./n;
#pragma omp parallel for schedule(static) num_threads(nthreads)
for (size_t k=0; k<nval; ++k)
res[k] = kernel.corfac(k*xn);
return res;
}
template<typename T> struct UVW
{
T u, v, w;
UVW () {}
UVW (T u_, T v_, T w_) : u(u_), v(v_), w(w_) {}
UVW operator* (T fct) const
{ return UVW(u*fct, v*fct, w*fct); }
};
template<typename T> class Baselines
{
protected:
std::vector<UVW<T>> coord;
std::vector<T> f_over_c;
size_t nrows, nchan;
public:
Baselines(const smav<const T,2> &coord_, const smav<const T,1> &freq)
{
constexpr double speedOfLight = 299792458.;
myassert(coord_.shape(1)==3, "dimension mismatch");
nrows = coord_.shape(0);
nchan = freq.shape(0);
myassert(nrows*nchan<(size_t(1)<<32), "too many entries in MS");
f_over_c.resize(nchan);
for (size_t i=0; i<nchan; ++i)
f_over_c[i] = freq(i)/speedOfLight;
coord.resize(nrows);
for (size_t i=0; i<coord.size(); ++i)
coord[i] = UVW<T>(coord_(i,0), coord_(i,1), coord_(i,2));
}
UVW<T> effectiveCoord(uint32_t index) const
{
size_t irow = index/nchan;
size_t ichan = index-nchan*irow;
return coord[irow]*f_over_c[ichan];
}
UVW<T> effectiveCoord(size_t irow, size_t ichan) const
{ return coord[irow]*f_over_c[ichan]; }
size_t Nrows() const { return nrows; }
size_t Nchannels() const { return nchan; }
void effectiveUVW(const smav<const uint32_t,1> &idx, smav<T,2> &res) const
{
size_t nvis = idx.shape(0);
myassert(res.shape(0)==nvis, "shape mismatch");
myassert(res.shape(1)==3, "shape mismatch");
for (size_t i=0; i<nvis; i++)
{
auto uvw = effectiveCoord(idx(i));
res(i,0) = uvw.u;
res(i,1) = uvw.v;
res(i,2) = uvw.w;
}
}
template<typename T2> void ms2vis(const smav<const T2,2> &ms,
const smav<const uint32_t,1> &idx, smav<T2,1> &vis) const
{
myassert(ms.shape(0)==nrows, "shape mismatch");
myassert(ms.shape(1)==nchan, "shape mismatch");
size_t nvis = idx.shape(0);
myassert(vis.shape(0)==nvis, "shape mismatch");
#pragma omp parallel for num_threads(nthreads)
for (size_t i=0; i<nvis; ++i)
{
auto t = idx(i);
auto row = t/nchan;
auto chan = t-row*nchan;
vis[i] = ms(row, chan);
}
}
template<typename T2> void vis2ms(const smav<const T2,1> &vis,
const smav<const uint32_t,1> &idx, smav<T2,2> &ms) const
{
size_t nvis = vis.shape(0);
myassert(idx.shape(0)==nvis, "shape mismatch");
myassert(ms.shape(0)==nrows, "shape mismatch");
myassert(ms.shape(1)==nchan, "shape mismatch");
#pragma omp parallel for num_threads(nthreads)
for (size_t i=0; i<nvis; ++i)
{
auto t = idx(i);
auto row = t/nchan;
auto chan = t-row*nchan;
ms(row, chan) += vis(i);
}
}
};
template<typename T> class GridderConfig
{
protected:
size_t nx_dirty, ny_dirty;
double eps, psx, psy;
size_t supp, nsafe, nu, nv;
T beta;
std::vector<T> cfu, cfv;
std::complex<T> wscreen(double x, double y, double w, bool adjoint) const
{
using namespace std;
constexpr double pi = 3.141592653589793238462643383279502884197;
#if 1
double eps = sqrt(x+y);
double s = sin(eps);
double nm1 = -s*s/(1.+cos(eps));
#else
double nm1 = (-x-y)/(sqrt(1.-x-y)+1);
#endif
double n = nm1+1., xn = 1./n;
double phase = 2*pi*w*nm1;
if (adjoint) phase *= -1;
return complex<T>(cos(phase)*xn, sin(phase)*xn);
}
public:
GridderConfig(size_t nxdirty, size_t nydirty, double epsilon,
double pixsize_x, double pixsize_y)
: nx_dirty(nxdirty), ny_dirty(nydirty), eps(epsilon),
psx(pixsize_x), psy(pixsize_y),
supp(get_supp(epsilon)), nsafe((supp+1)/2),
nu(std::max(2*nsafe,2*nx_dirty)), nv(std::max(2*nsafe,2*ny_dirty)),
beta(2.3*supp),
cfu(nx_dirty), cfv(ny_dirty)
{
myassert((nx_dirty&1)==0, "nx_dirty must be even");
myassert((ny_dirty&1)==0, "ny_dirty must be even");
myassert(epsilon>0, "epsilon must be positive");
myassert(pixsize_x>0, "pixsize_x must be positive");
myassert(pixsize_y>0, "pixsize_y must be positive");
auto tmp = correction_factors(nu, nx_dirty/2+1, supp);
cfu[nx_dirty/2]=tmp[0];
cfu[0]=tmp[nx_dirty/2];
for (size_t i=1; i<nx_dirty/2; ++i)
cfu[nx_dirty/2-i] = cfu[nx_dirty/2+i] = tmp[i];
tmp = correction_factors(nv, ny_dirty/2+1, supp);
cfv[ny_dirty/2]=tmp[0];
cfv[0]=tmp[ny_dirty/2];
for (size_t i=1; i<ny_dirty/2; ++i)
cfv[ny_dirty/2-i] = cfv[ny_dirty/2+i] = tmp[i];
}
size_t Nxdirty() const { return nx_dirty; }
size_t Nydirty() const { return ny_dirty; }
double Epsilon() const { return eps; }
double Pixsize_x() const { return psx; }
double Pixsize_y() const { return psy; }
size_t Nu() const { return nu; }
size_t Nv() const { return nv; }
size_t Supp() const { return supp; }
size_t Nsafe() const { return nsafe; }
T Beta() const { return beta; }
void grid2dirty(const smav<const T,2> &grid, smav<T,2> &grid2) const
{
checkShape(grid.shape(), {nu,nv});
std::vector<T> tmpdat(nu*nv);
auto tmp=smav<T,2>(tmpdat.data(),{nu,nv},{nv,1});
hartley2_2D<T>(grid, tmp);
checkShape(grid2.shape(), {nx_dirty, ny_dirty});
for (size_t i=0; i<nx_dirty; ++i)
for (size_t j=0; j<ny_dirty; ++j)
{
size_t i2 = nu-nx_dirty/2+i;
if (i2>=nu) i2-=nu;
size_t j2 = nv-ny_dirty/2+j;
if (j2>=nv) j2-=nv;
grid2(i,j) = tmp(i2,j2)*cfu[i]*cfv[j];
}
}
#if 0
pyarr_c<T> apply_taper(const pyarr_c<T> &img, bool divide) const
{
checkArray(img, "img", {nx_dirty, ny_dirty});
auto pin = img.data();
auto res = makeArray<T>({nx_dirty, ny_dirty});
auto pout = res.mutable_data();
{
py::gil_scoped_release release;
if (divide)
for (size_t i=0; i<nx_dirty; ++i)
for (size_t j=0; j<ny_dirty; ++j)
pout[ny_dirty*i + j] = pin[ny_dirty*i + j]/(cfu[i]*cfv[j]);
else
for (size_t i=0; i<nx_dirty; ++i)
for (size_t j=0; j<ny_dirty; ++j)
pout[ny_dirty*i + j] = pin[ny_dirty*i + j]*cfu[i]*cfv[j];
}
return res;
}
pyarr_c<complex<T>> grid2dirty_c(const pyarr_c<complex<T>> &grid) const
{
checkArray(grid, "grid", {nu, nv});
auto tmp = makeArray<complex<T>>({nu, nv});
auto ptmp = tmp.mutable_data();
pocketfft::c2c({nu,nv},{grid.strides(0),grid.strides(1)},
{tmp.strides(0), tmp.strides(1)}, {0,1}, pocketfft::BACKWARD,
grid.data(), tmp.mutable_data(), T(1), nthreads);
auto res = makeArray<complex<T>>({nx_dirty, ny_dirty});
auto pout = res.mutable_data();
{
py::gil_scoped_release release;
for (size_t i=0; i<nx_dirty; ++i)
for (size_t j=0; j<ny_dirty; ++j)
{
size_t i2 = nu-nx_dirty/2+i;
if (i2>=nu) i2-=nu;
size_t j2 = nv-ny_dirty/2+j;
if (j2>=nv) j2-=nv;
pout[ny_dirty*i + j] = ptmp[nv*i2+j2]*cfu[i]*cfv[j];
}
}
return res;
}
pyarr_c<T> dirty2grid(const pyarr_c<T> &dirty) const
{
checkArray(dirty, "dirty", {nx_dirty, ny_dirty});
auto pdirty = dirty.data();
auto tmp = makeArray<T>({nu, nv});
auto ptmp = tmp.mutable_data();
{
py::gil_scoped_release release;
for (size_t i=0; i<nu*nv; ++i)
ptmp[i] = 0.;
for (size_t i=0; i<nx_dirty; ++i)
for (size_t j=0; j<ny_dirty; ++j)
{
size_t i2 = nu-nx_dirty/2+i;
if (i2>=nu) i2-=nu;
size_t j2 = nv-ny_dirty/2+j;
if (j2>=nv) j2-=nv;
ptmp[nv*i2+j2] = pdirty[ny_dirty*i + j]*cfu[i]*cfv[j];
}
}
hartley2_2D<T>(tmp, tmp);
return tmp;
}
pyarr_c<complex<T>> dirty2grid_c(const pyarr_c<complex<T>> &dirty) const
{
checkArray(dirty, "dirty", {nx_dirty, ny_dirty});
auto pdirty = dirty.data();
auto tmp = makeArray<complex<T>>({nu, nv});
auto ptmp = tmp.mutable_data();
pocketfft::stride_t strides{tmp.strides(0),tmp.strides(1)};
{
py::gil_scoped_release release;
for (size_t i=0; i<nu*nv; ++i)