gridder_cxx.h 35.4 KB
Newer Older
Martin Reinecke's avatar
Martin Reinecke committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
#ifndef GRIDDER_CXX_H
#define GRIDDER_CXX_H

/*
 *  This file is part of nifty_gridder.
 *
 *  nifty_gridder is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  nifty_gridder is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with nifty_gridder; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

/* Copyright (C) 2019-2020 Max-Planck-Society
   Author: Martin Reinecke */

#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cmath>
#include <vector>
#include <array>
Martin Reinecke's avatar
Martin Reinecke committed
31
#include <memory>
Martin Reinecke's avatar
Martin Reinecke committed
32

Martin Reinecke's avatar
Martin Reinecke committed
33 34 35 36 37
#include "ducc0/infra/error_handling.h"
#include "ducc0/math/fft.h"
#include "ducc0/infra/threading.h"
#include "ducc0/infra/useful_macros.h"
#include "ducc0/infra/mav.h"
Martin Reinecke's avatar
Martin Reinecke committed
38
#include "ducc0/infra/simd.h"
Martin Reinecke's avatar
Martin Reinecke committed
39
#include "ducc0/math/es_kernel.h"
Martin Reinecke's avatar
Martin Reinecke committed
40
#include "ducc0/math/gridding_kernel.h"
Martin Reinecke's avatar
Martin Reinecke committed
41

Martin Reinecke's avatar
Martin Reinecke committed
42
namespace ducc0 {
Martin Reinecke's avatar
Martin Reinecke committed
43

Martin Reinecke's avatar
Martin Reinecke committed
44
namespace detail_gridder {
Martin Reinecke's avatar
Martin Reinecke committed
45 46 47 48 49

using namespace std;

template<size_t ndim> void checkShape
  (const array<size_t, ndim> &shp1, const array<size_t, ndim> &shp2)
50
  { MR_assert(shp1==shp2, "shape mismatch"); }
Martin Reinecke's avatar
Martin Reinecke committed
51 52 53 54 55 56 57 58 59

// Returns v minus floor(v), i.e. the fractional part of v measured from the
// next lower integer (the result lies in [0,1) for finite floating-point v).
template<typename T> inline T fmod1 (T v)
  {
  using std::floor;
  const T below = floor(v);
  return v-below;
  }

//
// Start of real gridder functionality
//

template<typename T> void complex2hartley
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
60
  (const mav<complex<T>, 2> &grid, mav<T,2> &grid2, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
61
  {
62
  MR_assert(grid.conformable(grid2), "shape mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
63 64 65 66 67 68 69 70 71 72
  size_t nu=grid.shape(0), nv=grid.shape(1);

  execStatic(nu, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto u=rng.lo; u<rng.hi; ++u)
      {
      size_t xu = (u==0) ? 0 : nu-u;
      for (size_t v=0; v<nv; ++v)
        {
        size_t xv = (v==0) ? 0 : nv-v;
Martin Reinecke's avatar
Martin Reinecke committed
73 74
        grid2.v(u,v) = T(0.5)*(grid( u, v).real()+grid( u, v).imag()+
                               grid(xu,xv).real()-grid(xu,xv).imag());
Martin Reinecke's avatar
Martin Reinecke committed
75 76 77 78 79 80
        }
      }
    });
  }

template<typename T> void hartley2complex
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
81
  (const mav<T,2> &grid, mav<complex<T>,2> &grid2, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
82
  {
83
  MR_assert(grid.conformable(grid2), "shape mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
84 85 86 87 88 89 90 91 92 93 94 95
  size_t nu=grid.shape(0), nv=grid.shape(1);

  execStatic(nu, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto u=rng.lo; u<rng.hi; ++u)
      {
      size_t xu = (u==0) ? 0 : nu-u;
      for (size_t v=0; v<nv; ++v)
        {
        size_t xv = (v==0) ? 0 : nv-v;
        T v1 = T(0.5)*grid( u, v);
        T v2 = T(0.5)*grid(xu,xv);
Martin Reinecke's avatar
Martin Reinecke committed
96
        grid2.v(u,v) = std::complex<T>(v1+v2, v1-v2);
Martin Reinecke's avatar
Martin Reinecke committed
97 98 99 100 101
        }
      }
    });
  }

Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
102 103
template<typename T> void hartley2_2D(const mav<T,2> &in,
  mav<T,2> &out, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
104
  {
105
  MR_assert(in.conformable(out), "shape mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
106
  size_t nu=in.shape(0), nv=in.shape(1);
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
107 108
  fmav<T> fin(in), fout(out);
  r2r_separable_hartley(fin, fout, {0,1}, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
109 110 111 112 113
  execStatic((nu+1)/2-1, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto i=rng.lo+1; i<rng.hi+1; ++i)
      for(size_t j=1; j<(nv+1)/2; ++j)
         {
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
114 115 116 117
         T a = out(i,j);
         T b = out(nu-i,j);
         T c = out(i,nv-j);
         T d = out(nu-i,nv-j);
Martin Reinecke's avatar
Martin Reinecke committed
118 119 120 121
         out.v(i,j) = T(0.5)*(a+b+c-d);
         out.v(nu-i,j) = T(0.5)*(a+b+d-c);
         out.v(i,nv-j) = T(0.5)*(a+c+d-b);
         out.v(nu-i,nv-j) = T(0.5)*(b+c+d-a);
Martin Reinecke's avatar
Martin Reinecke committed
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
         }
     });
  }

// Integer type used for packed visibility indices and row/channel numbers.
using idx_t = uint32_t;

// A (row, channel) pair addressing a single entry of a measurement set.
struct RowChan
  {
  idx_t row;
  idx_t chan;
  };

// Triple of (u, v, w) coordinates with a few helper operations.
struct UVW
  {
  double u, v, w;

  // Leaves all three components uninitialized.
  UVW() {}
  UVW(double u_, double v_, double w_)
    : u(u_), v(v_), w(w_)
    {}

  // Returns a copy with every component multiplied by fct.
  UVW operator* (double fct) const
    {
    return UVW(u*fct, v*fct, w*fct);
    }

  // Negates all three components in place.
  void Flip()
    {
    u = -u;
    v = -v;
    w = -w;
    }

  // Ensures that w is not negative: if w<0, all components are negated.
  // Returns true if and only if the coordinates were flipped.
  bool FixW()
    {
    if (w<0)
      {
      Flip();
      return true;
      }
    return false;
    }
  };

class Baselines
  {
  protected:
    vector<UVW> coord;
    vector<double> f_over_c;
    idx_t nrows, nchan;
    idx_t shift, mask;

  public:
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
158 159
    template<typename T> Baselines(const mav<T,2> &coord_,
      const mav<T,1> &freq, bool negate_v=false)
Martin Reinecke's avatar
Martin Reinecke committed
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
      {
      constexpr double speedOfLight = 299792458.;
      MR_assert(coord_.shape(1)==3, "dimension mismatch");
      auto hugeval = size_t(~(idx_t(0)));
      MR_assert(coord_.shape(0)<hugeval, "too many entries in MS");
      MR_assert(coord_.shape(1)<hugeval, "too many entries in MS");
      MR_assert(coord_.size()<hugeval, "too many entries in MS");
      nrows = coord_.shape(0);
      nchan = freq.shape(0);
      shift=0;
      while((idx_t(1)<<shift)<nchan) ++shift;
      mask=(idx_t(1)<<shift)-1;
      MR_assert(nrows*(mask+1)<hugeval, "too many entries in MS");
      f_over_c.resize(nchan);
      for (size_t i=0; i<nchan; ++i)
        {
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
176
        MR_assert(freq(i)>0, "negative channel frequency encountered");
Martin Reinecke's avatar
Martin Reinecke committed
177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
        f_over_c[i] = freq(i)/speedOfLight;
        }
      coord.resize(nrows);
      if (negate_v)
        for (size_t i=0; i<coord.size(); ++i)
          coord[i] = UVW(coord_(i,0), -coord_(i,1), coord_(i,2));
      else
        for (size_t i=0; i<coord.size(); ++i)
          coord[i] = UVW(coord_(i,0), coord_(i,1), coord_(i,2));
      }

    RowChan getRowChan(idx_t index) const
      { return RowChan{index>>shift, index&mask}; }

    UVW effectiveCoord(const RowChan &rc) const
      { return coord[rc.row]*f_over_c[rc.chan]; }
    UVW effectiveCoord(idx_t index) const
      { return effectiveCoord(getRowChan(index)); }
    size_t Nrows() const { return nrows; }
    size_t Nchannels() const { return nchan; }
    idx_t getIdx(idx_t irow, idx_t ichan) const
      { return ichan+(irow<<shift); }
  };

Martin Reinecke's avatar
Martin Reinecke committed
201
template<typename T> class GridderConfig
Martin Reinecke's avatar
Martin Reinecke committed
202 203 204 205 206 207 208 209 210
  {
  protected:
    size_t nx_dirty, ny_dirty, nu, nv;
    double ofactor, eps, psx, psy;
    size_t supp, nsafe;
    double beta;
    size_t nthreads;
    double ushift, vshift;
    int maxiu0, maxiv0;
Martin Reinecke's avatar
Martin Reinecke committed
211 212 213
  public:
    shared_ptr<GriddingKernel<T>> krn;
  protected:
Martin Reinecke's avatar
Martin Reinecke committed
214

Martin Reinecke's avatar
Martin Reinecke committed
215
    complex<T> wscreen(T x, T y, T w, bool adjoint) const
Martin Reinecke's avatar
Martin Reinecke committed
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
      {
      constexpr T pi = T(3.141592653589793238462643383279502884197);
      T tmp = 1-x-y;
      if (tmp<=0) return 1; // no phase factor beyond the horizon
      T nm1 = (-x-y)/(sqrt(tmp)+1); // more accurate form of sqrt(1-x-y)-1
      T phase = 2*pi*w*nm1;
      if (adjoint) phase *= -1;
      return complex<T>(cos(phase), sin(phase));
      }

  public:
    GridderConfig(size_t nxdirty, size_t nydirty, size_t nu_, size_t nv_,
      double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_)
      : nx_dirty(nxdirty), ny_dirty(nydirty), nu(nu_), nv(nv_),
        ofactor(min(double(nu)/nxdirty, double(nv)/nydirty)),
        eps(epsilon),
        psx(pixsize_x), psy(pixsize_y),
        supp(ES_Kernel::get_supp(epsilon, ofactor)), nsafe((supp+1)/2),
        beta(ES_Kernel::get_beta(supp, ofactor)*supp),
        nthreads(nthreads_),
        ushift(supp*(-0.5)+1+nu), vshift(supp*(-0.5)+1+nv),
Martin Reinecke's avatar
Martin Reinecke committed
237 238
        maxiu0((nu+nsafe)-supp), maxiv0((nv+nsafe)-supp),
        krn(make_shared<HornerKernel<T>>(supp, supp+3, [this](double v){return double(esk(v,double(beta)));}))
Martin Reinecke's avatar
Martin Reinecke committed
239 240 241 242 243 244 245 246 247 248
      {
      MR_assert(nu>=2*nsafe, "nu too small");
      MR_assert(nv>=2*nsafe, "nv too small");
      MR_assert((nx_dirty&1)==0, "nx_dirty must be even");
      MR_assert((ny_dirty&1)==0, "ny_dirty must be even");
      MR_assert((nu&1)==0, "nu must be even");
      MR_assert((nv&1)==0, "nv must be even");
      MR_assert(epsilon>0, "epsilon must be positive");
      MR_assert(pixsize_x>0, "pixsize_x must be positive");
      MR_assert(pixsize_y>0, "pixsize_y must be positive");
Martin Reinecke's avatar
Martin Reinecke committed
249 250
      MR_assert(ofactor>=1.175,
        "oversampling factor too small (>=1.2 recommended)");
Martin Reinecke's avatar
Martin Reinecke committed
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
      }
    GridderConfig(size_t nxdirty, size_t nydirty,
      double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_)
      : GridderConfig(nxdirty, nydirty, max<size_t>(30,2*nxdirty),
                      max<size_t>(30,2*nydirty), epsilon, pixsize_x,
                      pixsize_y, nthreads_) {}
    size_t Nxdirty() const { return nx_dirty; }
    size_t Nydirty() const { return ny_dirty; }
    double Epsilon() const { return eps; }
    double Pixsize_x() const { return psx; }
    double Pixsize_y() const { return psy; }
    size_t Nu() const { return nu; }
    size_t Nv() const { return nv; }
    size_t Supp() const { return supp; }
    size_t Nsafe() const { return nsafe; }
    size_t Nthreads() const { return nthreads; }
    double Ofactor() const{ return ofactor; }

Martin Reinecke's avatar
Martin Reinecke committed
269
    void grid2dirty_post(mav<T,2> &tmav,
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
270
      mav<T,2> &dirty) const
Martin Reinecke's avatar
Martin Reinecke committed
271
      {
272
      checkShape(dirty.shape(), {nx_dirty, ny_dirty});
Martin Reinecke's avatar
Martin Reinecke committed
273 274
      auto cfu = krn->corfunc(nx_dirty/2+1, 1./nu, nthreads);
      auto cfv = krn->corfunc(ny_dirty/2+1, 1./nv, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
275 276 277 278 279 280 281 282 283 284 285 286 287 288
      execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched)
        {
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
          int icfu = abs(int(nx_dirty/2)-int(i));
          for (size_t j=0; j<ny_dirty; ++j)
            {
            int icfv = abs(int(ny_dirty/2)-int(j));
            size_t i2 = nu-nx_dirty/2+i;
            if (i2>=nu) i2-=nu;
            size_t j2 = nv-ny_dirty/2+j;
            if (j2>=nv) j2-=nv;
            // FIXME: for some reason g++ warns about double-to-float conversion
            // here, even though there is an explicit cast...
Martin Reinecke's avatar
Martin Reinecke committed
289
            dirty.v(i,j) = tmav(i2,j2)*T(cfu[icfu]*cfv[icfv]);
Martin Reinecke's avatar
Martin Reinecke committed
290 291 292 293
            }
          }
        });
      }
Martin Reinecke's avatar
Martin Reinecke committed
294
    void grid2dirty_post2(
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
295
      mav<complex<T>,2> &tmav, mav<T,2> &dirty, T w) const
Martin Reinecke's avatar
Martin Reinecke committed
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313
      {
      checkShape(dirty.shape(), {nx_dirty,ny_dirty});
      double x0 = -0.5*nx_dirty*psx,
             y0 = -0.5*ny_dirty*psy;
      execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
        {
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
          T fx = T(x0+i*psx);
          fx *= fx;
          for (size_t j=0; j<=ny_dirty/2; ++j)
            {
            T fy = T(y0+j*psy);
            auto ws = wscreen(fx, fy*fy, w, true);
            size_t ix = nu-nx_dirty/2+i;
            if (ix>=nu) ix-=nu;
            size_t jx = nv-ny_dirty/2+j;
            if (jx>=nv) jx-=nv;
Martin Reinecke's avatar
Martin Reinecke committed
314
            dirty.v(i,j) += (tmav(ix,jx)*ws).real(); // lower left
Martin Reinecke's avatar
Martin Reinecke committed
315 316 317 318 319 320 321
            size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
            size_t ix2 = nu-nx_dirty/2+i2;
            if (ix2>=nu) ix2-=nu;
            size_t jx2 = nv-ny_dirty/2+j2;
            if (jx2>=nv) jx2-=nv;
            if ((i>0)&&(i<i2))
              {
Martin Reinecke's avatar
Martin Reinecke committed
322
              dirty.v(i2,j) += (tmav(ix2,jx)*ws).real(); // lower right
Martin Reinecke's avatar
Martin Reinecke committed
323
              if ((j>0)&&(j<j2))
Martin Reinecke's avatar
Martin Reinecke committed
324
                dirty.v(i2,j2) += (tmav(ix2,jx2)*ws).real(); // upper right
Martin Reinecke's avatar
Martin Reinecke committed
325 326
              }
            if ((j>0)&&(j<j2))
Martin Reinecke's avatar
Martin Reinecke committed
327
              dirty.v(i,j2) += (tmav(ix,jx2)*ws).real(); // upper left
Martin Reinecke's avatar
Martin Reinecke committed
328 329 330 331 332
            }
          }
        });
      }

Martin Reinecke's avatar
Martin Reinecke committed
333
    void grid2dirty(const mav<T,2> &grid,
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
334
      mav<T,2> &dirty) const
Martin Reinecke's avatar
Martin Reinecke committed
335 336
      {
      checkShape(grid.shape(), {nu,nv});
337
      mav<T,2> tmav({nu,nv});
Martin Reinecke's avatar
Martin Reinecke committed
338 339 340 341
      hartley2_2D<T>(grid, tmav, nthreads);
      grid2dirty_post(tmav, dirty);
      }

Martin Reinecke's avatar
Martin Reinecke committed
342
    void grid2dirty_c_overwrite_wscreen_add
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
343
      (mav<complex<T>,2> &grid, mav<T,2> &dirty, T w) const
Martin Reinecke's avatar
Martin Reinecke committed
344 345
      {
      checkShape(grid.shape(), {nu,nv});
Martin Reinecke's avatar
Martin Reinecke committed
346 347
      fmav<complex<T>> inout(grid);
      c2c(inout, inout, {0,1}, BACKWARD, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
348 349 350
      grid2dirty_post2(grid, dirty, w);
      }

Martin Reinecke's avatar
Martin Reinecke committed
351
    void dirty2grid_pre(const mav<T,2> &dirty,
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
352
      mav<T,2> &grid) const
Martin Reinecke's avatar
Martin Reinecke committed
353 354 355
      {
      checkShape(dirty.shape(), {nx_dirty, ny_dirty});
      checkShape(grid.shape(), {nu, nv});
Martin Reinecke's avatar
Martin Reinecke committed
356 357
      auto cfu = krn->corfunc(nx_dirty/2+1, 1./nu, nthreads);
      auto cfv = krn->corfunc(ny_dirty/2+1, 1./nv, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
358 359 360 361 362 363 364 365 366 367 368 369 370
      grid.fill(0);
      execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched)
        {
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
          int icfu = abs(int(nx_dirty/2)-int(i));
          for (size_t j=0; j<ny_dirty; ++j)
            {
            int icfv = abs(int(ny_dirty/2)-int(j));
            size_t i2 = nu-nx_dirty/2+i;
            if (i2>=nu) i2-=nu;
            size_t j2 = nv-ny_dirty/2+j;
            if (j2>=nv) j2-=nv;
Martin Reinecke's avatar
Martin Reinecke committed
371
            grid.v(i2,j2) = dirty(i,j)*T(cfu[icfu]*cfv[icfv]);
Martin Reinecke's avatar
Martin Reinecke committed
372 373 374 375
            }
          }
        });
      }
Martin Reinecke's avatar
Martin Reinecke committed
376
    void dirty2grid_pre2(const mav<T,2> &dirty,
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
377
      mav<complex<T>,2> &grid, T w) const
Martin Reinecke's avatar
Martin Reinecke committed
378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
      {
      checkShape(dirty.shape(), {nx_dirty, ny_dirty});
      checkShape(grid.shape(), {nu, nv});
      grid.fill(0);

      double x0 = -0.5*nx_dirty*psx,
             y0 = -0.5*ny_dirty*psy;
      execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
        {
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
          T fx = T(x0+i*psx);
          fx *= fx;
          for (size_t j=0; j<=ny_dirty/2; ++j)
            {
            T fy = T(y0+j*psy);
            auto ws = wscreen(fx, fy*fy, w, false);
            size_t ix = nu-nx_dirty/2+i;
            if (ix>=nu) ix-=nu;
            size_t jx = nv-ny_dirty/2+j;
            if (jx>=nv) jx-=nv;
Martin Reinecke's avatar
Martin Reinecke committed
399
            grid.v(ix,jx) = dirty(i,j)*ws; // lower left
Martin Reinecke's avatar
Martin Reinecke committed
400 401 402 403 404 405 406
            size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
            size_t ix2 = nu-nx_dirty/2+i2;
            if (ix2>=nu) ix2-=nu;
            size_t jx2 = nv-ny_dirty/2+j2;
            if (jx2>=nv) jx2-=nv;
            if ((i>0)&&(i<i2))
              {
Martin Reinecke's avatar
Martin Reinecke committed
407
              grid.v(ix2,jx) = dirty(i2,j)*ws; // lower right
Martin Reinecke's avatar
Martin Reinecke committed
408
              if ((j>0)&&(j<j2))
Martin Reinecke's avatar
Martin Reinecke committed
409
                grid.v(ix2,jx2) = dirty(i2,j2)*ws; // upper right
Martin Reinecke's avatar
Martin Reinecke committed
410 411
              }
            if ((j>0)&&(j<j2))
Martin Reinecke's avatar
Martin Reinecke committed
412
              grid.v(ix,jx2) = dirty(i,j2)*ws; // upper left
Martin Reinecke's avatar
Martin Reinecke committed
413 414 415 416 417
            }
          }
        });
      }

Martin Reinecke's avatar
Martin Reinecke committed
418
    void dirty2grid(const mav<T,2> &dirty,
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
419
      mav<T,2> &grid) const
Martin Reinecke's avatar
Martin Reinecke committed
420 421
      {
      dirty2grid_pre(dirty, grid);
Martin Reinecke's avatar
Martin Reinecke committed
422
      hartley2_2D<T>(grid, grid, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
423 424
      }

Martin Reinecke's avatar
Martin Reinecke committed
425
    void dirty2grid_c_wscreen(const mav<T,2> &dirty,
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
426
      mav<complex<T>,2> &grid, T w) const
Martin Reinecke's avatar
Martin Reinecke committed
427 428
      {
      dirty2grid_pre2(dirty, grid, w);
Martin Reinecke's avatar
Martin Reinecke committed
429 430
      fmav<complex<T>> inout(grid);
      c2c(inout, inout, {0,1}, FORWARD, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
431 432 433 434 435 436 437 438 439 440
      }

    void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
      {
      u=fmod1(u_in*psx)*nu;
      iu0 = min(int(u+ushift)-int(nu), maxiu0);
      v=fmod1(v_in*psy)*nv;
      iv0 = min(int(v+vshift)-int(nv), maxiv0);
      }

Martin Reinecke's avatar
Martin Reinecke committed
441
    void apply_wscreen(const mav<complex<T>,2> &dirty,
Martin Reinecke's avatar
Martin Reinecke committed
442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479
      mav<complex<T>,2> &dirty2, double w, bool adjoint) const
      {
      checkShape(dirty.shape(), {nx_dirty, ny_dirty});
      checkShape(dirty2.shape(), {nx_dirty, ny_dirty});

      double x0 = -0.5*nx_dirty*psx,
             y0 = -0.5*ny_dirty*psy;
      execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
        {
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
          T fx = T(x0+i*psx);
          fx *= fx;
          for (size_t j=0; j<=ny_dirty/2; ++j)
            {
            T fy = T(y0+j*psy);
            auto ws = wscreen(fx, fy*fy, T(w), adjoint);
            dirty2(i,j) = dirty(i,j)*ws; // lower left
            size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
            if ((i>0)&&(i<i2))
              {
              dirty2(i2,j) = dirty(i2,j)*ws; // lower right
              if ((j>0)&&(j<j2))
                dirty2(i2,j2) = dirty(i2,j2)*ws; // upper right
              }
            if ((j>0)&&(j<j2))
              dirty2(i,j2) = dirty(i,j2)*ws; // upper left
            }
          }
        });
      }
  };

// Base-2 logarithm of the tile size used by Helper: its buffers have
// 2*nsafe+(1<<logsquare) entries per axis, and the buffer origin is aligned
// to multiples of (1<<logsquare) (minus nsafe).
constexpr int logsquare=4;

template<typename T, typename T2=complex<T>> class Helper
  {
  private:
Martin Reinecke's avatar
Martin Reinecke committed
480
    const GridderConfig<T> &gconf;
Martin Reinecke's avatar
Martin Reinecke committed
481 482 483 484 485 486
    int nu, nv, nsafe, supp;
    const T2 *grid_r;
    T2 *grid_w;
    int su, sv;
    int iu0, iv0; // start index of the current visibility
    int bu0, bv0; // start index of the current buffer
Martin Reinecke's avatar
Martin Reinecke committed
487
    T wfac;
Martin Reinecke's avatar
Martin Reinecke committed
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536

    vector<T2> rbuf, wbuf;
    bool do_w_gridding;
    double w0, xdw;
    size_t nexp;
    size_t nvecs;
    vector<std::mutex> &locks;

    void dump() const
      {
      if (bu0<-nsafe) return; // nothing written into buffer yet

      int idxu = (bu0+nu)%nu;
      int idxv0 = (bv0+nv)%nv;
      for (int iu=0; iu<su; ++iu)
        {
        int idxv = idxv0;
        {
        std::lock_guard<std::mutex> lock(locks[idxu]);
        for (int iv=0; iv<sv; ++iv)
          {
          grid_w[idxu*nv + idxv] += wbuf[iu*sv + iv];
          if (++idxv>=nv) idxv=0;
          }
        }
        if (++idxu>=nu) idxu=0;
        }
      }

    void load()
      {
      int idxu = (bu0+nu)%nu;
      int idxv0 = (bv0+nv)%nv;
      for (int iu=0; iu<su; ++iu)
        {
        int idxv = idxv0;
        for (int iv=0; iv<sv; ++iv)
          {
          rbuf[iu*sv + iv] = grid_r[idxu*nv + idxv];
          if (++idxv>=nv) idxv=0;
          }
        if (++idxu>=nu) idxu=0;
        }
      }

  public:
    const T2 *p0r;
    T2 *p0w;
    static constexpr size_t vlen=native_simd<T>::size();
Martin Reinecke's avatar
Martin Reinecke committed
537
    union kbuf {
Martin Reinecke's avatar
Martin Reinecke committed
538 539
      T scalar[64];
      native_simd<T> simd[64/vlen];
Martin Reinecke's avatar
Martin Reinecke committed
540 541
      };
    kbuf bufx, bufy;
Martin Reinecke's avatar
Martin Reinecke committed
542

Martin Reinecke's avatar
Martin Reinecke committed
543
    Helper(const GridderConfig<T> &gconf_, const T2 *grid_r_, T2 *grid_w_,
Martin Reinecke's avatar
Martin Reinecke committed
544 545
      vector<std::mutex> &locks_, double w0_=-1, double dw_=-1)
      : gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()),
Martin Reinecke's avatar
Martin Reinecke committed
546
        supp(gconf.Supp()), grid_r(grid_r_),
Martin Reinecke's avatar
Martin Reinecke committed
547 548 549 550 551 552 553 554
        grid_w(grid_w_), su(2*nsafe+(1<<logsquare)), sv(2*nsafe+(1<<logsquare)),
        bu0(-1000000), bv0(-1000000),
        rbuf(su*sv*(grid_r!=nullptr),T(0)),
        wbuf(su*sv*(grid_w!=nullptr),T(0)),
        do_w_gridding(dw_>0),
        w0(w0_),
        xdw(T(1)/dw_),
        nexp(2*supp + do_w_gridding),
Martin Reinecke's avatar
Martin Reinecke committed
555
        nvecs((nexp+vlen-1)/vlen),
Martin Reinecke's avatar
Martin Reinecke committed
556
        locks(locks_)
Martin Reinecke's avatar
Martin Reinecke committed
557
      {
Martin Reinecke's avatar
Martin Reinecke committed
558
      MR_assert(supp<=64, "support too large");
Martin Reinecke's avatar
Martin Reinecke committed
559
      }
Martin Reinecke's avatar
Martin Reinecke committed
560 561 562
    ~Helper() { if (grid_w) dump(); }

    int lineJump() const { return sv; }
Martin Reinecke's avatar
Martin Reinecke committed
563
    T Wfac() const { return wfac; }
Martin Reinecke's avatar
Martin Reinecke committed
564 565
    void prep(const UVW &in)
      {
Martin Reinecke's avatar
Martin Reinecke committed
566
      const auto &krn(*(gconf.krn));
Martin Reinecke's avatar
Martin Reinecke committed
567 568 569 570 571
      double u, v;
      gconf.getpix(in.u, in.v, u, v, iu0, iv0);
      double xsupp=2./supp;
      double x0 = xsupp*(iu0-u);
      double y0 = xsupp*(iv0-v);
Martin Reinecke's avatar
Martin Reinecke committed
572 573
      krn.eval(T(x0), bufx.simd);
      krn.eval(T(y0), bufy.simd);
Martin Reinecke's avatar
Martin Reinecke committed
574
      if (do_w_gridding)
Martin Reinecke's avatar
Martin Reinecke committed
575
        wfac = krn.eval_single(T(xdw*xsupp*abs(w0-in.w)));
Martin Reinecke's avatar
Martin Reinecke committed
576 577 578 579 580 581 582 583 584 585 586 587 588 589 590
      if ((iu0<bu0) || (iv0<bv0) || (iu0+supp>bu0+su) || (iv0+supp>bv0+sv))
        {
        if (grid_w) { dump(); fill(wbuf.begin(), wbuf.end(), T(0)); }
        bu0=((((iu0+nsafe)>>logsquare)<<logsquare))-nsafe;
        bv0=((((iv0+nsafe)>>logsquare)<<logsquare))-nsafe;
        if (grid_r) load();
        }
      p0r = grid_r ? rbuf.data() + sv*(iu0-bu0) + iv0-bv0 : nullptr;
      p0w = grid_w ? wbuf.data() + sv*(iu0-bu0) + iv0-bv0 : nullptr;
      }
  };

template<class T, class Serv> class SubServ
  {
  private:
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
591 592
    Serv &srv;
    mav<idx_t,1> subidx;
Martin Reinecke's avatar
Martin Reinecke committed
593 594

  public:
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
595
    SubServ(Serv &orig, const mav<idx_t,1> &subidx_)
Martin Reinecke's avatar
Martin Reinecke committed
596 597 598 599
      : srv(orig), subidx(subidx_){}
    size_t Nvis() const { return subidx.size(); }
    const Baselines &getBaselines() const { return srv.getBaselines(); }
    UVW getCoord(size_t i) const
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
600
      { return srv.getCoord(subidx(i)); }
Martin Reinecke's avatar
Martin Reinecke committed
601
    complex<T> getVis(size_t i) const
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
602 603 604 605 606 607
      { return srv.getVis(subidx(i)); }
    idx_t getIdx(size_t i) const { return srv.getIdx(subidx(i)); }
    void setVis (size_t i, const complex<T> &v)
      { srv.setVis(subidx(i), v); }
    void addVis (size_t i, const complex<T> &v)
      { srv.addVis(subidx(i), v); }
Martin Reinecke's avatar
Martin Reinecke committed
608 609 610 611 612 613
  };

template<class T, class T2> class MsServ
  {
  private:
    const Baselines &baselines;
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
614
    mav<idx_t,1> idx;
Martin Reinecke's avatar
Martin Reinecke committed
615
    T2 ms;
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
616
    mav<T,2> wgt;
Martin Reinecke's avatar
Martin Reinecke committed
617 618 619 620 621 622 623
    size_t nvis;
    bool have_wgt;

  public:
    using Tsub = SubServ<T, MsServ>;

    MsServ(const Baselines &baselines_,
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
624
    const mav<idx_t,1> &idx_, T2 ms_, const mav<T,2> &wgt_)
Martin Reinecke's avatar
Martin Reinecke committed
625 626 627 628 629 630
      : baselines(baselines_), idx(idx_), ms(ms_), wgt(wgt_),
        nvis(idx.shape(0)), have_wgt(wgt.size()!=0)
      {
      checkShape(ms.shape(), {baselines.Nrows(), baselines.Nchannels()});
      if (have_wgt) checkShape(wgt.shape(), ms.shape());
      }
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
631
    Tsub getSubserv(const mav<idx_t,1> &subidx)
Martin Reinecke's avatar
Martin Reinecke committed
632 633 634 635 636 637 638 639 640 641 642
      { return Tsub(*this, subidx); }
    size_t Nvis() const { return nvis; }
    const Baselines &getBaselines() const { return baselines; }
    UVW getCoord(size_t i) const
      { return baselines.effectiveCoord(idx(i)); }
    complex<T> getVis(size_t i) const
      {
      auto rc = baselines.getRowChan(idx(i));
      return have_wgt ? ms(rc.row, rc.chan)*wgt(rc.row, rc.chan)
                      : ms(rc.row, rc.chan);
      }
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
643 644
    idx_t getIdx(size_t i) const { return idx(i); }
    void setVis (size_t i, const complex<T> &v)
Martin Reinecke's avatar
Martin Reinecke committed
645 646
      {
      auto rc = baselines.getRowChan(idx(i));
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
647
      ms.w()(rc.row, rc.chan) = have_wgt ? v*wgt(rc.row, rc.chan) : v;
Martin Reinecke's avatar
Martin Reinecke committed
648
      }
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
649
    void addVis (size_t i, const complex<T> &v)
Martin Reinecke's avatar
Martin Reinecke committed
650 651
      {
      auto rc = baselines.getRowChan(idx(i));
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
652
      ms.v(rc.row, rc.chan) += have_wgt ? v*wgt(rc.row, rc.chan) : v;
Martin Reinecke's avatar
Martin Reinecke committed
653 654 655 656
      }
  };
// Factory function for MsServ objects; it exists so that the template
// arguments can be deduced from the types of ms and wgt.
template<class T, class T2> MsServ<T, T2> makeMsServ
  (const Baselines &baselines,
   const mav<idx_t,1> &idx, T2 &ms, const mav<T,2> &wgt)
  {
  return MsServ<T, T2>(baselines, idx, ms, wgt);
  }

template<typename T, typename Serv> void x2grid_c
Martin Reinecke's avatar
Martin Reinecke committed
661
  (const GridderConfig<T> &gconf, Serv &srv, mav<complex<T>,2> &grid,
Martin Reinecke's avatar
Martin Reinecke committed
662 663 664 665 666 667 668 669 670 671 672 673
  double w0=-1, double dw=-1)
  {
  checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()});
  MR_assert(grid.contiguous(), "grid is not contiguous");
  size_t supp = gconf.Supp();
  size_t nthreads = gconf.Nthreads();
  bool do_w_gridding = dw>0;
  vector<std::mutex> locks(gconf.Nu());

  size_t np = srv.Nvis();
  execGuided(np, nthreads, 100, 0.2, [&](Scheduler &sched)
    {
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
674
    Helper<T> hlp(gconf, nullptr, grid.vdata(), locks, w0, dw);
Martin Reinecke's avatar
Martin Reinecke committed
675
    int jump = hlp.lineJump();
Martin Reinecke's avatar
Martin Reinecke committed
676 677
    const T * DUCC0_RESTRICT ku = hlp.bufx.scalar;
    const T * DUCC0_RESTRICT kv = hlp.bufy.scalar;
Martin Reinecke's avatar
Martin Reinecke committed
678 679 680 681 682 683

    while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
      {
      UVW coord = srv.getCoord(ipart);
      auto flip = coord.FixW();
      hlp.prep(coord);
Martin Reinecke's avatar
Martin Reinecke committed
684
      auto * DUCC0_RESTRICT ptr = hlp.p0w;
Martin Reinecke's avatar
Martin Reinecke committed
685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707
      auto v(srv.getVis(ipart));
      if (do_w_gridding) v*=hlp.Wfac();
      if (flip) v=conj(v);
      for (size_t cu=0; cu<supp; ++cu)
        {
        complex<T> tmp(v*ku[cu]);
        size_t cv=0;
        for (; cv+3<supp; cv+=4)
          {
          ptr[cv  ] += tmp*kv[cv  ];
          ptr[cv+1] += tmp*kv[cv+1];
          ptr[cv+2] += tmp*kv[cv+2];
          ptr[cv+3] += tmp*kv[cv+3];
          }
        for (; cv<supp; ++cv)
          ptr[cv] += tmp*kv[cv];
        ptr+=jump;
        }
      }
    });
  }

template<typename T, typename Serv> void grid2x_c
Martin Reinecke's avatar
Martin Reinecke committed
708
  (const GridderConfig<T> &gconf, const mav<complex<T>,2> &grid,
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
709
  Serv &srv, double w0=-1, double dw=-1)
Martin Reinecke's avatar
Martin Reinecke committed
710 711 712 713 714 715 716 717 718 719 720 721 722 723
  {
  checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()});
  MR_assert(grid.contiguous(), "grid is not contiguous");
  size_t supp = gconf.Supp();
  size_t nthreads = gconf.Nthreads();
  bool do_w_gridding = dw>0;
  vector<std::mutex> locks(gconf.Nu());

  // Loop over sampling points
  size_t np = srv.Nvis();
  execGuided(np, nthreads, 1000, 0.5, [&](Scheduler &sched)
    {
    Helper<T> hlp(gconf, grid.data(), nullptr, locks, w0, dw);
    int jump = hlp.lineJump();
Martin Reinecke's avatar
Martin Reinecke committed
724 725
    const T * DUCC0_RESTRICT ku = hlp.bufx.scalar;
    const T * DUCC0_RESTRICT kv = hlp.bufy.scalar;
Martin Reinecke's avatar
Martin Reinecke committed
726 727 728 729 730 731 732

    while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
      {
      UVW coord = srv.getCoord(ipart);
      auto flip = coord.FixW();
      hlp.prep(coord);
      complex<T> r = 0;
Martin Reinecke's avatar
Martin Reinecke committed
733
      const auto * DUCC0_RESTRICT ptr = hlp.p0r;
Martin Reinecke's avatar
Martin Reinecke committed
734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754
      for (size_t cu=0; cu<supp; ++cu)
        {
        complex<T> tmp(0);
        size_t cv=0;
        for (; cv+3<supp; cv+=4)
          tmp += ptr[cv  ]*kv[cv  ]
               + ptr[cv+1]*kv[cv+1]
               + ptr[cv+2]*kv[cv+2]
               + ptr[cv+3]*kv[cv+3];
        for (; cv<supp; ++cv)
          tmp += ptr[cv] * kv[cv];
        r += tmp*ku[cu];
        ptr += jump;
        }
      if (flip) r=conj(r);
      if (do_w_gridding) r*=hlp.Wfac();
      srv.addVis(ipart, r);
      }
    });
  }

template<typename T> void apply_global_corrections(const GridderConfig<T> &gconf,
  mav<T,2> &dirty, double dw, bool divide_by_n)
Martin Reinecke's avatar
Martin Reinecke committed
757 758 759 760 761 762 763 764
  {
  auto nx_dirty=gconf.Nxdirty();
  auto ny_dirty=gconf.Nydirty();
  size_t nthreads = gconf.Nthreads();
  auto psx=gconf.Pixsize_x();
  auto psy=gconf.Pixsize_y();
  double x0 = -0.5*nx_dirty*psx,
         y0 = -0.5*ny_dirty*psy;
Martin Reinecke's avatar
Martin Reinecke committed
765 766
  auto cfu = gconf.krn->corfunc(nx_dirty/2+1, 1./gconf.Nu(), nthreads);
  auto cfv = gconf.krn->corfunc(ny_dirty/2+1, 1./gconf.Nv(), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
767 768 769 770 771 772 773 774 775 776 777 778 779 780 781
  execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
      {
      auto fx = T(x0+i*psx);
      fx *= fx;
      for (size_t j=0; j<=ny_dirty/2; ++j)
        {
        auto fy = T(y0+j*psy);
        fy*=fy;
        T fct = 0;
        auto tmp = 1-fx-fy;
        if (tmp>=0)
          {
          auto nm1 = (-fx-fy)/(sqrt(tmp)+1); // accurate form of sqrt(1-x-y)-1
Martin Reinecke's avatar
Martin Reinecke committed
782
          fct = T(gconf.krn->corfunc(nm1*dw));
Martin Reinecke's avatar
Martin Reinecke committed
783 784 785 786 787 788 789 790 791 792
          if (divide_by_n)
            fct /= nm1+1;
          }
        else // beyond the horizon, don't really know what to do here
          {
          if (divide_by_n)
            fct=0;
          else
            {
            auto nm1 = sqrt(-tmp)-1;
Martin Reinecke's avatar
Martin Reinecke committed
793
            fct = T(gconf.krn->corfunc(nm1*dw));
Martin Reinecke's avatar
Martin Reinecke committed
794 795 796 797
            }
          }
        fct *= T(cfu[nx_dirty/2-i]*cfv[ny_dirty/2-j]);
        size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
Martin Reinecke's avatar
Martin Reinecke committed
798
        dirty.v(i,j)*=fct;
Martin Reinecke's avatar
Martin Reinecke committed
799 800
        if ((i>0)&&(i<i2))
          {
Martin Reinecke's avatar
Martin Reinecke committed
801
          dirty.v(i2,j)*=fct;
Martin Reinecke's avatar
Martin Reinecke committed
802
          if ((j>0)&&(j<j2))
Martin Reinecke's avatar
Martin Reinecke committed
803
            dirty.v(i2,j2)*=fct;
Martin Reinecke's avatar
Martin Reinecke committed
804 805
          }
        if ((j>0)&&(j<j2))
Martin Reinecke's avatar
Martin Reinecke committed
806
          dirty.v(i,j2)*=fct;
Martin Reinecke's avatar
Martin Reinecke committed
807 808 809 810 811
        }
      }
    });
  }

template<typename T, typename Serv> class WgridHelper
Martin Reinecke's avatar
Martin Reinecke committed
813 814
  {
  private:
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
815
    Serv &srv;
Martin Reinecke's avatar
Martin Reinecke committed
816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838
    double wmin, dw;
    size_t nplanes, supp;
    vector<vector<idx_t>> minplane;
    size_t verbosity;

    int curplane;
    vector<idx_t> subidx;

    static void wminmax(const Serv &srv, double &wmin, double &wmax)
      {
      size_t nvis = srv.Nvis();

      wmin= 1e38;
      wmax=-1e38;
      // FIXME maybe this can be done more intelligently
      for (size_t ipart=0; ipart<nvis; ++ipart)
        {
        auto wval = abs(srv.getCoord(ipart).w);
        wmin = min(wmin,wval);
        wmax = max(wmax,wval);
        }
      }

Martin Reinecke's avatar
Martin Reinecke committed
839 840
    template<typename T2> static void update_idx(vector<T2> &v, const vector<T2> &add,
      const vector<T2> &del)
Martin Reinecke's avatar
Martin Reinecke committed
841 842
      {
      MR_assert(v.size()>=del.size(), "must not happen");
Martin Reinecke's avatar
Martin Reinecke committed
843
      vector<T2> res;
Martin Reinecke's avatar
Martin Reinecke committed
844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865
      res.reserve((v.size()+add.size())-del.size());
      auto iin=v.begin(), ein=v.end();
      auto iadd=add.begin(), eadd=add.end();
      auto irem=del.begin(), erem=del.end();

      while(iin!=ein)
        {
        if ((irem!=erem) && (*iin==*irem))
          {  ++irem; ++iin; } // skip removed entry
        else if ((iadd!=eadd) && (*iadd<*iin))
           res.push_back(*(iadd++)); // add new entry
        else
          res.push_back(*(iin++));
        }
      MR_assert(irem==erem, "must not happen");
      while(iadd!=eadd)
        res.push_back(*(iadd++));
      MR_assert(res.size()==(v.size()+add.size())-del.size(), "must not happen");
      v.swap(res);
      }

  public:
Martin Reinecke's avatar
Martin Reinecke committed
866
    WgridHelper(const GridderConfig<T> &gconf, Serv &srv_, size_t verbosity_)
Martin Reinecke's avatar
Martin Reinecke committed
867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924
      : srv(srv_), verbosity(verbosity_), curplane(-1)
      {
      size_t nvis = srv.Nvis();
      size_t nthreads = gconf.Nthreads();
      double wmax;

      wminmax(srv, wmin, wmax);
      if (verbosity>0) cout << "Using " << nthreads << " thread"
                            << ((nthreads!=1) ? "s" : "") << endl;
      if (verbosity>0) cout << "W range: " << wmin << " to " << wmax << endl;

      double x0 = -0.5*gconf.Nxdirty()*gconf.Pixsize_x(),
             y0 = -0.5*gconf.Nydirty()*gconf.Pixsize_y();
      double nmin = sqrt(max(1.-x0*x0-y0*y0,0.))-1.;
      if (x0*x0+y0*y0>1.)
        nmin = -sqrt(abs(1.-x0*x0-y0*y0))-1.;
      dw = 0.25/abs(nmin);
      nplanes = size_t((wmax-wmin)/dw+2);
      dw = (1.+1e-13)*(wmax-wmin)/(nplanes-1);

      supp = gconf.Supp();
      wmin -= (0.5*supp-1)*dw;
      wmax += (0.5*supp-1)*dw;
      nplanes += supp-2;
      if (verbosity>0) cout << "Kernel support: " << supp << endl;
      if (verbosity>0) cout << "nplanes: " << nplanes << endl;

      minplane.resize(nplanes);
#if 0
      // extra short, but potentially inefficient version:
      for (size_t ipart=0; ipart<nvis; ++ipart)
        {
        int plane0 = max(0,int(1+(abs(srv.getCoord(ipart).w)-(0.5*supp*dw)-wmin)/dw));
        minplane[plane0].push_back(idx_t(ipart));
        }
#else
      // more efficient: precalculate final vector sizes and avoid reallocations
      vector<size_t> cnt(nplanes,0);
      for(size_t ipart=0; ipart<nvis; ++ipart)
        {
        int plane0 = max(0,int(1+(abs(srv.getCoord(ipart).w)-(0.5*supp*dw)-wmin)/dw));
        ++cnt[plane0];
        }

      // fill minplane
      for (size_t j=0; j<nplanes; ++j)
        minplane[j].resize(cnt[j]);
      vector<size_t> ofs(nplanes, 0);
      for (size_t ipart=0; ipart<nvis; ++ipart)
        {
        int plane0 = max(0,int(1+(abs(srv.getCoord(ipart).w)-(0.5*supp*dw)-wmin)/dw));
        minplane[plane0][ofs[plane0]++]=idx_t(ipart);
        }
#endif
      }

    typename Serv::Tsub getSubserv() const
      {
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
925
      auto subidx2 = mav<idx_t, 1>(subidx.data(), {subidx.size()});
Martin Reinecke's avatar
Martin Reinecke committed
926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942
      return srv.getSubserv(subidx2);
      }
    double W() const { return wmin+curplane*dw; }
    size_t Nvis() const { return subidx.size(); }
    double DW() const { return dw; }
    bool advance()
      {
      if (++curplane>=int(nplanes)) return false;
      update_idx(subidx, minplane[curplane], curplane>=int(supp) ? minplane[curplane-supp] : vector<idx_t>());
      if (verbosity>1)
        cout << "Working on plane " << curplane << " containing " << subidx.size()
             << " visibilities" << endl;
      return true;
      }
  };

template<typename T, typename Serv> void x2dirty(
Martin Reinecke's avatar
Martin Reinecke committed
943
  const GridderConfig<T> &gconf, Serv &srv, mav<T,2> &dirty,
Martin Reinecke's avatar
Martin Reinecke committed
944 945 946 947 948
  bool do_wstacking, size_t verbosity)
  {
  if (do_wstacking)
    {
    if (verbosity>0) cout << "Gridding using improved w-stacking" << endl;
Martin Reinecke's avatar
Martin Reinecke committed
949
    WgridHelper<T, Serv> hlp(gconf, srv, verbosity);
Martin Reinecke's avatar
Martin Reinecke committed
950 951
    double dw = hlp.DW();
    dirty.fill(0);
952
    mav<complex<T>,2> grid({gconf.Nu(),gconf.Nv()});
Martin Reinecke's avatar
Martin Reinecke committed
953 954 955 956
    while(hlp.advance())  // iterate over w planes
      {
      if (hlp.Nvis()==0) continue;
      grid.fill(0);
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
957 958
      auto serv = hlp.getSubserv();
      x2grid_c(gconf, serv, grid, hlp.W(), dw);
Martin Reinecke's avatar
Martin Reinecke committed
959 960 961
      gconf.grid2dirty_c_overwrite_wscreen_add(grid, dirty, T(hlp.W()));
      }
    // correct for w gridding etc.
Martin Reinecke's avatar
Martin Reinecke committed
962
    apply_global_corrections(gconf, dirty, dw, true);
Martin Reinecke's avatar
Martin Reinecke committed
963 964 965 966 967 968 969 970
    }
  else
    {
    if (verbosity>0)
      cout << "Gridding without w-stacking: " << srv.Nvis()
           << " visibilities" << endl;
    if (verbosity>0) cout << "Using " << gconf.Nthreads() << " threads" << endl;

971 972
    mav<complex<T>,2> grid({gconf.Nu(), gconf.Nv()});
    grid.fill(0.);
Martin Reinecke's avatar
Martin Reinecke committed
973
    x2grid_c(gconf, srv, grid);
974
    mav<T,2> rgrid(grid.shape());
Martin Reinecke's avatar
Martin Reinecke committed
975 976
    complex2hartley(grid, rgrid, gconf.Nthreads());
    gconf.grid2dirty(rgrid, dirty);
Martin Reinecke's avatar
Martin Reinecke committed
977 978 979 980
    }
  }

template<typename T, typename Serv> void dirty2x(
Martin Reinecke's avatar
Martin Reinecke committed
981
  const GridderConfig<T> &gconf,  const mav<T,2> &dirty,
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
982
  Serv &srv, bool do_wstacking, size_t verbosity)
Martin Reinecke's avatar
Martin Reinecke committed
983 984 985 986 987
  {
  if (do_wstacking)
    {
    size_t nx_dirty=gconf.Nxdirty(), ny_dirty=gconf.Nydirty();
    if (verbosity>0) cout << "Degridding using improved w-stacking" << endl;
Martin Reinecke's avatar
Martin Reinecke committed
988
    WgridHelper<T, Serv> hlp(gconf, srv, verbosity);
Martin Reinecke's avatar
Martin Reinecke committed
989
    double dw = hlp.DW();
990
    mav<T,2> tdirty({nx_dirty,ny_dirty});
Martin Reinecke's avatar
Martin Reinecke committed
991 992
    for (size_t i=0; i<nx_dirty; ++i)
      for (size_t j=0; j<ny_dirty; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
993
        tdirty.v(i,j) = dirty(i,j);
Martin Reinecke's avatar
Martin Reinecke committed
994
    // correct for w gridding etc.
Martin Reinecke's avatar
Martin Reinecke committed
995
    apply_global_corrections(gconf, tdirty, dw, true);
996
    mav<complex<T>,2> grid({gconf.Nu(),gconf.Nv()});
Martin Reinecke's avatar
Martin Reinecke committed
997 998 999
    while(hlp.advance())  // iterate over w planes
      {
      if (hlp.Nvis()==0) continue;
Martin Reinecke's avatar
Martin Reinecke committed
1000
      gconf.dirty2grid_c_wscreen(tdirty, grid, T(hlp.W()));
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
1001 1002
      auto serv = hlp.getSubserv();
      grid2x_c(gconf, grid, serv, hlp.W(), dw);
Martin Reinecke's avatar
Martin Reinecke committed
1003 1004 1005 1006 1007 1008 1009 1010 1011
      }
    }
  else
    {
    if (verbosity>0)
      cout << "Degridding without w-stacking: " << srv.Nvis()
           << " visibilities" << endl;
    if (verbosity>0) cout << "Using " << gconf.Nthreads() << " threads" << endl;

1012
    mav<T,2> grid({gconf.Nu(), gconf.Nv()});
Martin Reinecke's avatar
Martin Reinecke committed
1013
    gconf.dirty2grid(dirty, grid);
1014
    mav<complex<T>,2> grid2(grid.shape());
Martin Reinecke's avatar
Martin Reinecke committed
1015 1016
    hartley2complex(grid, grid2, gconf.Nthreads());
    grid2x_c(gconf, grid2, srv);
Martin Reinecke's avatar
Martin Reinecke committed
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030
    }
  }

// Splits `nwork` items as evenly as possible among `nshares` shares and
// returns the half-open range [lo, hi) belonging to share `myshare`.
// The first nwork%nshares shares receive one item more than the others.
// `nshares` must be nonzero (it is used as a divisor).
//
// Declared inline because this is a non-template function defined in a
// header; without it, including the header from more than one translation
// unit results in multiple definitions at link time.
inline void calc_share(size_t nshares, size_t myshare, size_t nwork, size_t &lo,
  size_t &hi)
  {
  size_t nbase = nwork/nshares;
  size_t additional = nwork%nshares;
  lo = myshare*nbase + ((myshare<additional) ? myshare : additional);
  hi = lo+nbase+(myshare<additional);
  }


// Returns the indices (as produced by baselines.getIdx()) of all
// visibilities that should be processed, ordered by grid tile.
//
// A (row, channel) entry is kept if its value in `ms` has nonzero norm and
// its weight is nonzero; each of the two tests is skipped when the
// respective array is empty. The kept entries are assigned to square tiles
// of 2^logsquare pixels based on the pixel returned by gconf.getpix(), and
// are emitted tile by tile using a counting sort (histogram, prefix sum,
// scatter).
template<typename T> vector<idx_t> getWgtIndices(const Baselines &baselines,
  const GridderConfig<T> &gconf, const mav<T,2> &wgt,
  const mav<complex<T>,2> &ms)
  {
  const size_t nrow=baselines.Nrows();
  const size_t nchan=baselines.Nchannels();
  const size_t nsafe=gconf.Nsafe();
  const bool have_wgt=wgt.size()!=0;
  if (have_wgt) checkShape(wgt.shape(),{nrow,nchan});
  const bool have_ms=ms.size()!=0;
  if (have_ms) checkShape(ms.shape(), {nrow,nchan});
  constexpr int side=1<<logsquare;
  const size_t nbu = (gconf.Nu()+1+side-1) >> logsquare;
  const size_t nbv = (gconf.Nv()+1+side-1) >> logsquare;
  vector<idx_t> acc(nbu*nbv+1,0);   // acc[t+1]: number of entries in tile t
  vector<idx_t> tile(nrow*nchan);   // tile of every entry, ~idx_t(0) if dropped

  idx_t pos=0;
  for (idx_t irow=0; irow<nrow; ++irow)
    for (idx_t ichan=0; ichan<nchan; ++ichan, ++pos)
      {
      if (((!have_ms ) || (norm(ms(irow,ichan))!=0)) &&
          ((!have_wgt) || (wgt(irow,ichan)!=0)))
        {
        auto uvw = baselines.effectiveCoord(RowChan{irow,idx_t(ichan)});
        if (uvw.w<0) uvw.Flip();
        double u, v;
        int iu0, iv0;
        gconf.getpix(uvw.u, uvw.v, u, v, iu0, iv0);
        iu0 = (iu0+nsafe)>>logsquare;
        iv0 = (iv0+nsafe)>>logsquare;
        const auto cell = nbv*iu0 + iv0;
        ++acc[cell + 1];
        tile[pos] = cell;
        }
      else
        tile[pos] = ~idx_t(0);
      }

  // prefix sum: afterwards acc[t] is the output offset of tile t
  for (size_t t=1; t<acc.size(); ++t)
    acc[t] += acc[t-1];

  vector<idx_t> res(acc.back());
  size_t entry=0;
  for (size_t irow=0; irow<nrow; ++irow)
    for (size_t ichan=0; ichan<nchan; ++ichan, ++entry)
      {
      if (tile[entry]==(~idx_t(0))) continue;
      res[acc[tile[entry]]++] = baselines.getIdx(irow, ichan);
      }
  return res;
  }

template<typename T> void ms2dirty(const mav<double,2> &uvw,
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
1077 1078 1079
  const mav<double,1> &freq, const mav<complex<T>,2> &ms,
  const mav<T,2> &wgt, double pixsize_x, double pixsize_y, size_t nu, size_t nv, double epsilon,
  bool do_wstacking, size_t nthreads, mav<T,2> &dirty, size_t verbosity,
Martin Reinecke's avatar
Martin Reinecke committed
1080 1081 1082
  bool negate_v=false)
  {
  Baselines baselines(uvw, freq, negate_v);
Martin Reinecke's avatar
Martin Reinecke committed
1083
  GridderConfig<T> gconf(dirty.shape(0), dirty.shape(1), nu, nv, epsilon, pixsize_x, pixsize_y, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
1084
  auto idx = getWgtIndices(baselines, gconf, wgt, ms);
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
1085 1086 1087
  auto idx2 = mav<idx_t,1>(idx.data(),{idx.size()});
  auto serv = makeMsServ(baselines,idx2,ms,wgt);
  x2dirty(gconf, serv, dirty, do_wstacking, verbosity);
Martin Reinecke's avatar
Martin Reinecke committed
1088 1089
  }

template<typename T> void dirty2ms(const mav<double,2> &uvw,
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
1091 1092 1093
  const mav<double,1> &freq, const mav<T,2> &dirty,
  const mav<T,2> &wgt, double pixsize_x, double pixsize_y, size_t nu, size_t nv,double epsilon,
  bool do_wstacking, size_t nthreads, mav<complex<T>,2> &ms,
Martin Reinecke's avatar
Martin Reinecke committed
1094 1095 1096
  size_t verbosity, bool negate_v=false)
  {
  Baselines baselines(uvw, freq, negate_v);
Martin Reinecke's avatar
Martin Reinecke committed
1097
  GridderConfig<T> gconf(dirty.shape(0), dirty.shape(1), nu, nv, epsilon, pixsize_x, pixsize_y, nthreads);
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
1098
  mav<complex<T>,2> null_ms(nullptr, {0,0}, true);
Martin Reinecke's avatar
Martin Reinecke committed
1099
  auto idx = getWgtIndices(baselines, gconf, wgt, null_ms);
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
1100
  auto idx2 = mav<idx_t,1>(idx.data(),{idx.size()});
Martin Reinecke's avatar
Martin Reinecke committed
1101
  ms.fill(0);
Martin Reinecke's avatar
stage 2  
Martin Reinecke committed
1102 1103
  auto serv = makeMsServ(baselines,idx2,ms,wgt);
  dirty2x(gconf, dirty, serv, do_wstacking, verbosity);
Martin Reinecke's avatar
Martin Reinecke committed
1104 1105
  }

} // namespace detail_gridder

// public names
using detail_gridder::ms2dirty;
using detail_gridder::dirty2ms;

} // namespace ducc0

#endif