gridder_cxx.h 42.4 KB
Newer Older
Martin Reinecke's avatar
Martin Reinecke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#ifndef GRIDDER_CXX_H
#define GRIDDER_CXX_H

/*
 *  This file is part of nifty_gridder.
 *
 *  nifty_gridder is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  nifty_gridder is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with nifty_gridder; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

22
/* Copyright (C) 2019-2020 Max-Planck-Society
Martin Reinecke's avatar
Martin Reinecke committed
23
24
25
26
27
28
29
30
   Author: Martin Reinecke */

#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cmath>
#include <vector>
#include <array>
Martin Reinecke's avatar
Martin Reinecke committed
31
#include <memory>
Martin Reinecke's avatar
Martin Reinecke committed
32

Martin Reinecke's avatar
Martin Reinecke committed
33
34
35
#include "ducc0/infra/error_handling.h"
#include "ducc0/math/fft.h"
#include "ducc0/infra/threading.h"
Martin Reinecke's avatar
Martin Reinecke committed
36
#include "ducc0/infra/misc_utils.h"
Martin Reinecke's avatar
Martin Reinecke committed
37
38
#include "ducc0/infra/useful_macros.h"
#include "ducc0/infra/mav.h"
Martin Reinecke's avatar
Martin Reinecke committed
39
#include "ducc0/infra/simd.h"
Martin Reinecke's avatar
Martin Reinecke committed
40
#include "ducc0/infra/timers.h"
Martin Reinecke's avatar
Martin Reinecke committed
41
#include "ducc0/math/gridding_kernel.h"
Martin Reinecke's avatar
Martin Reinecke committed
42

Martin Reinecke's avatar
Martin Reinecke committed
43
namespace ducc0 {
Martin Reinecke's avatar
Martin Reinecke committed
44

Martin Reinecke's avatar
Martin Reinecke committed
45
namespace detail_gridder {
Martin Reinecke's avatar
Martin Reinecke committed
46
47
48

using namespace std;

49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
template<typename T> complex<T> hsum_cmplx(native_simd<T> vr, native_simd<T> vi)
  { return complex<T>(reduce(vr, std::plus<>()), reduce(vi, std::plus<>())); }

#if (defined(__AVX__) && (!defined(__AVX512F__)))
inline complex<float> hsum_cmplx(native_simd<float> vr, native_simd<float> vi)
  {
  auto t1 = _mm256_hadd_ps(vr, vi);
  auto t2 = _mm_hadd_ps(_mm256_extractf128_ps(t1, 0), _mm256_extractf128_ps(t1, 1));
  t2 += _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(1,0,3,2));
  return complex<float>(t2[0], t2[1]);
//FIXME perhaps some shuffling?
  return complex<float>(t2[0]+t2[2], t2[1]+t2[3]);
  }
#endif

Martin Reinecke's avatar
Martin Reinecke committed
64
65
template<size_t ndim> void checkShape
  (const array<size_t, ndim> &shp1, const array<size_t, ndim> &shp2)
66
  { MR_assert(shp1==shp2, "shape mismatch"); }
Martin Reinecke's avatar
Martin Reinecke committed
67
68
69
70
71
72
73
74
75

template<typename T> inline T fmod1 (T v)
  { return v-floor(v); }

//
// Start of real gridder functionality
//

template<typename T> void complex2hartley
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
76
  (const mav<complex<T>, 2> &grid, mav<T,2> &grid2, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
77
  {
78
  MR_assert(grid.conformable(grid2), "shape mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
79
80
81
82
83
84
85
86
87
88
  size_t nu=grid.shape(0), nv=grid.shape(1);

  execStatic(nu, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto u=rng.lo; u<rng.hi; ++u)
      {
      size_t xu = (u==0) ? 0 : nu-u;
      for (size_t v=0; v<nv; ++v)
        {
        size_t xv = (v==0) ? 0 : nv-v;
Martin Reinecke's avatar
Martin Reinecke committed
89
90
        grid2.v(u,v) = T(0.5)*(grid( u, v).real()+grid( u, v).imag()+
                               grid(xu,xv).real()-grid(xu,xv).imag());
Martin Reinecke's avatar
Martin Reinecke committed
91
92
93
94
95
96
        }
      }
    });
  }

template<typename T> void hartley2complex
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
97
  (const mav<T,2> &grid, mav<complex<T>,2> &grid2, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
98
  {
99
  MR_assert(grid.conformable(grid2), "shape mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
100
101
102
103
104
105
106
107
108
109
110
111
  size_t nu=grid.shape(0), nv=grid.shape(1);

  execStatic(nu, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto u=rng.lo; u<rng.hi; ++u)
      {
      size_t xu = (u==0) ? 0 : nu-u;
      for (size_t v=0; v<nv; ++v)
        {
        size_t xv = (v==0) ? 0 : nv-v;
        T v1 = T(0.5)*grid( u, v);
        T v2 = T(0.5)*grid(xu,xv);
Martin Reinecke's avatar
Martin Reinecke committed
112
        grid2.v(u,v) = std::complex<T>(v1+v2, v1-v2);
Martin Reinecke's avatar
Martin Reinecke committed
113
114
115
116
117
        }
      }
    });
  }

118
119
template<typename T> void hartley2_2D(mav<T,2> &arr, size_t vlim,
  bool first_fast, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
120
  {
Martin Reinecke's avatar
Martin Reinecke committed
121
122
  size_t nu=arr.shape(0), nv=arr.shape(1);
  fmav<T> farr(arr);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
123
  if (2*vlim<nv)
Martin Reinecke's avatar
Martin Reinecke committed
124
    {
125
    if (!first_fast)
Martin Reinecke's avatar
Martin Reinecke committed
126
      r2r_separable_hartley(farr, farr, {1}, T(1), nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
127
    auto flo = farr.subarray({0,0},{farr.shape(0),vlim});
Martin Reinecke's avatar
Martin Reinecke committed
128
    r2r_separable_hartley(flo, flo, {0}, T(1), nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
129
    auto fhi = farr.subarray({0,farr.shape(1)-vlim},{farr.shape(0),vlim});
Martin Reinecke's avatar
Martin Reinecke committed
130
    r2r_separable_hartley(fhi, fhi, {0}, T(1), nthreads);
131
    if (first_fast)
Martin Reinecke's avatar
Martin Reinecke committed
132
133
134
135
      r2r_separable_hartley(farr, farr, {1}, T(1), nthreads);
    }
  else
    r2r_separable_hartley(farr, farr, {0,1}, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
136
137
138
139
140
  execStatic((nu+1)/2-1, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto i=rng.lo+1; i<rng.hi+1; ++i)
      for(size_t j=1; j<(nv+1)/2; ++j)
         {
Martin Reinecke's avatar
Martin Reinecke committed
141
142
143
144
145
146
147
148
         T a = arr(i,j);
         T b = arr(nu-i,j);
         T c = arr(i,nv-j);
         T d = arr(nu-i,nv-j);
         arr.v(i,j) = T(0.5)*(a+b+c-d);
         arr.v(nu-i,j) = T(0.5)*(a+b+d-c);
         arr.v(i,nv-j) = T(0.5)*(a+c+d-b);
         arr.v(nu-i,nv-j) = T(0.5)*(b+c+d-a);
Martin Reinecke's avatar
Martin Reinecke committed
149
150
151
152
         }
     });
  }

Martin Reinecke's avatar
temp    
Martin Reinecke committed
153
154
class visrange
  {
Martin Reinecke's avatar
Martin Reinecke committed
155
  public:
Martin Reinecke's avatar
temp    
Martin Reinecke committed
156
157
158
159
160
161
162
163
164
165
166
167
    uint32_t row;
    uint16_t tile_u, tile_v, minplane, ch_begin, ch_end;

  public:
    visrange(uint16_t tile_u_, uint16_t tile_v_, uint16_t minplane_,
             uint32_t row_, uint16_t ch_begin_, uint16_t ch_end_)
      : row(row_), tile_u(tile_u_), tile_v(tile_v_), minplane(minplane_),
        ch_begin(ch_begin_), ch_end(ch_end_) {}
    uint64_t uvwidx() const
      { return (uint64_t(tile_u)<<32) + (uint64_t(tile_v)<<16) + minplane; }
  };

Martin Reinecke's avatar
Martin Reinecke committed
168
using VVR = vector<visrange>;
Martin Reinecke's avatar
Martin Reinecke committed
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190

struct UVW
  {
  double u, v, w;
  UVW() {}
  UVW(double u_, double v_, double w_) : u(u_), v(v_), w(w_) {}
  UVW operator* (double fct) const
    { return UVW(u*fct, v*fct, w*fct); }
  void Flip() { u=-u; v=-v; w=-w; }
  bool FixW()
    {
    bool flip = w<0;
    if (flip) Flip();
    return flip;
    }
  };

class Baselines
  {
  protected:
    vector<UVW> coord;
    vector<double> f_over_c;
Martin Reinecke's avatar
Martin Reinecke committed
191
    size_t nrows, nchan;
Martin Reinecke's avatar
Martin Reinecke committed
192
    double umax, vmax;
Martin Reinecke's avatar
Martin Reinecke committed
193
194

  public:
Martin Reinecke's avatar
Martin Reinecke committed
195
    Baselines() = default;
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
196
197
    template<typename T> Baselines(const mav<T,2> &coord_,
      const mav<T,1> &freq, bool negate_v=false)
Martin Reinecke's avatar
Martin Reinecke committed
198
199
200
201
202
203
      {
      constexpr double speedOfLight = 299792458.;
      MR_assert(coord_.shape(1)==3, "dimension mismatch");
      nrows = coord_.shape(0);
      nchan = freq.shape(0);
      f_over_c.resize(nchan);
Martin Reinecke's avatar
Martin Reinecke committed
204
      double fcmax = 0;
Martin Reinecke's avatar
Martin Reinecke committed
205
206
      for (size_t i=0; i<nchan; ++i)
        {
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
207
        MR_assert(freq(i)>0, "negative channel frequency encountered");
Martin Reinecke's avatar
Martin Reinecke committed
208
        f_over_c[i] = freq(i)/speedOfLight;
Martin Reinecke's avatar
Martin Reinecke committed
209
        fcmax = max(fcmax, abs(f_over_c[i]));
Martin Reinecke's avatar
Martin Reinecke committed
210
211
        }
      coord.resize(nrows);
Martin Reinecke's avatar
Martin Reinecke committed
212
213
214
215
216
217
218
219
220
221
      double vfac = negate_v ? -1 : 1;
      vmax=0;
      for (size_t i=0; i<coord.size(); ++i)
        {
        coord[i] = UVW(coord_(i,0), vfac*coord_(i,1), coord_(i,2));
        umax = max(umax, abs(coord_(i,0)));
        vmax = max(vmax, abs(coord_(i,1)));
        }
      umax *= fcmax;
      vmax *= fcmax;
Martin Reinecke's avatar
Martin Reinecke committed
222
223
      }

Martin Reinecke's avatar
Martin Reinecke committed
224
225
    UVW effectiveCoord(size_t row, size_t chan) const
      { return coord[row]*f_over_c[chan]; }
Martin Reinecke's avatar
Martin Reinecke committed
226
227
    size_t Nrows() const { return nrows; }
    size_t Nchannels() const { return nchan; }
Martin Reinecke's avatar
Martin Reinecke committed
228
229
    double Umax() const { return umax; }
    double Vmax() const { return vmax; }
Martin Reinecke's avatar
Martin Reinecke committed
230
231
  };

Martin Reinecke's avatar
Martin Reinecke committed
232

233
234
235
constexpr int logsquare=4;

template<typename T> class Params
Martin Reinecke's avatar
Martin Reinecke committed
236
  {
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
  private:
  bool gridding;
  TimerHierarchy timers;
  const mav<complex<T>,2> &ms_in;
  mav<complex<T>,2> &ms_out;
  const mav<T,2> &dirty_in;
  mav<T,2> &dirty_out;
  const mav<T,2> &wgt;
  const mav<uint8_t,2> &mask;
  double pixsize_x, pixsize_y;
  size_t nxdirty, nydirty;
  double epsilon;
  bool do_wgridding;
  size_t nthreads;
  size_t verbosity;
  bool negate_v, divide_by_n;

  Baselines bl;
  VVR ranges;
  double wmin_d, wmax_d;
  size_t nvis;
  double wmin, dw;
  size_t nplanes;
  double nm1min;
  vector<uint8_t> active;

    size_t nu, nv;
    double ofactor;
Martin Reinecke's avatar
Martin Reinecke committed
265

Martin Reinecke's avatar
Martin Reinecke committed
266
    shared_ptr<HornerKernel<T>> krn;
Martin Reinecke's avatar
Martin Reinecke committed
267

Martin Reinecke's avatar
Martin Reinecke committed
268
269
270
    size_t supp, nsafe;
    double ushift, vshift;
    int maxiu0, maxiv0;
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
271
    size_t vlim;
272
    bool uv_side_fast;
Martin Reinecke's avatar
Martin Reinecke committed
273

274
    static T phase (T x, T y, T w, bool adjoint)
Martin Reinecke's avatar
Martin Reinecke committed
275
276
277
278
279
      {
      constexpr T pi = T(3.141592653589793238462643383279502884197);
      T tmp = 1-x-y;
      if (tmp<=0) return 1; // no phase factor beyond the horizon
      T nm1 = (-x-y)/(sqrt(tmp)+1); // more accurate form of sqrt(1-x-y)-1
Martin Reinecke's avatar
Martin Reinecke committed
280
281
282
      T phs = 2*pi*w*nm1;
      if (adjoint) phs *= -1;
      return phs;
Martin Reinecke's avatar
Martin Reinecke committed
283
284
      }

Martin Reinecke's avatar
Martin Reinecke committed
285
    void grid2dirty_post(mav<T,2> &tmav,
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
286
      mav<T,2> &dirty) const
Martin Reinecke's avatar
Martin Reinecke committed
287
      {
288
289
290
291
      checkShape(dirty.shape(), {nxdirty, nydirty});
      auto cfu = krn->corfunc(nxdirty/2+1, 1./nu, nthreads);
      auto cfv = krn->corfunc(nydirty/2+1, 1./nv, nthreads);
      execStatic(nxdirty, nthreads, 0, [&](Scheduler &sched)
Martin Reinecke's avatar
Martin Reinecke committed
292
293
294
        {
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
295
296
          int icfu = abs(int(nxdirty/2)-int(i));
          for (size_t j=0; j<nydirty; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
297
            {
298
299
            int icfv = abs(int(nydirty/2)-int(j));
            size_t i2 = nu-nxdirty/2+i;
Martin Reinecke's avatar
Martin Reinecke committed
300
            if (i2>=nu) i2-=nu;
301
            size_t j2 = nv-nydirty/2+j;
Martin Reinecke's avatar
Martin Reinecke committed
302
            if (j2>=nv) j2-=nv;
Martin Reinecke's avatar
Martin Reinecke committed
303
            dirty.v(i,j) = tmav(i2,j2)*T(cfu[icfu]*cfv[icfv]);
Martin Reinecke's avatar
Martin Reinecke committed
304
305
306
307
            }
          }
        });
      }
Martin Reinecke's avatar
Martin Reinecke committed
308
    void grid2dirty_post2(
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
309
      mav<complex<T>,2> &tmav, mav<T,2> &dirty, T w) const
Martin Reinecke's avatar
Martin Reinecke committed
310
      {
311
312
313
314
      checkShape(dirty.shape(), {nxdirty,nydirty});
      double x0 = -0.5*nxdirty*pixsize_x,
             y0 = -0.5*nydirty*pixsize_y;
      execStatic(nxdirty/2+1, nthreads, 0, [&](Scheduler &sched)
Martin Reinecke's avatar
Martin Reinecke committed
315
        {
Martin Reinecke's avatar
Martin Reinecke committed
316
317
        using vtype = native_simd<T>;
        constexpr size_t vlen=vtype::size();
318
        size_t nvec = (nydirty/2+1+(vlen-1))/vlen;
Martin Reinecke's avatar
Martin Reinecke committed
319
        vector<vtype> ph(nvec), sp(nvec), cp(nvec);
Martin Reinecke's avatar
Martin Reinecke committed
320
321
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
322
          T fx = T(x0+i*pixsize_x);
Martin Reinecke's avatar
Martin Reinecke committed
323
          fx *= fx;
324
          size_t ix = nu-nxdirty/2+i;
Martin Reinecke's avatar
Martin Reinecke committed
325
          if (ix>=nu) ix-=nu;
326
327
          size_t i2 = nxdirty-i;
          size_t ix2 = nu-nxdirty/2+i2;
Martin Reinecke's avatar
Martin Reinecke committed
328
          if (ix2>=nu) ix2-=nu;
329
          for (size_t j=0; j<=nydirty/2; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
330
            {
331
            T fy = T(y0+j*pixsize_y);
Martin Reinecke's avatar
Martin Reinecke committed
332
333
334
335
336
337
338
339
340
            ph[j/vlen][j%vlen] = phase(fx, fy*fy, w, true);
            }
          for (size_t j=0; j<nvec; ++j)
            for (size_t k=0; k<vlen; ++k)
               sp[j][k]=sin(ph[j][k]);
          for (size_t j=0; j<nvec; ++j)
            for (size_t k=0; k<vlen; ++k)
              cp[j][k]=cos(ph[j][k]);
          if ((i>0)&&(i<i2))
341
            for (size_t j=0, jx=nv-nydirty/2; j<nydirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
Martin Reinecke's avatar
Martin Reinecke committed
342
              {
343
              size_t j2 = min(j, nydirty-j);
Martin Reinecke's avatar
Martin Reinecke committed
344
345
346
              T re = cp[j2/vlen][j2%vlen], im = sp[j2/vlen][j2%vlen];
              dirty.v(i,j) += tmav(ix,jx).real()*re - tmav(ix,jx).imag()*im;
              dirty.v(i2,j) += tmav(ix2,jx).real()*re - tmav(ix2,jx).imag()*im;
Martin Reinecke's avatar
Martin Reinecke committed
347
              }
Martin Reinecke's avatar
Martin Reinecke committed
348
          else
349
            for (size_t j=0, jx=nv-nydirty/2; j<nydirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
Martin Reinecke's avatar
Martin Reinecke committed
350
              {
351
              size_t j2 = min(j, nydirty-j);
Martin Reinecke's avatar
Martin Reinecke committed
352
353
              T re = cp[j2/vlen][j2%vlen], im = sp[j2/vlen][j2%vlen];
              dirty.v(i,j) += tmav(ix,jx).real()*re - tmav(ix,jx).imag()*im; // lower left
Martin Reinecke's avatar
Martin Reinecke committed
354
              }
Martin Reinecke's avatar
Martin Reinecke committed
355
356
357
358
          }
        });
      }

359
    void grid2dirty_overwrite(mav<T,2> &grid, mav<T,2> &dirty)
Martin Reinecke's avatar
Martin Reinecke committed
360
      {
Martin Reinecke's avatar
Martin Reinecke committed
361
      timers.push("FFT");
Martin Reinecke's avatar
Martin Reinecke committed
362
      checkShape(grid.shape(), {nu,nv});
Martin Reinecke's avatar
Martin Reinecke committed
363
      hartley2_2D<T>(grid, vlim, uv_side_fast, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
364
      timers.poppush("grid correction");
Martin Reinecke's avatar
Martin Reinecke committed
365
      grid2dirty_post(grid, dirty);
Martin Reinecke's avatar
Martin Reinecke committed
366
      timers.pop();
Martin Reinecke's avatar
Martin Reinecke committed
367
368
      }

Martin Reinecke's avatar
Martin Reinecke committed
369
    void grid2dirty_c_overwrite_wscreen_add
370
      (mav<complex<T>,2> &grid, mav<T,2> &dirty, T w)
Martin Reinecke's avatar
Martin Reinecke committed
371
      {
Martin Reinecke's avatar
Martin Reinecke committed
372
      timers.push("FFT");
Martin Reinecke's avatar
Martin Reinecke committed
373
      checkShape(grid.shape(), {nu,nv});
Martin Reinecke's avatar
Martin Reinecke committed
374
      fmav<complex<T>> inout(grid);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
375
      if (2*vlim<nv)
Martin Reinecke's avatar
Martin Reinecke committed
376
        {
377
378
        if (!uv_side_fast)
          c2c(inout, inout, {1}, BACKWARD, T(1), nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
379
        auto inout_lo = inout.subarray({0,0},{inout.shape(0),vlim});
Martin Reinecke's avatar
Martin Reinecke committed
380
        c2c(inout_lo, inout_lo, {0}, BACKWARD, T(1), nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
381
        auto inout_hi = inout.subarray({0,inout.shape(1)-vlim},{inout.shape(0),vlim});
Martin Reinecke's avatar
Martin Reinecke committed
382
        c2c(inout_hi, inout_hi, {0}, BACKWARD, T(1), nthreads);
383
384
        if (uv_side_fast)
          c2c(inout, inout, {1}, BACKWARD, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
385
386
387
        }
      else
        c2c(inout, inout, {0,1}, BACKWARD, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
388
      timers.poppush("wscreen+grid correction");
Martin Reinecke's avatar
Martin Reinecke committed
389
      grid2dirty_post2(grid, dirty, w);
Martin Reinecke's avatar
Martin Reinecke committed
390
      timers.pop();
Martin Reinecke's avatar
Martin Reinecke committed
391
392
      }

Martin Reinecke's avatar
Martin Reinecke committed
393
    void dirty2grid_pre(const mav<T,2> &dirty,
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
394
      mav<T,2> &grid) const
Martin Reinecke's avatar
Martin Reinecke committed
395
      {
396
      checkShape(dirty.shape(), {nxdirty, nydirty});
Martin Reinecke's avatar
Martin Reinecke committed
397
      checkShape(grid.shape(), {nu, nv});
398
399
      auto cfu = krn->corfunc(nxdirty/2+1, 1./nu, nthreads);
      auto cfv = krn->corfunc(nydirty/2+1, 1./nv, nthreads);
400
      // FIXME: maybe we don't have to fill everything and can save some time
Martin Reinecke's avatar
Martin Reinecke committed
401
      grid.fill(0);
402
      execStatic(nxdirty, nthreads, 0, [&](Scheduler &sched)
Martin Reinecke's avatar
Martin Reinecke committed
403
404
405
        {
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
406
407
          int icfu = abs(int(nxdirty/2)-int(i));
          for (size_t j=0; j<nydirty; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
408
            {
409
410
            int icfv = abs(int(nydirty/2)-int(j));
            size_t i2 = nu-nxdirty/2+i;
Martin Reinecke's avatar
Martin Reinecke committed
411
            if (i2>=nu) i2-=nu;
412
            size_t j2 = nv-nydirty/2+j;
Martin Reinecke's avatar
Martin Reinecke committed
413
            if (j2>=nv) j2-=nv;
Martin Reinecke's avatar
Martin Reinecke committed
414
            grid.v(i2,j2) = dirty(i,j)*T(cfu[icfu]*cfv[icfv]);
Martin Reinecke's avatar
Martin Reinecke committed
415
416
417
418
            }
          }
        });
      }
Martin Reinecke's avatar
Martin Reinecke committed
419
    void dirty2grid_pre2(const mav<T,2> &dirty,
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
420
      mav<complex<T>,2> &grid, T w) const
Martin Reinecke's avatar
Martin Reinecke committed
421
      {
422
      checkShape(dirty.shape(), {nxdirty, nydirty});
Martin Reinecke's avatar
Martin Reinecke committed
423
      checkShape(grid.shape(), {nu, nv});
424
      // FIXME: maybe we don't have to fill everything and can save some time
Martin Reinecke's avatar
Martin Reinecke committed
425
426
      grid.fill(0);

427
428
429
      double x0 = -0.5*nxdirty*pixsize_x,
             y0 = -0.5*nydirty*pixsize_y;
      execStatic(nxdirty/2+1, nthreads, 0, [&](Scheduler &sched)
Martin Reinecke's avatar
Martin Reinecke committed
430
        {
Martin Reinecke's avatar
Martin Reinecke committed
431
432
        using vtype = native_simd<T>;
        constexpr size_t vlen=vtype::size();
433
        size_t nvec = (nydirty/2+1+(vlen-1))/vlen;
Martin Reinecke's avatar
Martin Reinecke committed
434
        vector<vtype> ph(nvec), sp(nvec), cp(nvec);
Martin Reinecke's avatar
Martin Reinecke committed
435
436
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
437
          T fx = T(x0+i*pixsize_x);
Martin Reinecke's avatar
Martin Reinecke committed
438
          fx *= fx;
439
          size_t ix = nu-nxdirty/2+i;
Martin Reinecke's avatar
Martin Reinecke committed
440
          if (ix>=nu) ix-=nu;
441
442
          size_t i2 = nxdirty-i;
          size_t ix2 = nu-nxdirty/2+i2;
Martin Reinecke's avatar
Martin Reinecke committed
443
          if (ix2>=nu) ix2-=nu;
444
          for (size_t j=0; j<=nydirty/2; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
445
            {
446
            T fy = T(y0+j*pixsize_y);
Martin Reinecke's avatar
Martin Reinecke committed
447
448
449
450
451
452
453
454
455
            ph[j/vlen][j%vlen] = phase(fx, fy*fy, w, false);
            }
          for (size_t j=0; j<nvec; ++j)
            for (size_t k=0; k<vlen; ++k)
               sp[j][k]=sin(ph[j][k]);
          for (size_t j=0; j<nvec; ++j)
            for (size_t k=0; k<vlen; ++k)
              cp[j][k]=cos(ph[j][k]);
          if ((i>0)&&(i<i2))
456
            for (size_t j=0, jx=nv-nydirty/2; j<nydirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
Martin Reinecke's avatar
Martin Reinecke committed
457
              {
458
              size_t j2 = min(j, nydirty-j);
Martin Reinecke's avatar
Martin Reinecke committed
459
460
              auto ws = complex<T>(cp[j2/vlen][j2%vlen],sp[j2/vlen][j2%vlen]);
              grid.v(ix,jx) = dirty(i,j)*ws; // lower left
Martin Reinecke's avatar
Martin Reinecke committed
461
              grid.v(ix2,jx) = dirty(i2,j)*ws; // lower right
Martin Reinecke's avatar
Martin Reinecke committed
462
              }
Martin Reinecke's avatar
Martin Reinecke committed
463
          else
464
            for (size_t j=0, jx=nv-nydirty/2; j<nydirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
Martin Reinecke's avatar
Martin Reinecke committed
465
              {
466
              size_t j2 = min(j, nydirty-j);
Martin Reinecke's avatar
Martin Reinecke committed
467
468
469
              auto ws = complex<T>(cp[j2/vlen][j2%vlen],sp[j2/vlen][j2%vlen]);
              grid.v(ix,jx) = dirty(i,j)*ws; // lower left
              }
Martin Reinecke's avatar
Martin Reinecke committed
470
471
472
473
          }
        });
      }

474
    void dirty2grid(const mav<T,2> &dirty, mav<T,2> &grid)
Martin Reinecke's avatar
Martin Reinecke committed
475
      {
Martin Reinecke's avatar
Martin Reinecke committed
476
      timers.push("grid correction");
Martin Reinecke's avatar
Martin Reinecke committed
477
      dirty2grid_pre(dirty, grid);
Martin Reinecke's avatar
Martin Reinecke committed
478
      timers.poppush("FFT");
479
      hartley2_2D<T>(grid, vlim, !uv_side_fast, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
480
      timers.pop();
Martin Reinecke's avatar
Martin Reinecke committed
481
482
      }

Martin Reinecke's avatar
Martin Reinecke committed
483
    void dirty2grid_c_wscreen(const mav<T,2> &dirty,
484
      mav<complex<T>,2> &grid, T w)
Martin Reinecke's avatar
Martin Reinecke committed
485
      {
Martin Reinecke's avatar
Martin Reinecke committed
486
      timers.push("wscreen+grid correction");
Martin Reinecke's avatar
Martin Reinecke committed
487
      dirty2grid_pre2(dirty, grid, w);
Martin Reinecke's avatar
Martin Reinecke committed
488
      timers.poppush("FFT");
Martin Reinecke's avatar
Martin Reinecke committed
489
      fmav<complex<T>> inout(grid);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
490
      if (2*vlim<nv)
Martin Reinecke's avatar
Martin Reinecke committed
491
        {
492
493
        if (uv_side_fast)
          c2c(inout, inout, {1}, FORWARD, T(1), nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
494
        auto inout_lo = inout.subarray({0,0},{inout.shape(0),vlim});
Martin Reinecke's avatar
Martin Reinecke committed
495
        c2c(inout_lo, inout_lo, {0}, FORWARD, T(1), nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
496
        auto inout_hi = inout.subarray({0,inout.shape(1)-vlim},{inout.shape(0),vlim});
Martin Reinecke's avatar
Martin Reinecke committed
497
        c2c(inout_hi, inout_hi, {0}, FORWARD, T(1), nthreads);
498
499
        if (!uv_side_fast)
          c2c(inout, inout, {1}, FORWARD, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
500
501
502
        }
      else
        c2c(inout, inout, {0,1}, FORWARD, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
503
      timers.pop();
Martin Reinecke's avatar
Martin Reinecke committed
504
505
      }

Martin Reinecke's avatar
Martin Reinecke committed
506
    [[gnu::always_inline]] void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
Martin Reinecke's avatar
Martin Reinecke committed
507
      {
508
      u=fmod1(u_in*pixsize_x)*nu;
Martin Reinecke's avatar
Martin Reinecke committed
509
      iu0 = min(int(u+ushift)-int(nu), maxiu0);
510
      v=fmod1(v_in*pixsize_y)*nv;
Martin Reinecke's avatar
Martin Reinecke committed
511
512
      iv0 = min(int(v+vshift)-int(nv), maxiv0);
      }
Martin Reinecke's avatar
Martin Reinecke committed
513

514
void report()
Martin Reinecke's avatar
Martin Reinecke committed
515
516
517
518
519
  {
  if (verbosity==0) return;
  cout << (gridding ? "Gridding" : "Degridding")
       << ": nthreads=" << nthreads << ", "
       << "dirty=(" << nxdirty << "x" << nydirty << "), "
520
       << "grid=(" << nu << "x" << nv;
Martin Reinecke's avatar
Martin Reinecke committed
521
522
  if (nplanes>0) cout << "x" << nplanes;
  cout << "), nvis=" << nvis
523
524
       << ", supp=" << supp
       << ", eps=" << (epsilon * ((nplanes==0) ? 2 : 3))
Martin Reinecke's avatar
Martin Reinecke committed
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
       << endl;
  cout << "  w=[" << wmin_d << "; " << wmax_d << "], min(n-1)=" << nm1min << ", dw=" << dw
       << ", wmax/dw=" << wmax_d/dw << ", nranges=" << ranges.size() << endl;
  }

void scanData()
  {
  timers.push("Initial scan");
  size_t nrow=bl.Nrows(),
         nchan=bl.Nchannels();
  bool have_wgt=wgt.size()!=0;
  if (have_wgt) checkShape(wgt.shape(),{nrow,nchan});
  bool have_ms=ms_in.size()!=0;
  if (have_ms) checkShape(ms_in.shape(), {nrow,nchan});
  bool have_mask=mask.size()!=0;
  if (have_mask) checkShape(mask.shape(), {nrow,nchan});

  active.resize(nrow*nchan, 0);
  nvis=0;
  wmin_d=1e300;
  wmax_d=-1e300;
  mutex mut;
  execParallel(nthreads, [&](Scheduler &sched)
    {
    double lwmin_d=1e300, lwmax_d=-1e300;
    size_t lnvis=0;
    auto tid = sched.thread_num();
    auto [lo, hi] = calcShare(nthreads, tid, nrow);
    for(auto irow=lo; irow<hi; ++irow)
      for (size_t ichan=0, idx=irow*nchan; ichan<nchan; ++ichan, ++idx)
        if (((!have_ms ) || (norm(ms_in(irow,ichan))!=0)) &&
            ((!have_wgt) || (wgt(irow,ichan)!=0)) &&
            ((!have_mask) || (mask(irow,ichan)!=0)))
          {
          ++lnvis;
          active[irow*nchan+ichan] = 1;
          auto uvw = bl.effectiveCoord(irow,ichan);
          double w = abs(uvw.w);
          lwmin_d = min(lwmin_d, w);
          lwmax_d = max(lwmax_d, w);
          }
    {
    lock_guard<mutex> lock(mut);
    wmin_d = min(wmin_d, lwmin_d);
    wmax_d = max(wmax_d, lwmax_d);
    nvis += lnvis;
    }
    });
  timers.pop();
  }

auto getNuNv()
  {
  timers.push("parameter calculation");
  double x0 = -0.5*nxdirty*pixsize_x,
         y0 = -0.5*nydirty*pixsize_y;
  nm1min = sqrt(max(1.-x0*x0-y0*y0,0.))-1.;
  if (x0*x0+y0*y0>1.)
    nm1min = -sqrt(abs(1.-x0*x0-y0*y0))-1.;
  auto idx = getAvailableKernels<T>(epsilon);
  double mincost = 1e300;
  constexpr double nref_fft=2048;
  constexpr double costref_fft=0.0693;
  size_t minnu=0, minnv=0, minidx=KernelDB.size();
  constexpr size_t vlen = native_simd<T>::size();
  for (size_t i=0; i<idx.size(); ++i)
    {
    const auto &krn(KernelDB[idx[i]]);
    auto supp = krn.W;
    auto nvec = (supp+vlen-1)/vlen;
    auto ofactor = krn.ofactor;
    size_t nu=2*good_size_complex(size_t(nxdirty*ofactor*0.5)+1);
    size_t nv=2*good_size_complex(size_t(nydirty*ofactor*0.5)+1);
    double logterm = log(nu*nv)/log(nref_fft*nref_fft);
    double fftcost = nu/nref_fft*nv/nref_fft*logterm*costref_fft;
    double gridcost = 2.2e-10*nvis*(supp*nvec*vlen + ((2*nvec+1)*(supp+3)*vlen));
    if (do_wgridding)
      {
      double dw = 0.5/ofactor/abs(nm1min);
      size_t nplanes = size_t((wmax_d-wmin_d)/dw+supp);
      fftcost *= nplanes;
      gridcost *= supp;
      }
    double cost = fftcost+gridcost;
    if (cost<mincost)
      {
      mincost=cost;
      minnu=nu;
      minnv=nv;
      minidx = idx[i];
      }
    }
  timers.pop();
618
619
620
  nu = minnu;
  nv = minnv;
  return minidx;
Martin Reinecke's avatar
Martin Reinecke committed
621
622
  }

623
void countRanges()
Martin Reinecke's avatar
Martin Reinecke committed
624
625
626
  {
  timers.push("range count");
  size_t nrow=bl.Nrows(),
627
         nchan=bl.Nchannels();
Martin Reinecke's avatar
Martin Reinecke committed
628

629
  dw = 0.5/ofactor/abs(nm1min);
Martin Reinecke's avatar
Martin Reinecke committed
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
  nplanes = size_t((wmax_d-wmin_d)/dw+supp);
  wmin = (wmin_d+wmax_d)*0.5 - 0.5*(nplanes-1)*dw;

  struct bufvec
    {
    VVR v;
    uint64_t dummy[8];
    };
  auto Sorter = [](const visrange &a, const visrange &b) { return a.uvwidx()<b.uvwidx(); };
  vector<bufvec> lranges(nthreads);
  execParallel(nthreads, [&](Scheduler &sched)
    {
    auto tid = sched.thread_num();
    auto &myranges(lranges[tid].v);
    auto [lo, hi] = calcShare(nthreads, tid, nrow);
    for(auto irow=lo; irow<hi; ++irow)
      {
      bool on=false;
      int iulast, ivlast, plast;
      size_t chan0=0;
      for (size_t ichan=0; ichan<nchan; ++ichan)
        {
        if (active[irow*nchan+ichan])
          {
          auto uvw = bl.effectiveCoord(irow, ichan);
          if (uvw.w<0) uvw.Flip();
          double u, v;
          int iu0, iv0, iw;
658
          getpix(uvw.u, uvw.v, u, v, iu0, iv0);
Martin Reinecke's avatar
Martin Reinecke committed
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
          iu0 = (iu0+nsafe)>>logsquare;
          iv0 = (iv0+nsafe)>>logsquare;
          iw = max(0,int(1+(abs(uvw.w)-(0.5*supp*dw)-wmin)/dw));
          if (!on) // new active region
            {
            on=true;
            iulast=iu0; ivlast=iv0; plast=iw; chan0=ichan;
            }
          else if ((iu0!=iulast) || (iv0!=ivlast) || (iw!=plast)) // change of active region
            {
            myranges.emplace_back(iulast, ivlast, plast, irow, chan0, ichan);
            iulast=iu0; ivlast=iv0; plast=iw; chan0=ichan;
            }
          }
        else if (on) // end of active region
          {
          myranges.emplace_back(iulast, ivlast, plast, irow, chan0, ichan);
          on=false;
          }
        }
      if (on) // end of active region at last channel
        myranges.emplace_back(iulast, ivlast, plast, irow, chan0, nchan);
      }
    sort(myranges.begin(), myranges.end(), Sorter);
    });

  // free mask memory
  vector<uint8_t>().swap(active);
  timers.poppush("range merging");
  size_t nth = nthreads;
  while (nth>1)
    {
    auto nmerge=nth/2;
    execParallel(nmerge, [&](Scheduler &sched)
      {
      auto tid = sched.thread_num();
      auto tid_partner = nth-1-tid;
      VVR tmp;
      tmp.reserve(lranges[tid].v.size() + lranges[tid_partner].v.size());
      merge(lranges[tid].v.begin(), lranges[tid].v.end(),
            lranges[tid_partner].v.begin(), lranges[tid_partner].v.end(),
            back_inserter(tmp), Sorter);
      lranges[tid].v.swap(tmp);
      VVR().swap(lranges[tid_partner].v);
      });
    nth-=nmerge;
    }
  ranges.swap(lranges[0].v);
  timers.pop();
  }

710
void apply_global_corrections(mav<T,2> &dirty)
Martin Reinecke's avatar
Martin Reinecke committed
711
712
713
714
  {
  timers.push("global corrections");
  double x0 = -0.5*nxdirty*pixsize_x,
         y0 = -0.5*nydirty*pixsize_y;
715
716
  auto cfu = krn->corfunc(nxdirty/2+1, 1./nu, nthreads);
  auto cfv = krn->corfunc(nydirty/2+1, 1./nv, nthreads);
717
  execStatic(nxdirty/2+1, nthreads, 0, [&](Scheduler &sched)
Martin Reinecke's avatar
Martin Reinecke committed
718
719
720
721
722
    {
    while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
      {
      auto fx = T(x0+i*pixsize_x);
      fx *= fx;
723
      for (size_t j=0; j<=nydirty/2; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
724
725
726
727
728
729
730
731
        {
        auto fy = T(y0+j*pixsize_y);
        fy*=fy;
        T fct = 0;
        auto tmp = 1-fx-fy;
        if (tmp>=0)
          {
          auto nm1 = (-fx-fy)/(sqrt(tmp)+1); // accurate form of sqrt(1-x-y)-1
732
          fct = T(krn->corfunc(nm1*dw));
Martin Reinecke's avatar
Martin Reinecke committed
733
734
735
736
737
738
739
740
741
742
          if (divide_by_n)
            fct /= nm1+1;
          }
        else // beyond the horizon, don't really know what to do here
          {
          if (divide_by_n)
            fct=0;
          else
            {
            auto nm1 = sqrt(-tmp)-1;
743
            fct = T(krn->corfunc(nm1*dw));
Martin Reinecke's avatar
Martin Reinecke committed
744
745
            }
          }
746
747
        fct *= T(cfu[nxdirty/2-i]*cfv[nydirty/2-j]);
        size_t i2 = nxdirty-i, j2 = nydirty-j;
Martin Reinecke's avatar
Martin Reinecke committed
748
749
750
751
752
753
754
755
756
757
758
759
760
761
        dirty.v(i,j)*=fct;
        if ((i>0)&&(i<i2))
          {
          dirty.v(i2,j)*=fct;
          if ((j>0)&&(j<j2))
            dirty.v(i2,j2)*=fct;
          }
        if ((j>0)&&(j<j2))
          dirty.v(i,j2)*=fct;
        }
      }
    });
  timers.pop();
  }
Martin Reinecke's avatar
Martin Reinecke committed
762
template<size_t supp, bool wgrid> class HelperX2g2
Martin Reinecke's avatar
Martin Reinecke committed
763
  {
Martin Reinecke's avatar
test    
Martin Reinecke committed
764
765
766
767
  public:
    static constexpr size_t vlen = native_simd<T>::size();
    static constexpr size_t nvec = (supp+vlen-1)/vlen;

Martin Reinecke's avatar
Martin Reinecke committed
768
  private:
Martin Reinecke's avatar
test    
Martin Reinecke committed
769
770
771
772
773
    static constexpr int nsafe = (supp+1)/2;
    static constexpr int su = 2*nsafe+(1<<logsquare);
    static constexpr int sv = 2*nsafe+(1<<logsquare);
    static constexpr int svvec = ((sv+vlen-1)/vlen)*vlen;
    static constexpr double xsupp=2./supp;
774
775
const Params *parent;
    TemplateKernel<supp, T> tkrn;
Martin Reinecke's avatar
Martin Reinecke committed
776
    mav<complex<T>,2> &grid;
Martin Reinecke's avatar
Martin Reinecke committed
777
778
779
    int iu0, iv0; // start index of the current visibility
    int bu0, bv0; // start index of the current buffer

Martin Reinecke's avatar
Martin Reinecke committed
780
    mav<T,2> bufr, bufi;
Martin Reinecke's avatar
Martin Reinecke committed
781
    T *px0r, *px0i;
Martin Reinecke's avatar
Martin Reinecke committed
782
783
784
    double w0, xdw;
    vector<std::mutex> &locks;

Martin Reinecke's avatar
Martin Reinecke committed
785
    DUCC0_NOINLINE void dump()
Martin Reinecke's avatar
Martin Reinecke committed
786
      {
787
788
      int inu = int(parent->nu);
      int inv = int(parent->nv);
Martin Reinecke's avatar
test    
Martin Reinecke committed
789
      if (bu0<-nsafe) return; // nothing written into buffer yet
Martin Reinecke's avatar
Martin Reinecke committed
790

791
792
      int idxu = (bu0+inu)%inu;
      int idxv0 = (bv0+inv)%inv;
Martin Reinecke's avatar
Martin Reinecke committed
793
794
795
796
797
798
799
      for (int iu=0; iu<su; ++iu)
        {
        int idxv = idxv0;
        {
        std::lock_guard<std::mutex> lock(locks[idxu]);
        for (int iv=0; iv<sv; ++iv)
          {
Martin Reinecke's avatar
Martin Reinecke committed
800
          grid.v(idxu,idxv) += complex<T>(bufr(iu,iv), bufi(iu,iv));
801
          bufr.v(iu,iv) = bufi.v(iu,iv) = 0;
802
          if (++idxv>=inv) idxv=0;
Martin Reinecke's avatar
Martin Reinecke committed
803
804
          }
        }
805
        if (++idxu>=inu) idxu=0;
Martin Reinecke's avatar
Martin Reinecke committed
806
807
808
        }
      }

Martin Reinecke's avatar
Martin Reinecke committed
809
  public:
Martin Reinecke's avatar
test    
Martin Reinecke committed
810
    T * DUCC0_RESTRICT p0r, * DUCC0_RESTRICT p0i;
Martin Reinecke's avatar
Martin Reinecke committed
811
    union kbuf {
Martin Reinecke's avatar
test    
Martin Reinecke committed
812
813
      T scalar[2*nvec*vlen];
      native_simd<T> simd[2*nvec];
Martin Reinecke's avatar
Martin Reinecke committed
814
815
816
      };
    kbuf buf;

817
    HelperX2g2(const Params *parent_, mav<complex<T>,2> &grid_,
Martin Reinecke's avatar
Martin Reinecke committed
818
      vector<std::mutex> &locks_, double w0_=-1, double dw_=-1)
819
      : parent(parent_), tkrn(*parent->krn), grid(grid_),
Martin Reinecke's avatar
Martin Reinecke committed
820
        iu0(-1000000), iv0(-1000000),
Martin Reinecke's avatar
Martin Reinecke committed
821
822
823
        bu0(-1000000), bv0(-1000000),
        bufr({size_t(su),size_t(svvec)}),
        bufi({size_t(su),size_t(svvec)}),
Martin Reinecke's avatar
Martin Reinecke committed
824
        px0r(bufr.vdata()), px0i(bufi.vdata()),
Martin Reinecke's avatar
Martin Reinecke committed
825
826
        w0(w0_),
        xdw(T(1)/dw_),
Martin Reinecke's avatar
test    
Martin Reinecke committed
827
        locks(locks_)
828
      { checkShape(grid.shape(), {parent->nu,parent->nv}); }
Martin Reinecke's avatar
Martin Reinecke committed
829
    ~HelperX2g2() { dump(); }
Martin Reinecke's avatar
Martin Reinecke committed
830

Martin Reinecke's avatar
Martin Reinecke committed
831
832
    constexpr int lineJump() const { return svvec; }
    [[gnu::always_inline]] [[gnu::hot]] void prep(const UVW &in)
Martin Reinecke's avatar
Martin Reinecke committed
833
834
      {
      double u, v;
Martin Reinecke's avatar
Martin Reinecke committed
835
836
      auto iu0old = iu0;
      auto iv0old = iv0;
837
      parent->getpix(in.u, in.v, u, v, iu0, iv0);
Martin Reinecke's avatar
Martin Reinecke committed
838
      T x0 = (iu0-T(u))*2+(supp-1);
Philipp Arras's avatar
Philipp Arras committed
839
      T y0 = (iv0-T(v))*2+(supp-1);
Martin Reinecke's avatar
Martin Reinecke committed
840
      if constexpr(wgrid)
841
        tkrn.eval2s(x0, y0, T(xdw*(w0-in.w)), &buf.simd[0]);
Martin Reinecke's avatar
Martin Reinecke committed
842
      else
843
        tkrn.eval2(x0, y0, &buf.simd[0]);
Martin Reinecke's avatar
Martin Reinecke committed
844
      if ((iu0==iu0old) && (iv0==iv0old)) return;
Martin Reinecke's avatar
Martin Reinecke committed
845
      if ((iu0<bu0) || (iv0<bv0) || (iu0+int(supp)>bu0+su) || (iv0+int(supp)>bv0+sv))
Martin Reinecke's avatar
Martin Reinecke committed
846
847
        {
        dump();
Martin Reinecke's avatar
test    
Martin Reinecke committed
848
849
        bu0=((((iu0+nsafe)>>logsquare)<<logsquare))-nsafe;
        bv0=((((iv0+nsafe)>>logsquare)<<logsquare))-nsafe;
Martin Reinecke's avatar
Martin Reinecke committed
850
        }
Martin Reinecke's avatar
Martin Reinecke committed
851
852
853
      auto ofs = (iu0-bu0)*svvec + iv0-bv0;
      p0r = px0r+ofs;
      p0i = px0i+ofs;
Martin Reinecke's avatar
Martin Reinecke committed
854
855
856
      }
  };

Martin Reinecke's avatar
Martin Reinecke committed
857
858

template<size_t SUPP, bool wgrid> [[gnu::hot]] void x2grid_c_helper
859
  (mav<complex<T>,2> &grid,
Martin Reinecke's avatar
Martin Reinecke committed
860
861
862
   size_t p0, double w0)
  {
  bool have_wgt = wgt.size()!=0;
863
  vector<std::mutex> locks(nu);
Martin Reinecke's avatar
Martin Reinecke committed
864

865
  execGuided(ranges.size(), nthreads, 100, 0.2, [&](Scheduler &sched)
Martin Reinecke's avatar
Martin Reinecke committed
866
867
868
    {
    constexpr size_t vlen=native_simd<T>::size();
    constexpr size_t NVEC((SUPP+vlen-1)/vlen);
869
    HelperX2g2<SUPP,wgrid> hlp(this, grid, locks, w0, dw);
Martin Reinecke's avatar
Martin Reinecke committed
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
    constexpr int jump = hlp.lineJump();
    const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
    const auto * DUCC0_RESTRICT kv = hlp.buf.simd+NVEC;

    while (auto rng=sched.getNext()) for(auto irng=rng.lo; irng<rng.hi; ++irng)
      {
      if ((!wgrid) || ((ranges[irng].minplane+SUPP>p0)&&(ranges[irng].minplane<=p0)))
        {
        size_t row = ranges[irng].row;
        for (size_t ch=ranges[irng].ch_begin; ch<ranges[irng].ch_end; ++ch)
          {
          UVW coord = bl.effectiveCoord(row, ch);
          auto flip = coord.FixW();
          hlp.prep(coord);
          auto v(ms_in(row, ch));

          if (flip) v=conj(v);
          if (have_wgt) v*=wgt(row, ch);
          native_simd<T> vr(v.real()), vi(v.imag());
          for (size_t cu=0; cu<SUPP; ++cu)
            {
            if constexpr (NVEC==1)
              {
              auto fct = kv[0]*ku[cu];
              auto * DUCC0_RESTRICT pxr = hlp.p0r+cu*jump;
              auto * DUCC0_RESTRICT pxi = hlp.p0i+cu*jump;
              auto tr = native_simd<T>::loadu(pxr);
              auto ti = native_simd<T>::loadu(pxi);
              tr += vr*fct;
              ti += vi*fct;
              tr.storeu(pxr);
              ti.storeu(pxi);
              }
            else
              {
              native_simd<T> tmpr=vr*ku[cu], tmpi=vi*ku[cu];
              for (size_t cv=0; cv<NVEC; ++cv)
                {
                auto * DUCC0_RESTRICT pxr = hlp.p0r+cu*jump+cv*hlp.vlen;
                auto * DUCC0_RESTRICT pxi = hlp.p0i+cu*jump+cv*hlp.vlen;
                auto tr = native_simd<T>::loadu(pxr);
                tr += tmpr*kv[cv];
                tr.storeu(pxr);
                auto ti = native_simd<T>::loadu(pxi);
                ti += tmpi*kv[cv];
                ti.storeu(pxi);
                }
              }
            }
          }
        }
      }
    });
  }

template<bool wgrid> void x2grid_c
926
  (mav<complex<T>,2> &grid,
Martin Reinecke's avatar
Martin Reinecke committed
927
928
929
   size_t p0, double w0=-1)
  {
  timers.push("gridding proper");
930
  checkShape(grid.shape(), {nu, nv});
Martin Reinecke's avatar
Martin Reinecke committed
931
932

  if constexpr (is_same<T, float>::value)
933
    switch(supp)
Martin Reinecke's avatar
Martin Reinecke committed
934
      {
935
936
937
938
939
      case  4: x2grid_c_helper< 4, wgrid>(grid, p0, w0); break;
      case  5: x2grid_c_helper< 5, wgrid>(grid, p0, w0); break;
      case  6: x2grid_c_helper< 6, wgrid>(grid, p0, w0); break;
      case  7: x2grid_c_helper< 7, wgrid>(grid, p0, w0); break;
      case  8: x2grid_c_helper< 8, wgrid>(grid, p0, w0); break;
Martin Reinecke's avatar
Martin Reinecke committed
940
941
942
      default: MR_fail("must not happen");
      }
  else
943
    switch(supp)
Martin Reinecke's avatar
Martin Reinecke committed
944
      {
945
946
947
948
949
950
951
952
953
954
955
956
957
      case  4: x2grid_c_helper< 4, wgrid>(grid, p0, w0); break;
      case  5: x2grid_c_helper< 5, wgrid>(grid, p0, w0); break;
      case  6: x2grid_c_helper< 6, wgrid>(grid, p0, w0); break;
      case  7: x2grid_c_helper< 7, wgrid>(grid, p0, w0); break;
      case  8: x2grid_c_helper< 8, wgrid>(grid, p0, w0); break;
      case  9: x2grid_c_helper< 9, wgrid>(grid, p0, w0); break;
      case 10: x2grid_c_helper<10, wgrid>(grid, p0, w0); break;
      case 11: x2grid_c_helper<11, wgrid>(grid, p0, w0); break;
      case 12: x2grid_c_helper<12, wgrid>(grid, p0, w0); break;
      case 13: x2grid_c_helper<13, wgrid>(grid, p0, w0); break;
      case 14: x2grid_c_helper<14, wgrid>(grid, p0, w0); break;
      case 15: x2grid_c_helper<15, wgrid>(grid, p0, w0); break;
      case 16: x2grid_c_helper<16, wgrid>(grid, p0, w0); break;
Martin Reinecke's avatar
Martin Reinecke committed
958
959
960
961
962
      default: MR_fail("must not happen");
      }
  timers.pop();
  }

963
void x2dirty()
Martin Reinecke's avatar
Martin Reinecke committed
964
965
966
967
968
969
  {
  if (do_wgridding)
    {
    timers.push("zeroing dirty image");
    dirty_out.fill(0);
    timers.poppush("allocating grid");
970
    auto grid = mav<complex<T>,2>::build_noncritical({nu,nv});
Martin Reinecke's avatar
Martin Reinecke committed
971
972
973
974
975
976
977
    timers.pop();
    for (size_t pl=0; pl<nplanes; ++pl)
      {
      double w = wmin+pl*dw;
      timers.push("zeroing grid");
      grid.fill(0);
      timers.pop();
978
979
      x2grid_c<true>(grid, pl, w);
      grid2dirty_c_overwrite_wscreen_add(grid, dirty_out, T(w));
Martin Reinecke's avatar
Martin Reinecke committed
980
981
      }
    // correct for w gridding etc.
982
    apply_global_corrections(dirty_out);
Martin Reinecke's avatar
Martin Reinecke committed
983
984
985
986
    }
  else
    {
    timers.push("allocating grid");
987
    auto grid = mav<complex<T>,2>::build_noncritical({nu,nv});
Martin Reinecke's avatar
Martin Reinecke committed
988
    timers.pop();
989
    x2grid_c<false>(grid, 0);
Martin Reinecke's avatar
Martin Reinecke committed
990
991
992
    timers.push("allocating rgrid");
    auto rgrid = mav<T,2>::build_noncritical(grid.shape());
    timers.poppush("complex2hartley");
993
    complex2hartley(grid, rgrid, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
994
    timers.pop();
995
    grid2dirty_overwrite(rgrid, dirty_out);
Martin Reinecke's avatar
Martin Reinecke committed
996
997
998
    }
  }
template<size_t supp, bool wgrid> class HelperG2x2
Martin Reinecke's avatar
Martin Reinecke committed
999
  {
Martin Reinecke's avatar
test    
Martin Reinecke committed
1000
1001
1002
1003
  public:
    static constexpr size_t vlen = native_simd<T>::size();
    static constexpr size_t nvec = (supp+vlen-1)/vlen;

Martin Reinecke's avatar
Martin Reinecke committed
1004
  private:
Martin Reinecke's avatar
test    
Martin Reinecke committed
1005
1006
1007
1008
1009
    static constexpr int nsafe = (supp+1)/2;
    static constexpr int su = 2*nsafe+(1<<logsquare);
    static constexpr int sv = 2*nsafe+(1<<logsquare);
    static constexpr int svvec = ((sv+vlen-1)/vlen)*vlen;
    static constexpr double xsupp=2./supp;
1010
const Params *parent;
Martin Reinecke's avatar
test    
Martin Reinecke committed
1011

1012
    TemplateKernel<supp, T> tkrn;
Martin Reinecke's avatar
Martin Reinecke committed
1013
1014
1015
1016
1017
    const mav<complex<T>,2> &grid;
    int iu0, iv0; // start index of the current visibility
    int bu0, bv0; // start index of the current buffer

    mav<T,2> bufr, bufi;
Martin Reinecke's avatar
Martin Reinecke committed
1018
    const T *px0r, *px0i;
Martin Reinecke's avatar
Martin Reinecke committed
1019
1020
    double w0, xdw;

Martin Reinecke's avatar
Martin Reinecke committed
1021
    DUCC0_NOINLINE void load()
Martin Reinecke's avatar
Martin Reinecke committed
1022
      {
1023
1024
1025
1026
      int inu = int(parent->nu);
      int inv = int(parent->nv);
      int idxu = (bu0+inu)%inu;
      int idxv0 = (bv0+inv)%inv;
Martin Reinecke's avatar
Martin Reinecke committed
1027
1028
1029
1030
1031
      for (int iu=0; iu<su; ++iu)
        {
        int idxv = idxv0;
        for (int iv=0; iv<sv; ++iv)
          {
Martin Reinecke's avatar
Martin Reinecke committed
1032
1033
          bufr.v(iu,iv) = grid(idxu, idxv).real();
          bufi.v(iu,iv) = grid(idxu, idxv).imag();
1034
          if (++idxv>=inv) idxv=0;
Martin Reinecke's avatar
Martin Reinecke committed
1035
          }
1036
        if (++idxu>=inu) idxu=0;
Martin Reinecke's avatar
Martin Reinecke committed
1037
1038
1039
1040
        }
      }

  public:
Martin Reinecke's avatar
test    
Martin Reinecke committed
1041
    const T * DUCC0_RESTRICT p0r, * DUCC0_RESTRICT p0i;
Martin Reinecke's avatar
Martin Reinecke committed
1042
    union kbuf {
Martin Reinecke's avatar
test    
Martin Reinecke committed
1043
1044
      T scalar[2*nvec*vlen];
      native_simd<T> simd[2*nvec];
Martin Reinecke's avatar
Martin Reinecke committed
1045
      };
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
1046
    kbuf buf;
Martin Reinecke's avatar
Martin Reinecke committed
1047