wgridder.h 55.1 KB
Newer Older
Martin Reinecke's avatar
Martin Reinecke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 *  This file is part of nifty_gridder.
 *
 *  nifty_gridder is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  nifty_gridder is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with nifty_gridder; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

Martin Reinecke's avatar
Martin Reinecke committed
19
/* Copyright (C) 2019-2021 Max-Planck-Society
Martin Reinecke's avatar
Martin Reinecke committed
20
21
   Author: Martin Reinecke */

Martin Reinecke's avatar
Martin Reinecke committed
22
23
#ifndef DUCC0_WGRIDDER_H
#define DUCC0_WGRIDDER_H
24

Martin Reinecke's avatar
Martin Reinecke committed
25
26
27
28
29
30
31
32
#include <cstring>
#include <complex>
#include <cstdint>
#include <functional>
#include <map>
#include <type_traits>
#include <utility>
#include <mutex>
Martin Reinecke's avatar
Martin Reinecke committed
33
34
35
36
37
38
#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cmath>
#include <vector>
#include <array>
Martin Reinecke's avatar
Martin Reinecke committed
39
#include <memory>
Martin Reinecke's avatar
Martin Reinecke committed
40
#if ((!defined(DUCC0_NO_SIMD)) && (defined(__AVX__)||defined(__SSE3__)))
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
41
42
#include <x86intrin.h>
#endif
Martin Reinecke's avatar
Martin Reinecke committed
43

Martin Reinecke's avatar
Martin Reinecke committed
44
#include "ducc0/infra/error_handling.h"
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
45
#include "ducc0/math/constants.h"
Martin Reinecke's avatar
Martin Reinecke committed
46
#include "ducc0/fft/fft.h"
Martin Reinecke's avatar
Martin Reinecke committed
47
#include "ducc0/infra/threading.h"
Martin Reinecke's avatar
Martin Reinecke committed
48
#include "ducc0/infra/misc_utils.h"
Martin Reinecke's avatar
Martin Reinecke committed
49
50
#include "ducc0/infra/useful_macros.h"
#include "ducc0/infra/mav.h"
Martin Reinecke's avatar
Martin Reinecke committed
51
#include "ducc0/infra/simd.h"
Martin Reinecke's avatar
Martin Reinecke committed
52
#include "ducc0/infra/timers.h"
Martin Reinecke's avatar
Martin Reinecke committed
53
#include "ducc0/math/gridding_kernel.h"
Martin Reinecke's avatar
Martin Reinecke committed
54

Martin Reinecke's avatar
Martin Reinecke committed
55
namespace ducc0 {
Martin Reinecke's avatar
Martin Reinecke committed
56

Martin Reinecke's avatar
Martin Reinecke committed
57
namespace detail_gridder {
Martin Reinecke's avatar
Martin Reinecke committed
58
59

using namespace std;
60
61
// the next line is necessary to address some sloppy name choices in hipSYCL
using std::min, std::max;
Martin Reinecke's avatar
Martin Reinecke committed
62

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
63
64
template<typename T> constexpr inline int mysimdlen
  = min<int>(8, native_simd<T>::size());
65

Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
66
template<typename T> using mysimd = typename simd_select<T,mysimdlen<T>>::type;
67

Martin Reinecke's avatar
Martin Reinecke committed
68
69
template<typename T> T sqr(T val) { return val*val; }

Martin Reinecke's avatar
Martin Reinecke committed
70
71
72
73
74
template<typename T> void quickzero(mav<T,2> &arr, size_t nthreads)
  {
#if 0
  arr.fill(T(0));
#else
Martin Reinecke's avatar
Martin Reinecke committed
75
76
  MR_assert((arr.stride(0)>0) && (arr.stride(1)>0), "bad memory ordering");
  MR_assert(arr.stride(0)>=arr.stride(1), "bad memory ordering");
Martin Reinecke's avatar
Martin Reinecke committed
77
78
79
  size_t s0=arr.shape(0), s1=arr.shape(1);
  execParallel(s0, nthreads, [&](size_t lo, size_t hi)
    {
Martin Reinecke's avatar
Martin Reinecke committed
80
81
82
83
84
85
86
87
88
89
90
91
    if (arr.stride(1)==1)
      {
      if (size_t(arr.stride(0))==arr.shape(1))
        memset(reinterpret_cast<char *>(&arr.v(lo,0)), 0, sizeof(T)*s1*(hi-lo));
      else
        for (auto i=lo; i<hi; ++i)
          memset(reinterpret_cast<char *>(&arr.v(i,0)), 0, sizeof(T)*s1);
      }
    else
      for (auto i=lo; i<hi; ++i)
        for (size_t j=0; j<s1; ++j)
          arr.v(i,j) = T(0);
Martin Reinecke's avatar
Martin Reinecke committed
92
93
94
95
    });
#endif
  }

Martin Reinecke's avatar
Martin Reinecke committed
96
template<typename T, typename F> [[gnu::hot]] void expi(vector<complex<T>> &res, vector<T> &buf, F getang)
Martin Reinecke's avatar
Martin Reinecke committed
97
98
99
  {
  using Tsimd = native_simd<T>;
  static constexpr auto vlen = Tsimd::size();
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
100
  auto n=res.size();
Martin Reinecke's avatar
Martin Reinecke committed
101
102
103
  for (size_t j=0; j<n; ++j)
    buf[j] = getang(j);
  size_t i=0;
Martin Reinecke's avatar
Martin Reinecke committed
104
105
  for (; i+vlen-1<n; i+=vlen)
    {
106
    auto vang = Tsimd(&buf[i],element_aligned_tag());
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
107
108
    auto vcos = cos(vang);
    auto vsin = sin(vang);
Martin Reinecke's avatar
Martin Reinecke committed
109
110
111
112
    for (size_t ii=0; ii<vlen; ++ii)
      res[i+ii] = complex<T>(vcos[ii], vsin[ii]);
    }
  for (; i<n; ++i)
Martin Reinecke's avatar
Martin Reinecke committed
113
    res[i] = complex<T>(cos(buf[i]), sin(buf[i]));
Martin Reinecke's avatar
Martin Reinecke committed
114
115
  }

116
117
template<typename T> complex<T> hsum_cmplx(mysimd<T> vr, mysimd<T> vi)
  { return complex<T>(reduce(vr, plus<>()), reduce(vi, plus<>())); }
118

Martin Reinecke's avatar
Martin Reinecke committed
119
#if (!defined(DUCC0_NO_SIMD))
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
120
121
#if (defined(__AVX__))
#if 1
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
122
template<> inline complex<float> hsum_cmplx<float>(mysimd<float> vr, mysimd<float> vi)
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
123
  {
Martin Reinecke's avatar
Martin Reinecke committed
124
  auto t1 = _mm256_hadd_ps(__m256(vr), __m256(vi));
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
125
126
127
128
129
130
  auto t2 = _mm_hadd_ps(_mm256_extractf128_ps(t1, 0), _mm256_extractf128_ps(t1, 1));
  t2 += _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(1,0,3,2));
  return complex<float>(t2[0], t2[1]);
  }
#else
// this version may be slightly faster, but this needs more benchmarking
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
131
template<> inline complex<float> hsum_cmplx<float>(mysimd<float> vr, mysimd<float> vi)
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
132
133
134
135
136
137
138
139
140
141
142
  {
  auto t1 = _mm256_shuffle_ps(vr, vi, _MM_SHUFFLE(0,2,0,2));
  auto t2 = _mm256_shuffle_ps(vr, vi, _MM_SHUFFLE(1,3,1,3));
  auto t3 = _mm256_add_ps(t1,t2);
  t3 = _mm256_shuffle_ps(t3, t3, _MM_SHUFFLE(3,0,2,1));
  auto t4 = _mm_add_ps(_mm256_extractf128_ps(t3, 1), _mm256_castps256_ps128(t3));
  auto t5 = _mm_add_ps(t4, _mm_movehl_ps(t4, t4));
  return complex<float>(t5[0], t5[1]);
  }
#endif
#elif defined(__SSE3__)
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
143
template<> inline complex<float> hsum_cmplx<float>(mysimd<float> vr, mysimd<float> vi)
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
144
  {
Martin Reinecke's avatar
Martin Reinecke committed
145
  auto t1 = _mm_hadd_ps(__m128(vr), __m128(vi));
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
146
147
148
149
  t1 += _mm_shuffle_ps(t1, t1, _MM_SHUFFLE(2,3,0,1));
  return complex<float>(t1[0], t1[2]);
  }
#endif
Martin Reinecke's avatar
Martin Reinecke committed
150
#endif
151

Martin Reinecke's avatar
Martin Reinecke committed
152
153
template<size_t ndim> void checkShape
  (const array<size_t, ndim> &shp1, const array<size_t, ndim> &shp2)
154
  { MR_assert(shp1==shp2, "shape mismatch"); }
Martin Reinecke's avatar
Martin Reinecke committed
155
156
157
158
159
160

//
// Start of real gridder functionality
//

template<typename T> void complex2hartley
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
161
  (const mav<complex<T>, 2> &grid, mav<T,2> &grid2, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
162
  {
163
  MR_assert(grid.conformable(grid2), "shape mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
164
165
  size_t nu=grid.shape(0), nv=grid.shape(1);

166
  execParallel(nu, nthreads, [&](size_t lo, size_t hi)
Martin Reinecke's avatar
Martin Reinecke committed
167
    {
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
168
169
    for(auto u=lo, xu=(u==0) ? 0 : nu-u; u<hi; ++u, xu=nu-u)
      for (size_t v=0, xv=0; v<nv; ++v, xv=nv-v)
Martin Reinecke's avatar
Martin Reinecke committed
170
171
        grid2.v(u,v) = T(0.5)*(grid( u, v).real()+grid( u, v).imag()+
                               grid(xu,xv).real()-grid(xu,xv).imag());
Martin Reinecke's avatar
Martin Reinecke committed
172
173
174
175
    });
  }

template<typename T> void hartley2complex
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
176
  (const mav<T,2> &grid, mav<complex<T>,2> &grid2, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
177
  {
178
  MR_assert(grid.conformable(grid2), "shape mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
179
180
  size_t nu=grid.shape(0), nv=grid.shape(1);

181
  execParallel(nu, nthreads, [&](size_t lo, size_t hi)
Martin Reinecke's avatar
Martin Reinecke committed
182
    {
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
183
184
    for(size_t u=lo, xu=(u==0) ? 0 : nu-u; u<hi; ++u, xu=nu-u)
      for (size_t v=0, xv=0; v<nv; ++v, xv=nv-v)
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
185
186
        grid2.v(u,v) = complex<T>(T(.5)*(grid(u,v)+grid(xu,xv)),
                                  T(.5)*(grid(u,v)-grid(xu,xv)));
Martin Reinecke's avatar
Martin Reinecke committed
187
188
189
    });
  }

190
191
template<typename T> void hartley2_2D(mav<T,2> &arr, size_t vlim,
  bool first_fast, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
192
  {
Martin Reinecke's avatar
Martin Reinecke committed
193
194
  size_t nu=arr.shape(0), nv=arr.shape(1);
  fmav<T> farr(arr);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
195
  if (2*vlim<nv)
Martin Reinecke's avatar
Martin Reinecke committed
196
    {
197
    if (!first_fast)
Martin Reinecke's avatar
Martin Reinecke committed
198
      r2r_separable_hartley(farr, farr, {1}, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
199
    auto flo = subarray(farr, {0,0},{MAXIDX,vlim});
Martin Reinecke's avatar
Martin Reinecke committed
200
    r2r_separable_hartley(flo, flo, {0}, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
201
    auto fhi = subarray(farr, {0,farr.shape(1)-vlim},{MAXIDX,vlim});
Martin Reinecke's avatar
Martin Reinecke committed
202
    r2r_separable_hartley(fhi, fhi, {0}, T(1), nthreads);
203
    if (first_fast)
Martin Reinecke's avatar
Martin Reinecke committed
204
205
206
207
      r2r_separable_hartley(farr, farr, {1}, T(1), nthreads);
    }
  else
    r2r_separable_hartley(farr, farr, {0,1}, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
208

209
  execParallel((nu+1)/2-1, nthreads, [&](size_t lo, size_t hi)
Martin Reinecke's avatar
Martin Reinecke committed
210
    {
Martin Reinecke's avatar
Martin Reinecke committed
211
    for(auto i=lo+1; i<hi+1; ++i)
Martin Reinecke's avatar
Martin Reinecke committed
212
213
      for(size_t j=1; j<(nv+1)/2; ++j)
         {
Martin Reinecke's avatar
Martin Reinecke committed
214
215
216
217
218
219
220
221
         T a = arr(i,j);
         T b = arr(nu-i,j);
         T c = arr(i,nv-j);
         T d = arr(nu-i,nv-j);
         arr.v(i,j) = T(0.5)*(a+b+c-d);
         arr.v(nu-i,j) = T(0.5)*(a+b+d-c);
         arr.v(i,nv-j) = T(0.5)*(a+c+d-b);
         arr.v(nu-i,nv-j) = T(0.5)*(b+c+d-a);
Martin Reinecke's avatar
Martin Reinecke committed
222
223
224
225
         }
     });
  }

226
class Uvwidx
Martin Reinecke's avatar
temp    
Martin Reinecke committed
227
  {
Martin Reinecke's avatar
Martin Reinecke committed
228
  public:
229
    uint16_t tile_u, tile_v, minplane;
Martin Reinecke's avatar
temp    
Martin Reinecke committed
230

231
232
233
234
235
    Uvwidx() {}
    Uvwidx(uint16_t tile_u_, uint16_t tile_v_, uint16_t minplane_)
      : tile_u(tile_u_), tile_v(tile_v_), minplane(minplane_) {}

    uint64_t idx() const
Martin Reinecke's avatar
temp    
Martin Reinecke committed
236
      { return (uint64_t(tile_u)<<32) + (uint64_t(tile_v)<<16) + minplane; }
237
238
239
240
    bool operator!=(const Uvwidx &other) const
      { return idx()!=other.idx(); }
    bool operator<(const Uvwidx &other) const
      { return idx()<other.idx(); }
Martin Reinecke's avatar
temp    
Martin Reinecke committed
241
242
  };

243
244
245
246
247
248
249
250
251
252
253
class RowchanRange
  {
  public:
    uint32_t row;
    uint16_t ch_begin, ch_end;

    RowchanRange(uint32_t row_, uint16_t ch_begin_, uint16_t ch_end_)
      : row(row_), ch_begin(ch_begin_), ch_end(ch_end_) {}
  };

using VVR = vector<pair<Uvwidx, vector<RowchanRange>>>;
Martin Reinecke's avatar
Martin Reinecke committed
254
255
256
257
258
259
260
261
262

struct UVW
  {
  double u, v, w;
  UVW() {}
  UVW(double u_, double v_, double w_) : u(u_), v(v_), w(w_) {}
  UVW operator* (double fct) const
    { return UVW(u*fct, v*fct, w*fct); }
  void Flip() { u=-u; v=-v; w=-w; }
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
263
  double FixW()
Martin Reinecke's avatar
Martin Reinecke committed
264
    {
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
265
266
267
    double res=1.-2.*(w<0);
    u*=res; v*=res; w*=res;
    return res;
Martin Reinecke's avatar
Martin Reinecke committed
268
269
270
271
272
273
274
275
    }
  };

class Baselines
  {
  protected:
    vector<UVW> coord;
    vector<double> f_over_c;
Martin Reinecke's avatar
Martin Reinecke committed
276
    size_t nrows, nchan;
Martin Reinecke's avatar
Martin Reinecke committed
277
    double umax, vmax;
Martin Reinecke's avatar
Martin Reinecke committed
278
279

  public:
Martin Reinecke's avatar
Martin Reinecke committed
280
    Baselines() = default;
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
281
282
    template<typename T> Baselines(const mav<T,2> &coord_,
      const mav<T,1> &freq, bool negate_v=false)
Martin Reinecke's avatar
Martin Reinecke committed
283
284
285
286
287
288
      {
      constexpr double speedOfLight = 299792458.;
      MR_assert(coord_.shape(1)==3, "dimension mismatch");
      nrows = coord_.shape(0);
      nchan = freq.shape(0);
      f_over_c.resize(nchan);
Martin Reinecke's avatar
Martin Reinecke committed
289
      double fcmax = 0;
Martin Reinecke's avatar
Martin Reinecke committed
290
291
      for (size_t i=0; i<nchan; ++i)
        {
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
292
        MR_assert(freq(i)>0, "negative channel frequency encountered");
Martin Reinecke's avatar
Martin Reinecke committed
293
        f_over_c[i] = freq(i)/speedOfLight;
Martin Reinecke's avatar
Martin Reinecke committed
294
        fcmax = max(fcmax, abs(f_over_c[i]));
Martin Reinecke's avatar
Martin Reinecke committed
295
296
        }
      coord.resize(nrows);
Martin Reinecke's avatar
Martin Reinecke committed
297
      double vfac = negate_v ? -1 : 1;
Martin Reinecke's avatar
Martin Reinecke committed
298
      umax=vmax=0;
Martin Reinecke's avatar
Martin Reinecke committed
299
300
301
302
303
304
305
306
      for (size_t i=0; i<coord.size(); ++i)
        {
        coord[i] = UVW(coord_(i,0), vfac*coord_(i,1), coord_(i,2));
        umax = max(umax, abs(coord_(i,0)));
        vmax = max(vmax, abs(coord_(i,1)));
        }
      umax *= fcmax;
      vmax *= fcmax;
Martin Reinecke's avatar
Martin Reinecke committed
307
308
      }

Martin Reinecke's avatar
Martin Reinecke committed
309
310
    UVW effectiveCoord(size_t row, size_t chan) const
      { return coord[row]*f_over_c[chan]; }
Martin Reinecke's avatar
Martin Reinecke committed
311
312
    double absEffectiveW(size_t row, size_t chan) const
      { return abs(coord[row].w*f_over_c[chan]); }
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
313
314
315
316
    UVW baseCoord(size_t row) const
      { return coord[row]; }
    double ffact(size_t chan) const
      { return f_over_c[chan];}
Martin Reinecke's avatar
Martin Reinecke committed
317
318
    size_t Nrows() const { return nrows; }
    size_t Nchannels() const { return nchan; }
Martin Reinecke's avatar
Martin Reinecke committed
319
320
    double Umax() const { return umax; }
    double Vmax() const { return vmax; }
Martin Reinecke's avatar
Martin Reinecke committed
321
322
  };

Martin Reinecke's avatar
Martin Reinecke committed
323

324
325
constexpr int logsquare=4;

Martin Reinecke's avatar
Martin Reinecke committed
326
template<typename Tcalc, typename Tacc, typename Tms, typename Timg> class Params
Martin Reinecke's avatar
Martin Reinecke committed
327
  {
328
  private:
Martin Reinecke's avatar
Martin Reinecke committed
329
330
    bool gridding;
    TimerHierarchy timers;
Martin Reinecke's avatar
Martin Reinecke committed
331
332
333
334
335
    const mav<complex<Tms>,2> &ms_in;
    mav<complex<Tms>,2> &ms_out;
    const mav<Timg,2> &dirty_in;
    mav<Timg,2> &dirty_out;
    const mav<Tms,2> &wgt;
Martin Reinecke's avatar
Martin Reinecke committed
336
337
338
339
340
341
342
343
    const mav<uint8_t,2> &mask;
    double pixsize_x, pixsize_y;
    size_t nxdirty, nydirty;
    double epsilon;
    bool do_wgridding;
    size_t nthreads;
    size_t verbosity;
    bool negate_v, divide_by_n;
Martin Reinecke's avatar
Martin Reinecke committed
344
    double sigma_min, sigma_max;
Martin Reinecke's avatar
Martin Reinecke committed
345
346
347
348
349
350
351

    Baselines bl;
    VVR ranges;
    double wmin_d, wmax_d;
    size_t nvis;
    double wmin, dw;
    size_t nplanes;
352
    double nm1min, nm1max;
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
353
354
355

    double lshift, mshift, nshift;
    bool shifting, lmshift, no_nshift;
356
357
358

    size_t nu, nv;
    double ofactor;
Martin Reinecke's avatar
Martin Reinecke committed
359

360
    shared_ptr<HornerKernel> krn;
Martin Reinecke's avatar
Martin Reinecke committed
361

Martin Reinecke's avatar
Martin Reinecke committed
362
363
364
    size_t supp, nsafe;
    double ushift, vshift;
    int maxiu0, maxiv0;
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
365
    size_t vlim;
366
    bool uv_side_fast;
Martin Reinecke's avatar
Martin Reinecke committed
367

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
368
369
370
371
    static_assert(sizeof(Tcalc)<=sizeof(Tacc), "bad type combination");
    static_assert(sizeof(Tms)<=sizeof(Tcalc), "bad type combination");
    static_assert(sizeof(Timg)<=sizeof(Tcalc), "bad type combination");

Martin Reinecke's avatar
Martin Reinecke committed
372
    static double phase(double x, double y, double w, bool adjoint, double nshift)
Martin Reinecke's avatar
Martin Reinecke committed
373
      {
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
374
      double tmp = 1.-x-y;
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
375
      if (tmp<=0) return 0; // no phase factor beyond the horizon
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
376
      double nm1 = (-x-y)/(sqrt(tmp)+1); // more accurate form of sqrt(1-x-y)-1
Martin Reinecke's avatar
Martin Reinecke committed
377
      double phs = w*(nm1+nshift);
Martin Reinecke's avatar
Martin Reinecke committed
378
      if (adjoint) phs *= -1;
Martin Reinecke's avatar
Martin Reinecke committed
379
380
      if constexpr (is_same<Tcalc, double>::value)
        return twopi*phs;
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
381
      // we are reducing accuracy, so let's better do range reduction first
Martin Reinecke's avatar
Martin Reinecke committed
382
      return twopi*(phs-floor(phs));
Martin Reinecke's avatar
Martin Reinecke committed
383
384
      }

Martin Reinecke's avatar
Martin Reinecke committed
385
    void grid2dirty_post(mav<Tcalc,2> &tmav, mav<Timg,2> &dirty) const
Martin Reinecke's avatar
Martin Reinecke committed
386
      {
387
388
389
      checkShape(dirty.shape(), {nxdirty, nydirty});
      auto cfu = krn->corfunc(nxdirty/2+1, 1./nu, nthreads);
      auto cfv = krn->corfunc(nydirty/2+1, 1./nv, nthreads);
390
      execParallel(nxdirty, nthreads, [&](size_t lo, size_t hi)
Martin Reinecke's avatar
Martin Reinecke committed
391
        {
Martin Reinecke's avatar
Martin Reinecke committed
392
        for (auto i=lo; i<hi; ++i)
Martin Reinecke's avatar
Martin Reinecke committed
393
          {
394
395
          int icfu = abs(int(nxdirty/2)-int(i));
          for (size_t j=0; j<nydirty; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
396
            {
397
398
            int icfv = abs(int(nydirty/2)-int(j));
            size_t i2 = nu-nxdirty/2+i;
Martin Reinecke's avatar
Martin Reinecke committed
399
            if (i2>=nu) i2-=nu;
400
            size_t j2 = nv-nydirty/2+j;
Martin Reinecke's avatar
Martin Reinecke committed
401
            if (j2>=nv) j2-=nv;
Martin Reinecke's avatar
Martin Reinecke committed
402
            dirty.v(i,j) = Timg(tmav(i2,j2)*cfu[icfu]*cfv[icfv]);
Martin Reinecke's avatar
Martin Reinecke committed
403
404
405
406
            }
          }
        });
      }
Martin Reinecke's avatar
Martin Reinecke committed
407
    void grid2dirty_post2(mav<complex<Tcalc>,2> &tmav, mav<Timg,2> &dirty, double w) const
Martin Reinecke's avatar
Martin Reinecke committed
408
      {
409
      checkShape(dirty.shape(), {nxdirty,nydirty});
Martin Reinecke's avatar
Martin Reinecke committed
410
411
      double x0 = lshift-0.5*nxdirty*pixsize_x,
             y0 = mshift-0.5*nydirty*pixsize_y;
Martin Reinecke's avatar
Martin Reinecke committed
412
413
      size_t nxd = lmshift ? nxdirty : (nxdirty/2+1);
      execParallel(nxd, nthreads, [&](size_t lo, size_t hi)
Martin Reinecke's avatar
Martin Reinecke committed
414
        {
Martin Reinecke's avatar
Martin Reinecke committed
415
416
        vector<complex<Tcalc>> phases(lmshift ? nydirty : (nydirty/2+1));
        vector<Tcalc> buf(lmshift ? nydirty : (nydirty/2+1));
Martin Reinecke's avatar
Martin Reinecke committed
417
        for (auto i=lo; i<hi; ++i)
Martin Reinecke's avatar
Martin Reinecke committed
418
          {
Martin Reinecke's avatar
Martin Reinecke committed
419
          double fx = sqr(x0+i*pixsize_x);
420
          size_t ix = nu-nxdirty/2+i;
Martin Reinecke's avatar
Martin Reinecke committed
421
          if (ix>=nu) ix-=nu;
Martin Reinecke's avatar
Martin Reinecke committed
422
          expi(phases, buf, [&](size_t i)
Martin Reinecke's avatar
Martin Reinecke committed
423
            { return Tcalc(phase(fx, sqr(y0+i*pixsize_y), w, true, nshift)); });
Martin Reinecke's avatar
Martin Reinecke committed
424
          if (lmshift)
425
            for (size_t j=0, jx=nv-nydirty/2; j<nydirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
Martin Reinecke's avatar
Martin Reinecke committed
426
427
              dirty.v(i,j) += Timg(tmav(ix,jx).real()*phases[j].real()
                                 - tmav(ix,jx).imag()*phases[j].imag());
Martin Reinecke's avatar
Martin Reinecke committed
428
          else
429
            {
Martin Reinecke's avatar
Martin Reinecke committed
430
431
432
433
434
435
436
            size_t i2 = nxdirty-i;
            size_t ix2 = nu-nxdirty/2+i2;
            if (ix2>=nu) ix2-=nu;
            if ((i>0)&&(i<i2))
              for (size_t j=0, jx=nv-nydirty/2; j<nydirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
                {
                size_t j2 = min(j, nydirty-j);
Martin Reinecke's avatar
Martin Reinecke committed
437
                Tcalc re = phases[j2].real(), im = phases[j2].imag();
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
438
                dirty.v(i ,j) += Timg(tmav(ix ,jx).real()*re - tmav(ix ,jx).imag()*im);
Martin Reinecke's avatar
Martin Reinecke committed
439
                dirty.v(i2,j) += Timg(tmav(ix2,jx).real()*re - tmav(ix2,jx).imag()*im);
Martin Reinecke's avatar
Martin Reinecke committed
440
441
442
443
444
                }
            else
              for (size_t j=0, jx=nv-nydirty/2; j<nydirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
                {
                size_t j2 = min(j, nydirty-j);
Martin Reinecke's avatar
Martin Reinecke committed
445
446
                Tcalc re = phases[j2].real(), im = phases[j2].imag();
                dirty.v(i,j) += Timg(tmav(ix,jx).real()*re - tmav(ix,jx).imag()*im); // lower left
Martin Reinecke's avatar
Martin Reinecke committed
447
                }
448
            }
Martin Reinecke's avatar
Martin Reinecke committed
449
450
451
452
          }
        });
      }

Martin Reinecke's avatar
Martin Reinecke committed
453
    void grid2dirty_overwrite(mav<Tcalc,2> &grid, mav<Timg,2> &dirty)
Martin Reinecke's avatar
Martin Reinecke committed
454
      {
Martin Reinecke's avatar
Martin Reinecke committed
455
      timers.push("FFT");
Martin Reinecke's avatar
Martin Reinecke committed
456
      checkShape(grid.shape(), {nu,nv});
Martin Reinecke's avatar
Martin Reinecke committed
457
      hartley2_2D(grid, vlim, uv_side_fast, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
458
      timers.poppush("grid correction");
Martin Reinecke's avatar
Martin Reinecke committed
459
      grid2dirty_post(grid, dirty);
Martin Reinecke's avatar
Martin Reinecke committed
460
      timers.pop();
Martin Reinecke's avatar
Martin Reinecke committed
461
462
      }

Martin Reinecke's avatar
Martin Reinecke committed
463
    void grid2dirty_c_overwrite_wscreen_add
Martin Reinecke's avatar
Martin Reinecke committed
464
      (mav<complex<Tcalc>,2> &grid, mav<Timg,2> &dirty, double w)
Martin Reinecke's avatar
Martin Reinecke committed
465
      {
Martin Reinecke's avatar
Martin Reinecke committed
466
      timers.push("FFT");
Martin Reinecke's avatar
Martin Reinecke committed
467
      checkShape(grid.shape(), {nu,nv});
Martin Reinecke's avatar
Martin Reinecke committed
468
      fmav<complex<Tcalc>> inout(grid);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
469
      if (2*vlim<nv)
Martin Reinecke's avatar
Martin Reinecke committed
470
        {
471
        if (!uv_side_fast)
Martin Reinecke's avatar
Martin Reinecke committed
472
          c2c(inout, inout, {1}, BACKWARD, Tcalc(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
473
        auto inout_lo = inout.subarray({0,0},{MAXIDX,vlim});
Martin Reinecke's avatar
Martin Reinecke committed
474
        c2c(inout_lo, inout_lo, {0}, BACKWARD, Tcalc(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
475
        auto inout_hi = inout.subarray({0,inout.shape(1)-vlim},{MAXIDX,vlim});
Martin Reinecke's avatar
Martin Reinecke committed
476
        c2c(inout_hi, inout_hi, {0}, BACKWARD, Tcalc(1), nthreads);
477
        if (uv_side_fast)
Martin Reinecke's avatar
Martin Reinecke committed
478
          c2c(inout, inout, {1}, BACKWARD, Tcalc(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
479
480
        }
      else
Martin Reinecke's avatar
Martin Reinecke committed
481
        c2c(inout, inout, {0,1}, BACKWARD, Tcalc(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
482
      timers.poppush("wscreen+grid correction");
Martin Reinecke's avatar
Martin Reinecke committed
483
      grid2dirty_post2(grid, dirty, w);
Martin Reinecke's avatar
Martin Reinecke committed
484
      timers.pop();
Martin Reinecke's avatar
Martin Reinecke committed
485
486
      }

Martin Reinecke's avatar
Martin Reinecke committed
487
    void dirty2grid_pre(const mav<Timg,2> &dirty, mav<Tcalc,2> &grid)
Martin Reinecke's avatar
Martin Reinecke committed
488
      {
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
489
      timers.push("zeroing grid");
490
      checkShape(dirty.shape(), {nxdirty, nydirty});
Martin Reinecke's avatar
Martin Reinecke committed
491
      checkShape(grid.shape(), {nu, nv});
492
493
      auto cfu = krn->corfunc(nxdirty/2+1, 1./nu, nthreads);
      auto cfv = krn->corfunc(nydirty/2+1, 1./nv, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
494
      // only zero the parts of the grid that are not filled afterwards anyway
Martin Reinecke's avatar
Martin Reinecke committed
495
496
497
      { auto a0 = subarray<2>(grid, {0,nydirty/2}, {nxdirty/2, nv-nydirty+1}); quickzero(a0, nthreads); }
      { auto a0 = subarray<2>(grid, {nxdirty/2,0}, {nu-nxdirty+1, nv}); quickzero(a0, nthreads); }
      { auto a0 = subarray<2>(grid, {nu-nxdirty/2+1, nydirty/2}, {nxdirty/2-1, nv-nydirty+1}); quickzero(a0, nthreads); }
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
498
      timers.poppush("grid correction");
499
      execParallel(nxdirty, nthreads, [&](size_t lo, size_t hi)
Martin Reinecke's avatar
Martin Reinecke committed
500
501
        {
        for (auto i=lo; i<hi; ++i)
Martin Reinecke's avatar
Martin Reinecke committed
502
          {
503
504
          int icfu = abs(int(nxdirty/2)-int(i));
          for (size_t j=0; j<nydirty; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
505
            {
506
507
            int icfv = abs(int(nydirty/2)-int(j));
            size_t i2 = nu-nxdirty/2+i;
Martin Reinecke's avatar
Martin Reinecke committed
508
            if (i2>=nu) i2-=nu;
509
            size_t j2 = nv-nydirty/2+j;
Martin Reinecke's avatar
Martin Reinecke committed
510
            if (j2>=nv) j2-=nv;
Martin Reinecke's avatar
Martin Reinecke committed
511
            grid.v(i2,j2) = dirty(i,j)*Tcalc(cfu[icfu]*cfv[icfv]);
Martin Reinecke's avatar
Martin Reinecke committed
512
513
514
            }
          }
        });
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
515
      timers.pop();
Martin Reinecke's avatar
Martin Reinecke committed
516
      }
Martin Reinecke's avatar
Martin Reinecke committed
517
    void dirty2grid_pre2(const mav<Timg,2> &dirty, mav<complex<Tcalc>,2> &grid, double w)
Martin Reinecke's avatar
Martin Reinecke committed
518
      {
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
519
      timers.push("zeroing grid");
520
      checkShape(dirty.shape(), {nxdirty, nydirty});
Martin Reinecke's avatar
Martin Reinecke committed
521
      checkShape(grid.shape(), {nu, nv});
Martin Reinecke's avatar
Martin Reinecke committed
522
      // only zero the parts of the grid that are not filled afterwards anyway
Martin Reinecke's avatar
Martin Reinecke committed
523
524
525
      { auto a0 = subarray<2>(grid, {0,nydirty/2}, {nxdirty/2, nv-nydirty+1}); quickzero(a0, nthreads); }
      { auto a0 = subarray<2>(grid, {nxdirty/2,0}, {nu-nxdirty+1, nv}); quickzero(a0, nthreads); }
      { auto a0 = subarray<2>(grid, {nu-nxdirty/2+1, nydirty/2}, {nxdirty/2-1, nv-nydirty+1}); quickzero(a0, nthreads); }
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
526
      timers.poppush("wscreen+grid correction");
Martin Reinecke's avatar
Martin Reinecke committed
527
528
      double x0 = lshift-0.5*nxdirty*pixsize_x,
             y0 = mshift-0.5*nydirty*pixsize_y;
Martin Reinecke's avatar
Martin Reinecke committed
529
530
      size_t nxd = lmshift ? nxdirty : (nxdirty/2+1);
      execParallel(nxd, nthreads, [&](size_t lo, size_t hi)
Martin Reinecke's avatar
Martin Reinecke committed
531
        {
Martin Reinecke's avatar
Martin Reinecke committed
532
533
        vector<complex<Tcalc>> phases(lmshift ? nydirty : (nydirty/2+1)); 
        vector<Tcalc> buf(lmshift ? nydirty : (nydirty/2+1)); 
Martin Reinecke's avatar
Martin Reinecke committed
534
        for(auto i=lo; i<hi; ++i)
Martin Reinecke's avatar
Martin Reinecke committed
535
          {
Martin Reinecke's avatar
Martin Reinecke committed
536
          double fx = sqr(x0+i*pixsize_x);
537
          size_t ix = nu-nxdirty/2+i;
Martin Reinecke's avatar
Martin Reinecke committed
538
          if (ix>=nu) ix-=nu;
Martin Reinecke's avatar
Martin Reinecke committed
539
          expi(phases, buf, [&](size_t i)
Martin Reinecke's avatar
Martin Reinecke committed
540
            { return Tcalc(phase(fx, sqr(y0+i*pixsize_y), w, false, nshift)); });
Martin Reinecke's avatar
Martin Reinecke committed
541
          if (lmshift)
542
            for (size_t j=0, jx=nv-nydirty/2; j<nydirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
Martin Reinecke's avatar
Martin Reinecke committed
543
              grid.v(ix,jx) = Tcalc(dirty(i,j))*phases[j];
Martin Reinecke's avatar
Martin Reinecke committed
544
          else
545
            {
Martin Reinecke's avatar
Martin Reinecke committed
546
547
548
549
550
551
552
            size_t i2 = nxdirty-i;
            size_t ix2 = nu-nxdirty/2+i2;
            if (ix2>=nu) ix2-=nu;
            if ((i>0)&&(i<i2))
              for (size_t j=0, jx=nv-nydirty/2; j<nydirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
                {
                size_t j2 = min(j, nydirty-j);
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
553
                grid.v(ix ,jx) = Tcalc(dirty(i ,j))*phases[j2]; // lower left
Martin Reinecke's avatar
Martin Reinecke committed
554
                grid.v(ix2,jx) = Tcalc(dirty(i2,j))*phases[j2]; // lower right
Martin Reinecke's avatar
Martin Reinecke committed
555
556
557
                }
            else
              for (size_t j=0, jx=nv-nydirty/2; j<nydirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
Martin Reinecke's avatar
Martin Reinecke committed
558
                grid.v(ix,jx) = Tcalc(dirty(i,j))*phases[min(j, nydirty-j)]; // lower left
559
            }
Martin Reinecke's avatar
Martin Reinecke committed
560
561
          }
        });
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
562
      timers.pop();
Martin Reinecke's avatar
Martin Reinecke committed
563
564
      }

Martin Reinecke's avatar
Martin Reinecke committed
565
    void dirty2grid(const mav<Timg,2> &dirty, mav<Tcalc,2> &grid)
Martin Reinecke's avatar
Martin Reinecke committed
566
567
      {
      dirty2grid_pre(dirty, grid);
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
568
      timers.push("FFT");
Martin Reinecke's avatar
Martin Reinecke committed
569
      hartley2_2D(grid, vlim, !uv_side_fast, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
570
      timers.pop();
Martin Reinecke's avatar
Martin Reinecke committed
571
572
      }

Martin Reinecke's avatar
Martin Reinecke committed
573
574
    void dirty2grid_c_wscreen(const mav<Timg,2> &dirty,
      mav<complex<Tcalc>,2> &grid, double w)
Martin Reinecke's avatar
Martin Reinecke committed
575
576
      {
      dirty2grid_pre2(dirty, grid, w);
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
577
      timers.push("FFT");
Martin Reinecke's avatar
Martin Reinecke committed
578
      fmav<complex<Tcalc>> inout(grid);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
579
      if (2*vlim<nv)
Martin Reinecke's avatar
Martin Reinecke committed
580
        {
581
        if (uv_side_fast)
Martin Reinecke's avatar
Martin Reinecke committed
582
          c2c(inout, inout, {1}, FORWARD, Tcalc(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
583
        auto inout_lo = inout.subarray({0,0},{MAXIDX,vlim});
Martin Reinecke's avatar
Martin Reinecke committed
584
        c2c(inout_lo, inout_lo, {0}, FORWARD, Tcalc(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
585
        auto inout_hi = inout.subarray({0,inout.shape(1)-vlim},{MAXIDX,vlim});
Martin Reinecke's avatar
Martin Reinecke committed
586
        c2c(inout_hi, inout_hi, {0}, FORWARD, Tcalc(1), nthreads);
587
        if (!uv_side_fast)
Martin Reinecke's avatar
Martin Reinecke committed
588
          c2c(inout, inout, {1}, FORWARD, Tcalc(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
589
590
        }
      else
Martin Reinecke's avatar
Martin Reinecke committed
591
        c2c(inout, inout, {0,1}, FORWARD, Tcalc(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
592
      timers.pop();
Martin Reinecke's avatar
Martin Reinecke committed
593
594
      }

Martin Reinecke's avatar
Martin Reinecke committed
595
    [[gnu::always_inline]] void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
Martin Reinecke's avatar
Martin Reinecke committed
596
      {
Martin Reinecke's avatar
Martin Reinecke committed
597
598
599
600
601
602
603
604
      u = u_in*pixsize_x;
      u = (u-floor(u))*nu;
      iu0 = min(int(u+ushift)-int(nu), maxiu0);
      u -= iu0;
      v = v_in*pixsize_y;
      v = (v-floor(v))*nv;
      iv0 = min(int(v+vshift)-int(nv), maxiv0);
      v -= iv0;
Martin Reinecke's avatar
Martin Reinecke committed
605
      }
Martin Reinecke's avatar
Martin Reinecke committed
606

Martin Reinecke's avatar
Martin Reinecke committed
607
    void countRanges()
Martin Reinecke's avatar
Martin Reinecke committed
608
      {
609
      timers.push("building index");
Martin Reinecke's avatar
Martin Reinecke committed
610
611
      size_t nrow=bl.Nrows(),
             nchan=bl.Nchannels();
Martin Reinecke's avatar
Martin Reinecke committed
612

Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
613
614
      if (do_wgridding)
        {
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
615
        dw = 0.5/ofactor/max(abs(nm1max+nshift), abs(nm1min+nshift));
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
616
        nplanes = size_t((wmax_d-wmin_d)/dw+supp);
617
        MR_assert(nplanes<(size_t(1)<<16), "too many w planes");
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
618
619
620
        wmin = (wmin_d+wmax_d)*0.5 - 0.5*(nplanes-1)*dw;
        }
      else
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
621
        dw = wmin = nplanes = 0;
Martin Reinecke's avatar
Martin Reinecke committed
622
623
624
625
      size_t nbunch = do_wgridding ? supp : 1;
      // we want a maximum deviation of 1% in gridding time between threads
      constexpr double max_asymm = 0.01;
      size_t max_allowed = size_t(nvis/double(nbunch*nthreads)*max_asymm);
Martin Reinecke's avatar
Martin Reinecke committed
626

Martin Reinecke's avatar
Martin Reinecke committed
627
628
      struct tmp2
        {
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
629
630
        size_t sz=0;
        vector<vector<RowchanRange>> v;
Martin Reinecke's avatar
Martin Reinecke committed
631
632
        void add(const RowchanRange &rng, size_t max_allowed)
          {
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
633
          if (v.empty() || (sz>=max_allowed))
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
634
            { v.emplace_back(); sz=0; }
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
635
636
          v.back().push_back(rng);
          sz += rng.ch_end-rng.ch_begin;
Martin Reinecke's avatar
Martin Reinecke committed
637
638
639
          }
        };
      using Vmap = map<Uvwidx, tmp2>;
Martin Reinecke's avatar
revert    
Martin Reinecke committed
640
      struct bufmap
Martin Reinecke's avatar
Martin Reinecke committed
641
        {
Martin Reinecke's avatar
revert    
Martin Reinecke committed
642
        Vmap m;
Martin Reinecke's avatar
Martin Reinecke committed
643
        mutex mut;
644
        uint64_t dummy[8]; // separator to keep every entry on a different cache line
Martin Reinecke's avatar
Martin Reinecke committed
645
        };
Martin Reinecke's avatar
Martin Reinecke committed
646
647
648
649
      checkShape(wgt.shape(),{nrow,nchan});
      checkShape(ms_in.shape(), {nrow,nchan});
      checkShape(mask.shape(), {nrow,nchan});

Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
650
651
      size_t ntiles_u = (nu>>logsquare) + 20,
             ntiles_v = (nv>>logsquare) + 20;
Martin Reinecke's avatar
Martin Reinecke committed
652
653
      vector<bufmap> buf(ntiles_u*ntiles_v);
      auto chunk = max<size_t>(1, nrow/(20*nthreads));
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
654
655
      auto xdw = 1./dw;
      auto shift = dw-(0.5*supp*dw)-wmin;
Martin Reinecke's avatar
Martin Reinecke committed
656
      execDynamic(nrow, nthreads, chunk, [&](Scheduler &sched)
Martin Reinecke's avatar
Martin Reinecke committed
657
        {
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
658
        vector<pair<uint16_t, uint16_t>> interbuf;
Martin Reinecke's avatar
Martin Reinecke committed
659
660
        while (auto rng=sched.getNext())
        for(auto irow=rng.lo; irow<rng.hi; ++irow)
Martin Reinecke's avatar
Martin Reinecke committed
661
          {
Martin Reinecke's avatar
Martin Reinecke committed
662
          bool on=false;
Martin Reinecke's avatar
Martin Reinecke committed
663
          Uvwidx uvwlast(0,0,0);
Martin Reinecke's avatar
Martin Reinecke committed
664
          size_t chan0=0;
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
665
666
667
668
669
670
671

          auto flush=[&]()
            {
            if (interbuf.empty()) return;
            auto tileidx = uvwlast.tile_u + ntiles_u*uvwlast.tile_v;
            lock_guard<mutex> lock(buf[tileidx].mut);
            auto &loc(buf[tileidx].m[uvwlast]);
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
672
            for (auto &x: interbuf)
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
673
674
675
676
              loc.add(RowchanRange(irow, x.first, x.second), max_allowed);
            interbuf.clear();
            };
          auto add=[&](uint16_t cb, uint16_t ce)
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
677
            { interbuf.emplace_back(cb, ce); };
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
678

Martin Reinecke's avatar
Martin Reinecke committed
679
          for (size_t ichan=0; ichan<nchan; ++ichan)
Martin Reinecke's avatar
Martin Reinecke committed
680
            {
Martin Reinecke's avatar
Martin Reinecke committed
681
            if (norm(ms_in(irow,ichan))*wgt(irow,ichan)*mask(irow,ichan)!=0)
Martin Reinecke's avatar
Martin Reinecke committed
682
683
              {
              auto uvw = bl.effectiveCoord(irow, ichan);
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
684
              uvw.FixW();
Martin Reinecke's avatar
Martin Reinecke committed
685
              double udum, vdum;
Martin Reinecke's avatar
Martin Reinecke committed
686
              int iu0, iv0, iw;
Martin Reinecke's avatar
Martin Reinecke committed
687
              getpix(uvw.u, uvw.v, udum, vdum, iu0, iv0);
Martin Reinecke's avatar
Martin Reinecke committed
688
689
              iu0 = (iu0+nsafe)>>logsquare;
              iv0 = (iv0+nsafe)>>logsquare;
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
690
              iw = do_wgridding ? max(0,int((uvw.w+shift)*xdw)) : 0;
691
              Uvwidx uvwcur(iu0, iv0, iw);
Martin Reinecke's avatar
Martin Reinecke committed
692
693
694
              if (!on) // new active region
                {
                on=true;
Martin Reinecke's avatar
test    
Martin Reinecke committed
695
                if (uvwlast!=uvwcur) flush();
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
696
                uvwlast=uvwcur; chan0=ichan;
Martin Reinecke's avatar
Martin Reinecke committed
697
                }
698
              else if (uvwlast!=uvwcur) // change of active region
Martin Reinecke's avatar
Martin Reinecke committed
699
                {
Martin Reinecke's avatar
test    
Martin Reinecke committed
700
701
                add(chan0, ichan);
                flush();
Martin Reinecke's avatar
Martin Reinecke committed
702
                uvwlast=uvwcur; chan0=ichan;
Martin Reinecke's avatar
Martin Reinecke committed
703
704
705
706
                }
              }
            else if (on) // end of active region
              {
Martin Reinecke's avatar
test    
Martin Reinecke committed
707
              add(chan0, ichan);
Martin Reinecke's avatar
Martin Reinecke committed
708
709
              on=false;
              }
Martin Reinecke's avatar
Martin Reinecke committed
710
            }
Martin Reinecke's avatar
Martin Reinecke committed
711
          if (on) // end of active region at last channel
Martin Reinecke's avatar
test    
Martin Reinecke committed
712
713
            add(chan0, nchan);
          flush();
Martin Reinecke's avatar
Martin Reinecke committed
714
715
          }
        });
Martin Reinecke's avatar
Martin Reinecke committed
716

Martin Reinecke's avatar
Martin Reinecke committed
717
      size_t total=0;
Martin Reinecke's avatar
Martin Reinecke committed
718
719
720
      for (const auto &x: buf)
        for (const auto &y: x.m)
          total += y.second.v.size();
Martin Reinecke's avatar
Martin Reinecke committed
721
      ranges.reserve(total);
Martin Reinecke's avatar
test    
Martin Reinecke committed
722
      for (auto &x: buf)
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
723
724
725
        for (auto &y: x.m)
          for (auto &z: y.second.v)
            ranges.emplace_back(y.first, move(z));
Martin Reinecke's avatar
Martin Reinecke committed
726
      timers.pop();
Martin Reinecke's avatar
Martin Reinecke committed
727
      }
Martin Reinecke's avatar
test    
Martin Reinecke committed
728

Martin Reinecke's avatar
Martin Reinecke committed
729
    template<size_t supp, bool wgrid> class HelperX2g2
Martin Reinecke's avatar
Martin Reinecke committed
730
      {
Martin Reinecke's avatar
Martin Reinecke committed
731
      public:
Martin Reinecke's avatar
Martin Reinecke committed
732
        static constexpr size_t vlen = mysimd<Tacc>::size();
Martin Reinecke's avatar
Martin Reinecke committed
733
734
735
736
737
738
        static constexpr size_t nvec = (supp+vlen-1)/vlen;

      private:
        static constexpr int nsafe = (supp+1)/2;
        static constexpr int su = 2*nsafe+(1<<logsquare);
        static constexpr int sv = 2*nsafe+(1<<logsquare);
Martin Reinecke's avatar
bug fix    
Martin Reinecke committed
739
        static constexpr int svvec = sv+vlen-1;
Martin Reinecke's avatar
Martin Reinecke committed
740
741
        static constexpr double xsupp=2./supp;
        const Params *parent;
Martin Reinecke's avatar
Martin Reinecke committed
742
        TemplateKernel<supp, mysimd<Tacc>> tkrn;
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
743
        mav<complex<Tcalc>,2> &grid;
Martin Reinecke's avatar
Martin Reinecke committed
744
745
746
        int iu0, iv0; // start index of the current visibility
        int bu0, bv0; // start index of the current buffer

Martin Reinecke's avatar
Martin Reinecke committed
747
748
        mav<Tacc,2> bufr, bufi;
        Tacc *px0r, *px0i;
Martin Reinecke's avatar
Martin Reinecke committed
749
        double w0, xdw;
Martin Reinecke's avatar
Martin Reinecke committed
750
        vector<mutex> &locks;
Martin Reinecke's avatar
Martin Reinecke committed
751
752
753
754
755
756

        DUCC0_NOINLINE void dump()
          {
          int inu = int(parent->nu);
          int inv = int(parent->nv);
          if (bu0<-nsafe) return; // nothing written into buffer yet
Martin Reinecke's avatar
Martin Reinecke committed
757

Martin Reinecke's avatar
Martin Reinecke committed
758
759
760
761
762
763
          int idxu = (bu0+inu)%inu;
          int idxv0 = (bv0+inv)%inv;
          for (int iu=0; iu<su; ++iu)
            {
            int idxv = idxv0;
            {
Martin Reinecke's avatar
Martin Reinecke committed
764
            lock_guard<mutex> lock(locks[idxu]);
Martin Reinecke's avatar
Martin Reinecke committed
765
766
            for (int iv=0; iv<sv; ++iv)
              {
Martin Reinecke's avatar
Martin Reinecke committed
767
              grid.v(idxu,idxv) += complex<Tcalc>(Tcalc(bufr(iu,iv)), Tcalc(bufi(iu,iv)));
Martin Reinecke's avatar
Martin Reinecke committed
768
769
770
771
772
773
774
              bufr.v(iu,iv) = bufi.v(iu,iv) = 0;
              if (++idxv>=inv) idxv=0;
              }
            }
            if (++idxu>=inu) idxu=0;
            }
          }
Martin Reinecke's avatar
Martin Reinecke committed
775

Martin Reinecke's avatar
Martin Reinecke committed
776
      public:
Martin Reinecke's avatar
Martin Reinecke committed
777
        Tacc * DUCC0_RESTRICT p0r, * DUCC0_RESTRICT p0i;
Martin Reinecke's avatar
Martin Reinecke committed
778
        union kbuf {
Martin Reinecke's avatar
Martin Reinecke committed
779
780
          Tacc scalar[2*nvec*vlen];
          mysimd<Tacc> simd[2*nvec];
781
782
783
#if defined(_MSC_VER)
          kbuf() {}
#endif
Martin Reinecke's avatar
Martin Reinecke committed
784
785
786
          };
        kbuf buf;

Martin Reinecke's avatar
Martin Reinecke committed
787
        HelperX2g2(const Params *parent_, mav<complex<Tcalc>,2> &grid_,
Martin Reinecke's avatar
Martin Reinecke committed
788
          vector<mutex> &locks_, double w0_=-1, double dw_=-1)
Martin Reinecke's avatar
Martin Reinecke committed
789
790
791
792
793
794
795
          : parent(parent_), tkrn(*parent->krn), grid(grid_),
            iu0(-1000000), iv0(-1000000),
            bu0(-1000000), bv0(-1000000),
            bufr({size_t(su),size_t(svvec)}),
            bufi({size_t(su),size_t(svvec)}),
            px0r(bufr.vdata()), px0i(bufi.vdata()),
            w0(w0_),
Martin Reinecke's avatar
Martin Reinecke committed
796
            xdw(1./dw_),
Martin Reinecke's avatar
Martin Reinecke committed
797
798
799
800
801
            locks(locks_)
          { checkShape(grid.shape(), {parent->nu,parent->nv}); }
        ~HelperX2g2() { dump(); }

        constexpr int lineJump() const { return svvec; }
Martin Reinecke's avatar
Martin Reinecke committed
802
803

        [[gnu::always_inline]] [[gnu::hot]] void prep(const UVW &in, size_t nth=0)
Martin Reinecke's avatar
Martin Reinecke committed
804
          {
Martin Reinecke's avatar
Martin Reinecke committed
805
          double ufrac, vfrac;
Martin Reinecke's avatar
Martin Reinecke committed
806
807
          auto iu0old = iu0;
          auto iv0old = iv0;
Martin Reinecke's avatar
Martin Reinecke committed
808
809
810
          parent->getpix(in.u, in.v, ufrac, vfrac, iu0, iv0);
          auto x0 = -ufrac*2+(supp-1);
          auto y0 = -vfrac*2+(supp-1);
Martin Reinecke's avatar
Martin Reinecke committed
811
          if constexpr(wgrid)
Martin Reinecke's avatar
Martin Reinecke committed
812
            tkrn.eval2s(Tacc(x0), Tacc(y0), Tacc(xdw*(w0-in.w)), nth, &buf.simd[0]);
Martin Reinecke's avatar
Martin Reinecke committed
813
          else
Martin Reinecke's avatar
Martin Reinecke committed
814
            tkrn.eval2(Tacc(x0), Tacc(y0), &buf.simd[0]);
Martin Reinecke's avatar
Martin Reinecke committed
815
816
817
818
819
820
821
822
823
824
825
826
          if ((iu0==iu0old) && (iv0==iv0old)) return;
          if ((iu0<bu0) || (iv0<bv0) || (iu0+int(supp)>bu0+su) || (iv0+int(supp)>bv0+sv))
            {
            dump();
            bu0=((((iu0+nsafe)>>logsquare)<<logsquare))-nsafe;
            bv0=((((iv0+nsafe)>>logsquare)<<logsquare))-nsafe;
            }
          auto ofs = (iu0-bu0)*svvec + iv0-bv0;
          p0r = px0r+ofs;
          p0i = px0i+ofs;
          }
      };
Martin Reinecke's avatar
Martin Reinecke committed
827