gridder_cxx.h 49.4 KB
Newer Older
Martin Reinecke's avatar
Martin Reinecke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#ifndef GRIDDER_CXX_H
#define GRIDDER_CXX_H

/*
 *  This file is part of nifty_gridder.
 *
 *  nifty_gridder is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  nifty_gridder is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with nifty_gridder; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

22
/* Copyright (C) 2019-2020 Max-Planck-Society
Martin Reinecke's avatar
Martin Reinecke committed
23
24
25
26
27
28
29
30
   Author: Martin Reinecke */

#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cmath>
#include <vector>
#include <array>
Martin Reinecke's avatar
Martin Reinecke committed
31
#include <memory>
Martin Reinecke's avatar
Martin Reinecke committed
32

Martin Reinecke's avatar
Martin Reinecke committed
33
34
35
36
37
#include "ducc0/infra/error_handling.h"
#include "ducc0/math/fft.h"
#include "ducc0/infra/threading.h"
#include "ducc0/infra/useful_macros.h"
#include "ducc0/infra/mav.h"
Martin Reinecke's avatar
Martin Reinecke committed
38
#include "ducc0/infra/simd.h"
Martin Reinecke's avatar
Martin Reinecke committed
39
#include "ducc0/math/gridding_kernel.h"
Martin Reinecke's avatar
Martin Reinecke committed
40

Martin Reinecke's avatar
Martin Reinecke committed
41
namespace ducc0 {
Martin Reinecke's avatar
Martin Reinecke committed
42

Martin Reinecke's avatar
Martin Reinecke committed
43
namespace detail_gridder {
Martin Reinecke's avatar
Martin Reinecke committed
44
45
46
47
48

using namespace std;

template<size_t ndim> void checkShape
  (const array<size_t, ndim> &shp1, const array<size_t, ndim> &shp2)
49
  { MR_assert(shp1==shp2, "shape mismatch"); }
Martin Reinecke's avatar
Martin Reinecke committed
50
51
52
53
54
55
56
57
58

template<typename T> inline T fmod1 (T v)
  { return v-floor(v); }

//
// Start of real gridder functionality
//

template<typename T> void complex2hartley
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
59
  (const mav<complex<T>, 2> &grid, mav<T,2> &grid2, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
60
  {
61
  MR_assert(grid.conformable(grid2), "shape mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
62
63
64
65
66
67
68
69
70
71
  size_t nu=grid.shape(0), nv=grid.shape(1);

  execStatic(nu, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto u=rng.lo; u<rng.hi; ++u)
      {
      size_t xu = (u==0) ? 0 : nu-u;
      for (size_t v=0; v<nv; ++v)
        {
        size_t xv = (v==0) ? 0 : nv-v;
Martin Reinecke's avatar
Martin Reinecke committed
72
73
        grid2.v(u,v) = T(0.5)*(grid( u, v).real()+grid( u, v).imag()+
                               grid(xu,xv).real()-grid(xu,xv).imag());
Martin Reinecke's avatar
Martin Reinecke committed
74
75
76
77
78
79
        }
      }
    });
  }

template<typename T> void hartley2complex
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
80
  (const mav<T,2> &grid, mav<complex<T>,2> &grid2, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
81
  {
82
  MR_assert(grid.conformable(grid2), "shape mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
83
84
85
86
87
88
89
90
91
92
93
94
  size_t nu=grid.shape(0), nv=grid.shape(1);

  execStatic(nu, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto u=rng.lo; u<rng.hi; ++u)
      {
      size_t xu = (u==0) ? 0 : nu-u;
      for (size_t v=0; v<nv; ++v)
        {
        size_t xv = (v==0) ? 0 : nv-v;
        T v1 = T(0.5)*grid( u, v);
        T v2 = T(0.5)*grid(xu,xv);
Martin Reinecke's avatar
Martin Reinecke committed
95
        grid2.v(u,v) = std::complex<T>(v1+v2, v1-v2);
Martin Reinecke's avatar
Martin Reinecke committed
96
97
98
99
100
        }
      }
    });
  }

101
102
template<typename T> void hartley2_2D(mav<T,2> &arr, size_t vlim,
  bool first_fast, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
103
  {
Martin Reinecke's avatar
Martin Reinecke committed
104
105
  size_t nu=arr.shape(0), nv=arr.shape(1);
  fmav<T> farr(arr);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
106
  if (2*vlim<nv)
Martin Reinecke's avatar
Martin Reinecke committed
107
    {
108
    if (!first_fast)
Martin Reinecke's avatar
Martin Reinecke committed
109
      r2r_separable_hartley(farr, farr, {1}, T(1), nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
110
    auto flo = farr.subarray({0,0},{farr.shape(0),vlim});
Martin Reinecke's avatar
Martin Reinecke committed
111
    r2r_separable_hartley(flo, flo, {0}, T(1), nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
112
    auto fhi = farr.subarray({0,farr.shape(1)-vlim},{farr.shape(0),vlim});
Martin Reinecke's avatar
Martin Reinecke committed
113
    r2r_separable_hartley(fhi, fhi, {0}, T(1), nthreads);
114
    if (first_fast)
Martin Reinecke's avatar
Martin Reinecke committed
115
116
117
118
      r2r_separable_hartley(farr, farr, {1}, T(1), nthreads);
    }
  else
    r2r_separable_hartley(farr, farr, {0,1}, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
119
120
121
122
123
  execStatic((nu+1)/2-1, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto i=rng.lo+1; i<rng.hi+1; ++i)
      for(size_t j=1; j<(nv+1)/2; ++j)
         {
Martin Reinecke's avatar
Martin Reinecke committed
124
125
126
127
128
129
130
131
         T a = arr(i,j);
         T b = arr(nu-i,j);
         T c = arr(i,nv-j);
         T d = arr(nu-i,nv-j);
         arr.v(i,j) = T(0.5)*(a+b+c-d);
         arr.v(nu-i,j) = T(0.5)*(a+b+d-c);
         arr.v(i,nv-j) = T(0.5)*(a+c+d-b);
         arr.v(nu-i,nv-j) = T(0.5)*(b+c+d-a);
Martin Reinecke's avatar
Martin Reinecke committed
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
         }
     });
  }

using idx_t = uint32_t;

struct RowChan
  {
  idx_t row, chan;
  };

struct UVW
  {
  double u, v, w;
  UVW() {}
  UVW(double u_, double v_, double w_) : u(u_), v(v_), w(w_) {}
  UVW operator* (double fct) const
    { return UVW(u*fct, v*fct, w*fct); }
  void Flip() { u=-u; v=-v; w=-w; }
  bool FixW()
    {
    bool flip = w<0;
    if (flip) Flip();
    return flip;
    }
  };

class Baselines
  {
  protected:
    vector<UVW> coord;
    vector<double> f_over_c;
    idx_t nrows, nchan;
    idx_t shift, mask;
Martin Reinecke's avatar
Martin Reinecke committed
166
    double umax, vmax;
Martin Reinecke's avatar
Martin Reinecke committed
167
168

  public:
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
169
170
    template<typename T> Baselines(const mav<T,2> &coord_,
      const mav<T,1> &freq, bool negate_v=false)
Martin Reinecke's avatar
Martin Reinecke committed
171
172
173
174
175
176
177
178
179
180
181
182
183
184
      {
      constexpr double speedOfLight = 299792458.;
      MR_assert(coord_.shape(1)==3, "dimension mismatch");
      auto hugeval = size_t(~(idx_t(0)));
      MR_assert(coord_.shape(0)<hugeval, "too many entries in MS");
      MR_assert(coord_.shape(1)<hugeval, "too many entries in MS");
      MR_assert(coord_.size()<hugeval, "too many entries in MS");
      nrows = coord_.shape(0);
      nchan = freq.shape(0);
      shift=0;
      while((idx_t(1)<<shift)<nchan) ++shift;
      mask=(idx_t(1)<<shift)-1;
      MR_assert(nrows*(mask+1)<hugeval, "too many entries in MS");
      f_over_c.resize(nchan);
Martin Reinecke's avatar
Martin Reinecke committed
185
      double fcmax = 0;
Martin Reinecke's avatar
Martin Reinecke committed
186
187
      for (size_t i=0; i<nchan; ++i)
        {
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
188
        MR_assert(freq(i)>0, "negative channel frequency encountered");
Martin Reinecke's avatar
Martin Reinecke committed
189
        f_over_c[i] = freq(i)/speedOfLight;
Martin Reinecke's avatar
Martin Reinecke committed
190
        fcmax = max(fcmax, abs(f_over_c[i]));
Martin Reinecke's avatar
Martin Reinecke committed
191
192
        }
      coord.resize(nrows);
Martin Reinecke's avatar
Martin Reinecke committed
193
194
195
196
197
198
199
200
201
202
      double vfac = negate_v ? -1 : 1;
      vmax=0;
      for (size_t i=0; i<coord.size(); ++i)
        {
        coord[i] = UVW(coord_(i,0), vfac*coord_(i,1), coord_(i,2));
        umax = max(umax, abs(coord_(i,0)));
        vmax = max(vmax, abs(coord_(i,1)));
        }
      umax *= fcmax;
      vmax *= fcmax;
Martin Reinecke's avatar
Martin Reinecke committed
203
204
205
206
207
208
209
210
211
212
213
214
215
      }

    RowChan getRowChan(idx_t index) const
      { return RowChan{index>>shift, index&mask}; }

    UVW effectiveCoord(const RowChan &rc) const
      { return coord[rc.row]*f_over_c[rc.chan]; }
    UVW effectiveCoord(idx_t index) const
      { return effectiveCoord(getRowChan(index)); }
    size_t Nrows() const { return nrows; }
    size_t Nchannels() const { return nchan; }
    idx_t getIdx(idx_t irow, idx_t ichan) const
      { return ichan+(irow<<shift); }
Martin Reinecke's avatar
Martin Reinecke committed
216
217
    double Umax() const { return umax; }
    double Vmax() const { return vmax; }
Martin Reinecke's avatar
Martin Reinecke committed
218
219
  };

Martin Reinecke's avatar
Martin Reinecke committed
220
template<typename T> class GridderConfig
Martin Reinecke's avatar
Martin Reinecke committed
221
222
223
  {
  protected:
    size_t nx_dirty, ny_dirty, nu, nv;
Martin Reinecke's avatar
Martin Reinecke committed
224
225
    double ofactor;

Martin Reinecke's avatar
Martin Reinecke committed
226
  // FIXME: this should probably be done more cleanly
Martin Reinecke's avatar
Martin Reinecke committed
227
228
229
230
231
  public:
    shared_ptr<GriddingKernel<T>> krn;

  protected:
    double psx, psy;
Martin Reinecke's avatar
Martin Reinecke committed
232
233
234
235
236
    size_t supp, nsafe;
    double beta;
    size_t nthreads;
    double ushift, vshift;
    int maxiu0, maxiv0;
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
237
    size_t vlim;
238
    bool uv_side_fast;
Martin Reinecke's avatar
Martin Reinecke committed
239

Martin Reinecke's avatar
Martin Reinecke committed
240
    T phase (T x, T y, T w, bool adjoint) const
Martin Reinecke's avatar
Martin Reinecke committed
241
242
243
244
245
      {
      constexpr T pi = T(3.141592653589793238462643383279502884197);
      T tmp = 1-x-y;
      if (tmp<=0) return 1; // no phase factor beyond the horizon
      T nm1 = (-x-y)/(sqrt(tmp)+1); // more accurate form of sqrt(1-x-y)-1
Martin Reinecke's avatar
Martin Reinecke committed
246
247
248
      T phs = 2*pi*w*nm1;
      if (adjoint) phs *= -1;
      return phs;
Martin Reinecke's avatar
Martin Reinecke committed
249
250
251
252
      }

  public:
    GridderConfig(size_t nxdirty, size_t nydirty, size_t nu_, size_t nv_,
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
253
254
      double epsilon, double pixsize_x, double pixsize_y,
      const Baselines &baselines, size_t nthreads_)
Martin Reinecke's avatar
Martin Reinecke committed
255
256
      : nx_dirty(nxdirty), ny_dirty(nydirty), nu(nu_), nv(nv_),
        ofactor(min(double(nu)/nxdirty, double(nv)/nydirty)),
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
257
        krn(selectKernel<T>(ofactor, epsilon)),
Martin Reinecke's avatar
Martin Reinecke committed
258
        psx(pixsize_x), psy(pixsize_y),
Martin Reinecke's avatar
Martin Reinecke committed
259
        supp(krn->support()), nsafe((supp+1)/2),
Martin Reinecke's avatar
Martin Reinecke committed
260
261
        nthreads(nthreads_),
        ushift(supp*(-0.5)+1+nu), vshift(supp*(-0.5)+1+nv),
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
262
        maxiu0((nu+nsafe)-supp), maxiv0((nv+nsafe)-supp),
263
264
        vlim(min(nv/2, size_t(nv*baselines.Vmax()*psy+0.5*supp+1))),
        uv_side_fast(true)
Martin Reinecke's avatar
Martin Reinecke committed
265
      {
266
267
268
269
270
271
      size_t vlim2 = (nydirty+1)/2+(supp+1)/2;
      if (vlim2<vlim)
        {
        vlim = vlim2;
        uv_side_fast = false;
        }
Martin Reinecke's avatar
Martin Reinecke committed
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
      MR_assert(nu>=2*nsafe, "nu too small");
      MR_assert(nv>=2*nsafe, "nv too small");
      MR_assert((nx_dirty&1)==0, "nx_dirty must be even");
      MR_assert((ny_dirty&1)==0, "ny_dirty must be even");
      MR_assert((nu&1)==0, "nu must be even");
      MR_assert((nv&1)==0, "nv must be even");
      MR_assert(epsilon>0, "epsilon must be positive");
      MR_assert(pixsize_x>0, "pixsize_x must be positive");
      MR_assert(pixsize_y>0, "pixsize_y must be positive");
      }
    size_t Nxdirty() const { return nx_dirty; }
    size_t Nydirty() const { return ny_dirty; }
    double Pixsize_x() const { return psx; }
    double Pixsize_y() const { return psy; }
    size_t Nu() const { return nu; }
    size_t Nv() const { return nv; }
    size_t Supp() const { return supp; }
    size_t Nsafe() const { return nsafe; }
    size_t Nthreads() const { return nthreads; }

Martin Reinecke's avatar
Martin Reinecke committed
292
    void grid2dirty_post(mav<T,2> &tmav,
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
293
      mav<T,2> &dirty) const
Martin Reinecke's avatar
Martin Reinecke committed
294
      {
295
      checkShape(dirty.shape(), {nx_dirty, ny_dirty});
Martin Reinecke's avatar
Martin Reinecke committed
296
297
      auto cfu = krn->corfunc(nx_dirty/2+1, 1./nu, nthreads);
      auto cfv = krn->corfunc(ny_dirty/2+1, 1./nv, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
298
299
300
301
302
303
304
305
306
307
308
309
      execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched)
        {
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
          int icfu = abs(int(nx_dirty/2)-int(i));
          for (size_t j=0; j<ny_dirty; ++j)
            {
            int icfv = abs(int(ny_dirty/2)-int(j));
            size_t i2 = nu-nx_dirty/2+i;
            if (i2>=nu) i2-=nu;
            size_t j2 = nv-ny_dirty/2+j;
            if (j2>=nv) j2-=nv;
Martin Reinecke's avatar
Martin Reinecke committed
310
            dirty.v(i,j) = tmav(i2,j2)*T(cfu[icfu]*cfv[icfv]);
Martin Reinecke's avatar
Martin Reinecke committed
311
312
313
314
            }
          }
        });
      }
Martin Reinecke's avatar
Martin Reinecke committed
315
    void grid2dirty_post2(
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
316
      mav<complex<T>,2> &tmav, mav<T,2> &dirty, T w) const
Martin Reinecke's avatar
Martin Reinecke committed
317
318
319
320
321
322
      {
      checkShape(dirty.shape(), {nx_dirty,ny_dirty});
      double x0 = -0.5*nx_dirty*psx,
             y0 = -0.5*ny_dirty*psy;
      execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
        {
Martin Reinecke's avatar
Martin Reinecke committed
323
324
325
326
        using vtype = native_simd<T>;
        constexpr size_t vlen=vtype::size();
        size_t nvec = (ny_dirty/2+1+(vlen-1))/vlen;
        vector<vtype> ph(nvec), sp(nvec), cp(nvec);
Martin Reinecke's avatar
Martin Reinecke committed
327
328
329
330
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
          T fx = T(x0+i*psx);
          fx *= fx;
Martin Reinecke's avatar
Martin Reinecke committed
331
332
333
334
335
          size_t ix = nu-nx_dirty/2+i;
          if (ix>=nu) ix-=nu;
          size_t i2 = nx_dirty-i;
          size_t ix2 = nu-nx_dirty/2+i2;
          if (ix2>=nu) ix2-=nu;
Martin Reinecke's avatar
Martin Reinecke committed
336
337
338
          for (size_t j=0; j<=ny_dirty/2; ++j)
            {
            T fy = T(y0+j*psy);
Martin Reinecke's avatar
Martin Reinecke committed
339
340
341
342
343
344
345
346
347
348
            ph[j/vlen][j%vlen] = phase(fx, fy*fy, w, true);
            }
          for (size_t j=0; j<nvec; ++j)
            for (size_t k=0; k<vlen; ++k)
               sp[j][k]=sin(ph[j][k]);
          for (size_t j=0; j<nvec; ++j)
            for (size_t k=0; k<vlen; ++k)
              cp[j][k]=cos(ph[j][k]);
          if ((i>0)&&(i<i2))
            for (size_t j=0, jx=nv-ny_dirty/2; j<ny_dirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
Martin Reinecke's avatar
Martin Reinecke committed
349
              {
Martin Reinecke's avatar
Martin Reinecke committed
350
              size_t j2 = min(j, ny_dirty-j);
Martin Reinecke's avatar
Martin Reinecke committed
351
352
353
              T re = cp[j2/vlen][j2%vlen], im = sp[j2/vlen][j2%vlen];
              dirty.v(i,j) += tmav(ix,jx).real()*re - tmav(ix,jx).imag()*im;
              dirty.v(i2,j) += tmav(ix2,jx).real()*re - tmav(ix2,jx).imag()*im;
Martin Reinecke's avatar
Martin Reinecke committed
354
              }
Martin Reinecke's avatar
Martin Reinecke committed
355
356
357
358
          else
            for (size_t j=0, jx=nv-ny_dirty/2; j<ny_dirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
              {
              size_t j2 = min(j, ny_dirty-j);
Martin Reinecke's avatar
Martin Reinecke committed
359
360
              T re = cp[j2/vlen][j2%vlen], im = sp[j2/vlen][j2%vlen];
              dirty.v(i,j) += tmav(ix,jx).real()*re - tmav(ix,jx).imag()*im; // lower left
Martin Reinecke's avatar
Martin Reinecke committed
361
              }
Martin Reinecke's avatar
Martin Reinecke committed
362
363
364
365
          }
        });
      }

Martin Reinecke's avatar
Martin Reinecke committed
366
    void grid2dirty(const mav<T,2> &grid,
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
367
      mav<T,2> &dirty) const
Martin Reinecke's avatar
Martin Reinecke committed
368
369
      {
      checkShape(grid.shape(), {nu,nv});
370
      mav<T,2> tmav({nu,nv});
Martin Reinecke's avatar
Martin Reinecke committed
371
      tmav.apply(grid, [](T&a, T b) {a=b;});
372
      hartley2_2D<T>(tmav, vlim, uv_side_fast, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
373
374
375
      grid2dirty_post(tmav, dirty);
      }

Martin Reinecke's avatar
Martin Reinecke committed
376
    void grid2dirty_c_overwrite_wscreen_add
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
377
      (mav<complex<T>,2> &grid, mav<T,2> &dirty, T w) const
Martin Reinecke's avatar
Martin Reinecke committed
378
379
      {
      checkShape(grid.shape(), {nu,nv});
Martin Reinecke's avatar
Martin Reinecke committed
380
      fmav<complex<T>> inout(grid);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
381
      if (2*vlim<nv)
Martin Reinecke's avatar
Martin Reinecke committed
382
        {
383
384
        if (!uv_side_fast)
          c2c(inout, inout, {1}, BACKWARD, T(1), nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
385
        auto inout_lo = inout.subarray({0,0},{inout.shape(0),vlim});
Martin Reinecke's avatar
Martin Reinecke committed
386
        c2c(inout_lo, inout_lo, {0}, BACKWARD, T(1), nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
387
        auto inout_hi = inout.subarray({0,inout.shape(1)-vlim},{inout.shape(0),vlim});
Martin Reinecke's avatar
Martin Reinecke committed
388
        c2c(inout_hi, inout_hi, {0}, BACKWARD, T(1), nthreads);
389
390
        if (uv_side_fast)
          c2c(inout, inout, {1}, BACKWARD, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
391
392
393
        }
      else
        c2c(inout, inout, {0,1}, BACKWARD, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
394
395
396
      grid2dirty_post2(grid, dirty, w);
      }

Martin Reinecke's avatar
Martin Reinecke committed
397
    void dirty2grid_pre(const mav<T,2> &dirty,
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
398
      mav<T,2> &grid) const
Martin Reinecke's avatar
Martin Reinecke committed
399
400
401
      {
      checkShape(dirty.shape(), {nx_dirty, ny_dirty});
      checkShape(grid.shape(), {nu, nv});
Martin Reinecke's avatar
Martin Reinecke committed
402
403
      auto cfu = krn->corfunc(nx_dirty/2+1, 1./nu, nthreads);
      auto cfv = krn->corfunc(ny_dirty/2+1, 1./nv, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
404
405
406
407
408
409
410
411
412
413
414
415
416
      grid.fill(0);
      execStatic(nx_dirty, nthreads, 0, [&](Scheduler &sched)
        {
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
          int icfu = abs(int(nx_dirty/2)-int(i));
          for (size_t j=0; j<ny_dirty; ++j)
            {
            int icfv = abs(int(ny_dirty/2)-int(j));
            size_t i2 = nu-nx_dirty/2+i;
            if (i2>=nu) i2-=nu;
            size_t j2 = nv-ny_dirty/2+j;
            if (j2>=nv) j2-=nv;
Martin Reinecke's avatar
Martin Reinecke committed
417
            grid.v(i2,j2) = dirty(i,j)*T(cfu[icfu]*cfv[icfv]);
Martin Reinecke's avatar
Martin Reinecke committed
418
419
420
421
            }
          }
        });
      }
Martin Reinecke's avatar
Martin Reinecke committed
422
    void dirty2grid_pre2(const mav<T,2> &dirty,
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
423
      mav<complex<T>,2> &grid, T w) const
Martin Reinecke's avatar
Martin Reinecke committed
424
425
426
427
428
429
430
431
432
      {
      checkShape(dirty.shape(), {nx_dirty, ny_dirty});
      checkShape(grid.shape(), {nu, nv});
      grid.fill(0);

      double x0 = -0.5*nx_dirty*psx,
             y0 = -0.5*ny_dirty*psy;
      execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
        {
Martin Reinecke's avatar
Martin Reinecke committed
433
434
435
436
        using vtype = native_simd<T>;
        constexpr size_t vlen=vtype::size();
        size_t nvec = (ny_dirty/2+1+(vlen-1))/vlen;
        vector<vtype> ph(nvec), sp(nvec), cp(nvec);
Martin Reinecke's avatar
Martin Reinecke committed
437
438
439
440
        while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
          {
          T fx = T(x0+i*psx);
          fx *= fx;
Martin Reinecke's avatar
Martin Reinecke committed
441
442
443
444
445
          size_t ix = nu-nx_dirty/2+i;
          if (ix>=nu) ix-=nu;
          size_t i2 = nx_dirty-i;
          size_t ix2 = nu-nx_dirty/2+i2;
          if (ix2>=nu) ix2-=nu;
Martin Reinecke's avatar
Martin Reinecke committed
446
447
448
          for (size_t j=0; j<=ny_dirty/2; ++j)
            {
            T fy = T(y0+j*psy);
Martin Reinecke's avatar
Martin Reinecke committed
449
450
451
452
453
454
455
456
457
458
            ph[j/vlen][j%vlen] = phase(fx, fy*fy, w, false);
            }
          for (size_t j=0; j<nvec; ++j)
            for (size_t k=0; k<vlen; ++k)
               sp[j][k]=sin(ph[j][k]);
          for (size_t j=0; j<nvec; ++j)
            for (size_t k=0; k<vlen; ++k)
              cp[j][k]=cos(ph[j][k]);
          if ((i>0)&&(i<i2))
            for (size_t j=0, jx=nv-ny_dirty/2; j<ny_dirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
Martin Reinecke's avatar
Martin Reinecke committed
459
              {
Martin Reinecke's avatar
Martin Reinecke committed
460
461
462
              size_t j2 = min(j, ny_dirty-j);
              auto ws = complex<T>(cp[j2/vlen][j2%vlen],sp[j2/vlen][j2%vlen]);
              grid.v(ix,jx) = dirty(i,j)*ws; // lower left
Martin Reinecke's avatar
Martin Reinecke committed
463
              grid.v(ix2,jx) = dirty(i2,j)*ws; // lower right
Martin Reinecke's avatar
Martin Reinecke committed
464
              }
Martin Reinecke's avatar
Martin Reinecke committed
465
466
467
468
469
470
471
          else
            for (size_t j=0, jx=nv-ny_dirty/2; j<ny_dirty; ++j, jx=(jx+1>=nv)? jx+1-nv : jx+1)
              {
              size_t j2 = min(j, ny_dirty-j);
              auto ws = complex<T>(cp[j2/vlen][j2%vlen],sp[j2/vlen][j2%vlen]);
              grid.v(ix,jx) = dirty(i,j)*ws; // lower left
              }
Martin Reinecke's avatar
Martin Reinecke committed
472
473
474
475
          }
        });
      }

Martin Reinecke's avatar
Martin Reinecke committed
476
    void dirty2grid(const mav<T,2> &dirty,
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
477
      mav<T,2> &grid) const
Martin Reinecke's avatar
Martin Reinecke committed
478
479
      {
      dirty2grid_pre(dirty, grid);
480
      hartley2_2D<T>(grid, vlim, !uv_side_fast, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
481
482
      }

Martin Reinecke's avatar
Martin Reinecke committed
483
    void dirty2grid_c_wscreen(const mav<T,2> &dirty,
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
484
      mav<complex<T>,2> &grid, T w) const
Martin Reinecke's avatar
Martin Reinecke committed
485
486
      {
      dirty2grid_pre2(dirty, grid, w);
Martin Reinecke's avatar
Martin Reinecke committed
487
      fmav<complex<T>> inout(grid);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
488
      if (2*vlim<nv)
Martin Reinecke's avatar
Martin Reinecke committed
489
        {
490
491
        if (uv_side_fast)
          c2c(inout, inout, {1}, FORWARD, T(1), nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
492
        auto inout_lo = inout.subarray({0,0},{inout.shape(0),vlim});
Martin Reinecke's avatar
Martin Reinecke committed
493
        c2c(inout_lo, inout_lo, {0}, FORWARD, T(1), nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
494
        auto inout_hi = inout.subarray({0,inout.shape(1)-vlim},{inout.shape(0),vlim});
Martin Reinecke's avatar
Martin Reinecke committed
495
        c2c(inout_hi, inout_hi, {0}, FORWARD, T(1), nthreads);
496
497
        if (!uv_side_fast)
          c2c(inout, inout, {1}, FORWARD, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
498
499
500
        }
      else
        c2c(inout, inout, {0,1}, FORWARD, T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
501
502
503
504
505
506
507
508
509
510
511
512
513
      }

    void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
      {
      u=fmod1(u_in*psx)*nu;
      iu0 = min(int(u+ushift)-int(nu), maxiu0);
      v=fmod1(v_in*psy)*nv;
      iv0 = min(int(v+vshift)-int(nv), maxiv0);
      }
  };

constexpr int logsquare=4;

514
template<typename T> class Helper
Martin Reinecke's avatar
Martin Reinecke committed
515
516
  {
  private:
517
    using T2=complex<T>;
Martin Reinecke's avatar
Martin Reinecke committed
518
    const GridderConfig<T> &gconf;
Martin Reinecke's avatar
Martin Reinecke committed
519
520
521
    int nu, nv, nsafe, supp;
    const T2 *grid_r;
    T2 *grid_w;
522
    int su, sv, svvec;
Martin Reinecke's avatar
Martin Reinecke committed
523
524
    int iu0, iv0; // start index of the current visibility
    int bu0, bv0; // start index of the current buffer
Martin Reinecke's avatar
Martin Reinecke committed
525
    T wfac;
Martin Reinecke's avatar
Martin Reinecke committed
526

527
    mav<T,2> rbufr, rbufi, wbufr, wbufi;
Martin Reinecke's avatar
Martin Reinecke committed
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
    bool do_w_gridding;
    double w0, xdw;
    vector<std::mutex> &locks;

    void dump() const
      {
      if (bu0<-nsafe) return; // nothing written into buffer yet

      int idxu = (bu0+nu)%nu;
      int idxv0 = (bv0+nv)%nv;
      for (int iu=0; iu<su; ++iu)
        {
        int idxv = idxv0;
        {
        std::lock_guard<std::mutex> lock(locks[idxu]);
        for (int iv=0; iv<sv; ++iv)
          {
545
          grid_w[idxu*nv + idxv] += complex<T>(wbufr(iu,iv), wbufi(iu,iv));
Martin Reinecke's avatar
Martin Reinecke committed
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
          if (++idxv>=nv) idxv=0;
          }
        }
        if (++idxu>=nu) idxu=0;
        }
      }

    void load()
      {
      int idxu = (bu0+nu)%nu;
      int idxv0 = (bv0+nv)%nv;
      for (int iu=0; iu<su; ++iu)
        {
        int idxv = idxv0;
        for (int iv=0; iv<sv; ++iv)
          {
562
563
          rbufr.v(iu,iv) = grid_r[idxu*nv + idxv].real();
          rbufi.v(iu,iv) = grid_r[idxu*nv + idxv].imag();
Martin Reinecke's avatar
Martin Reinecke committed
564
565
566
567
568
569
570
          if (++idxv>=nv) idxv=0;
          }
        if (++idxu>=nu) idxu=0;
        }
      }

  public:
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
571
    size_t nvec;
572
573
    const T *p0rr, *p0ri;
    T *p0wr, *p0wi;
Martin Reinecke's avatar
Martin Reinecke committed
574
    static constexpr size_t vlen=native_simd<T>::size();
Martin Reinecke's avatar
Martin Reinecke committed
575
    union kbuf {
Martin Reinecke's avatar
Martin Reinecke committed
576
577
      T scalar[64];
      native_simd<T> simd[64/vlen];
Martin Reinecke's avatar
Martin Reinecke committed
578
      };
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
579
    kbuf buf;
Martin Reinecke's avatar
Martin Reinecke committed
580

Martin Reinecke's avatar
Martin Reinecke committed
581
    Helper(const GridderConfig<T> &gconf_, const T2 *grid_r_, T2 *grid_w_,
Martin Reinecke's avatar
Martin Reinecke committed
582
583
      vector<std::mutex> &locks_, double w0_=-1, double dw_=-1)
      : gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()),
Martin Reinecke's avatar
Martin Reinecke committed
584
        supp(gconf.Supp()), grid_r(grid_r_),
585
586
587
588
        grid_w(grid_w_),
        su(2*nsafe+(1<<logsquare)),
        sv(2*nsafe+(1<<logsquare)),
        svvec(((sv+vlen-1)/vlen)*vlen),
Martin Reinecke's avatar
Martin Reinecke committed
589
        bu0(-1000000), bv0(-1000000),
590
591
592
593
        rbufr({size_t(su),size_t(svvec)*(grid_r!=nullptr)}),
        rbufi({size_t(su),size_t(svvec)*(grid_r!=nullptr)}),
        wbufr({size_t(su),size_t(svvec)*(grid_w!=nullptr)}),
        wbufi({size_t(su),size_t(svvec)*(grid_w!=nullptr)}),
Martin Reinecke's avatar
Martin Reinecke committed
594
595
596
        do_w_gridding(dw_>0),
        w0(w0_),
        xdw(T(1)/dw_),
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
597
598
        locks(locks_),
        nvec((supp+vlen-1)/vlen)
Martin Reinecke's avatar
Martin Reinecke committed
599
      {
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
600
      MR_assert(supp<=32, "support too large");
Martin Reinecke's avatar
Martin Reinecke committed
601
      }
Martin Reinecke's avatar
Martin Reinecke committed
602
603
    ~Helper() { if (grid_w) dump(); }

604
    int lineJump() const { return svvec; }
Martin Reinecke's avatar
Martin Reinecke committed
605
    T Wfac() const { return wfac; }
Martin Reinecke's avatar
Martin Reinecke committed
606
607
    void prep(const UVW &in)
      {
Martin Reinecke's avatar
Martin Reinecke committed
608
      const auto &krn(*(gconf.krn));
Martin Reinecke's avatar
Martin Reinecke committed
609
610
611
612
613
      double u, v;
      gconf.getpix(in.u, in.v, u, v, iu0, iv0);
      double xsupp=2./supp;
      double x0 = xsupp*(iu0-u);
      double y0 = xsupp*(iv0-v);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
614
615
      krn.eval(T(x0), &buf.simd[0]);
      krn.eval(T(y0), &buf.simd[nvec]);
Martin Reinecke's avatar
Martin Reinecke committed
616
      if (do_w_gridding)
Martin Reinecke's avatar
Martin Reinecke committed
617
        wfac = krn.eval_single(T(xdw*xsupp*abs(w0-in.w)));
Martin Reinecke's avatar
Martin Reinecke committed
618
619
      if ((iu0<bu0) || (iv0<bv0) || (iu0+supp>bu0+su) || (iv0+supp>bv0+sv))
        {
620
621
622
623
624
625
        if (grid_w)
          {
          dump();
          wbufr.apply([](T &v){v=0;});
          wbufi.apply([](T &v){v=0;});
          }
Martin Reinecke's avatar
Martin Reinecke committed
626
627
628
629
        bu0=((((iu0+nsafe)>>logsquare)<<logsquare))-nsafe;
        bv0=((((iv0+nsafe)>>logsquare)<<logsquare))-nsafe;
        if (grid_r) load();
        }
630
631
632
633
      p0rr = grid_r ? &rbufr(iu0-bu0, iv0-bv0) : nullptr;
      p0ri = grid_r ? &rbufi(iu0-bu0, iv0-bv0) : nullptr;
      p0wr = grid_w ? &wbufr.v(iu0-bu0, iv0-bv0) : nullptr;
      p0wi = grid_w ? &wbufi.v(iu0-bu0, iv0-bv0) : nullptr;
Martin Reinecke's avatar
Martin Reinecke committed
634
635
636
637
638
639
      }
  };

template<class T, class Serv> class SubServ
  {
  private:
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
640
641
    Serv &srv;
    mav<idx_t,1> subidx;
Martin Reinecke's avatar
Martin Reinecke committed
642
643

  public:
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
644
    SubServ(Serv &orig, const mav<idx_t,1> &subidx_)
Martin Reinecke's avatar
Martin Reinecke committed
645
646
647
648
      : srv(orig), subidx(subidx_){}
    size_t Nvis() const { return subidx.size(); }
    const Baselines &getBaselines() const { return srv.getBaselines(); }
    UVW getCoord(size_t i) const
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
649
      { return srv.getCoord(subidx(i)); }
Martin Reinecke's avatar
Martin Reinecke committed
650
    complex<T> getVis(size_t i) const
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
651
652
653
654
655
656
      { return srv.getVis(subidx(i)); }
    idx_t getIdx(size_t i) const { return srv.getIdx(subidx(i)); }
    void setVis (size_t i, const complex<T> &v)
      { srv.setVis(subidx(i), v); }
    void addVis (size_t i, const complex<T> &v)
      { srv.addVis(subidx(i), v); }
Martin Reinecke's avatar
Martin Reinecke committed
657
658
659
660
661
662
  };

template<class T, class T2> class MsServ
  {
  private:
    const Baselines &baselines;
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
663
    mav<idx_t,1> idx;
Martin Reinecke's avatar
Martin Reinecke committed
664
    T2 ms;
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
665
    mav<T,2> wgt;
Martin Reinecke's avatar
Martin Reinecke committed
666
667
668
669
670
671
672
    size_t nvis;
    bool have_wgt;

  public:
    using Tsub = SubServ<T, MsServ>;

    MsServ(const Baselines &baselines_,
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
673
    const mav<idx_t,1> &idx_, T2 ms_, const mav<T,2> &wgt_)
Martin Reinecke's avatar
Martin Reinecke committed
674
675
676
677
678
679
      : baselines(baselines_), idx(idx_), ms(ms_), wgt(wgt_),
        nvis(idx.shape(0)), have_wgt(wgt.size()!=0)
      {
      checkShape(ms.shape(), {baselines.Nrows(), baselines.Nchannels()});
      if (have_wgt) checkShape(wgt.shape(), ms.shape());
      }
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
680
    Tsub getSubserv(const mav<idx_t,1> &subidx)
Martin Reinecke's avatar
Martin Reinecke committed
681
682
683
684
685
686
687
688
689
690
691
      { return Tsub(*this, subidx); }
    size_t Nvis() const { return nvis; }
    const Baselines &getBaselines() const { return baselines; }
    UVW getCoord(size_t i) const
      { return baselines.effectiveCoord(idx(i)); }
    complex<T> getVis(size_t i) const
      {
      auto rc = baselines.getRowChan(idx(i));
      return have_wgt ? ms(rc.row, rc.chan)*wgt(rc.row, rc.chan)
                      : ms(rc.row, rc.chan);
      }
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
692
693
    idx_t getIdx(size_t i) const { return idx(i); }
    void setVis (size_t i, const complex<T> &v)
Martin Reinecke's avatar
Martin Reinecke committed
694
695
      {
      auto rc = baselines.getRowChan(idx(i));
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
696
      ms.w()(rc.row, rc.chan) = have_wgt ? v*wgt(rc.row, rc.chan) : v;
Martin Reinecke's avatar
Martin Reinecke committed
697
      }
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
698
    void addVis (size_t i, const complex<T> &v)
Martin Reinecke's avatar
Martin Reinecke committed
699
700
      {
      auto rc = baselines.getRowChan(idx(i));
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
701
      ms.v(rc.row, rc.chan) += have_wgt ? v*wgt(rc.row, rc.chan) : v;
Martin Reinecke's avatar
Martin Reinecke committed
702
703
704
705
      }
  };
template<class T, class T2> MsServ<T, T2> makeMsServ
  (const Baselines &baselines,
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
706
   const mav<idx_t,1> &idx, T2 &ms, const mav<T,2> &wgt)
Martin Reinecke's avatar
Martin Reinecke committed
707
708
709
  { return MsServ<T, T2>(baselines, idx, ms, wgt); }

template<typename T, typename Serv> void x2grid_c
Martin Reinecke's avatar
Martin Reinecke committed
710
  (const GridderConfig<T> &gconf, Serv &srv, mav<complex<T>,2> &grid,
Martin Reinecke's avatar
Martin Reinecke committed
711
712
713
714
715
716
717
718
  double w0=-1, double dw=-1)
  {
  checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()});
  MR_assert(grid.contiguous(), "grid is not contiguous");
  size_t supp = gconf.Supp();
  size_t nthreads = gconf.Nthreads();
  bool do_w_gridding = dw>0;
  vector<std::mutex> locks(gconf.Nu());
719
720
  constexpr size_t vlen=native_simd<T>::size();
  size_t nvec((supp+vlen-1)/vlen);
Martin Reinecke's avatar
Martin Reinecke committed
721
  size_t np = srv.Nvis();
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
  if (nvec==1)
  execGuided(np, nthreads, 100, 0.2, [&](Scheduler &sched)
    {
    Helper<T> hlp(gconf, nullptr, grid.vdata(), locks, w0, dw);
    int jump = hlp.lineJump();
    const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
    const auto * DUCC0_RESTRICT kv = hlp.buf.simd+hlp.nvec;

    while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
      {
      UVW coord = srv.getCoord(ipart);
      auto flip = coord.FixW();
      hlp.prep(coord);
      auto * DUCC0_RESTRICT ptrr = hlp.p0wr;
      auto * DUCC0_RESTRICT ptri = hlp.p0wi;
      auto v(srv.getVis(ipart));
      if (do_w_gridding) v*=hlp.Wfac();
      if (flip) v=conj(v);
      for (size_t cu=0; cu<supp; ++cu)
        {
        complex<T> tmp(v*ku[cu]);
        for (size_t cv=0; cv<1; ++cv)
          {
          auto tr = native_simd<T>::loadu(ptrr+cv*hlp.vlen);
          tr += tmp.real()*kv[cv];
          tr.storeu(ptrr+cv*hlp.vlen);
          auto ti = native_simd<T>::loadu(ptri+cv*hlp.vlen);
          ti += tmp.imag()*kv[cv];
          ti.storeu(ptri+cv*hlp.vlen);
          }
        ptrr+=jump;
        ptri+=jump;
        }
      }
    });
  else if (nvec==2)
  execGuided(np, nthreads, 100, 0.2, [&](Scheduler &sched)
    {
    Helper<T> hlp(gconf, nullptr, grid.vdata(), locks, w0, dw);
    int jump = hlp.lineJump();
    const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
    const auto * DUCC0_RESTRICT kv = hlp.buf.simd+hlp.nvec;

    while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
      {
      UVW coord = srv.getCoord(ipart);
      auto flip = coord.FixW();
      hlp.prep(coord);
      auto * DUCC0_RESTRICT ptrr = hlp.p0wr;
      auto * DUCC0_RESTRICT ptri = hlp.p0wi;
      auto v(srv.getVis(ipart));
      if (do_w_gridding) v*=hlp.Wfac();
      if (flip) v=conj(v);
      for (size_t cu=0; cu<supp; ++cu)
        {
        complex<T> tmp(v*ku[cu]);
        for (size_t cv=0; cv<2; ++cv)
          {
          auto tr = native_simd<T>::loadu(ptrr+cv*hlp.vlen);
          tr += tmp.real()*kv[cv];
          tr.storeu(ptrr+cv*hlp.vlen);
          auto ti = native_simd<T>::loadu(ptri+cv*hlp.vlen);
          ti += tmp.imag()*kv[cv];
          ti.storeu(ptri+cv*hlp.vlen);
          }
        ptrr+=jump;
        ptri+=jump;
        }
      }
    });
  else if (nvec==3)
  execGuided(np, nthreads, 100, 0.2, [&](Scheduler &sched)
    {
    Helper<T> hlp(gconf, nullptr, grid.vdata(), locks, w0, dw);
    int jump = hlp.lineJump();
    const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
    const auto * DUCC0_RESTRICT kv = hlp.buf.simd+hlp.nvec;

    while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
      {
      UVW coord = srv.getCoord(ipart);
      auto flip = coord.FixW();
      hlp.prep(coord);
      auto * DUCC0_RESTRICT ptrr = hlp.p0wr;
      auto * DUCC0_RESTRICT ptri = hlp.p0wi;
      auto v(srv.getVis(ipart));
      if (do_w_gridding) v*=hlp.Wfac();
      if (flip) v=conj(v);
      for (size_t cu=0; cu<supp; ++cu)
        {
        complex<T> tmp(v*ku[cu]);
        for (size_t cv=0; cv<3; ++cv)
          {
          auto tr = native_simd<T>::loadu(ptrr+cv*hlp.vlen);
          tr += tmp.real()*kv[cv];
          tr.storeu(ptrr+cv*hlp.vlen);
          auto ti = native_simd<T>::loadu(ptri+cv*hlp.vlen);
          ti += tmp.imag()*kv[cv];
          ti.storeu(ptri+cv*hlp.vlen);
          }
        ptrr+=jump;
        ptri+=jump;
        }
      }
    });
  else if (nvec==4)
  execGuided(np, nthreads, 100, 0.2, [&](Scheduler &sched)
    {
    Helper<T> hlp(gconf, nullptr, grid.vdata(), locks, w0, dw);
    int jump = hlp.lineJump();
    const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
    const auto * DUCC0_RESTRICT kv = hlp.buf.simd+hlp.nvec;

    while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
      {
      UVW coord = srv.getCoord(ipart);
      auto flip = coord.FixW();
      hlp.prep(coord);
      auto * DUCC0_RESTRICT ptrr = hlp.p0wr;
      auto * DUCC0_RESTRICT ptri = hlp.p0wi;
      auto v(srv.getVis(ipart));
      if (do_w_gridding) v*=hlp.Wfac();
      if (flip) v=conj(v);
      for (size_t cu=0; cu<supp; ++cu)
        {
        complex<T> tmp(v*ku[cu]);
        for (size_t cv=0; cv<4; ++cv)
          {
          auto tr = native_simd<T>::loadu(ptrr+cv*hlp.vlen);
          tr += tmp.real()*kv[cv];
          tr.storeu(ptrr+cv*hlp.vlen);
          auto ti = native_simd<T>::loadu(ptri+cv*hlp.vlen);
          ti += tmp.imag()*kv[cv];
          ti.storeu(ptri+cv*hlp.vlen);
          }
        ptrr+=jump;
        ptri+=jump;
        }
      }
    });
  else
Martin Reinecke's avatar
Martin Reinecke committed
863
864
  execGuided(np, nthreads, 100, 0.2, [&](Scheduler &sched)
    {
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
865
    Helper<T> hlp(gconf, nullptr, grid.vdata(), locks, w0, dw);
Martin Reinecke's avatar
Martin Reinecke committed
866
    int jump = hlp.lineJump();
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
867
    const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
868
    const auto * DUCC0_RESTRICT kv = hlp.buf.simd+hlp.nvec;
Martin Reinecke's avatar
Martin Reinecke committed
869
870
871
872
873
874

    while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
      {
      UVW coord = srv.getCoord(ipart);
      auto flip = coord.FixW();
      hlp.prep(coord);
875
876
      auto * DUCC0_RESTRICT ptrr = hlp.p0wr;
      auto * DUCC0_RESTRICT ptri = hlp.p0wi;
Martin Reinecke's avatar
Martin Reinecke committed
877
878
879
880
881
882
      auto v(srv.getVis(ipart));
      if (do_w_gridding) v*=hlp.Wfac();
      if (flip) v=conj(v);
      for (size_t cu=0; cu<supp; ++cu)
        {
        complex<T> tmp(v*ku[cu]);
883
        for (size_t cv=0; cv<hlp.nvec; ++cv)
Martin Reinecke's avatar
Martin Reinecke committed
884
          {
885
886
887
888
889
890
          auto tr = native_simd<T>::loadu(ptrr+cv*hlp.vlen);
          tr += tmp.real()*kv[cv];
          tr.storeu(ptrr+cv*hlp.vlen);
          auto ti = native_simd<T>::loadu(ptri+cv*hlp.vlen);
          ti += tmp.imag()*kv[cv];
          ti.storeu(ptri+cv*hlp.vlen);
Martin Reinecke's avatar
Martin Reinecke committed
891
          }
892
893
        ptrr+=jump;
        ptri+=jump;
Martin Reinecke's avatar
Martin Reinecke committed
894
895
896
897
898
899
        }
      }
    });
  }

template<typename T, typename Serv> void grid2x_c
Martin Reinecke's avatar
Martin Reinecke committed
900
  (const GridderConfig<T> &gconf, const mav<complex<T>,2> &grid,
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
901
  Serv &srv, double w0=-1, double dw=-1)
Martin Reinecke's avatar
Martin Reinecke committed
902
903
904
905
906
907
908
  {
  checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()});
  MR_assert(grid.contiguous(), "grid is not contiguous");
  size_t supp = gconf.Supp();
  size_t nthreads = gconf.Nthreads();
  bool do_w_gridding = dw>0;
  vector<std::mutex> locks(gconf.Nu());
909
910
  constexpr size_t vlen=native_simd<T>::size();
  size_t nvec((supp+vlen-1)/vlen);
Martin Reinecke's avatar
Martin Reinecke committed
911
912
913

  // Loop over sampling points
  size_t np = srv.Nvis();
914
  if (nvec==1)
Martin Reinecke's avatar
Martin Reinecke committed
915
916
917
918
  execGuided(np, nthreads, 1000, 0.5, [&](Scheduler &sched)
    {
    Helper<T> hlp(gconf, grid.data(), nullptr, locks, w0, dw);
    int jump = hlp.lineJump();
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
919
    const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
920
    const auto * DUCC0_RESTRICT kv = hlp.buf.simd+hlp.nvec;
Martin Reinecke's avatar
Martin Reinecke committed
921
922
923
924
925
926
927

    while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
      {
      UVW coord = srv.getCoord(ipart);
      auto flip = coord.FixW();
      hlp.prep(coord);
      complex<T> r = 0;
928
929
      const auto * DUCC0_RESTRICT ptrr = hlp.p0rr;
      const auto * DUCC0_RESTRICT ptri = hlp.p0ri;
Martin Reinecke's avatar
Martin Reinecke committed
930
931
      for (size_t cu=0; cu<supp; ++cu)
        {
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
        native_simd<T> tmpr(0), tmpi(0);
        for (size_t cv=0; cv<1; ++cv)
          {
          tmpr += kv[cv]*native_simd<T>::loadu(ptrr+hlp.vlen*cv);
          tmpi += kv[cv]*native_simd<T>::loadu(ptri+hlp.vlen*cv);
          }
        r += ku[cu]*complex<T>(reduce(tmpr, std::plus<>()), reduce(tmpi, std::plus<>()));
        ptrr += jump;
        ptri += jump;
        }
      if (flip) r=conj(r);
      if (do_w_gridding) r*=hlp.Wfac();
      srv.addVis(ipart, r);
      }
    });
  else if (nvec==2)
  execGuided(np, nthreads, 1000, 0.5, [&](Scheduler &sched)
    {
    Helper<T> hlp(gconf, grid.data(), nullptr, locks, w0, dw);
    int jump = hlp.lineJump();
    const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
    const auto * DUCC0_RESTRICT kv = hlp.buf.simd+hlp.nvec;

    while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
      {
      UVW coord = srv.getCoord(ipart);
      auto flip = coord.FixW();
      hlp.prep(coord);
      complex<T> r = 0;
      const auto * DUCC0_RESTRICT ptrr = hlp.p0rr;
      const auto * DUCC0_RESTRICT ptri = hlp.p0ri;
      for (size_t cu=0; cu<supp; ++cu)
        {
        native_simd<T> tmpr(0), tmpi(0);
        for (size_t cv=0; cv<2; ++cv)
          {
          tmpr += kv[cv]*native_simd<T>::loadu(ptrr+hlp.vlen*cv);
          tmpi += kv[cv]*native_simd<T>::loadu(ptri+hlp.vlen*cv);
          }
        r += ku[cu]*complex<T>(reduce(tmpr, std::plus<>()), reduce(tmpi, std::plus<>()));
        ptrr += jump;
        ptri += jump;
        }
      if (flip) r=conj(r);
      if (do_w_gridding) r*=hlp.Wfac();
      srv.addVis(ipart, r);
      }
    });
  else if (nvec==3)
  execGuided(np, nthreads, 1000, 0.5, [&](Scheduler &sched)
    {
    Helper<T> hlp(gconf, grid.data(), nullptr, locks, w0, dw);
    int jump = hlp.lineJump();
    const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
    const auto * DUCC0_RESTRICT kv = hlp.buf.simd+hlp.nvec;

    while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
      {
      UVW coord = srv.getCoord(ipart);
      auto flip = coord.FixW();
      hlp.prep(coord);
      complex<T> r = 0;
      const auto * DUCC0_RESTRICT ptrr = hlp.p0rr;
      const auto * DUCC0_RESTRICT ptri = hlp.p0ri;
      for (size_t cu=0; cu<supp; ++cu)
        {
        native_simd<T> tmpr(0), tmpi(0);
        for (size_t cv=0; cv<3; ++cv)
          {
          tmpr += kv[cv]*native_simd<T>::loadu(ptrr+hlp.vlen*cv);
          tmpi += kv[cv]*native_simd<T>::loadu(ptri+hlp.vlen*cv);
          }
        r += ku[cu]*complex<T>(reduce(tmpr, std::plus<>()), reduce(tmpi, std::plus<>()));
        ptrr += jump;
        ptri += jump;
        }
      if (flip) r=conj(r);
      if (do_w_gridding) r*=hlp.Wfac();
      srv.addVis(ipart, r);
      }
    });
  else if (nvec==4)
  execGuided(np, nthreads, 1000, 0.5, [&](Scheduler &sched)
    {
    Helper<T> hlp(gconf, grid.data(), nullptr, locks, w0, dw);
    int jump = hlp.lineJump();
    const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
    const auto * DUCC0_RESTRICT kv = hlp.buf.simd+hlp.nvec;

    while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
      {
      UVW coord = srv.getCoord(ipart);
      auto flip = coord.FixW();
      hlp.prep(coord);
      complex<T> r = 0;
      const auto * DUCC0_RESTRICT ptrr = hlp.p0rr;
      const auto * DUCC0_RESTRICT ptri = hlp.p0ri;
      for (size_t cu=0; cu<supp; ++cu)
        {
        native_simd<T> tmpr(0), tmpi(0);
        for (size_t cv=0; cv<4; ++cv)
          {
          tmpr += kv[cv]*native_simd<T>::loadu(ptrr+hlp.vlen*cv);
          tmpi += kv[cv]*native_simd<T>::loadu(ptri+hlp.vlen*cv);
          }
        r += ku[cu]*complex<T>(reduce(tmpr, std::plus<>()), reduce(tmpi, std::plus<>()));
        ptrr += jump;
        ptri += jump;
        }
      if (flip) r=conj(r);
      if (do_w_gridding) r*=hlp.Wfac();
      srv.addVis(ipart, r);
      }
    });
  else
  execGuided(np, nthreads, 1000, 0.5, [&](Scheduler &sched)
    {
    Helper<T> hlp(gconf, grid.data(), nullptr, locks, w0, dw);
    int jump = hlp.lineJump();
    const T * DUCC0_RESTRICT ku = hlp.buf.scalar;
    const auto * DUCC0_RESTRICT kv = hlp.buf.simd+hlp.nvec;

    while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart<rng.hi; ++ipart)
      {
      UVW coord = srv.getCoord(ipart);
      auto flip = coord.FixW();
      hlp.prep(coord);
      complex<T> r = 0;
      const auto * DUCC0_RESTRICT ptrr = hlp.p0rr;
      const auto * DUCC0_RESTRICT ptri = hlp.p0ri;
      for (size_t cu=0; cu<supp; ++cu)
        {
        native_simd<T> tmpr(0), tmpi(0);
        for (size_t cv=0; cv<hlp.nvec; ++cv)
          {
          tmpr += kv[cv]*native_simd<T>::loadu(ptrr+hlp.vlen*cv);
          tmpi += kv[cv]*native_simd<T>::loadu(ptri+hlp.vlen*cv);
          }
        r += ku[cu]*complex<T>(reduce(tmpr, std::plus<>()), reduce(tmpi, std::plus<>()));
        ptrr += jump;
        ptri += jump;
Martin Reinecke's avatar
Martin Reinecke committed
1073
1074
1075
1076
1077
1078
1079
1080
        }
      if (flip) r=conj(r);
      if (do_w_gridding) r*=hlp.Wfac();
      srv.addVis(ipart, r);
      }
    });
  }

Martin Reinecke's avatar
Martin Reinecke committed
1081
1082
template<typename T> void apply_global_corrections(const GridderConfig<T> &gconf,
  mav<T,2> &dirty, double dw, bool divide_by_n)
Martin Reinecke's avatar
Martin Reinecke committed
1083
1084
1085
1086
1087
1088
1089
1090
  {
  auto nx_dirty=gconf.Nxdirty();
  auto ny_dirty=gconf.Nydirty();
  size_t nthreads = gconf.Nthreads();
  auto psx=gconf.Pixsize_x();
  auto psy=gconf.Pixsize_y();
  double x0 = -0.5*nx_dirty*psx,
         y0 = -0.5*ny_dirty*psy;
Martin Reinecke's avatar
Martin Reinecke committed
1091
1092
  auto cfu = gconf.krn->corfunc(nx_dirty/2+1, 1./gconf.Nu(), nthreads);
  auto cfv = gconf.krn->corfunc(ny_dirty/2+1, 1./gconf.Nv(), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
  execStatic(nx_dirty/2+1, nthreads, 0, [&](Scheduler &sched)
    {
    while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
      {
      auto fx = T(x0+i*psx);
      fx *= fx;
      for (size_t j=0; j<=ny_dirty/2; ++j)
        {
        auto fy = T(y0+j*psy);
        fy*=fy;
        T fct = 0;
        auto tmp = 1-fx-fy;
        if (tmp>=0)
          {
          auto nm1 = (-fx-fy)/(sqrt(tmp)+1); // accurate form of sqrt(1-x-y)-1
Martin Reinecke's avatar
Martin Reinecke committed
1108
          fct = T(gconf.krn->corfunc(nm1*dw));
Martin Reinecke's avatar
Martin Reinecke committed
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
          if (divide_by_n)
            fct /= nm1+1;
          }
        else // beyond the horizon, don't really know what to do here
          {
          if (divide_by_n)
            fct=0;
          else
            {
            auto nm1 = sqrt(-tmp)-1;
Martin Reinecke's avatar
Martin Reinecke committed
1119
            fct = T(gconf.krn->corfunc(nm1*dw));
Martin Reinecke's avatar
Martin Reinecke committed
1120
1121
1122
1123
            }
          }
        fct *= T(cfu[nx_dirty/2-i]*cfv[ny_dirty/2-j]);
        size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
Martin Reinecke's avatar
Martin Reinecke committed
1124
        dirty.v(i,j)*=fct;
Martin Reinecke's avatar
Martin Reinecke committed
1125
1126
        if ((i>0)&&(i<i2))
          {
Martin Reinecke's avatar
Martin Reinecke committed
1127
          dirty.v(i2,j)*=fct;
Martin Reinecke's avatar
Martin Reinecke committed
1128
          if ((j>0)&&(j<j2))
Martin Reinecke's avatar
Martin Reinecke committed
1129
            dirty.v(i2,j2)*=fct;
Martin Reinecke's avatar
Martin Reinecke committed
1130
1131
          }
        if ((j>0)&&(j<j2))
Martin Reinecke's avatar
Martin Reinecke committed
1132
          dirty.v(i,j2)*=fct;
Martin Reinecke's avatar
Martin Reinecke committed
1133
1134
1135
1136
1137
        }
      }
    });
  }

Martin Reinecke's avatar
Martin Reinecke committed
1138
1139
1140
1141
1142
1143
1144
1145
1146
auto calc_share(size_t nshares, size_t myshare, size_t nwork)
  {
  size_t nbase = nwork/nshares;
  size_t additional = nwork%nshares;
  size_t lo = myshare*nbase + ((myshare<additional) ? myshare : additional);
  size_t hi = lo+nbase+(myshare<additional);
  return make_tuple(lo, hi);
  }

Martin Reinecke's avatar
Martin Reinecke committed
1147
template<typename T, typename Serv> class WgridHelper
Martin Reinecke's avatar
Martin Reinecke committed
1148
1149
  {
  private:
Martin Reinecke's avatar
stage 2    
Martin Reinecke committed
1150
    Serv &srv;
Martin Reinecke's avatar
Martin Reinecke committed
1151
    double wmin, dw;
Martin Reinecke's avatar
Martin Reinecke committed
1152
    size_t nplanes, supp, nthreads;
Martin Reinecke's avatar
Martin Reinecke committed
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
    vector<vector<idx_t>> minplane;
    size_t verbosity;

    int curplane;
    vector<idx_t> subidx;

    static void wminmax(const Serv &srv, double &wmin, double &wmax)
      {
      size_t nvis = srv.Nvis();

      wmin= 1e38;
      wmax=-1e38;
      // FIXME maybe this can be done more intelligently
      for (size_t ipart=0; ipart<nvis; ++ipart)
        {
        auto wval = abs(srv.getCoord(ipart).w);
        wmin = min(wmin,wval);
        wmax = max(wmax,wval);
        }
      }

Martin Reinecke's avatar
Martin Reinecke committed
1174
    template<typename T2> static void update_idx(vector<T2> &v, const vector<T2> &add,
Martin Reinecke's avatar
Martin Reinecke committed
1175
      const vector<T2> &del, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
1176
1177
      {
      MR_assert(v.size()>=del.size(), "must not happen");
Martin Reinecke's avatar
Martin Reinecke committed
1178
      vector<T2> res;
Martin Reinecke's avatar
Martin Reinecke committed
1179
      res.reserve((v.size()+add.size())-del.size());
Martin Reinecke's avatar
Martin Reinecke committed
1180
#if 0
Martin Reinecke's avatar
Martin Reinecke committed
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
      auto iin=v.begin(), ein=v.end();
      auto iadd=add.begin(), eadd=add.end();
      auto irem=del.begin(), erem=del.end();

      while(iin!=ein)
        {
        if ((irem!=erem) && (*iin==*irem))
          {  ++irem; ++iin; } // skip removed entry
        else if ((iadd!=eadd) && (*iadd<*iin))
           res.push_back(*(iadd++)); // add new entry
        else
          res.push_back(*(iin++));
        }
      MR_assert(irem==erem, "must not happen");
      while(iadd!=eadd)
        res.push_back(*(iadd++));
Martin Reinecke's avatar
Martin Reinecke committed
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
#else
      if (v.empty()) //special case
        {
        MR_assert(del.empty(), "must not happen");
        for (auto x: add)
          res.push_back(x);
        }
      else
        {
        res.resize((v.size()+add.size())-del.size());
        execParallel(nthreads, [&](Scheduler &sched) {
          auto tid = sched.thread_num();
          auto [lo, hi] = calc_share(nthreads, tid, v.size());
          if (lo==hi) return; // if interval is empty, do nothing
          auto iin=v.begin()+lo, ein=v.begin()+hi;
          auto iadd = (iin==v.begin()) ? add.begin() : lower_bound(add.begin(), add.end(), *iin);
          auto eadd = (ein==v.end()) ? add.end() : lower_bound(add.begin(), add.end(), *ein);
          auto irem = (iin==v.begin()) ? del.begin() : lower_bound(del.begin(), del.end(), *iin);
          auto erem = (ein==v.end()) ? del.end() : lower_bound(del.begin(), del.end(), *ein);
          auto iout = res.begin()+lo-(irem-del.begin())+(iadd-add.begin());
          while(iin!=ein)
            {
            if ((irem!=erem) && (*iin==*irem))
              { ++irem; ++iin; } // skip removed entry
            else if ((iadd!=eadd) && (*iadd<*iin))
              *(iout++) = *(iadd++); // add new entry
            else
              *(iout++) = *(iin++);
            }
          MR_assert(irem==erem, "must not happen");
          while(iadd!=eadd)
            *(iout++) = *(iadd++);
          });
        }
#endif
Martin Reinecke's avatar
Martin Reinecke committed
1232
1233
1234
1235
1236
      MR_assert(res.size()==(v.size()+add.size())-del.size(), "must not happen");
      v.swap(res);
      }

  public:
Martin Reinecke's avatar
Martin Reinecke committed
1237
    WgridHelper(const GridderConfig<T> &gconf, Serv &srv_, size_t verbosity_)
Martin Reinecke's avatar
Martin Reinecke committed
1238
1239
      : srv(srv_), supp(gconf.Supp()), nthreads(gconf.Nthreads()),
        verbosity(verbosity_), curplane(-1)
Martin Reinecke's avatar
Martin Reinecke committed
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
      {
      size_t nvis = srv.Nvis();
      double wmax;

      wminmax(srv, wmin, wmax);
      if (verbosity>0) cout << "Using " << nthreads << " thread"
                            << ((nthreads!=1) ? "s" : "") << endl;
      if (verbosity>0) cout << "W range: " << wmin << " to " << wmax << endl;

      double x0 = -0.5*gconf.Nxdirty()*gconf.Pixsize_x(),
             y0 = -0.5*gconf.Nydirty()*gconf.Pixsize_y();
      double nmin = sqrt(max(1.-x0*x0-y0*y0,0.))-1.;
      if (x0*x0+y0*y0>1.)
        nmin = -sqrt(abs(1.-x0*x0-y0*y0))-1.;
      dw = 0.25/abs(nmin);
Martin Reinecke's avatar
Martin Reinecke committed
1255
      nplanes = size_t((wmax-wmin)/dw+supp);
Martin Reinecke's avatar
Martin Reinecke committed
1256
      wmin = (wmin+wmax)*0.5 - 0.5*(nplanes-1)*dw;
Martin Reinecke's avatar
Martin Reinecke committed
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
      if (verbosity>0) cout << "Kernel support: " << supp << endl;
      if (verbosity>0) cout << "nplanes: " << nplanes << endl;

      minplane.resize(nplanes);
#if 0
      // extra short, but potentially inefficient version:
      for (size_t ipart=0; ipart<nvis; ++ipart)
        {
        int plane0 = max(0,int(1+(abs(srv.getCoord(ipart).w)-(0.5*supp*dw)-wmin)/dw));
        minplane[plane0].push_back(idx_t(ipart));
        }
#else
      // more efficient: precalculate final vector sizes and avoid reallocations
Martin Reinecke's avatar
Martin Reinecke committed
1270
      vector<int> p0(nvis);
Martin Reinecke's avatar
Martin Reinecke committed
1271
      mav<size_t,2> cnt({nthreads, nplanes+16}); // safety distance against false sharing
Martin Reinecke's avatar
Martin Reinecke committed