gridder_cxx.h 59.4 KB
Newer Older
Martin Reinecke's avatar
Martin Reinecke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#ifndef GRIDDER_CXX_H
#define GRIDDER_CXX_H

/*
 *  This file is part of nifty_gridder.
 *
 *  nifty_gridder is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  nifty_gridder is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with nifty_gridder; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

/* Copyright (C) 2019 Max-Planck-Society
   Author: Martin Reinecke */

#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cmath>
#include <vector>
Martin Reinecke's avatar
Martin Reinecke committed
30
#include <array>
Martin Reinecke's avatar
Martin Reinecke committed
31

Martin Reinecke's avatar
Martin Reinecke committed
32
33
34
35
#ifdef _OPENMP
#include <omp.h>
#endif

Martin Reinecke's avatar
Martin Reinecke committed
36
#include "pocketfft_hdronly.h"
Martin Reinecke's avatar
Martin Reinecke committed
37
38

#if defined(__GNUC__)
Martin Reinecke's avatar
tweaks  
Martin Reinecke committed
39
#define NOINLINE __attribute__((noinline))
Martin Reinecke's avatar
Martin Reinecke committed
40
#define ALIGNED(align) __attribute__ ((aligned(align)))
Martin Reinecke's avatar
Martin Reinecke committed
41
#define RESTRICT __restrict__
Martin Reinecke's avatar
Martin Reinecke committed
42
43
44
#else
#define NOINLINE
#define ALIGNED(align)
Martin Reinecke's avatar
Martin Reinecke committed
45
#define RESTRICT
Martin Reinecke's avatar
Martin Reinecke committed
46
#endif
Martin Reinecke's avatar
Martin Reinecke committed
47

Martin Reinecke's avatar
Martin Reinecke committed
48
49
namespace gridder {

50
51
namespace detail {

Martin Reinecke's avatar
Martin Reinecke committed
52
53
using namespace std;

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
template<typename T> struct VLEN { static constexpr size_t val=1; };

#if (defined(__AVX512F__))
template<> struct VLEN<float> { static constexpr size_t val=16; };
template<> struct VLEN<double> { static constexpr size_t val=8; };
#elif (defined(__AVX__))
template<> struct VLEN<float> { static constexpr size_t val=8; };
template<> struct VLEN<double> { static constexpr size_t val=4; };
#elif (defined(__SSE2__))
template<> struct VLEN<float> { static constexpr size_t val=4; };
template<> struct VLEN<double> { static constexpr size_t val=2; };
#elif (defined(__VSX__))
template<> struct VLEN<float> { static constexpr size_t val=4; };
template<> struct VLEN<double> { static constexpr size_t val=2; };
#endif

Martin Reinecke's avatar
Martin Reinecke committed
70
71

// "mav" stands for "multidimensional array view"
Martin Reinecke's avatar
Martin Reinecke committed
72
template<typename T, size_t ndim> class mav
Martin Reinecke's avatar
Martin Reinecke committed
73
74
75
76
77
  {
  static_assert((ndim>0) && (ndim<3), "only supports 1D and 2D arrays");

  private:
    T *d;
Martin Reinecke's avatar
Martin Reinecke committed
78
79
    array<size_t, ndim> shp;
    array<ptrdiff_t, ndim> str;
Martin Reinecke's avatar
Martin Reinecke committed
80
81

  public:
Martin Reinecke's avatar
Martin Reinecke committed
82
83
84
85
    mav(T *d_, const array<size_t,ndim> &shp_,
        const array<ptrdiff_t,ndim> &str_)
      : d(d_), shp(shp_), str(str_) {}
    mav(T *d_, const array<size_t,ndim> &shp_)
Martin Reinecke's avatar
Martin Reinecke committed
86
87
88
89
90
91
      : d(d_), shp(shp_)
      {
      str[ndim-1]=1;
      for (size_t d=2; d<=ndim; ++d)
        str[ndim-d] = str[ndim-d+1]*shp[ndim-d+1];
      }
92
    T &operator[](size_t i) const
Martin Reinecke's avatar
Martin Reinecke committed
93
      { return operator()(i); }
94
    T &operator()(size_t i) const
Martin Reinecke's avatar
Martin Reinecke committed
95
96
      {
      static_assert(ndim==1, "ndim must be 1");
Martin Reinecke's avatar
Martin Reinecke committed
97
      return d[str[0]*i];
Martin Reinecke's avatar
Martin Reinecke committed
98
      }
99
    T &operator()(size_t i, size_t j) const
Martin Reinecke's avatar
Martin Reinecke committed
100
101
      {
      static_assert(ndim==2, "ndim must be 2");
Martin Reinecke's avatar
Martin Reinecke committed
102
      return d[str[0]*i + str[1]*j];
Martin Reinecke's avatar
Martin Reinecke committed
103
104
      }
    size_t shape(size_t i) const { return shp[i]; }
Martin Reinecke's avatar
Martin Reinecke committed
105
    const array<size_t,ndim> &shape() const { return shp; }
Martin Reinecke's avatar
Martin Reinecke committed
106
107
108
109
110
111
    size_t size() const
      {
      size_t res=1;
      for (auto v: shp) res*=v;
      return res;
      }
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
112
    ptrdiff_t stride(size_t i) const { return str[i]; }
113
    T *data() const
Martin Reinecke's avatar
Martin Reinecke committed
114
      { return d; }
115
116
117
118
119
120
121
122
    bool last_contiguous() const
      { return (str[ndim-1]==1) || (str[ndim-1]==0); }
    void check_storage(const char *name) const
      {
      if (!last_contiguous())
        cout << "Array '" << name << "': last dimension is not contiguous.\n"
                "This may slow down computation significantly!\n";
      }
Martin Reinecke's avatar
updates  
Martin Reinecke committed
123
124
125
126
127
128
129
130
131
132
    bool contiguous() const
      {
      ptrdiff_t stride=1;
      for (size_t i=0; i<ndim; ++i)
        {
        if (str[ndim-1-i]!=stride) return false;
        stride *= shp[ndim-1-i];
        }
      return true;
      }
Martin Reinecke's avatar
Martin Reinecke committed
133
134
    void fill(const T &val) const
      {
Martin Reinecke's avatar
Martin Reinecke committed
135
      // FIXME: special cases for contiguous arrays and/or zeroing?
Martin Reinecke's avatar
Martin Reinecke committed
136
137
138
139
140
141
142
143
      if (ndim==1)
        for (size_t i=0; i<shp[0]; ++i)
          d[str[0]*i]=val;
      else if (ndim==2)
        for (size_t i=0; i<shp[0]; ++i)
          for (size_t j=0; j<shp[1]; ++j)
            d[str[0]*i + str[1]*j] = val;
      }
Martin Reinecke's avatar
Martin Reinecke committed
144
145
  };

Martin Reinecke's avatar
updates  
Martin Reinecke committed
146
template<typename T, size_t ndim> using const_mav = mav<const T, ndim>;
Martin Reinecke's avatar
Martin Reinecke committed
147
148
149
150
151
152
153
154
template<typename T, size_t ndim> const_mav<T, ndim> cmav (const mav<T, ndim> &mav)
  { return const_mav<T, ndim>(mav.data(), mav.shape()); }
template<typename T, size_t ndim> const_mav<T, ndim> nullmav()
  {
  array<size_t,ndim> shp;
  shp.fill(0);
  return const_mav<T, ndim>(nullptr, shp);
  }
Martin Reinecke's avatar
Martin Reinecke committed
155

Martin Reinecke's avatar
Martin Reinecke committed
156
157
158
//
// basic utilities
//
Martin Reinecke's avatar
Martin Reinecke committed
159
160
161
162
163
#if defined (__GNUC__)
#define LOC_ CodeLocation(__FILE__, __LINE__, __PRETTY_FUNCTION__)
#else
#define LOC_ CodeLocation(__FILE__, __LINE__)
#endif
Martin Reinecke's avatar
Martin Reinecke committed
164

Martin Reinecke's avatar
Martin Reinecke committed
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#define myfail(...) \
  do { \
    std::ostringstream os; \
    streamDump__(os, LOC_, "\n", ##__VA_ARGS__, "\n"); \
    throw std::runtime_error(os.str()); \
    } while(0)

#define myassert(cond,...) \
  do { \
    if (cond); \
    else { myfail("Assertion failure\n", ##__VA_ARGS__); } \
    } while(0)

template<typename T>
inline void streamDump__(std::ostream &os, const T& value)
  { os << value; }

template<typename T, typename ... Args>
inline void streamDump__(std::ostream &os, const T& value,
  const Args& ... args)
Martin Reinecke's avatar
Martin Reinecke committed
185
  {
Martin Reinecke's avatar
Martin Reinecke committed
186
187
  os << value;
  streamDump__(os, args...);
Martin Reinecke's avatar
Martin Reinecke committed
188
  }
Martin Reinecke's avatar
Martin Reinecke committed
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
// to be replaced with std::source_location once available
class CodeLocation
  {
  private:
    const char *file, *func;
    int line;

  public:
    CodeLocation(const char *file_, int line_, const char *func_=nullptr)
      : file(file_), func(func_), line(line_) {}

    ostream &print(ostream &os) const
      {
      os << "file: " << file <<  ", line: " <<  line;
      if (func) os << ", function: " << func;
      return os;
      }
  };

inline std::ostream &operator<<(std::ostream &os, const CodeLocation &loc)
  { return loc.print(os); }
Martin Reinecke's avatar
Martin Reinecke committed
210
211

template<size_t ndim> void checkShape
Martin Reinecke's avatar
Martin Reinecke committed
212
  (const array<size_t, ndim> &shp1, const array<size_t, ndim> &shp2)
Martin Reinecke's avatar
Martin Reinecke committed
213
214
215
216
217
  {
  for (size_t i=0; i<ndim; ++i)
    myassert(shp1[i]==shp2[i], "shape mismatch");
  }

Martin Reinecke's avatar
Martin Reinecke committed
218
219
template<typename T> inline T fmod1 (T v)
  { return v-floor(v); }
Martin Reinecke's avatar
Martin Reinecke committed
220

Martin Reinecke's avatar
Martin Reinecke committed
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
template<typename T, size_t ndim> class tmpStorage
  {
  private:
    vector<T> d;
    mav<T,ndim> mav_;

    static size_t prod(const array<size_t,ndim> &shp)
      {
      size_t res=1;
      for (auto v: shp) res*=v;
      return res;
      }

  public:
    tmpStorage(const array<size_t,ndim> &shp)
      : d(prod(shp)), mav_(d.data(), shp) {}
    mav<T,ndim> &getMav() { return mav_; }
    const_mav<T,ndim> getCmav() { return cmav(mav_); }
    void fill(const T & val)
      { std::fill(d.begin(), d.end(), val); }
  };

Martin Reinecke's avatar
Martin Reinecke committed
243
244
245
246
247
248
249
//
// Utilities for Gauss-Legendre quadrature
//

static inline double one_minus_x2 (double x)
  { return (fabs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; }

Martin Reinecke's avatar
Martin Reinecke committed
250
void legendre_prep(int n, vector<double> &x, vector<double> &w, size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
251
252
253
254
255
256
257
  {
  constexpr double pi = 3.141592653589793238462643383279502884197;
  constexpr double eps = 3e-14;
  int m = (n+1)>>1;
  x.resize(m);
  w.resize(m);

Martin Reinecke's avatar
Martin Reinecke committed
258
259
  const double t0 = 1 - (1-1./n) / (8.*n*n);
  const double t1 = 1./(4.*n+2.);
Martin Reinecke's avatar
Martin Reinecke committed
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307

#pragma omp parallel num_threads(nthreads)
{
  int i;
#pragma omp for schedule(dynamic,100)
  for (i=1; i<=m; ++i)
    {
    double x0 = cos(pi * ((i<<2)-1) * t1) * t0;

    int dobreak=0;
    int j=0;
    double dpdx;
    while(1)
      {
      double P_1 = 1.0;
      double P0 = x0;
      double dx, x1;

      for (int k=2; k<=n; k++)
        {
        double P_2 = P_1;
        P_1 = P0;
//        P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k;
        P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2);
        }

      dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0);

      /* Newton step */
      x1 = x0 - P0/dpdx;
      dx = x0-x1;
      x0 = x1;
      if (dobreak) break;

      if (abs(dx)<=eps) dobreak=1;
      myassert(++j<100, "convergence problem");
      }

    x[m-i] = x0;
    w[m-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx);
    }
} // end of parallel region
  }

//
// Start of real gridder functionality
//

308
309
310
template<typename T> void complex2hartley
  (const const_mav<complex<T>, 2> &grid, const mav<T,2> &grid2, size_t nthreads)
  {
Martin Reinecke's avatar
Martin Reinecke committed
311
  checkShape(grid.shape(), grid2.shape());
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
  size_t nu=grid.shape(0), nv=grid.shape(1);

#pragma omp parallel for num_threads(nthreads)
  for (size_t u=0; u<nu; ++u)
    {
    size_t xu = (u==0) ? 0 : nu-u;
    for (size_t v=0; v<nv; ++v)
      {
      size_t xv = (v==0) ? 0 : nv-v;
      grid2(u,v) += T(0.5)*(grid( u, v).real()+grid( u, v).imag()+
                            grid(xu,xv).real()-grid(xu,xv).imag());
      }
    }
  }

template<typename T> void hartley2complex
  (const const_mav<T,2> &grid, const mav<complex<T>,2> &grid2, size_t nthreads)
  {
Martin Reinecke's avatar
Martin Reinecke committed
330
  checkShape(grid.shape(), grid2.shape());
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
  size_t nu=grid.shape(0), nv=grid.shape(1);

#pragma omp parallel for num_threads(nthreads)
  for (size_t u=0; u<nu; ++u)
    {
    size_t xu = (u==0) ? 0 : nu-u;
    for (size_t v=0; v<nv; ++v)
      {
      size_t xv = (v==0) ? 0 : nv-v;
      T v1 = T(0.5)*grid( u, v);
      T v2 = T(0.5)*grid(xu,xv);
      grid2(u,v) = std::complex<T>(v1+v2, v1-v2);
      }
    }
  }

Martin Reinecke's avatar
Martin Reinecke committed
347
348
template<typename T> void hartley2_2D(const const_mav<T,2> &in,
  const mav<T,2> &out, size_t nthreads)
349
  {
Martin Reinecke's avatar
Martin Reinecke committed
350
  checkShape(in.shape(), out.shape());
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
  size_t nu=in.shape(0), nv=in.shape(1);
  ptrdiff_t sz=ptrdiff_t(sizeof(T));
  pocketfft::stride_t stri{sz*in.stride(0), sz*in.stride(1)};
  pocketfft::stride_t stro{sz*out.stride(0), sz*out.stride(1)};
  auto d_i = in.data();
  auto ptmp = out.data();
  pocketfft::r2r_separable_hartley({nu, nv}, stri, stro, {0,1}, d_i, ptmp, T(1),
    nthreads);
#pragma omp parallel for num_threads(nthreads)
  for(size_t i=1; i<(nu+1)/2; ++i)
    for(size_t j=1; j<(nv+1)/2; ++j)
       {
       T a = ptmp[i*nv+j];
       T b = ptmp[(nu-i)*nv+j];
       T c = ptmp[i*nv+nv-j];
       T d = ptmp[(nu-i)*nv+nv-j];
       ptmp[i*nv+j] = T(0.5)*(a+b+c-d);
       ptmp[(nu-i)*nv+j] = T(0.5)*(a+b+d-c);
       ptmp[i*nv+nv-j] = T(0.5)*(a+c+d-b);
       ptmp[(nu-i)*nv+nv-j] = T(0.5)*(b+c+d-a);
       }
  }


Martin Reinecke's avatar
Martin Reinecke committed
375
class ES_Kernel
Martin Reinecke's avatar
Martin Reinecke committed
376
  {
Martin Reinecke's avatar
Martin Reinecke committed
377
  private:
Martin Reinecke's avatar
Martin Reinecke committed
378
    static constexpr double pi = 3.141592653589793238462643383279502884197;
Martin Reinecke's avatar
Martin Reinecke committed
379
    double beta;
Martin Reinecke's avatar
Martin Reinecke committed
380
    int p;
381
    vector<double> x, wgt, psi;
Martin Reinecke's avatar
Martin Reinecke committed
382
    size_t supp;
Martin Reinecke's avatar
Martin Reinecke committed
383
384

  public:
Martin Reinecke's avatar
Martin Reinecke committed
385
386
    ES_Kernel(size_t supp_, double ofactor, size_t nthreads)
      : beta(get_beta(supp_,ofactor)*supp_), p(int(1.5*supp_+2)), supp(supp_)
Martin Reinecke's avatar
Martin Reinecke committed
387
      {
Martin Reinecke's avatar
Martin Reinecke committed
388
      legendre_prep(2*p,x,wgt,nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
389
390
391
392
      psi=x;
      for (auto &v:psi)
        v=operator()(v);
      }
Martin Reinecke's avatar
Martin Reinecke committed
393
394
395
    ES_Kernel(size_t supp_, size_t nthreads)
      : ES_Kernel(supp_, 2., nthreads){}

Martin Reinecke's avatar
Martin Reinecke committed
396
    double operator()(double v) const { return exp(beta*(sqrt(1.-v*v)-1.)); }
Martin Reinecke's avatar
Martin Reinecke committed
397
398
    /* Compute correction factors for the ES gridding kernel
       This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
Martin Reinecke's avatar
Martin Reinecke committed
399
    double corfac(double v) const
Martin Reinecke's avatar
Martin Reinecke committed
400
      {
Martin Reinecke's avatar
Martin Reinecke committed
401
      double tmp=0;
Martin Reinecke's avatar
Martin Reinecke committed
402
403
      for (int i=0; i<p; ++i)
        tmp += wgt[i]*psi[i]*cos(pi*supp*v*x[i]);
Martin Reinecke's avatar
Martin Reinecke committed
404
      return 1./(supp*tmp);
Martin Reinecke's avatar
Martin Reinecke committed
405
      }
Martin Reinecke's avatar
Martin Reinecke committed
406
    static double get_beta(size_t supp, double ofactor=2)
407
      {
Martin Reinecke's avatar
Martin Reinecke committed
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
      myassert((supp>=2) && (supp<=15), "unsupported support size");
      if (ofactor>=2)
        {
        static const vector<double> opt_beta {-1, 0.14, 1.70, 2.08, 2.205, 2.26,
          2.29, 2.307, 2.316, 2.3265, 2.3324, 2.282, 2.294, 2.304, 2.3138, 2.317};
        myassert(supp<opt_beta.size(), "bad support size");
        return opt_beta[supp];
        }
      if (ofactor>=1.2)
        {
        // empirical, but pretty accurate approximation
        static const array<double,16> betacorr{0,0,-0.51,-0.21,-0.1,-0.05,-0.025,-0.0125,0,0,0,0,0,0,0,0};
        auto x0 = 1./(2*ofactor);
        auto bcstrength=1.+(x0-0.25)*2.5;
        return 2.32+bcstrength*betacorr[supp]+(0.25-x0)*3.1;
        }
      myfail("oversampling factor is too small");
425
426
      }

Martin Reinecke's avatar
Martin Reinecke committed
427
    static size_t get_supp(double epsilon, double ofactor=2)
Martin Reinecke's avatar
Martin Reinecke committed
428
429
      {
      double epssq = epsilon*epsilon;
Martin Reinecke's avatar
Martin Reinecke committed
430
431
432
433
434
      if (ofactor>=2)
        {
        static const vector<double> maxmaperr { 1e8, 0.19, 2.98e-3, 5.98e-5,
          1.11e-6, 2.01e-8, 3.55e-10, 5.31e-12, 8.81e-14, 1.34e-15, 2.17e-17,
          2.12e-19, 2.88e-21, 3.92e-23, 8.21e-25, 7.13e-27 };
Martin Reinecke's avatar
Martin Reinecke committed
435

Martin Reinecke's avatar
Martin Reinecke committed
436
437
438
439
440
441
442
443
444
445
446
447
448
449
        for (size_t i=1; i<maxmaperr.size(); ++i)
          if (epssq>maxmaperr[i]) return i;
        myfail("requested epsilon too small - minimum is 1e-13");
        }
      if (ofactor>=1.2)
        {
        for (size_t w=2; w<16; ++w)
          {
          auto estimate = 12*exp(-2.*w*ofactor); // empirical, not very good approximation
          if (epssq>estimate) return w;
          }
        myfail("requested epsilon too small");
        }
      myfail("oversampling factor is too small");
Martin Reinecke's avatar
Martin Reinecke committed
450
      }
Martin Reinecke's avatar
Martin Reinecke committed
451
  };
Martin Reinecke's avatar
Martin Reinecke committed
452

Martin Reinecke's avatar
Martin Reinecke committed
453
454
/* Compute correction factors for the ES gridding kernel
   This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
Martin Reinecke's avatar
Martin Reinecke committed
455
vector<double> correction_factors(size_t n, double ofactor, size_t nval, size_t supp,
Martin Reinecke's avatar
Martin Reinecke committed
456
  size_t nthreads)
Martin Reinecke's avatar
Martin Reinecke committed
457
  {
Martin Reinecke's avatar
Martin Reinecke committed
458
  ES_Kernel kernel(supp, ofactor, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
459
  vector<double> res(nval);
Martin Reinecke's avatar
Martin Reinecke committed
460
461
462
463
464
465
466
  double xn = 1./n;
#pragma omp parallel for schedule(static) num_threads(nthreads)
  for (size_t k=0; k<nval; ++k)
    res[k] = kernel.corfac(k*xn);
  return res;
  }

467
468
469
470
471
472
473
using idx_t = uint32_t;

struct RowChan
  {
  idx_t row, chan;
  };

Martin Reinecke's avatar
step 1  
Martin Reinecke committed
474
struct UVW
Martin Reinecke's avatar
Martin Reinecke committed
475
  {
Martin Reinecke's avatar
step 1  
Martin Reinecke committed
476
477
478
479
  double u, v, w;
  UVW() {}
  UVW(double u_, double v_, double w_) : u(u_), v(v_), w(w_) {}
  UVW operator* (double fct) const
Martin Reinecke's avatar
Martin Reinecke committed
480
    { return UVW(u*fct, v*fct, w*fct); }
481
482
483
484
485
486
487
  void Flip() { u=-u; v=-v; w=-w; }
  bool FixW()
    {
    bool flip = w<0;
    if (flip) Flip();
    return flip;
    }
Martin Reinecke's avatar
Martin Reinecke committed
488
489
  };

Martin Reinecke's avatar
step 1  
Martin Reinecke committed
490
class Baselines
Martin Reinecke's avatar
Martin Reinecke committed
491
492
  {
  protected:
Martin Reinecke's avatar
step 1  
Martin Reinecke committed
493
494
    vector<UVW> coord;
    vector<double> f_over_c;
495
496
    idx_t nrows, nchan;
    idx_t shift, mask;
Martin Reinecke's avatar
Martin Reinecke committed
497
498

  public:
Martin Reinecke's avatar
Martin Reinecke committed
499
    template<typename T> Baselines(const const_mav<T,2> &coord_,
500
      const const_mav<T,1> &freq, bool negate_v=false)
Martin Reinecke's avatar
Martin Reinecke committed
501
502
503
      {
      constexpr double speedOfLight = 299792458.;
      myassert(coord_.shape(1)==3, "dimension mismatch");
504
505
506
507
      auto hugeval = size_t(~(idx_t(0)));
      myassert(coord_.shape(0)<hugeval, "too many entries in MS");
      myassert(coord_.shape(1)<hugeval, "too many entries in MS");
      myassert(coord_.size()<hugeval, "too many entries in MS");
Martin Reinecke's avatar
Martin Reinecke committed
508
509
      nrows = coord_.shape(0);
      nchan = freq.shape(0);
510
      shift=0;
511
512
      while((idx_t(1)<<shift)<nchan) ++shift;
      mask=(idx_t(1)<<shift)-1;
Martin Reinecke's avatar
Martin Reinecke committed
513
      myassert(nrows*(mask+1)<hugeval, "too many entries in MS");
Martin Reinecke's avatar
Martin Reinecke committed
514
515
      f_over_c.resize(nchan);
      for (size_t i=0; i<nchan; ++i)
516
517
        {
        myassert(freq[i]>0, "negative channel frequency encountered");
Martin Reinecke's avatar
Martin Reinecke committed
518
        f_over_c[i] = freq(i)/speedOfLight;
519
        }
Martin Reinecke's avatar
Martin Reinecke committed
520
      coord.resize(nrows);
521
522
523
524
525
526
      if (negate_v)
        for (size_t i=0; i<coord.size(); ++i)
          coord[i] = UVW(coord_(i,0), -coord_(i,1), coord_(i,2));
      else
        for (size_t i=0; i<coord.size(); ++i)
          coord[i] = UVW(coord_(i,0), coord_(i,1), coord_(i,2));
Martin Reinecke's avatar
Martin Reinecke committed
527
528
      }

529
    RowChan getRowChan(idx_t index) const
530
      { return RowChan{index>>shift, index&mask}; }
Martin Reinecke's avatar
Martin Reinecke committed
531

Martin Reinecke's avatar
step 1  
Martin Reinecke committed
532
    UVW effectiveCoord(const RowChan &rc) const
Martin Reinecke's avatar
Martin Reinecke committed
533
      { return coord[rc.row]*f_over_c[rc.chan]; }
534
    UVW effectiveCoord(idx_t index) const
Martin Reinecke's avatar
Martin Reinecke committed
535
      { return effectiveCoord(getRowChan(index)); }
Martin Reinecke's avatar
Martin Reinecke committed
536
537
    size_t Nrows() const { return nrows; }
    size_t Nchannels() const { return nchan; }
538
    idx_t getIdx(idx_t irow, idx_t ichan) const
539
      { return ichan+(irow<<shift); }
Martin Reinecke's avatar
Martin Reinecke committed
540

541
    template<typename T> void effectiveUVW(const mav<const idx_t,1> &idx,
Martin Reinecke's avatar
step 1  
Martin Reinecke committed
542
      mav<T,2> &res) const
Martin Reinecke's avatar
Martin Reinecke committed
543
544
      {
      size_t nvis = idx.shape(0);
Martin Reinecke's avatar
Martin Reinecke committed
545
      checkShape(res.shape(), {idx.shape(0), 3});
Martin Reinecke's avatar
Martin Reinecke committed
546
547
548
549
550
551
552
553
554
      for (size_t i=0; i<nvis; i++)
        {
        auto uvw = effectiveCoord(idx(i));
        res(i,0) = uvw.u;
        res(i,1) = uvw.v;
        res(i,2) = uvw.w;
        }
      }

Martin Reinecke's avatar
step 1  
Martin Reinecke committed
555
    template<typename T> void ms2vis(const mav<const T,2> &ms,
556
      const mav<const idx_t,1> &idx, mav<T,1> &vis, size_t nthreads) const
Martin Reinecke's avatar
Martin Reinecke committed
557
      {
Martin Reinecke's avatar
Martin Reinecke committed
558
      checkShape(ms.shape(), {nrows, nchan});
Martin Reinecke's avatar
Martin Reinecke committed
559
      size_t nvis = idx.shape(0);
Martin Reinecke's avatar
Martin Reinecke committed
560
      checkShape(vis.shape(), {nvis});
Martin Reinecke's avatar
Martin Reinecke committed
561
562
563
#pragma omp parallel for num_threads(nthreads)
      for (size_t i=0; i<nvis; ++i)
        {
Martin Reinecke's avatar
Martin Reinecke committed
564
565
        auto rc = getRowChan(idx(i));
        vis[i] = ms(rc.row, rc.chan);
Martin Reinecke's avatar
Martin Reinecke committed
566
567
568
        }
      }

Martin Reinecke's avatar
step 1  
Martin Reinecke committed
569
    template<typename T> void vis2ms(const mav<const T,1> &vis,
570
      const mav<const idx_t,1> &idx, mav<T,2> &ms, size_t nthreads) const
Martin Reinecke's avatar
Martin Reinecke committed
571
572
      {
      size_t nvis = vis.shape(0);
Martin Reinecke's avatar
Martin Reinecke committed
573
574
      checkShape(idx.shape(), {nvis});
      checkShape(ms.shape(), {nrows, nchan});
Martin Reinecke's avatar
Martin Reinecke committed
575
576
577
#pragma omp parallel for num_threads(nthreads)
      for (size_t i=0; i<nvis; ++i)
        {
Martin Reinecke's avatar
Martin Reinecke committed
578
579
        auto rc = getRowChan(idx(i));
        ms(rc.row, rc.chan) += vis(i);
Martin Reinecke's avatar
Martin Reinecke committed
580
581
582
583
        }
      }
  };

Martin Reinecke's avatar
step 1  
Martin Reinecke committed
584
class GridderConfig
Martin Reinecke's avatar
Martin Reinecke committed
585
586
  {
  protected:
Martin Reinecke's avatar
Martin Reinecke committed
587
588
589
    size_t nx_dirty, ny_dirty, nu, nv;
    double ofactor, eps, psx, psy;
    size_t supp, nsafe;
Martin Reinecke's avatar
Martin Reinecke committed
590
591
    double beta;
    vector<double> cfu, cfv;
Martin Reinecke's avatar
Martin Reinecke committed
592
    size_t nthreads;
Martin Reinecke's avatar
Martin Reinecke committed
593
    double ushift, vshift;
Martin Reinecke's avatar
Martin Reinecke committed
594
    int maxiu0, maxiv0;
Martin Reinecke's avatar
Martin Reinecke committed
595

Martin Reinecke's avatar
Martin Reinecke committed
596
597
    complex<double> wscreen(double x, double y, double w, bool adjoint,
      bool divide_by_n) const
Martin Reinecke's avatar
Martin Reinecke committed
598
599
      {
      constexpr double pi = 3.141592653589793238462643383279502884197;
600
      double tmp = 1-x-y;
Martin Reinecke's avatar
Martin Reinecke committed
601
602
      if (tmp<=0) return divide_by_n ? 0. : 1.; // no phase factor beyond the horizon
      double nm1 = (-x-y)/(sqrt(tmp)+1); // more accurate form of sqrt(1-x-y)-1
Martin Reinecke's avatar
Martin Reinecke committed
603
604
      double phase = 2*pi*w*nm1;
      if (adjoint) phase *= -1;
Martin Reinecke's avatar
Martin Reinecke committed
605
      double xn = divide_by_n ? 1./(nm1+1) : 1;
Martin Reinecke's avatar
step 1  
Martin Reinecke committed
606
      return complex<double>(cos(phase)*xn, sin(phase)*xn);
Martin Reinecke's avatar
Martin Reinecke committed
607
608
609
      }

  public:
Martin Reinecke's avatar
Martin Reinecke committed
610
611
612
613
614
    GridderConfig(size_t nxdirty, size_t nydirty, size_t nu_, size_t nv_,
      double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_)
      : nx_dirty(nxdirty), ny_dirty(nydirty), nu(nu_), nv(nv_),
        ofactor(min(double(nu)/nxdirty, double(nv)/nydirty)),
        eps(epsilon),
Martin Reinecke's avatar
Martin Reinecke committed
615
        psx(pixsize_x), psy(pixsize_y),
Martin Reinecke's avatar
Martin Reinecke committed
616
617
        supp(ES_Kernel::get_supp(epsilon, ofactor)), nsafe((supp+1)/2),
        beta(ES_Kernel::get_beta(supp, ofactor)*supp),
Martin Reinecke's avatar
Martin Reinecke committed
618
        cfu(nx_dirty), cfv(ny_dirty), nthreads(nthreads_),
Martin Reinecke's avatar
Martin Reinecke committed
619
        ushift(supp*(-0.5)+1+nu), vshift(supp*(-0.5)+1+nv),
Martin Reinecke's avatar
Martin Reinecke committed
620
        maxiu0((nu+nsafe)-supp), maxiv0((nv+nsafe)-supp)
Martin Reinecke's avatar
Martin Reinecke committed
621
      {
Martin Reinecke's avatar
Martin Reinecke committed
622
623
      myassert(nu>=2*nsafe, "nu too small");
      myassert(nv>=2*nsafe, "nv too small");
Martin Reinecke's avatar
Martin Reinecke committed
624
625
      myassert((nx_dirty&1)==0, "nx_dirty must be even");
      myassert((ny_dirty&1)==0, "ny_dirty must be even");
Martin Reinecke's avatar
Martin Reinecke committed
626
627
      myassert((nu&1)==0, "nu must be even");
      myassert((nv&1)==0, "nv must be even");
Martin Reinecke's avatar
Martin Reinecke committed
628
629
630
      myassert(epsilon>0, "epsilon must be positive");
      myassert(pixsize_x>0, "pixsize_x must be positive");
      myassert(pixsize_y>0, "pixsize_y must be positive");
Martin Reinecke's avatar
Martin Reinecke committed
631
      myassert(ofactor>=1.2, "oversampling factor smaller than 1.2");
Martin Reinecke's avatar
Martin Reinecke committed
632

Martin Reinecke's avatar
Martin Reinecke committed
633
      auto tmp = correction_factors(nu, ofactor, nx_dirty/2+1, supp, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
634
635
636
637
      cfu[nx_dirty/2]=tmp[0];
      cfu[0]=tmp[nx_dirty/2];
      for (size_t i=1; i<nx_dirty/2; ++i)
        cfu[nx_dirty/2-i] = cfu[nx_dirty/2+i] = tmp[i];
Martin Reinecke's avatar
Martin Reinecke committed
638
      tmp = correction_factors(nv, ofactor, ny_dirty/2+1, supp, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
639
640
641
642
643
      cfv[ny_dirty/2]=tmp[0];
      cfv[0]=tmp[ny_dirty/2];
      for (size_t i=1; i<ny_dirty/2; ++i)
        cfv[ny_dirty/2-i] = cfv[ny_dirty/2+i] = tmp[i];
      }
Martin Reinecke's avatar
Martin Reinecke committed
644
645
    GridderConfig(size_t nxdirty, size_t nydirty,
      double epsilon, double pixsize_x, double pixsize_y, size_t nthreads_)
646
647
648
      : GridderConfig(nxdirty, nydirty, max<size_t>(30,2*nxdirty),
                      max<size_t>(30,2*nydirty), epsilon, pixsize_x,
                      pixsize_y, nthreads_) {}
Martin Reinecke's avatar
Martin Reinecke committed
649
650
651
652
653
654
655
656
657
    size_t Nxdirty() const { return nx_dirty; }
    size_t Nydirty() const { return ny_dirty; }
    double Epsilon() const { return eps; }
    double Pixsize_x() const { return psx; }
    double Pixsize_y() const { return psy; }
    size_t Nu() const { return nu; }
    size_t Nv() const { return nv; }
    size_t Supp() const { return supp; }
    size_t Nsafe() const { return nsafe; }
Martin Reinecke's avatar
Martin Reinecke committed
658
    double Beta() const { return beta; }
Martin Reinecke's avatar
Martin Reinecke committed
659
    size_t Nthreads() const { return nthreads; }
Martin Reinecke's avatar
Martin Reinecke committed
660
    double Ofactor() const{ return ofactor; }
Martin Reinecke's avatar
Martin Reinecke committed
661

Martin Reinecke's avatar
step 1  
Martin Reinecke committed
662
    template<typename T> void apply_taper(const mav<const T,2> &img, mav<T,2> &img2, bool divide) const
Martin Reinecke's avatar
Martin Reinecke committed
663
      {
Martin Reinecke's avatar
Martin Reinecke committed
664
665
      checkShape(img.shape(), {nx_dirty, ny_dirty});
      checkShape(img2.shape(), {nx_dirty, ny_dirty});
Martin Reinecke's avatar
Martin Reinecke committed
666
667
668
      if (divide)
        for (size_t i=0; i<nx_dirty; ++i)
          for (size_t j=0; j<ny_dirty; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
669
            img2(i,j) = img(i,j)/(cfu[i]*cfv[j]);
Martin Reinecke's avatar
Martin Reinecke committed
670
671
672
      else
        for (size_t i=0; i<nx_dirty; ++i)
          for (size_t j=0; j<ny_dirty; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
673
            img2(i,j) = img(i,j)*cfu[i]*cfv[j];
Martin Reinecke's avatar
Martin Reinecke committed
674
675
      }

676
    template<typename T> void grid2dirty_post(const mav<T,2> &tmav, const mav<T,2> &dirty) const
677
      {
Martin Reinecke's avatar
Martin Reinecke committed
678
      checkShape(dirty.shape(), {nx_dirty,ny_dirty});
679
680
681
682
683
684
685
      for (size_t i=0; i<nx_dirty; ++i)
        for (size_t j=0; j<ny_dirty; ++j)
          {
          size_t i2 = nu-nx_dirty/2+i;
          if (i2>=nu) i2-=nu;
          size_t j2 = nv-ny_dirty/2+j;
          if (j2>=nv) j2-=nv;
686
687
          // FIXME: for some reason g++ warns about double-to-float conversion
          // here, even though there is an explicit cast...
Martin Reinecke's avatar
Martin Reinecke committed
688
          dirty(i,j) = tmav(i2,j2)*T(cfu[i]*cfv[j]);
689
690
          }
      }
Martin Reinecke's avatar
Martin Reinecke committed
691

692
693
694
695
696
697
698
699
    template<typename T> void grid2dirty(const const_mav<T,2> &grid, const mav<T,2> &dirty) const
      {
      checkShape(grid.shape(), {nu,nv});
      tmpStorage<T,2> tmpdat({nu,nv});
      auto tmav = tmpdat.getMav();
      hartley2_2D<T>(grid, tmav, nthreads);
      grid2dirty_post(tmav, dirty);
      }
700

Martin Reinecke's avatar
Martin Reinecke committed
701
    template<typename T> void grid2dirty_c(const const_mav<complex<T>,2> &grid, mav<complex<T>,2> &dirty) const
Martin Reinecke's avatar
Martin Reinecke committed
702
      {
Martin Reinecke's avatar
Martin Reinecke committed
703
      checkShape(grid.shape(), {nu,nv});
704
705
      tmpStorage<complex<T>,2> tmpdat({nu,nv});
      auto tmp = tmpdat.getMav();
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
706
      constexpr auto sc = ptrdiff_t(sizeof(complex<T>));
Martin Reinecke's avatar
Martin Reinecke committed
707
708
709
      pocketfft::c2c({nu,nv},{grid.stride(0)*sc,grid.stride(1)*sc},
        {tmp.stride(0)*sc, tmp.stride(1)*sc}, {0,1}, pocketfft::BACKWARD,
        grid.data(), tmp.data(), T(1), nthreads);
710
      grid2dirty_post(tmp, dirty);
Martin Reinecke's avatar
Martin Reinecke committed
711
      }
Martin Reinecke's avatar
updates  
Martin Reinecke committed
712

Martin Reinecke's avatar
Martin Reinecke committed
713
714
715
716
717
718
719
720
721
722
    template<typename T> void grid2dirty_c_overwrite(const mav<complex<T>,2> &grid, mav<complex<T>,2> &dirty) const
      {
      checkShape(grid.shape(), {nu,nv});
      constexpr auto sc = ptrdiff_t(sizeof(complex<T>));
      pocketfft::c2c({nu,nv},{grid.stride(0)*sc,grid.stride(1)*sc},
        {grid.stride(0)*sc, grid.stride(1)*sc}, {0,1}, pocketfft::BACKWARD,
        grid.data(), grid.data(), T(1), nthreads);
      grid2dirty_post(grid, dirty);
      }

723
    template<typename T> void dirty2grid_pre(const const_mav<T,2> &dirty, mav<T,2> &grid) const
724
725
726
727
728
729
730
731
732
733
734
      {
      checkShape(dirty.shape(), {nx_dirty, ny_dirty});
      checkShape(grid.shape(), {nu, nv});
      grid.fill(0);
      for (size_t i=0; i<nx_dirty; ++i)
        for (size_t j=0; j<ny_dirty; ++j)
          {
          size_t i2 = nu-nx_dirty/2+i;
          if (i2>=nu) i2-=nu;
          size_t j2 = nv-ny_dirty/2+j;
          if (j2>=nv) j2-=nv;
735
736
          // FIXME: for some reason g++ warns about double-to-float conversion
          // here, even though there is an explicit cast...
Martin Reinecke's avatar
Martin Reinecke committed
737
          grid(i2,j2) = dirty(i,j)*T(cfu[i]*cfv[j]);
738
          }
739
      }
Martin Reinecke's avatar
Martin Reinecke committed
740

741
742
743
    template<typename T> void dirty2grid(const const_mav<T,2> &dirty, mav<T,2> &grid) const
      {
      dirty2grid_pre(dirty, grid);
Martin Reinecke's avatar
Martin Reinecke committed
744
      hartley2_2D<T>(cmav(grid), grid, nthreads);
745
746
      }

Martin Reinecke's avatar
step 1  
Martin Reinecke committed
747
    template<typename T> void dirty2grid_c(const const_mav<complex<T>,2> &dirty,
Martin Reinecke's avatar
updates  
Martin Reinecke committed
748
      mav<complex<T>,2> &grid) const
Martin Reinecke's avatar
Martin Reinecke committed
749
      {
750
      dirty2grid_pre(dirty, grid);
Martin Reinecke's avatar
cleanup  
Martin Reinecke committed
751
      constexpr auto sc = ptrdiff_t(sizeof(complex<T>));
Martin Reinecke's avatar
updates  
Martin Reinecke committed
752
      pocketfft::stride_t strides{grid.stride(0)*sc,grid.stride(1)*sc};
Martin Reinecke's avatar
Martin Reinecke committed
753
      pocketfft::c2c({nu,nv}, strides, strides, {0,1}, pocketfft::FORWARD,
Martin Reinecke's avatar
updates  
Martin Reinecke committed
754
        grid.data(), grid.data(), T(1), nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
755
      }
Martin Reinecke's avatar
updates  
Martin Reinecke committed
756

Martin Reinecke's avatar
Martin Reinecke committed
757
    void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
Martin Reinecke's avatar
Martin Reinecke committed
758
      {
Martin Reinecke's avatar
Martin Reinecke committed
759
760
      u=fmod1(u_in*psx)*nu;
      iu0 = min(int(u+ushift)-int(nu), maxiu0);
Martin Reinecke's avatar
Martin Reinecke committed
761
      v=fmod1(v_in*psy)*nv;
Martin Reinecke's avatar
Martin Reinecke committed
762
      iv0 = min(int(v+vshift)-int(nv), maxiv0);
Martin Reinecke's avatar
Martin Reinecke committed
763
      }
Martin Reinecke's avatar
updates  
Martin Reinecke committed
764

Martin Reinecke's avatar
step 1  
Martin Reinecke committed
765
    template<typename T> void apply_wscreen(const const_mav<complex<T>,2> &dirty,
Martin Reinecke's avatar
Martin Reinecke committed
766
      mav<complex<T>,2> &dirty2, double w, bool adjoint, bool divide_by_n) const
Martin Reinecke's avatar
Martin Reinecke committed
767
      {
Martin Reinecke's avatar
updates  
Martin Reinecke committed
768
769
      checkShape(dirty.shape(), {nx_dirty, ny_dirty});
      checkShape(dirty2.shape(), {nx_dirty, ny_dirty});
Martin Reinecke's avatar
Martin Reinecke committed
770
771
#if 0
      if ((nx_dirty==ny_dirty) && (psx==psy)) // extra symmetry
Martin Reinecke's avatar
Martin Reinecke committed
772
        {
Martin Reinecke's avatar
Martin Reinecke committed
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
        double p0 = -0.5*nx_dirty*psx;
#pragma omp parallel for num_threads(nthreads) schedule(dynamic,10)
        for (size_t i=0; i<=nx_dirty/2; ++i)
          {
          double fx = p0+i*psx;
          fx *= fx;
          for (size_t j=0; j<=i; ++j)
            {
            double fy = p0+j*psy;
            auto ws = complex<T>(wscreen(fx, fy*fy, w, adjoint, divide_by_n));
            dirty2(i,j) = dirty(i,j)*ws; // lower left
            if (j!=i) dirty2(j,i) = dirty(j,i)*ws; // lower left
            size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
            if ((i>0)&&(i<i2))
              {
              dirty2(i2,j) = dirty(i2,j)*ws; // lower right
              if (i!=j) dirty2(j,i2) = dirty(j,i2)*ws; // lower right
              if ((j>0)&&(j<j2))
                {
                dirty2(i2,j2) = dirty(i2,j2)*ws; // upper right
                if (i!=j) dirty2(j2,i2) = dirty(j2,i2)*ws; // upper right
                }
              }
            if ((j>0)&&(j<j2))
              {
              dirty2(i,j2) = dirty(i,j2)*ws; // upper left
              if (i!=j) dirty2(j2,i) = dirty(j2,i)*ws; // upper left
              }
            }
          }
        }
      else
#endif
        {
        double x0 = -0.5*nx_dirty*psx,
               y0 = -0.5*ny_dirty*psy;
#pragma omp parallel for num_threads(nthreads) schedule(static)
        for (size_t i=0; i<=nx_dirty/2; ++i)
Martin Reinecke's avatar
Martin Reinecke committed
811
          {
Martin Reinecke's avatar
Martin Reinecke committed
812
813
814
          double fx = x0+i*psx;
          fx *= fx;
          for (size_t j=0; j<=ny_dirty/2; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
815
            {
Martin Reinecke's avatar
Martin Reinecke committed
816
817
818
819
820
821
822
823
824
825
            double fy = y0+j*psy;
            auto ws = complex<T>(wscreen(fx, fy*fy, w, adjoint, divide_by_n));
            dirty2(i,j) = dirty(i,j)*ws; // lower left
            size_t i2 = nx_dirty-i, j2 = ny_dirty-j;
            if ((i>0)&&(i<i2))
              {
              dirty2(i2,j) = dirty(i2,j)*ws; // lower right
              if ((j>0)&&(j<j2))
                dirty2(i2,j2) = dirty(i2,j2)*ws; // upper right
              }
Martin Reinecke's avatar
Martin Reinecke committed
826
            if ((j>0)&&(j<j2))
Martin Reinecke's avatar
Martin Reinecke committed
827
              dirty2(i,j2) = dirty(i,j2)*ws; // upper left
Martin Reinecke's avatar
Martin Reinecke committed
828
829
830
831
832
833
834
835
            }
          }
        }
      }
  };

constexpr int logsquare=4;

Martin Reinecke's avatar
Martin Reinecke committed
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
#ifdef _OPENMP
class Lock
  {
  private:
    omp_lock_t lck;

    Lock(const Lock &) = delete;
    Lock& operator=(const Lock &) = delete;

  public:
    Lock() { omp_init_lock(&lck); }
    ~Lock() { omp_destroy_lock(&lck); }
    void lock() { omp_set_lock(&lck); }
    void unlock() { omp_unset_lock(&lck); }
  };
Martin Reinecke's avatar
Martin Reinecke committed
851
852
853
int my_max_threads() { return omp_get_max_threads(); }
int my_num_threads() { return omp_get_num_threads(); }
int my_thread_num() { return omp_get_thread_num(); }
Martin Reinecke's avatar
Martin Reinecke committed
854
855
856
857
858
859
860
#else
class Lock
  {
  public:
    Lock() {}
    ~Lock() {}
    void lock() {}
Martin Reinecke's avatar
Martin Reinecke committed
861
    void unlock() {}
Martin Reinecke's avatar
Martin Reinecke committed
862
  };
Martin Reinecke's avatar
Martin Reinecke committed
863
864
865
int my_max_threads() { return 1; }
int my_num_threads() { return 1; }
int my_thread_num() { return 0; }
Martin Reinecke's avatar
Martin Reinecke committed
866
867
#endif

Martin Reinecke's avatar
Martin Reinecke committed
868
template<typename T, typename T2=complex<T>> class Helper
Martin Reinecke's avatar
Martin Reinecke committed
869
870
  {
  private:
Martin Reinecke's avatar
step 1  
Martin Reinecke committed
871
    const GridderConfig &gconf;
Martin Reinecke's avatar
Martin Reinecke committed
872
873
874
875
876
877
878
879
    int nu, nv, nsafe, supp;
    T beta;
    const T2 *grid_r;
    T2 *grid_w;
    int su, sv;
    int iu0, iv0; // start index of the current visibility
    int bu0, bv0; // start index of the current buffer

Martin Reinecke's avatar
Martin Reinecke committed
880
    vector<T2> rbuf, wbuf;
Martin Reinecke's avatar
Martin Reinecke committed
881
    bool do_w_gridding;
Martin Reinecke's avatar
Martin Reinecke committed
882
    double w0, xdw;
Martin Reinecke's avatar
Martin Reinecke committed
883
    size_t nexp;
884
    size_t nvecs;
Martin Reinecke's avatar
Martin Reinecke committed
885
    vector<Lock> &locks;
Martin Reinecke's avatar
Martin Reinecke committed
886
887
888
889
890
891
892
893
894
895

    void dump() const
      {
      if (bu0<-nsafe) return; // nothing written into buffer yet

      int idxu = (bu0+nu)%nu;
      int idxv0 = (bv0+nv)%nv;
      for (int iu=0; iu<su; ++iu)
        {
        int idxv = idxv0;
Martin Reinecke's avatar
Martin Reinecke committed
896
        locks[idxu].lock();
Martin Reinecke's avatar
Martin Reinecke committed
897
898
899
900
901
        for (int iv=0; iv<sv; ++iv)
          {
          grid_w[idxu*nv + idxv] += wbuf[iu*sv + iv];
          if (++idxv>=nv) idxv=0;
          }
Martin Reinecke's avatar
Martin Reinecke committed
902
        locks[idxu].unlock();
Martin Reinecke's avatar
Martin Reinecke committed
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
        if (++idxu>=nu) idxu=0;
        }
      }

    void load()
      {
      int idxu = (bu0+nu)%nu;
      int idxv0 = (bv0+nv)%nv;
      for (int iu=0; iu<su; ++iu)
        {
        int idxv = idxv0;
        for (int iv=0; iv<sv; ++iv)
          {
          rbuf[iu*sv + iv] = grid_r[idxu*nv + idxv];
          if (++idxv>=nv) idxv=0;
          }
        if (++idxu>=nu) idxu=0;
        }
      }

  public:
    const T2 *p0r;
    T2 *p0w;
Martin Reinecke's avatar
Martin Reinecke committed
926
    T kernel[64] ALIGNED(64);
Martin Reinecke's avatar
Martin Reinecke committed
927

Martin Reinecke's avatar
step 1  
Martin Reinecke committed
928
    Helper(const GridderConfig &gconf_, const T2 *grid_r_, T2 *grid_w_,
Martin Reinecke's avatar
merge  
Martin Reinecke committed
929
      vector<Lock> &locks_, double w0_=-1, double dw_=-1)
Martin Reinecke's avatar
Martin Reinecke committed
930
      : gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()),
Martin Reinecke's avatar
Martin Reinecke committed
931
        supp(gconf.Supp()), beta(T(gconf.Beta())), grid_r(grid_r_),
Martin Reinecke's avatar
Martin Reinecke committed
932
933
934
935
        grid_w(grid_w_), su(2*nsafe+(1<<logsquare)), sv(2*nsafe+(1<<logsquare)),
        bu0(-1000000), bv0(-1000000),
        rbuf(su*sv*(grid_r!=nullptr),T(0)),
        wbuf(su*sv*(grid_w!=nullptr),T(0)),
Martin Reinecke's avatar
Martin Reinecke committed
936
937
938
939
        do_w_gridding(dw_>0),
        w0(w0_),
        xdw(T(1)/dw_),
        nexp(2*supp + do_w_gridding),
Martin Reinecke's avatar
Martin Reinecke committed
940
941
        nvecs(VLEN<T>::val*((nexp+VLEN<T>::val-1)/VLEN<T>::val)),
        locks(locks_)
Martin Reinecke's avatar
Martin Reinecke committed
942
943
944
945
      {}
    ~Helper() { if (grid_w) dump(); }

    int lineJump() const { return sv; }
Martin Reinecke's avatar
Martin Reinecke committed
946
    T Wfac() const { return kernel[2*supp]; }
Martin Reinecke's avatar
step 1  
Martin Reinecke committed
947
    void prep(const UVW &in)
Martin Reinecke's avatar
Martin Reinecke committed
948
      {
Martin Reinecke's avatar
Martin Reinecke committed
949
950
951
952
953
      double u, v;
      gconf.getpix(in.u, in.v, u, v, iu0, iv0);
      double xsupp=2./supp;
      double x0 = xsupp*(iu0-u);
      double y0 = xsupp*(iv0-v);
Martin Reinecke's avatar
Martin Reinecke committed
954
955
      for (int i=0; i<supp; ++i)
        {
Martin Reinecke's avatar
Martin Reinecke committed
956
957
        kernel[i  ] = T(x0+i*xsupp);
        kernel[i+supp] = T(y0+i*xsupp);
Martin Reinecke's avatar
Martin Reinecke committed
958
        }
Martin Reinecke's avatar
Martin Reinecke committed
959
      if (do_w_gridding)
Martin Reinecke's avatar
Martin Reinecke committed
960
        kernel[2*supp] = min(T(1), T(xdw*xsupp*abs(w0-in.w)));
Martin Reinecke's avatar
Martin Reinecke committed
961
      for (size_t i=nexp; i<nvecs; ++i)
962
963
        kernel[i]=0;
      for (size_t i=0; i<nvecs; ++i)
Martin Reinecke's avatar
Martin Reinecke committed
964
        kernel[i] = exp(beta*(sqrt(T(1)-kernel[i]*kernel[i])-T(1)));
Martin Reinecke's avatar
Martin Reinecke committed
965
966
967
968
969
970
971
972
973
974
975
976
      if ((iu0<bu0) || (iv0<bv0) || (iu0+supp>bu0+su) || (iv0+supp>bv0+sv))
        {
        if (grid_w) { dump(); fill(wbuf.begin(), wbuf.end(), T(0)); }
        bu0=((((iu0+nsafe)>>logsquare)<<logsquare))-nsafe;
        bv0=((((iv0+nsafe)>>logsquare)<<logsquare))-nsafe;
        if (grid_r) load();
        }
      p0r = grid_r ? rbuf.data() + sv*(iu0-bu0) + iv0-bv0 : nullptr;
      p0w = grid_w ? wbuf.data() + sv*(iu0-bu0) + iv0-bv0 : nullptr;
      }
  };

Martin Reinecke's avatar
Martin Reinecke committed
977
978
979
980
template<class T, class Serv> class SubServ
  {
  private:
    const Serv &srv;
981
    const_mav<idx_t,1> subidx;
Martin Reinecke's avatar
Martin Reinecke committed
982
983

  public:
984
    SubServ(const Serv &orig, const const_mav<idx_t,1> &subidx_)
Martin Reinecke's avatar
Martin Reinecke committed
985
986
987
988
989
990
991
      : srv(orig), subidx(subidx_){}
    size_t Nvis() const { return subidx.size(); }
    const Baselines &getBaselines() const { return srv.getBaselines(); }
    UVW getCoord(size_t i) const
      { return srv.getCoord(subidx[i]); }
    complex<T> getVis(size_t i) const
      { return srv.getVis(subidx[i]); }
992
    idx_t getIdx(size_t i) const { return srv.getIdx(subidx[i]); }
Martin Reinecke's avatar
Martin Reinecke committed
993
994
995
996
997
998
    void setVis (size_t i, const complex<T> &v) const
      { srv.setVis(subidx[i], v); }
    void addVis (size_t i, const complex<T> &v) const
      { srv.addVis(subidx[i], v); }
  };

Martin Reinecke's avatar
step 1  
Martin Reinecke committed
999
template<class T, class T2> class VisServ
Martin Reinecke's avatar
updates  
Martin Reinecke committed
1000
  {
1001
  private:
Martin Reinecke's avatar
step 1  
Martin Reinecke committed
1002
    const Baselines &baselines;
1003
    const_mav<idx_t,1> idx;
1004
    T2 vis;
1005
    const_mav<T,1> wgt;
1006
1007
    size_t nvis;
    bool have_wgt;
Martin Reinecke's avatar
updates  
Martin Reinecke committed
1008

1009
  public:
Martin Reinecke's avatar
Martin Reinecke committed
1010
1011
    using Tsub = SubServ<T, VisServ>;

Martin Reinecke's avatar
step 1  
Martin Reinecke committed
1012
    VisServ(const Baselines &baselines_,
1013
      const const_mav<idx_t,1> &idx_, T2 vis_, const const_mav<T,1> &wgt_)
Martin Reinecke's avatar
Martin Reinecke committed
1014
1015
      : baselines(baselines_), idx(idx_), vis(vis_), wgt(wgt_),
        nvis(vis.shape(0)), have_wgt(wgt.size()!=0)
1016
1017
1018
1019
      {
      checkShape(idx.shape(), {nvis});
      if (have_wgt) checkShape(wgt.shape(), {nvis});
      }
Martin Reinecke's avatar
Martin Reinecke committed
1020
1021
    Tsub getSubserv(const const_mav<idx_t,1> &subidx) const
      { return Tsub(*this, subidx); }
1022
    size_t Nvis() const { return nvis; }
Martin Reinecke's avatar
step 1  
Martin Reinecke committed
1023
1024
    const Baselines &getBaselines() const { return baselines; }
    UVW getCoord(size_t i) const
1025
1026
1027
      { return baselines.effectiveCoord(idx[i]); }
    complex<T> getVis(size_t i) const
      { return have_wgt ? vis(i)*wgt(i) : vis(i); }
1028
    idx_t getIdx(size_t i) const { return idx[i]; }
1029
1030
1031
1032
1033
    void setVis (size_t i, const complex<T> &v) const
      { vis(i) = have_wgt ? v*wgt(i) : v; }
    void addVis (size_t i, const complex<T> &v) const
      { vis(i) += have_wgt ? v*wgt(i) : v; }
  };
Martin Reinecke's avatar
step 1  
Martin Reinecke committed
1034
1035
template<class T, class T2> VisServ<T, T2> makeVisServ
  (const Baselines &baselines,
1036
   const const_mav<idx_t,1> &idx, const T2 &vis, const const_mav<T,1> &wgt)
Martin Reinecke's avatar
step 1  
Martin Reinecke committed
1037
1038
1039
  { return VisServ<T, T2>(baselines, idx, vis, wgt); }

template<class T, class T2> class MsServ
1040
1041
  {
  private:
Martin Reinecke's avatar
step 1  
Martin Reinecke committed
1042
    const Baselines &baselines;