nifty_gridder.cc 12.2 KB
Newer Older
Martin Reinecke's avatar
import  
Martin Reinecke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <algorithm>
#include <iostream>
#include <stdexcept>

using namespace std;

namespace py = pybind11;

namespace {

// The constant pi, written with more digits than a double can represent.
constexpr double pi = 3.141592653589793238462643383279502884197;

// Bit-spreading lookup table with 256 entries: entry i contains the bits of i
// moved to the even bit positions (bit k of i becomes bit 2k), with all odd
// positions left at zero. Each level of the helper macros X/Y/Z consumes two
// bits of the index and appends one of the hex digits 0, 1, 4 or 5.
static const uint16_t utab[] = {
#define Z(a) 0x##a##0, 0x##a##1, 0x##a##4, 0x##a##5
#define Y(a) Z(a##0), Z(a##1), Z(a##4), Z(a##5)
#define X(a) Y(a##0), Y(a##1), Y(a##4), Y(a##5)
X(0),X(1),X(4),X(5)
#undef X
#undef Y
#undef Z
};

uint32_t coord2morton2D_32 (uint32_t x, uint32_t y)
  {
  typedef uint32_t I;
  return  (I)(utab[x&0xff])     | ((I)(utab[(x>>8)&0xff])<<16)
       | ((I)(utab[y&0xff])<<1) | ((I)(utab[(y>>8)&0xff])<<17);
  }

// Single-digit lookup used by morton2peano2D_32(), indexed as
// [rotation state][2-bit input digit]. Each entry packs the output digit in
// bits 0-1 and the rotation state for the following digit in bits 2-3.
static const uint8_t m2p2D_1[4][4] = {
{ 4, 1, 11, 2},{0,15, 5, 6},{10,9,3,12},{14,7,13,8}};
// Three-digit version of m2p2D_1, filled by init_peano2d(): the 6 output bits
// are stored in bits 0-5, the following rotation state in bits 6-7.
static uint8_t m2p2D_3[4][64];
// Single-digit table with the same layout as m2p2D_1; judging by its name it
// serves the opposite conversion direction (Peano -> Morton).
static const uint8_t p2m2D_1[4][4] = {
{ 4, 1, 3, 10},{0,6,7,13},{15,9,8,2},{11,14,12,5}};
// Three-digit version of p2m2D_1, filled by init_peano2d().
static uint8_t p2m2D_3[4][64];
// Set to 1 by init_peano2d(); checked before the *_3 tables are used.
static int peano2d_done=0;

/*! Fills the three-digit tables m2p2D_3 and p2m2D_3 from the single-digit
    tables m2p2D_1 and p2m2D_1 and marks them as initialised. */
static void init_peano2d (void)
  {
  peano2d_done=1;

  // Chains three single-digit lookups for every start state and every
  // 6-bit input, storing the 6 result bits together with the final state.
  auto expand = [](const uint8_t (&step)[4][4], uint8_t (&wide)[4][64])
    {
    for (unsigned start=0; start<4; ++start)
      for (uint32_t digits=0; digits<64; ++digits)
        {
        unsigned state = start;
        uint32_t acc = 0;
        // Walk the three 2-bit digits from most to least significant.
        for (int shift=4; shift>=0; shift-=2)
          {
          const unsigned entry = step[state][(digits>>shift)&0x3];
          acc = (acc<<2) | (entry&0x3);
          state = entry>>2;
          }
        wide[start][digits] = uint8_t(acc | (state<<6));
        }
    };

  expand(m2p2D_1, m2p2D_3);
  expand(p2m2D_1, p2m2D_3);
  }

/*! Converts the Morton index \a v, which holds \a bits bits per coordinate
    (i.e. 2*bits significant bits), into the corresponding Peano index by
    table lookup.
    Returns 0 if \a bits is not positive.
    Throws std::invalid_argument if \a bits exceeds 16, since the index would
    no longer fit into 32 bits. */
uint32_t morton2peano2D_32(uint32_t v, int bits)
  {
  // No digits to convert. This also avoids shifting a 32-bit value by 32,
  // which is undefined behaviour.
  if (bits<=0) return 0;
  // More than 16 bits per coordinate would need a negative shift below.
  if (bits>16)
    throw std::invalid_argument("morton2peano2D_32: bits must not exceed 16");

  if (!peano2d_done) init_peano2d();
  unsigned rot = 0;
  uint32_t res = 0;
  // Left-align the index so that the most significant digit sits on top.
  v<<=32-(bits<<1);
  int i=0;
  // Fast path: convert three 2-bit digits per table lookup.
  for (; i<bits-2; i+=3)
    {
    unsigned tab=m2p2D_3[rot][v>>26];
    v<<=6;
    res = (res<<6) | (tab&0x3fu);
    rot = tab>>6;
    }
  // Remaining one or two digits are converted individually.
  for (; i<bits; ++i)
    {
    unsigned tab=m2p2D_1[rot][v>>30];
    v<<=2;
    res = (res<<2) | (tab&0x3);
    rot = tab>>2;
    }
  return res;
  }

/*! Run-time check: does nothing if \a cond is true. Otherwise writes \a msg
    to stderr and throws a std::runtime_error carrying \a msg, so that the
    failure reason is attached to the exception itself.
    \a msg must be a valid, null-terminated string. */
void myassert(bool cond, const char *msg)
  {
  if (cond) return;
  std::cerr << msg << std::endl;
  throw std::runtime_error(msg);
  }

/*! Comparison functor for indirect sorting: compares two positions \a a and
    \a b by applying the stored comparator to the elements found at those
    offsets from the stored start iterator. */
template<typename It, typename Comp> class IdxComp__
  {
  private:
    It first_;
    Comp order_;
  public:
    IdxComp__ (It begin_, Comp comp_): first_(begin_), order_(comp_) {}
    bool operator() (std::size_t a, std::size_t b) const
      {
      const bool a_before_b = order_(*(first_+a), *(first_+b));
      return a_before_b;
      }
  };
/*! Performs an indirect sort on the supplied iterator range and returns in
    \a idx a \a vector containing the indices of the smallest, second smallest,
    third smallest, etc. element, according to \a comp. */
template<typename It, typename T2, typename Comp>
  inline void buildIndex (It begin, It end, std::vector<T2> &idx, Comp comp)
  {
  using namespace std;
  T2 num=end-begin;
  idx.resize(num);
  for (T2 i=0; i<num; ++i) idx[i] = i;
  sort (idx.begin(),idx.end(),IdxComp__<It,Comp>(begin,comp));
  }

/*! Indirect sort of the range [\a begin, \a end) in ascending order: on
    return, \a idx lists the positions of the smallest, second smallest,
    third smallest, etc. element. */
template<typename It, typename T2> inline void buildIndex (It begin, It end,
  std::vector<T2> &idx)
  {
  using value_t = typename std::iterator_traits<It>::value_type;
  std::less<value_t> ascending;
  buildIndex(begin, end, idx, ascending);
  }

140
141
142
143
144
145
146
147
148
149
150
151
/*! Reduces \a v1 modulo \a v2 so that the result is never negative.
    \a v1 may have either sign; \a v2 must be positive. */
inline double fmodulo (double v1, double v2)
  {
  if (v1>=0)
    {
    // Values already inside [0, v2) are returned untouched.
    if (v1<v2) return v1;
    return std::fmod(v1,v2);
    }
  // fmod keeps the sign of v1, so move the remainder up by one period.
  const double wrapped = std::fmod(v1,v2)+v2;
  // For tiny negative v1 the sum can round to exactly v2; map that to 0.
  if (wrapped==v2) return 0.;
  return wrapped;
  }

Martin Reinecke's avatar
import  
Martin Reinecke committed
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
// Shorthands for the numpy array types exchanged with Python (int, double and
// complex<double> elements). All of them are declared with the c_style and
// forcecast flags.
using a_i_c = py::array_t<int, py::array::c_style | py::array::forcecast>;
using a_d_c = py::array_t<double, py::array::c_style | py::array::forcecast>;
using a_c_c = py::array_t<complex<double>,
  py::array::c_style | py::array::forcecast>;

/*! Computes an ordering of the samples in \a uv_ that follows their Peano
    index on an \a nu x \a nv grid.
    \param uv_ array of shape (nvis, 2); each coordinate is reduced modulo 1
           and scaled by \a nu resp. \a nv to obtain the grid cell.
    \param nu, nv grid dimensions; both must lie in [1; 65536], because the
           Morton/Peano conversion handles at most 16 bits per coordinate.
    \returns int array of length nvis; entry k is the position (in \a uv_) of
           the sample with the k-th smallest Peano index.
    Throws (via myassert) if the array shape or the grid dimensions are
    invalid. */
a_i_c peanoindex(const a_d_c &uv_, int nu, int nv)
  {
  myassert(uv_.ndim()==2, "uv array must be 2D");
  myassert(uv_.shape(1)==2, "uv.shape[1] must be 2");
  // Non-positive sizes would make the bit-count loop below run forever.
  myassert((nu>0) && (nv>0), "nu and nv must be positive");
  myassert((nu<=65536) && (nv<=65536), "nu and nv must not exceed 65536");
  int nvis = uv_.shape(0);
  auto uv = uv_.data();

  // Number of bits needed to represent the largest cell index.
  int npmax = std::max(nu, nv);
  int nbits = 0;
  for (int istart = npmax-1; istart!=0; istart>>=1, ++nbits);

  // Unsigned storage: with 16 bits per axis the index uses all 32 bits.
  std::vector<uint32_t> ipeano(nvis);
  for (int i=0; i<nvis; ++i)
    {
    auto u = fmodulo(uv[2*i], 1.)*nu;
    auto iu = std::min(nu-1, int(u));
    auto v = fmodulo(uv[2*i+1], 1.)*nv;
    auto iv = std::min(nv-1, int(v));
    // On a 1x1 grid (nbits==0) every sample lies in cell 0.
    ipeano[i] = (nbits==0) ? 0
      : morton2peano2D_32(coord2morton2D_32(iu,iv),nbits);
    }
  std::vector<int> newind;
  buildIndex(ipeano.begin(), ipeano.end(), newind);
  int odim[] = {nvis};
  a_i_c res(odim);
  auto iout = res.mutable_data();
  for (int i=0; i<nvis; ++i)
    iout[i] = newind[i];
  return res;
  }

class Helper
  {
  private:
    int nu, nv, nspread, nbuf;
    double r2lamb, fac;
    vector<double> kernel;

  public:
    vector<double> ku, kv;
    int iu, iv, idxu0, idxv0;
    complex<double> val;

    Helper(int nu_, int nv_, int nspread_, double r2lamb_)
      : nu(nu_), nv(nv_), nspread(nspread_), nbuf(2*nspread_), r2lamb(r2lamb_),
        fac(pi/r2lamb), kernel(nspread+1), ku(nbuf), kv(nbuf)
      {
      // Precompute gridding kernel
      for (size_t i=0; i<kernel.size(); ++i)
        kernel[i] = exp(-pi/r2lamb*i*i);
      }
    void update(double u_in, double v_in, complex<double> vis)
      {
208
      auto u = fmodulo(u_in, 1.)*nu;
Martin Reinecke's avatar
import  
Martin Reinecke committed
209
210
211
      iu = min(nu-1, int(u));
      auto du = u-iu;
      idxu0 = (iu-nspread+1+nu)%nu;
212
      auto v = fmodulo(v_in, 1.)*nv;
Martin Reinecke's avatar
import  
Martin Reinecke committed
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
      iv = min(nv-1, int(v));
      auto dv = v-iv;
      idxv0 = (iv-nspread+1+nv)%nv;

      val = vis*exp(-fac*(du*du + dv*dv));

      auto fu0 = exp(2*fac*du);
      auto fv0 = exp(2*fac*dv);
      auto fu = 1.;
      auto fv = 1.;
      for (int i=0; i<nspread; ++i)
        {
        ku[nspread-i-1] = kernel[i]/fu;
        kv[nspread-i-1] = kernel[i]/fv;
        fu *= fu0;
        fv *= fv0;
        ku[nspread+i] = kernel[i+1]*fu;
        kv[nspread+i] = kernel[i+1]*fv;
        }
      }
  };


/*! Common bookkeeping for a buffer window of 2*su x 2*sv cells that is
    positioned around a centre (u0, v0) on an nu x nv grid.
    The half sizes are su = nspread+min(nspread,nu) and
    sv = nspread+min(nspread,nv). Until update_position() is called, the
    centre holds a large negative placeholder value. */
class Buffer
  {
  protected:
    int nu, nv, nspread, su;
  public:
    int sv;
  protected:
    int u0, v0;

    /*! Returns true if cell (\a iu, \a iv) is further than su-nspread resp.
        sv-nspread away from the current centre along at least one axis. */
    bool need_to_move(int iu, int iv) const
      {
      const bool u_in_reach = std::abs(iu-u0) <= su-nspread;
      const bool v_in_reach = std::abs(iv-v0) <= sv-nspread;
      return !(u_in_reach && v_in_reach);
      }

    /*! Moves the centre to (\a iu, \a iv), limited on both axes to the range
        [margin; n-1-margin] with margin = su-nspread resp. sv-nspread; the
        lower limit wins if that range is empty. */
    void update_position(int iu, int iv)
      {
      const int margin_u = su-nspread;
      const int margin_v = sv-nspread;

      int centre_u = (iu < nu-1-margin_u) ? iu : nu-1-margin_u;
      if (centre_u < margin_u) centre_u = margin_u;
      int centre_v = (iv < nv-1-margin_v) ? iv : nv-1-margin_v;
      if (centre_v < margin_v) centre_v = margin_v;

      u0 = centre_u;
      v0 = centre_v;
      }

  public:
    Buffer(int nu_, int nv_, int nspread_)
      : nu(nu_), nv(nv_), nspread(nspread_),
        su(nspread+std::min(nspread, nu)), sv(nspread+std::min(nspread, nv)),
        u0(-1000000), v0(-1000000)
      {}
  };

class WriteBuffer: public Buffer
  {
  private:
    vector<complex<double>> data;
    complex<double> *grid;

    void dump()
      {
      if (u0<0) return;
#pragma omp critical
{
      int idxu = (u0-su+1+nu)%nu;
      int idxv0 = (v0-sv+1+nv)%nv;
      for (int iu=0; iu<2*su; ++iu)
        {
        int idxv = idxv0;
        for (int iv=0; iv<2*sv; ++iv)
          {
          grid[idxu*nv + idxv] += data[iu*2*sv + iv];
          if (++idxv>=nv) idxv=0;
          }
        if (++idxu>=nu) idxu=0;
        }
}
      }

  public:
    complex<double> *p0;
    WriteBuffer(int nu_, int nv_, int nspread_, complex<double> *grid_)
      : Buffer(nu_, nv_, nspread_), data(4*su*sv,0.), grid(grid_) {}
    ~WriteBuffer() { dump(); }

    void prep_write(int iu, int iv)
    /* iu = [0; nu-1]; iv = [0; nv-1] */
      {
      if (need_to_move(iu, iv))
        {
        dump();
        update_position(iu, iv);
        fill(data.begin(), data.end(), 0.);
        }
      p0 = data.data() + 2*sv*(iu-u0+su-nspread) + iv-v0+sv-nspread;
      }
  };

class ReadBuffer: public Buffer
  {
  private:
    vector<complex<double>> data;
    const complex<double> *grid;

    void load()
      {
      int idxu = (u0-su+1+nu)%nu;
      int idxv0 = (v0-sv+1+nv)%nv;
      for (int iu=0; iu<2*su; ++iu)
        {
        int idxv = idxv0;
        for (int iv=0; iv<2*sv; ++iv)
          {
          data[iu*2*sv + iv] = grid[idxu*nv + idxv];
          if (++idxv>=nv) idxv=0;
          }
        if (++idxu>=nu) idxu=0;
        }
      }

  public:
    const complex<double> *p0;
    ReadBuffer(int nu_, int nv_, int nspread_, const complex<double> *grid_)
      : Buffer(nu_, nv_, nspread_), data(4*su*sv,0.), grid(grid_) {}

    void prep_read(int iu, int iv)
    /* iu = [0; nu-1]; iv = [0; nv-1] */
      {
      if (need_to_move(iu, iv))
        {
        update_position(iu, iv);
        load();
        }
      p0 = data.data() + 2*sv*(iu-u0+su-nspread) + iv-v0+sv-nspread;
      }
  };

/*! Spreads visibilities onto a periodic \a nu x \a nv grid.
    \param uv_ array of shape (nvis, 2) with the sample coordinates; they are
           reduced modulo 1 and scaled by \a nu resp. \a nv.
    \param vis_ array of shape (nvis) with one complex value per sample.
    \param nu, nv grid dimensions, must be positive.
    \param nspread half width of the kernel support in cells, must be
           positive; every sample touches 2*nspread x 2*nspread cells.
    \param r2lamb kernel parameter; the kernel is exp(-pi/r2lamb*d*d) for a
           distance of d cells.
    \returns complex array of shape (nu, nv) holding the accumulated grid.
    Throws (via myassert) on inconsistent array shapes or invalid sizes. */
a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_,
               int nu, int nv, int nspread, double r2lamb)
  {
  myassert(uv_.ndim()==2, "uv array must be 2D");
  myassert(uv_.shape(1)==2, "uv.shape[1] must be 2");
  int nvis = uv_.shape(0);
  myassert(vis_.ndim()==1, "vis array must be 1D");
  myassert(vis_.shape(0)==nvis, "array size mismatch");
  // Zero sizes would lead to modulo-by-zero, negative ones to invalid
  // buffer sizes further down.
  myassert((nu>0) && (nv>0), "nu and nv must be positive");
  myassert(nspread>0, "nspread must be positive");
  auto uv = uv_.data();
  auto vis = vis_.data();

  int odim[] = {nu,nv};
  a_c_c res(odim);
  auto grid = res.mutable_data();
  // Element count computed in size_t so that large grids do not overflow int.
  std::fill(grid, grid+std::size_t(nu)*std::size_t(nv),
            std::complex<double>(0.));

#pragma omp parallel
{
  // Every thread owns its helper and its accumulation buffer; the buffer
  // flushes into the shared grid when it moves and when it is destroyed.
  Helper hlp(nu, nv, nspread, r2lamb);
  WriteBuffer buf(nu, nv, nspread, grid);

  // Loop over sampling points
#pragma omp for schedule(dynamic,10000)
  for (int ipart=0; ipart<nvis; ++ipart)
    {
    hlp.update(uv[2*ipart], uv[2*ipart+1], vis[ipart]);
    buf.prep_write(hlp.iu, hlp.iv);
    for (int cu=0; cu<2*nspread; ++cu)
      {
      std::complex<double> tmp = hlp.val*hlp.ku[cu];
      for (int cv=0; cv<2*nspread; ++cv)
        buf.p0[cu*2*buf.sv + cv] += tmp*hlp.kv[cv];
      }
    }
} // end of parallel region
  return res;
  }

Martin Reinecke's avatar
Martin Reinecke committed
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
/*! Combines every cell of the complex 2D array \a grid_ with its mirrored
    partner (index n-i along each axis, index 0 mapping onto itself) into a
    real array of the same shape:
    out = 0.5*(Re(a)+Im(a)+Re(b)-Im(b)), with a the cell and b its partner. */
a_d_c to_grid_post (const a_c_c &grid_)
  {
  myassert(grid_.ndim()==2, "grid array must be 2D");
  int nu = grid_.shape(0), nv = grid_.shape(1);
  auto grid = grid_.data();

  int odim[] = {nu,nv};
  a_d_c res(odim);
  auto grid2 = res.mutable_data();
  for (int u=0; u<nu; ++u)
    {
    // Mirrored row; row 0 is its own partner.
    const int mirror_u = (nu-u)%nu;
    for (int v=0; v<nv; ++v)
      {
      const int mirror_v = (nv-v)%nv;
      const std::complex<double> &cell = grid[u*nv+v];
      const std::complex<double> &partner = grid[mirror_u*nv+mirror_v];
      grid2[u*nv+v] = 0.5*(cell.real()+cell.imag()+
                           partner.real()-partner.imag());
      }
    }
  return res;
  }

Martin Reinecke's avatar
import  
Martin Reinecke committed
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
/*! Evaluates a periodic \a nu x \a nv grid at the given sample positions.
    \param uv_ array of shape (nvis, 2) with the sample coordinates; they are
           reduced modulo 1 and scaled by \a nu resp. \a nv.
    \param grid_ complex array of shape (nu, nv).
    \param nu, nv grid dimensions, must be positive and match \a grid_.
    \param nspread half width of the kernel support in cells, must be
           positive; every sample reads 2*nspread x 2*nspread cells.
    \param r2lamb kernel parameter; the kernel is exp(-pi/r2lamb*d*d) for a
           distance of d cells.
    \returns complex array of shape (nvis) with one value per sample.
    Throws (via myassert) on inconsistent array shapes or invalid sizes. */
a_c_c from_grid (const a_d_c &uv_, const a_c_c &grid_,
  int nu, int nv, int nspread, double r2lamb)
  {
  myassert(uv_.ndim()==2, "uv array must be 2D");
  myassert(uv_.shape(1)==2, "uv.shape[1] must be 2");
  myassert(grid_.ndim()==2, "grid array must be 2D");
  int nvis = uv_.shape(0);
  auto uv = uv_.data();
  auto grid = grid_.data();

  // Zero sizes would lead to modulo-by-zero, negative ones to invalid
  // buffer sizes further down.
  myassert((nu>0) && (nv>0), "nu and nv must be positive");
  myassert(nspread>0, "nspread must be positive");
  myassert(nu==grid_.shape(0), "grid.shape[0] must be equal to nu");
  myassert(nv==grid_.shape(1), "grid.shape[1] must be equal to nv");

  int odim[] = {nvis};
  a_c_c res(odim);
  auto vis = res.mutable_data();

  // Loop over sampling points
#pragma omp parallel
{
  // Every thread owns its helper and its read cache; each output entry is
  // written by exactly one loop iteration.
  Helper hlp(nu, nv, nspread, r2lamb);
  ReadBuffer buf(nu, nv, nspread, grid);
#pragma omp for schedule(dynamic,10000)
  for (int ipart=0; ipart<nvis; ++ipart)
    {
    hlp.update(uv[2*ipart], uv[2*ipart+1], 1.);
    std::complex<double> r = 0.;
    buf.prep_read(hlp.iu, hlp.iv);
    for (int cu=0; cu<2*nspread; ++cu)
      {
      std::complex<double> tmp = 0.;
      for (int cv=0; cv<2*nspread; ++cv)
        tmp += buf.p0[cu*2*buf.sv + cv]*hlp.kv[cv];
      r+=tmp*hlp.ku[cu];
      }
    vis[ipart] = r*hlp.val;
    }
}
  return res;
  }

Martin Reinecke's avatar
Martin Reinecke committed
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
/*! Combines every cell of the real 2D array \a grid_ with its mirrored
    partner (index n-i along each axis, index 0 mapping onto itself) into a
    complex array of the same shape:
    out = (0.5*a + 0.5*b) + i*(0.5*a - 0.5*b), with a the cell and b its
    partner. */
a_c_c from_grid_pre (const a_d_c &grid_)
  {
  myassert(grid_.ndim()==2, "grid array must be 2D");
  int nu = grid_.shape(0), nv = grid_.shape(1);
  auto grid = grid_.data();

  int odim[] = {nu,nv};
  a_c_c res(odim);
  auto grid2 = res.mutable_data();
  for (int u=0; u<nu; ++u)
    {
    // Mirrored row; row 0 is its own partner.
    const int mirror_u = (nu-u)%nu;
    for (int v=0; v<nv; ++v)
      {
      const int mirror_v = (nv-v)%nv;
      const int here = u*nv+v;
      const double half_cell = 0.5*grid[here];
      const double half_partner = 0.5*grid[mirror_u*nv+mirror_v];
      grid2[here] = std::complex<double>(half_cell+half_partner,
                                         half_cell-half_partner);
      }
    }
  return res;
  }

Martin Reinecke's avatar
import  
Martin Reinecke committed
477
478
479
480
481
482
483
484
} // unnamed namespace

// Python module definition. All functions accept their arguments either by
// position or by the keyword names given below.
PYBIND11_MODULE(nifty_gridder, m)
  {
  using namespace pybind11::literals;

  m.def("peanoindex",&peanoindex,
    "Returns the index array that orders the samples in uv (shape (nvis, 2)) "
    "by the Peano index of their cell on an nu x nv grid.",
    "uv"_a, "nu"_a, "nv"_a);
  m.def("to_grid",&to_grid,
    "Spreads the complex visibilities vis (shape (nvis)) located at uv "
    "(shape (nvis, 2)) onto a complex grid of shape (nu, nv).",
    "uv"_a, "vis"_a, "nu"_a, "nv"_a, "nspread"_a, "r2lamb"_a);
  m.def("to_grid_post",&to_grid_post,
    "Converts a complex 2D grid into a real 2D array of the same shape by "
    "combining every cell with its mirrored partner.",
    "grid"_a);
  m.def("from_grid",&from_grid,
    "Evaluates the complex grid (shape (nu, nv)) at the positions uv "
    "(shape (nvis, 2)) and returns one complex value per position.",
    "uv"_a, "grid"_a, "nu"_a, "nv"_a, "nspread"_a, "r2lamb"_a);
  m.def("from_grid_pre",&from_grid_pre,
    "Converts a real 2D grid into a complex 2D array of the same shape by "
    "combining every cell with its mirrored partner.",
    "grid"_a);
  }