totalconvolve.h 32.5 KB
Newer Older
1
2
3
4
5
/*
 *  Copyright (C) 2020 Max-Planck-Society
 *  Author: Martin Reinecke
 */

Martin Reinecke's avatar
Martin Reinecke committed
6
7
8
#ifndef MRUTIL_INTERPOL_NG_H
#define MRUTIL_INTERPOL_NG_H

9
#define SIMD_INTERPOL
Martin Reinecke's avatar
Martin Reinecke committed
10
#define SPECIAL_CASING
11

12
13
#include <vector>
#include <complex>
Martin Reinecke's avatar
Martin Reinecke committed
14
#include <cmath>
15
16
17
18
#include "mr_util/math/constants.h"
#include "mr_util/math/gl_integrator.h"
#include "mr_util/math/es_kernel.h"
#include "mr_util/infra/mav.h"
19
#include "mr_util/infra/simd.h"
20
21
22
#include "mr_util/sharp/sharp.h"
#include "mr_util/sharp/sharp_almhelpers.h"
#include "mr_util/sharp/sharp_geomhelpers.h"
23
#include "python/alm.h"
24
#include "mr_util/math/fft.h"
Martin Reinecke's avatar
Martin Reinecke committed
25

Martin Reinecke's avatar
Martin Reinecke committed
26
namespace mr {
27

Martin Reinecke's avatar
Martin Reinecke committed
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
namespace detail_fft {

using std::vector;

template<typename T, typename T0> aligned_array<T> alloc_tmp_conv
  (const fmav_info &info, size_t axis, size_t len)
  {
  auto othersize = info.size()/info.shape(axis);
  constexpr auto vlen = native_simd<T0>::size();
  auto tmpsize = len*((othersize>=vlen) ? vlen : 1);
  return aligned_array<T>(tmpsize);
  }

template<typename Tplan, typename T, typename T0, typename Exec>
MRUTIL_NOINLINE void general_convolve(const fmav<T> &in, fmav<T> &out,
  const size_t axis, const vector<T0> &kernel, size_t nthreads,
Martin Reinecke's avatar
Martin Reinecke committed
44
  const Exec &exec)
Martin Reinecke's avatar
Martin Reinecke committed
45
  {
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
46
  std::unique_ptr<Tplan> plan1, plan2;
Martin Reinecke's avatar
Martin Reinecke committed
47
48
49
50

  size_t l_in=in.shape(axis), l_out=out.shape(axis);
  size_t l_min=std::min(l_in, l_out), l_max=std::max(l_in, l_out);
  MR_assert(kernel.size()==l_min/2+1, "bad kernel size");
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
51
52
  plan1 = std::make_unique<Tplan>(l_in);
  plan2 = std::make_unique<Tplan>(l_out);
Martin Reinecke's avatar
Martin Reinecke committed
53
54
55
56
57

  execParallel(
    util::thread_count(nthreads, in, axis, native_simd<T0>::size()),
    [&](Scheduler &sched) {
      constexpr auto vlen = native_simd<T0>::size();
Martin Reinecke's avatar
Martin Reinecke committed
58
      auto storage = alloc_tmp_conv<T,T0>(in, axis, l_max);
Martin Reinecke's avatar
Martin Reinecke committed
59
60
61
62
63
64
65
66
67
68
69
70
71
      multi_iter<vlen> it(in, out, axis, sched.num_threads(), sched.thread_num());
#ifndef MRUTIL_NO_SIMD
      if (vlen>1)
        while (it.remaining()>=vlen)
          {
          it.advance(vlen);
          auto tdatav = reinterpret_cast<add_vec_t<T> *>(storage.data());
          exec(it, in, out, tdatav, *plan1, *plan2, kernel);
          }
#endif
      while (it.remaining()>0)
        {
        it.advance(1);
Martin Reinecke's avatar
Martin Reinecke committed
72
        auto buf = reinterpret_cast<T *>(storage.data());
Martin Reinecke's avatar
Martin Reinecke committed
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
        exec(it, in, out, buf, *plan1, *plan2, kernel);
        }
    });  // end of parallel region
  }

struct ExecConvR1
  {
  template <typename T0, typename T, size_t vlen> void operator() (
    const multi_iter<vlen> &it, const fmav<T0> &in, fmav<T0> &out,
    T * buf, const pocketfft_r<T0> &plan1, const pocketfft_r<T0> &plan2,
    const vector<T0> &kernel) const
    {
    size_t l_in = plan1.length(),
           l_out = plan2.length(),
           l_min = std::min(l_in, l_out);
    copy_input(it, in, buf);
    plan1.exec(buf, T0(1), true);
Martin Reinecke's avatar
Martin Reinecke committed
90
    for (size_t i=0; i<l_min; ++i) buf[i]*=kernel[(i+1)/2];
Martin Reinecke's avatar
Martin Reinecke committed
91
92
93
94
95
96
97
98
99
100
101
102
    for (size_t i=l_in; i<l_out; ++i) buf[i] = T(0);
    plan2.exec(buf, T0(1), false);
    copy_output(it, buf, out);
    }
  };

template<typename T> void convolve_1d(const fmav<T> &in,
  fmav<T> &out, size_t axis, const vector<T> &kernel, size_t nthreads=1)
  {
  MR_assert(axis<in.ndim(), "bad axis number");
  MR_assert(in.ndim()==out.ndim(), "dimensionality mismatch");
  if (in.data()==out.data())
Martin Reinecke's avatar
Martin Reinecke committed
103
    MR_assert(in.stride()==out.stride(), "strides mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
104
105
106
  for (size_t i=0; i<in.ndim(); ++i)
    if (i!=axis)
      MR_assert(in.shape(i)==out.shape(i), "shape mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
107
108
  MR_assert(!((in.shape(axis)&1) || (out.shape(axis)&1)),
    "input and output axis lengths must be even");
Martin Reinecke's avatar
Martin Reinecke committed
109
110
111
112
113
114
  if (in.size()==0) return;
  general_convolve<pocketfft_r<T>>(in, out, axis, kernel, nthreads,
    ExecConvR1());
  }

}
Martin Reinecke's avatar
Martin Reinecke committed
115
116
117

using detail_fft::convolve_1d;

118
namespace detail_totalconvolve {
119

Martin Reinecke's avatar
Martin Reinecke committed
120
using namespace std;
121
122
123
124

template<typename T> class Interpolator
  {
  protected:
125
    bool adjoint;
Martin Reinecke's avatar
Martin Reinecke committed
126
    size_t lmax, kmax, nphi0, ntheta0, nphi, ntheta;
Martin Reinecke's avatar
Martin Reinecke committed
127
    int nthreads;
128
    T ofactor;
Martin Reinecke's avatar
fix    
Martin Reinecke committed
129
    size_t supp;
130
    ES_Kernel kernel;
131
    size_t ncomp;
132
133
134
#ifdef SIMD_INTERPOL
    mav<native_simd<T>,4> scube;
#endif
135
    mav<T,4> cube; // the data cube (theta, phi, 2*mbeam+1, TGC)
136

137
    void correct(mav<T,2> &arr, int spin)
138
      {
139
      T sfct = (spin&1) ? -1 : 1;
Martin Reinecke's avatar
Martin Reinecke committed
140
141
142
143
      mav<T,2> tmp({nphi,nphi0});
      // copy and extend to second half
      for (size_t j=0; j<nphi0; ++j)
        tmp.v(0,j) = arr(0,j);
Martin Reinecke's avatar
Martin Reinecke committed
144
      for (size_t i=1, i2=nphi0-1; i+1<ntheta0; ++i,--i2)
Martin Reinecke's avatar
Martin Reinecke committed
145
        for (size_t j=0,j2=nphi0/2; j<nphi0; ++j,++j2)
146
          {
Martin Reinecke's avatar
Martin Reinecke committed
147
          if (j2>=nphi0) j2-=nphi0;
Martin Reinecke's avatar
Martin Reinecke committed
148
149
          tmp.v(i,j2) = arr(i,j2);
          tmp.v(i2,j) = sfct*tmp(i,j2);
150
          }
Martin Reinecke's avatar
Martin Reinecke committed
151
152
      for (size_t j=0; j<nphi0; ++j)
        tmp.v(ntheta0-1,j) = arr(ntheta0-1,j);
Martin Reinecke's avatar
Martin Reinecke committed
153
      auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
154
155
      vector<T> k2(fct.size());
      for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
Martin Reinecke's avatar
Martin Reinecke committed
156
157
      fmav<T> ftmp(tmp);
      fmav<T> ftmp0(tmp.template subarray<2>({0,0},{nphi0, nphi0}));
158
      convolve_1d(ftmp0, ftmp, 0, k2, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
159
      fmav<T> ftmp2(tmp.template subarray<2>({0,0},{ntheta, nphi0}));
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
160
      fmav<T> farr(arr);
161
      convolve_1d(ftmp2, farr, 1, k2, nthreads);
162
      }
163
164
    void decorrect(mav<T,2> &arr, int spin)
      {
165
      T sfct = (spin&1) ? -1 : 1;
Martin Reinecke's avatar
Martin Reinecke committed
166
167
      mav<T,2> tmp({nphi,nphi0});
      auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
168
169
      vector<T> k2(fct.size());
      for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
Martin Reinecke's avatar
Martin Reinecke committed
170
171
      fmav<T> farr(arr);
      fmav<T> ftmp2(tmp.template subarray<2>({0,0},{ntheta, nphi0}));
172
      convolve_1d(farr, ftmp2, 1, k2, nthreads);
173
      // extend to second half
Martin Reinecke's avatar
Martin Reinecke committed
174
      for (size_t i=1, i2=nphi-1; i+1<ntheta; ++i,--i2)
Martin Reinecke's avatar
Martin Reinecke committed
175
        for (size_t j=0,j2=nphi0/2; j<nphi0; ++j,++j2)
176
          {
Martin Reinecke's avatar
Martin Reinecke committed
177
          if (j2>=nphi0) j2-=nphi0;
178
179
          tmp.v(i2,j) = sfct*tmp(i,j2);
          }
Martin Reinecke's avatar
Martin Reinecke committed
180
181
      fmav<T> ftmp(tmp);
      fmav<T> ftmp0(tmp.template subarray<2>({0,0},{nphi0, nphi0}));
182
      convolve_1d(ftmp, ftmp0, 0, k2, nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
183
      for (size_t j=0; j<nphi0; ++j)
184
        arr.v(0,j) = T(0.5)*tmp(0,j);
Martin Reinecke's avatar
Martin Reinecke committed
185
      for (size_t i=1; i+1<ntheta0; ++i)
186
        for (size_t j=0; j<nphi0; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
187
188
          arr.v(i,j) = tmp(i,j);
      for (size_t j=0; j<nphi0; ++j)
189
        arr.v(ntheta0-1,j) = T(0.5)*tmp(ntheta0-1,j);
190
      }
191

Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
192
193
194
195
196
197
198
199
200
201
202
    vector<size_t> getIdx(const mav<T,2> &ptg) const
      {
      vector<size_t> idx(ptg.shape(0));
      constexpr size_t cellsize=16;
      size_t nct = ntheta/cellsize+1,
             ncp = nphi/cellsize+1;
      vector<vector<size_t>> mapper(nct*ncp);
      for (size_t i=0; i<ptg.shape(0); ++i)
        {
        size_t itheta=min(nct-1,size_t(ptg(i,0)/pi*nct)),
               iphi=min(ncp-1,size_t(ptg(i,1)/(2*pi)*ncp));
203
//        MR_assert((itheta<nct)&&(iphi<ncp), "oops");
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
204
205
206
207
208
209
210
211
212
        mapper[itheta*ncp+iphi].push_back(i);
        }
      size_t cnt=0;
      for (const auto &vec: mapper)
        for (auto i:vec)
          idx[cnt++] = i;
      return idx;
      }

213
  public:
214
215
    Interpolator(const vector<Alm<complex<T>>> &slm,
                 const vector<Alm<complex<T>>> &blm,
216
                 bool separate, T epsilon, T ofmin, int nthreads_)
217
      : adjoint(false),
218
219
        lmax(slm.at(0).Lmax()),
        kmax(blm.at(0).Mmax()),
Martin Reinecke's avatar
Martin Reinecke committed
220
221
        nphi0(2*good_size_real(lmax+1)),
        ntheta0(nphi0/2+1),
Martin Reinecke's avatar
Martin Reinecke committed
222
        nphi(std::max<size_t>(20,2*good_size_real(size_t((2*lmax+1)*ofmin/2.)))),
Martin Reinecke's avatar
Martin Reinecke committed
223
        ntheta(nphi/2+1),
Martin Reinecke's avatar
Martin Reinecke committed
224
        nthreads(nthreads_),
225
        ofactor(T(nphi)/(2*lmax+1)),
Martin Reinecke's avatar
fix    
Martin Reinecke committed
226
        supp(ES_Kernel::get_supp(epsilon, ofactor)),
Martin Reinecke's avatar
Martin Reinecke committed
227
        kernel(supp, ofactor, nthreads),
228
        ncomp(separate ? slm.size() : 1),
229
230
231
232
233
234
#ifdef SIMD_INTERPOL
        scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
        cube(reinterpret_cast<T *>(scube.vdata()),{ntheta+2*supp, nphi+2*supp, ncomp, ((2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size())*native_simd<T>::size()},true)
#else
        cube({ntheta+2*supp, nphi+2*supp, ncomp, 2*kmax+1})
#endif
235
      {
236
      MR_assert((ncomp==1)||(ncomp==3), "currently only 1 or 3 components allowed");
237
238
239
240
241
242
243
244
245
      MR_assert(slm.size()==blm.size(), "inconsistent slm and blm vectors");
      for (size_t i=0; i<slm.size(); ++i)
        {
        MR_assert(slm[i].Lmax()==lmax, "inconsistent Sky lmax");
        MR_assert(slm[i].Mmax()==lmax, "Sky lmax must be equal to Sky mmax");
        MR_assert(blm[i].Lmax()==lmax, "Sky and beam lmax must be equal");
        MR_assert(blm[i].Mmax()==kmax, "Inconcistent beam mmax");
        }

246
247
      MR_assert((supp<=ntheta) && (supp<=nphi), "support too large!");
      Alm<complex<T>> a1(lmax, lmax), a2(lmax,lmax);
Martin Reinecke's avatar
Martin Reinecke committed
248
      auto ginfo = sharp_make_cc_geom_info(ntheta0,nphi0,0.,cube.stride(1),cube.stride(0));
249
250
      auto ainfo = sharp_make_triangular_alm_info(lmax,lmax,1);

251
      vector<T>lnorm(lmax+1);
252
      for (size_t i=0; i<=lmax; ++i)
253
        lnorm[i]=T(std::sqrt(4*pi/(2*i+1.)));
254

255
      for (size_t icomp=0; icomp<ncomp; ++icomp)
256
257
258
259
        {
        for (size_t m=0; m<=lmax; ++m)
          for (size_t l=m; l<=lmax; ++l)
            {
260
            if (separate)
261
              a1(l,m) = slm[icomp](l,m)*blm[icomp](l,0).real()*lnorm[l];
262
263
            else
              {
264
265
              a1(l,m) = 0;
              for (size_t j=0; j<slm.size(); ++j)
266
                a1(l,m) += slm[j](l,m)*blm[j](l,0).real()*lnorm[l];
267
268
              }
            }
269
        auto m1 = cube.template subarray<2>({supp,supp,icomp,0},{ntheta,nphi,0,0});
270
271
272
273
274
275
276
277
278
279
280
281
282
283
        sharp_alm2map(a1.Alms().data(), m1.vdata(), *ginfo, *ainfo, 0, nthreads);
        correct(m1,0);

        for (size_t k=1; k<=kmax; ++k)
          {
          for (size_t m=0; m<=lmax; ++m)
            for (size_t l=m; l<=lmax; ++l)
              {
              if (l<k)
                a1(l,m)=a2(l,m)=0.;
              else
                {
                if (separate)
                  {
284
                  auto tmp = blm[icomp](l,k)*(-2*lnorm[l]);
285
286
287
288
289
290
291
292
                  a1(l,m) = slm[icomp](l,m)*tmp.real();
                  a2(l,m) = slm[icomp](l,m)*tmp.imag();
                  }
                else
                  {
                  a1(l,m) = a2(l,m) = 0;
                  for (size_t j=0; j<slm.size(); ++j)
                    {
293
                    auto tmp = blm[j](l,k)*(-2*lnorm[l]);
294
295
296
297
                    a1(l,m) += slm[j](l,m)*tmp.real();
                    a2(l,m) += slm[j](l,m)*tmp.imag();
                    }
                  }
Martin Reinecke's avatar
fixes    
Martin Reinecke committed
298
                }
299
              }
300
301
          auto m1 = cube.template subarray<2>({supp,supp,icomp,2*k-1},{ntheta,nphi,0,0});
          auto m2 = cube.template subarray<2>({supp,supp,icomp,2*k  },{ntheta,nphi,0,0});
Martin Reinecke's avatar
fixes    
Martin Reinecke committed
302
303
304
305
          sharp_alm2map_spin(k, a1.Alms().data(), a2.Alms().data(), m1.vdata(),
            m2.vdata(), *ginfo, *ainfo, 0, nthreads);
          correct(m1,k);
          correct(m2,k);
306
          }
307
        }
308

309
310
311
      // fill border regions
      for (size_t i=0; i<supp; ++i)
        for (size_t j=0, j2=nphi/2; j<nphi; ++j,++j2)
312
          for (size_t k=0; k<cube.shape(3); ++k)
313
            {
314
            T fct = (((k+1)/2)&1) ? -1 : 1;
315
            if (j2>=nphi) j2-=nphi;
316
            for (size_t l=0; l<cube.shape(2); ++l)
317
              {
318
319
              cube.v(supp-1-i,j2+supp,l,k) = fct*cube(supp+1+i,j+supp,l,k);
              cube.v(supp+ntheta+i,j2+supp,l,k) = fct*cube(supp+ntheta-2-i,j+supp,l,k);
320
              }
321
322
323
            }
      for (size_t i=0; i<ntheta+2*supp; ++i)
        for (size_t j=0; j<supp; ++j)
324
325
          for (size_t k=0; k<cube.shape(3); ++k)
            for (size_t l=0; l<cube.shape(2); ++l)
326
            {
327
328
            cube.v(i,j,l,k) = cube(i,j+nphi,l,k);
            cube.v(i,j+nphi+supp,l,k) = cube(i,j+supp,l,k);
329
330
331
            }
      }

332
    Interpolator(size_t lmax_, size_t kmax_, size_t ncomp_, T epsilon, T ofmin, int nthreads_)
333
334
335
336
337
      : adjoint(true),
        lmax(lmax_),
        kmax(kmax_),
        nphi0(2*good_size_real(lmax+1)),
        ntheta0(nphi0/2+1),
Martin Reinecke's avatar
Martin Reinecke committed
338
        nphi(std::max<size_t>(20,2*good_size_real(size_t((2*lmax+1)*ofmin/2.)))),
339
340
        ntheta(nphi/2+1),
        nthreads(nthreads_),
341
        ofactor(T(nphi)/(2*lmax+1)),
342
343
        supp(ES_Kernel::get_supp(epsilon, ofactor)),
        kernel(supp, ofactor, nthreads),
344
        ncomp(ncomp_),
345
346
347
348
349
350
#ifdef SIMD_INTERPOL
        scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
        cube(reinterpret_cast<T *>(scube.vdata()),{ntheta+2*supp, nphi+2*supp, ncomp, ((2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size())*native_simd<T>::size()},true)
#else
        cube({ntheta+2*supp, nphi+2*supp, ncomp, 2*kmax+1})
#endif
351
      {
352
      MR_assert((ncomp==1)||(ncomp==3), "currently only 1 or 3 components allowed");
353
354
355
356
      MR_assert((supp<=ntheta) && (supp<=nphi), "support too large!");
      cube.apply([](T &v){v=0.;});
      }

Martin Reinecke's avatar
Martin Reinecke committed
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
#ifdef SIMD_INTERPOL
    template<size_t nv, size_t nc> void interpol_help0(const T * MRUTIL_RESTRICT wt,
      const T * MRUTIL_RESTRICT wp, const native_simd<T> * MRUTIL_RESTRICT p, size_t d0, size_t d1, const native_simd<T> * MRUTIL_RESTRICT psiarr2, mav<T,2> &res, size_t idx) const
      {
      array<native_simd<T>,nc> vv;
      for (auto &vvv:vv) vvv=0;
      for (size_t j=0; j<supp; ++j, p+=d0)
        {
        auto p1=p;
        T wtj = wt[j];
        for (size_t k=0; k<supp; ++k, p1+=d1)
          {
          array<native_simd<T>,nc> tvv;
          for (auto &vvv:tvv) vvv=0;
          for (size_t l=0; l<nv; ++l)
            for (size_t c=0; c<nc; ++c)
              tvv[c] += p1[l+nv*c]*psiarr2[l];
          for (size_t c=0; c<nc; ++c)
          vv[c] += (wtj*wp[k])*tvv[c];
          }
        }
      for (size_t c=0; c<nc; ++c)
        res.v(idx,c) = reduce(vv[c], std::plus<>());
      }
    template<size_t nv, size_t nc> void deinterpol_help0(const T * MRUTIL_RESTRICT wt,
      const T * MRUTIL_RESTRICT wp, native_simd<T> * MRUTIL_RESTRICT p, size_t d0, size_t d1, const native_simd<T> * MRUTIL_RESTRICT psiarr2, const mav<T,2> &data, size_t idx) const
      {
      array<native_simd<T>,nc> vv;
      for (size_t i=0; i<nc; ++i) vv[i] = data(idx,i);
      for (size_t j=0; j<supp; ++j, p+=d0)
        {
        auto p1=p;
        T wtj = wt[j];
        for (size_t k=0; k<supp; ++k, p1+=d1)
          {
          native_simd<T> wtjwpk = wtj*wp[k];
          for (size_t l=0; l<nv; ++l)
            {
            auto tmp = wtjwpk*psiarr2[l];
            for (size_t c=0; c<nc; ++c)
              p1[l+nv*c] += vv[c]*tmp;
            }
          }
        }
      }
#endif

404
    void interpol (const mav<T,2> &ptg, mav<T,2> &res) const
405
      {
406
407
408
#ifdef SIMD_INTERPOL
      constexpr size_t vl=native_simd<T>::size();
      MR_assert(scube.stride(3)==1, "bad stride");
Martin Reinecke's avatar
Martin Reinecke committed
409
      size_t nv=scube.shape(3);
410
#endif
411
      MR_assert(!adjoint, "cannot be called in adjoint mode");
412
413
      MR_assert(ptg.shape(0)==res.shape(0), "dimension mismatch");
      MR_assert(ptg.shape(1)==3, "second dimension must have length 3");
414
      MR_assert(res.shape(1)==ncomp, "# of components mismatch");
415
416
417
      T delta = T(2)/supp;
      T xdtheta = T((ntheta-1)/pi),
        xdphi = T(nphi/(2*pi));
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
418
      auto idx = getIdx(ptg);
Martin Reinecke's avatar
Martin Reinecke committed
419
      execStatic(idx.size(), nthreads, 0, [&](Scheduler &sched)
420
        {
Martin Reinecke's avatar
Martin Reinecke committed
421
422
        vector<T> wt(supp), wp(supp);
        vector<T> psiarr(2*kmax+1);
423
424
425
426
#ifdef SIMD_INTERPOL
        vector<native_simd<T>> psiarr2((2*kmax+1+vl-1)/vl);
        for (auto &v:psiarr2) v=0;
#endif
Martin Reinecke's avatar
Martin Reinecke committed
427
        while (auto rng=sched.getNext()) for(auto ind=rng.lo; ind<rng.hi; ++ind)
428
          {
Martin Reinecke's avatar
Martin Reinecke committed
429
          size_t i=idx[ind];
430
431
          T f0=T(0.5*supp+ptg(i,0)*xdtheta);
          size_t i0 = size_t(f0+T(1));
Martin Reinecke's avatar
Martin Reinecke committed
432
433
          for (size_t t=0; t<supp; ++t)
            wt[t] = kernel((t+i0-f0)*delta - 1);
434
          T f1=T(0.5)*supp+ptg(i,1)*xdphi;
Martin Reinecke's avatar
Martin Reinecke committed
435
436
437
438
          size_t i1 = size_t(f1+1.);
          for (size_t t=0; t<supp; ++t)
            wp[t] = kernel((t+i1-f1)*delta - 1);
          psiarr[0]=1.;
439
440
          double psi=ptg(i,2);
          double cpsi=cos(psi), spsi=sin(psi);
Martin Reinecke's avatar
Martin Reinecke committed
441
442
443
          double cnpsi=cpsi, snpsi=spsi;
          for (size_t l=1; l<=kmax; ++l)
            {
444
445
            psiarr[2*l-1]=T(cnpsi);
            psiarr[2*l]=T(snpsi);
Martin Reinecke's avatar
Martin Reinecke committed
446
447
448
449
            const double tmp = snpsi*cpsi + cnpsi*spsi;
            cnpsi=cnpsi*cpsi - snpsi*spsi;
            snpsi=tmp;
            }
450
451
452
453
#ifdef SIMD_INTERPOL
          memcpy(reinterpret_cast<T *>(psiarr2.data()), psiarr.data(),
            (2*kmax+1)*sizeof(T));
#endif
454
455
          if (ncomp==1)
            {
456
#ifndef SIMD_INTERPOL
457
            T vv=0;
458
459
460
            for (size_t j=0; j<supp; ++j)
              for (size_t k=0; k<supp; ++k)
                for (size_t l=0; l<2*kmax+1; ++l)
461
                  vv += cube(i0+j,i1+k,0,l)*wt[j]*wp[k]*psiarr[l];
462
            res.v(i,0) = vv;
463
464
465
466
#else
            const native_simd<T> *p=&scube(i0,i1,0,0);
            ptrdiff_t d0 = scube.stride(0);
            ptrdiff_t d1 = scube.stride(1);
Martin Reinecke's avatar
Martin Reinecke committed
467
            switch (nv)
468
              {
Martin Reinecke's avatar
Martin Reinecke committed
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
#ifdef SPECIAL_CASING
              case 1:
                interpol_help0<1,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
              case 2:
                interpol_help0<2,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
              case 3:
                interpol_help0<3,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
              case 4:
                interpol_help0<4,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
              case 5:
                interpol_help0<5,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
              case 6:
                interpol_help0<6,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
              case 7:
                interpol_help0<7,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
#endif
              default:
493
                {
Martin Reinecke's avatar
Martin Reinecke committed
494
495
496
497
498
499
500
501
502
503
504
505
506
507
                native_simd<T> vv=0.;
                for (size_t j=0; j<supp; ++j, p+=d0)
                  {
                  auto p1=p;
                  T wtj = wt[j];
                  for (size_t k=0; k<supp; ++k, p1+=d1)
                    {
                    native_simd<T> tvv=0;
                    for (size_t l=0; l<nv; ++l)
                      tvv += p1[l]*psiarr2[l];
                    vv += wtj*wp[k]*tvv;
                    }
                  }
                res.v(i,0) = reduce(vv, std::plus<>());
508
509
510
                }
              }
#endif
511
512
513
            }
          else // ncomp==3
            {
514
#ifndef SIMD_INTERPOL
515
            T v0=0., v1=0., v2=0.;
516
517
518
519
            for (size_t j=0; j<supp; ++j)
              for (size_t k=0; k<supp; ++k)
                for (size_t l=0; l<2*kmax+1; ++l)
                  {
520
                  auto tmp = wt[j]*wp[k]*psiarr[l];
521
522
523
                  v0 += cube(i0+j,i1+k,0,l)*tmp;
                  v1 += cube(i0+j,i1+k,1,l)*tmp;
                  v2 += cube(i0+j,i1+k,2,l)*tmp;
524
525
526
527
                  }
            res.v(i,0) = v0;
            res.v(i,1) = v1;
            res.v(i,2) = v2;
528
529
530
531
#else
            const native_simd<T> *p=&scube(i0,i1,0,0);
            ptrdiff_t d0 = scube.stride(0);
            ptrdiff_t d1 = scube.stride(1);
Martin Reinecke's avatar
Martin Reinecke committed
532
            switch (nv)
533
              {
Martin Reinecke's avatar
Martin Reinecke committed
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
#ifdef SPECIAL_CASING
              case 1:
                interpol_help0<1,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
              case 2:
                interpol_help0<2,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
              case 3:
                interpol_help0<3,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
              case 4:
                interpol_help0<4,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
              case 5:
                interpol_help0<5,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
              case 6:
                interpol_help0<6,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
              case 7:
                interpol_help0<7,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), res, i);
                break;
#endif
              default:
558
                {
Martin Reinecke's avatar
Martin Reinecke committed
559
560
561
                ptrdiff_t d2 = scube.stride(2);
                native_simd<T> v0=0., v1=0., v2=0.;
                for (size_t j=0; j<supp; ++j, p+=d0)
562
                  {
Martin Reinecke's avatar
Martin Reinecke committed
563
564
565
566
567
568
569
570
571
572
573
574
575
                  auto p1=p;
                  T wtj = wt[j];
                  for (size_t k=0; k<supp; ++k, p1+=d1)
                    {
                    native_simd<T> wtjwpk = wtj*wp[k];
                    for (size_t l=0; l<nv; ++l)
                      {
                      auto tmp = wtjwpk*psiarr2[l];
                      v0 += p1[l]*tmp;
                      v1 += p1[l+d2]*tmp;
                      v2 += p1[l+2*d2]*tmp;
                      }
                    }
576
                  }
Martin Reinecke's avatar
Martin Reinecke committed
577
578
579
                res.v(i,0) = reduce(v0, std::plus<>());
                res.v(i,1) = reduce(v1, std::plus<>());
                res.v(i,2) = reduce(v2, std::plus<>());
580
581
582
                }
              }
#endif
583
            }
584
          }
Martin Reinecke's avatar
Martin Reinecke committed
585
        });
586
      }
587

588
589
590
    size_t support() const
      { return supp; }

591
    void deinterpol (const mav<T,2> &ptg, const mav<T,2> &data)
592
      {
593
594
595
#ifdef SIMD_INTERPOL
      constexpr size_t vl=native_simd<T>::size();
      MR_assert(scube.stride(3)==1, "bad stride");
Martin Reinecke's avatar
Martin Reinecke committed
596
597
      MR_assert(scube.stride(2)==ptrdiff_t(scube.shape(3)), "bad stride");
      size_t nv=scube.shape(3);
598
#endif
599
600
601
      MR_assert(adjoint, "can only be called in adjoint mode");
      MR_assert(ptg.shape(0)==data.shape(0), "dimension mismatch");
      MR_assert(ptg.shape(1)==3, "second dimension must have length 3");
602
      MR_assert(data.shape(1)==ncomp, "# of components mismatch");
603
604
605
      T delta = T(2)/supp;
      T xdtheta = T((ntheta-1)/pi),
        xdphi = T(nphi/(2*pi));
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
606
      auto idx = getIdx(ptg);
607
608
609
610
611
612
613

      constexpr size_t cellsize=16;
      size_t nct = ntheta/cellsize+5,
             ncp = nphi/cellsize+5;
      mav<std::mutex,2> locks({nct,ncp});

      execStatic(idx.size(), nthreads, 0, [&](Scheduler &sched)
614
        {
615
        size_t b_theta=99999999999999, b_phi=9999999999999999;
616
617
        vector<T> wt(supp), wp(supp);
        vector<T> psiarr(2*kmax+1);
618
619
620
621
#ifdef SIMD_INTERPOL
        vector<native_simd<T>> psiarr2((2*kmax+1+vl-1)/vl);
        for (auto &v:psiarr2) v=0;
#endif
622
623
624
        while (auto rng=sched.getNext()) for(auto ind=rng.lo; ind<rng.hi; ++ind)
          {
          size_t i=idx[ind];
625
          T f0=T(0.5)*supp+ptg(i,0)*xdtheta;
626
627
628
          size_t i0 = size_t(f0+1.);
          for (size_t t=0; t<supp; ++t)
            wt[t] = kernel((t+i0-f0)*delta - 1);
629
          T f1=T(0.5)*supp+ptg(i,1)*xdphi;
630
631
632
633
634
635
636
637
638
          size_t i1 = size_t(f1+1.);
          for (size_t t=0; t<supp; ++t)
            wp[t] = kernel((t+i1-f1)*delta - 1);
          psiarr[0]=1.;
          double psi=ptg(i,2);
          double cpsi=cos(psi), spsi=sin(psi);
          double cnpsi=cpsi, snpsi=spsi;
          for (size_t l=1; l<=kmax; ++l)
            {
639
640
            psiarr[2*l-1]=T(cnpsi);
            psiarr[2*l]=T(snpsi);
641
642
643
644
            const double tmp = snpsi*cpsi + cnpsi*spsi;
            cnpsi=cnpsi*cpsi - snpsi*spsi;
            snpsi=tmp;
            }
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
          size_t b_theta_new = i0/cellsize,
                 b_phi_new = i1/cellsize;
          if ((b_theta_new!=b_theta) || (b_phi_new!=b_phi))
            {
            if (b_theta<locks.shape(0))  // unlock
              {
              locks.v(b_theta,b_phi).unlock();
              locks.v(b_theta,b_phi+1).unlock();
              locks.v(b_theta+1,b_phi).unlock();
              locks.v(b_theta+1,b_phi+1).unlock();
              }
            b_theta = b_theta_new;
            b_phi = b_phi_new;
            locks.v(b_theta,b_phi).lock();
            locks.v(b_theta,b_phi+1).lock();
            locks.v(b_theta+1,b_phi).lock();
            locks.v(b_theta+1,b_phi+1).lock();
            }
663
664
665
666
#ifdef SIMD_INTERPOL
          memcpy(reinterpret_cast<T *>(psiarr2.data()), psiarr.data(),
            (2*kmax+1)*sizeof(T));
#endif
667
668
          if (ncomp==1)
            {
669
#ifndef SIMD_INTERPOL
670
            T val = data(i,0);
671
672
673
            for (size_t j=0; j<supp; ++j)
              for (size_t k=0; k<supp; ++k)
                for (size_t l=0; l<2*kmax+1; ++l)
674
675
676
677
678
                  cube.v(i0+j,i1+k,0,l) += val*wt[j]*wp[k]*psiarr[l];
#else
            native_simd<T> *p=&scube.v(i0,i1,0,0);
            ptrdiff_t d0 = scube.stride(0);
            ptrdiff_t d1 = scube.stride(1);
Martin Reinecke's avatar
Martin Reinecke committed
679
            switch (nv)
680
              {
Martin Reinecke's avatar
Martin Reinecke committed
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
#ifdef SPECIAL_CASING
              case 1:
                deinterpol_help0<1,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
              case 2:
                deinterpol_help0<2,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
              case 3:
                deinterpol_help0<3,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
              case 4:
                deinterpol_help0<4,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
              case 5:
                deinterpol_help0<5,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
              case 6:
                deinterpol_help0<6,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
              case 7:
                deinterpol_help0<7,1>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
#endif
              default:
705
                {
Martin Reinecke's avatar
Martin Reinecke committed
706
707
708
709
710
711
712
713
714
715
716
717
                native_simd<T> val = data(i,0);
                for (size_t j=0; j<supp; ++j, p+=d0)
                  {
                  auto p1=p;
                  T wtj = wt[j];
                  for (size_t k=0; k<supp; ++k, p1+=d1)
                    {
                    native_simd<T> tv = wtj*wp[k]*val;
                    for (size_t l=0; l<nv; ++l)
                      p1[l] += tv*psiarr2[l];
                    } 
                  }
718
719
720
                }
              }
#endif
721
722
723
            }
          else // ncomp==3
            {
724
#ifndef SIMD_INTERPOL
725
            T v0=data(i,0), v1=data(i,1), v2=data(i,2);
726
727
728
            for (size_t j=0; j<supp; ++j)
              for (size_t k=0; k<supp; ++k)
                {
729
                T t0 = wt[j]*wp[k];
730
731
                for (size_t l=0; l<2*kmax+1; ++l)
                  {
732
                  T tmp = t0*psiarr[l];
733
734
735
736
737
738
739
740
741
                  cube.v(i0+j,i1+k,0,l) += v0*tmp;
                  cube.v(i0+j,i1+k,1,l) += v1*tmp;
                  cube.v(i0+j,i1+k,2,l) += v2*tmp;
                  }
                }
#else
            native_simd<T> *p=&scube.v(i0,i1,0,0);
            ptrdiff_t d0 = scube.stride(0);
            ptrdiff_t d1 = scube.stride(1);
Martin Reinecke's avatar
Martin Reinecke committed
742
            switch (nv)
743
              {
Martin Reinecke's avatar
Martin Reinecke committed
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
#ifdef SPECIAL_CASING
              case 1:
                deinterpol_help0<1,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
              case 2:
                deinterpol_help0<2,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
              case 3:
                deinterpol_help0<3,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
              case 4:
                deinterpol_help0<4,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
              case 5:
                deinterpol_help0<5,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
              case 6:
                deinterpol_help0<6,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
              case 7:
                deinterpol_help0<7,3>(wt.data(), wp.data(), p, d0, d1, psiarr2.data(), data, i);
                break;
#endif
              default:
768
                {
Martin Reinecke's avatar
Martin Reinecke committed
769
770
771
                native_simd<T> v0=data(i,0), v1=data(i,1), v2=data(i,2);
                ptrdiff_t d2 = scube.stride(2);
                for (size_t j=0; j<supp; ++j, p+=d0)
772
                  {
Martin Reinecke's avatar
Martin Reinecke committed
773
774
775
776
777
778
779
780
781
782
783
784
785
                  auto p1=p;
                  T wtj = wt[j];
                  for (size_t k=0; k<supp; ++k, p1+=d1)
                    {
                    native_simd<T> wtjwpk = wtj*wp[k];
                    for (size_t l=0; l<nv; ++l)
                      {
                      auto tmp = wtjwpk*psiarr2[l];
                      p1[l] += v0*tmp;
                      p1[l+d2] += v1*tmp;
                      p1[l+2*d2] += v2*tmp;
                      }
                    }
786
787
                  }
                }
788
789
              }
#endif
790
            }
791
          }
792
793
794
795
796
797
798
        if (b_theta<locks.shape(0))  // unlock
          {
          locks.v(b_theta,b_phi).unlock();
          locks.v(b_theta,b_phi+1).unlock();
          locks.v(b_theta+1,b_phi).unlock();
          locks.v(b_theta+1,b_phi+1).unlock();
          }
799
800
        });
      }
801
    void getSlm (const vector<Alm<complex<T>>> &blm, vector<Alm<complex<T>>> &slm)
802
803
      {
      MR_assert(adjoint, "can only be called in adjoint mode");
804
805
      MR_assert((blm.size()==ncomp) || (ncomp==1), "incorrect number of beam a_lm sets");
      MR_assert((slm.size()==ncomp) || (ncomp==1), "incorrect number of sky a_lm sets");
806
807
808
809
810
      Alm<complex<T>> a1(lmax, lmax), a2(lmax,lmax);
      auto ginfo = sharp_make_cc_geom_info(ntheta0,nphi0,0.,cube.stride(1),cube.stride(0));
      auto ainfo = sharp_make_triangular_alm_info(lmax,lmax,1);

      // move stuff from border regions onto the main grid
811
      for (size_t i=0; i<cube.shape(0); ++i)
812
        for (size_t j=0; j<supp; ++j)
813
814
          for (size_t k=0; k<cube.shape(3); ++k)
            for (size_t l=0; l<cube.shape(2); ++l)
815
              {
816
817
              cube.v(i,j+nphi,l,k) += cube(i,j,l,k);
              cube.v(i,j+supp,l,k) += cube(i,j+nphi+supp,l,k);
818
              }
819
820
      for (size_t i=0; i<supp; ++i)
        for (size_t j=0, j2=nphi/2; j<nphi; ++j,++j2)
821
          for (size_t k=0; k<cube.shape(3); ++k)
822
            {
823
            T fct = (((k+1)/2)&1) ? -1 : 1;
824
            if (j2>=nphi) j2-=nphi;
825
            for (size_t l=0; l<cube.shape(2); ++l)
826
              {
827
828
              cube.v(supp+1+i,j+supp,l,k) += fct*cube(supp-1-i,j2+supp,l,k);
              cube.v(supp+ntheta-2-i, j+supp,l,k) += fct*cube(supp+ntheta+i,j2+supp,l,k);
829
              }
830
            }
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
831
832
833

      // special treatment for poles
      for (size_t j=0,j2=nphi/2; j<nphi/2; ++j,++j2)
834
835
        for (size_t k=0; k<cube.shape(3); ++k)
          for (size_t l=0; l<cube.shape(2); ++l)
836
            {
837
            T fct = (((k+1)/2)&1) ? -1 : 1;
838
            if (j2>=nphi) j2-=nphi;
839
840
841
842
843
844
            T tval = (cube(supp,j+supp,l,k) + fct*cube(supp,j2+supp,l,k));
            cube.v(supp,j+supp,l,k) = tval;
            cube.v(supp,j2+supp,l,k) = fct*tval;
            tval = (cube(supp+ntheta-1,j+supp,l,k) + fct*cube(supp+ntheta-1,j2+supp,l,k));
            cube.v(supp+ntheta-1,j+supp,l,k) = tval;
            cube.v(supp+ntheta-1,j2+supp,l,k) = fct*tval;
845
            }
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
846

847
      vector<T>lnorm(lmax+1);
848
      for (size_t i=0; i<=lmax; ++i)
849
        lnorm[i]=T(std::sqrt(4*pi/(2*i+1.)));
850

851
852
853
      for (size_t j=0; j<blm.size(); ++j)
        slm[j].SetToZero();

854
      for (size_t icomp=0; icomp<ncomp; ++icomp)
855
        {
856
        bool separate = ncomp>1;
857
        {
858
        auto m1 = cube.template subarray<2>({supp,supp,icomp,0},{ntheta,nphi,0,0});
859
860
        decorrect(m1,0);
        sharp_alm2map_adjoint(a1.Alms().vdata(), m1.data(), *ginfo, *ainfo, 0, nthreads);
861
862
        for (size_t m=0; m<=lmax; ++m)
          for (size_t l=m; l<=lmax; ++l)
863
            if (separate)
864
              slm[icomp](l,m) += conj(a1(l,m))*blm[icomp](l,0).real()*lnorm[l];
865
866
            else
              for (size_t j=0; j<blm.size(); ++j)
867
                slm[j](l,m) += conj(a1(l,m))*blm[j](l,0).real()*lnorm[l];
868
869
870
        }
        for (size_t k=1; k<=kmax; ++k)
          {
871
872
          auto m1 = cube.template subarray<2>({supp,supp,icomp,2*k-1},{ntheta,nphi,0,0});
          auto m2 = cube.template subarray<2>({supp,supp,icomp,2*k  },{ntheta,nphi,0,0});
873
874
875
876
877
878
879
880
          decorrect(m1,k);
          decorrect(m2,k);

          sharp_alm2map_spin_adjoint(k, a1.Alms().vdata(), a2.Alms().vdata(), m1.data(),
            m2.data(), *ginfo, *ainfo, 0, nthreads);
          for (size_t m=0; m<=lmax; ++m)
            for (size_t l=m; l<=lmax; ++l)
              if (l>=k)
881
882
                {
                if (separate)
883
                  {
884
                  auto tmp = conj(blm[icomp](l,k))*(-2*lnorm[l]);
885
886
                  slm[icomp](l,m) += conj(a1(l,m))*tmp.real();
                  slm[icomp](l,m) -= conj(a2(l,m))*tmp.imag();
887
                  }
888
889
890
                else
                  for (size_t j=0; j<blm.size(); ++j)
                    {
891
                    auto tmp = conj(blm[j](l,k))*(-2*lnorm[l]);
892
893
894
895
                    slm[j](l,m) += conj(a1(l,m))*tmp.real();
                    slm[j](l,m) -= conj(a2(l,m))*tmp.imag();
                    }
                }
896
          }
897
898
        }
      }
899
900
  };

901
902
903
double epsilon_guess(size_t support, double ofactor)
  { return std::sqrt(12.)*std::exp(-(support*ofactor)); }

Martin Reinecke's avatar
Martin Reinecke committed
904
}
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
905

906
907
using detail_totalconvolve::Interpolator;
using detail_totalconvolve::epsilon_guess;
908

Martin Reinecke's avatar
Martin Reinecke committed
909
}
910

Martin Reinecke's avatar
Martin Reinecke committed
911
#endif