totalconvolve.h 33.5 KB
Newer Older
Martin Reinecke's avatar
Martin Reinecke committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/*
 *  This code is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This code is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this code; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

17
18
19
20
21
/*
 *  Copyright (C) 2020 Max-Planck-Society
 *  Author: Martin Reinecke
 */

Martin Reinecke's avatar
Martin Reinecke committed
22
23
#ifndef DUCC0_INTERPOL_NG_H
#define DUCC0_INTERPOL_NG_H
Martin Reinecke's avatar
Martin Reinecke committed
24

25
#define SIMD_INTERPOL
Martin Reinecke's avatar
Martin Reinecke committed
26
#define SPECIAL_CASING
27

28
29
#include <vector>
#include <complex>
Martin Reinecke's avatar
Martin Reinecke committed
30
#include <cmath>
Martin Reinecke's avatar
Martin Reinecke committed
31
32
33
34
35
36
37
38
#include "ducc0/math/constants.h"
#include "ducc0/math/gl_integrator.h"
#include "ducc0/math/es_kernel.h"
#include "ducc0/infra/mav.h"
#include "ducc0/infra/simd.h"
#include "ducc0/sharp/sharp.h"
#include "ducc0/sharp/sharp_almhelpers.h"
#include "ducc0/sharp/sharp_geomhelpers.h"
39
#include "python/alm.h"
Martin Reinecke's avatar
Martin Reinecke committed
40
#include "ducc0/math/fft.h"
Martin Reinecke's avatar
Martin Reinecke committed
41

Martin Reinecke's avatar
Martin Reinecke committed
42
namespace ducc0 {
43

Martin Reinecke's avatar
Martin Reinecke committed
44
45
46
47
48
49
50
51
52
53
54
55
56
57
namespace detail_fft {

using std::vector;

template<typename T, typename T0> aligned_array<T> alloc_tmp_conv
  (const fmav_info &info, size_t axis, size_t len)
  {
  auto othersize = info.size()/info.shape(axis);
  constexpr auto vlen = native_simd<T0>::size();
  auto tmpsize = len*((othersize>=vlen) ? vlen : 1);
  return aligned_array<T>(tmpsize);
  }

template<typename Tplan, typename T, typename T0, typename Exec>
Martin Reinecke's avatar
Martin Reinecke committed
58
DUCC0_NOINLINE void general_convolve(const fmav<T> &in, fmav<T> &out,
Martin Reinecke's avatar
Martin Reinecke committed
59
  const size_t axis, const vector<T0> &kernel, size_t nthreads,
Martin Reinecke's avatar
Martin Reinecke committed
60
  const Exec &exec)
Martin Reinecke's avatar
Martin Reinecke committed
61
  {
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
62
  std::unique_ptr<Tplan> plan1, plan2;
Martin Reinecke's avatar
Martin Reinecke committed
63
64
65
66

  size_t l_in=in.shape(axis), l_out=out.shape(axis);
  size_t l_min=std::min(l_in, l_out), l_max=std::max(l_in, l_out);
  MR_assert(kernel.size()==l_min/2+1, "bad kernel size");
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
67
68
  plan1 = std::make_unique<Tplan>(l_in);
  plan2 = std::make_unique<Tplan>(l_out);
Martin Reinecke's avatar
Martin Reinecke committed
69
70
71
72
73

  execParallel(
    util::thread_count(nthreads, in, axis, native_simd<T0>::size()),
    [&](Scheduler &sched) {
      constexpr auto vlen = native_simd<T0>::size();
Martin Reinecke's avatar
Martin Reinecke committed
74
      auto storage = alloc_tmp_conv<T,T0>(in, axis, l_max);
Martin Reinecke's avatar
Martin Reinecke committed
75
      multi_iter<vlen> it(in, out, axis, sched.num_threads(), sched.thread_num());
Martin Reinecke's avatar
Martin Reinecke committed
76
#ifndef DUCC0_NO_SIMD
Martin Reinecke's avatar
Martin Reinecke committed
77
78
79
80
81
82
83
84
85
86
87
      if (vlen>1)
        while (it.remaining()>=vlen)
          {
          it.advance(vlen);
          auto tdatav = reinterpret_cast<add_vec_t<T> *>(storage.data());
          exec(it, in, out, tdatav, *plan1, *plan2, kernel);
          }
#endif
      while (it.remaining()>0)
        {
        it.advance(1);
Martin Reinecke's avatar
Martin Reinecke committed
88
        auto buf = reinterpret_cast<T *>(storage.data());
Martin Reinecke's avatar
Martin Reinecke committed
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
        exec(it, in, out, buf, *plan1, *plan2, kernel);
        }
    });  // end of parallel region
  }

struct ExecConvR1
  {
  template <typename T0, typename T, size_t vlen> void operator() (
    const multi_iter<vlen> &it, const fmav<T0> &in, fmav<T0> &out,
    T * buf, const pocketfft_r<T0> &plan1, const pocketfft_r<T0> &plan2,
    const vector<T0> &kernel) const
    {
    size_t l_in = plan1.length(),
           l_out = plan2.length(),
           l_min = std::min(l_in, l_out);
    copy_input(it, in, buf);
    plan1.exec(buf, T0(1), true);
Martin Reinecke's avatar
Martin Reinecke committed
106
    for (size_t i=0; i<l_min; ++i) buf[i]*=kernel[(i+1)/2];
Martin Reinecke's avatar
Martin Reinecke committed
107
108
109
110
111
112
113
114
115
116
117
118
    for (size_t i=l_in; i<l_out; ++i) buf[i] = T(0);
    plan2.exec(buf, T0(1), false);
    copy_output(it, buf, out);
    }
  };

template<typename T> void convolve_1d(const fmav<T> &in,
  fmav<T> &out, size_t axis, const vector<T> &kernel, size_t nthreads=1)
  {
  MR_assert(axis<in.ndim(), "bad axis number");
  MR_assert(in.ndim()==out.ndim(), "dimensionality mismatch");
  if (in.data()==out.data())
Martin Reinecke's avatar
Martin Reinecke committed
119
    MR_assert(in.stride()==out.stride(), "strides mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
120
121
122
  for (size_t i=0; i<in.ndim(); ++i)
    if (i!=axis)
      MR_assert(in.shape(i)==out.shape(i), "shape mismatch");
Martin Reinecke's avatar
Martin Reinecke committed
123
124
  MR_assert(!((in.shape(axis)&1) || (out.shape(axis)&1)),
    "input and output axis lengths must be even");
Martin Reinecke's avatar
Martin Reinecke committed
125
126
127
128
129
130
  if (in.size()==0) return;
  general_convolve<pocketfft_r<T>>(in, out, axis, kernel, nthreads,
    ExecConvR1());
  }

}
Martin Reinecke's avatar
Martin Reinecke committed
131
132
133

using detail_fft::convolve_1d;

134
namespace detail_totalconvolve {
135

Martin Reinecke's avatar
Martin Reinecke committed
136
using namespace std;
137
138
139
140

template<typename T> class Interpolator
  {
  protected:
141
    bool adjoint;
Martin Reinecke's avatar
Martin Reinecke committed
142
    size_t lmax, kmax, nphi0, ntheta0, nphi, ntheta;
Martin Reinecke's avatar
Martin Reinecke committed
143
    int nthreads;
144
    T ofactor;
Martin Reinecke's avatar
fix    
Martin Reinecke committed
145
    size_t supp;
146
    ES_Kernel kernel;
147
    size_t ncomp;
148
149
150
#ifdef SIMD_INTERPOL
    mav<native_simd<T>,4> scube;
#endif
151
    mav<T,4> cube; // the data cube (theta, phi, 2*mbeam+1, TGC)
152

153
    void correct(mav<T,2> &arr, int spin)
154
      {
155
      T sfct = (spin&1) ? -1 : 1;
Martin Reinecke's avatar
Martin Reinecke committed
156
157
158
159
      mav<T,2> tmp({nphi,nphi0});
      // copy and extend to second half
      for (size_t j=0; j<nphi0; ++j)
        tmp.v(0,j) = arr(0,j);
Martin Reinecke's avatar
Martin Reinecke committed
160
      for (size_t i=1, i2=nphi0-1; i+1<ntheta0; ++i,--i2)
Martin Reinecke's avatar
Martin Reinecke committed
161
        for (size_t j=0,j2=nphi0/2; j<nphi0; ++j,++j2)
162
          {
Martin Reinecke's avatar
Martin Reinecke committed
163
          if (j2>=nphi0) j2-=nphi0;
Martin Reinecke's avatar
Martin Reinecke committed
164
165
          tmp.v(i,j2) = arr(i,j2);
          tmp.v(i2,j) = sfct*tmp(i,j2);
166
          }
Martin Reinecke's avatar
Martin Reinecke committed
167
168
      for (size_t j=0; j<nphi0; ++j)
        tmp.v(ntheta0-1,j) = arr(ntheta0-1,j);
Martin Reinecke's avatar
Martin Reinecke committed
169
      auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
170
171
      vector<T> k2(fct.size());
      for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
Martin Reinecke's avatar
Martin Reinecke committed
172
173
      fmav<T> ftmp(tmp);
      fmav<T> ftmp0(tmp.template subarray<2>({0,0},{nphi0, nphi0}));
174
      convolve_1d(ftmp0, ftmp, 0, k2, nthreads);
Martin Reinecke's avatar
Martin Reinecke committed
175
      fmav<T> ftmp2(tmp.template subarray<2>({0,0},{ntheta, nphi0}));
Martin Reinecke's avatar
tweaks    
Martin Reinecke committed
176
      fmav<T> farr(arr);
177
      convolve_1d(ftmp2, farr, 1, k2, nthreads);
178
      }
179
180
    void decorrect(mav<T,2> &arr, int spin)
      {
181
      T sfct = (spin&1) ? -1 : 1;
Martin Reinecke's avatar
Martin Reinecke committed
182
183
      mav<T,2> tmp({nphi,nphi0});
      auto fct = kernel.correction_factors(nphi, nphi0/2+1, nthreads);
184
185
      vector<T> k2(fct.size());
      for (size_t i=0; i<fct.size(); ++i) k2[i] = T(fct[i]/nphi0);
Martin Reinecke's avatar
Martin Reinecke committed
186
187
      fmav<T> farr(arr);
      fmav<T> ftmp2(tmp.template subarray<2>({0,0},{ntheta, nphi0}));
188
      convolve_1d(farr, ftmp2, 1, k2, nthreads);
189
      // extend to second half
Martin Reinecke's avatar
Martin Reinecke committed
190
      for (size_t i=1, i2=nphi-1; i+1<ntheta; ++i,--i2)
Martin Reinecke's avatar
Martin Reinecke committed
191
        for (size_t j=0,j2=nphi0/2; j<nphi0; ++j,++j2)
192
          {
Martin Reinecke's avatar
Martin Reinecke committed
193
          if (j2>=nphi0) j2-=nphi0;
194
195
          tmp.v(i2,j) = sfct*tmp(i,j2);
          }
Martin Reinecke's avatar
Martin Reinecke committed
196
197
      fmav<T> ftmp(tmp);
      fmav<T> ftmp0(tmp.template subarray<2>({0,0},{nphi0, nphi0}));
198
      convolve_1d(ftmp, ftmp0, 0, k2, nthreads);
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
199
      for (size_t j=0; j<nphi0; ++j)
200
        arr.v(0,j) = T(0.5)*tmp(0,j);
Martin Reinecke's avatar
Martin Reinecke committed
201
      for (size_t i=1; i+1<ntheta0; ++i)
202
        for (size_t j=0; j<nphi0; ++j)
Martin Reinecke's avatar
Martin Reinecke committed
203
204
          arr.v(i,j) = tmp(i,j);
      for (size_t j=0; j<nphi0; ++j)
205
        arr.v(ntheta0-1,j) = T(0.5)*tmp(ntheta0-1,j);
206
      }
207

Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
208
209
210
211
212
213
214
215
216
217
218
    vector<size_t> getIdx(const mav<T,2> &ptg) const
      {
      vector<size_t> idx(ptg.shape(0));
      constexpr size_t cellsize=16;
      size_t nct = ntheta/cellsize+1,
             ncp = nphi/cellsize+1;
      vector<vector<size_t>> mapper(nct*ncp);
      for (size_t i=0; i<ptg.shape(0); ++i)
        {
        size_t itheta=min(nct-1,size_t(ptg(i,0)/pi*nct)),
               iphi=min(ncp-1,size_t(ptg(i,1)/(2*pi)*ncp));
219
//        MR_assert((itheta<nct)&&(iphi<ncp), "oops");
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
220
221
222
        mapper[itheta*ncp+iphi].push_back(i);
        }
      size_t cnt=0;
Martin Reinecke's avatar
Martin Reinecke committed
223
224
      for (auto &vec: mapper)
        {
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
225
226
        for (auto i:vec)
          idx[cnt++] = i;
Martin Reinecke's avatar
Martin Reinecke committed
227
228
229
        vec.clear();
        vec.shrink_to_fit();
        }
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
230
231
232
      return idx;
      }

233
  public:
234
235
    Interpolator(const vector<Alm<complex<T>>> &slm,
                 const vector<Alm<complex<T>>> &blm,
236
                 bool separate, T epsilon, T ofmin, int nthreads_)
237
      : adjoint(false),
238
239
        lmax(slm.at(0).Lmax()),
        kmax(blm.at(0).Mmax()),
Martin Reinecke's avatar
Martin Reinecke committed
240
241
        nphi0(2*good_size_real(lmax+1)),
        ntheta0(nphi0/2+1),
Martin Reinecke's avatar
Martin Reinecke committed
242
        nphi(std::max<size_t>(20,2*good_size_real(size_t((2*lmax+1)*ofmin/2.)))),
Martin Reinecke's avatar
Martin Reinecke committed
243
        ntheta(nphi/2+1),
Martin Reinecke's avatar
Martin Reinecke committed
244
        nthreads(nthreads_),
245
        ofactor(T(nphi)/(2*lmax+1)),
Martin Reinecke's avatar
fix    
Martin Reinecke committed
246
        supp(ES_Kernel::get_supp(epsilon, ofactor)),
Martin Reinecke's avatar
Martin Reinecke committed
247
        kernel(supp, ofactor, nthreads),
248
        ncomp(separate ? slm.size() : 1),
249
250
251
252
253
254
#ifdef SIMD_INTERPOL
        scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
        cube(reinterpret_cast<T *>(scube.vdata()),{ntheta+2*supp, nphi+2*supp, ncomp, ((2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size())*native_simd<T>::size()},true)
#else
        cube({ntheta+2*supp, nphi+2*supp, ncomp, 2*kmax+1})
#endif
255
      {
256
      MR_assert((ncomp==1)||(ncomp==3), "currently only 1 or 3 components allowed");
257
258
259
260
261
262
263
264
265
      MR_assert(slm.size()==blm.size(), "inconsistent slm and blm vectors");
      for (size_t i=0; i<slm.size(); ++i)
        {
        MR_assert(slm[i].Lmax()==lmax, "inconsistent Sky lmax");
        MR_assert(slm[i].Mmax()==lmax, "Sky lmax must be equal to Sky mmax");
        MR_assert(blm[i].Lmax()==lmax, "Sky and beam lmax must be equal");
        MR_assert(blm[i].Mmax()==kmax, "Inconcistent beam mmax");
        }

266
267
      MR_assert((supp<=ntheta) && (supp<=nphi), "support too large!");
      Alm<complex<T>> a1(lmax, lmax), a2(lmax,lmax);
Martin Reinecke's avatar
Martin Reinecke committed
268
      auto ginfo = sharp_make_cc_geom_info(ntheta0,nphi0,0.,cube.stride(1),cube.stride(0));
269
270
      auto ainfo = sharp_make_triangular_alm_info(lmax,lmax,1);

271
      vector<T>lnorm(lmax+1);
272
      for (size_t i=0; i<=lmax; ++i)
273
        lnorm[i]=T(std::sqrt(4*pi/(2*i+1.)));
274

275
      for (size_t icomp=0; icomp<ncomp; ++icomp)
276
277
278
279
        {
        for (size_t m=0; m<=lmax; ++m)
          for (size_t l=m; l<=lmax; ++l)
            {
280
            if (separate)
281
              a1(l,m) = slm[icomp](l,m)*blm[icomp](l,0).real()*lnorm[l];
282
283
            else
              {
284
285
              a1(l,m) = 0;
              for (size_t j=0; j<slm.size(); ++j)
286
                a1(l,m) += slm[j](l,m)*blm[j](l,0).real()*lnorm[l];
287
288
              }
            }
289
        auto m1 = cube.template subarray<2>({supp,supp,icomp,0},{ntheta,nphi,0,0});
290
291
292
293
294
295
296
297
298
299
300
301
302
303
        sharp_alm2map(a1.Alms().data(), m1.vdata(), *ginfo, *ainfo, 0, nthreads);
        correct(m1,0);

        for (size_t k=1; k<=kmax; ++k)
          {
          for (size_t m=0; m<=lmax; ++m)
            for (size_t l=m; l<=lmax; ++l)
              {
              if (l<k)
                a1(l,m)=a2(l,m)=0.;
              else
                {
                if (separate)
                  {
304
                  auto tmp = blm[icomp](l,k)*(-2*lnorm[l]);
305
306
307
308
309
310
311
312
                  a1(l,m) = slm[icomp](l,m)*tmp.real();
                  a2(l,m) = slm[icomp](l,m)*tmp.imag();
                  }
                else
                  {
                  a1(l,m) = a2(l,m) = 0;
                  for (size_t j=0; j<slm.size(); ++j)
                    {
313
                    auto tmp = blm[j](l,k)*(-2*lnorm[l]);
314
315
316
317
                    a1(l,m) += slm[j](l,m)*tmp.real();
                    a2(l,m) += slm[j](l,m)*tmp.imag();
                    }
                  }
Martin Reinecke's avatar
fixes    
Martin Reinecke committed
318
                }
319
              }
320
321
          auto m1 = cube.template subarray<2>({supp,supp,icomp,2*k-1},{ntheta,nphi,0,0});
          auto m2 = cube.template subarray<2>({supp,supp,icomp,2*k  },{ntheta,nphi,0,0});
Martin Reinecke's avatar
fixes    
Martin Reinecke committed
322
323
324
325
          sharp_alm2map_spin(k, a1.Alms().data(), a2.Alms().data(), m1.vdata(),
            m2.vdata(), *ginfo, *ainfo, 0, nthreads);
          correct(m1,k);
          correct(m2,k);
326
          }
327
        }
328

329
330
331
      // fill border regions
      for (size_t i=0; i<supp; ++i)
        for (size_t j=0, j2=nphi/2; j<nphi; ++j,++j2)
332
          for (size_t k=0; k<cube.shape(3); ++k)
333
            {
334
            T fct = (((k+1)/2)&1) ? -1 : 1;
335
            if (j2>=nphi) j2-=nphi;
336
            for (size_t l=0; l<cube.shape(2); ++l)
337
              {
338
339
              cube.v(supp-1-i,j2+supp,l,k) = fct*cube(supp+1+i,j+supp,l,k);
              cube.v(supp+ntheta+i,j2+supp,l,k) = fct*cube(supp+ntheta-2-i,j+supp,l,k);
340
              }
341
342
343
            }
      for (size_t i=0; i<ntheta+2*supp; ++i)
        for (size_t j=0; j<supp; ++j)
344
345
          for (size_t k=0; k<cube.shape(3); ++k)
            for (size_t l=0; l<cube.shape(2); ++l)
346
            {
347
348
            cube.v(i,j,l,k) = cube(i,j+nphi,l,k);
            cube.v(i,j+nphi+supp,l,k) = cube(i,j+supp,l,k);
349
350
351
            }
      }

352
    Interpolator(size_t lmax_, size_t kmax_, size_t ncomp_, T epsilon, T ofmin, int nthreads_)
353
354
355
356
357
      : adjoint(true),
        lmax(lmax_),
        kmax(kmax_),
        nphi0(2*good_size_real(lmax+1)),
        ntheta0(nphi0/2+1),
Martin Reinecke's avatar
Martin Reinecke committed
358
        nphi(std::max<size_t>(20,2*good_size_real(size_t((2*lmax+1)*ofmin/2.)))),
359
360
        ntheta(nphi/2+1),
        nthreads(nthreads_),
361
        ofactor(T(nphi)/(2*lmax+1)),
362
363
        supp(ES_Kernel::get_supp(epsilon, ofactor)),
        kernel(supp, ofactor, nthreads),
364
        ncomp(ncomp_),
365
366
367
368
369
370
#ifdef SIMD_INTERPOL
        scube({ntheta+2*supp, nphi+2*supp, ncomp, (2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size()}),
        cube(reinterpret_cast<T *>(scube.vdata()),{ntheta+2*supp, nphi+2*supp, ncomp, ((2*kmax+1+native_simd<T>::size()-1)/native_simd<T>::size())*native_simd<T>::size()},true)
#else
        cube({ntheta+2*supp, nphi+2*supp, ncomp, 2*kmax+1})
#endif
371
      {
372
      MR_assert((ncomp==1)||(ncomp==3), "currently only 1 or 3 components allowed");
373
374
375
376
      MR_assert((supp<=ntheta) && (supp<=nphi), "support too large!");
      cube.apply([](T &v){v=0.;});
      }

Martin Reinecke's avatar
Martin Reinecke committed
377
#ifdef SIMD_INTERPOL
Martin Reinecke's avatar
Martin Reinecke committed
378
379
    template<size_t nv, size_t nc> void interpol_help0(const T * DUCC0_RESTRICT wt,
      const T * DUCC0_RESTRICT wp, const native_simd<T> * DUCC0_RESTRICT p, size_t d0, size_t d1, const native_simd<T> * DUCC0_RESTRICT psiarr2, mav<T,2> &res, size_t idx) const
Martin Reinecke's avatar
Martin Reinecke committed
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
      {
      array<native_simd<T>,nc> vv;
      for (auto &vvv:vv) vvv=0;
      for (size_t j=0; j<supp; ++j, p+=d0)
        {
        auto p1=p;
        T wtj = wt[j];
        for (size_t k=0; k<supp; ++k, p1+=d1)
          {
          array<native_simd<T>,nc> tvv;
          for (auto &vvv:tvv) vvv=0;
          for (size_t l=0; l<nv; ++l)
            for (size_t c=0; c<nc; ++c)
              tvv[c] += p1[l+nv*c]*psiarr2[l];
          for (size_t c=0; c<nc; ++c)
          vv[c] += (wtj*wp[k])*tvv[c];
          }
        }
      for (size_t c=0; c<nc; ++c)
        res.v(idx,c) = reduce(vv[c], std::plus<>());
      }
Martin Reinecke's avatar
Martin Reinecke committed
401
402
    template<size_t nv, size_t nc> void deinterpol_help0(const T * DUCC0_RESTRICT wt,
      const T * DUCC0_RESTRICT wp, native_simd<T> * DUCC0_RESTRICT p, size_t d0, size_t d1, const native_simd<T> * DUCC0_RESTRICT psiarr2, const mav<T,2> &data, size_t idx) const
Martin Reinecke's avatar
Martin Reinecke committed
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
      {
      array<native_simd<T>,nc> vv;
      for (size_t i=0; i<nc; ++i) vv[i] = data(idx,i);
      for (size_t j=0; j<supp; ++j, p+=d0)
        {
        auto p1=p;
        T wtj = wt[j];
        for (size_t k=0; k<supp; ++k, p1+=d1)
          {
          native_simd<T> wtjwpk = wtj*wp[k];
          for (size_t l=0; l<nv; ++l)
            {
            auto tmp = wtjwpk*psiarr2[l];
            for (size_t c=0; c<nc; ++c)
              p1[l+nv*c] += vv[c]*tmp;
            }
          }
        }
      }
#endif

424
    void interpol (const mav<T,2> &ptg, mav<T,2> &res) const
425
      {
426
427
428
#ifdef SIMD_INTERPOL
      constexpr size_t vl=native_simd<T>::size();
      MR_assert(scube.stride(3)==1, "bad stride");
Martin Reinecke's avatar
Martin Reinecke committed
429
      size_t nv=scube.shape(3);
430
#endif
431
      MR_assert(!adjoint, "cannot be called in adjoint mode");
432
433
      MR_assert(ptg.shape(0)==res.shape(0), "dimension mismatch");
      MR_assert(ptg.shape(1)==3, "second dimension must have length 3");
434
      MR_assert(res.shape(1)==ncomp, "# of components mismatch");
435
436
437
      T delta = T(2)/supp;
      T xdtheta = T((ntheta-1)/pi),
        xdphi = T(nphi/(2*pi));
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
438
      auto idx = getIdx(ptg);
Martin Reinecke's avatar
Martin Reinecke committed
439
      execStatic(idx.size(), nthreads, 0, [&](Scheduler &sched)
440
        {
Martin Reinecke's avatar
Martin Reinecke committed
441
442
443
444
445
446
        union {
          native_simd<T> simd[64/vl];
          T scalar[64];
          } kdata;
        T *wt(kdata.scalar), *wp(kdata.scalar+supp);
        size_t nvec = (2*supp+vl-1)/vl;
Martin Reinecke's avatar
Martin Reinecke committed
447
448
        for (auto &v: kdata.simd) v=0;
        MR_assert(2*supp<=64, "support too large");
Martin Reinecke's avatar
Martin Reinecke committed
449
        vector<T> psiarr(2*kmax+1);
450
451
452
453
#ifdef SIMD_INTERPOL
        vector<native_simd<T>> psiarr2((2*kmax+1+vl-1)/vl);
        for (auto &v:psiarr2) v=0;
#endif
Martin Reinecke's avatar
Martin Reinecke committed
454
        while (auto rng=sched.getNext()) for(auto ind=rng.lo; ind<rng.hi; ++ind)
455
          {
Martin Reinecke's avatar
Martin Reinecke committed
456
          size_t i=idx[ind];
457
458
          T f0=T(0.5*supp+ptg(i,0)*xdtheta);
          size_t i0 = size_t(f0+T(1));
Martin Reinecke's avatar
Martin Reinecke committed
459
          for (size_t t=0; t<supp; ++t)
Martin Reinecke's avatar
Martin Reinecke committed
460
            wt[t] = (t+i0-f0)*delta - 1;
461
          T f1=T(0.5)*supp+ptg(i,1)*xdphi;
Martin Reinecke's avatar
Martin Reinecke committed
462
463
          size_t i1 = size_t(f1+1.);
          for (size_t t=0; t<supp; ++t)
Martin Reinecke's avatar
Martin Reinecke committed
464
465
466
            wp[t] = (t+i1-f1)*delta - 1;
          for (size_t t=0; t<nvec; ++t)
            kdata.simd[t] = kernel(kdata.simd[t]);
Martin Reinecke's avatar
Martin Reinecke committed
467
          psiarr[0]=1.;
468
469
          double psi=ptg(i,2);
          double cpsi=cos(psi), spsi=sin(psi);
Martin Reinecke's avatar
Martin Reinecke committed
470
471
472
          double cnpsi=cpsi, snpsi=spsi;
          for (size_t l=1; l<=kmax; ++l)
            {
473
474
            psiarr[2*l-1]=T(cnpsi);
            psiarr[2*l]=T(snpsi);
Martin Reinecke's avatar
Martin Reinecke committed
475
476
477
478
            const double tmp = snpsi*cpsi + cnpsi*spsi;
            cnpsi=cnpsi*cpsi - snpsi*spsi;
            snpsi=tmp;
            }
479
480
481
482
#ifdef SIMD_INTERPOL
          memcpy(reinterpret_cast<T *>(psiarr2.data()), psiarr.data(),
            (2*kmax+1)*sizeof(T));
#endif
483
484
          if (ncomp==1)
            {
485
#ifndef SIMD_INTERPOL
486
            T vv=0;
487
488
489
            for (size_t j=0; j<supp; ++j)
              for (size_t k=0; k<supp; ++k)
                for (size_t l=0; l<2*kmax+1; ++l)
490
                  vv += cube(i0+j,i1+k,0,l)*wt[j]*wp[k]*psiarr[l];
491
            res.v(i,0) = vv;
492
493
494
495
#else
            const native_simd<T> *p=&scube(i0,i1,0,0);
            ptrdiff_t d0 = scube.stride(0);
            ptrdiff_t d1 = scube.stride(1);
Martin Reinecke's avatar
Martin Reinecke committed
496
            switch (nv)
497
              {
Martin Reinecke's avatar
Martin Reinecke committed
498
499
#ifdef SPECIAL_CASING
              case 1:
Martin Reinecke's avatar
Martin Reinecke committed
500
                interpol_help0<1,1>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
501
502
                break;
              case 2:
Martin Reinecke's avatar
Martin Reinecke committed
503
                interpol_help0<2,1>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
504
505
                break;
              case 3:
Martin Reinecke's avatar
Martin Reinecke committed
506
                interpol_help0<3,1>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
507
508
                break;
              case 4:
Martin Reinecke's avatar
Martin Reinecke committed
509
                interpol_help0<4,1>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
510
511
                break;
              case 5:
Martin Reinecke's avatar
Martin Reinecke committed
512
                interpol_help0<5,1>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
513
514
                break;
              case 6:
Martin Reinecke's avatar
Martin Reinecke committed
515
                interpol_help0<6,1>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
516
517
                break;
              case 7:
Martin Reinecke's avatar
Martin Reinecke committed
518
                interpol_help0<7,1>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
519
520
521
                break;
#endif
              default:
522
                {
Martin Reinecke's avatar
Martin Reinecke committed
523
524
525
526
527
528
529
530
531
532
533
534
535
536
                native_simd<T> vv=0.;
                for (size_t j=0; j<supp; ++j, p+=d0)
                  {
                  auto p1=p;
                  T wtj = wt[j];
                  for (size_t k=0; k<supp; ++k, p1+=d1)
                    {
                    native_simd<T> tvv=0;
                    for (size_t l=0; l<nv; ++l)
                      tvv += p1[l]*psiarr2[l];
                    vv += wtj*wp[k]*tvv;
                    }
                  }
                res.v(i,0) = reduce(vv, std::plus<>());
537
538
539
                }
              }
#endif
540
541
542
            }
          else // ncomp==3
            {
543
#ifndef SIMD_INTERPOL
544
            T v0=0., v1=0., v2=0.;
545
546
547
548
            for (size_t j=0; j<supp; ++j)
              for (size_t k=0; k<supp; ++k)
                for (size_t l=0; l<2*kmax+1; ++l)
                  {
549
                  auto tmp = wt[j]*wp[k]*psiarr[l];
550
551
552
                  v0 += cube(i0+j,i1+k,0,l)*tmp;
                  v1 += cube(i0+j,i1+k,1,l)*tmp;
                  v2 += cube(i0+j,i1+k,2,l)*tmp;
553
554
555
556
                  }
            res.v(i,0) = v0;
            res.v(i,1) = v1;
            res.v(i,2) = v2;
557
558
559
560
#else
            const native_simd<T> *p=&scube(i0,i1,0,0);
            ptrdiff_t d0 = scube.stride(0);
            ptrdiff_t d1 = scube.stride(1);
Martin Reinecke's avatar
Martin Reinecke committed
561
            switch (nv)
562
              {
Martin Reinecke's avatar
Martin Reinecke committed
563
564
#ifdef SPECIAL_CASING
              case 1:
Martin Reinecke's avatar
Martin Reinecke committed
565
                interpol_help0<1,3>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
566
567
                break;
              case 2:
Martin Reinecke's avatar
Martin Reinecke committed
568
                interpol_help0<2,3>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
569
570
                break;
              case 3:
Martin Reinecke's avatar
Martin Reinecke committed
571
                interpol_help0<3,3>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
572
573
                break;
              case 4:
Martin Reinecke's avatar
Martin Reinecke committed
574
                interpol_help0<4,3>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
575
576
                break;
              case 5:
Martin Reinecke's avatar
Martin Reinecke committed
577
                interpol_help0<5,3>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
578
579
                break;
              case 6:
Martin Reinecke's avatar
Martin Reinecke committed
580
                interpol_help0<6,3>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
581
582
                break;
              case 7:
Martin Reinecke's avatar
Martin Reinecke committed
583
                interpol_help0<7,3>(wt, wp, p, d0, d1, psiarr2.data(), res, i);
Martin Reinecke's avatar
Martin Reinecke committed
584
585
586
                break;
#endif
              default:
587
                {
Martin Reinecke's avatar
Martin Reinecke committed
588
589
590
                ptrdiff_t d2 = scube.stride(2);
                native_simd<T> v0=0., v1=0., v2=0.;
                for (size_t j=0; j<supp; ++j, p+=d0)
591
                  {
Martin Reinecke's avatar
Martin Reinecke committed
592
593
594
595
596
597
598
599
600
601
602
603
604
                  auto p1=p;
                  T wtj = wt[j];
                  for (size_t k=0; k<supp; ++k, p1+=d1)
                    {
                    native_simd<T> wtjwpk = wtj*wp[k];
                    for (size_t l=0; l<nv; ++l)
                      {
                      auto tmp = wtjwpk*psiarr2[l];
                      v0 += p1[l]*tmp;
                      v1 += p1[l+d2]*tmp;
                      v2 += p1[l+2*d2]*tmp;
                      }
                    }
605
                  }
Martin Reinecke's avatar
Martin Reinecke committed
606
607
608
                res.v(i,0) = reduce(v0, std::plus<>());
                res.v(i,1) = reduce(v1, std::plus<>());
                res.v(i,2) = reduce(v2, std::plus<>());
609
610
611
                }
              }
#endif
612
            }
613
          }
Martin Reinecke's avatar
Martin Reinecke committed
614
        });
615
      }
616

617
618
619
    size_t support() const
      { return supp; }

620
    void deinterpol (const mav<T,2> &ptg, const mav<T,2> &data)
621
      {
622
623
624
#ifdef SIMD_INTERPOL
      constexpr size_t vl=native_simd<T>::size();
      MR_assert(scube.stride(3)==1, "bad stride");
Martin Reinecke's avatar
Martin Reinecke committed
625
626
      MR_assert(scube.stride(2)==ptrdiff_t(scube.shape(3)), "bad stride");
      size_t nv=scube.shape(3);
627
#endif
628
629
630
      MR_assert(adjoint, "can only be called in adjoint mode");
      MR_assert(ptg.shape(0)==data.shape(0), "dimension mismatch");
      MR_assert(ptg.shape(1)==3, "second dimension must have length 3");
631
      MR_assert(data.shape(1)==ncomp, "# of components mismatch");
632
633
634
      T delta = T(2)/supp;
      T xdtheta = T((ntheta-1)/pi),
        xdphi = T(nphi/(2*pi));
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
635
      auto idx = getIdx(ptg);
636
637
638
639
640
641
642

      constexpr size_t cellsize=16;
      size_t nct = ntheta/cellsize+5,
             ncp = nphi/cellsize+5;
      mav<std::mutex,2> locks({nct,ncp});

      execStatic(idx.size(), nthreads, 0, [&](Scheduler &sched)
643
        {
644
        size_t b_theta=99999999999999, b_phi=9999999999999999;
Martin Reinecke's avatar
Martin Reinecke committed
645
646
647
648
649
650
        union {
          native_simd<T> simd[64/vl];
          T scalar[64];
          } kdata;
        T *wt(kdata.scalar), *wp(kdata.scalar+supp);
        size_t nvec = (2*supp+vl-1)/vl;
Martin Reinecke's avatar
Martin Reinecke committed
651
652
        for (auto &v: kdata.simd) v=0;
        MR_assert(2*supp<=64, "support too large");
653
        vector<T> psiarr(2*kmax+1);
654
655
656
657
#ifdef SIMD_INTERPOL
        vector<native_simd<T>> psiarr2((2*kmax+1+vl-1)/vl);
        for (auto &v:psiarr2) v=0;
#endif
658
659
660
        while (auto rng=sched.getNext()) for(auto ind=rng.lo; ind<rng.hi; ++ind)
          {
          size_t i=idx[ind];
661
          T f0=T(0.5)*supp+ptg(i,0)*xdtheta;
662
663
          size_t i0 = size_t(f0+1.);
          for (size_t t=0; t<supp; ++t)
Martin Reinecke's avatar
Martin Reinecke committed
664
            wt[t] = (t+i0-f0)*delta - 1;
665
          T f1=T(0.5)*supp+ptg(i,1)*xdphi;
666
667
          size_t i1 = size_t(f1+1.);
          for (size_t t=0; t<supp; ++t)
Martin Reinecke's avatar
Martin Reinecke committed
668
669
670
            wp[t] = (t+i1-f1)*delta - 1;
          for (size_t t=0; t<nvec; ++t)
            kdata.simd[t] = kernel(kdata.simd[t]);
671
672
673
674
675
676
          psiarr[0]=1.;
          double psi=ptg(i,2);
          double cpsi=cos(psi), spsi=sin(psi);
          double cnpsi=cpsi, snpsi=spsi;
          for (size_t l=1; l<=kmax; ++l)
            {
677
678
            psiarr[2*l-1]=T(cnpsi);
            psiarr[2*l]=T(snpsi);
679
680
681
682
            const double tmp = snpsi*cpsi + cnpsi*spsi;
            cnpsi=cnpsi*cpsi - snpsi*spsi;
            snpsi=tmp;
            }
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
          size_t b_theta_new = i0/cellsize,
                 b_phi_new = i1/cellsize;
          if ((b_theta_new!=b_theta) || (b_phi_new!=b_phi))
            {
            if (b_theta<locks.shape(0))  // unlock
              {
              locks.v(b_theta,b_phi).unlock();
              locks.v(b_theta,b_phi+1).unlock();
              locks.v(b_theta+1,b_phi).unlock();
              locks.v(b_theta+1,b_phi+1).unlock();
              }
            b_theta = b_theta_new;
            b_phi = b_phi_new;
            locks.v(b_theta,b_phi).lock();
            locks.v(b_theta,b_phi+1).lock();
            locks.v(b_theta+1,b_phi).lock();
            locks.v(b_theta+1,b_phi+1).lock();
            }
701
702
703
704
#ifdef SIMD_INTERPOL
          memcpy(reinterpret_cast<T *>(psiarr2.data()), psiarr.data(),
            (2*kmax+1)*sizeof(T));
#endif
705
706
          if (ncomp==1)
            {
707
#ifndef SIMD_INTERPOL
708
            T val = data(i,0);
709
710
711
            for (size_t j=0; j<supp; ++j)
              for (size_t k=0; k<supp; ++k)
                for (size_t l=0; l<2*kmax+1; ++l)
712
713
714
715
716
                  cube.v(i0+j,i1+k,0,l) += val*wt[j]*wp[k]*psiarr[l];
#else
            native_simd<T> *p=&scube.v(i0,i1,0,0);
            ptrdiff_t d0 = scube.stride(0);
            ptrdiff_t d1 = scube.stride(1);
Martin Reinecke's avatar
Martin Reinecke committed
717
            switch (nv)
718
              {
Martin Reinecke's avatar
Martin Reinecke committed
719
720
#ifdef SPECIAL_CASING
              case 1:
Martin Reinecke's avatar
Martin Reinecke committed
721
                deinterpol_help0<1,1>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
722
723
                break;
              case 2:
Martin Reinecke's avatar
Martin Reinecke committed
724
                deinterpol_help0<2,1>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
725
726
                break;
              case 3:
Martin Reinecke's avatar
Martin Reinecke committed
727
                deinterpol_help0<3,1>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
728
729
                break;
              case 4:
Martin Reinecke's avatar
Martin Reinecke committed
730
                deinterpol_help0<4,1>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
731
732
                break;
              case 5:
Martin Reinecke's avatar
Martin Reinecke committed
733
                deinterpol_help0<5,1>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
734
735
                break;
              case 6:
Martin Reinecke's avatar
Martin Reinecke committed
736
                deinterpol_help0<6,1>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
737
738
                break;
              case 7:
Martin Reinecke's avatar
Martin Reinecke committed
739
                deinterpol_help0<7,1>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
740
741
742
                break;
#endif
              default:
743
                {
Martin Reinecke's avatar
Martin Reinecke committed
744
745
746
747
748
749
750
751
752
753
754
755
                native_simd<T> val = data(i,0);
                for (size_t j=0; j<supp; ++j, p+=d0)
                  {
                  auto p1=p;
                  T wtj = wt[j];
                  for (size_t k=0; k<supp; ++k, p1+=d1)
                    {
                    native_simd<T> tv = wtj*wp[k]*val;
                    for (size_t l=0; l<nv; ++l)
                      p1[l] += tv*psiarr2[l];
                    } 
                  }
756
757
758
                }
              }
#endif
759
760
761
            }
          else // ncomp==3
            {
762
#ifndef SIMD_INTERPOL
763
            T v0=data(i,0), v1=data(i,1), v2=data(i,2);
764
765
766
            for (size_t j=0; j<supp; ++j)
              for (size_t k=0; k<supp; ++k)
                {
767
                T t0 = wt[j]*wp[k];
768
769
                for (size_t l=0; l<2*kmax+1; ++l)
                  {
770
                  T tmp = t0*psiarr[l];
771
772
773
774
775
776
777
778
779
                  cube.v(i0+j,i1+k,0,l) += v0*tmp;
                  cube.v(i0+j,i1+k,1,l) += v1*tmp;
                  cube.v(i0+j,i1+k,2,l) += v2*tmp;
                  }
                }
#else
            native_simd<T> *p=&scube.v(i0,i1,0,0);
            ptrdiff_t d0 = scube.stride(0);
            ptrdiff_t d1 = scube.stride(1);
Martin Reinecke's avatar
Martin Reinecke committed
780
            switch (nv)
781
              {
Martin Reinecke's avatar
Martin Reinecke committed
782
783
#ifdef SPECIAL_CASING
              case 1:
Martin Reinecke's avatar
Martin Reinecke committed
784
                deinterpol_help0<1,3>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
785
786
                break;
              case 2:
Martin Reinecke's avatar
Martin Reinecke committed
787
                deinterpol_help0<2,3>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
788
789
                break;
              case 3:
Martin Reinecke's avatar
Martin Reinecke committed
790
                deinterpol_help0<3,3>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
791
792
                break;
              case 4:
Martin Reinecke's avatar
Martin Reinecke committed
793
                deinterpol_help0<4,3>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
794
795
                break;
              case 5:
Martin Reinecke's avatar
Martin Reinecke committed
796
                deinterpol_help0<5,3>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
797
798
                break;
              case 6:
Martin Reinecke's avatar
Martin Reinecke committed
799
                deinterpol_help0<6,3>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
800
801
                break;
              case 7:
Martin Reinecke's avatar
Martin Reinecke committed
802
                deinterpol_help0<7,3>(wt, wp, p, d0, d1, psiarr2.data(), data, i);
Martin Reinecke's avatar
Martin Reinecke committed
803
804
805
                break;
#endif
              default:
806
                {
Martin Reinecke's avatar
Martin Reinecke committed
807
808
809
                native_simd<T> v0=data(i,0), v1=data(i,1), v2=data(i,2);
                ptrdiff_t d2 = scube.stride(2);
                for (size_t j=0; j<supp; ++j, p+=d0)
810
                  {
Martin Reinecke's avatar
Martin Reinecke committed
811
812
813
814
815
816
817
818
819
820
821
822
823
                  auto p1=p;
                  T wtj = wt[j];
                  for (size_t k=0; k<supp; ++k, p1+=d1)
                    {
                    native_simd<T> wtjwpk = wtj*wp[k];
                    for (size_t l=0; l<nv; ++l)
                      {
                      auto tmp = wtjwpk*psiarr2[l];
                      p1[l] += v0*tmp;
                      p1[l+d2] += v1*tmp;
                      p1[l+2*d2] += v2*tmp;
                      }
                    }
824
825
                  }
                }
826
827
              }
#endif
828
            }
829
          }
830
831
832
833
834
835
836
        if (b_theta<locks.shape(0))  // unlock
          {
          locks.v(b_theta,b_phi).unlock();
          locks.v(b_theta,b_phi+1).unlock();
          locks.v(b_theta+1,b_phi).unlock();
          locks.v(b_theta+1,b_phi+1).unlock();
          }
837
838
        });
      }
839
    void getSlm (const vector<Alm<complex<T>>> &blm, vector<Alm<complex<T>>> &slm)
840
841
      {
      MR_assert(adjoint, "can only be called in adjoint mode");
842
843
      MR_assert((blm.size()==ncomp) || (ncomp==1), "incorrect number of beam a_lm sets");
      MR_assert((slm.size()==ncomp) || (ncomp==1), "incorrect number of sky a_lm sets");
844
845
846
847
848
      Alm<complex<T>> a1(lmax, lmax), a2(lmax,lmax);
      auto ginfo = sharp_make_cc_geom_info(ntheta0,nphi0,0.,cube.stride(1),cube.stride(0));
      auto ainfo = sharp_make_triangular_alm_info(lmax,lmax,1);

      // move stuff from border regions onto the main grid
849
      for (size_t i=0; i<cube.shape(0); ++i)
850
        for (size_t j=0; j<supp; ++j)
851
852
          for (size_t k=0; k<cube.shape(3); ++k)
            for (size_t l=0; l<cube.shape(2); ++l)
853
              {
854
855
              cube.v(i,j+nphi,l,k) += cube(i,j,l,k);
              cube.v(i,j+supp,l,k) += cube(i,j+nphi+supp,l,k);
856
              }
857
858
      for (size_t i=0; i<supp; ++i)
        for (size_t j=0, j2=nphi/2; j<nphi; ++j,++j2)
859
          for (size_t k=0; k<cube.shape(3); ++k)
860
            {
861
            T fct = (((k+1)/2)&1) ? -1 : 1;
862
            if (j2>=nphi) j2-=nphi;
863
            for (size_t l=0; l<cube.shape(2); ++l)
864
              {
865
866
              cube.v(supp+1+i,j+supp,l,k) += fct*cube(supp-1-i,j2+supp,l,k);
              cube.v(supp+ntheta-2-i, j+supp,l,k) += fct*cube(supp+ntheta+i,j2+supp,l,k);
867
              }
868
            }
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
869
870
871

      // special treatment for poles
      for (size_t j=0,j2=nphi/2; j<nphi/2; ++j,++j2)
872
873
        for (size_t k=0; k<cube.shape(3); ++k)
          for (size_t l=0; l<cube.shape(2); ++l)
874
            {
875
            T fct = (((k+1)/2)&1) ? -1 : 1;
876
            if (j2>=nphi) j2-=nphi;
877
878
879
880
881
882
            T tval = (cube(supp,j+supp,l,k) + fct*cube(supp,j2+supp,l,k));
            cube.v(supp,j+supp,l,k) = tval;
            cube.v(supp,j2+supp,l,k) = fct*tval;
            tval = (cube(supp+ntheta-1,j+supp,l,k) + fct*cube(supp+ntheta-1,j2+supp,l,k));
            cube.v(supp+ntheta-1,j+supp,l,k) = tval;
            cube.v(supp+ntheta-1,j2+supp,l,k) = fct*tval;
883
            }
Martin Reinecke's avatar
cleanup    
Martin Reinecke committed
884

885
      vector<T>lnorm(lmax+1);
886
      for (size_t i=0; i<=lmax; ++i)
887
        lnorm[i]=T(std::sqrt(4*pi/(2*i+1.)));
888

889
890
891
      for (size_t j=0; j<blm.size(); ++j)
        slm[j].SetToZero();

892
      for (size_t icomp=0; icomp<ncomp; ++icomp)
893
        {
894
        bool separate = ncomp>1;
895
        {
896
        auto m1 = cube.template subarray<2>({supp,supp,icomp,0},{ntheta,nphi,0,0});
897
898
        decorrect(m1,0);
        sharp_alm2map_adjoint(a1.Alms().vdata(), m1.data(), *ginfo, *ainfo, 0, nthreads);
899
900
        for (size_t m=0; m<=lmax; ++m)
          for (size_t l=m; l<=lmax; ++l)