Commit 7428a83a authored by Martin Reinecke's avatar Martin Reinecke
Browse files

classification, part 1

parent 4c18159c
...@@ -846,8 +846,8 @@ MRUTIL_NOINLINE static void sharp_execute_job (sharp_job *job) ...@@ -846,8 +846,8 @@ MRUTIL_NOINLINE static void sharp_execute_job (sharp_job *job)
mmax=sharp_get_mmax(job->ainfo->mval, job->ainfo->nm); mmax=sharp_get_mmax(job->ainfo->mval, job->ainfo->nm);
job->norm_l = (job->type==SHARP_ALM2MAP_DERIV1) ? job->norm_l = (job->type==SHARP_ALM2MAP_DERIV1) ?
sharp_Ylmgen_get_d1norm (lmax) : sharp_Ylmgen::get_d1norm (lmax) :
sharp_Ylmgen_get_norm (lmax, job->spin); sharp_Ylmgen::get_norm (lmax, job->spin);
/* clear output arrays if requested */ /* clear output arrays if requested */
init_output (job); init_output (job);
...@@ -881,8 +881,7 @@ MRUTIL_NOINLINE static void sharp_execute_job (sharp_job *job) ...@@ -881,8 +881,7 @@ MRUTIL_NOINLINE static void sharp_execute_job (sharp_job *job)
{ {
sharp_job ljob = *job; sharp_job ljob = *job;
ljob.opcnt=0; ljob.opcnt=0;
sharp_Ylmgen_C generator; sharp_Ylmgen generator(lmax,mmax,ljob.spin);
sharp_Ylmgen_init (&generator,lmax,mmax,ljob.spin);
alloc_almtmp(&ljob,lmax); alloc_almtmp(&ljob,lmax);
while (auto rng=sched.getNext()) for(auto mi=rng.lo; mi<rng.hi; ++mi) while (auto rng=sched.getNext()) for(auto mi=rng.lo; mi<rng.hi; ++mi)
...@@ -890,13 +889,12 @@ MRUTIL_NOINLINE static void sharp_execute_job (sharp_job *job) ...@@ -890,13 +889,12 @@ MRUTIL_NOINLINE static void sharp_execute_job (sharp_job *job)
/* alm->alm_tmp where necessary */ /* alm->alm_tmp where necessary */
alm2almtmp (&ljob, lmax, mi); alm2almtmp (&ljob, lmax, mi);
inner_loop (&ljob, ispair, cth, sth, llim, ulim, &generator, mi, mlim); inner_loop (&ljob, ispair, cth, sth, llim, ulim, generator, mi, mlim);
/* alm_tmp->alm where necessary */ /* alm_tmp->alm where necessary */
almtmp2alm (&ljob, lmax, mi); almtmp2alm (&ljob, lmax, mi);
} }
sharp_Ylmgen_destroy(&generator);
dealloc_almtmp(&ljob); dealloc_almtmp(&ljob);
opcnt+=ljob.opcnt; opcnt+=ljob.opcnt;
...@@ -911,7 +909,6 @@ MRUTIL_NOINLINE static void sharp_execute_job (sharp_job *job) ...@@ -911,7 +909,6 @@ MRUTIL_NOINLINE static void sharp_execute_job (sharp_job *job)
DEALLOC(sth); DEALLOC(sth);
} /* end of chunk loop */ } /* end of chunk loop */
DEALLOC(job->norm_l);
dealloc_phase (job); dealloc_phase (job);
job->opcnt = opcnt; job->opcnt = opcnt;
job->time=timer(); job->time=timer();
...@@ -929,7 +926,6 @@ static void sharp_build_job_common (sharp_job *job, sharp_jobtype type, ...@@ -929,7 +926,6 @@ static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
MR_assert((spin>=0)&&(spin<=alm_info->lmax), "bad spin"); MR_assert((spin>=0)&&(spin<=alm_info->lmax), "bad spin");
job->type = type; job->type = type;
job->spin = spin; job->spin = spin;
job->norm_l = NULL;
job->nmaps = (type==SHARP_ALM2MAP_DERIV1) ? 2 : ((spin>0) ? 2 : 1); job->nmaps = (type==SHARP_ALM2MAP_DERIV1) ? 2 : ((spin>0) ? 2 : 1);
job->nalm = (type==SHARP_ALM2MAP_DERIV1) ? 1 : ((spin>0) ? 2 : 1); job->nalm = (type==SHARP_ALM2MAP_DERIV1) ? 1 : ((spin>0) ? 2 : 1);
job->ginfo = geom_info; job->ginfo = geom_info;
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
typedef void (*t_inner_loop) (sharp_job *job, const int *ispair, typedef void (*t_inner_loop) (sharp_job *job, const int *ispair,
const double *cth_, const double *sth_, int llim, int ulim, const double *cth_, const double *sth_, int llim, int ulim,
sharp_Ylmgen_C *gen, int mi, const int *mlim); sharp_Ylmgen &gen, int mi, const int *mlim);
typedef int (*t_veclen) (void); typedef int (*t_veclen) (void);
typedef int (*t_max_nvec) (int spin); typedef int (*t_max_nvec) (int spin);
typedef const char *(*t_architecture) (void); typedef const char *(*t_architecture) (void);
...@@ -64,7 +64,7 @@ static int XCONCATX2(have,arch)(void) \ ...@@ -64,7 +64,7 @@ static int XCONCATX2(have,arch)(void) \
\ \
void XCONCATX2(inner_loop,arch) (sharp_job *job, const int *ispair, \ void XCONCATX2(inner_loop,arch) (sharp_job *job, const int *ispair, \
const double *cth_, const double *sth_, int llim, int ulim, \ const double *cth_, const double *sth_, int llim, int ulim, \
sharp_Ylmgen_C *gen, int mi, const int *mlim); \ sharp_Ylmgen &gen, int mi, const int *mlim); \
int XCONCATX2(sharp_veclen,arch) (void); \ int XCONCATX2(sharp_veclen,arch) (void); \
int XCONCATX2(sharp_max_nvec,arch) (int spin); \ int XCONCATX2(sharp_max_nvec,arch) (int spin); \
const char *XCONCATX2(sharp_architecture,arch) (void); const char *XCONCATX2(sharp_architecture,arch) (void);
...@@ -108,7 +108,7 @@ DECL2(avx) ...@@ -108,7 +108,7 @@ DECL2(avx)
#pragma GCC visibility push(hidden) #pragma GCC visibility push(hidden)
void inner_loop (sharp_job *job, const int *ispair,const double *cth, void inner_loop (sharp_job *job, const int *ispair,const double *cth,
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, const double *sth, int llim, int ulim, sharp_Ylmgen &gen, int mi,
const int *mlim) const int *mlim)
{ {
if (!inner_loop_) assign_funcs(); if (!inner_loop_) assign_funcs();
......
...@@ -116,7 +116,7 @@ static inline void Tvnormalize (Tv * MRUTIL_RESTRICT val, Tv * MRUTIL_RESTRICT s ...@@ -116,7 +116,7 @@ static inline void Tvnormalize (Tv * MRUTIL_RESTRICT val, Tv * MRUTIL_RESTRICT s
} }
} }
static void mypow(Tv val, int npow, const double * MRUTIL_RESTRICT powlimit, static void mypow(Tv val, int npow, const vector<double> &powlimit,
Tv * MRUTIL_RESTRICT resd, Tv * MRUTIL_RESTRICT ress) Tv * MRUTIL_RESTRICT resd, Tv * MRUTIL_RESTRICT ress)
{ {
Tv vminv=powlimit[npow]; Tv vminv=powlimit[npow];
...@@ -157,7 +157,7 @@ static void mypow(Tv val, int npow, const double * MRUTIL_RESTRICT powlimit, ...@@ -157,7 +157,7 @@ static void mypow(Tv val, int npow, const double * MRUTIL_RESTRICT powlimit,
} }
static inline void getCorfac(Tv scale, Tv * MRUTIL_RESTRICT corfac, static inline void getCorfac(Tv scale, Tv * MRUTIL_RESTRICT corfac,
const double * MRUTIL_RESTRICT cf) const vector<double> &cf)
{ {
typedef union typedef union
{ Tv v; double s[VLEN]; } Tvu; { Tv v; double s[VLEN]; } Tvu;
...@@ -183,17 +183,17 @@ static inline bool rescale(Tv * MRUTIL_RESTRICT v1, Tv * MRUTIL_RESTRICT v2, Tv ...@@ -183,17 +183,17 @@ static inline bool rescale(Tv * MRUTIL_RESTRICT v1, Tv * MRUTIL_RESTRICT v2, Tv
return false; return false;
} }
MRUTIL_NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, MRUTIL_NOINLINE static void iter_to_ieee(const sharp_Ylmgen &gen,
s0data_v * MRUTIL_RESTRICT d, int * MRUTIL_RESTRICT l_, int * MRUTIL_RESTRICT il_, int nv2) s0data_v * MRUTIL_RESTRICT d, int * MRUTIL_RESTRICT l_, int * MRUTIL_RESTRICT il_, int nv2)
{ {
int l=gen->m, il=0; int l=gen.m, il=0;
Tv mfac = (gen->m&1) ? -gen->mfac[gen->m]:gen->mfac[gen->m]; Tv mfac = (gen.m&1) ? -gen.mfac[gen.m]:gen.mfac[gen.m];
Tv limscale=sharp_limscale; Tv limscale=sharp_limscale;
int below_limit = 1; int below_limit = 1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
{ {
d->lam1[i]=0; d->lam1[i]=0;
mypow(d->sth[i],gen->m,gen->powlimit,&d->lam2[i],&d->scale[i]); mypow(d->sth[i],gen.m,gen.powlimit,&d->lam2[i],&d->scale[i]);
d->lam2[i] *= mfac; d->lam2[i] *= mfac;
Tvnormalize(&d->lam2[i],&d->scale[i],sharp_ftol); Tvnormalize(&d->lam2[i],&d->scale[i],sharp_ftol);
below_limit &= all_of(d->scale[i]<limscale); below_limit &= all_of(d->scale[i]<limscale);
...@@ -201,10 +201,10 @@ MRUTIL_NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * MRUTIL_RESTRICT ...@@ -201,10 +201,10 @@ MRUTIL_NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * MRUTIL_RESTRICT
while (below_limit) while (below_limit)
{ {
if (l+4>gen->lmax) {*l_=gen->lmax+1;return;} if (l+4>gen.lmax) {*l_=gen.lmax+1;return;}
below_limit=1; below_limit=1;
Tv a1=gen->coef[il ].a, b1=gen->coef[il ].b; Tv a1=gen.coef[il ].a, b1=gen.coef[il ].b;
Tv a2=gen->coef[il+1].a, b2=gen->coef[il+1].b; Tv a2=gen.coef[il+1].a, b2=gen.coef[il+1].b;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
{ {
d->lam1[i] = (a1*d->csq[i] + b1)*d->lam2[i] + d->lam1[i]; d->lam1[i] = (a1*d->csq[i] + b1)*d->lam2[i] + d->lam1[i];
...@@ -218,7 +218,7 @@ MRUTIL_NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * MRUTIL_RESTRICT ...@@ -218,7 +218,7 @@ MRUTIL_NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * MRUTIL_RESTRICT
} }
MRUTIL_NOINLINE static void alm2map_kernel(s0data_v * MRUTIL_RESTRICT d, MRUTIL_NOINLINE static void alm2map_kernel(s0data_v * MRUTIL_RESTRICT d,
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef, const dcmplx * MRUTIL_RESTRICT alm, const vector<sharp_ylmgen_dbl2> &coef, const dcmplx * MRUTIL_RESTRICT alm,
int l, int il, int lmax, int nv2) int l, int il, int lmax, int nv2)
{ {
if (nv2==nv0) if (nv2==nv0)
...@@ -290,21 +290,21 @@ MRUTIL_NOINLINE static void alm2map_kernel(s0data_v * MRUTIL_RESTRICT d, ...@@ -290,21 +290,21 @@ MRUTIL_NOINLINE static void alm2map_kernel(s0data_v * MRUTIL_RESTRICT d,
} }
MRUTIL_NOINLINE static void calc_alm2map (sharp_job * MRUTIL_RESTRICT job, MRUTIL_NOINLINE static void calc_alm2map (sharp_job * MRUTIL_RESTRICT job,
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, s0data_v * MRUTIL_RESTRICT d, int nth) const sharp_Ylmgen &gen, s0data_v * MRUTIL_RESTRICT d, int nth)
{ {
int l,il,lmax=gen->lmax; int l,il,lmax=gen.lmax;
int nv2 = (nth+VLEN-1)/VLEN; int nv2 = (nth+VLEN-1)/VLEN;
iter_to_ieee(gen, d, &l, &il, nv2); iter_to_ieee(gen, d, &l, &il, nv2);
job->opcnt += il * 4*nth; job->opcnt += il * 4*nth;
if (l>lmax) return; if (l>lmax) return;
job->opcnt += (lmax+1-l) * 6*nth; job->opcnt += (lmax+1-l) * 6*nth;
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef = gen->coef; auto &coef = gen.coef;
const dcmplx * MRUTIL_RESTRICT alm=job->almtmp; const dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
int full_ieee=1; int full_ieee=1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
{ {
getCorfac(d->scale[i], &d->corfac[i], gen->cf); getCorfac(d->scale[i], &d->corfac[i], gen.cf);
full_ieee &= all_of(d->scale[i]>=sharp_minscale); full_ieee &= all_of(d->scale[i]>=sharp_minscale);
} }
...@@ -324,7 +324,7 @@ MRUTIL_NOINLINE static void calc_alm2map (sharp_job * MRUTIL_RESTRICT job, ...@@ -324,7 +324,7 @@ MRUTIL_NOINLINE static void calc_alm2map (sharp_job * MRUTIL_RESTRICT job,
d->lam1[i] = d->lam2[i]; d->lam1[i] = d->lam2[i];
d->lam2[i] = tmp; d->lam2[i] = tmp;
if (rescale(&d->lam1[i], &d->lam2[i], &d->scale[i], sharp_ftol)) if (rescale(&d->lam1[i], &d->lam2[i], &d->scale[i], sharp_ftol))
getCorfac(d->scale[i], &d->corfac[i], gen->cf); getCorfac(d->scale[i], &d->corfac[i], gen.cf);
full_ieee &= all_of(d->scale[i]>=sharp_minscale); full_ieee &= all_of(d->scale[i]>=sharp_minscale);
} }
l+=2; ++il; l+=2; ++il;
...@@ -340,7 +340,7 @@ MRUTIL_NOINLINE static void calc_alm2map (sharp_job * MRUTIL_RESTRICT job, ...@@ -340,7 +340,7 @@ MRUTIL_NOINLINE static void calc_alm2map (sharp_job * MRUTIL_RESTRICT job,
} }
MRUTIL_NOINLINE static void map2alm_kernel(s0data_v * MRUTIL_RESTRICT d, MRUTIL_NOINLINE static void map2alm_kernel(s0data_v * MRUTIL_RESTRICT d,
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef, dcmplx * MRUTIL_RESTRICT alm, int l, const vector<sharp_ylmgen_dbl2> &coef, dcmplx * MRUTIL_RESTRICT alm, int l,
int il, int lmax, int nv2) int il, int lmax, int nv2)
{ {
for (; l<=lmax-2; il+=2, l+=4) for (; l<=lmax-2; il+=2, l+=4)
...@@ -384,21 +384,21 @@ MRUTIL_NOINLINE static void map2alm_kernel(s0data_v * MRUTIL_RESTRICT d, ...@@ -384,21 +384,21 @@ MRUTIL_NOINLINE static void map2alm_kernel(s0data_v * MRUTIL_RESTRICT d,
} }
MRUTIL_NOINLINE static void calc_map2alm (sharp_job * MRUTIL_RESTRICT job, MRUTIL_NOINLINE static void calc_map2alm (sharp_job * MRUTIL_RESTRICT job,
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, s0data_v * MRUTIL_RESTRICT d, int nth) const sharp_Ylmgen &gen, s0data_v * MRUTIL_RESTRICT d, int nth)
{ {
int l,il,lmax=gen->lmax; int l,il,lmax=gen.lmax;
int nv2 = (nth+VLEN-1)/VLEN; int nv2 = (nth+VLEN-1)/VLEN;
iter_to_ieee(gen, d, &l, &il, nv2); iter_to_ieee(gen, d, &l, &il, nv2);
job->opcnt += il * 4*nth; job->opcnt += il * 4*nth;
if (l>lmax) return; if (l>lmax) return;
job->opcnt += (lmax+1-l) * 6*nth; job->opcnt += (lmax+1-l) * 6*nth;
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef = gen->coef; auto &coef = gen.coef;
dcmplx * MRUTIL_RESTRICT alm=job->almtmp; dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
int full_ieee=1; int full_ieee=1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
{ {
getCorfac(d->scale[i], &d->corfac[i], gen->cf); getCorfac(d->scale[i], &d->corfac[i], gen.cf);
full_ieee &= all_of(d->scale[i]>=sharp_minscale); full_ieee &= all_of(d->scale[i]>=sharp_minscale);
} }
...@@ -417,7 +417,7 @@ MRUTIL_NOINLINE static void calc_map2alm (sharp_job * MRUTIL_RESTRICT job, ...@@ -417,7 +417,7 @@ MRUTIL_NOINLINE static void calc_map2alm (sharp_job * MRUTIL_RESTRICT job,
d->lam1[i] = d->lam2[i]; d->lam1[i] = d->lam2[i];
d->lam2[i] = tmp; d->lam2[i] = tmp;
if (rescale(&d->lam1[i], &d->lam2[i], &d->scale[i], sharp_ftol)) if (rescale(&d->lam1[i], &d->lam2[i], &d->scale[i], sharp_ftol))
getCorfac(d->scale[i], &d->corfac[i], gen->cf); getCorfac(d->scale[i], &d->corfac[i], gen.cf);
full_ieee &= all_of(d->scale[i]>=sharp_minscale); full_ieee &= all_of(d->scale[i]>=sharp_minscale);
} }
vhsum_cmplx_special (atmp[0], atmp[1], atmp[2], atmp[3], &alm[l]); vhsum_cmplx_special (atmp[0], atmp[1], atmp[2], atmp[3], &alm[l]);
...@@ -433,12 +433,12 @@ MRUTIL_NOINLINE static void calc_map2alm (sharp_job * MRUTIL_RESTRICT job, ...@@ -433,12 +433,12 @@ MRUTIL_NOINLINE static void calc_map2alm (sharp_job * MRUTIL_RESTRICT job,
map2alm_kernel(d, coef, alm, l, il, lmax, nv2); map2alm_kernel(d, coef, alm, l, il, lmax, nv2);
} }
MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen &gen,
sxdata_v * MRUTIL_RESTRICT d, int * MRUTIL_RESTRICT l_, int nv2) sxdata_v * MRUTIL_RESTRICT d, int * MRUTIL_RESTRICT l_, int nv2)
{ {
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef; const auto &fx = gen.coef;
Tv prefac=gen->prefac[gen->m], Tv prefac=gen.prefac[gen.m],
prescale=gen->fscale[gen->m]; prescale=gen.fscale[gen.m];
Tv limscale=sharp_limscale; Tv limscale=sharp_limscale;
int below_limit=1; int below_limit=1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
...@@ -450,10 +450,10 @@ MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * MRUTIL_RES ...@@ -450,10 +450,10 @@ MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * MRUTIL_RES
where(mask&(d->cth[i]<0),sth2)*=-1.; where(mask&(d->cth[i]<0),sth2)*=-1.;
Tv ccp, ccps, ssp, ssps, csp, csps, scp, scps; Tv ccp, ccps, ssp, ssps, csp, csps, scp, scps;
mypow(cth2,gen->cosPow,gen->powlimit,&ccp,&ccps); mypow(cth2,gen.cosPow,gen.powlimit,&ccp,&ccps);
mypow(sth2,gen->sinPow,gen->powlimit,&ssp,&ssps); mypow(sth2,gen.sinPow,gen.powlimit,&ssp,&ssps);
mypow(cth2,gen->sinPow,gen->powlimit,&csp,&csps); mypow(cth2,gen.sinPow,gen.powlimit,&csp,&csps);
mypow(sth2,gen->cosPow,gen->powlimit,&scp,&scps); mypow(sth2,gen.cosPow,gen.powlimit,&scp,&scps);
d->l1p[i] = 0; d->l1p[i] = 0;
d->l1m[i] = 0; d->l1m[i] = 0;
...@@ -467,11 +467,11 @@ MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * MRUTIL_RES ...@@ -467,11 +467,11 @@ MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * MRUTIL_RES
d->scp[i] += ssps; d->scp[i] += ssps;
d->l2m[i] *= scp; d->l2m[i] *= scp;
d->scm[i] += scps; d->scm[i] += scps;
if (gen->preMinus_p) if (gen.preMinus_p)
d->l2p[i] = -d->l2p[i]; d->l2p[i] = -d->l2p[i];
if (gen->preMinus_m) if (gen.preMinus_m)
d->l2m[i] = -d->l2m[i]; d->l2m[i] = -d->l2m[i];
if (gen->s&1) if (gen.s&1)
d->l2p[i] = -d->l2p[i]; d->l2p[i] = -d->l2p[i];
Tvnormalize(&d->l2m[i],&d->scm[i],sharp_ftol); Tvnormalize(&d->l2m[i],&d->scm[i],sharp_ftol);
...@@ -481,11 +481,11 @@ MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * MRUTIL_RES ...@@ -481,11 +481,11 @@ MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * MRUTIL_RES
all_of(d->scp[i]<limscale); all_of(d->scp[i]<limscale);
} }
int l=gen->mhi; int l=gen.mhi;
while (below_limit) while (below_limit)
{ {
if (l+2>gen->lmax) {*l_=gen->lmax+1;return;} if (l+2>gen.lmax) {*l_=gen.lmax+1;return;}
below_limit=1; below_limit=1;
Tv fx10=fx[l+1].a,fx11=fx[l+1].b; Tv fx10=fx[l+1].a,fx11=fx[l+1].b;
Tv fx20=fx[l+2].a,fx21=fx[l+2].b; Tv fx20=fx[l+2].a,fx21=fx[l+2].b;
...@@ -507,7 +507,7 @@ MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * MRUTIL_RES ...@@ -507,7 +507,7 @@ MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * MRUTIL_RES
} }
MRUTIL_NOINLINE static void alm2map_spin_kernel(sxdata_v * MRUTIL_RESTRICT d, MRUTIL_NOINLINE static void alm2map_spin_kernel(sxdata_v * MRUTIL_RESTRICT d,
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx, const dcmplx * MRUTIL_RESTRICT alm, const vector<sharp_ylmgen_dbl2> &fx, const dcmplx * MRUTIL_RESTRICT alm,
int l, int lmax, int nv2) int l, int lmax, int nv2)
{ {
int lsave = l; int lsave = l;
...@@ -563,22 +563,22 @@ MRUTIL_NOINLINE static void alm2map_spin_kernel(sxdata_v * MRUTIL_RESTRICT d, ...@@ -563,22 +563,22 @@ MRUTIL_NOINLINE static void alm2map_spin_kernel(sxdata_v * MRUTIL_RESTRICT d,
} }
MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_job * MRUTIL_RESTRICT job, MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_job * MRUTIL_RESTRICT job,
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, sxdata_v * MRUTIL_RESTRICT d, int nth) const sharp_Ylmgen &gen, sxdata_v * MRUTIL_RESTRICT d, int nth)
{ {
int l,lmax=gen->lmax; int l,lmax=gen.lmax;
int nv2 = (nth+VLEN-1)/VLEN; int nv2 = (nth+VLEN-1)/VLEN;
iter_to_ieee_spin(gen, d, &l, nv2); iter_to_ieee_spin(gen, d, &l, nv2);
job->opcnt += (l-gen->mhi) * 7*nth; job->opcnt += (l-gen.mhi) * 7*nth;
if (l>lmax) return; if (l>lmax) return;
job->opcnt += (lmax+1-l) * 23*nth; job->opcnt += (lmax+1-l) * 23*nth;
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef; const auto &fx = gen.coef;
const dcmplx * MRUTIL_RESTRICT alm=job->almtmp; const dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
int full_ieee=1; int full_ieee=1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
{ {
getCorfac(d->scp[i], &d->cfp[i], gen->cf); getCorfac(d->scp[i], &d->cfp[i], gen.cf);
getCorfac(d->scm[i], &d->cfm[i], gen->cf); getCorfac(d->scm[i], &d->cfm[i], gen.cf);
full_ieee &= all_of(d->scp[i]>=sharp_minscale) && full_ieee &= all_of(d->scp[i]>=sharp_minscale) &&
all_of(d->scm[i]>=sharp_minscale); all_of(d->scm[i]>=sharp_minscale);
} }
...@@ -613,10 +613,10 @@ MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_job * MRUTIL_RESTRICT job, ...@@ -613,10 +613,10 @@ MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_job * MRUTIL_RESTRICT job,
d->l2p[i] = (d->cth[i]*fx20 - fx21)*d->l1p[i] - d->l2p[i]; d->l2p[i] = (d->cth[i]*fx20 - fx21)*d->l1p[i] - d->l2p[i];
d->l2m[i] = (d->cth[i]*fx20 + fx21)*d->l1m[i] - d->l2m[i]; d->l2m[i] = (d->cth[i]*fx20 + fx21)*d->l1m[i] - d->l2m[i];
if (rescale(&d->l1p[i], &d->l2p[i], &d->scp[i], sharp_ftol)) if (rescale(&d->l1p[i], &d->l2p[i], &d->scp[i], sharp_ftol))
getCorfac(d->scp[i], &d->cfp[i], gen->cf); getCorfac(d->scp[i], &d->cfp[i], gen.cf);
full_ieee &= all_of(d->scp[i]>=sharp_minscale); full_ieee &= all_of(d->scp[i]>=sharp_minscale);
if (rescale(&d->l1m[i], &d->l2m[i], &d->scm[i], sharp_ftol)) if (rescale(&d->l1m[i], &d->l2m[i], &d->scm[i], sharp_ftol))
getCorfac(d->scm[i], &d->cfm[i], gen->cf); getCorfac(d->scm[i], &d->cfm[i], gen.cf);
full_ieee &= all_of(d->scm[i]>=sharp_minscale); full_ieee &= all_of(d->scm[i]>=sharp_minscale);
} }
l+=2; l+=2;
...@@ -643,7 +643,7 @@ MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_job * MRUTIL_RESTRICT job, ...@@ -643,7 +643,7 @@ MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_job * MRUTIL_RESTRICT job,
} }
MRUTIL_NOINLINE static void map2alm_spin_kernel(sxdata_v * MRUTIL_RESTRICT d, MRUTIL_NOINLINE static void map2alm_spin_kernel(sxdata_v * MRUTIL_RESTRICT d,
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx, dcmplx * MRUTIL_RESTRICT alm, const vector<sharp_ylmgen_dbl2> &fx, dcmplx * MRUTIL_RESTRICT alm,
int l, int lmax, int nv2) int l, int lmax, int nv2)
{ {
int lsave=l; int lsave=l;
...@@ -697,22 +697,22 @@ MRUTIL_NOINLINE static void map2alm_spin_kernel(sxdata_v * MRUTIL_RESTRICT d, ...@@ -697,22 +697,22 @@ MRUTIL_NOINLINE static void map2alm_spin_kernel(sxdata_v * MRUTIL_RESTRICT d,
} }
MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_job * MRUTIL_RESTRICT job, MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_job * MRUTIL_RESTRICT job,
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, sxdata_v * MRUTIL_RESTRICT d, int nth) const sharp_Ylmgen &gen, sxdata_v * MRUTIL_RESTRICT d, int nth)
{ {
int l,lmax=gen->lmax; int l,lmax=gen.lmax;
int nv2 = (nth+VLEN-1)/VLEN; int nv2 = (nth+VLEN-1)/VLEN;
iter_to_ieee_spin(gen, d, &l, nv2); iter_to_ieee_spin(gen, d, &l, nv2);
job->opcnt += (l-gen->mhi) * 7*nth; job->opcnt += (l-gen.mhi) * 7*nth;
if (l>lmax) return; if (l>lmax) return;
job->opcnt += (lmax+1-l) * 23*nth; job->opcnt += (lmax+1-l) * 23*nth;
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef; const auto &fx = gen.coef;
dcmplx * MRUTIL_RESTRICT alm=job->almtmp; dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
int full_ieee=1; int full_ieee=1;
for (int i=0; i<nv2; ++i) for (int i=0; i<nv2; ++i)
{ {
getCorfac(d->scp[i], &d->cfp[i], gen->cf); getCorfac(d->scp[i], &d->cfp[i], gen.cf);
getCorfac(d->scm[i], &d->cfm[i], gen->cf); getCorfac(d->scm[i], &d->cfm[i], gen.cf);
full_ieee &= all_of(d->scp[i]>=sharp_minscale) && full_ieee &= all_of(d->scp[i]>=sharp_minscale) &&
all_of(d->scm[i]>=sharp_minscale); all_of(d->scm[i]>=sharp_minscale);
} }
...@@ -750,10 +750,10 @@ MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_job * MRUTIL_RESTRICT job, ...@@ -750,10 +750,10 @@ MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_job * MRUTIL_RESTRICT job,
d->l2p[i] = (d->cth[i]*fx20 - fx21)*d->l1p[i] - d->l2p[i]; d->l2p[i] = (d->cth[i]*fx20 - fx21)*d->l1p[i] - d->l2p[i];
d->l2m[i] = (d->cth[i]*fx20 + fx21)*d->l1m[i] - d->l2m[i]; d->l2m[i] = (d->cth[i]*fx20 + fx21)*d->l1m[i] - d->l2m[i];
if (rescale(&d->l1p[i], &d->l2p[i], &d->scp[i], sharp_ftol)) if (rescale(&d->l1p[i], &d->l2p[i], &d->scp[i], sharp_ftol))
getCorfac(d->scp[i], &d->cfp[i], gen->cf); getCorfac(d->scp[i], &d->cfp[i], gen.cf);
full_ieee &= all_of(d->scp[i]>=sharp_minscale); full_ieee &= all_of(d->scp[i]>=sharp_minscale);
if (rescale(&d->l1m[i], &d->l2m[i], &d->scm[i], sharp_ftol)) if (rescale(&d->l1m[i], &d->l2m[i], &d->scm[i], sharp_ftol))
getCorfac(d->scm[i], &d->cfm[i], gen->cf); getCorfac(d->scm[i], &d->cfm[i], gen.cf);
full_ieee &= all_of(d->scm[i]>=sharp_minscale); full_ieee &= all_of(d->scm[i]>=sharp_minscale);
} }
vhsum_cmplx_special (agr1,agi1,acr1,aci1,&alm[2*l]); vhsum_cmplx_special (agr1,agi1,acr1,aci1,&alm[2*l]);
...@@ -774,7 +774,7 @@ MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_job * MRUTIL_RESTRICT job, ...@@ -774,7 +774,7 @@ MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_job * MRUTIL_RESTRICT job,
MRUTIL_NOINLINE static void alm2map_deriv1_kernel(sxdata_v * MRUTIL_RESTRICT d, MRUTIL_NOINLINE static void alm2map_deriv1_kernel(sxdata_v * MRUTIL_RESTRICT d,
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx, const dcmplx * MRUTIL_RESTRICT alm, const vector<sharp_ylmgen_dbl2> &fx, const dcmplx * MRUTIL_RESTRICT alm,
int l, int lmax, int nv2) int l, int lmax, int nv2)
{ {
int lsave=l; int lsave=l;
...@@ -818,22 +818,22 @@ MRUTIL_NOINLINE static void alm2map_deriv1_kernel(sxdata_v * MRUTIL_RESTRICT d, ...@@ -818,22 +818,22 @@ MRUTIL_NOINLINE static void alm2map_deriv1_kernel(sxdata_v * MRUTIL_RESTRICT d,
} }
MRUTIL_NOINLINE static void calc_alm2map_deriv1(sharp_job * MRUTIL_RESTRICT job,