Commit 5a11a3b4 authored by Martin Reinecke

more type safety

parent dc7877d7
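
In short: sharp_execute no longer takes untyped void* buffers plus a run-time
SHARP_DP precision flag; the numeric type is now a template parameter, and the
a_lm/map buffers are vectors of typed pointers whose lengths are checked
against nalm()/nmaps(). A minimal calling sketch (the analyse_spin0 wrapper
and its arguments are illustrative, not part of this commit):

// Sketch: spin-0 analysis with the new typed interface. Geometry and a_lm
// layout objects are assumed to be set up elsewhere.
#include <complex>
#include <vector>
#include "libsharp2/sharp.h"

void analyse_spin0 (const sharp_geom_info &ginfo, const sharp_alm_info &ainfo,
  std::complex<double> *alm, double *map)
  {
  // one a_lm component and one map suffice for a spin-0 transform
  std::vector<std::complex<double> *> almv{alm};
  std::vector<double *> mapv{map};
  // before this commit (roughly):
  //   sharp_execute(SHARP_MAP2ALM, 0, almv.data(), mapv.data(),
  //                 ginfo, ainfo, SHARP_DP, nullptr, nullptr);
  sharp_execute<double> (SHARP_MAP2ALM, 0, almv, mapv, ginfo, ainfo,
    0, nullptr, nullptr);
  }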
@@ -197,181 +197,158 @@ struct ringhelper
     }
   };

-MRUTIL_NOINLINE void sharp_job::init_output()
+template<typename T> void sharp_job<T>::init_output()
   {
   if (flags&SHARP_ADD) return;
   if (type == SHARP_MAP2ALM)
-    for (size_t i=0; i<nalm; ++i)
-      (flags&SHARP_DP) ? ainfo->clear_alm (reinterpret_cast<dcmplx *>(alm[i]))
-                       : ainfo->clear_alm (reinterpret_cast<fcmplx *>(alm[i]));
+    for (size_t i=0; i<alm.size(); ++i)
+      ainfo.clear_alm (alm[i]);
   else
-    for (size_t i=0; i<nmaps; ++i)
-      (flags&SHARP_DP) ? ginfo->clear_map(reinterpret_cast<double *>(map[i]))
-                       : ginfo->clear_map(reinterpret_cast<float *>(map[i]));
+    for (size_t i=0; i<map.size(); ++i)
+      ginfo.clear_map(map[i]);
   }

-MRUTIL_NOINLINE void sharp_job::alloc_phase (size_t nm, size_t ntheta, vector<dcmplx> &data)
+MRUTIL_NOINLINE void sharp_protojob::alloc_phase (size_t nm, size_t ntheta, vector<dcmplx> &data)
   {
   if (type==SHARP_MAP2ALM)
     {
-    s_m=2*nmaps;
+    s_m=2*nmaps();
     if (((s_m*16*nm)&1023)==0) nm+=3; // hack to avoid critical strides
     s_th=s_m*nm;
     }
   else
     {
-    s_th=2*nmaps;
+    s_th=2*nmaps();
     if (((s_th*16*ntheta)&1023)==0) ntheta+=3; // hack to avoid critical strides
     s_m=s_th*ntheta;
     }
-  data.resize(2*nmaps*nm*ntheta);
+  data.resize(2*nmaps()*nm*ntheta);
   phase=data.data();
   }

-void sharp_job::alloc_almtmp (size_t lmax, vector<dcmplx> &data)
+void sharp_protojob::alloc_almtmp (size_t lmax, vector<dcmplx> &data)
   {
-  data.resize(nalm*(lmax+2));
+  data.resize(nalm()*(lmax+2));
   almtmp=data.data();
   }

-MRUTIL_NOINLINE void sharp_job::alm2almtmp (size_t lmax, size_t mi)
+template<typename T> MRUTIL_NOINLINE void sharp_job<T>::alm2almtmp (size_t mi)
   {
+  size_t nalm_ = nalm();
+  size_t lmax = ainfo.lmax();
   if (type!=SHARP_MAP2ALM)
     {
-    auto m=ainfo->mval(mi);
+    auto m=ainfo.mval(mi);
     auto lmin=(m<spin) ? spin : m;
-    if (flags&SHARP_DP)
-      {
-      for (size_t i=0; i<nalm; ++i)
-        ainfo->get_alm(mi, reinterpret_cast<dcmplx **>(alm)[i],almtmp+i,nalm);
-      for (auto l=m; l<lmin; ++l)
-        for (size_t i=0; i<nalm; ++i)
-          almtmp[nalm*l+i] = 0;
-      for (size_t i=0; i<nalm; ++i)
-        almtmp[nalm*(lmax+1)+i] = 0;
-      }
-    else
-      {
-      for (size_t i=0; i<nalm; ++i)
-        ainfo->get_alm(mi, reinterpret_cast<fcmplx **>(alm)[i],almtmp+i,nalm);
-      for (auto l=m; l<lmin; ++l)
-        for (size_t i=0; i<nalm; ++i)
-          almtmp[nalm*l+i] = 0;
-      for (size_t i=0; i<nalm; ++i)
-        almtmp[nalm*(lmax+1)+i] = 0;
-      }
+    for (size_t i=0; i<nalm_; ++i)
+      ainfo.get_alm(mi, alm[i], almtmp+i, nalm_);
+    for (auto l=m; l<lmin; ++l)
+      for (size_t i=0; i<nalm_; ++i)
+        almtmp[nalm_*l+i] = 0;
+    for (size_t i=0; i<nalm_; ++i)
+      almtmp[nalm_*(lmax+1)+i] = 0;
     if (spin>0)
       for (auto l=lmin; l<=lmax; ++l)
-        for (size_t i=0; i<nalm; ++i)
-          almtmp[nalm*l+i] *= norm_l[l];
+        for (size_t i=0; i<nalm_; ++i)
+          almtmp[nalm_*l+i] *= norm_l[l];
     }
   else
-    for (size_t i=nalm*ainfo->mval(mi); i<nalm*(lmax+2); ++i)
+    for (size_t i=nalm_*ainfo.mval(mi); i<nalm_*(lmax+2); ++i)
      almtmp[i]=0;
   }

-MRUTIL_NOINLINE void sharp_job::almtmp2alm (size_t lmax, size_t mi)
+template<typename T> MRUTIL_NOINLINE void sharp_job<T>::almtmp2alm (size_t mi)
   {
   if (type != SHARP_MAP2ALM) return;
-  auto m=ainfo->mval(mi);
+  size_t lmax = ainfo.lmax();
+  auto m=ainfo.mval(mi);
   auto lmin=(m<spin) ? spin : m;
+  size_t nalm_ = nalm();
   if (spin>0)
     for (auto l=lmin; l<=lmax; ++l)
-      for (size_t i=0; i<nalm; ++i)
-        almtmp[nalm*l+i] *= norm_l[l];
-  if (flags&SHARP_DP)
-    for (size_t i=0; i<nalm; ++i)
-      ainfo->add_alm(mi, almtmp+i, reinterpret_cast<dcmplx **>(alm)[i],nalm);
-  else
-    for (size_t i=0; i<nalm; ++i)
-      ainfo->add_alm(mi, almtmp+i, reinterpret_cast<fcmplx **>(alm)[i],nalm);
+      for (size_t i=0; i<nalm_; ++i)
+        almtmp[nalm_*l+i] *= norm_l[l];
+  for (size_t i=0; i<nalm_; ++i)
+    ainfo.add_alm(mi, almtmp+i, alm[i], nalm_);
   }

-MRUTIL_NOINLINE void sharp_job::ringtmp2ring (size_t iring,
+template<typename T> MRUTIL_NOINLINE void sharp_job<T>::ringtmp2ring (size_t iring,
   const vector<double> &ringtmp, ptrdiff_t rstride)
   {
-  if (flags & SHARP_DP)
-    for (size_t i=0; i<nmaps; ++i)
-      ginfo->add_ring(flags&SHARP_USE_WEIGHTS, iring, &ringtmp[i*rstride+1], ((double **)map)[i]);
-  else
-    for (size_t i=0; i<nmaps; ++i)
-      ginfo->add_ring(flags&SHARP_USE_WEIGHTS, iring, &ringtmp[i*rstride+1], ((float **)map)[i]);
+  for (size_t i=0; i<nmaps(); ++i)
+    ginfo.add_ring(flags&SHARP_USE_WEIGHTS, iring, &ringtmp[i*rstride+1], map[i]);
   }

-MRUTIL_NOINLINE void sharp_job::ring2ringtmp (size_t iring,
+template<typename T> MRUTIL_NOINLINE void sharp_job<T>::ring2ringtmp (size_t iring,
   vector<double> &ringtmp, ptrdiff_t rstride)
   {
-  if (flags & SHARP_DP)
-    for (size_t i=0; i<nmaps; ++i)
-      ginfo->get_ring(flags&SHARP_USE_WEIGHTS, iring, ((double **)map)[i], &ringtmp[i*rstride+1]);
-  else
-    for (size_t i=0; i<nmaps; ++i)
-      ginfo->get_ring(flags&SHARP_USE_WEIGHTS, iring, ((float **)map)[i], &ringtmp[i*rstride+1]);
+  for (size_t i=0; i<nmaps(); ++i)
+    ginfo.get_ring(flags&SHARP_USE_WEIGHTS, iring, map[i], &ringtmp[i*rstride+1]);
   }

 //FIXME: set phase to zero if not SHARP_MAP2ALM?
-MRUTIL_NOINLINE void sharp_job::map2phase (size_t mmax, size_t llim, size_t ulim)
+template<typename T> MRUTIL_NOINLINE void sharp_job<T>::map2phase (size_t mmax, size_t llim, size_t ulim)
   {
   if (type != SHARP_MAP2ALM) return;
   int pstride = s_m;
   mr::execDynamic(ulim-llim, 0, 1, [&](mr::Scheduler &sched)
     {
     ringhelper helper;
-    int rstride=ginfo->nphmax()+2;
-    vector<double> ringtmp(nmaps*rstride);
+    int rstride=ginfo.nphmax()+2;
+    vector<double> ringtmp(nmaps()*rstride);
     while (auto rng=sched.getNext()) for(auto ith=rng.lo+llim; ith<rng.hi+llim; ++ith)
       {
       int dim2 = s_th*(ith-llim);
-      ring2ringtmp(ginfo->pair(ith).r1,ringtmp,rstride);
-      for (size_t i=0; i<nmaps; ++i)
-        helper.ring2phase (*ginfo, ginfo->pair(ith).r1,
+      ring2ringtmp(ginfo.pair(ith).r1,ringtmp,rstride);
+      for (size_t i=0; i<nmaps(); ++i)
+        helper.ring2phase (ginfo, ginfo.pair(ith).r1,
           &ringtmp[i*rstride],mmax,&phase[dim2+2*i],pstride);
-      if (ginfo->pair(ith).r2!=~size_t(0))
+      if (ginfo.pair(ith).r2!=~size_t(0))
         {
-        ring2ringtmp(ginfo->pair(ith).r2,ringtmp,rstride);
-        for (size_t i=0; i<nmaps; ++i)
-          helper.ring2phase (*ginfo, ginfo->pair(ith).r2,
+        ring2ringtmp(ginfo.pair(ith).r2,ringtmp,rstride);
+        for (size_t i=0; i<nmaps(); ++i)
+          helper.ring2phase (ginfo, ginfo.pair(ith).r2,
             &ringtmp[i*rstride],mmax,&phase[dim2+2*i+1],pstride);
         }
       }
     }); /* end of parallel region */
   }

-MRUTIL_NOINLINE void sharp_job::phase2map (size_t mmax, size_t llim, size_t ulim)
+template<typename T> MRUTIL_NOINLINE void sharp_job<T>::phase2map (size_t mmax, size_t llim, size_t ulim)
   {
   if (type == SHARP_MAP2ALM) return;
   int pstride = s_m;
   mr::execDynamic(ulim-llim, 0, 1, [&](mr::Scheduler &sched)
     {
     ringhelper helper;
-    int rstride=ginfo->nphmax()+2;
-    vector<double> ringtmp(nmaps*rstride);
+    int rstride=ginfo.nphmax()+2;
+    vector<double> ringtmp(nmaps()*rstride);
     while (auto rng=sched.getNext()) for(auto ith=rng.lo+llim; ith<rng.hi+llim; ++ith)
       {
       int dim2 = s_th*(ith-llim);
-      for (size_t i=0; i<nmaps; ++i)
-        helper.phase2ring (*ginfo, ginfo->pair(ith).r1,
+      for (size_t i=0; i<nmaps(); ++i)
+        helper.phase2ring (ginfo, ginfo.pair(ith).r1,
           &ringtmp[i*rstride],mmax,&phase[dim2+2*i],pstride);
-      ringtmp2ring(ginfo->pair(ith).r1,ringtmp,rstride);
-      if (ginfo->pair(ith).r2!=~size_t(0))
+      ringtmp2ring(ginfo.pair(ith).r1,ringtmp,rstride);
+      if (ginfo.pair(ith).r2!=~size_t(0))
         {
-        for (size_t i=0; i<nmaps; ++i)
-          helper.phase2ring (*ginfo, ginfo->pair(ith).r2,
+        for (size_t i=0; i<nmaps(); ++i)
+          helper.phase2ring (ginfo, ginfo.pair(ith).r2,
             &ringtmp[i*rstride],mmax,&phase[dim2+2*i+1],pstride);
-        ringtmp2ring(ginfo->pair(ith).r2,ringtmp,rstride);
+        ringtmp2ring(ginfo.pair(ith).r2,ringtmp,rstride);
         }
       }
     }); /* end of parallel region */
   }

-MRUTIL_NOINLINE void sharp_job::execute()
+template<typename T> MRUTIL_NOINLINE void sharp_job<T>::execute()
   {
   mr::SimpleTimer timer;
   opcnt=0;
-  size_t lmax = ainfo->lmax(),
-         mmax = ainfo->mmax();
+  size_t lmax = ainfo.lmax(),
+         mmax = ainfo.mmax();

   norm_l = (type==SHARP_ALM2MAP_DERIV1) ?
     sharp_Ylmgen::get_d1norm (lmax) :
@@ -381,7 +358,7 @@ MRUTIL_NOINLINE void sharp_job::execute()
   init_output();

   int nchunks, chunksize;
-  get_chunk_info(ginfo->npairs(),sharp_veclen()*sharp_max_nvec(spin),
+  get_chunk_info(ginfo.npairs(),sharp_veclen()*sharp_max_nvec(spin),
     nchunks,chunksize);
   vector<dcmplx> phasebuffer;
 //FIXME: needs to be changed to "nm"
@@ -391,22 +368,22 @@ MRUTIL_NOINLINE void sharp_job::execute()
 /* chunk loop */
   for (int chunk=0; chunk<nchunks; ++chunk)
     {
-    size_t llim=chunk*chunksize, ulim=min(llim+chunksize,ginfo->npairs());
+    size_t llim=chunk*chunksize, ulim=min(llim+chunksize,ginfo.npairs());
     vector<int> ispair(ulim-llim);
     vector<size_t> mlim(ulim-llim);
     vector<double> cth(ulim-llim), sth(ulim-llim);
     for (size_t i=0; i<ulim-llim; ++i)
       {
-      ispair[i] = ginfo->pair(i+llim).r2!=~size_t(0);
-      cth[i] = ginfo->cth(ginfo->pair(i+llim).r1);
-      sth[i] = ginfo->sth(ginfo->pair(i+llim).r1);
+      ispair[i] = ginfo.pair(i+llim).r2!=~size_t(0);
+      cth[i] = ginfo.cth(ginfo.pair(i+llim).r1);
+      sth[i] = ginfo.sth(ginfo.pair(i+llim).r1);
       mlim[i] = sharp_get_mlim(lmax, spin, sth[i], cth[i]);
       }

 /* map->phase where necessary */
     map2phase(mmax, llim, ulim);

-    mr::execDynamic(ainfo->nm(), 0, 1, [&](mr::Scheduler &sched)
+    mr::execDynamic(ainfo.nm(), 0, 1, [&](mr::Scheduler &sched)
       {
       sharp_job ljob = *this;
       ljob.opcnt=0;
@@ -417,12 +394,12 @@ MRUTIL_NOINLINE void sharp_job::execute()
       while (auto rng=sched.getNext()) for(auto mi=rng.lo; mi<rng.hi; ++mi)
         {
 /* alm->alm_tmp where necessary */
-        ljob.alm2almtmp(lmax, mi);
+        ljob.alm2almtmp(mi);

         inner_loop (ljob, ispair.data(), cth.data(), sth.data(), llim, ulim, generator, mi, mlim.data());

 /* alm_tmp->alm where necessary */
-        ljob.almtmp2alm(lmax, mi);
+        ljob.almtmp2alm(mi);
         }
       a_opcnt+=ljob.opcnt;
@@ -436,41 +413,36 @@ MRUTIL_NOINLINE void sharp_job::execute()
   time=timer();
   }

-void sharp_job::build_common (sharp_jobtype type,
-  size_t spin, void *alm, void *map, const sharp_geom_info &geom_info,
-  const sharp_alm_info &alm_info, size_t flags)
+template<typename T> sharp_job<T>::sharp_job (sharp_jobtype type_,
+  size_t spin_, const vector<complex<T> *> &alm_, const vector<T *> &map_,
+  const sharp_geom_info &geom_info, const sharp_alm_info &alm_info, size_t flags_)
+  : sharp_protojob(type_, spin_, geom_info, alm_info, flags_), alm(alm_), map(map_)
   {
-  if (type==SHARP_ALM2MAP_DERIV1) spin=1;
-  if (type==SHARP_MAP2ALM) flags|=SHARP_USE_WEIGHTS;
-  if (type==SHARP_Yt) type=SHARP_MAP2ALM;
-  if (type==SHARP_WY) { type=SHARP_ALM2MAP; flags|=SHARP_USE_WEIGHTS; }
-  MR_assert(spin<=alm_info.lmax(), "bad spin");
-  this->type = type;
-  this->spin = spin;
-  nmaps = (type==SHARP_ALM2MAP_DERIV1) ? 2 : ((spin>0) ? 2 : 1);
-  nalm = (type==SHARP_ALM2MAP_DERIV1) ? 1 : ((spin>0) ? 2 : 1);
-  ginfo = &geom_info;
-  ainfo = &alm_info;
-  this->flags = flags;
-  time = 0.;
-  opcnt = 0;
-  this->alm=(void **)alm;
-  this->map=(void **)map;
+  MR_assert(alm.size()==nalm(), "incorrect # of a_lm components");
+  MR_assert(map.size()==nmaps(), "incorrect # of map components");
   }

-void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
+template<typename T> void sharp_execute (sharp_jobtype type, int spin, const vector<complex<T> *> &alm,
+  const vector<T *> &map,
   const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
   int flags, double *time, unsigned long long *opcnt)
   {
-  sharp_job job;
-  job.build_common (type, spin, alm, map, geom_info, alm_info, flags);
+  sharp_job job(type, spin, alm, map, geom_info, alm_info, flags);
   job.execute();
   if (time!=nullptr) *time = job.time;
   if (opcnt!=nullptr) *opcnt = job.opcnt;
   }

+template void sharp_execute<double> (sharp_jobtype type, int spin, const vector<complex<double> *> &alm,
+  const vector<double *> &map,
+  const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
+  int flags, double *time, unsigned long long *opcnt);
+template void sharp_execute<float> (sharp_jobtype type, int spin, const vector<complex<float> *> &alm,
+  const vector<float *> &map,
+  const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
+  int flags, double *time, unsigned long long *opcnt);
+
 void sharp_set_chunksize_min(int new_chunksize_min)
   { chunksize_min=new_chunksize_min; }
 void sharp_set_nchunks_max(int new_nchunks_max)
......
@@ -99,8 +99,7 @@ enum sharp_jobtype { SHARP_YtW=0, /*!< analysis */
                    };

 /*! Job flags */
-enum sharp_jobflags { SHARP_DP = 1<<4,
-                      /*!< map and a_lm are in double precision */
+enum sharp_jobflags {
                       SHARP_ADD = 1<<5,
                       /*!< results are added to the output arrays, instead of
                            overwriting them */
@@ -132,7 +131,8 @@ enum sharp_jobflags { SHARP_DP = 1<<4,
        (in seconds) will be written here.
   \param opcnt If not nullptr, a conservative estimate of the total floating point
        operation count for this SHT will be written here. */
-void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
+template<typename T> void sharp_execute (sharp_jobtype type, int spin, const std::vector<std::complex<T> *> &alm,
+  const std::vector<T *> &map,
   const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
   int flags, double *time, unsigned long long *opcnt);
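
Note that sharp_execute is now a template declared here but defined in the
source file; the commit keeps this workable by explicitly instantiating the
double and float variants at the end of sharp.cc, so those remain the two
supported element types.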
......
@@ -31,7 +31,7 @@
 #undef GENERIC_ARCH
 #undef ARCH

-using t_inner_loop = void (*) (sharp_job &job, const int *ispair,
+using t_inner_loop = void (*) (sharp_protojob &job, const int *ispair,
   const double *cth_, const double *sth_, size_t llim, size_t ulim,
   sharp_Ylmgen &gen, size_t mi, const size_t *mlim);
 using t_veclen = size_t (*) (void);
@@ -62,7 +62,7 @@ static int XCONCATX2(have,arch)(void) \
   return res; \
   } \
 \
-void XCONCATX2(inner_loop,arch) (sharp_job &job, const int *ispair, \
+void XCONCATX2(inner_loop,arch) (sharp_protojob &job, const int *ispair, \
   const double *cth_, const double *sth_, size_t llim, size_t ulim, \
   sharp_Ylmgen &gen, size_t mi, const size_t *mlim); \
 size_t XCONCATX2(sharp_veclen,arch) (void); \
@@ -107,7 +107,7 @@ DECL2(avx)

 #pragma GCC visibility push(hidden)

-void inner_loop (sharp_job &job, const int *ispair,const double *cth,
+void inner_loop (sharp_protojob &job, const int *ispair,const double *cth,
   const double *sth, size_t llim, size_t ulim, sharp_Ylmgen &gen, size_t mi,
   const size_t *mlim)
   {
......
@@ -330,7 +330,7 @@ MRUTIL_NOINLINE static void alm2map_kernel(s0data_v & MRUTIL_RESTRICT d,
     }
   }

-MRUTIL_NOINLINE static void calc_alm2map (sharp_job & MRUTIL_RESTRICT job,
+MRUTIL_NOINLINE static void calc_alm2map (sharp_protojob & MRUTIL_RESTRICT job,
   const sharp_Ylmgen &gen, s0data_v & MRUTIL_RESTRICT d, size_t nth)
   {
   size_t l,il=0,lmax=gen.lmax;
@@ -424,7 +424,7 @@ MRUTIL_NOINLINE static void map2alm_kernel(s0data_v & MRUTIL_RESTRICT d,
     }
   }

-MRUTIL_NOINLINE static void calc_map2alm (sharp_job & MRUTIL_RESTRICT job,
+MRUTIL_NOINLINE static void calc_map2alm (sharp_protojob & MRUTIL_RESTRICT job,
   const sharp_Ylmgen &gen, s0data_v & MRUTIL_RESTRICT d, size_t nth)
   {
   size_t l,il=0,lmax=gen.lmax;
@@ -603,7 +603,7 @@ MRUTIL_NOINLINE static void alm2map_spin_kernel(sxdata_v & MRUTIL_RESTRICT d,
     }
   }

-MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_job & MRUTIL_RESTRICT job,
+MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_protojob & MRUTIL_RESTRICT job,
   const sharp_Ylmgen &gen, sxdata_v & MRUTIL_RESTRICT d, size_t nth)
   {
   size_t l,lmax=gen.lmax;
@@ -737,7 +737,7 @@ MRUTIL_NOINLINE static void map2alm_spin_kernel(sxdata_v & MRUTIL_RESTRICT d,
     }
   }

-MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_job & MRUTIL_RESTRICT job,
+MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_protojob & MRUTIL_RESTRICT job,
   const sharp_Ylmgen &gen, sxdata_v & MRUTIL_RESTRICT d, size_t nth)
   {
   size_t l,lmax=gen.lmax;
@@ -858,7 +858,7 @@ MRUTIL_NOINLINE static void alm2map_deriv1_kernel(sxdata_v & MRUTIL_RESTRICT d,
     }
   }

-MRUTIL_NOINLINE static void calc_alm2map_deriv1(sharp_job & MRUTIL_RESTRICT job,
+MRUTIL_NOINLINE static void calc_alm2map_deriv1(sharp_protojob & MRUTIL_RESTRICT job,
   const sharp_Ylmgen &gen, sxdata_v & MRUTIL_RESTRICT d, size_t nth)
   {
   size_t l,lmax=gen.lmax;
@@ -939,11 +939,11 @@ MRUTIL_NOINLINE static void calc_alm2map_deriv1(sharp_job & MRUTIL_RESTRICT job,
 #define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)

-MRUTIL_NOINLINE static void inner_loop_a2m(sharp_job &job, const int *ispair,
+MRUTIL_NOINLINE static void inner_loop_a2m(sharp_protojob &job, const int *ispair,
   const double *cth_, const double *sth_, size_t llim, size_t ulim,
   sharp_Ylmgen &gen, size_t mi, const size_t *mlim)
   {
-  const size_t m = job.ainfo->mval(mi);
+  const size_t m = job.ainfo.mval(mi);
   gen.prepare(m);
   switch (job.type)
@@ -1017,15 +1017,10 @@ MRUTIL_NOINLINE static void inner_loop_a2m(sharp_job &job, const int *ispair,
       else
         {
 //adjust the a_lm for the new algorithm
-        if (job.nalm==2)
-          for (size_t l=gen.mhi; l<=gen.lmax+1; ++l)
-            {
-            job.almtmp[2*l  ]*=gen.alpha[l];
-            job.almtmp[2*l+1]*=gen.alpha[l];
-            }
-        else
-          for (size_t l=gen.mhi; l<=gen.lmax+1; ++l)
-            job.almtmp[l]*=gen.alpha[l];
+        auto nalm = job.nalm();
+        for (size_t l=gen.mhi; l<=gen.lmax+1; ++l)
+          for (size_t i=0; i<nalm; ++i)
+            job.almtmp[nalm*l+i]*=gen.alpha[l];

         const size_t nval=nvx*VLEN;
         size_t ith=0;
@@ -1098,11 +1093,11 @@ MRUTIL_NOINLINE static void inner_loop_a2m(sharp_job &job, const int *ispair,
     }
   }

-MRUTIL_NOINLINE static void inner_loop_m2a(sharp_job &job, const int *ispair,
+MRUTIL_NOINLINE static void inner_loop_m2a(sharp_protojob &job, const int *ispair,
   const double *cth_, const double *sth_, size_t llim, size_t ulim,
   sharp_Ylmgen &gen, size_t mi, const size_t *mlim)
   {
-  const size_t m = job.ainfo->mval(mi);
+  const size_t m = job.ainfo.mval(mi);
   gen.prepare(m);
   switch (job.type)
@@ -1218,10 +1213,10 @@ MRUTIL_NOINLINE static void inner_loop_m2a(sharp_job &job, const int *ispair,
     }
   }

-void XARCH(inner_loop) (sharp_job &job, const int *ispair,
+void XARCH(inner_loop) (sharp_protojob &job, const int *ispair,
   const double *cth_, const double *sth_, size_t llim, size_t ulim,
   sharp_Ylmgen &gen, size_t mi, const size_t *mlim);
-void XARCH(inner_loop) (sharp_job &job, const int *ispair,
+void XARCH(inner_loop) (sharp_protojob &job, const int *ispair,
   const double *cth_, const double *sth_, size_t llim, size_t ulim,
   sharp_Ylmgen &gen, size_t mi, const size_t *mlim)
   {
......
@@ -31,42 +31,68 @@
 #include <complex>
 #include "libsharp2/sharp.h"
 #include "libsharp2/sharp_ylmgen.h"
+#include "mr_util/error_handling.h"

 using std::complex;

-struct sharp_job
+class sharp_protojob
   {
-  sharp_jobtype type;
-  size_t spin;
-  size_t nmaps, nalm;
-  size_t flags;
-  void **map;
-  void **alm;
-  ptrdiff_t s_m, s_th; // strides in m and theta direction
-  complex<double> *phase;
-  vector<double> norm_l;
-  complex<double> *almtmp;
-  const sharp_geom_info *ginfo;
-  const sharp_alm_info *ainfo;
-  double time;
-  unsigned long long opcnt;
-  void build_common (sharp_jobtype type,
-    size_t spin, void *alm, void *map, const sharp_geom_info &geom_info,
-    const sharp_alm_info &alm_info, size_t flags);
-  void alloc_phase (size_t nm, size_t ntheta, std::vector<complex<double>> &data);
-  void alloc_almtmp (size_t lmax, std::vector<complex<double>> &data);
-  void init_output();
-  void alm2almtmp (size_t lmax, size_t mi);
-  void almtmp2alm (size_t lmax, size_t mi);
-  void ring2ringtmp (size_t iring, std::vector<double> &ringtmp,
-    ptrdiff_t rstride);
-  void ringtmp2ring (size_t iring, const std::vector<double> &ringtmp, ptrdiff_t rstride);
-  void map2phase (size_t mmax, size_t llim, size_t ulim);
-  void phase2map (size_t mmax, size_t llim, size_t ulim);
-  void execute();
+  public:
+    sharp_jobtype type;
+    size_t spin;
+    size_t flags;
+    ptrdiff_t s_m, s_th; // strides in m and theta direction
+    complex<double> *phase;
+    vector<double> norm_l;
+    complex<double> *almtmp;
+    const sharp_geom_info &ginfo;
+    const sharp_alm_info &ainfo;
+    double time;
+    unsigned long long opcnt;
+
+    sharp_protojob(sharp_jobtype type_, size_t spin_, const sharp_geom_info &ginfo_,
+      const sharp_alm_info &ainfo_, size_t flags_)
+      : type(type_), spin(spin_), flags(flags_), ginfo(ginfo_), ainfo(ainfo_),
+        time(0.), opcnt(0)
+      {
+      if (type==SHARP_ALM2MAP_DERIV1) spin=1;
+      if (type==SHARP_MAP2ALM) flags|=SHARP_USE_WEIGHTS;
+      if (type==SHARP_Yt) type=SHARP_MAP2ALM;
+      if (type==SHARP_WY) { type=SHARP_ALM2MAP; flags|=SHARP_USE_WEIGHTS; }
+      MR_assert(spin<=ainfo.lmax(), "bad spin");
+      }
+
+    void alloc_phase (size_t nm, size_t ntheta, std::vector<complex<double>> &data);
+    void alloc_almtmp (size_t lmax, std::vector<complex<double>> &data);
+    size_t nmaps() const { return 1+(spin>0); }
+    size_t nalm() const { return (type==SHARP_ALM2MAP_DERIV1) ? 1 : (1+(spin>0)); }
   };

+template<typename T> class sharp_job: public sharp_protojob
+  {
+  private:
+    std::vector<std::complex<T> *> alm;
+    std::vector<T *> map;
+
+    void init_output();
+    void alm2almtmp (size_t mi);
+    void almtmp2alm (size_t mi);
+    void ring2ringtmp (size_t iring, std::vector<double> &ringtmp,