Commit f150903f authored by Martin Reinecke's avatar Martin Reinecke
Browse files

simplifications

parent 718634a3
......@@ -196,7 +196,7 @@ struct ringhelper
}
};
template<typename T>void sharp_job<T>::init_output()
void sharp_job::init_output()
{
if (flags&SHARP_ADD) return;
if (type == SHARP_MAP2ALM)
......@@ -207,7 +207,7 @@ template<typename T>void sharp_job<T>::init_output()
ginfo.clear_map(map[i]);
}
MRUTIL_NOINLINE void sharp_protojob::alloc_phase (size_t nm, size_t ntheta, vector<dcmplx> &data)
MRUTIL_NOINLINE void sharp_job::alloc_phase (size_t nm, size_t ntheta, vector<dcmplx> &data)
{
if (type==SHARP_MAP2ALM)
{
......@@ -225,13 +225,13 @@ MRUTIL_NOINLINE void sharp_protojob::alloc_phase (size_t nm, size_t ntheta, vect
phase=data.data();
}
void sharp_protojob::alloc_almtmp (size_t lmax, vector<dcmplx> &data)
void sharp_job::alloc_almtmp (size_t lmax, vector<dcmplx> &data)
{
data.resize(nalm()*(lmax+2));
almtmp=data.data();
}
template<typename T> MRUTIL_NOINLINE void sharp_job<T>::alm2almtmp (size_t mi)
MRUTIL_NOINLINE void sharp_job::alm2almtmp (size_t mi)
{
size_t nalm_ = nalm();
size_t lmax = ainfo.lmax();
......@@ -256,7 +256,7 @@ template<typename T> MRUTIL_NOINLINE void sharp_job<T>::alm2almtmp (size_t mi)
almtmp[i]=0;
}
template<typename T> MRUTIL_NOINLINE void sharp_job<T>::almtmp2alm (size_t mi)
MRUTIL_NOINLINE void sharp_job::almtmp2alm (size_t mi)
{
if (type != SHARP_MAP2ALM) return;
size_t lmax = ainfo.lmax();
......@@ -271,14 +271,14 @@ template<typename T> MRUTIL_NOINLINE void sharp_job<T>::almtmp2alm (size_t mi)
ainfo.add_alm(mi, almtmp+i, alm[i], nalm_);
}
template<typename T> MRUTIL_NOINLINE void sharp_job<T>::ringtmp2ring (size_t iring,
MRUTIL_NOINLINE void sharp_job::ringtmp2ring (size_t iring,
const vector<double> &ringtmp, size_t rstride)
{
for (size_t i=0; i<nmaps(); ++i)
ginfo.add_ring(flags&SHARP_USE_WEIGHTS, iring, &ringtmp[i*rstride+1], map[i]);
}
template<typename T> MRUTIL_NOINLINE void sharp_job<T>::ring2ringtmp (size_t iring,
MRUTIL_NOINLINE void sharp_job::ring2ringtmp (size_t iring,
vector<double> &ringtmp, size_t rstride)
{
for (size_t i=0; i<nmaps(); ++i)
......@@ -286,7 +286,7 @@ template<typename T> MRUTIL_NOINLINE void sharp_job<T>::ring2ringtmp (size_t iri
}
//FIXME: set phase to zero if not SHARP_MAP2ALM?
template<typename T> MRUTIL_NOINLINE void sharp_job<T>::map2phase (size_t mmax, size_t llim, size_t ulim)
MRUTIL_NOINLINE void sharp_job::map2phase (size_t mmax, size_t llim, size_t ulim)
{
if (type != SHARP_MAP2ALM) return;
size_t pstride = s_m;
......@@ -314,7 +314,7 @@ template<typename T> MRUTIL_NOINLINE void sharp_job<T>::map2phase (size_t mmax,
}); /* end of parallel region */
}
template<typename T> MRUTIL_NOINLINE void sharp_job<T>::phase2map (size_t mmax, size_t llim, size_t ulim)
MRUTIL_NOINLINE void sharp_job::phase2map (size_t mmax, size_t llim, size_t ulim)
{
if (type == SHARP_MAP2ALM) return;
size_t pstride = s_m;
......@@ -342,7 +342,7 @@ template<typename T> MRUTIL_NOINLINE void sharp_job<T>::phase2map (size_t mmax,
}); /* end of parallel region */
}
template<typename T> MRUTIL_NOINLINE void sharp_job<T>::execute()
MRUTIL_NOINLINE void sharp_job::execute()
{
mr::SimpleTimer timer;
opcnt=0;
......@@ -412,17 +412,24 @@ template<typename T> MRUTIL_NOINLINE void sharp_job<T>::execute()
time=timer();
}
template<typename T> sharp_job<T>::sharp_job (sharp_jobtype type_,
size_t spin_, const vector<complex<T> *> &alm_, const vector<T *> &map_,
sharp_job::sharp_job (sharp_jobtype type_,
size_t spin_, const vector<any> &alm_, const vector<any> &map_,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info, size_t flags_)
: sharp_protojob(type_, spin_, geom_info, alm_info, flags_), alm(alm_), map(map_)
: alm(alm_), map(map_), type(type_), spin(spin_), flags(flags_), ginfo(geom_info), ainfo(alm_info),
time(0.), opcnt(0)
{
if (type==SHARP_ALM2MAP_DERIV1) spin_=1;
if (type==SHARP_MAP2ALM) flags|=SHARP_USE_WEIGHTS;
if (type==SHARP_Yt) type=SHARP_MAP2ALM;
if (type==SHARP_WY) { type=SHARP_ALM2MAP; flags|=SHARP_USE_WEIGHTS; }
MR_assert(spin<=ainfo.lmax(), "bad spin");
MR_assert(alm.size()==nalm(), "incorrect # of a_lm components");
MR_assert(map.size()==nmaps(), "incorrect # of a_lm components");
}
template<typename T> void sharp_execute (sharp_jobtype type, size_t spin, const vector<complex<T> *> &alm,
const vector<T *> &map,
void sharp_execute (sharp_jobtype type, size_t spin, const vector<any> &alm,
const vector<any> &map,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt)
{
......@@ -433,15 +440,6 @@ template<typename T> void sharp_execute (sharp_jobtype type, size_t spin, const
if (opcnt!=nullptr) *opcnt = job.opcnt;
}
template void sharp_execute<double> (sharp_jobtype type, size_t spin, const vector<complex<double> *> &alm,
const vector<double *> &map,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt);
template void sharp_execute<float> (sharp_jobtype type, size_t spin, const vector<complex<float> *> &alm,
const vector<float *> &map,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt);
void sharp_set_chunksize_min(size_t new_chunksize_min)
{ chunksize_min=new_chunksize_min; }
void sharp_set_nchunks_max(size_t new_nchunks_max)
......
......@@ -101,33 +101,8 @@ enum sharp_jobflags {
SHARP_USE_WEIGHTS = 1<<20, /* internal use only */
};
/*! Performs a libsharp2 SHT job. The interface deliberately does not use
the C99 "complex" data type, in order to be callable from C89 and C++.
\param type the type of SHT
\param spin the spin of the quantities to be transformed
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
alm[0] points to the a_lm of the SHT. If \a spin>0, alm[0] and alm[1]
point to the two a_lm sets of the SHT. The exact data type of \a alm
depends on whether the SHARP_DP flag is set.
\param map contains pointers to the maps. If \a spin==0,
map[0] points to the map of the SHT. If \a spin>0, or \a type is
SHARP_ALM2MAP_DERIV1, map[0] and map[1] point to the two maps of the SHT.
The exact data type of \a map depends on whether the SHARP_DP flag is set.
\param geom_info A \c sharp_geom_info object compatible with the provided
\a map arrays.
\param alm_info A \c sharp_alm_info object compatible with the provided
\a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
exactly once.
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
\a alm is expected to have the type "complex double **" and \a map is
expected to have the type "double **"; otherwise, the expected
types are "complex float **" and "float **", respectively.
\param time If not nullptr, the wall clock time required for this SHT
(in seconds) will be written here.
\param opcnt If not nullptr, a conservative estimate of the total floating point
operation count for this SHT will be written here. */
template<typename T> void sharp_execute (sharp_jobtype type, size_t spin, const std::vector<std::complex<T> *> &alm,
const std::vector<T *> &map,
void sharp_execute (sharp_jobtype type, size_t spin, const std::vector<std::any> &alm,
const std::vector<std::any> &map,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt);
......@@ -135,45 +110,25 @@ template<typename T> void sharp_alm2map(const std::complex<T> *alm, T *map,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt)
{
std::vector<std::complex<T> *> va;
va.push_back(const_cast<std::complex<T> *>(alm));
std::vector<T *> vm;
vm.push_back(map);
sharp_execute(SHARP_Y, 0, va, vm, geom_info, alm_info, flags, time, opcnt);
sharp_execute(SHARP_Y, 0, {alm}, {map}, geom_info, alm_info, flags, time, opcnt);
}
template<typename T> void sharp_alm2map_spin(size_t spin, const std::complex<T> *alm1, const std::complex<T> *alm2, T *map1, T *map2,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt)
{
std::vector<std::complex<T> *> va;
va.push_back(const_cast<std::complex<T> *>(alm1));
va.push_back(const_cast<std::complex<T> *>(alm2));
std::vector<T *> vm;
vm.push_back(map1);
vm.push_back(map2);
sharp_execute(SHARP_Y, spin, va, vm, geom_info, alm_info, flags, time, opcnt);
sharp_execute(SHARP_Y, spin, {alm1, alm2}, {map1, map2}, geom_info, alm_info, flags, time, opcnt);
}
template<typename T> void sharp_map2alm(std::complex<T> *alm, const T *map,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt)
{
std::vector<std::complex<T> *> va;
va.push_back(alm);
std::vector<T *> vm;
vm.push_back(const_cast<T *>(map));
sharp_execute(SHARP_Yt, 0, va, vm, geom_info, alm_info, flags, time, opcnt);
sharp_execute(SHARP_Yt, 0, {alm}, {map}, geom_info, alm_info, flags, time, opcnt);
}
template<typename T> void sharp_map2alm_spin(size_t spin, std::complex<T> *alm1, std::complex<T> *alm2, const T *map1, const T *map2,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt)
{
std::vector<std::complex<T> *> va;
va.push_back(alm1);
va.push_back(alm2);
std::vector<T *> vm;
vm.push_back(const_cast<T *>(map1));
vm.push_back(const_cast<T *>(map2));
sharp_execute(SHARP_Yt, spin, va, vm, geom_info, alm_info, flags, time, opcnt);
sharp_execute(SHARP_Yt, spin, {alm1, alm2}, {map1, map2}, geom_info, alm_info, flags, time, opcnt);
}
void sharp_set_chunksize_min(size_t new_chunksize_min);
......
......@@ -31,7 +31,7 @@
#undef GENERIC_ARCH
#undef ARCH
using t_inner_loop = void (*) (sharp_protojob &job, const vector<bool> &ispair,
using t_inner_loop = void (*) (sharp_job &job, const vector<bool> &ispair,
const vector<double> &cth_, const vector<double> &sth_, size_t llim, size_t ulim,
sharp_Ylmgen &gen, size_t mi, const vector<size_t> &mlim);
using t_veclen = size_t (*) (void);
......@@ -62,7 +62,7 @@ static int XCONCATX2(have,arch)(void) \
return res; \
} \
\
void XCONCATX2(inner_loop,arch) (sharp_protojob &job, const vector<bool> &ispair, \
void XCONCATX2(inner_loop,arch) (sharp_job &job, const vector<bool> &ispair, \
const vector<double> &cth_, const vector<double> &sth_, size_t llim, size_t ulim, \
sharp_Ylmgen &gen, size_t mi, const vector<size_t> &mlim); \
size_t XCONCATX2(sharp_veclen,arch) (void); \
......@@ -107,7 +107,7 @@ DECL2(avx)
#pragma GCC visibility push(hidden)
void inner_loop (sharp_protojob &job, const vector<bool> &ispair,
void inner_loop (sharp_job &job, const vector<bool> &ispair,
const vector<double> &cth, const vector<double> &sth,
size_t llim, size_t ulim, sharp_Ylmgen &gen, size_t mi,
const vector<size_t> &mlim)
......
......@@ -332,7 +332,7 @@ MRUTIL_NOINLINE static void alm2map_kernel(s0data_v & MRUTIL_RESTRICT d,
}
}
MRUTIL_NOINLINE static void calc_alm2map (sharp_protojob & MRUTIL_RESTRICT job,
MRUTIL_NOINLINE static void calc_alm2map (sharp_job & MRUTIL_RESTRICT job,
const sharp_Ylmgen &gen, s0data_v & MRUTIL_RESTRICT d, size_t nth)
{
size_t l,il=0,lmax=gen.lmax;
......@@ -426,7 +426,7 @@ MRUTIL_NOINLINE static void map2alm_kernel(s0data_v & MRUTIL_RESTRICT d,
}
}
MRUTIL_NOINLINE static void calc_map2alm (sharp_protojob & MRUTIL_RESTRICT job,
MRUTIL_NOINLINE static void calc_map2alm (sharp_job & MRUTIL_RESTRICT job,
const sharp_Ylmgen &gen, s0data_v & MRUTIL_RESTRICT d, size_t nth)
{
size_t l,il=0,lmax=gen.lmax;
......@@ -605,7 +605,7 @@ MRUTIL_NOINLINE static void alm2map_spin_kernel(sxdata_v & MRUTIL_RESTRICT d,
}
}
MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_protojob & MRUTIL_RESTRICT job,
MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_job & MRUTIL_RESTRICT job,
const sharp_Ylmgen &gen, sxdata_v & MRUTIL_RESTRICT d, size_t nth)
{
size_t l,lmax=gen.lmax;
......@@ -739,7 +739,7 @@ MRUTIL_NOINLINE static void map2alm_spin_kernel(sxdata_v & MRUTIL_RESTRICT d,
}
}
MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_protojob & MRUTIL_RESTRICT job,
MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_job & MRUTIL_RESTRICT job,
const sharp_Ylmgen &gen, sxdata_v & MRUTIL_RESTRICT d, size_t nth)
{
size_t l,lmax=gen.lmax;
......@@ -860,7 +860,7 @@ MRUTIL_NOINLINE static void alm2map_deriv1_kernel(sxdata_v & MRUTIL_RESTRICT d,
}
}
MRUTIL_NOINLINE static void calc_alm2map_deriv1(sharp_protojob & MRUTIL_RESTRICT job,
MRUTIL_NOINLINE static void calc_alm2map_deriv1(sharp_job & MRUTIL_RESTRICT job,
const sharp_Ylmgen &gen, sxdata_v & MRUTIL_RESTRICT d, size_t nth)
{
size_t l,lmax=gen.lmax;
......@@ -941,7 +941,7 @@ MRUTIL_NOINLINE static void calc_alm2map_deriv1(sharp_protojob & MRUTIL_RESTRICT
#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
MRUTIL_NOINLINE static void inner_loop_a2m(sharp_protojob &job, const vector<bool> & ispair,
MRUTIL_NOINLINE static void inner_loop_a2m(sharp_job &job, const vector<bool> & ispair,
const vector<double> &cth_, const vector<double> &sth_, size_t llim, size_t ulim,
sharp_Ylmgen &gen, size_t mi, const vector<size_t> &mlim)
{
......@@ -1095,7 +1095,7 @@ MRUTIL_NOINLINE static void inner_loop_a2m(sharp_protojob &job, const vector<boo
}
}
MRUTIL_NOINLINE static void inner_loop_m2a(sharp_protojob &job, const vector<bool> &ispair,
MRUTIL_NOINLINE static void inner_loop_m2a(sharp_job &job, const vector<bool> &ispair,
const vector<double> &cth_, const vector<double> &sth_, size_t llim, size_t ulim,
sharp_Ylmgen &gen, size_t mi, const vector<size_t> &mlim)
{
......@@ -1215,7 +1215,7 @@ MRUTIL_NOINLINE static void inner_loop_m2a(sharp_protojob &job, const vector<boo
}
}
void XARCH(inner_loop) (sharp_protojob &job, const vector<bool> &ispair,
void XARCH(inner_loop) (sharp_job &job, const vector<bool> &ispair,
const vector<double> &cth_, const vector<double> &sth_, size_t llim, size_t ulim,
sharp_Ylmgen &gen, size_t mi, const vector<size_t> &mlim)
{
......
......@@ -86,8 +86,21 @@ class sharp_Ylmgen
std::vector<double> flm1, flm2, inv;
};
class sharp_protojob
class sharp_job
{
private:
std::vector<std::any> alm;
std::vector<std::any> map;
void init_output();
void alm2almtmp (size_t mi);
void almtmp2alm (size_t mi);
void ring2ringtmp (size_t iring, std::vector<double> &ringtmp,
size_t rstride);
void ringtmp2ring (size_t iring, const std::vector<double> &ringtmp, size_t rstride);
void map2phase (size_t mmax, size_t llim, size_t ulim);
void phase2map (size_t mmax, size_t llim, size_t ulim);
public:
sharp_jobtype type;
size_t spin;
......@@ -101,49 +114,20 @@ class sharp_protojob
double time;
unsigned long long opcnt;
sharp_protojob(sharp_jobtype type_, size_t spin_, const sharp_geom_info &ginfo_,
const sharp_alm_info &ainfo_, size_t flags_)
: type(type_), spin(spin_), flags(flags_), ginfo(ginfo_), ainfo(ainfo_),
time(0.), opcnt(0)
{
if (type==SHARP_ALM2MAP_DERIV1) spin_=1;
if (type==SHARP_MAP2ALM) flags|=SHARP_USE_WEIGHTS;
if (type==SHARP_Yt) type=SHARP_MAP2ALM;
if (type==SHARP_WY) { type=SHARP_ALM2MAP; flags|=SHARP_USE_WEIGHTS; }
MR_assert(spin<=ainfo.lmax(), "bad spin");
}
sharp_job(sharp_jobtype type,
size_t spin, const std::vector<std::any> &alm_,
const std::vector<std::any> &map, const sharp_geom_info &geom_info,
const sharp_alm_info &alm_info, size_t flags);
void alloc_phase (size_t nm, size_t ntheta, std::vector<complex<double>> &data);
void alloc_almtmp (size_t lmax, std::vector<complex<double>> &data);
size_t nmaps() const { return 1+(spin>0); }
size_t nalm() const { return (type==SHARP_ALM2MAP_DERIV1) ? 1 : (1+(spin>0)); }
};
template<typename T> class sharp_job: public sharp_protojob
{
private:
std::vector<std::complex<T> *> alm;
std::vector<T *> map;
void init_output();
void alm2almtmp (size_t mi);
void almtmp2alm (size_t mi);
void ring2ringtmp (size_t iring, std::vector<double> &ringtmp,
size_t rstride);
void ringtmp2ring (size_t iring, const std::vector<double> &ringtmp, size_t rstride);
void map2phase (size_t mmax, size_t llim, size_t ulim);
void phase2map (size_t mmax, size_t llim, size_t ulim);
public:
sharp_job(sharp_jobtype type,
size_t spin, const std::vector<std::complex<T> *> &alm_,
const std::vector<T *> &map, const sharp_geom_info &geom_info,
const sharp_alm_info &alm_info, size_t flags);
void execute();
void execute();
};
void inner_loop (sharp_protojob &job, const std::vector<bool> &ispair,
void inner_loop (sharp_job &job, const std::vector<bool> &ispair,
const std::vector<double> &cth, const std::vector<double> &sth, size_t llim,
size_t ulim, sharp_Ylmgen &gen, size_t mi, const std::vector<size_t> &mlim);
......
......@@ -311,8 +311,7 @@ static void check_sign_scale(void)
/* use mirrored indices to emulate the "old" Gaussian grid geometry */
/* original indices were 0, npix/2 and npix-1 */
size_t i0=npix-ppring, i1=npix/2, i2=ppring-1;
sharp_execute(SHARP_ALM2MAP,0,alm1,map1,*tinfo,*alms,0,
nullptr,nullptr);
sharp_alm2map(&balm[0],&bmap[0],*tinfo,*alms,0,nullptr,nullptr);
MR_assert(approx(map1[0][i0], 3.588246976618616912e+00,1e-12),
"error");
MR_assert(approx(map1[0][i1], 4.042209792157496651e+01,1e-12),
......@@ -320,7 +319,7 @@ static void check_sign_scale(void)
MR_assert(approx(map1[0][i2],-1.234675107554816442e+01,1e-12),
"error");
sharp_execute(SHARP_ALM2MAP,1,alm2,map2,*tinfo,*alms,0,
sharp_alm2map_spin(1, &balm[0], &balm[nalms], &bmap[0], &bmap[npix],*tinfo,*alms,0,
nullptr,nullptr);
MR_assert(approx(map2[0][i0], 2.750897760535633285e+00,1e-12),
"error");
......@@ -335,7 +334,7 @@ static void check_sign_scale(void)
MR_assert(approx(map2[1][i2],-1.412765834230440021e+01,1e-12),
"error");
sharp_execute(SHARP_ALM2MAP,2,alm2,map2,*tinfo,*alms,0,
sharp_alm2map_spin(2,&balm[0], &balm[nalms],&bmap[0], &bmap[npix],*tinfo,*alms,0,
nullptr,nullptr);
MR_assert(approx(map2[0][i0],-1.398186224727334448e+00,1e-12),
"error");
......@@ -350,7 +349,7 @@ static void check_sign_scale(void)
MR_assert(approx(map2[1][i2],-1.863257892248353897e+01,1e-12),
"error");
sharp_execute(SHARP_ALM2MAP_DERIV1,1,alm1,map2,*tinfo,*alms,
sharp_execute(SHARP_ALM2MAP_DERIV1,1,{&balm[0]},{&bmap[0], &bmap[npix]},*tinfo,*alms,
0,nullptr,nullptr);
MR_assert(approx(map2[0][i0],-6.859393905369091105e-01,1e-11),
"error");
......@@ -395,8 +394,7 @@ static void do_sht (sharp_geom_info &ginfo, sharp_standard_alm_info &ainfo,
if (op_a2m!=nullptr) *op_a2m=0;
for (size_t itrans=0; itrans<ntrans; ++itrans)
{
vector<dcmplx *> av;
vector<double *> mv;
vector<any> av, mv;
for (size_t i=0; i<ncomp; ++i)
{
av.push_back(alm[itrans*ncomp+i]);
......@@ -411,8 +409,7 @@ static void do_sht (sharp_geom_info &ginfo, sharp_standard_alm_info &ainfo,
if (op_m2a!=nullptr) *op_m2a=0;
for (size_t itrans=0; itrans<ntrans; ++itrans)
{
vector<dcmplx *> av;
vector<double *> mv;
vector<any> av, mv;
for (size_t i=0; i<ncomp; ++i)
{
av.push_back(alm[itrans*ncomp+i]);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment