Commit de2785c5 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

allow nthread specification in libsharp

parent c81d636f
Pipeline #70126 passed with stages
in 8 minutes and 52 seconds
......@@ -118,9 +118,9 @@ template<typename T> class Interpolator
auto m1 = cube.template subarray<2>({supp,supp,kidx1},{ntheta,nphi,0});
auto m2 = cube.template subarray<2>({supp,supp,kidx2},{ntheta,nphi,0});
if (k==0)
sharp_alm2map(a1.Alms().data(), m1.vdata(), *ginfo, *ainfo, 0, nullptr, nullptr);
sharp_alm2map(a1.Alms().data(), m1.vdata(), *ginfo, *ainfo, 0, nthreads);
else
sharp_alm2map_spin(k, a1.Alms().data(), a2.Alms().data(), m1.vdata(), m2.vdata(), *ginfo, *ainfo, 0, nullptr, nullptr);
sharp_alm2map_spin(k, a1.Alms().data(), a2.Alms().data(), m1.vdata(), m2.vdata(), *ginfo, *ainfo, 0, nthreads);
correct(m1,k);
if (k!=0) correct(m2,k);
if (k!=0)
......
......@@ -107,7 +107,7 @@ static void random_alm (dcmplx *alm, sharp_standard_alm_info &helper, size_t spi
}
}
static unsigned long long totalops (unsigned long long val)
static uint64_t totalops (uint64_t val)
{
return val;
}
......@@ -319,7 +319,7 @@ static void check_sign_scale(void)
/* use mirrored indices to emulate the "old" Gaussian grid geometry */
/* original indices were 0, npix/2 and npix-1 */
size_t i0=npix-ppring, i1=npix/2, i2=ppring-1;
sharp_alm2map(&balm[0],&bmap[0],*tinfo,*alms,0,nullptr,nullptr);
sharp_alm2map(&balm[0],&bmap[0],*tinfo,*alms,0,0,nullptr,nullptr);
MR_assert(approx(map1[0][i0], 3.588246976618616912e+00,1e-12),
"error");
MR_assert(approx(map1[0][i1], 4.042209792157496651e+01,1e-12),
......@@ -327,7 +327,7 @@ static void check_sign_scale(void)
MR_assert(approx(map1[0][i2],-1.234675107554816442e+01,1e-12),
"error");
sharp_alm2map_spin(1, &balm[0], &balm[nalms], &bmap[0], &bmap[npix],*tinfo,*alms,0,
sharp_alm2map_spin(1, &balm[0], &balm[nalms], &bmap[0], &bmap[npix],*tinfo,*alms,0,0,
nullptr,nullptr);
MR_assert(approx(map2[0][i0], 2.750897760535633285e+00,1e-12),
"error");
......@@ -342,7 +342,7 @@ static void check_sign_scale(void)
MR_assert(approx(map2[1][i2],-1.412765834230440021e+01,1e-12),
"error");
sharp_alm2map_spin(2,&balm[0], &balm[nalms],&bmap[0], &bmap[npix],*tinfo,*alms,0,
sharp_alm2map_spin(2,&balm[0], &balm[nalms],&bmap[0], &bmap[npix],*tinfo,*alms,0,0,
nullptr,nullptr);
MR_assert(approx(map2[0][i0],-1.398186224727334448e+00,1e-12),
"error");
......@@ -358,7 +358,7 @@ static void check_sign_scale(void)
"error");
sharp_execute(SHARP_ALM2MAP_DERIV1,1,{&balm[0]},{&bmap[0], &bmap[npix]},*tinfo,*alms,
0,nullptr,nullptr);
0,0,nullptr,nullptr);
MR_assert(approx(map2[0][i0],-6.859393905369091105e-01,1e-11),
"error");
MR_assert(approx(map2[0][i1],-2.103947835973212364e+02,1e-12),
......@@ -375,8 +375,8 @@ static void check_sign_scale(void)
static void do_sht (sharp_geom_info &ginfo, sharp_standard_alm_info &ainfo,
int spin, vector<double> &err_abs, vector<double> &err_rel,
double *t_a2m, double *t_m2a, unsigned long long *op_a2m,
unsigned long long *op_m2a, size_t ntrans)
double *t_a2m, double *t_m2a, uint64_t *op_a2m,
uint64_t *op_m2a, size_t ntrans)
{
ptrdiff_t nalms = get_nalms(ainfo);
size_t ncomp = (spin==0) ? 1 : 2;
......@@ -396,7 +396,7 @@ static void do_sht (sharp_geom_info &ginfo, sharp_standard_alm_info &ainfo,
}
double tta2m, ttm2a;
unsigned long long toa2m, tom2a;
uint64_t toa2m, tom2a;
if (t_a2m!=nullptr) *t_a2m=0;
if (op_a2m!=nullptr) *op_a2m=0;
......@@ -408,7 +408,7 @@ static void do_sht (sharp_geom_info &ginfo, sharp_standard_alm_info &ainfo,
av.push_back(alm[itrans*ncomp+i]);
mv.push_back(map[itrans*ncomp+i]);
}
sharp_execute(SHARP_ALM2MAP,spin,av,mv,ginfo,ainfo, 0,&tta2m,&toa2m);
sharp_execute(SHARP_ALM2MAP,spin,av,mv,ginfo,ainfo,0,0,&tta2m,&toa2m);
if (t_a2m!=nullptr) *t_a2m+=maxTime(tta2m);
if (op_a2m!=nullptr) *op_a2m+=totalops(toa2m);
}
......@@ -424,7 +424,7 @@ static void do_sht (sharp_geom_info &ginfo, sharp_standard_alm_info &ainfo,
mv.push_back(map[itrans*ncomp+i]);
}
sharp_execute(SHARP_MAP2ALM,spin,av,mv,ginfo,ainfo,
SHARP_ADD,&ttm2a,&tom2a);
SHARP_ADD,0,&ttm2a,&tom2a);
if (t_m2a!=nullptr) *t_m2a+=maxTime(ttm2a);
if (op_m2a!=nullptr) *op_m2a+=totalops(tom2a);
}
......@@ -494,7 +494,7 @@ static void sharp_test (int argc, const char **argv)
int ncomp = (spin==0) ? 1 : 2;
double t_a2m=1e30, t_m2a=1e30;
unsigned long long op_a2m, op_m2a;
uint64_t op_a2m, op_m2a;
vector<double> err_abs, err_rel;
double t_acc=0;
......
......@@ -60,9 +60,10 @@ template<typename T> class py_sharpjob
unique_ptr<sharp_geom_info> ginfo;
unique_ptr<sharp_alm_info> ainfo;
int64_t lmax_, mmax_, npix_;
int nthreads;
public:
py_sharpjob () : lmax_(0), mmax_(0), npix_(0) {}
py_sharpjob () : lmax_(0), mmax_(0), npix_(0), nthreads(1) {}
string repr() const
{
......@@ -70,6 +71,8 @@ template<typename T> class py_sharpjob
", mmax=" + dataToString(mmax_) + ", npix=", dataToString(npix_) +".>";
}
void set_nthreads(int64_t nthreads_)
{ nthreads = int(nthreads_); }
void set_gauss_geometry(int64_t nrings, int64_t nphi)
{
MR_assert((nrings>0)&&(nphi>0),"bad grid dimensions");
......@@ -136,7 +139,7 @@ template<typename T> class py_sharpjob
a_d_c map(npix_);
auto mr=map.mutable_unchecked<1>();
auto ar=alm.unchecked<1>();
sharp_alm2map(&ar[0], &mr[0], *ginfo, *ainfo, 0, nullptr, nullptr);
sharp_alm2map(&ar[0], &mr[0], *ginfo, *ainfo, 0, nthreads);
return map;
}
a_c_c alm2map_adjoint (const a_d_c &map) const
......@@ -146,7 +149,7 @@ template<typename T> class py_sharpjob
a_c_c alm(n_alm());
auto mr=map.unchecked<1>();
auto ar=alm.mutable_unchecked<1>();
sharp_map2alm(&ar[0], &mr[0], *ginfo, *ainfo, 0, nullptr, nullptr);
sharp_map2alm(&ar[0], &mr[0], *ginfo, *ainfo, 0, nthreads);
return alm;
}
a_c_c map2alm (const a_d_c &map) const
......@@ -156,7 +159,7 @@ template<typename T> class py_sharpjob
a_c_c alm(n_alm());
auto mr=map.unchecked<1>();
auto ar=alm.mutable_unchecked<1>();
sharp_map2alm(&ar[0], &mr[0], *ginfo, *ainfo, SHARP_USE_WEIGHTS, nullptr, nullptr);
sharp_map2alm(&ar[0], &mr[0], *ginfo, *ainfo, SHARP_USE_WEIGHTS, nthreads);
return alm;
}
a_d_c alm2map_spin (const a_c_c &alm, int64_t spin) const
......@@ -167,7 +170,7 @@ template<typename T> class py_sharpjob
"incorrect size of a_lm array");
a_d_c map(vector<size_t>{2,size_t(npix_)});
auto mr=map.mutable_unchecked<2>();
sharp_alm2map_spin(spin, &ar(0,0), &ar(1,0), &mr(0,0), &mr(1,0), *ginfo, *ainfo, 0, nullptr, nullptr);
sharp_alm2map_spin(spin, &ar(0,0), &ar(1,0), &mr(0,0), &mr(1,0), *ginfo, *ainfo, 0, nthreads);
return map;
}
a_c_c map2alm_spin (const a_d_c &map, int64_t spin) const
......@@ -178,7 +181,7 @@ template<typename T> class py_sharpjob
"incorrect size of map array");
a_c_c alm(vector<size_t>{2,size_t(n_alm())});
auto ar=alm.mutable_unchecked<2>();
sharp_map2alm_spin(spin, &ar(0,0), &ar(1,0), &mr(0,0), &mr(1,0), *ginfo, *ainfo, SHARP_USE_WEIGHTS, nullptr, nullptr);
sharp_map2alm_spin(spin, &ar(0,0), &ar(1,0), &mr(0,0), &mr(1,0), *ginfo, *ainfo, SHARP_USE_WEIGHTS, nthreads);
return alm;
}
};
......@@ -273,6 +276,7 @@ PYBIND11_MODULE(pysharp, m)
py::class_<py_sharpjob<double>> (m, "sharpjob_d")
.def(py::init<>())
.def("set_nthreads", &py_sharpjob<double>::set_nthreads, "nthreads"_a)
.def("set_gauss_geometry", &py_sharpjob<double>::set_gauss_geometry,
"nrings"_a,"nphi"_a)
.def("set_healpix_geometry", &py_sharpjob<double>::set_healpix_geometry,
......
......@@ -293,7 +293,7 @@ MRUTIL_NOINLINE void sharp_job::map2phase (size_t mmax, size_t llim, size_t ulim
{
if (type != SHARP_MAP2ALM) return;
size_t pstride = s_m;
mr::execDynamic(ulim-llim, 0, 1, [&](mr::Scheduler &sched)
mr::execDynamic(ulim-llim, nthreads, 1, [&](mr::Scheduler &sched)
{
ringhelper helper;
size_t rstride=ginfo.nphmax()+2;
......@@ -321,7 +321,7 @@ MRUTIL_NOINLINE void sharp_job::phase2map (size_t mmax, size_t llim, size_t ulim
{
if (type == SHARP_MAP2ALM) return;
size_t pstride = s_m;
mr::execDynamic(ulim-llim, 0, 1, [&](mr::Scheduler &sched)
mr::execDynamic(ulim-llim, nthreads, 1, [&](mr::Scheduler &sched)
{
ringhelper helper;
size_t rstride=ginfo.nphmax()+2;
......@@ -365,7 +365,7 @@ MRUTIL_NOINLINE void sharp_job::execute()
vector<dcmplx> phasebuffer;
//FIXME: needs to be changed to "nm"
alloc_phase(mmax+1,chunksize, phasebuffer);
std::atomic<unsigned long long> a_opcnt(0);
std::atomic<uint64_t> a_opcnt(0);
/* chunk loop */
for (size_t chunk=0; chunk<nchunks; ++chunk)
......@@ -385,7 +385,7 @@ MRUTIL_NOINLINE void sharp_job::execute()
/* map->phase where necessary */
map2phase(mmax, llim, ulim);
mr::execDynamic(ainfo.nm(), 0, 1, [&](mr::Scheduler &sched)
mr::execDynamic(ainfo.nm(), nthreads, 1, [&](mr::Scheduler &sched)
{
sharp_job ljob = *this;
ljob.opcnt=0;
......@@ -417,9 +417,9 @@ MRUTIL_NOINLINE void sharp_job::execute()
sharp_job::sharp_job (sharp_jobtype type_,
size_t spin_, const vector<any> &alm_, const vector<any> &map_,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info, size_t flags_)
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info, size_t flags_, int nthreads_)
: alm(alm_), map(map_), type(type_), spin(spin_), flags(flags_), ginfo(geom_info), ainfo(alm_info),
time(0.), opcnt(0)
nthreads(nthreads_), time(0.), opcnt(0)
{
if (type==SHARP_ALM2MAP_DERIV1) spin_=1;
if (type==SHARP_MAP2ALM) flags|=SHARP_USE_WEIGHTS;
......@@ -434,9 +434,9 @@ sharp_job::sharp_job (sharp_jobtype type_,
void sharp_execute (sharp_jobtype type, size_t spin, const vector<any> &alm,
const vector<any> &map,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt)
size_t flags, int nthreads, double *time, uint64_t *opcnt)
{
sharp_job job(type, spin, alm, map, geom_info, alm_info, flags);
sharp_job job(type, spin, alm, map, geom_info, alm_info, flags, nthreads);
job.execute();
if (time!=nullptr) *time = job.time;
......
......@@ -108,31 +108,31 @@ enum sharp_jobflags {
void sharp_execute (sharp_jobtype type, size_t spin, const std::vector<std::any> &alm,
const std::vector<std::any> &map,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt);
size_t flags, int nthreads=1, double *time=nullptr, uint64_t *opcnt=nullptr);
template<typename T> void sharp_alm2map(const std::complex<T> *alm, T *map,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt)
size_t flags, int nthreads=1, double *time=nullptr, uint64_t *opcnt=nullptr)
{
sharp_execute(SHARP_Y, 0, {alm}, {map}, geom_info, alm_info, flags, time, opcnt);
sharp_execute(SHARP_Y, 0, {alm}, {map}, geom_info, alm_info, flags, nthreads, time, opcnt);
}
template<typename T> void sharp_alm2map_spin(size_t spin, const std::complex<T> *alm1, const std::complex<T> *alm2, T *map1, T *map2,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt)
size_t flags, int nthreads=1, double *time=nullptr, uint64_t *opcnt=nullptr)
{
sharp_execute(SHARP_Y, spin, {alm1, alm2}, {map1, map2}, geom_info, alm_info, flags, time, opcnt);
sharp_execute(SHARP_Y, spin, {alm1, alm2}, {map1, map2}, geom_info, alm_info, flags, nthreads, time, opcnt);
}
template<typename T> void sharp_map2alm(std::complex<T> *alm, const T *map,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt)
size_t flags, int nthreads=1, double *time=nullptr, uint64_t *opcnt=nullptr)
{
sharp_execute(SHARP_Yt, 0, {alm}, {map}, geom_info, alm_info, flags, time, opcnt);
sharp_execute(SHARP_Yt, 0, {alm}, {map}, geom_info, alm_info, flags, nthreads, time, opcnt);
}
template<typename T> void sharp_map2alm_spin(size_t spin, std::complex<T> *alm1, std::complex<T> *alm2, const T *map1, const T *map2,
const sharp_geom_info &geom_info, const sharp_alm_info &alm_info,
size_t flags, double *time, unsigned long long *opcnt)
size_t flags, int nthreads=1, double *time=nullptr, uint64_t *opcnt=nullptr)
{
sharp_execute(SHARP_Yt, spin, {alm1, alm2}, {map1, map2}, geom_info, alm_info, flags, time, opcnt);
sharp_execute(SHARP_Yt, spin, {alm1, alm2}, {map1, map2}, geom_info, alm_info, flags, nthreads, time, opcnt);
}
void sharp_set_chunksize_min(size_t new_chunksize_min);
......
......@@ -115,13 +115,14 @@ class sharp_job
complex<double> *almtmp;
const sharp_geom_info &ginfo;
const sharp_alm_info &ainfo;
int nthreads;
double time;
unsigned long long opcnt;
uint64_t opcnt;
sharp_job(sharp_jobtype type,
size_t spin, const std::vector<std::any> &alm_,
const std::vector<std::any> &map, const sharp_geom_info &geom_info,
const sharp_alm_info &alm_info, size_t flags);
const sharp_alm_info &alm_info, size_t flags, int nthreads_);
void alloc_phase (size_t nm, size_t ntheta, std::vector<complex<double>> &data);
void alloc_almtmp (size_t lmax, std::vector<complex<double>> &data);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment