From 0724e68a5598ac2921cf1fd8763c320fcb7d1abc Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Wed, 5 Aug 2020 13:06:35 +0200 Subject: [PATCH 1/2] test --- python/gridder_cxx.h | 76 ++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 34 deletions(-) diff --git a/python/gridder_cxx.h b/python/gridder_cxx.h index 032c962..b0fcb4f 100644 --- a/python/gridder_cxx.h +++ b/python/gridder_cxx.h @@ -515,12 +515,21 @@ constexpr int logsquare=4; template class HelperX2g2 { + public: + static constexpr size_t vlen = native_simd::size(); + static constexpr size_t nvec = (supp+vlen-1)/vlen; + private: + static constexpr int nsafe = (supp+1)/2; + static constexpr int su = 2*nsafe+(1< &gconf; TemplateKernel krn; mav,2> &grid; int nu, nv; - int su, sv, svvec; int iu0, iv0; // start index of the current visibility int bu0, bv0; // start index of the current buffer T wfac; @@ -534,7 +543,7 @@ template class HelperX2g2 { int nu = int(gconf.Nu()); int nv = int(gconf.Nv()); - if (bu0<-int(gconf.Nsafe())) return; // nothing written into buffer yet + if (bu0<-nsafe) return; // nothing written into buffer yet int idxu = (bu0+nu)%nu; int idxv0 = (bv0+nv)%nv; @@ -555,30 +564,23 @@ template class HelperX2g2 } public: - size_t nvec; - T *p0r, *p0i; - static constexpr size_t vlen=native_simd::size(); - static_assert(supp<=32, "support too large"); + T * DUCC0_RESTRICT p0r, * DUCC0_RESTRICT p0i; union kbuf { - T scalar[64]; - native_simd simd[64/vlen]; + T scalar[2*nvec*vlen]; + native_simd simd[2*nvec]; }; kbuf buf; HelperX2g2(const GridderConfig &gconf_, mav,2> &grid_, vector &locks_, double w0_=-1, double dw_=-1) : gconf(gconf_), krn(*gconf.krn), grid(grid_), - su(2*gconf.Nsafe()+(1<0), w0(w0_), xdw(T(1)/dw_), - locks(locks_), - nvec((supp+vlen-1)/vlen) + locks(locks_) { checkShape(grid.shape(), {gconf.Nu(),gconf.Nv()}); } ~HelperX2g2() { dump(); } @@ -588,7 +590,6 @@ template class HelperX2g2 { double u, v; gconf.getpix(in.u, in.v, u, v, iu0, iv0); - constexpr double xsupp=2./supp; double x0 = xsupp*(iu0-u); double y0 = xsupp*(iv0-v); krn.eval(T(x0), &buf.simd[0]); @@ -598,8 +599,8 @@ template class HelperX2g2 if ((iu0bu0+su) || (iv0+int(supp)>bv0+sv)) { dump(); - bu0=((((iu0+gconf.Nsafe())>>logsquare)<>logsquare)<>logsquare)<>logsquare)< class HelperX2g2 template class HelperG2x2 { + public: + static constexpr size_t vlen = native_simd::size(); + static constexpr size_t nvec = (supp+vlen-1)/vlen; + private: + static constexpr int nsafe = (supp+1)/2; + static constexpr int su = 2*nsafe+(1< &gconf; TemplateKernel krn; const mav,2> &grid; - int su, sv, svvec; int iu0, iv0; // start index of the current visibility int bu0, bv0; // start index of the current buffer T wfac; @@ -641,29 +651,22 @@ template class HelperG2x2 } public: - size_t nvec; - const T *p0r, *p0i; - static constexpr size_t vlen=native_simd::size(); - static_assert(supp<=32, "support too large"); + const T * DUCC0_RESTRICT p0r, * DUCC0_RESTRICT p0i; union kbuf { - T scalar[64]; - native_simd simd[64/vlen]; + T scalar[2*nvec*vlen]; + native_simd simd[2*nvec]; }; kbuf buf; HelperG2x2(const GridderConfig &gconf_, const mav,2> &grid_, double w0_=-1, double dw_=-1) : gconf(gconf_), krn(*gconf.krn), grid(grid_), - su(2*gconf.Nsafe()+(1<0), w0(w0_), - xdw(T(1)/dw_), - nvec((supp+vlen-1)/vlen) + xdw(T(1)/dw_) { checkShape(grid.shape(), {gconf.Nu(),gconf.Nv()}); } int lineJump() const { return svvec; } @@ -672,7 +675,6 @@ template class HelperG2x2 { double u, v; gconf.getpix(in.u, in.v, u, v, iu0, iv0); - constexpr double xsupp=2./supp; double x0 = xsupp*(iu0-u); double y0 = xsupp*(iv0-v); krn.eval(T(x0), &buf.simd[0]); @@ -681,8 +683,8 @@ template class HelperG2x2 wfac = krn.eval_single(T(xdw*xsupp*abs(w0-in.w))); if ((iu0bu0+su) || (iv0+int(supp)>bv0+sv)) { - bu0=((((iu0+gconf.Nsafe())>>logsquare)<>logsquare)<>logsquare)<>logsquare)< [[gnu::hot]] void x2grid_c_help int jump = hlp.lineJump(); const T * DUCC0_RESTRICT ku = hlp.buf.scalar; const auto * DUCC0_RESTRICT kv = hlp.buf.simd+NVEC; + array,NVEC> txr, txi; while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart [[gnu::hot]] void x2grid_c_help if (do_w_gridding) v*=hlp.Wfac(); if (flip) v=conj(v); native_simd vr(v.real()), vi(v.imag()); + for (size_t i=0; i tmpr=vr*ku[cu], tmpi=vi*ku[cu]; + // native_simd tmpr=vr*ku[cu], tmpi=vi*ku[cu]; for (size_t cv=0; cv::loadu(ptrr+cv*hlp.vlen); - tr += tmpr*kv[cv]; + tr += txr[cv]*ku[cu]; tr.storeu(ptrr+cv*hlp.vlen); auto ti = native_simd::loadu(ptri+cv*hlp.vlen); - ti += tmpi*kv[cv]; + ti += txi[cv]*ku[cu]; ti.storeu(ptri+cv*hlp.vlen); } ptrr+=jump; -- GitLab From aad563c639c6fca9b601a4993fd7e806af8bdbb4 Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Wed, 5 Aug 2020 13:20:51 +0200 Subject: [PATCH 2/2] partial revert --- python/gridder_cxx.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/python/gridder_cxx.h b/python/gridder_cxx.h index b0fcb4f..346d312 100644 --- a/python/gridder_cxx.h +++ b/python/gridder_cxx.h @@ -781,7 +781,6 @@ template [[gnu::hot]] void x2grid_c_help int jump = hlp.lineJump(); const T * DUCC0_RESTRICT ku = hlp.buf.scalar; const auto * DUCC0_RESTRICT kv = hlp.buf.simd+NVEC; - array,NVEC> txr, txi; while (auto rng=sched.getNext()) for(auto ipart=rng.lo; ipart [[gnu::hot]] void x2grid_c_help if (do_w_gridding) v*=hlp.Wfac(); if (flip) v=conj(v); native_simd vr(v.real()), vi(v.imag()); - for (size_t i=0; i tmpr=vr*ku[cu], tmpi=vi*ku[cu]; + native_simd tmpr=vr*ku[cu], tmpi=vi*ku[cu]; for (size_t cv=0; cv::loadu(ptrr+cv*hlp.vlen); - tr += txr[cv]*ku[cu]; + tr += tmpr*kv[cv]; tr.storeu(ptrr+cv*hlp.vlen); auto ti = native_simd::loadu(ptri+cv*hlp.vlen); - ti += txi[cv]*ku[cu]; + ti += tmpi*kv[cv]; ti.storeu(ptri+cv*hlp.vlen); } ptrr+=jump; -- GitLab