diff --git a/pocketfft_hdronly.h b/pocketfft_hdronly.h index 1b4af745c984cf726517019348b29ea3a1cac051..03817a0c4eb0a996f601c3d7de15533eacd582bf 100644 --- a/pocketfft_hdronly.h +++ b/pocketfft_hdronly.h @@ -20,6 +20,9 @@ #include #include #include +#if defined(_WIN32) +#include +#endif #ifdef __GNUC__ #define NOINLINE __attribute__((noinline)) @@ -38,6 +41,21 @@ using stride_t = vector; namespace detail { +#ifndef POCKETFFT_NO_VECTORS +#if (defined(__AVX512F__)) +#define HAVE_VECSUPPORT +constexpr int VBYTELEN=64; +#elif (defined(__AVX__)) +#define HAVE_VECSUPPORT +constexpr int VBYTELEN=32; +#elif (defined(__SSE2__)) +#define HAVE_VECSUPPORT +constexpr int VBYTELEN=16; +#else +#define POCKETFFT_NO_VECTORS +#endif +#endif + template struct arr { private: @@ -51,9 +69,12 @@ template struct arr void *res = malloc(num*sizeof(T)); if (!res) throw bad_alloc(); #else -#if 0 +#if __cplusplus >= 201703L void *res = aligned_alloc(64,num*sizeof(T)); if (!res) throw bad_alloc(); +#elif defined(_WIN32) + void *res = _aligned_malloc(num*sizeof(T), 64); + if (!res) throw bad_alloc(); #else void *res(nullptr); if (posix_memalign(&res, 64, num*sizeof(T))!=0) @@ -172,7 +193,7 @@ template class sincos_2pibyn if (n==0) return; res[0]=1.; res[1]=0.; if (n==1) return; - size_t l1=(size_t)sqrt(n); + size_t l1 = size_t(sqrt(n)); arr tmp(2*l1); for (size_t i=1; i class sincos_2pibyn { return reinterpret_cast *>(data.data()); } }; -NOINLINE size_t largest_prime_factor (size_t n) +struct util // hack to avoid duplicate symbols { - size_t res=1; - while ((n&1)==0) - { res=2; n>>=1; } - - size_t limit=size_t(sqrt(n+0.01)); - for (size_t x=3; x<=limit; x+=2) - while ((n/x)*x==n) + static NOINLINE size_t largest_prime_factor (size_t n) { - res=x; - n/=x; - limit=size_t(sqrt(n+0.01)); - } - if (n>1) res=n; + size_t res=1; + while ((n&1)==0) + { res=2; n>>=1; } - return res; - } + size_t limit=size_t(sqrt(n+0.01)); + for (size_t x=3; x<=limit; x+=2) + while ((n/x)*x==n) + { + res=x; + n/=x; + limit=size_t(sqrt(n+0.01)); + } + if (n>1) res=n; -NOINLINE double cost_guess (size_t n) - { - const double lfp=1.1; // penalty for non-hardcoded larger factors - size_t ni=n; - double result=0.; - while ((n&1)==0) - { result+=2; n>>=1; } - - size_t limit=size_t(sqrt(n+0.01)); - for (size_t x=3; x<=limit; x+=2) - while ((n/x)*x==n) + return res; + } + + static NOINLINE double cost_guess (size_t n) { - result+= (x<=5) ? x : lfp*x; // penalize larger prime factors - n/=x; - limit=size_t(sqrt(n+0.01)); + const double lfp=1.1; // penalty for non-hardcoded larger factors + size_t ni=n; + double result=0.; + while ((n&1)==0) + { result+=2; n>>=1; } + + size_t limit=size_t(sqrt(n+0.01)); + for (size_t x=3; x<=limit; x+=2) + while ((n/x)*x==n) + { + result+= (x<=5) ? x : lfp*x; // penalize larger prime factors + n/=x; + limit=size_t(sqrt(n+0.01)); + } + if (n>1) result+=(n<=5) ? n : lfp*n; + + return result*ni; } - if (n>1) result+=(n<=5) ? n : lfp*n; - return result*ni; - } + /* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */ + static NOINLINE size_t good_size(size_t n) + { + if (n<=12) return n; + + size_t bestfac=2*n; + for (size_t f2=1; f2=n) bestfac=f235711; + return bestfac; + } -/* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */ -NOINLINE size_t good_size(size_t n) - { - if (n<=12) return n; - - size_t bestfac=2*n; - for (size_t f2=1; f2=n) bestfac=f235711; - return bestfac; - } + static size_t prod(const shape_t &shape) + { + size_t res=1; + for (auto sz: shape) + res*=sz; + return res; + } + }; #define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))] #define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))] @@ -397,12 +429,12 @@ template class cfftp size_t length, nfct; arr> mem; - fctdata fct[NFCT]; + fctdata fact[NFCT]; void add_factor(size_t factor) { if (nfct>=NFCT) throw runtime_error("too many prime factors"); - fct[nfct++].fct = factor; + fact[nfct++].fct = factor; } template void pass2 (size_t ido, size_t l1, @@ -844,33 +876,33 @@ template void passg (size_t ido, size_t ip, #undef CX2 #undef CX -template void pass_all(T c[], T0 fact) +template void pass_all(T c[], T0 fct) { - if (length==1) { c[0]*=fact; return; } + if (length==1) { c[0]*=fct; return; } size_t l1=1; arr ch(length); T *p1=c, *p2=ch.data(); for(size_t k1=0; k1 (ido, l1, p1, p2, fct[k1].tw); + pass4 (ido, l1, p1, p2, fact[k1].tw); else if(ip==2) - pass2(ido, l1, p1, p2, fct[k1].tw); + pass2(ido, l1, p1, p2, fact[k1].tw); else if(ip==3) - pass3 (ido, l1, p1, p2, fct[k1].tw); + pass3 (ido, l1, p1, p2, fact[k1].tw); else if(ip==5) - pass5 (ido, l1, p1, p2, fct[k1].tw); + pass5 (ido, l1, p1, p2, fact[k1].tw); else if(ip==7) - pass7 (ido, l1, p1, p2, fct[k1].tw); + pass7 (ido, l1, p1, p2, fact[k1].tw); else if(ip==11) - pass11 (ido, l1, p1, p2, fct[k1].tw); + pass11 (ido, l1, p1, p2, fact[k1].tw); else { - passg(ido, ip, l1, p1, p2, fct[k1].tw, fct[k1].tws); + passg(ido, ip, l1, p1, p2, fact[k1].tw, fact[k1].tws); swap(p1,p2); } swap(p1,p2); @@ -878,16 +910,16 @@ template void pass_all(T c[], T0 fact) } if (p1!=c) { - if (fact!=1.) + if (fct!=1.) for (size_t i=0; i void pass_all(T c[], T0 fact) len>>=1; // factor 2 should be at the front of the factor list add_factor(2); - swap(fct[0].fct, fct[nfct-1].fct); + swap(fact[0].fct, fact[nfct-1].fct); } - size_t maxl=(size_t)(sqrt((double)len))+1; + size_t maxl = size_t(sqrt(double(len)))+1; for (size_t divisor=3; (len>1)&&(divisor void pass_all(T c[], T0 fact) add_factor(divisor); len/=divisor; } - maxl=size_t(sqrt((double)len))+1; + maxl=size_t(sqrt(double(len)))+1; } if (len>1) add_factor(len); } @@ -934,7 +966,7 @@ template void pass_all(T c[], T0 fact) size_t twsize=0, l1=1; for (size_t k=0; k11) twsize+=ip; @@ -951,18 +983,18 @@ template void pass_all(T c[], T0 fact) size_t memofs=0; for (size_t k=0; k11) { - fct[k].tws=mem.data()+memofs; + fact[k].tws=mem.data()+memofs; memofs+=ip; for (size_t j=0; j class rfftp size_t length, nfct; arr mem; - fctdata fct[NFCT]; + fctdata fact[NFCT]; void add_factor(size_t factor) { if (nfct>=NFCT) throw runtime_error("too many prime factors"); - fct[nfct++].fct = factor; + fact[nfct++].fct = factor; } #define WA(x,i) wa[(i)+(x)*(ido-1)] @@ -1632,9 +1664,9 @@ template void copy_and_norm(T *c, T *p1, size_t n, T0 fct) public: -template void forward(T c[], T0 fact) +template void forward(T c[], T0 fct) { - if (length==1) { c[0]*=fact; return; } + if (length==1) { c[0]*=fct; return; } size_t n=length; size_t l1=n, nf=nfct; arr ch(n); @@ -1643,30 +1675,30 @@ template void forward(T c[], T0 fact) for(size_t k1=0; k1 void backward(T c[], T0 fact) +template void backward(T c[], T0 fct) { - if (length==1) { c[0]*=fact; return; } + if (length==1) { c[0]*=fct; return; } size_t n=length; size_t l1=1, nf=nfct; arr ch(n); @@ -1674,22 +1706,22 @@ template void backward(T c[], T0 fact) for(size_t k=0; k>=1; // factor 2 should be at the front of the factor list add_factor(2); - swap(fct[0].fct, fct[nfct-1].fct); + swap(fact[0].fct, fact[nfct-1].fct); } - size_t maxl=(size_t)(sqrt((double)len))+1; + size_t maxl = size_t(sqrt(double(len)))+1; for (size_t divisor=3; (len>1)&&(divisor1) add_factor(len); } @@ -1726,7 +1758,7 @@ size_t twsize() const size_t twsz=0, l1=1; for (size_t k=0; k5) twsz+=2*ip; l1*=ip; @@ -1741,28 +1773,28 @@ void comp_twiddle() T0 *ptr=mem.data(); for (size_t k=0; k5) // special factors required by *g functions { - fct[k].tws=ptr; ptr+=2*ip; - fct[k].tws[0] = 1.; - fct[k].tws[1] = 0.; + fact[k].tws=ptr; ptr+=2*ip; + fact[k].tws[0] = 1.; + fact[k].tws[1] = 0.; for (size_t i=1; i<=(ip>>1); ++i) { - fct[k].tws[2*i ] = twid[2*i*(length/ip)]; - fct[k].tws[2*i+1] = twid[2*i*(length/ip)+1]; - fct[k].tws[2*(ip-i) ] = twid[2*i*(length/ip)]; - fct[k].tws[2*(ip-i)+1] = -twid[2*i*(length/ip)+1]; + fact[k].tws[2*i ] = twid[2*i*(length/ip)]; + fact[k].tws[2*i+1] = twid[2*i*(length/ip)+1]; + fact[k].tws[2*(ip-i) ] = twid[2*i*(length/ip)]; + fact[k].tws[2*(ip-i)+1] = -twid[2*i*(length/ip)+1]; } } l1*=ip; @@ -1822,7 +1854,7 @@ template class fftblue public: NOINLINE fftblue(size_t length) - : n(length), n2(good_size(n*2-1)), plan(n2), mem(n+n2), + : n(length), n2(util::good_size(n*2-1)), plan(n2), mem(n+n2), bk(mem.data()), bkf(mem.data()+n) { /* initialize b_k */ @@ -1858,7 +1890,8 @@ template class fftblue { arr> tmp(n); tmp[0].Set(c[0],c[0]*0); - memcpy ((void *)(tmp.data()+1),(void *)(c+1), (n-1)*sizeof(T)); + memcpy (reinterpret_cast(tmp.data()+1), + reinterpret_cast(c+1), (n-1)*sizeof(T)); if ((n&1)==0) tmp[n/2].i=c[0]*0.; for (size_t m=1; 2*m class pocketfft_c : len(length) { if (length==0) throw runtime_error("zero-length FFT requested"); - if ((length<50) || (largest_prime_factor(length)<=sqrt(length))) + if ((length<50) || (util::largest_prime_factor(length)<=sqrt(length))) { packplan=unique_ptr>(new cfftp(length)); return; } - double comp1 = cost_guess(length); - double comp2 = 2*cost_guess(good_size(2*length-1)); + double comp1 = util::cost_guess(length); + double comp2 = 2*util::cost_guess(util::good_size(2*length-1)); comp2*=1.5; /* fudge factor that appears to give good overall performance */ if (comp2>(new fftblue(length)); @@ -1908,16 +1941,16 @@ template class pocketfft_c packplan=unique_ptr>(new cfftp(length)); } - template NOINLINE void backward(T c[], T0 fct) + template NOINLINE void backward(cmplx c[], T0 fct) { - packplan ? packplan->backward((cmplx *)c,fct) - : blueplan->backward((cmplx *)c,fct); + packplan ? packplan->backward(c,fct) + : blueplan->backward(c,fct); } - template NOINLINE void forward(T c[], T0 fct) + template NOINLINE void forward(cmplx c[], T0 fct) { - packplan ? packplan->forward((cmplx *)c,fct) - : blueplan->forward((cmplx *)c,fct); + packplan ? packplan->forward(c,fct) + : blueplan->forward(c,fct); } size_t length() const { return len; } @@ -1939,13 +1972,13 @@ template class pocketfft_r : len(length) { if (length==0) throw runtime_error("zero-length FFT requested"); - if ((length<50) || (largest_prime_factor(length)<=sqrt(length))) + if ((length<50) || (util::largest_prime_factor(length)<=sqrt(length))) { packplan=unique_ptr>(new rfftp(length)); return; } - double comp1 = 0.5*cost_guess(length); - double comp2 = 2*cost_guess(good_size(2*length-1)); + double comp1 = 0.5*util::cost_guess(length); + double comp2 = 2*util::cost_guess(util::good_size(2*length-1)); comp2*=1.5; /* fudge factor that appears to give good overall performance */ if (comp2>(new fftblue(length)); @@ -1972,14 +2005,6 @@ template class pocketfft_r // multi-D infrastructure // -size_t prod(const shape_t &shape) - { - size_t res=1; - for (auto sz: shape) - res*=sz; - return res; - } - template class ndarr { private: @@ -1996,7 +2021,7 @@ template class ndarr : d(reinterpret_cast(data_)), cd(reinterpret_cast(data_)), shp(shape_), str(stride_) {} size_t ndim() const { return shp.size(); } - size_t size() const { return prod(shp); } + size_t size() const { return util::prod(shp); } const shape_t &shape() const { return shp; } size_t shape(size_t i) const { return shp[i]; } const stride_t &stride() const { return str; } @@ -2067,20 +2092,6 @@ template class multi_iter bool contiguous_out() const { return str_o==sizeof(To); } }; - -#ifndef POCKETFFT_NO_VECTORS -#if (defined(__AVX512F__)) -#define HAVE_VECSUPPORT -constexpr int VBYTELEN=64; -#elif (defined(__AVX__)) -#define HAVE_VECSUPPORT -constexpr int VBYTELEN=32; -#elif (defined(__SSE2__)) -#define HAVE_VECSUPPORT -constexpr int VBYTELEN=16; -#endif -#endif - #if defined(HAVE_VECSUPPORT) template struct VTYPE{}; template<> struct VTYPE @@ -2104,14 +2115,14 @@ template<> struct VTYPE template arr alloc_tmp(const shape_t &shape, size_t axsize, size_t elemsize) { - auto othersize = prod(shape)/axsize; + auto othersize = util::prod(shape)/axsize; auto tmpsize = axsize*((othersize>=VTYPE::vlen) ? VTYPE::vlen : 1); return arr(tmpsize*elemsize); } template arr alloc_tmp(const shape_t &shape, const shape_t &axes, size_t elemsize) { - size_t fullsize=prod(shape); + size_t fullsize=util::prod(shape); size_t tmpsize=0; for (size_t i=0; i arr alloc_tmp(const shape_t &shape, return arr(tmpsize*elemsize); } -template NOINLINE void pocketfft_general_c( +template NOINLINE void general_c( const ndarr> &in, ndarr> &out, const shape_t &axes, bool forward, T fct) { @@ -2143,15 +2154,15 @@ template NOINLINE void pocketfft_general_c( { using vtype = typename VTYPE::type; it.advance(vlen); - auto tdatav = (cmplx *)storage.data(); + auto tdatav = reinterpret_cast *>(storage.data()); for (size_t i=0; iforward ((vtype *)tdatav, fct) - : plan->backward((vtype *)tdatav, fct); + forward ? plan->forward (tdatav, fct) + : plan->backward(tdatav, fct); for (size_t i=0; i NOINLINE void pocketfft_general_c( while (it.remaining()>0) { it.advance(1); - auto tdata = (cmplx *)storage.data(); + auto tdata = reinterpret_cast *>(storage.data()); if (it.inplace() && it.contiguous_out()) // fully in-place - forward ? plan->forward ((T *)(&it.out(0)), fct) - : plan->backward((T *)(&it.out(0)), fct); + forward ? plan->forward ((&it.out(0)), fct) + : plan->backward((&it.out(0)), fct); else if (it.contiguous_out()) // compute FFT in output location { for (size_t i=0; iforward ((T *)(&it.out(0)), fct) - : plan->backward((T *)(&it.out(0)), fct); + forward ? plan->forward ((&it.out(0)), fct) + : plan->backward((&it.out(0)), fct); } else { for (size_t i=0; iforward ((T *)tdata, fct) - : plan->backward((T *)tdata, fct); + forward ? plan->forward (tdata, fct) + : plan->backward(tdata, fct); for (size_t i=0; i NOINLINE void pocketfft_general_c( } } -template NOINLINE void pocketfft_general_hartley( +template NOINLINE void general_hartley( const ndarr &in, ndarr &out, const shape_t &axes, T fct) { auto storage = alloc_tmp(in.shape(), axes, sizeof(T)); @@ -2204,11 +2215,11 @@ template NOINLINE void pocketfft_general_hartley( { using vtype = typename VTYPE::type; it.advance(vlen); - auto tdatav = (vtype *)storage.data(); + auto tdatav = reinterpret_cast(storage.data()); for (size_t i=0; iforward((vtype *)tdatav, fct); + plan->forward(tdatav, fct); for (size_t j=0; j NOINLINE void pocketfft_general_hartley( while (it.remaining()>0) { it.advance(1); - auto tdata = (T *)storage.data(); + auto tdata = reinterpret_cast(storage.data()); for (size_t i=0; iforward((T *)tdata, fct); + plan->forward(tdata, fct); // Hartley order it.out(0) = tdata[0]; size_t i=1, i1=1, i2=len-1; @@ -2245,7 +2256,7 @@ template NOINLINE void pocketfft_general_hartley( } } -template NOINLINE void pocketfft_general_r2c( +template NOINLINE void general_r2c( const ndarr &in, ndarr> &out, size_t axis, T fct) { auto storage = alloc_tmp(in.shape(), in.shape(axis), sizeof(T)); @@ -2260,11 +2271,11 @@ template NOINLINE void pocketfft_general_r2c( { using vtype = typename VTYPE::type; it.advance(vlen); - auto tdatav = (vtype *)storage.data(); + auto tdatav = reinterpret_cast(storage.data()); for (size_t i=0; i NOINLINE void pocketfft_general_r2c( while (it.remaining()>0) { it.advance(1); - auto tdata = (T *)storage.data(); + auto tdata = reinterpret_cast(storage.data()); for (size_t i=0; i NOINLINE void pocketfft_general_r2c( it.out(ii).Set(tdata[i]); } } -template NOINLINE void pocketfft_general_c2r( +template NOINLINE void general_c2r( const ndarr> &in, ndarr &out, size_t axis, T fct) { auto storage = alloc_tmp(out.shape(), out.shape(axis), sizeof(T)); @@ -2306,9 +2317,10 @@ template NOINLINE void pocketfft_general_c2r( { using vtype = typename VTYPE::type; it.advance(vlen); - auto tdatav = (vtype *)storage.data(); + auto tdatav = reinterpret_cast(storage.data()); for (size_t j=0; j NOINLINE void pocketfft_general_c2r( if (i NOINLINE void pocketfft_general_c2r( while (it.remaining()>0) { it.advance(1); - auto tdata = (T *)storage.data(); + auto tdata = reinterpret_cast(storage.data()); tdata[0]=it.in(0).r; + { size_t i=1, ii=1; for (; i NOINLINE void pocketfft_general_c2r( } if (i NOINLINE void pocketfft_general_r( +template NOINLINE void general_r( const ndarr &in, ndarr &out, size_t axis, bool forward, T fct) { auto storage = alloc_tmp(in.shape(), in.shape(axis), sizeof(T)); @@ -2359,7 +2374,7 @@ template NOINLINE void pocketfft_general_r( { using vtype = typename VTYPE::type; it.advance(vlen); - auto tdatav = (vtype *)storage.data(); + auto tdatav = reinterpret_cast(storage.data()); for (size_t i=0; i NOINLINE void pocketfft_general_r( while (it.remaining()>0) { it.advance(1); - auto tdata = (T *)storage.data(); + auto tdata = reinterpret_cast(storage.data()); if (it.inplace() && it.contiguous_out()) // fully in-place forward ? plan.forward (&it.out(0), fct) : plan.backward(&it.out(0), fct); @@ -2407,7 +2422,7 @@ template void c2c(const shape_t &shape, using namespace detail; ndarr> ain(data_in, shape, stride_in); ndarr> aout(data_out, shape, stride_out); - pocketfft_general_c(ain, aout, axes, forward, fct); + general_c(ain, aout, axes, forward, fct); } template void r2c(const shape_t &shape, @@ -2417,7 +2432,7 @@ template void r2c(const shape_t &shape, using namespace detail; ndarr ain(data_in, shape, stride_in); ndarr> aout(data_out, shape, stride_out); - pocketfft_general_r2c(ain, aout, axis, fct); + general_r2c(ain, aout, axis, fct); } template void c2r(const shape_t &shape, size_t new_size, @@ -2429,7 +2444,7 @@ template void c2r(const shape_t &shape, size_t new_size, shape_out[axis] = new_size; ndarr> ain(data_in, shape, stride_in); ndarr aout(data_out, shape_out, stride_out); - pocketfft_general_c2r(ain, aout, axis, fct); + general_c2r(ain, aout, axis, fct); } template void r2r_fftpack(const shape_t &shape, @@ -2439,7 +2454,7 @@ template void r2r_fftpack(const shape_t &shape, using namespace detail; ndarr ain(data_in, shape, stride_in); ndarr aout(data_out, shape, stride_out); - pocketfft_general_r(ain, aout, axis, forward, fct); + general_r(ain, aout, axis, forward, fct); } template void r2r_hartley(const shape_t &shape, @@ -2449,7 +2464,7 @@ template void r2r_hartley(const shape_t &shape, using namespace detail; ndarr ain(data_in, shape, stride_in); ndarr aout(data_out, shape, stride_out); - pocketfft_general_hartley(ain, aout, axes, fct); + general_hartley(ain, aout, axes, fct); } } // namespace pocketfft diff --git a/pypocketfft.cc b/pypocketfft.cc index 2e2cd9ac1b01ae520e1ae688741577d77aafb757..96b2ad6b1558d4a83ae7bb82a861bab2824bc35e 100644 --- a/pypocketfft.cc +++ b/pypocketfft.cc @@ -85,7 +85,7 @@ template py::array xfftn_internal(const py::array &in, py::array res = inplace ? in : py::array_t>(dims); ndarr> ain(in.data(), dims, copy_strides(in)); ndarr> aout(res.mutable_data(), dims, copy_strides(res)); - pocketfft_general_c(ain, aout, axes, fwd, fct); + general_c(ain, aout, axes, fwd, fct); return res; } @@ -110,10 +110,10 @@ template py::array rfftn_internal(const py::array &in, py::array res = py::array_t>(dims_out); ndarr ain(in.data(), dims_in, copy_strides(in)); ndarr> aout(res.mutable_data(), dims_out, copy_strides(res)); - pocketfft_general_r2c(ain, aout, axes.back(), fct); + general_r2c(ain, aout, axes.back(), fct); if (axes.size()==1) return res; shape_t axes2(axes.begin(), --axes.end()); - pocketfft_general_c(aout, aout, axes2, true, 1.); + general_c(aout, aout, axes2, true, 1.); return res; } py::array rfftn(const py::array &in, py::object axes_, double fct) @@ -128,7 +128,7 @@ template py::array xrfft_scipy(const py::array &in, py::array res = inplace ? in : py::array_t(dims); ndarr ain(in.data(), dims, copy_strides(in)); ndarr aout(res.mutable_data(), dims, copy_strides(res)); - pocketfft_general_r(ain, aout, axis, fwd, fct); + general_r(ain, aout, axis, fwd, fct); return res; } py::array rfft_scipy(const py::array &in, size_t axis, double fct, bool inplace) @@ -160,7 +160,7 @@ template py::array irfftn_internal(const py::array &in, py::array res = py::array_t(dims_out); ndarr> ain(inter.data(), copy_shape(inter), copy_strides(inter)); ndarr aout(res.mutable_data(), dims_out, copy_strides(res)); - pocketfft_general_c2r(ain, aout, axis, fct); + general_c2r(ain, aout, axis, fct); return res; } py::array irfftn(const py::array &in, py::object axes_, size_t lastsize, @@ -178,7 +178,7 @@ template py::array hartley_internal(const py::array &in, py::array res = inplace ? in : py::array_t(dims); ndarr ain(in.data(), copy_shape(in), copy_strides(in)); ndarr aout(res.mutable_data(), copy_shape(res), copy_strides(res)); - pocketfft_general_hartley(ain, aout, makeaxes(in, axes_), fct); + general_hartley(ain, aout, makeaxes(in, axes_), fct); return res; } py::array hartley(const py::array &in, py::object axes_, double fct,