Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pypocketfft
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
4
Issues
4
List
Boards
Labels
Service Desk
Milestones
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Martin Reinecke
pypocketfft
Commits
b87f51f5
Commit
b87f51f5
authored
Sep 26, 2019
by
Martin Reinecke
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use system sin/cos
parent
189bffbf
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
80 additions
and
304 deletions
+80
-304
bench.py
bench.py
+1
-2
pocketfft_hdronly.h
pocketfft_hdronly.h
+51
-276
stress.py
stress.py
+28
-26
No files found.
bench.py
View file @
b87f51f5
import
numpy
as
np
import
pypocketfft
import
pyfftw
from
time
import
time
import
matplotlib.pyplot
as
plt
import
math
...
...
@@ -108,7 +107,7 @@ def bench_nd(ndim, nmax, ntry, tp, funcs, nrepeat, ttl="", filename=""):
plt
.
show
()
funcs
=
(
measure_pypocketfft
,
measure_fftw
)
funcs
=
(
measure_pypocketfft
,
measure_fftw
_np_interface
)
ttl
=
"pypocketfft/FFTW()"
bench_nd
(
1
,
8192
,
100
,
"c16"
,
funcs
,
10
,
ttl
,
"1d.png"
)
bench_nd
(
2
,
2048
,
100
,
"c16"
,
funcs
,
2
,
ttl
,
"2d.png"
)
...
...
pocketfft_hdronly.h
View file @
b87f51f5
...
...
@@ -266,295 +266,69 @@ template<bool fwd, typename T> void ROTX90(cmplx<T> &a)
//
// twiddle factor section
//
/** Approximate sin(pi*a) within the range [-0.25, 0.25] (single precision).
 *
 *  Evaluates an odd polynomial in a: pi*a + a^3 * P(a^2), where P is a
 *  degree-2 polynomial evaluated with Horner's scheme.
 *
 *  All fused multiply-adds use fmaf explicitly, matching cosm1pi0(float).
 *  An unqualified fma() call with float arguments only resolves to the float
 *  overload when namespace std is visible; otherwise the C function
 *  fma(double,double,double) is picked and every step is silently evaluated
 *  in double and rounded back to float.
 *
 *  @param a  argument, expected to lie in [-0.25, 0.25]
 *  @return   approximation of sin(pi*a)
 */
inline float sinpi0(float a)
  {
  // adapted from https://stackoverflow.com/questions/42792939/
  float s = a * a;
  float r = -5.957031250000000000e-01f;
  r = fmaf (r, s,  2.550399541854858398e+00f);
  r = fmaf (r, s, -5.167724132537841797e+00f);
  r = (a * s) * r;                     // a^3 * P(a^2)
  return fmaf (a, 3.141592741012573242e+00f, r);   // pi*a + a^3 * P(a^2)
  }
/** Approximate sin(pi*a) within the range [-0.25, 0.25] (double precision).
 *
 *  The result is formed as pi*a + a^3 * P(a^2), with P a degree-5 polynomial
 *  evaluated by Horner's scheme using fused multiply-adds.
 */
inline double sinpi0(double a)
  {
  // adapted from https://stackoverflow.com/questions/42792939/
  // coefficients of P, highest order first
  const double coeff[6] =
    {
     4.6151442520157035e-4,
    -7.3700183130883555e-3,
     8.2145868949323936e-2,
    -5.9926452893214921e-1,
     2.5501640398732688e+0,
    -5.1677127800499516e+0
    };
  const double sq = a * a;
  double poly = coeff[0];
  for (int k=1; k<6; ++k)
    poly = fma (poly, sq, coeff[k]);
  const double cube = sq * a;
  poly = poly * cube;
  return fma (a, 3.1415926535897931e+0, poly);
  }
/** Approximate cos(pi*x)-1 for x in [-0.25,0.25] (single precision).
 *
 *  Even polynomial in the argument: a^2 * Q(a^2), with Q of degree 3
 *  evaluated by Horner's scheme using fused multiply-adds.
 */
inline float cosm1pi0(float a)
  {
  // adapted from https://stackoverflow.com/questions/42792939/
  const float sq = a * a;
  const float poly =
    fmaf (fmaf (fmaf (2.313842773437500000e-01f, sq,
                      -1.335021972656250000e+00f), sq,
                4.058703899383544922e+00f), sq,
          -4.934802055358886719e+00f);
  return poly*sq;
  }
/** Approximate cos(pi*x)-1 for x in [-0.25,0.25] (double precision).
 *
 *  Even polynomial in the argument: a^2 * Q(a^2), with Q of degree 6
 *  evaluated by Horner's scheme using fused multiply-adds.
 */
inline double cosm1pi0(double a)
  {
  // adapted from https://stackoverflow.com/questions/42792939/
  // coefficients of Q, highest order first
  const double coeff[7] =
    {
    -1.0369917389758117e-4,
     1.9294935641298806e-3,
    -2.5806887942825395e-2,
     2.3533063028328211e-1,
    -1.3352627688538006e+0,
     4.0587121264167623e+0,
    -4.9348022005446790e+0
    };
  const double sq = a * a;
  double poly = coeff[0];
  for (int k=1; k<7; ++k)
    poly = fma (poly, sq, coeff[k]);
  return poly*sq;
  }
/** Computes cos(pi*a_)-1 and sin(pi*a_) in one call.
 *
 *  res[0] receives the cosine-minus-one value, res[1] the sine value.
 *  Types no wider than double are routed through the double-precision
 *  polynomial kernels (which are documented for arguments in [-0.25, 0.25]);
 *  wider types fall back to the library sin/sqrt functions.
 */
template <typename T> void sincosm1pi0(T a_, T *POCKETFFT_RESTRICT res)
  {
  const bool wider_than_double = sizeof(T)>sizeof(double);
  if (!wider_than_double)
    {
    // common case: use the polynomial approximations in double precision
    res[0] = T(cosm1pi0(double(a_)));
    res[1] = T(sinpi0(double(a_)));
    return;
    }
  // don't have the code for long double
  constexpr T pi = T(3.141592653589793238462643383279502884197L);
  const auto sine = sin(pi*a_);
  res[1] = sine;
  // cos-1 expressed through the sine: -s^2 / (1 + sqrt(1-s^2))
  res[0] = (sine*sine)/(-sqrt((1-sine)*(1+sine))-1);
  }
/** Computes sin(pi*a).
 *
 *  The argument is split into a multiple of 0.5 plus a remainder that lies
 *  in the primary approximation interval; the multiple selects which of the
 *  two polynomial kernels (and which sign) yields the result.
 */
template <typename T> T sinpi(T a)
  {
  // number of half-units closest to a; must use IEEE-754 "to nearest" rounding
  const auto halves = nearbyint (a + a);
  const auto quadrant = static_cast<int64_t>(halves) % 4;
  // remainder a - halves/2, in the primary approximation interval
  const auto rem = fma (T(-0.5), halves, a);
  if (quadrant==0)
    return sinpi0(rem);
  if ((quadrant==1) || (quadrant==-3))
    return cosm1pi0(rem) + T(1.);
  if ((quadrant==2) || (quadrant==-2))
    return T(0.) - sinpi0(rem);
  if ((quadrant==3) || (quadrant==-1))
    return T(-1.) - cosm1pi0(rem);
  throw runtime_error("cannot happen");
  }
/** Computes cos(pi*a).
 *
 *  The argument is split into a multiple of 0.5 plus a remainder that lies
 *  in the primary approximation interval; the multiple selects which of the
 *  two polynomial kernels (and which sign) yields the result.
 */
template <typename T> T cospi(T a)
  {
  // number of half-units closest to a; must use IEEE-754 "to nearest" rounding
  const auto halves = nearbyint (a + a);
  const auto quadrant = static_cast<int64_t>(halves) % 4;
  // remainder a - halves/2, in the primary approximation interval
  const auto rem = fma (T(-0.5), halves, a);
  if (quadrant==0)
    return cosm1pi0(rem) + T(1.);
  if ((quadrant==1) || (quadrant==-3))
    return T(0.) - sinpi0(rem);
  if ((quadrant==2) || (quadrant==-2))
    return T(-1.) - cosm1pi0(rem);
  if ((quadrant==3) || (quadrant==-1))
    return sinpi0(rem);
  throw runtime_error("cannot happen");
  }
/** cos(pi*a) for long double arguments.
 *
 *  When long double is wider than double the library cosine is applied to
 *  a*pi directly; otherwise the computation is delegated to the double
 *  version of cospi.
 */
inline long double cospi(long double a)
  {
  if (sizeof(long double) > sizeof(double))
    {
    constexpr auto pi = 3.141592653589793238462643383279502884197L;
    return cos(a * pi);
    }
  return static_cast<long double>(cospi(static_cast<double>(a)));
  }
/** sin(pi*a) for long double arguments.
 *
 *  When long double is wider than double the library sine is applied to
 *  a*pi directly; otherwise the computation is delegated to the double
 *  version of sinpi.
 */
inline long double sinpi(long double a)
  {
  if (sizeof(long double) > sizeof(double))
    {
    constexpr auto pi = 3.141592653589793238462643383279502884197L;
    return sin(a * pi);
    }
  return static_cast<long double>(sinpi(static_cast<double>(a)));
  }
template <typename T> void sincospi(T a, T *POCKETFFT_RESTRICT res)
{
// reduce argument to primary approximation interval (-0.25, 0.25)
auto r = nearbyint (a + a); // must use IEEE-754 "to nearest" rounding
auto i = (int64_t)r;
auto t = fma (T(-0.5), r, a);
auto c=cosm1pi0(t)+T(1.);
auto s=sinpi0(t);
// map results according to quadrant
if (i & 2)
{
s = T(0.)-s;
c = T(0.)-c;
}
if (i & 1)
{
swap(s, c);
c = T(0.)-c;
}
res[0]=c;
res[1]=s;
}
/** cos(pi*a) and sin(pi*a) for long double arguments.
 *
 *  Stores the cosine in res[0] and the sine in res[1]. When long double is
 *  not wider than double the work is delegated to the double version and the
 *  results are converted back; otherwise the library functions are applied
 *  to pi*a directly.
 */
inline void sincospi(long double a, long double *POCKETFFT_RESTRICT res)
  {
  if (!(sizeof(long double) > sizeof(double)))
    {
    double sincos[2];
    sincospi(static_cast<double>(a), sincos);
    res[0] = static_cast<long double>(sincos[0]);
    res[1] = static_cast<long double>(sincos[1]);
    return;
    }
  constexpr auto pi = 3.141592653589793238462643383279502884197L;
  res[0] = cos(pi * a);
  res[1] = sin(pi * a);
  }
template
<
typename
T
>
class
sincos_2pibyn
{
private:
using
Thigh
=
typename
conditional
<
(
sizeof
(
T
)
>
sizeof
(
double
)),
T
,
double
>::
type
;
size_t sz;
arr<cmplx<T>> data;
// Fills res[0] .. res[n-1], n = (den+4)>>3, with (cos(2*pi*k/den), sin(2*pi*k/den))
// for k = 0 .. n-1.
//
// Only about 2*sqrt(n) entries are obtained from sincosm1pi0 directly; the
// remaining ones are produced with the angle-addition formulas from a small
// table (tmp) and one "block start" value (cs) per block of l1 entries.
// All intermediate values are kept in the form (cos-1, sin) in type Thigh and
// the 1 is added back only when the final value is stored.
//
// Every argument handed to sincosm1pi0 is 2*k/den with k < n, i.e. below 0.25,
// which is the interval the polynomial kernels are documented for.
// NOTE(review): passing &tmp[i].r / &cs.r as a two-element output array
// presumably relies on cmplx storing r and i contiguously -- cmplx is defined
// outside this view, confirm there.
POCKETFFT_NOINLINE void calc_first_octant(size_t den,
cmplx<T> * POCKETFFT_RESTRICT res)
{
// number of entries to produce
size_t n = (den+4)>>3;
if (n==0) return;
res[0].Set(1.,0.);
if (n==1) return;
// block length, roughly sqrt(n)
size_t l1 = size_t(sqrt(n));
// tmp[i] = (cos-1, sin) of the i-th angle, for 1 <= i < l1; tmp[0] is never used
arr<cmplx<Thigh>> tmp(l1);
for (size_t i=1; i<l1; ++i)
{
sincosm1pi0(Thigh(2*i)/Thigh(den),&tmp[i].r);
res[i].Set(T(tmp[i].r)+1,T(tmp[i].i));
}
size_t start=l1;
while(start<n)
{
// (cos-1, sin) of the first angle of this block
cmplx<Thigh> cs;
sincosm1pi0((Thigh(2*start))/Thigh(den),&cs.r);
res[start].Set(T(cs.r+1), T(cs.i));
// the last block may be shorter than l1
size_t end = l1;
if (start+end>n) end = n-start;
for (size_t i=1; i<end; ++i)
{
cmplx<Thigh> csx=tmp[i];
// angle addition written for the (cos-1, sin) representation:
//   cos(a+b) = (ca-1)(cb-1) - sa*sb + (ca-1) + (cb-1) + 1
//   sin(a+b) = (ca-1)*sb + sa*(cb-1) + sa + sb
res[start+i].Set(T(((cs.r*csx.r - cs.i*csx.i + cs.r) + csx.r) + 1),
T((cs.r*csx.i + cs.i*csx.r) + cs.i + csx.i));
}
start += l1;
}
}
size_t
mask
,
shift
;
arr
<
cmplx
<
Thigh
>>
v1
,
v2
;
void calc_first_quadrant(size_t n, cmplx<T> * POCKETFFT_RESTRICT res
)
static
cmplx
<
Thigh
>
calc
(
size_t
x
,
size_t
n
,
Thigh
ang
)
{
arr<cmplx<T>> p(n/2); // n is always even here
calc_first_octant(n<<1, p.data());
size_t ndone=(n+2)>>2;
size_t i=0, idx1=0, idx2=ndone-1;
for (; i+1<ndone; i+=2, ++idx1, --idx2)
x
<<=
3
;
if
(
x
<
4
*
n
)
// first half
{
res[idx1] = p[i];
res[idx2].Set(p[i+1].i, p[i+1].r);
if
(
x
<
2
*
n
)
// first quadrant
{
if
(
x
<
n
)
return
cmplx
<
Thigh
>
(
cos
(
Thigh
(
x
)
*
ang
),
sin
(
Thigh
(
x
)
*
ang
));
return
cmplx
<
Thigh
>
(
sin
(
Thigh
(
2
*
n
-
x
)
*
ang
),
cos
(
Thigh
(
2
*
n
-
x
)
*
ang
));
}
else
// second quadrant
{
x
-=
2
*
n
;
if
(
x
<
n
)
return
cmplx
<
Thigh
>
(
-
sin
(
Thigh
(
x
)
*
ang
),
cos
(
Thigh
(
x
)
*
ang
));
return
cmplx
<
Thigh
>
(
-
cos
(
Thigh
(
2
*
n
-
x
)
*
ang
),
sin
(
Thigh
(
2
*
n
-
x
)
*
ang
));
}
}
if (i!=ndone)
res[idx1] = p[i];
}
// Fills res[0] .. res[ndone-1], ndone = (n+1)>>1, from a first-octant table
// computed for the denominator 4*n.
//
// p[] holds the (n+1)/2 first-octant entries returned by
// calc_first_octant(n<<2, ...). The running index i4 advances by 4 for every
// output entry i; the four loops below pick the table entry and the
// swap/sign pattern according to how i4 compares with n, 2*n-ish bounds
// (labelled octant 0..3 by the original author).
void calc_first_half(size_t n, cmplx<T> * POCKETFFT_RESTRICT res)
{
int ndone=int(n+1)>>1;
arr<cmplx<T>> p((n+1)/2); // n is always odd here
calc_first_octant(n<<2, p.data());
// i: output index, i4 = 4*i, in = n as a signed value so differences may go negative
int i4=0, in=int(n), i=0;
// copy table entry unchanged
for (; i4<=in-i4; ++i, i4+=4) // octant 0
res[i] = p[i4];
// table entry in-i4 with real and imaginary parts exchanged
for (; i4-in <= 0; ++i, i4+=4) // octant 1
{ auto xm = in-i4; res[i].Set(p[xm].i, p[xm].r); }
// table entry i4-in, parts exchanged and the new real part negated
for (; i4<=3*in-i4; ++i, i4+=4) // octant 2
{ auto xm = i4-in; res[i].Set(-p[xm].i, p[xm].r); }
// table entry 2*in-i4 with the real part negated
for (; i<ndone; ++i, i4+=4) // octant 3
{ auto xm = 2*in-i4; res[i].Set(-p[xm].r, p[xm].i); }
}
void fill_first_quadrant(size_t n, cmplx<T> * POCKETFFT_RESTRICT res)
{
constexpr T hsqt2 = T(0.707106781186547524400844362104849L);
size_t quart = n>>2;
if ((n&7)==0)
res[quart/2].Set(hsqt2, hsqt2);
for (size_t i=1, j=quart-1; 2*i<quart; ++i, --j)
{ res[j].Set(res[i].i, res[i].r); }
}
// Extends the table in res up to and including index n/2.
// If n is a multiple of 4, each entry of the first quarter is copied to the
// second quarter as (-imag, real). Otherwise the entries are mirrored about
// the middle of the half range as (-real, imag).
// In both cases res[n/2] is finally set to (-1, 0).
POCKETFFT_NOINLINE void fill_first_half(size_t n, cmplx<T> * POCKETFFT_RESTRICT res)
  {
  const size_t half = n>>1;
  if ((n&3)!=0)
    {
    for (size_t lo=1; 2*lo<half; ++lo)
      res[half-lo].Set(-res[lo].r, res[lo].i);
    }
  else
    {
    const size_t quarter = half/2;
    for (size_t k=0; k<quarter; ++k)
      res[k+quarter].Set(-res[k].i, res[k].r);
    }
  res[half].Set(T(-1),T(0));
  }
POCKETFFT_NOINLINE void sincos_2pibyn_half(size_t n, cmplx<T> * POCKETFFT_RESTRICT res)
{
if ((n&3)==0)
{
calc_first_octant(n, res);
fill_first_quadrant(n, res);
fill_first_half(n, res);
}
else if ((n&1)==0)
{
calc_first_quadrant(n, res);
fill_first_half(n, res);
x
=
8
*
n
-
x
;
if
(
x
<
2
*
n
)
// third quadrant
{
if
(
x
<
n
)
return
cmplx
<
Thigh
>
(
cos
(
Thigh
(
x
)
*
ang
),
-
sin
(
Thigh
(
x
)
*
ang
));
return
cmplx
<
Thigh
>
(
sin
(
Thigh
(
2
*
n
-
x
)
*
ang
),
-
cos
(
Thigh
(
2
*
n
-
x
)
*
ang
));
}
else
// fourth quadrant
{
x
-=
2
*
n
;
if
(
x
<
n
)
return
cmplx
<
Thigh
>
(
-
sin
(
Thigh
(
x
)
*
ang
),
-
cos
(
Thigh
(
x
)
*
ang
));
return
cmplx
<
Thigh
>
(
-
cos
(
Thigh
(
2
*
n
-
x
)
*
ang
),
-
sin
(
Thigh
(
2
*
n
-
x
)
*
ang
));
}
}
else
calc_first_half(n, res);
}
public:
POCKETFFT_NOINLINE
sincos_2pibyn
(
size_t
n
)
: sz(n), data(n/2+1)
{ sincos_2pibyn_half(n, data.data()); }
{
constexpr
auto
pi
=
3.141592653589793238462643383279502884197
L
;
Thigh
ang
=
Thigh
(
0.25
L
*
pi
/
n
);
shift
=
1
;
while
((
size_t
(
1
)
<<
shift
)
*
(
size_t
(
1
)
<<
shift
)
<
n
)
++
shift
;
mask
=
(
size_t
(
1
)
<<
shift
)
-
1
;
v1
.
resize
(
mask
+
1
);
v1
[
0
].
Set
(
Thigh
(
1
),
Thigh
(
0
));
for
(
size_t
i
=
1
;
i
<
v1
.
size
();
++
i
)
v1
[
i
]
=
calc
(
i
,
n
,
ang
);
v2
.
resize
((
n
+
mask
)
/
(
mask
+
1
));
v2
[
0
].
Set
(
Thigh
(
1
),
Thigh
(
0
));
for
(
size_t
i
=
1
;
i
<
v2
.
size
();
++
i
)
v2
[
i
]
=
calc
(
i
*
(
mask
+
1
),
n
,
ang
);
}
cmplx
<
T
>
operator
[](
size_t
idx
)
const
{
if (idx<data.size())
return data[idx];
auto c = data[sz-idx];
c.i = -c.i;
return c;
auto
x1
=
v1
[
idx
&
mask
],
x2
=
v2
[
idx
>>
shift
];
return
cmplx
<
T
>
(
T
(
x1
.
r
*
x2
.
r
-
x1
.
i
*
x2
.
i
),
T
(
x1
.
r
*
x2
.
i
+
x1
.
i
*
x2
.
r
));
}
};
...
...
@@ -2710,9 +2484,10 @@ template<typename T0> class T_dcst23
POCKETFFT_NOINLINE
T_dcst23
(
size_t
length
)
:
fftplan
(
length
),
twiddle
(
length
)
{
const auto oo2n = T0(0.5)/T0(length);
constexpr
auto
pi
=
T0
(
3.141592653589793238462643383279502884197
L
);
const
auto
oo2n
=
pi
*
T0
(
0.5
)
/
T0
(
length
);
for
(
size_t
i
=
0
;
i
<
length
;
++
i
)
twiddle[i] = cos
pi
(oo2n*T0(i+1));
twiddle
[
i
]
=
cos
(
oo2n
*
T0
(
i
+
1
));
}
template
<
typename
T
>
POCKETFFT_NOINLINE
void
exec
(
T
c
[],
T0
fct
,
bool
ortho
,
...
...
@@ -2789,9 +2564,9 @@ template<typename T0> class T_dcst4
if
((
N
&
1
)
==
0
)
for
(
size_t
i
=
0
;
i
<
N
/
2
;
++
i
)
{
T0 sincos[2]
;
sincospi(oon*(T0(i)+T0(0.125)), sincos
);
C2[i].Set(
sincos[0], sincos[1]
);
constexpr
auto
pi
=
T0
(
3.141592653589793238462643383279502884197
L
)
;
T0
ang
=
pi
*
oon
*
(
T0
(
i
)
+
T0
(
0.125
)
);
C2
[
i
].
Set
(
cos
(
ang
),
sin
(
ang
)
);
}
}
...
...
stress.py
View file @
b87f51f5
...
...
@@ -2,8 +2,10 @@ import numpy as np
import
pypocketfft
def
_l2error
(
a
,
b
):
return
np
.
sqrt
(
np
.
sum
(
np
.
abs
(
a
-
b
)
**
2
)
/
np
.
sum
(
np
.
abs
(
a
)
**
2
))
np
.
random
.
seed
(
42
)
def
_l2error
(
a
,
b
,
axes
):
return
np
.
sqrt
(
np
.
sum
(
np
.
abs
(
a
-
b
)
**
2
)
/
np
.
sum
(
np
.
abs
(
a
)
**
2
))
/
np
.
log2
(
np
.
max
([
2
,
np
.
prod
(
np
.
take
(
a
.
shape
,
axes
))]))
def
fftn
(
a
,
axes
=
None
,
inorm
=
0
,
out
=
None
,
nthreads
=
1
):
...
...
@@ -26,7 +28,7 @@ def irfftn(a, axes=None, lastsize=0, inorm=0, nthreads=1):
inorm
=
inorm
,
nthreads
=
nthreads
)
nthreads
=
0
nthreads
=
1
def
update_err
(
err
,
name
,
value
):
...
...
@@ -40,9 +42,9 @@ def update_err(err, name, value):
def
test
(
err
):
ndim
=
np
.
random
.
randint
(
1
,
5
)
ndim
=
3
#
np.random.randint(1, 5)
axlen
=
int
((
2
**
20
)
**
(
1.
/
ndim
))
shape
=
np
.
random
.
randint
(
1
,
axlen
,
ndim
)
shape
=
(
np
.
random
.
randint
(
1
,
axlen
,
ndim
)
//
2
)
*
2
+
1
axes
=
np
.
arange
(
ndim
)
np
.
random
.
shuffle
(
axes
)
nax
=
np
.
random
.
randint
(
1
,
ndim
+
1
)
...
...
@@ -52,85 +54,85 @@ def test(err):
a_32
=
a
.
astype
(
np
.
complex64
)
b
=
ifftn
(
fftn
(
a
,
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"cmax"
,
_l2error
(
a
,
b
))
err
=
update_err
(
err
,
"cmax"
,
_l2error
(
a
,
b
,
axes
))
b
=
ifftn
(
fftn
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"cmax"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"cmax"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
fftn
(
ifftn
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"cmax"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"cmax"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
irfftn
(
rfftn
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
lastsize
=
lastsize
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"rmax"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"rmax"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
ifftn
(
fftn
(
a
.
astype
(
np
.
complex64
),
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"cmaxf"
,
_l2error
(
a
.
astype
(
np
.
complex64
),
b
))
err
=
update_err
(
err
,
"cmaxf"
,
_l2error
(
a
.
astype
(
np
.
complex64
),
b
,
axes
))
b
=
irfftn
(
rfftn
(
a
.
real
.
astype
(
np
.
float32
),
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
lastsize
=
lastsize
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"rmaxf"
,
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
))
err
=
update_err
(
err
,
"rmaxf"
,
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
,
axes
))
b
=
pypocketfft
.
separable_hartley
(
pypocketfft
.
separable_hartley
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"hmax"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"hmax"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
genuine_hartley
(
pypocketfft
.
genuine_hartley
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"hmax"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"hmax"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
separable_hartley
(
pypocketfft
.
separable_hartley
(
a
.
real
.
astype
(
np
.
float32
),
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"hmaxf"
,
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
))
err
=
update_err
(
err
,
"hmaxf"
,
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
,
axes
))
b
=
pypocketfft
.
genuine_hartley
(
pypocketfft
.
genuine_hartley
(
a
.
real
.
astype
(
np
.
float32
),
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"hmaxf"
,
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
))
err
=
update_err
(
err
,
"hmaxf"
,
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
,
axes
))
if
all
(
a
.
shape
[
i
]
>
1
for
i
in
axes
):
b
=
pypocketfft
.
dct
(
pypocketfft
.
dct
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
1
),
axes
=
axes
,
type
=
1
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"c1max"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"c1max"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dct
(
pypocketfft
.
dct
(
a_32
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
1
),
axes
=
axes
,
type
=
1
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"c1maxf"
,
_l2error
(
a_32
.
real
,
b
))
err
=
update_err
(
err
,
"c1maxf"
,
_l2error
(
a_32
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dct
(
pypocketfft
.
dct
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
2
),
axes
=
axes
,
type
=
3
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"c23max"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"c23max"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dct
(
pypocketfft
.
dct
(
a_32
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
2
),
axes
=
axes
,
type
=
3
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"c23maxf"
,
_l2error
(
a_32
.
real
,
b
))
err
=
update_err
(
err
,
"c23maxf"
,
_l2error
(
a_32
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dct
(
pypocketfft
.
dct
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
4
),
axes
=
axes
,
type
=
4
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"c4max"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"c4max"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dct
(
pypocketfft
.
dct
(
a_32
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
4
),
axes
=
axes
,
type
=
4
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"c4maxf"
,
_l2error
(
a_32
.
real
,
b
))
err
=
update_err
(
err
,
"c4maxf"
,
_l2error
(
a_32
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dst
(
pypocketfft
.
dst
(
a_32
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
1
),
axes
=
axes
,
type
=
1
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"s1maxf"
,
_l2error
(
a_32
.
real
,
b
))
err
=
update_err
(
err
,
"s1maxf"
,
_l2error
(
a_32
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dst
(
pypocketfft
.
dst
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
2
),
axes
=
axes
,
type
=
3
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"s23max"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"s23max"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dst
(
pypocketfft
.
dst
(
a_32
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
2
),
axes
=
axes
,
type
=
3
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"s23maxf"
,
_l2error
(
a_32
.
real
,
b
))
err
=
update_err
(
err
,
"s23maxf"
,
_l2error
(
a_32
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dst
(
pypocketfft
.
dst
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
4
),
axes
=
axes
,
type
=
4
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"s4max"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"s4max"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dst
(
pypocketfft
.
dst
(
a_32
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
4
),
axes
=
axes
,
type
=
4
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"s4maxf"
,
_l2error
(
a_32
.
real
,
b
))
err
=
update_err
(
err
,
"s4maxf"
,
_l2error
(
a_32
.
real
,
b
,
axes
))
err
=
dict
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment