Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pypocketfft
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
4
Issues
4
List
Boards
Labels
Service Desk
Milestones
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Martin Reinecke
pypocketfft
Commits
b87f51f5
Commit
b87f51f5
authored
Sep 26, 2019
by
Martin Reinecke
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use system sin/cos
parent
189bffbf
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
80 additions
and
304 deletions
+80
-304
bench.py
bench.py
+1
-2
pocketfft_hdronly.h
pocketfft_hdronly.h
+51
-276
stress.py
stress.py
+28
-26
No files found.
bench.py
View file @
b87f51f5
import
numpy
as
np
import
pypocketfft
import
pyfftw
from
time
import
time
import
matplotlib.pyplot
as
plt
import
math
...
...
@@ -108,7 +107,7 @@ def bench_nd(ndim, nmax, ntry, tp, funcs, nrepeat, ttl="", filename=""):
plt
.
show
()
funcs
=
(
measure_pypocketfft
,
measure_fftw
)
funcs
=
(
measure_pypocketfft
,
measure_fftw
_np_interface
)
ttl
=
"pypocketfft/FFTW()"
bench_nd
(
1
,
8192
,
100
,
"c16"
,
funcs
,
10
,
ttl
,
"1d.png"
)
bench_nd
(
2
,
2048
,
100
,
"c16"
,
funcs
,
2
,
ttl
,
"2d.png"
)
...
...
pocketfft_hdronly.h
View file @
b87f51f5
...
...
@@ -266,295 +266,69 @@ template<bool fwd, typename T> void ROTX90(cmplx<T> &a)
//
// twiddle factor section
//
/** Approximate sin(pi*a) within the range [-0.25, 0.25] */
inline
float
sinpi0
(
float
a
)
{
// adapted from https://stackoverflow.com/questions/42792939/
float
s
=
a
*
a
;
float
r
=
-
5.957031250000000000e-01
f
;
r
=
fma
(
r
,
s
,
2.550399541854858398e+00
f
);
r
=
fma
(
r
,
s
,
-
5.167724132537841797e+00
f
);
r
=
(
a
*
s
)
*
r
;
return
fma
(
a
,
3.141592741012573242e+00
f
,
r
);
}
/** Approximate sin(pi*a) within the range [-0.25, 0.25] */
inline
double
sinpi0
(
double
a
)
{
// adapted from https://stackoverflow.com/questions/42792939/
double
s
=
a
*
a
;
double
r
=
4.6151442520157035e-4
;
r
=
fma
(
r
,
s
,
-
7.3700183130883555e-3
);
r
=
fma
(
r
,
s
,
8.2145868949323936e-2
);
r
=
fma
(
r
,
s
,
-
5.9926452893214921e-1
);
r
=
fma
(
r
,
s
,
2.5501640398732688e+0
);
r
=
fma
(
r
,
s
,
-
5.1677127800499516e+0
);
s
=
s
*
a
;
r
=
r
*
s
;
return
fma
(
a
,
3.1415926535897931e+0
,
r
);
}
/** Approximate cos(pi*x)-1 for x in [-0.25,0.25] */
inline
float
cosm1pi0
(
float
a
)
{
// adapted from https://stackoverflow.com/questions/42792939/
float
s
=
a
*
a
;
float
r
=
2.313842773437500000e-01
f
;
r
=
fmaf
(
r
,
s
,
-
1.335021972656250000e+00
f
);
r
=
fmaf
(
r
,
s
,
4.058703899383544922e+00
f
);
r
=
fmaf
(
r
,
s
,
-
4.934802055358886719e+00
f
);
return
r
*
s
;
}
/** Approximate cos(pi*x)-1 for x in [-0.25,0.25] */
inline
double
cosm1pi0
(
double
a
)
{
// adapted from https://stackoverflow.com/questions/42792939/
double
s
=
a
*
a
;
double
r
=
-
1.0369917389758117e-4
;
r
=
fma
(
r
,
s
,
1.9294935641298806e-3
);
r
=
fma
(
r
,
s
,
-
2.5806887942825395e-2
);
r
=
fma
(
r
,
s
,
2.3533063028328211e-1
);
r
=
fma
(
r
,
s
,
-
1.3352627688538006e+0
);
r
=
fma
(
r
,
s
,
4.0587121264167623e+0
);
r
=
fma
(
r
,
s
,
-
4.9348022005446790e+0
);
return
r
*
s
;
}
template
<
typename
T
>
void
sincosm1pi0
(
T
a_
,
T
*
POCKETFFT_RESTRICT
res
)
{
if
(
sizeof
(
T
)
>
sizeof
(
double
))
// don't have the code for long double
{
constexpr
T
pi
=
T
(
3.141592653589793238462643383279502884197
L
);
auto
s
=
sin
(
pi
*
a_
);
res
[
1
]
=
s
;
res
[
0
]
=
(
s
*
s
)
/
(
-
sqrt
((
1
-
s
)
*
(
1
+
s
))
-
1
);
return
;
}
res
[
0
]
=
T
(
cosm1pi0
(
double
(
a_
)));
res
[
1
]
=
T
(
sinpi0
(
double
(
a_
)));
}
template
<
typename
T
>
T
sinpi
(
T
a
)
{
// reduce argument to primary approximation interval (-0.25, 0.25)
auto
r
=
nearbyint
(
a
+
a
);
// must use IEEE-754 "to nearest" rounding
auto
i
=
(
int64_t
)
r
;
auto
t
=
fma
(
T
(
-
0.5
),
r
,
a
);
switch
(
i
%
4
)
{
case
0
:
return
sinpi0
(
t
);
case
1
:
case
-
3
:
return
cosm1pi0
(
t
)
+
T
(
1.
);
case
2
:
case
-
2
:
return
T
(
0.
)
-
sinpi0
(
t
);
case
3
:
case
-
1
:
return
T
(
-
1.
)
-
cosm1pi0
(
t
);
}
throw
runtime_error
(
"cannot happen"
);
}
template
<
typename
T
>
T
cospi
(
T
a
)
{
// reduce argument to primary approximation interval (-0.25, 0.25)
auto
r
=
nearbyint
(
a
+
a
);
// must use IEEE-754 "to nearest" rounding
auto
i
=
(
int64_t
)
r
;
auto
t
=
fma
(
T
(
-
0.5
),
r
,
a
);
switch
(
i
%
4
)
{
case
0
:
return
cosm1pi0
(
t
)
+
T
(
1.
);
case
1
:
case
-
3
:
return
T
(
0.
)
-
sinpi0
(
t
);
case
2
:
case
-
2
:
return
T
(
-
1.
)
-
cosm1pi0
(
t
);
case
3
:
case
-
1
:
return
sinpi0
(
t
);
}
throw
runtime_error
(
"cannot happen"
);
}
inline
long
double
cospi
(
long
double
a
)
{
constexpr
auto
pi
=
3.141592653589793238462643383279502884197
L
;
return
sizeof
(
long
double
)
>
sizeof
(
double
)
?
cos
(
a
*
pi
)
:
static_cast
<
long
double
>
(
cospi
(
static_cast
<
double
>
(
a
)));
}
inline
long
double
sinpi
(
long
double
a
)
{
constexpr
auto
pi
=
3.141592653589793238462643383279502884197
L
;
return
sizeof
(
long
double
)
>
sizeof
(
double
)
?
sin
(
a
*
pi
)
:
static_cast
<
long
double
>
(
sinpi
(
static_cast
<
double
>
(
a
)));
}
template
<
typename
T
>
void
sincospi
(
T
a
,
T
*
POCKETFFT_RESTRICT
res
)
{
// reduce argument to primary approximation interval (-0.25, 0.25)
auto
r
=
nearbyint
(
a
+
a
);
// must use IEEE-754 "to nearest" rounding
auto
i
=
(
int64_t
)
r
;
auto
t
=
fma
(
T
(
-
0.5
),
r
,
a
);
auto
c
=
cosm1pi0
(
t
)
+
T
(
1.
);
auto
s
=
sinpi0
(
t
);
// map results according to quadrant
if
(
i
&
2
)
{
s
=
T
(
0.
)
-
s
;
c
=
T
(
0.
)
-
c
;
}
if
(
i
&
1
)
{
swap
(
s
,
c
);
c
=
T
(
0.
)
-
c
;
}
res
[
0
]
=
c
;
res
[
1
]
=
s
;
}
inline
void
sincospi
(
long
double
a
,
long
double
*
POCKETFFT_RESTRICT
res
)
{
if
(
sizeof
(
long
double
)
>
sizeof
(
double
))
{
constexpr
auto
pi
=
3.141592653589793238462643383279502884197
L
;
res
[
0
]
=
cos
(
pi
*
a
);
res
[
1
]
=
sin
(
pi
*
a
);
}
else
{
double
sincos
[
2
];
sincospi
(
static_cast
<
double
>
(
a
),
sincos
);
res
[
0
]
=
static_cast
<
long
double
>
(
sincos
[
0
]);
res
[
1
]
=
static_cast
<
long
double
>
(
sincos
[
1
]);
}
}
template
<
typename
T
>
class
sincos_2pibyn
{
private:
using
Thigh
=
typename
conditional
<
(
sizeof
(
T
)
>
sizeof
(
double
)),
T
,
double
>::
type
;
size_t
sz
;
arr
<
cmplx
<
T
>>
data
;
POCKETFFT_NOINLINE
void
calc_first_octant
(
size_t
den
,
cmplx
<
T
>
*
POCKETFFT_RESTRICT
res
)
{
size_t
n
=
(
den
+
4
)
>>
3
;
if
(
n
==
0
)
return
;
res
[
0
].
Set
(
1.
,
0.
);
if
(
n
==
1
)
return
;
size_t
l1
=
size_t
(
sqrt
(
n
));
arr
<
cmplx
<
Thigh
>>
tmp
(
l1
);
for
(
size_t
i
=
1
;
i
<
l1
;
++
i
)
{
sincosm1pi0
(
Thigh
(
2
*
i
)
/
Thigh
(
den
),
&
tmp
[
i
].
r
);
res
[
i
].
Set
(
T
(
tmp
[
i
].
r
)
+
1
,
T
(
tmp
[
i
].
i
));
}
size_t
start
=
l1
;
while
(
start
<
n
)
{
cmplx
<
Thigh
>
cs
;
sincosm1pi0
((
Thigh
(
2
*
start
))
/
Thigh
(
den
),
&
cs
.
r
);
res
[
start
].
Set
(
T
(
cs
.
r
+
1
),
T
(
cs
.
i
));
size_t
end
=
l1
;
if
(
start
+
end
>
n
)
end
=
n
-
start
;
for
(
size_t
i
=
1
;
i
<
end
;
++
i
)
{
cmplx
<
Thigh
>
csx
=
tmp
[
i
];
res
[
start
+
i
].
Set
(
T
(((
cs
.
r
*
csx
.
r
-
cs
.
i
*
csx
.
i
+
cs
.
r
)
+
csx
.
r
)
+
1
),
T
((
cs
.
r
*
csx
.
i
+
cs
.
i
*
csx
.
r
)
+
cs
.
i
+
csx
.
i
));
}
start
+=
l1
;
}
}
size_t
mask
,
shift
;
arr
<
cmplx
<
Thigh
>>
v1
,
v2
;
void
calc_first_quadrant
(
size_t
n
,
cmplx
<
T
>
*
POCKETFFT_RESTRICT
res
)
static
cmplx
<
Thigh
>
calc
(
size_t
x
,
size_t
n
,
Thigh
ang
)
{
arr
<
cmplx
<
T
>>
p
(
n
/
2
);
// n is always even here
calc_first_octant
(
n
<<
1
,
p
.
data
());
size_t
ndone
=
(
n
+
2
)
>>
2
;
size_t
i
=
0
,
idx1
=
0
,
idx2
=
ndone
-
1
;
for
(;
i
+
1
<
ndone
;
i
+=
2
,
++
idx1
,
--
idx2
)
x
<<=
3
;
if
(
x
<
4
*
n
)
// first half
{
res
[
idx1
]
=
p
[
i
];
res
[
idx2
].
Set
(
p
[
i
+
1
].
i
,
p
[
i
+
1
].
r
);
if
(
x
<
2
*
n
)
// first quadrant
{
if
(
x
<
n
)
return
cmplx
<
Thigh
>
(
cos
(
Thigh
(
x
)
*
ang
),
sin
(
Thigh
(
x
)
*
ang
));
return
cmplx
<
Thigh
>
(
sin
(
Thigh
(
2
*
n
-
x
)
*
ang
),
cos
(
Thigh
(
2
*
n
-
x
)
*
ang
));
}
else
// second quadrant
{
x
-=
2
*
n
;
if
(
x
<
n
)
return
cmplx
<
Thigh
>
(
-
sin
(
Thigh
(
x
)
*
ang
),
cos
(
Thigh
(
x
)
*
ang
));
return
cmplx
<
Thigh
>
(
-
cos
(
Thigh
(
2
*
n
-
x
)
*
ang
),
sin
(
Thigh
(
2
*
n
-
x
)
*
ang
));
}
}
if
(
i
!=
ndone
)
res
[
idx1
]
=
p
[
i
];
}
void
calc_first_half
(
size_t
n
,
cmplx
<
T
>
*
POCKETFFT_RESTRICT
res
)
{
int
ndone
=
int
(
n
+
1
)
>>
1
;
arr
<
cmplx
<
T
>>
p
((
n
+
1
)
/
2
);
// n is always odd here
calc_first_octant
(
n
<<
2
,
p
.
data
());
int
i4
=
0
,
in
=
int
(
n
),
i
=
0
;
for
(;
i4
<=
in
-
i4
;
++
i
,
i4
+=
4
)
// octant 0
res
[
i
]
=
p
[
i4
];
for
(;
i4
-
in
<=
0
;
++
i
,
i4
+=
4
)
// octant 1
{
auto
xm
=
in
-
i4
;
res
[
i
].
Set
(
p
[
xm
].
i
,
p
[
xm
].
r
);
}
for
(;
i4
<=
3
*
in
-
i4
;
++
i
,
i4
+=
4
)
// octant 2
{
auto
xm
=
i4
-
in
;
res
[
i
].
Set
(
-
p
[
xm
].
i
,
p
[
xm
].
r
);
}
for
(;
i
<
ndone
;
++
i
,
i4
+=
4
)
// octant 3
{
auto
xm
=
2
*
in
-
i4
;
res
[
i
].
Set
(
-
p
[
xm
].
r
,
p
[
xm
].
i
);
}
}
void
fill_first_quadrant
(
size_t
n
,
cmplx
<
T
>
*
POCKETFFT_RESTRICT
res
)
{
constexpr
T
hsqt2
=
T
(
0.707106781186547524400844362104849
L
);
size_t
quart
=
n
>>
2
;
if
((
n
&
7
)
==
0
)
res
[
quart
/
2
].
Set
(
hsqt2
,
hsqt2
);
for
(
size_t
i
=
1
,
j
=
quart
-
1
;
2
*
i
<
quart
;
++
i
,
--
j
)
{
res
[
j
].
Set
(
res
[
i
].
i
,
res
[
i
].
r
);
}
}
POCKETFFT_NOINLINE
void
fill_first_half
(
size_t
n
,
cmplx
<
T
>
*
POCKETFFT_RESTRICT
res
)
{
size_t
half
=
n
>>
1
;
if
((
n
&
3
)
==
0
)
for
(
size_t
i
=
0
;
i
<
half
/
2
;
++
i
)
res
[
i
+
half
/
2
].
Set
(
-
res
[
i
].
i
,
res
[
i
].
r
);
else
for
(
size_t
i
=
1
,
j
=
half
-
1
;
2
*
i
<
half
;
++
i
,
--
j
)
res
[
j
].
Set
(
-
res
[
i
].
r
,
res
[
i
].
i
);
res
[
half
].
Set
(
T
(
-
1
),
T
(
0
));
}
POCKETFFT_NOINLINE
void
sincos_2pibyn_half
(
size_t
n
,
cmplx
<
T
>
*
POCKETFFT_RESTRICT
res
)
{
if
((
n
&
3
)
==
0
)
{
calc_first_octant
(
n
,
res
);
fill_first_quadrant
(
n
,
res
);
fill_first_half
(
n
,
res
);
}
else
if
((
n
&
1
)
==
0
)
{
calc_first_quadrant
(
n
,
res
);
fill_first_half
(
n
,
res
);
x
=
8
*
n
-
x
;
if
(
x
<
2
*
n
)
// third quadrant
{
if
(
x
<
n
)
return
cmplx
<
Thigh
>
(
cos
(
Thigh
(
x
)
*
ang
),
-
sin
(
Thigh
(
x
)
*
ang
));
return
cmplx
<
Thigh
>
(
sin
(
Thigh
(
2
*
n
-
x
)
*
ang
),
-
cos
(
Thigh
(
2
*
n
-
x
)
*
ang
));
}
else
// fourth quadrant
{
x
-=
2
*
n
;
if
(
x
<
n
)
return
cmplx
<
Thigh
>
(
-
sin
(
Thigh
(
x
)
*
ang
),
-
cos
(
Thigh
(
x
)
*
ang
));
return
cmplx
<
Thigh
>
(
-
cos
(
Thigh
(
2
*
n
-
x
)
*
ang
),
-
sin
(
Thigh
(
2
*
n
-
x
)
*
ang
));
}
}
else
calc_first_half
(
n
,
res
);
}
public:
POCKETFFT_NOINLINE
sincos_2pibyn
(
size_t
n
)
:
sz
(
n
),
data
(
n
/
2
+
1
)
{
sincos_2pibyn_half
(
n
,
data
.
data
());
}
{
constexpr
auto
pi
=
3.141592653589793238462643383279502884197
L
;
Thigh
ang
=
Thigh
(
0.25
L
*
pi
/
n
);
shift
=
1
;
while
((
size_t
(
1
)
<<
shift
)
*
(
size_t
(
1
)
<<
shift
)
<
n
)
++
shift
;
mask
=
(
size_t
(
1
)
<<
shift
)
-
1
;
v1
.
resize
(
mask
+
1
);
v1
[
0
].
Set
(
Thigh
(
1
),
Thigh
(
0
));
for
(
size_t
i
=
1
;
i
<
v1
.
size
();
++
i
)
v1
[
i
]
=
calc
(
i
,
n
,
ang
);
v2
.
resize
((
n
+
mask
)
/
(
mask
+
1
));
v2
[
0
].
Set
(
Thigh
(
1
),
Thigh
(
0
));
for
(
size_t
i
=
1
;
i
<
v2
.
size
();
++
i
)
v2
[
i
]
=
calc
(
i
*
(
mask
+
1
),
n
,
ang
);
}
cmplx
<
T
>
operator
[](
size_t
idx
)
const
{
if
(
idx
<
data
.
size
())
return
data
[
idx
];
auto
c
=
data
[
sz
-
idx
];
c
.
i
=
-
c
.
i
;
return
c
;
auto
x1
=
v1
[
idx
&
mask
],
x2
=
v2
[
idx
>>
shift
];
return
cmplx
<
T
>
(
T
(
x1
.
r
*
x2
.
r
-
x1
.
i
*
x2
.
i
),
T
(
x1
.
r
*
x2
.
i
+
x1
.
i
*
x2
.
r
));
}
};
...
...
@@ -2710,9 +2484,10 @@ template<typename T0> class T_dcst23
POCKETFFT_NOINLINE
T_dcst23
(
size_t
length
)
:
fftplan
(
length
),
twiddle
(
length
)
{
const
auto
oo2n
=
T0
(
0.5
)
/
T0
(
length
);
constexpr
auto
pi
=
T0
(
3.141592653589793238462643383279502884197
L
);
const
auto
oo2n
=
pi
*
T0
(
0.5
)
/
T0
(
length
);
for
(
size_t
i
=
0
;
i
<
length
;
++
i
)
twiddle
[
i
]
=
cos
pi
(
oo2n
*
T0
(
i
+
1
));
twiddle
[
i
]
=
cos
(
oo2n
*
T0
(
i
+
1
));
}
template
<
typename
T
>
POCKETFFT_NOINLINE
void
exec
(
T
c
[],
T0
fct
,
bool
ortho
,
...
...
@@ -2789,9 +2564,9 @@ template<typename T0> class T_dcst4
if
((
N
&
1
)
==
0
)
for
(
size_t
i
=
0
;
i
<
N
/
2
;
++
i
)
{
T0
sincos
[
2
]
;
sincospi
(
oon
*
(
T0
(
i
)
+
T0
(
0.125
)),
sincos
);
C2
[
i
].
Set
(
sincos
[
0
],
sincos
[
1
]
);
constexpr
auto
pi
=
T0
(
3.141592653589793238462643383279502884197
L
)
;
T0
ang
=
pi
*
oon
*
(
T0
(
i
)
+
T0
(
0.125
)
);
C2
[
i
].
Set
(
cos
(
ang
),
sin
(
ang
)
);
}
}
...
...
stress.py
View file @
b87f51f5
...
...
@@ -2,8 +2,10 @@ import numpy as np
import
pypocketfft
def
_l2error
(
a
,
b
):
return
np
.
sqrt
(
np
.
sum
(
np
.
abs
(
a
-
b
)
**
2
)
/
np
.
sum
(
np
.
abs
(
a
)
**
2
))
np
.
random
.
seed
(
42
)
def
_l2error
(
a
,
b
,
axes
):
return
np
.
sqrt
(
np
.
sum
(
np
.
abs
(
a
-
b
)
**
2
)
/
np
.
sum
(
np
.
abs
(
a
)
**
2
))
/
np
.
log2
(
np
.
max
([
2
,
np
.
prod
(
np
.
take
(
a
.
shape
,
axes
))]))
def
fftn
(
a
,
axes
=
None
,
inorm
=
0
,
out
=
None
,
nthreads
=
1
):
...
...
@@ -26,7 +28,7 @@ def irfftn(a, axes=None, lastsize=0, inorm=0, nthreads=1):
inorm
=
inorm
,
nthreads
=
nthreads
)
nthreads
=
0
nthreads
=
1
def
update_err
(
err
,
name
,
value
):
...
...
@@ -40,9 +42,9 @@ def update_err(err, name, value):
def
test
(
err
):
ndim
=
np
.
random
.
randint
(
1
,
5
)
ndim
=
3
#
np.random.randint(1, 5)
axlen
=
int
((
2
**
20
)
**
(
1.
/
ndim
))
shape
=
np
.
random
.
randint
(
1
,
axlen
,
ndim
)
shape
=
(
np
.
random
.
randint
(
1
,
axlen
,
ndim
)
//
2
)
*
2
+
1
axes
=
np
.
arange
(
ndim
)
np
.
random
.
shuffle
(
axes
)
nax
=
np
.
random
.
randint
(
1
,
ndim
+
1
)
...
...
@@ -52,85 +54,85 @@ def test(err):
a_32
=
a
.
astype
(
np
.
complex64
)
b
=
ifftn
(
fftn
(
a
,
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"cmax"
,
_l2error
(
a
,
b
))
err
=
update_err
(
err
,
"cmax"
,
_l2error
(
a
,
b
,
axes
))
b
=
ifftn
(
fftn
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"cmax"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"cmax"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
fftn
(
ifftn
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"cmax"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"cmax"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
irfftn
(
rfftn
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
lastsize
=
lastsize
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"rmax"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"rmax"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
ifftn
(
fftn
(
a
.
astype
(
np
.
complex64
),
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"cmaxf"
,
_l2error
(
a
.
astype
(
np
.
complex64
),
b
))
err
=
update_err
(
err
,
"cmaxf"
,
_l2error
(
a
.
astype
(
np
.
complex64
),
b
,
axes
))
b
=
irfftn
(
rfftn
(
a
.
real
.
astype
(
np
.
float32
),
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
lastsize
=
lastsize
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"rmaxf"
,
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
))
err
=
update_err
(
err
,
"rmaxf"
,
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
,
axes
))
b
=
pypocketfft
.
separable_hartley
(
pypocketfft
.
separable_hartley
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"hmax"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"hmax"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
genuine_hartley
(
pypocketfft
.
genuine_hartley
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"hmax"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"hmax"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
separable_hartley
(
pypocketfft
.
separable_hartley
(
a
.
real
.
astype
(
np
.
float32
),
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"hmaxf"
,
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
))
err
=
update_err
(
err
,
"hmaxf"
,
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
,
axes
))
b
=
pypocketfft
.
genuine_hartley
(
pypocketfft
.
genuine_hartley
(
a
.
real
.
astype
(
np
.
float32
),
axes
=
axes
,
nthreads
=
nthreads
),
axes
=
axes
,
inorm
=
2
,
nthreads
=
nthreads
)
err
=
update_err
(
err
,
"hmaxf"
,
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
))
err
=
update_err
(
err
,
"hmaxf"
,
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
,
axes
))
if
all
(
a
.
shape
[
i
]
>
1
for
i
in
axes
):
b
=
pypocketfft
.
dct
(
pypocketfft
.
dct
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
1
),
axes
=
axes
,
type
=
1
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"c1max"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"c1max"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dct
(
pypocketfft
.
dct
(
a_32
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
1
),
axes
=
axes
,
type
=
1
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"c1maxf"
,
_l2error
(
a_32
.
real
,
b
))
err
=
update_err
(
err
,
"c1maxf"
,
_l2error
(
a_32
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dct
(
pypocketfft
.
dct
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
2
),
axes
=
axes
,
type
=
3
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"c23max"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"c23max"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dct
(
pypocketfft
.
dct
(
a_32
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
2
),
axes
=
axes
,
type
=
3
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"c23maxf"
,
_l2error
(
a_32
.
real
,
b
))
err
=
update_err
(
err
,
"c23maxf"
,
_l2error
(
a_32
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dct
(
pypocketfft
.
dct
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
4
),
axes
=
axes
,
type
=
4
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"c4max"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"c4max"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dct
(
pypocketfft
.
dct
(
a_32
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
4
),
axes
=
axes
,
type
=
4
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"c4maxf"
,
_l2error
(
a_32
.
real
,
b
))
err
=
update_err
(
err
,
"c4maxf"
,
_l2error
(
a_32
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dst
(
pypocketfft
.
dst
(
a_32
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
1
),
axes
=
axes
,
type
=
1
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"s1maxf"
,
_l2error
(
a_32
.
real
,
b
))
err
=
update_err
(
err
,
"s1maxf"
,
_l2error
(
a_32
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dst
(
pypocketfft
.
dst
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
2
),
axes
=
axes
,
type
=
3
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"s23max"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"s23max"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dst
(
pypocketfft
.
dst
(
a_32
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
2
),
axes
=
axes
,
type
=
3
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"s23maxf"
,
_l2error
(
a_32
.
real
,
b
))
err
=
update_err
(
err
,
"s23maxf"
,
_l2error
(
a_32
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dst
(
pypocketfft
.
dst
(
a
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
4
),
axes
=
axes
,
type
=
4
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"s4max"
,
_l2error
(
a
.
real
,
b
))
err
=
update_err
(
err
,
"s4max"
,
_l2error
(
a
.
real
,
b
,
axes
))
b
=
pypocketfft
.
dst
(
pypocketfft
.
dst
(
a_32
.
real
,
axes
=
axes
,
nthreads
=
nthreads
,
type
=
4
),
axes
=
axes
,
type
=
4
,
nthreads
=
nthreads
,
inorm
=
2
)
err
=
update_err
(
err
,
"s4maxf"
,
_l2error
(
a_32
.
real
,
b
))
err
=
update_err
(
err
,
"s4maxf"
,
_l2error
(
a_32
.
real
,
b
,
axes
))
err
=
dict
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment