Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pypocketfft
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
4
Issues
4
List
Boards
Labels
Service Desk
Milestones
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Martin Reinecke
pypocketfft
Commits
8a3d6ff7
Commit
8a3d6ff7
authored
May 10, 2019
by
Martin Reinecke
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
sync
parent
0ad63b5f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
103 additions
and
79 deletions
+103
-79
pocketfft_hdronly.h
pocketfft_hdronly.h
+103
-79
No files found.
pocketfft_hdronly.h
View file @
8a3d6ff7
...
...
@@ -37,6 +37,8 @@ namespace pocketfft {
using
shape_t
=
std
::
vector
<
size_t
>
;
using
stride_t
=
std
::
vector
<
ptrdiff_t
>
;
constexpr
bool
FORWARD
=
true
,
BACKWARD
=
false
;
namespace
detail
{
...
...
@@ -157,13 +159,29 @@ template<typename T> void ROTM90(cmplx<T> &a)
template
<
typename
T
>
class
sincos_2pibyn
{
private:
template
<
typename
Ta
,
typename
Tb
,
bool
bigger
>
struct
TypeSelector
{};
template
<
typename
Ta
,
typename
Tb
>
struct
TypeSelector
<
Ta
,
Tb
,
true
>
{
using
type
=
Ta
;
};
template
<
typename
Ta
,
typename
Tb
>
struct
TypeSelector
<
Ta
,
Tb
,
false
>
{
using
type
=
Tb
;
};
using
Thigh
=
typename
TypeSelector
<
T
,
double
,
(
sizeof
(
T
)
>
sizeof
(
double
))
>::
type
;
arr
<
T
>
data
;
// adapted from https://stackoverflow.com/questions/42792939/
// CAUTION: this function only works for arguments in the range
// [-0.25; 0.25]!
void
my_sincosm1pi
(
double
a
,
double
*
restrict
res
)
void
my_sincosm1pi
(
Thigh
a
,
Thigh
*
restrict
res
)
{
if
(
sizeof
(
Thigh
)
>
sizeof
(
double
))
// don't have the code for long double
{
Thigh
pi
=
Thigh
(
3.141592653589793238462643383279502884197
L
);
res
[
1
]
=
sin
(
pi
*
a
);
auto
s
=
res
[
1
];
res
[
0
]
=
(
s
*
s
)
/
(
-
sqrt
((
1
-
s
)
*
(
1
+
s
))
-
1
);
return
;
}
double
s
=
a
*
a
;
/* Approximate cos(pi*x)-1 for x in [-0.25,0.25] */
double
r
=
-
1.0369917389758117e-4
;
...
...
@@ -195,25 +213,25 @@ template<typename T> class sincos_2pibyn
res
[
0
]
=
1.
;
res
[
1
]
=
0.
;
if
(
n
==
1
)
return
;
size_t
l1
=
size_t
(
sqrt
(
n
));
arr
<
double
>
tmp
(
2
*
l1
);
arr
<
Thigh
>
tmp
(
2
*
l1
);
for
(
size_t
i
=
1
;
i
<
l1
;
++
i
)
{
my_sincosm1pi
((
2.
*
i
)
/
den
,
&
tmp
[
2
*
i
]);
my_sincosm1pi
((
Thigh
(
2
)
*
i
)
/
den
,
&
tmp
[
2
*
i
]);
res
[
2
*
i
]
=
tmp
[
2
*
i
]
+
1.
;
res
[
2
*
i
+
1
]
=
tmp
[
2
*
i
+
1
];
}
size_t
start
=
l1
;
while
(
start
<
n
)
{
double
cs
[
2
];
my_sincosm1pi
((
2.
*
start
)
/
den
,
cs
);
Thigh
cs
[
2
];
my_sincosm1pi
((
Thigh
(
2
)
*
start
)
/
den
,
cs
);
res
[
2
*
start
]
=
cs
[
0
]
+
1.
;
res
[
2
*
start
+
1
]
=
cs
[
1
];
size_t
end
=
l1
;
if
(
start
+
end
>
n
)
end
=
n
-
start
;
for
(
size_t
i
=
1
;
i
<
end
;
++
i
)
{
double
csx
[
2
]
=
{
tmp
[
2
*
i
],
tmp
[
2
*
i
+
1
]};
Thigh
csx
[
2
]
=
{
tmp
[
2
*
i
],
tmp
[
2
*
i
+
1
]};
res
[
2
*
(
start
+
i
)]
=
((
cs
[
0
]
*
csx
[
0
]
-
cs
[
1
]
*
csx
[
1
]
+
cs
[
0
])
+
csx
[
0
])
+
1.
;
res
[
2
*
(
start
+
i
)
+
1
]
=
(
cs
[
0
]
*
csx
[
1
]
+
cs
[
1
]
*
csx
[
0
])
+
cs
[
1
]
+
csx
[
1
];
}
...
...
@@ -254,7 +272,7 @@ template<typename T> class sincos_2pibyn
void
fill_first_quadrant
(
size_t
n
,
T
*
restrict
res
)
{
const
double
hsqt2
=
0.707106781186547524400844362104849
;
const
expr
Thigh
hsqt2
=
Thigh
(
0.707106781186547524400844362104849
L
)
;
size_t
quart
=
n
>>
2
;
if
((
n
&
7
)
==
0
)
res
[
quart
]
=
res
[
quart
+
1
]
=
hsqt2
;
...
...
@@ -337,7 +355,7 @@ struct util // hack to avoid duplicate symbols
static
NOINLINE
double
cost_guess
(
size_t
n
)
{
const
double
lfp
=
1.1
;
// penalty for non-hardcoded larger factors
const
expr
double
lfp
=
1.1
;
// penalty for non-hardcoded larger factors
size_t
ni
=
n
;
double
result
=
0.
;
while
((
n
&
1
)
==
0
)
...
...
@@ -388,9 +406,8 @@ struct util // hack to avoid duplicate symbols
throw
runtime_error
(
"stride dimension mismatch"
);
for
(
auto
shp
:
shape
)
if
(
shp
<
1
)
throw
runtime_error
(
"zero extent detected"
);
if
(
inplace
)
for
(
size_t
i
=
0
;
i
<
ndim
;
++
i
)
if
(
stride_in
[
i
]
!=
stride_out
[
i
])
throw
runtime_error
(
"stride mismatch"
);
if
(
inplace
&&
(
stride_in
!=
stride_out
))
throw
runtime_error
(
"stride mismatch"
);
}
static
NOINLINE
void
sanity_check
(
const
shape_t
&
shape
,
...
...
@@ -427,7 +444,6 @@ struct util // hack to avoid duplicate symbols
template
<
typename
T0
>
class
cfftp
{
private:
struct
fctdata
{
size_t
fct
;
...
...
@@ -489,7 +505,8 @@ template<bool bwd, typename T> void pass3 (size_t ido, size_t l1,
const
T
*
restrict
cc
,
T
*
restrict
ch
,
const
cmplx
<
T0
>
*
restrict
wa
)
{
constexpr
size_t
cdim
=
3
;
constexpr
T0
tw1r
=-
0.5
,
tw1i
=
(
bwd
?
1.
:
-
1.
)
*
0.86602540378443864676
;
constexpr
T0
tw1r
=-
0.5
,
tw1i
=
(
bwd
?
1
:
-
1
)
*
T0
(
0.8660254037844386467637231707529362
L
);
if
(
ido
==
1
)
for
(
size_t
k
=
0
;
k
<
l1
;
++
k
)
...
...
@@ -591,10 +608,10 @@ template<bool bwd, typename T> void pass5 (size_t ido, size_t l1,
const
T
*
restrict
cc
,
T
*
restrict
ch
,
const
cmplx
<
T0
>
*
restrict
wa
)
{
constexpr
size_t
cdim
=
5
;
constexpr
T0
tw1r
=
0.3090169943749474241
,
tw1i
=
(
bwd
?
1
.
:
-
1.
)
*
0.95105651629515357212
,
tw2r
=
-
0.8090169943749474241
,
tw2i
=
(
bwd
?
1
.
:
-
1.
)
*
0.58778525229247312917
;
constexpr
T0
tw1r
=
T0
(
0.3090169943749474241022934171828191
L
)
,
tw1i
=
(
bwd
?
1
:
-
1
)
*
T0
(
0.9510565162951535721164393333793821
L
)
,
tw2r
=
T0
(
-
0.8090169943749474241022934171828191
L
)
,
tw2i
=
(
bwd
?
1
:
-
1
)
*
T0
(
0.5877852522924731291687059546390728
L
)
;
if
(
ido
==
1
)
for
(
size_t
k
=
0
;
k
<
l1
;
++
k
)
...
...
@@ -655,12 +672,12 @@ template<bool bwd, typename T> void pass7(size_t ido, size_t l1,
const
T
*
restrict
cc
,
T
*
restrict
ch
,
const
cmplx
<
T0
>
*
restrict
wa
)
{
constexpr
size_t
cdim
=
7
;
constexpr
T0
tw1r
=
0.623489801858733530525
,
tw1i
=
(
bwd
?
1
.
:
-
1.
)
*
0.7818314824680298087084
,
tw2r
=
-
0.222520933956314404289
,
tw2i
=
(
bwd
?
1
.
:
-
1.
)
*
0.9749279121818236070181
,
tw3r
=
-
0.9009688679024191262361
,
tw3i
=
(
bwd
?
1
.
:
-
1.
)
*
0.4338837391175581204758
;
constexpr
T0
tw1r
=
T0
(
0.6234898018587335305250048840042398
L
)
,
tw1i
=
(
bwd
?
1
:
-
1
)
*
T0
(
0.7818314824680298087084445266740578
L
)
,
tw2r
=
T0
(
-
0.2225209339563144042889025644967948
L
)
,
tw2i
=
(
bwd
?
1
:
-
1
)
*
T0
(
0.9749279121818236070181316829939312
L
)
,
tw3r
=
T0
(
-
0.9009688679024191262361023195074451
L
)
,
tw3i
=
(
bwd
?
1
:
-
1
)
*
T0
(
0.433883739117558120475768332848359
L
)
;
if
(
ido
==
1
)
for
(
size_t
k
=
0
;
k
<
l1
;
++
k
)
...
...
@@ -725,17 +742,17 @@ template<bool bwd, typename T> void pass7(size_t ido, size_t l1,
template
<
bool
bwd
,
typename
T
>
void
pass11
(
size_t
ido
,
size_t
l1
,
const
T
*
restrict
cc
,
T
*
restrict
ch
,
const
cmplx
<
T0
>
*
restrict
wa
)
{
const
size_t
cdim
=
11
;
const
T0
tw1r
=
0.8412535328311811688618
,
tw1i
=
(
bwd
?
1.
:
-
1.
)
*
0.5406408174555975821076
,
tw2r
=
0.4154150130018864255293
,
tw2i
=
(
bwd
?
1.
:
-
1.
)
*
0.9096319953545183714117
,
tw3r
=
-
0.1423148382732851404438
,
tw3i
=
(
bwd
?
1.
:
-
1.
)
*
0.9898214418809327323761
,
tw4r
=
-
0.6548607339452850640569
,
tw4i
=
(
bwd
?
1.
:
-
1.
)
*
0.755749574354258283774
,
tw5r
=
-
0.9594929736144973898904
,
tw5i
=
(
bwd
?
1.
:
-
1.
)
*
0.2817325568414296977114
;
const
expr
size_t
cdim
=
11
;
const
expr
T0
tw1r
=
T0
(
0.8412535328311811688618116489193677
L
)
,
tw1i
=
(
bwd
?
1
:
-
1
)
*
T0
(
0.5406408174555975821076359543186917
L
)
,
tw2r
=
T0
(
0.4154150130018864255292741492296232
L
)
,
tw2i
=
(
bwd
?
1
:
-
1
)
*
T0
(
0.9096319953545183714117153830790285
L
)
,
tw3r
=
T0
(
-
0.1423148382732851404437926686163697
L
)
,
tw3i
=
(
bwd
?
1
:
-
1
)
*
T0
(
0.9898214418809327323760920377767188
L
)
,
tw4r
=
T0
(
-
0.6548607339452850640569250724662936
L
)
,
tw4i
=
(
bwd
?
1
:
-
1
)
*
T0
(
0.7557495743542582837740358439723444
L
)
,
tw5r
=
T0
(
-
0.9594929736144973898903680570663277
L
)
,
tw5i
=
(
bwd
?
1
:
-
1
)
*
T0
(
0.2817325568414296977114179153466169
L
)
;
if
(
ido
==
1
)
for
(
size_t
k
=
0
;
k
<
l1
;
++
k
)
...
...
@@ -1072,7 +1089,7 @@ template<typename T> void radf3(size_t ido, size_t l1,
const
T
*
restrict
cc
,
T
*
restrict
ch
,
const
T0
*
restrict
wa
)
{
constexpr
size_t
cdim
=
3
;
constexpr
T0
taur
=-
0.5
,
taui
=
0.86602540378443864676
;
constexpr
T0
taur
=-
0.5
,
taui
=
T0
(
0.8660254037844386467637231707529362
L
)
;
for
(
size_t
k
=
0
;
k
<
l1
;
k
++
)
{
...
...
@@ -1106,7 +1123,7 @@ template<typename T> void radf4(size_t ido, size_t l1,
const
T
*
restrict
cc
,
T
*
restrict
ch
,
const
T0
*
restrict
wa
)
{
constexpr
size_t
cdim
=
4
;
constexpr
T0
hsqt2
=
0.70710678118654752440
;
constexpr
T0
hsqt2
=
T0
(
0.707106781186547524400844362104849
L
)
;
for
(
size_t
k
=
0
;
k
<
l1
;
k
++
)
{
...
...
@@ -1147,8 +1164,10 @@ template<typename T> void radf5(size_t ido, size_t l1,
const
T
*
restrict
cc
,
T
*
restrict
ch
,
const
T0
*
restrict
wa
)
{
constexpr
size_t
cdim
=
5
;
constexpr
T0
tr11
=
0.3090169943749474241
,
ti11
=
0.95105651629515357212
,
tr12
=-
0.8090169943749474241
,
ti12
=
0.58778525229247312917
;
constexpr
T0
tr11
=
T0
(
0.3090169943749474241022934171828191
L
),
ti11
=
T0
(
0.9510565162951535721164393333793821
L
),
tr12
=
T0
(
-
0.8090169943749474241022934171828191
L
),
ti12
=
T0
(
0.5877852522924731291687059546390728
L
);
for
(
size_t
k
=
0
;
k
<
l1
;
k
++
)
{
...
...
@@ -1372,7 +1391,7 @@ template<typename T> void radb3(size_t ido, size_t l1,
const
T
*
restrict
cc
,
T
*
restrict
ch
,
const
T0
*
restrict
wa
)
{
constexpr
size_t
cdim
=
3
;
constexpr
T0
taur
=-
0.5
,
taui
=
0.86602540378443864676
;
constexpr
T0
taur
=-
0.5
,
taui
=
T0
(
0.8660254037844386467637231707529362
L
)
;
for
(
size_t
k
=
0
;
k
<
l1
;
k
++
)
{
...
...
@@ -1407,7 +1426,7 @@ template<typename T> void radb4(size_t ido, size_t l1,
const
T
*
restrict
cc
,
T
*
restrict
ch
,
const
T0
*
restrict
wa
)
{
constexpr
size_t
cdim
=
4
;
constexpr
T0
sqrt2
=
1.41421356237309504880
;
constexpr
T0
sqrt2
=
T0
(
1.414213562373095048801688724209698
L
)
;
for
(
size_t
k
=
0
;
k
<
l1
;
k
++
)
{
...
...
@@ -1453,8 +1472,10 @@ template<typename T> void radb5(size_t ido, size_t l1,
const
T
*
restrict
cc
,
T
*
restrict
ch
,
const
T0
*
restrict
wa
)
{
constexpr
size_t
cdim
=
5
;
constexpr
T0
tr11
=
0.3090169943749474241
,
ti11
=
0.95105651629515357212
,
tr12
=-
0.8090169943749474241
,
ti12
=
0.58778525229247312917
;
constexpr
T0
tr11
=
T0
(
0.3090169943749474241022934171828191
L
),
ti11
=
T0
(
0.9510565162951535721164393333793821
L
),
tr12
=
T0
(
-
0.8090169943749474241022934171828191
L
),
ti12
=
T0
(
0.5877852522924731291687059546390728
L
);
for
(
size_t
k
=
0
;
k
<
l1
;
k
++
)
{
...
...
@@ -1862,7 +1883,7 @@ template<typename T0> class fftblue
}
/* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */
T0
xn2
=
1.
/
n2
;
T0
xn2
=
T0
(
1
)
/
n2
;
bkf
[
0
]
=
bk
[
0
]
*
xn2
;
for
(
size_t
m
=
1
;
m
<
n
;
++
m
)
bkf
[
m
]
=
bkf
[
n2
-
m
]
=
bk
[
m
]
*
xn2
;
...
...
@@ -2033,10 +2054,9 @@ template<size_t N, typename Ti, typename To> class multi_iter
for
(
int
i
=
pos
.
size
()
-
1
;
i
>=
0
;
--
i
)
{
if
(
i
==
int
(
idim
))
continue
;
++
pos
[
i
];
p_ii
+=
iarr
.
stride
(
i
);
p_oi
+=
oarr
.
stride
(
i
);
if
(
pos
[
i
]
<
iarr
.
shape
(
i
))
if
(
++
pos
[
i
]
<
iarr
.
shape
(
i
))
return
;
pos
[
i
]
=
0
;
p_ii
-=
iarr
.
shape
(
i
)
*
iarr
.
stride
(
i
);
...
...
@@ -2076,7 +2096,12 @@ template<size_t N, typename Ti, typename To> class multi_iter
};
#if defined(HAVE_VECSUPPORT)
template
<
typename
T
>
struct
VTYPE
{};
template
<
typename
T
>
struct
VTYPE
{};
template
<
>
struct
VTYPE
<
long
double
>
{
using
type
=
long
double
__attribute__
((
vector_size
(
sizeof
(
long
double
))));
static
constexpr
int
vlen
=
1
;
};
template
<
>
struct
VTYPE
<
double
>
{
using
type
=
double
__attribute__
((
vector_size
(
VBYTELEN
)));
...
...
@@ -2088,10 +2113,7 @@ template<> struct VTYPE<float>
static
constexpr
int
vlen
=
VBYTELEN
/
sizeof
(
float
);
};
#else
template
<
typename
T
>
struct
VTYPE
{};
template
<
>
struct
VTYPE
<
double
>
{
static
constexpr
int
vlen
=
1
;
};
template
<
>
struct
VTYPE
<
float
>
template
<
typename
T
>
struct
VTYPE
{
static
constexpr
int
vlen
=
1
;
};
#endif
...
...
@@ -2393,68 +2415,70 @@ template<typename T> void c2c(const shape_t &shape, const stride_t &stride_in,
general_c
(
ain
,
aout
,
axes
,
forward
,
fct
);
}
template
<
typename
T
>
void
r2c
(
const
shape_t
&
shape
,
const
stride_t
&
stride
_in
,
const
stride_t
&
stride_
out
,
size_t
axis
,
const
T
*
data_in
,
std
::
complex
<
T
>
*
data_out
,
T
fct
)
template
<
typename
T
>
void
r2c
(
const
shape_t
&
shape_in
,
const
stride_t
&
stride_
in
,
const
stride_t
&
stride_out
,
size_t
axis
,
const
T
*
data_in
,
std
::
complex
<
T
>
*
data_out
,
T
fct
)
{
using
namespace
detail
;
util
::
sanity_check
(
shape
,
stride_in
,
stride_out
,
false
,
axis
);
ndarr
<
T
>
ain
(
data_in
,
shape
,
stride_in
);
ndarr
<
cmplx
<
T
>>
aout
(
data_out
,
shape
,
stride_out
);
util
::
sanity_check
(
shape
_in
,
stride_in
,
stride_out
,
false
,
axis
);
ndarr
<
T
>
ain
(
data_in
,
shape
_in
,
stride_in
);
ndarr
<
cmplx
<
T
>>
aout
(
data_out
,
shape
_in
,
stride_out
);
// FIXME
general_r2c
(
ain
,
aout
,
axis
,
fct
);
}
template
<
typename
T
>
void
r2c
(
const
shape_t
&
shape
,
const
stride_t
&
stride
_in
,
const
stride_t
&
stride_
out
,
const
shape_t
&
axes
,
const
T
*
data_in
,
std
::
complex
<
T
>
*
data_out
,
T
fct
)
template
<
typename
T
>
void
r2c
(
const
shape_t
&
shape_in
,
const
stride_t
&
stride_
in
,
const
stride_t
&
stride_out
,
const
shape_t
&
axes
,
const
T
*
data_in
,
std
::
complex
<
T
>
*
data_out
,
T
fct
)
{
using
namespace
detail
;
util
::
sanity_check
(
shape
,
stride_in
,
stride_out
,
false
,
axes
);
r2c
(
shape
,
stride_in
,
stride_out
,
axes
.
back
(),
data_in
,
data_out
,
fct
);
util
::
sanity_check
(
shape
_in
,
stride_in
,
stride_out
,
false
,
axes
);
r2c
(
shape
_in
,
stride_in
,
stride_out
,
axes
.
back
(),
data_in
,
data_out
,
fct
);
if
(
axes
.
size
()
==
1
)
return
;
shape_t
shape_out
(
shape
);
shape_out
[
axes
.
back
()]
=
shape
[
axes
.
back
()]
/
2
+
1
;
shape_t
shape_out
(
shape
_in
);
shape_out
[
axes
.
back
()]
=
shape
_in
[
axes
.
back
()]
/
2
+
1
;
auto
newaxes
=
shape_t
{
axes
.
begin
(),
--
axes
.
end
()};
c2c
(
shape_out
,
stride_out
,
stride_out
,
newaxes
,
true
,
data_out
,
data_out
,
T
(
1
));
}
template
<
typename
T
>
void
c2r
(
const
shape_t
&
shape
,
size_t
new_size
,
template
<
typename
T
>
void
c2r
(
const
shape_t
&
shape
_out
,
const
stride_t
&
stride_in
,
const
stride_t
&
stride_out
,
size_t
axis
,
const
std
::
complex
<
T
>
*
data_in
,
T
*
data_out
,
T
fct
)
{
using
namespace
detail
;
util
::
sanity_check
(
shape
,
stride_in
,
stride_out
,
false
,
axis
);
shape_t
shape_
out
(
shape
);
shape_
out
[
axis
]
=
new_size
;
ndarr
<
cmplx
<
T
>>
ain
(
data_in
,
shape
,
stride_in
);
util
::
sanity_check
(
shape
_out
,
stride_in
,
stride_out
,
false
,
axis
);
shape_t
shape_
in
(
shape_out
);
shape_
in
[
axis
]
=
shape_out
[
axis
]
/
2
+
1
;
ndarr
<
cmplx
<
T
>>
ain
(
data_in
,
shape
_in
,
stride_in
);
ndarr
<
T
>
aout
(
data_out
,
shape_out
,
stride_out
);
general_c2r
(
ain
,
aout
,
axis
,
fct
);
}
template
<
typename
T
>
void
c2r
(
const
shape_t
&
shape
,
size_t
new_size
,
template
<
typename
T
>
void
c2r
(
const
shape_t
&
shape
_out
,
const
stride_t
&
stride_in
,
const
stride_t
&
stride_out
,
const
shape_t
&
axes
,
const
std
::
complex
<
T
>
*
data_in
,
T
*
data_out
,
T
fct
)
{
using
namespace
detail
;
if
(
axes
.
size
()
==
1
)
{
c2r
(
shape
,
new_size
,
stride_in
,
stride_out
,
axes
[
0
],
data_in
,
data_out
,
fct
);
c2r
(
shape_out
,
stride_in
,
stride_out
,
axes
[
0
],
data_in
,
data_out
,
fct
);
return
;
}
util
::
sanity_check
(
shape
,
stride_in
,
stride_out
,
false
,
axes
);
auto
nval
=
util
::
prod
(
shape
);
stride_t
stride_inter
(
shape
.
size
());
util
::
sanity_check
(
shape_out
,
stride_in
,
stride_out
,
false
,
axes
);
auto
shape_in
=
shape_out
;
shape_in
[
axes
.
back
()]
=
shape_out
[
axes
.
back
()]
/
2
+
1
;
auto
nval
=
util
::
prod
(
shape_in
);
stride_t
stride_inter
(
shape_in
.
size
());
stride_inter
.
back
()
=
sizeof
(
cmplx
<
T
>
);
for
(
int
i
=
shape
.
size
()
-
2
;
i
>=
0
;
--
i
)
stride_inter
[
i
]
=
stride_inter
[
i
+
1
]
*
shape
[
i
+
1
];
for
(
int
i
=
shape
_in
.
size
()
-
2
;
i
>=
0
;
--
i
)
stride_inter
[
i
]
=
stride_inter
[
i
+
1
]
*
shape
_in
[
i
+
1
];
arr
<
complex
<
T
>>
tmp
(
nval
);
auto
newaxes
=
shape_t
({
axes
.
begin
(),
--
axes
.
end
()});
c2c
(
shape
,
stride_in
,
stride_inter
,
newaxes
,
false
,
data_in
,
tmp
.
data
(),
T
(
1
));
c2r
(
shape
,
new_size
,
stride_inter
,
stride_out
,
axes
.
back
(),
tmp
.
data
(),
data_out
,
fct
);
c2c
(
shape_in
,
stride_in
,
stride_inter
,
newaxes
,
false
,
data_in
,
tmp
.
data
(),
T
(
1
));
c2r
(
shape_out
,
stride_inter
,
stride_out
,
axes
.
back
(),
tmp
.
data
(),
data_out
,
fct
);
}
template
<
typename
T
>
void
r2r_fftpack
(
const
shape_t
&
shape
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment