Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pypocketfft
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
4
Issues
4
List
Boards
Labels
Service Desk
Milestones
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Martin Reinecke
pypocketfft
Commits
656b2373
Commit
656b2373
authored
Jul 20, 2019
by
Martin Reinecke
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'sincos' into 'master'
DCT/DST support See merge request
!13
parents
0276f55e
20f3b84b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
688 additions
and
56 deletions
+688
-56
README.md
README.md
+7
-7
pocketfft_hdronly.h
pocketfft_hdronly.h
+503
-25
pypocketfft.cc
pypocketfft.cc
+146
-4
test.py
test.py
+32
-20
No files found.
README.md
View file @
656b2373
pypocketfft
===========
This package provides Fast Fourier
and Hartley transforms with a simple
Python interface.
This package provides Fast Fourier
, trigonometric and Hartley transforms with a
simple
Python interface.
The central algorithms are derived from Paul Swarztrauber's FFTPACK code
(http://www.netlib.org/fftpack).
...
...
@@ -10,11 +10,11 @@ The central algorithms are derived from Paul Swarztrauber's FFTPACK code
Features
--------
-
supports fully complex and half-complex (i.e. complex-to-real and
real-to-complex) FFTs
-
supports multidimensional arrays and selection of the axes to be transformed.
-
supports single and double precision
real-to-complex) FFTs, discrete sine/cosine transforms and Hartley transforms
-
achieves very high accuracy for all transforms
-
supports multidimensional arrays and selection of the axes to be transformed
-
supports single, double, and long double precision
-
makes use of CPU vector instructions when performing 2D and higher-dimensional
transforms
-
does not have persistent transform plans, which makes the interface simpler
-
supports prime-length transforms without degrading to O(N
**
2) performance
-
H
as optional OpenMP support for multidimensional transforms
-
h
as optional OpenMP support for multidimensional transforms
pocketfft_hdronly.h
View file @
656b2373
...
...
@@ -2,7 +2,13 @@
This file is part of pocketfft.
Copyright (C) 2010-2019 Max-Planck-Society
Author: Martin Reinecke
Copyright (C) 2019 Peter Bell
For the odd-sized DCT-IV transforms:
Copyright (C) 2003, 2007-14 Matteo Frigo
Copyright (C) 2003, 2007-14 Massachusetts Institute of Technology
Authors: Martin Reinecke, Peter Bell
All rights reserved.
...
...
@@ -196,6 +202,13 @@ template<typename T> struct cmplx {
{
r
+=
other
.
r
;
i
+=
other
.
i
;
return
*
this
;
}
template
<
typename
T2
>
cmplx
&
operator
*=
(
T2
other
)
{
r
*=
other
;
i
*=
other
;
return
*
this
;
}
template
<
typename
T2
>
cmplx
&
operator
*=
(
const
cmplx
<
T2
>
&
other
)
{
T
tmp
=
r
*
other
.
r
-
i
*
other
.
i
;
i
=
r
*
other
.
i
+
i
*
other
.
r
;
r
=
tmp
;
return
*
this
;
}
cmplx
operator
+
(
const
cmplx
&
other
)
const
{
return
cmplx
(
r
+
other
.
r
,
i
+
other
.
i
);
}
cmplx
operator
-
(
const
cmplx
&
other
)
const
...
...
@@ -474,8 +487,8 @@ struct util // hack to avoid duplicate symbols
shape_t
tmp
(
ndim
,
0
);
for
(
auto
ax
:
axes
)
{
if
(
ax
>=
ndim
)
throw
runtime_error
(
"bad axis number"
);
if
(
++
tmp
[
ax
]
>
1
)
throw
runtime_error
(
"axis specified repeatedly"
);
if
(
ax
>=
ndim
)
throw
invalid_argument
(
"bad axis number"
);
if
(
++
tmp
[
ax
]
>
1
)
throw
invalid_argument
(
"axis specified repeatedly"
);
}
}
...
...
@@ -484,7 +497,7 @@ struct util // hack to avoid duplicate symbols
size_t
axis
)
{
sanity_check
(
shape
,
stride_in
,
stride_out
,
inplace
);
if
(
axis
>=
shape
.
size
())
throw
runtime_error
(
"bad axis number"
);
if
(
axis
>=
shape
.
size
())
throw
invalid_argument
(
"bad axis number"
);
}
#ifdef POCKETFFT_OPENMP
...
...
@@ -531,7 +544,7 @@ template<bool fwd, typename T> void pass2 (size_t ido, size_t l1,
auto
CH
=
[
ch
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
auto
CC
=
[
cc
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
auto
CC
=
[
cc
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
-
1
+
x
*
(
ido
-
1
)];
};
...
...
@@ -585,7 +598,7 @@ template<bool fwd, typename T> void pass3 (size_t ido, size_t l1,
auto
CH
=
[
ch
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
auto
CC
=
[
cc
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
auto
CC
=
[
cc
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
-
1
+
x
*
(
ido
-
1
)];
};
...
...
@@ -623,7 +636,7 @@ template<bool fwd, typename T> void pass4 (size_t ido, size_t l1,
auto
CH
=
[
ch
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
auto
CC
=
[
cc
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
auto
CC
=
[
cc
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
-
1
+
x
*
(
ido
-
1
)];
};
...
...
@@ -705,7 +718,7 @@ template<bool fwd, typename T> void pass5 (size_t ido, size_t l1,
auto
CH
=
[
ch
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
auto
CC
=
[
cc
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
auto
CC
=
[
cc
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
-
1
+
x
*
(
ido
-
1
)];
};
...
...
@@ -779,7 +792,7 @@ template<bool fwd, typename T> void pass7(size_t ido, size_t l1,
auto
CH
=
[
ch
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
auto
CC
=
[
cc
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
auto
CC
=
[
cc
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
-
1
+
x
*
(
ido
-
1
)];
};
...
...
@@ -843,7 +856,7 @@ template<bool fwd, typename T> void pass8 (size_t ido, size_t l1,
auto
CH
=
[
ch
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
auto
CC
=
[
cc
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
auto
CC
=
[
cc
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
-
1
+
x
*
(
ido
-
1
)];
};
...
...
@@ -971,7 +984,7 @@ template<bool fwd, typename T> void pass11 (size_t ido, size_t l1,
auto
CH
=
[
ch
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
auto
CC
=
[
cc
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
auto
CC
=
[
cc
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
-
1
+
x
*
(
ido
-
1
)];
};
...
...
@@ -1245,7 +1258,7 @@ template<bool fwd, typename T> void pass_all(T c[], T0 fct)
POCKETFFT_NOINLINE
cfftp
(
size_t
length_
)
:
length
(
length_
)
{
if
(
length
==
0
)
throw
runtime_error
(
"zero
length FFT requested"
);
if
(
length
==
0
)
throw
runtime_error
(
"zero
-
length FFT requested"
);
if
(
length
==
1
)
return
;
factorize
();
mem
.
resize
(
twsize
());
...
...
@@ -1290,7 +1303,7 @@ template<typename T> void radf2 (size_t ido, size_t l1,
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
+
x
*
(
ido
-
1
)];
};
auto
CC
=
[
cc
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
auto
CH
=
[
ch
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
auto
CH
=
[
ch
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
for
(
size_t
k
=
0
;
k
<
l1
;
k
++
)
...
...
@@ -1330,7 +1343,7 @@ template<typename T> void radf3(size_t ido, size_t l1,
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
+
x
*
(
ido
-
1
)];
};
auto
CC
=
[
cc
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
auto
CH
=
[
ch
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
auto
CH
=
[
ch
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
for
(
size_t
k
=
0
;
k
<
l1
;
k
++
)
...
...
@@ -1370,7 +1383,7 @@ template<typename T> void radf4(size_t ido, size_t l1,
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
+
x
*
(
ido
-
1
)];
};
auto
CC
=
[
cc
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
auto
CH
=
[
ch
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
auto
CH
=
[
ch
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
for
(
size_t
k
=
0
;
k
<
l1
;
k
++
)
...
...
@@ -1421,7 +1434,7 @@ template<typename T> void radf5(size_t ido, size_t l1,
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
+
x
*
(
ido
-
1
)];
};
auto
CC
=
[
cc
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
auto
CH
=
[
ch
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
auto
CH
=
[
ch
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
for
(
size_t
k
=
0
;
k
<
l1
;
k
++
)
...
...
@@ -1620,7 +1633,7 @@ template<typename T> void radb2(size_t ido, size_t l1,
constexpr
size_t
cdim
=
2
;
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
+
x
*
(
ido
-
1
)];
};
auto
CC
=
[
cc
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
auto
CC
=
[
cc
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
auto
CH
=
[
ch
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
...
...
@@ -1653,7 +1666,7 @@ template<typename T> void radb3(size_t ido, size_t l1,
constexpr
T0
taur
=-
0.5
,
taui
=
T0
(
0.8660254037844386467637231707529362
L
);
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
+
x
*
(
ido
-
1
)];
};
auto
CC
=
[
cc
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
auto
CC
=
[
cc
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
auto
CH
=
[
ch
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
...
...
@@ -1694,7 +1707,7 @@ template<typename T> void radb4(size_t ido, size_t l1,
constexpr
T0
sqrt2
=
T0
(
1.414213562373095048801688724209698
L
);
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
+
x
*
(
ido
-
1
)];
};
auto
CC
=
[
cc
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
auto
CC
=
[
cc
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
auto
CH
=
[
ch
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
...
...
@@ -1750,7 +1763,7 @@ template<typename T> void radb5(size_t ido, size_t l1,
ti12
=
T0
(
0.5877852522924731291687059546390728
L
);
auto
WA
=
[
wa
,
ido
](
size_t
x
,
size_t
i
)
{
return
wa
[
i
+
x
*
(
ido
-
1
)];
};
auto
CC
=
[
cc
,
ido
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
auto
CC
=
[
cc
,
ido
,
cdim
](
size_t
a
,
size_t
b
,
size_t
c
)
->
const
T
&
{
return
cc
[
a
+
ido
*
(
b
+
cdim
*
c
)];
};
auto
CH
=
[
ch
,
ido
,
l1
](
size_t
a
,
size_t
b
,
size_t
c
)
->
T
&
{
return
ch
[
a
+
ido
*
(
b
+
l1
*
c
)];
};
...
...
@@ -2085,7 +2098,7 @@ template<typename T> void radbg(size_t ido, size_t ip, size_t l1,
POCKETFFT_NOINLINE
rfftp
(
size_t
length_
)
:
length
(
length_
)
{
if
(
length
==
0
)
throw
runtime_error
(
"zero-
sized FFT
"
);
if
(
length
==
0
)
throw
runtime_error
(
"zero-
length FFT requested
"
);
if
(
length
==
1
)
return
;
factorize
();
mem
.
resize
(
twsize
());
...
...
@@ -2221,10 +2234,10 @@ template<typename T0> class pocketfft_c
packplan
=
unique_ptr
<
cfftp
<
T0
>>
(
new
cfftp
<
T0
>
(
length
));
}
template
<
typename
T
>
POCKETFFT_NOINLINE
void
backward
(
cmplx
<
T
>
c
[],
T0
fct
)
template
<
typename
T
>
POCKETFFT_NOINLINE
void
backward
(
cmplx
<
T
>
c
[],
T0
fct
)
const
{
packplan
?
packplan
->
backward
(
c
,
fct
)
:
blueplan
->
backward
(
c
,
fct
);
}
template
<
typename
T
>
POCKETFFT_NOINLINE
void
forward
(
cmplx
<
T
>
c
[],
T0
fct
)
template
<
typename
T
>
POCKETFFT_NOINLINE
void
forward
(
cmplx
<
T
>
c
[],
T0
fct
)
const
{
packplan
?
packplan
->
forward
(
c
,
fct
)
:
blueplan
->
forward
(
c
,
fct
);
}
size_t
length
()
const
{
return
len
;
}
...
...
@@ -2261,13 +2274,13 @@ template<typename T0> class pocketfft_r
packplan
=
unique_ptr
<
rfftp
<
T0
>>
(
new
rfftp
<
T0
>
(
length
));
}
template
<
typename
T
>
POCKETFFT_NOINLINE
void
backward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
POCKETFFT_NOINLINE
void
backward
(
T
c
[],
T0
fct
)
const
{
packplan
?
packplan
->
backward
(
c
,
fct
)
:
blueplan
->
backward_r
(
c
,
fct
);
}
template
<
typename
T
>
POCKETFFT_NOINLINE
void
forward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
POCKETFFT_NOINLINE
void
forward
(
T
c
[],
T0
fct
)
const
{
packplan
?
packplan
->
forward
(
c
,
fct
)
:
blueplan
->
forward_r
(
c
,
fct
);
...
...
@@ -2276,6 +2289,365 @@ template<typename T0> class pocketfft_r
size_t
length
()
const
{
return
len
;
}
};
//
// sine/cosine transforms
//
// Type-I cosine transform plan, implemented on top of a real FFT of
// length 2*(length-1).
template<typename T0> class T_dct1
{
private:
  pocketfft_r<T0> fftplan;

public:
  // Builds the underlying real FFT plan for 2*(length-1) points.
  POCKETFFT_NOINLINE T_dct1(size_t length)
    : fftplan(2*(length-1)) {}

  // Transforms the length() values in c in place.
  //   c     - input/output data
  //   fct   - factor handed to the underlying FFT
  //   ortho - if true, the first and last entries are multiplied by sqrt(2)
  //           before the transform and divided by sqrt(2) afterwards
  template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho) const
  {
    constexpr T0 sqrt2 = T0(1.414213562373095048801688724209698L);
    const size_t fftlen = fftplan.length();
    const size_t npts = fftlen/2 + 1;
    const size_t last = npts - 1;

    if (ortho)
    {
      c[0] *= sqrt2;
      c[last] *= sqrt2;
    }

    // Lay the input out as an even-symmetric sequence of fftlen points.
    arr<T> mirrored(fftlen);
    mirrored[0] = c[0];
    for (size_t j=1; j<npts; ++j)
    {
      mirrored[fftlen-j] = c[j];
      mirrored[j] = mirrored[fftlen-j];
    }

    fftplan.forward(mirrored.data(), fct);

    // Keep entry 0 and every odd-indexed entry of the FFT result.
    c[0] = mirrored[0];
    for (size_t j=1, src=1; j<npts; ++j, src+=2)
      c[j] = mirrored[src];

    if (ortho)
    {
      c[0] /= sqrt2;
      c[last] /= sqrt2;
    }
  }

  // Number of data points handled by exec().
  size_t length() const { return fftplan.length()/2 + 1; }
};
// Type-II cosine transform plan: a real FFT of the same length plus a
// table of cosine factors.
template<typename T0> class T_dct2
{
private:
  pocketfft_r<T0> fftplan;
  vector<T0> twiddle;

public:
  // Creates the FFT plan and fills twiddle[m-1] = cos(0.5*pi*m/length)
  // for m = 1..length.
  POCKETFFT_NOINLINE T_dct2(size_t length)
    : fftplan(length), twiddle(length)
  {
    constexpr T0 pi = T0(3.141592653589793238462643383279502884197L);
    for (size_t m=1; m<=length; ++m)
      twiddle[m-1] = T0(cos(0.5*pi*T0(m)/T0(length)));
  }

  // Transforms the length() values in c in place.
  //   c     - input/output data
  //   fct   - scaling factor applied to the result
  //   ortho - if true, c[0] is additionally divided by sqrt(2) at the end
  template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho) const
  {
    constexpr T0 sqrt2 = T0(1.414213562373095048801688724209698L);
    const size_t N = length();

    switch (N)
    {
      case 1:
        c[0] *= 2*fct;
        break;

      case 2:
      {
        const T sum = 2*fct*(c[0]+c[1]);
        c[1] = sqrt2*fct*(c[0]-c[1]);
        c[0] = sum;
        break;
      }

      default:
      {
        const size_t mid = (N+1)/2;

        // Replace each pair (c[j], c[j+1]), j odd, by half its sum and
        // half its difference.
        for (size_t j=1; j+1<N; j+=2)
        {
          const T halfsum = T0(0.5)*(c[j]+c[j+1]);
          c[j+1] = T0(0.5)*(c[j+1]-c[j]);
          c[j] = halfsum;
        }

        fftplan.backward(c, fct);

        // Combine the symmetric pair (lo, N-lo) using the cosine table.
        for (size_t lo=1; lo<mid; ++lo)
        {
          const size_t hi = N-lo;
          const T0 tw_lo = twiddle[lo-1];
          const T0 tw_hi = twiddle[hi-1];
          const T mixed = tw_lo*c[hi] + tw_hi*c[lo];
          c[hi] = tw_lo*c[lo] - tw_hi*c[hi];
          c[lo] = mixed;
        }
        if ((N&1)==0)
          c[mid] = twiddle[mid-1]*(c[mid]+c[mid]);

        // Final sum/difference step on the same symmetric pairs.
        for (size_t lo=1; lo<mid; ++lo)
        {
          const size_t hi = N-lo;
          const T sum = c[lo]+c[hi];
          c[hi] = c[lo]-c[hi];
          c[lo] = sum;
        }
        c[0] *= 2;
        break;
      }
    }

    if (ortho)
      c[0] /= sqrt2;
  }

  // Number of data points handled by exec().
  size_t length() const { return fftplan.length(); }
};
// Type-III cosine transform plan: a real FFT of the same length plus a
// table of cosine factors.
template<typename T0> class T_dct3
{
private:
  pocketfft_r<T0> fftplan;
  vector<T0> twiddle;

public:
  // Creates the FFT plan and fills twiddle[m-1] = cos(0.5*pi*m/length)
  // for m = 1..length.
  POCKETFFT_NOINLINE T_dct3(size_t length)
    : fftplan(length), twiddle(length)
  {
    constexpr T0 pi = T0(3.141592653589793238462643383279502884197L);
    for (size_t m=1; m<=length; ++m)
      twiddle[m-1] = T0(cos(0.5*pi*T0(m)/T0(length)));
  }

  // Transforms the length() values in c in place.
  //   c     - input/output data
  //   fct   - scaling factor applied to the result
  //   ortho - if true, c[0] is multiplied by sqrt(2) before anything else
  template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho) const
  {
    constexpr T0 sqrt2 = T0(1.414213562373095048801688724209698L);
    const size_t N = length();

    if (ortho)
      c[0] *= sqrt2;

    if (N==1)
    {
      c[0] *= fct;
      return;
    }
    if (N==2)
    {
      const T scaled = sqrt2*c[1];
      c[1] = fct*(c[0]-scaled);
      c[0] = fct*(c[0]+scaled);
      return;
    }

    const size_t mid = (N+1)/2;

    // Sum/difference step on the symmetric pair (lo, N-lo).
    for (size_t lo=1; lo<mid; ++lo)
    {
      const size_t hi = N-lo;
      const T diff = c[lo]-c[hi];
      c[lo] = c[lo]+c[hi];
      c[hi] = diff;
    }
    if ((N&1)==0)
      c[mid] = c[mid]+c[mid];

    // Combine the same pairs using the cosine table.
    for (size_t lo=1; lo<mid; ++lo)
    {
      const size_t hi = N-lo;
      const T0 tw_lo = twiddle[lo-1];
      const T0 tw_hi = twiddle[hi-1];
      const T mixed = tw_lo*c[lo] - tw_hi*c[hi];
      c[lo] = tw_lo*c[hi] + tw_hi*c[lo];
      c[hi] = mixed;
    }
    if ((N&1)==0)
      c[mid] = twiddle[mid-1]*c[mid];

    fftplan.forward(c, fct);

    // Replace each pair (c[j], c[j+1]), j odd, by its difference and sum.
    for (size_t j=1; j+1<N; j+=2)
    {
      const T diff = c[j]-c[j+1];
      c[j+1] += c[j];
      c[j] = diff;
    }
  }

  // Number of data points handled by exec().
  size_t length() const { return fftplan.length(); }
};
// Type-IV cosine transform plan.
// Even lengths use a complex FFT of half the length together with the
// factor table C2; odd lengths use a real FFT of the full length.
template<typename T0> class T_dct4
{
  // even length algorithm from
  // https://www.appletonaudio.com/blog/2013/derivation-of-fast-dct-4-algorithm-based-on-dft/
private:
  size_t N;
  unique_ptr<pocketfft_c<T0>> fft;   // only allocated for even N
  unique_ptr<pocketfft_r<T0>> rfft;  // only allocated for odd N
  arr<cmplx<T0>> C2;                 // N/2 entries for even N, empty otherwise

public:
  // Allocates whichever FFT plan matches the parity of length and, for even
  // lengths, fills C2[i] with (cos(ang), sin(ang)), ang = -pi/N*(i+0.125).
  POCKETFFT_NOINLINE T_dct4(size_t length)
    : N(length),
      fft((N&1) ? nullptr : new pocketfft_c<T0>(N/2)),
      rfft((N&1) ? new pocketfft_r<T0>(N) : nullptr),
      C2((N&1) ? 0 : N/2)
  {
    constexpr T0 pi = T0(3.141592653589793238462643383279502884197L);
    if (N&1)
      return;
    const size_t half = N/2;
    for (size_t j=0; j<half; ++j)
    {
      const T0 ang = -pi/T0(N)*(T0(j)+T0(0.125));
      C2[j].Set(cos(ang), sin(ang));
    }
  }

  // Transforms the length() values in c in place; fct is handed to the
  // underlying FFT. The third (ortho) argument is ignored.
  template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool /*ortho*/) const
  {
    constexpr T0 sqrt2 = T0(1.414213562373095048801688724209698L);

    if ((N&1)==0)
    {
      // Even length: pack pairs of inputs into complex numbers, multiply by
      // C2 before and after the complex FFT, then unpack.
      const size_t half = N/2;
      arr<cmplx<T>> packed(half);
      for (size_t j=0; j<half; ++j)
      {
        packed[j].Set(c[2*j], c[N-1-2*j]);
        packed[j] *= C2[j];
      }
      fft->forward(packed.data(), fct);
      for (size_t j=0; j<half; ++j)
        packed[j] *= C2[j];
      for (size_t j=0; j<half; ++j)
      {
        c[2*j] = 2*packed[j].r;
        c[2*j+1] = -2*packed[half-1-j].i;
      }
      return;
    }

    // The following code is derived from the FFTW3 function apply_re11()
    // and is released under the 3-clause BSD license with friendly
    // permission of Matteo Frigo.

    // Returns -val when idx is odd, val otherwise.
    auto flip = [](T val, size_t idx) { return (idx%2) ? -val : val; };

    arr<T> work(N);
    const size_t half = N/2;
    size_t pos;
    {
      // Gather the input in a permuted order (stride 4 through a virtual
      // sequence of length 4*N), negating the entries of the middle two
      // segments.
      size_t src;
      for (pos=0, src=half; src<N; ++pos, src+=4)
        work[pos] = c[src];
      for (; src<2*N; ++pos, src+=4)
        work[pos] = -c[2*N-src-1];
      for (; src<3*N; ++pos, src+=4)
        work[pos] = -c[src-2*N];
      for (; src<4*N; ++pos, src+=4)
        work[pos] = c[4*N-src-1];
      src -= 4*N;
      for (; pos<N; ++pos, src+=4)
        work[pos] = c[src];
    }

    rfft->forward(work.data(), fct);

    // Each iteration consumes four consecutive FFT outputs and produces
    // four results.
    for (pos=0; pos+pos+1<half; ++pos)
    {
      const size_t k = pos+pos+1;
      const T ca = work[2*k-1], sa = work[2*k],
              cb = work[2*k+1], sb = work[2*k+2];
      c[pos] = sqrt2 * (flip(ca, (pos+1)/2) + flip(sa, pos/2));
      c[N -(pos+1)] = sqrt2 * (flip(ca, (N -pos)/2) - flip(sa, (N -(pos+1))/2));
      c[half-(pos+1)] = sqrt2 * (flip(cb, (half-pos)/2) - flip(sb, (half-(pos+1))/2));
      c[half+(pos+1)] = sqrt2 * (flip(cb, (half+pos+2)/2) + flip(sb, (half+(pos+1))/2));
    }
    // Leftover pair when the loop above stopped exactly at pos+pos+1 == half.
    if (pos+pos+1 == half)
    {
      const T cm = work[2*half-1], sm = work[2*half];
      c[pos] = sqrt2 * (flip(cm, (pos+1)/2) + flip(sm, pos/2));
      c[N -(pos+1)] = sqrt2 * (flip(cm, (pos+2)/2) + flip(sm, (pos+1)/2));
    }
    c[half] = sqrt2 * flip(work[0], (half+1)/2);
    // FFTW-derived code ends here
  }

  // Number of data points handled by exec().
  size_t length() const { return N; }
};
// Type-I sine transform plan, implemented on top of a real FFT of
// length 2*(length+1).
template<typename T0> class T_dst1
{
private:
  pocketfft_r<T0> fftplan;

public:
  // Builds the underlying real FFT plan for 2*(length+1) points.
  POCKETFFT_NOINLINE T_dst1(size_t length)
    : fftplan(2*(length+1)) {}

  // Transforms the length() values in c in place; fct is handed to the
  // underlying FFT. The third (ortho) argument is ignored.
  template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool /*ortho*/) const
  {
    const size_t fftlen = fftplan.length();
    const size_t npts = fftlen/2 - 1;

    // Lay the input out as an odd-symmetric sequence: zeros at positions 0
    // and npts+1, the data in between, and the negated data mirrored at
    // the far end.
    arr<T> odd(fftlen);
    odd[0] = odd[npts+1] = c[0]*0;
    for (size_t j=0; j<npts; ++j)
    {
      odd[j+1] = c[j];
      odd[fftlen-1-j] = -c[j];
    }

    fftplan.forward(odd.data(), fct);

    // The results are the negated even-indexed FFT outputs from index 2 on.
    for (size_t j=0, src=2; j<npts; ++j, src+=2)
      c[j] = -odd[src];
  }

  // Number of data points handled by exec().
  size_t length() const { return fftplan.length()/2 - 1; }
};
// Type-II sine transform plan, expressed through the type-II cosine
// transform: negate odd-indexed inputs, run the DCT, reverse the output.
template<typename T0> class T_dst2
{
private:
  T_dct2<T0> dct;

public:
  // Creates the wrapped cosine-transform plan of the same length.
  POCKETFFT_NOINLINE T_dst2(size_t length)
    : dct(length) {}

  // Transforms the length() values in c in place.
  //   c     - input/output data
  //   fct   - scaling factor applied to the result
  //   ortho - if true, c[0] is additionally divided by sqrt(2) at the end
  template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho) const
  {
    constexpr T0 sqrt2 = T0(1.414213562373095048801688724209698L);
    const size_t N = length();

    if (N!=1)
    {
      // Flip the sign of every odd-indexed input.
      for (size_t j=1; j<N; j+=2)
        c[j] = -c[j];

      // The wrapped DCT is always run without its own ortho handling.
      dct.exec(c, fct, false);

      // Reverse the output order.
      for (size_t front=0; front<N/2; ++front)
        swap(c[front], c[N-1-front]);
    }
    else
      c[0] *= 2*fct;

    if (ortho)
      c[0] /= sqrt2;
  }

  // Number of data points handled by exec().
  size_t length() const { return dct.length(); }
};
// Type-III sine transform plan, expressed through the type-III cosine
// transform: reverse the input, run the DCT, negate odd-indexed outputs.
template<typename T0> class T_dst3
{
private:
  T_dct3<T0> dct;

public:
  // Creates the wrapped cosine-transform plan of the same length.
  POCKETFFT_NOINLINE T_dst3(size_t length)
    : dct(length) {}

  // Transforms the length() values in c in place.
  //   c     - input/output data
  //   fct   - scaling factor applied to the result
  //   ortho - if true, c[0] is multiplied by sqrt(2) before anything else
  //
  // Declared const like exec() of the other transform plans in this file:
  // it modifies no members and only calls the const methods length() and
  // T_dct3::exec().
  template<typename T> POCKETFFT_NOINLINE void exec(T c[], T0 fct, bool ortho) const
  {
    constexpr T0 sqrt2 = T0(1.414213562373095048801688724209698L);
    size_t N = length();
    if (ortho)
      c[0] *= sqrt2;
    if (N==1)
      c[0] *= fct;
    else
    {
      // Reverse the input order.
      size_t NS2 = N/2;
      for (size_t k=0, kc=N-1; k<NS2; ++k, --kc)
        swap(c[k], c[kc]);
      // The wrapped DCT is always run without its own ortho handling.
      dct.exec(c, fct, false);
      // Flip the sign of every odd-indexed output.
      for (size_t k=1; k<N; k+=2)
        c[k] = -c[k];
    }
  }

  // Number of data points handled by exec().
  size_t length() const { return dct.length(); }
};
template
<
typename
T0
>
class
T_dst4
{
private:
T_dct4
<
T0
>
dct
;
public:
POCKETFFT_NOINLINE
T_dst4
(
size_t
length
)
:
dct
(
length
)
{}
template
<
typename
T
>
POCKETFFT_NOINLINE
void
exec
(
T
c
[],
T0
fct
,
bool
/*ortho*/
)
{
size_t
N
=
length
();
//if (N==1) { c[0]*=fct; return; }
size_t
NS2
=
N
/
2
;
for
(
size_t
k
=
0
,
kc
=
N
-
1
;
k
<
NS2
;
++
k
,
--
kc
)
swap
(
c
[
k
],
c
[
kc
]);
dct
.
exec
(
c
,
fct
,
false
);
for
(
size_t
k
=
1
;
k
<
N
;
k
+=
2
)
c
[
k
]
=
-
c
[
k
];
}