Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pypocketfft
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
4
Issues
4
List
Boards
Labels
Service Desk
Milestones
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Martin Reinecke
pypocketfft
Commits
367ace56
Commit
367ace56
authored
Apr 28, 2019
by
Martin Reinecke
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
evolution
parent
b0b3677a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
203 additions
and
127 deletions
+203
-127
README.md
README.md
+16
-0
pocketfft.cc
pocketfft.cc
+129
-127
stress.py
stress.py
+58
-0
No files found.
README.md
0 → 100644
View file @
367ace56
pypocketfft
===========
This package provides Fast Fourier and Hartley transforms with a simple
Python interface.
Features
--------
-
supports fully complex and half-complex (i.e. complex-to-real and
real-to-complex) FFTs
-
supports multidimensional arrays and selection of the axes to be transformed.
-
supports single and double precision
-
makes use of CPU vector instructions when performing 2D and higher-dimensional
transforms
-
does not have persistent transform plans, which makes the interface simpler
-
supports prime-length transforms without degrading to O(N
**
2) performance
pocketfft.cc
View file @
367ace56
...
...
@@ -1601,7 +1601,7 @@ template<typename T> void copy_and_norm(T *c, T *p1, size_t n, T0 fct)
public:
template
<
typename
T
>
void
forward
(
T
c
[],
T0
fact
)
template
<
typename
T
>
NOINLINE
void
forward
(
T
c
[],
T0
fact
)
{
if
(
length
==
1
)
{
c
[
0
]
*=
fact
;
return
;
}
size_t
n
=
length
;
...
...
@@ -1633,7 +1633,7 @@ template<typename T> void forward(T c[], T0 fact)
copy_and_norm
(
c
,
p1
,
n
,
fact
);
}
template
<
typename
T
>
void
backward
(
T
c
[],
T0
fact
)
template
<
typename
T
>
NOINLINE
void
backward
(
T
c
[],
T0
fact
)
{
if
(
length
==
1
)
{
c
[
0
]
*=
fact
;
return
;
}
size_t
n
=
length
;
...
...
@@ -1834,13 +1834,13 @@ template<typename T0> class fftblue
plan
.
forward
((
cmplx
<
T0
>
*
)
bkf
,
1.
);
}
template
<
typename
T
>
void
backward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
backward
(
T
c
[],
T0
fct
)
{
fft
<
true
>
(
c
,
fct
);
}
template
<
typename
T
>
void
forward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
forward
(
T
c
[],
T0
fct
)
{
fft
<
false
>
(
c
,
fct
);
}
template
<
typename
T
>
void
backward_r
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
backward_r
(
T
c
[],
T0
fct
)
{
arr
<
T
>
tmp
(
2
*
n
);
tmp
[
0
]
=
c
[
0
];
...
...
@@ -1857,7 +1857,7 @@ template<typename T0> class fftblue
c
[
m
]
=
tmp
[
2
*
m
];
}
template
<
typename
T
>
void
forward_r
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
forward_r
(
T
c
[],
T0
fct
)
{
arr
<
T
>
tmp
(
2
*
n
);
for
(
size_t
m
=
0
;
m
<
n
;
++
m
)
...
...
@@ -1901,13 +1901,13 @@ template<typename T0> class pocketfft_c
packplan
=
unique_ptr
<
cfftp
<
T0
>>
(
new
cfftp
<
T0
>
(
length
));
}
template
<
typename
T
>
void
backward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
backward
(
T
c
[],
T0
fct
)
{
packplan
?
packplan
->
backward
((
cmplx
<
T
>
*
)
c
,
fct
)
:
blueplan
->
backward
(
c
,
fct
);
}
template
<
typename
T
>
void
forward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
forward
(
T
c
[],
T0
fct
)
{
packplan
?
packplan
->
forward
((
cmplx
<
T
>
*
)
c
,
fct
)
:
blueplan
->
forward
(
c
,
fct
);
...
...
@@ -1946,13 +1946,13 @@ template<typename T0> class pocketfft_r
packplan
=
unique_ptr
<
rfftp
<
T0
>>
(
new
rfftp
<
T0
>
(
length
));
}
template
<
typename
T
>
void
backward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
backward
(
T
c
[],
T0
fct
)
{
packplan
?
packplan
->
backward
(
c
,
fct
)
:
blueplan
->
backward_r
(
c
,
fct
);
}
template
<
typename
T
>
void
forward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
forward
(
T
c
[],
T0
fct
)
{
packplan
?
packplan
->
forward
(
c
,
fct
)
:
blueplan
->
forward_r
(
c
,
fct
);
...
...
@@ -1970,6 +1970,8 @@ using stride_t = vector<int64_t>;
struct
diminfo
{
size_t
n
;
int64_t
s
;
};
struct
diminfo_io
{
size_t
n
;
int64_t
s_i
,
s_o
;
};
class
multiarr
{
private:
...
...
@@ -1987,49 +1989,51 @@ class multiarr
int64_t
stride
(
size_t
i
)
const
{
return
dim
[
i
].
s
;
}
};
class
multi_iter
class
multi_iter
_io
{
public:
vector
<
diminfo
>
dim
;
vector
<
diminfo
_io
>
dim
;
shape_t
pos
;
int64_t
ofs_
;
size_t
len
;
int64_t
str
;
int64_t
ofs_
i
,
ofs_o
;
size_t
len
_i
,
len_o
;
int64_t
str
_i
,
str_o
;
size_t
rem
;
bool
done_
;
public:
multi_iter
(
const
multiarr
&
arr
,
size_t
idim
)
:
pos
(
arr
.
ndim
()
-
1
,
0
),
ofs_
(
0
),
len
(
arr
.
size
(
idim
)),
str
(
arr
.
stride
(
idim
)),
rem
(
1
),
done_
(
false
)
multi_iter_io
(
const
multiarr
&
arr_i
,
const
multiarr
&
arr_o
,
size_t
idim
)
:
pos
(
arr_i
.
ndim
()
-
1
,
0
),
ofs_i
(
0
),
ofs_o
(
0
),
len_i
(
arr_i
.
size
(
idim
)),
len_o
(
arr_o
.
size
(
idim
)),
str_i
(
arr_i
.
stride
(
idim
)),
str_o
(
arr_o
.
stride
(
idim
)),
rem
(
1
)
{
dim
.
reserve
(
arr
.
ndim
()
-
1
);
for
(
size_t
i
=
0
;
i
<
arr
.
ndim
();
++
i
)
dim
.
reserve
(
arr
_i
.
ndim
()
-
1
);
for
(
size_t
i
=
0
;
i
<
arr
_i
.
ndim
();
++
i
)
if
(
i
!=
idim
)
{
dim
.
push_back
({
arr
.
size
(
i
),
arr
.
stride
(
i
)});
done_
=
done_
||
(
arr
.
size
(
i
)
==
0
);
rem
*=
arr
.
size
(
i
);
dim
.
push_back
({
arr_i
.
size
(
i
),
arr_i
.
stride
(
i
),
arr_o
.
stride
(
i
)});
rem
*=
arr_i
.
size
(
i
);
}
}
void
advance
()
{
if
(
--
rem
==
0
)
{
done_
=
true
;
return
;
}
if
(
--
rem
==
0
)
return
;
for
(
int
i
=
pos
.
size
()
-
1
;
i
>=
0
;
--
i
)
{
++
pos
[
i
];
ofs_
+=
dim
[
i
].
s
;
ofs_i
+=
dim
[
i
].
s_i
;
ofs_o
+=
dim
[
i
].
s_o
;
if
(
pos
[
i
]
<
dim
[
i
].
n
)
return
;
pos
[
i
]
=
0
;
ofs_
-=
dim
[
i
].
n
*
dim
[
i
].
s
;
ofs_i
-=
dim
[
i
].
n
*
dim
[
i
].
s_i
;
ofs_o
-=
dim
[
i
].
n
*
dim
[
i
].
s_o
;
}
done_
=
true
;
}
bool
done
()
const
{
return
done_
;
}
int64_t
offset
()
const
{
return
ofs_
;
}
size_t
length
()
const
{
return
len
;
}
int64_t
stride
()
const
{
return
str
;
}
int64_t
offset_in
()
const
{
return
ofs_i
;
}
int64_t
offset_out
()
const
{
return
ofs_o
;
}
size_t
length_in
()
const
{
return
len_i
;
}
size_t
length_out
()
const
{
return
len_o
;
}
int64_t
stride_in
()
const
{
return
str_i
;
}
int64_t
stride_out
()
const
{
return
str_o
;
}
size_t
remaining
()
const
{
return
rem
;
}
};
...
...
@@ -2071,22 +2075,24 @@ template<> struct VTYPE<float>
};
#endif
size_t
prod
(
const
shape_t
&
shape
)
{
size_t
res
=
1
;
for
(
auto
sz
:
shape
)
res
*=
sz
;
return
res
;
}
template
<
typename
T
>
arr
<
char
>
alloc_tmp
(
const
shape_t
&
shape
,
size_t
axsize
,
size_t
elemsize
)
{
size_t
fullsize
=
1
;
for
(
auto
sz
:
shape
)
fullsize
*=
sz
;
auto
othersize
=
fullsize
/
axsize
;
auto
othersize
=
prod
(
shape
)
/
axsize
;
auto
tmpsize
=
axsize
*
((
othersize
>=
VTYPE
<
T
>::
vlen
)
?
VTYPE
<
T
>::
vlen
:
1
);
return
arr
<
char
>
(
tmpsize
*
elemsize
);
}
template
<
typename
T
>
arr
<
char
>
alloc_tmp
(
const
shape_t
&
shape
,
const
shape_t
&
axes
,
size_t
elemsize
)
{
size_t
fullsize
=
1
;
for
(
auto
sz
:
shape
)
fullsize
*=
sz
;
size_t
fullsize
=
prod
(
shape
);
size_t
tmpsize
=
0
;
for
(
size_t
i
=
0
;
i
<
axes
.
size
();
++
i
)
{
...
...
@@ -2097,18 +2103,19 @@ template<typename T> arr<char> alloc_tmp(const shape_t &shape,
return
arr
<
char
>
(
tmpsize
*
elemsize
);
}
template
<
size_t
vlen
>
struct
multioffset
template
<
size_t
vlen
>
struct
multioffset
_io
{
int64_t
ofs
[
vlen
];
multioffset
(
multi_iter
&
it
)
int64_t
ofs
_i
[
vlen
],
ofs_o
[
vlen
];
multioffset
_io
(
multi_iter_io
&
it
)
{
for
(
size_t
i
=
0
;
i
<
vlen
;
++
i
)
{
ofs
[
i
]
=
it
.
offse
t
();
it
.
advance
();
}
{
ofs
_i
[
i
]
=
it
.
offset_in
();
ofs_o
[
i
]
=
it
.
offset_ou
t
();
it
.
advance
();
}
}
int64_t
operator
[](
size_t
i
)
const
{
return
ofs
[
i
];
}
int64_t
in
(
size_t
i
)
const
{
return
ofs_i
[
i
];
}
int64_t
out
(
size_t
i
)
const
{
return
ofs_o
[
i
];
}
};
template
<
typename
T
>
void
pocketfft_general_c
(
const
shape_t
&
shape
,
template
<
typename
T
>
NOINLINE
void
pocketfft_general_c
(
const
shape_t
&
shape
,
const
stride_t
&
stride_in
,
const
stride_t
&
stride_out
,
const
shape_t
&
axes
,
bool
forward
,
const
cmplx
<
T
>
*
data_in
,
cmplx
<
T
>
*
data_out
,
T
fct
)
...
...
@@ -2124,50 +2131,49 @@ template<typename T> void pocketfft_general_c(const shape_t &shape,
for
(
size_t
iax
=
0
;
iax
<
axes
.
size
();
++
iax
)
{
multi_iter
it_in
(
a_in
,
axes
[
iax
]),
it_out
(
a_out
,
axes
[
iax
]);
size_t
len
=
it
_in
.
length
();
int64_t
s_i
=
it
_in
.
stride
(),
s_o
=
it_out
.
stride
();
multi_iter
_io
it
(
a_in
,
a_out
,
axes
[
iax
]);
size_t
len
=
it
.
length_in
();
int64_t
s_i
=
it
.
stride_in
(),
s_o
=
it
.
stride_out
();
if
((
!
plan
)
||
(
len
!=
plan
->
length
()))
plan
.
reset
(
new
pocketfft_c
<
T
>
(
len
));
if
(
vlen
>
1
)
while
(
it
_in
.
remaining
()
>=
vlen
)
while
(
it
.
remaining
()
>=
vlen
)
{
multioffset
<
vlen
>
p_i
(
it_in
),
p_o
(
it_ou
t
);
multioffset
_io
<
vlen
>
p
(
i
t
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
tdatav
[
i
].
r
[
j
]
=
data_in
[
p
_i
[
j
]
+
i
*
s_i
].
r
;
tdatav
[
i
].
i
[
j
]
=
data_in
[
p
_i
[
j
]
+
i
*
s_i
].
i
;
tdatav
[
i
].
r
[
j
]
=
data_in
[
p
.
in
(
j
)
+
i
*
s_i
].
r
;
tdatav
[
i
].
i
[
j
]
=
data_in
[
p
.
out
(
j
)
+
i
*
s_i
].
i
;
}
forward
?
plan
->
forward
((
vtype
*
)
tdatav
,
fct
)
:
plan
->
backward
((
vtype
*
)
tdatav
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
+
i
*
s_o
].
Set
(
tdatav
[
i
].
r
[
j
],
tdatav
[
i
].
i
[
j
]);
data_out
[
p
.
out
(
j
)
+
i
*
s_o
].
Set
(
tdatav
[
i
].
r
[
j
],
tdatav
[
i
].
i
[
j
]);
}
while
(
it
_in
.
remaining
()
>
0
)
while
(
it
.
remaining
()
>
0
)
{
if
((
data_in
==
data_out
)
&&
s_i
==
1
)
// fully in-place
forward
?
plan
->
forward
((
T
*
)(
data_in
+
it
_in
.
offset
()),
fct
)
:
plan
->
backward
((
T
*
)(
data_in
+
it
_in
.
offset
()),
fct
);
forward
?
plan
->
forward
((
T
*
)(
data_in
+
it
.
offset_in
()),
fct
)
:
plan
->
backward
((
T
*
)(
data_in
+
it
.
offset_in
()),
fct
);
else
if
(
s_o
==
1
)
// compute FFT in output location
{
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
data_out
[
it
_out
.
offset
()
+
i
]
=
data_in
[
it_in
.
offset
()
+
i
*
s_i
];
forward
?
plan
->
forward
((
T
*
)(
data_out
+
it
_out
.
offse
t
()),
fct
)
:
plan
->
backward
((
T
*
)(
data_out
+
it
_out
.
offse
t
()),
fct
);
data_out
[
it
.
offset_out
()
+
i
]
=
data_in
[
it
.
offset_in
()
+
i
*
s_i
];
forward
?
plan
->
forward
((
T
*
)(
data_out
+
it
.
offset_ou
t
()),
fct
)
:
plan
->
backward
((
T
*
)(
data_out
+
it
.
offset_ou
t
()),
fct
);
}
else
{
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
tdata
[
i
]
=
data_in
[
it
_in
.
offset
()
+
i
*
s_i
];
tdata
[
i
]
=
data_in
[
it
.
offset_in
()
+
i
*
s_i
];
forward
?
plan
->
forward
((
T
*
)
tdata
,
fct
)
:
plan
->
backward
((
T
*
)
tdata
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
data_out
[
it
_out
.
offse
t
()
+
i
*
s_o
]
=
tdata
[
i
];
data_out
[
it
.
offset_ou
t
()
+
i
*
s_o
]
=
tdata
[
i
];
}
it_in
.
advance
();
it_out
.
advance
();
it
.
advance
();
}
// after the first dimension, take data from output array
a_in
=
a_out
;
...
...
@@ -2177,7 +2183,7 @@ template<typename T> void pocketfft_general_c(const shape_t &shape,
}
}
template
<
typename
T
>
void
pocketfft_general_hartley
(
const
shape_t
&
shape
,
template
<
typename
T
>
NOINLINE
void
pocketfft_general_hartley
(
const
shape_t
&
shape
,
const
stride_t
&
stride_in
,
const
stride_t
&
stride_out
,
const
shape_t
&
axes
,
const
T
*
data_in
,
T
*
data_out
,
T
fct
)
{
...
...
@@ -2192,49 +2198,48 @@ template<typename T> void pocketfft_general_hartley(const shape_t &shape,
for
(
size_t
iax
=
0
;
iax
<
axes
.
size
();
++
iax
)
{
multi_iter
it_in
(
a_in
,
axes
[
iax
]),
it_out
(
a_out
,
axes
[
iax
]);
size_t
len
=
it
_in
.
length
();
int64_t
s_i
=
it
_in
.
stride
(),
s_o
=
it_out
.
stride
();
multi_iter
_io
it
(
a_in
,
a_out
,
axes
[
iax
]);
size_t
len
=
it
.
length_in
();
int64_t
s_i
=
it
.
stride_in
(),
s_o
=
it
.
stride_out
();
if
((
!
plan
)
||
(
len
!=
plan
->
length
()))
plan
.
reset
(
new
pocketfft_r
<
T
>
(
len
));
if
(
vlen
>
1
)
while
(
it
_in
.
remaining
()
>=
vlen
)
while
(
it
.
remaining
()
>=
vlen
)
{
multioffset
<
vlen
>
p_i
(
it_in
),
p_o
(
it_ou
t
);
multioffset
_io
<
vlen
>
p
(
i
t
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
i
][
j
]
=
data_in
[
p
_i
[
j
]
+
i
*
s_i
];
tdatav
[
i
][
j
]
=
data_in
[
p
.
in
(
j
)
+
i
*
s_i
];
plan
->
forward
((
vtype
*
)
tdatav
,
fct
);
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
]
=
tdatav
[
0
][
j
];
data_out
[
p
.
out
(
j
)
]
=
tdatav
[
0
][
j
];
size_t
i
=
1
,
i1
=
1
,
i2
=
len
-
1
;
for
(
i
=
1
;
i
<
len
-
1
;
i
+=
2
,
++
i1
,
--
i2
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
data_out
[
p
_o
[
j
]
+
i1
*
s_o
]
=
tdatav
[
i
][
j
]
+
tdatav
[
i
+
1
][
j
];
data_out
[
p
_o
[
j
]
+
i2
*
s_o
]
=
tdatav
[
i
][
j
]
-
tdatav
[
i
+
1
][
j
];
data_out
[
p
.
out
(
j
)
+
i1
*
s_o
]
=
tdatav
[
i
][
j
]
+
tdatav
[
i
+
1
][
j
];
data_out
[
p
.
out
(
j
)
+
i2
*
s_o
]
=
tdatav
[
i
][
j
]
-
tdatav
[
i
+
1
][
j
];
}
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
+
i1
*
s_o
]
=
tdatav
[
i
][
j
];
data_out
[
p
.
out
(
j
)
+
i1
*
s_o
]
=
tdatav
[
i
][
j
];
}
while
(
it
_in
.
remaining
()
>
0
)
while
(
it
.
remaining
()
>
0
)
{
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
tdata
[
i
]
=
data_in
[
it
_in
.
offset
()
+
i
*
s_i
];
tdata
[
i
]
=
data_in
[
it
.
offset_in
()
+
i
*
s_i
];
plan
->
forward
((
T
*
)
tdata
,
fct
);
// Hartley order
data_out
[
it
_out
.
offse
t
()]
=
tdata
[
0
];
data_out
[
it
.
offset_ou
t
()]
=
tdata
[
0
];
size_t
i
=
1
,
i1
=
1
,
i2
=
len
-
1
;
for
(
i
=
1
;
i
<
len
-
1
;
i
+=
2
,
++
i1
,
--
i2
)
{
data_out
[
it
_out
.
offse
t
()
+
i1
*
s_o
]
=
tdata
[
i
]
+
tdata
[
i
+
1
];
data_out
[
it
_out
.
offse
t
()
+
i2
*
s_o
]
=
tdata
[
i
]
-
tdata
[
i
+
1
];
data_out
[
it
.
offset_ou
t
()
+
i1
*
s_o
]
=
tdata
[
i
]
+
tdata
[
i
+
1
];
data_out
[
it
.
offset_ou
t
()
+
i2
*
s_o
]
=
tdata
[
i
]
-
tdata
[
i
+
1
];
}
if
(
i
<
len
)
data_out
[
it_out
.
offset
()
+
i1
*
s_o
]
=
tdata
[
i
];
it_in
.
advance
();
it_out
.
advance
();
data_out
[
it
.
offset_out
()
+
i1
*
s_o
]
=
tdata
[
i
];
it
.
advance
();
}
// after the first dimension, take data from output array
a_in
=
a_out
;
...
...
@@ -2244,7 +2249,7 @@ template<typename T> void pocketfft_general_hartley(const shape_t &shape,
}
}
template
<
typename
T
>
void
pocketfft_general_r2c
(
const
shape_t
&
shape
,
template
<
typename
T
>
NOINLINE
void
pocketfft_general_r2c
(
const
shape_t
&
shape
,
const
stride_t
&
stride_in
,
const
stride_t
&
stride_out
,
size_t
axis
,
const
T
*
data_in
,
cmplx
<
T
>
*
data_out
,
T
fct
)
{
...
...
@@ -2256,31 +2261,31 @@ template<typename T> void pocketfft_general_r2c(const shape_t &shape,
multiarr
a_in
(
shape
,
stride_in
),
a_out
(
shape
,
stride_out
);
pocketfft_r
<
T
>
plan
(
shape
[
axis
]);
multi_iter
it_in
(
a_in
,
axis
),
it_out
(
a_out
,
axis
);
multi_iter
_io
it
(
a_in
,
a_out
,
axis
);
size_t
len
=
shape
[
axis
];
int64_t
s_i
=
it
_in
.
stride
(),
s_o
=
it_out
.
stride
();
int64_t
s_i
=
it
.
stride_in
(),
s_o
=
it
.
stride_out
();
if
(
vlen
>
1
)
while
(
it
_in
.
remaining
()
>=
vlen
)
while
(
it
.
remaining
()
>=
vlen
)
{
multioffset
<
vlen
>
p_i
(
it_in
),
p_o
(
it_ou
t
);
multioffset
_io
<
vlen
>
p
(
i
t
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
i
][
j
]
=
data_in
[
p
_i
[
j
]
+
i
*
s_i
];
tdatav
[
i
][
j
]
=
data_in
[
p
.
in
(
j
)
+
i
*
s_i
];
plan
.
forward
((
vtype
*
)
tdatav
,
fct
);
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
].
Set
(
tdatav
[
0
][
j
]);
data_out
[
p
.
out
(
j
)
].
Set
(
tdatav
[
0
][
j
]);
size_t
i
=
1
,
ii
=
1
;
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
+
ii
*
s_o
].
Set
(
tdatav
[
i
][
j
],
tdatav
[
i
+
1
][
j
]);
data_out
[
p
.
out
(
j
)
+
ii
*
s_o
].
Set
(
tdatav
[
i
][
j
],
tdatav
[
i
+
1
][
j
]);
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
+
ii
*
s_o
].
Set
(
tdatav
[
i
][
j
]);
data_out
[
p
.
out
(
j
)
+
ii
*
s_o
].
Set
(
tdatav
[
i
][
j
]);
}
while
(
it
_in
.
remaining
()
>
0
)
while
(
it
.
remaining
()
>
0
)
{
const
T
*
d_i
=
data_in
+
it
_in
.
offset
();
cmplx
<
T
>
*
d_o
=
data_out
+
it
_out
.
offse
t
();
const
T
*
d_i
=
data_in
+
it
.
offset_in
();
cmplx
<
T
>
*
d_o
=
data_out
+
it
.
offset_ou
t
();
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
tdata
[
i
]
=
d_i
[
i
*
s_i
];
plan
.
forward
(
tdata
,
fct
);
...
...
@@ -2290,11 +2295,10 @@ template<typename T> void pocketfft_general_r2c(const shape_t &shape,
d_o
[
ii
*
s_o
].
Set
(
tdata
[
i
],
tdata
[
i
+
1
]);
if
(
i
<
len
)
d_o
[
ii
*
s_o
].
Set
(
tdata
[
i
]);
it_in
.
advance
();
it_out
.
advance
();
it
.
advance
();
}
}
template
<
typename
T
>
void
pocketfft_general_c2r
(
const
shape_t
&
shape_out
,
template
<
typename
T
>
NOINLINE
void
pocketfft_general_c2r
(
const
shape_t
&
shape_out
,
const
stride_t
&
stride_in
,
const
stride_t
&
stride_out
,
size_t
axis
,
const
cmplx
<
T
>
*
data_in
,
T
*
data_out
,
T
fct
)
{
...
...
@@ -2306,34 +2310,34 @@ template<typename T> void pocketfft_general_c2r(const shape_t &shape_out,
multiarr
a_in
(
shape_out
,
stride_in
),
a_out
(
shape_out
,
stride_out
);
pocketfft_r
<
T
>
plan
(
shape_out
[
axis
]);
multi_iter
it_in
(
a_in
,
axis
),
it_out
(
a_out
,
axis
);
multi_iter
_io
it
(
a_in
,
a_out
,
axis
);
size_t
len
=
shape_out
[
axis
];
int64_t
s_i
=
it
_in
.
stride
(),
s_o
=
it_out
.
stride
();
int64_t
s_i
=
it
.
stride_in
(),
s_o
=
it
.
stride_out
();
if
(
vlen
>
1
)
while
(
it
_in
.
remaining
()
>=
vlen
)
while
(
it
.
remaining
()
>=
vlen
)
{
multioffset
<
vlen
>
p_i
(
it_in
),
p_o
(
it_ou
t
);
multioffset
_io
<
vlen
>
p
(
i
t
);
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
0
][
j
]
=
data_in
[
p
_i
[
j
]
].
r
;
tdatav
[
0
][
j
]
=
data_in
[
p
.
in
(
j
)
].
r
;
size_t
i
=
1
,
ii
=
1
;
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
tdatav
[
i
][
j
]
=
data_in
[
p
_i
[
j
]
+
ii
*
s_i
].
r
;
tdatav
[
i
+
1
][
j
]
=
data_in
[
p
_i
[
j
]
+
ii
*
s_i
].
i
;
tdatav
[
i
][
j
]
=
data_in
[
p
.
in
(
j
)
+
ii
*
s_i
].
r
;
tdatav
[
i
+
1
][
j
]
=
data_in
[
p
.
in
(
j
)
+
ii
*
s_i
].
i
;
}
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
i
][
j
]
=
data_in
[
p
_i
[
j
]
+
ii
*
s_i
].
r
;
tdatav
[
i
][
j
]
=
data_in
[
p
.
in
(
j
)
+
ii
*
s_i
].
r
;
plan
.
backward
(
tdatav
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
+
i
*
s_o
]
=
tdatav
[
i
][
j
];
data_out
[
p
.
out
(
j
)
+
i
*
s_o
]
=
tdatav
[
i
][
j
];
}
while
(
!
it_in
.
done
()
)
while
(
it
.
remaining
()
>
0
)
{
const
cmplx
<
T
>
*
d_i
=
data_in
+
it
_in
.
offset
();
T
*
d_o
=
data_out
+
it
_out
.
offse
t
();
const
cmplx
<
T
>
*
d_i
=
data_in
+
it
.
offset_in
();
T
*
d_o
=
data_out
+
it
.
offset_ou
t
();
tdata
[
0
]
=
d_i
[
0
].
r
;
size_t
i
=
1
,
ii
=
1
;
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
...
...
@@ -2346,8 +2350,7 @@ template<typename T> void pocketfft_general_c2r(const shape_t &shape_out,
plan
.
backward
(
tdata
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
d_o
[
i
*
s_o
]
=
tdata
[
i
];
it_in
.
advance
();
it_out
.
advance
();
it
.
advance
();
}
}
...
...
@@ -2508,38 +2511,37 @@ template<typename T>py::array complex2hartley(const py::array &in,
size_t
axis
=
axes
.
back
();
multiarr
a_tmp
(
dims_tmp
,
stride_tmp
),
a_out
(
dims_out
,
stride_out
);
multi_iter
it_tmp
(
a_tmp
,
axis
),
it_out
(
a_out
,
axis
);
multi_iter
_io
it
(
a_tmp
,
a_out
,
axis
);
const
cmplx
<
T
>
*
tdata
=
(
const
cmplx
<
T
>
*
)
tmp
.
data
();
T
*
odata
=
(
T
*
)
out
.
mutable_data
();
vector
<
bool
>
swp
(
ndim
-
1
,
false
);
for
(
auto
i
:
axes
)
if
(
i
!=
axis
)
swp
[
i
<
axis
?
i
:
i
-
1
]
=
true
;
while
(
!
it_tmp
.
done
()
)
while
(
it
.
remaining
()
>
0
)
{
auto
tofs
=
it
_tmp
.
offset
();
auto
ofs
=
it
_out
.
offse
t
();
auto
tofs
=
it
.
offset_in
();
auto
ofs
=
it
.
offset_ou
t
();
int64_t
rofs
=
0
;
for
(
size_t
i
=
0
;
i
<
it
_out
.
pos
.
size
();
++
i
)
for
(
size_t
i
=
0
;
i
<
it
.
pos
.
size
();
++
i
)
{
if
(
!
swp
[
i
])
rofs
+=
it
_out
.
pos
[
i
]
*
it_out
.
dim
[
i
].
s
;
rofs
+=
it
.
pos
[
i
]
*
it
.
dim
[
i
].
s_o
;
else
{
auto
x
=
(
it
_out
.
pos
[
i
]
==
0
)
?
0
:
it_out
.
dim
[
i
].
n
-
it_ou
t
.
pos
[
i
];
rofs
+=
x
*
it
_out
.
dim
[
i
].
s
;
auto
x
=
(
it
.
pos
[
i
]
==
0
)
?
0
:
it
.
dim
[
i
].
n
-
i
t
.
pos
[
i
];
rofs
+=
x
*
it
.
dim
[
i
].
s_o
;
}
}
for
(
size_t
i
=
0
;
i
<
it
_tmp
.
length
();
++
i
)
for
(
size_t
i
=
0
;
i
<
it
.
length_in
();
++
i
)
{
auto
re
=
tdata
[
tofs
+
i
*
it
_tmp
.
stride
()].
r
;
auto
im
=
tdata
[
tofs
+
i
*
it
_tmp
.
stride
()].
i
;
auto
rev_i
=
(
i
==
0
)
?
0
:
it
_out
.
length
()
-
i
;
odata
[
ofs
+
i
*
it
_out
.
stride
()]
=
re
+
im
;
odata
[
rofs
+
rev_i
*
it
_out
.
stride
()]
=
re
-
im
;
auto
re
=
tdata
[
tofs
+
i
*
it
.
stride_in
()].
r
;
auto
im
=
tdata
[
tofs
+
i
*
it
.
stride_in
()].
i
;
auto
rev_i
=
(
i
==
0
)
?
0
:
it
.
length_out
()
-
i
;
odata
[
ofs
+
i
*
it
.
stride_out
()]
=
re
+
im
;
odata
[
rofs
+
rev_i
*
it
.
stride_out
()]
=
re
-
im
;
}
it_tmp
.
advance
();
it_out
.
advance
();
it
.
advance
();
}
return
out
;
}
...
...
stress.py
0 → 100644
View file @
367ace56
import
numpy
as
np
import
pypocketfft
def
_l2error
(
a
,
b
):
return
np
.
sqrt
(
np
.
sum
(
np
.
abs
(
a
-
b
)
**
2
)
/
np
.
sum
(
np
.
abs
(
a
)
**
2
))
cmaxerr
=
0.
fmaxerr
=
0.
cmaxerrf
=
0.
fmaxerrf
=
0.
hmaxerr
=
0.
hmaxerrf
=
0.
def
test
():
global
cmaxerr
,
fmaxerr
,
hmaxerr
,
cmaxerrf
,
fmaxerrf
,
hmaxerrf
ndim
=
np
.
random
.
randint
(
1
,
5
)
axlen
=
int
((
2
**
20
)
**
(
1.
/
ndim
))
shape
=
np
.
random
.
randint
(
1
,
axlen
,
ndim
)
axes
=
np
.
arange
(
ndim
)
np
.
random
.
shuffle
(
axes
)
nax
=
np
.
random
.
randint
(
1
,
ndim
+
1
)
axes
=
axes
[:
nax
]
lastsize
=
shape
[
axes
[
-
1
]]
fct
=
1.
/
np
.
prod
(
np
.
take
(
shape
,
axes
))
a
=
np
.
random
.
rand
(
*
shape
)
-
0.5
+
1j
*
np
.
random
.
rand
(
*
shape
)
-
0.5j
b
=
pypocketfft
.
ifftn
(
pypocketfft
.
fftn
(
a
,
axes
=
axes
),
axes
=
axes
,
fct
=
fct
)
err
=
_l2error
(
a
,
b
)
if
err
>
cmaxerr
:
cmaxerr
=
err
print
(
"cmaxerr:"
,
cmaxerr
,
shape
,
axes
)
b
=
pypocketfft
.
irfftn
(
pypocketfft
.
rfftn
(
a
.
real
,
axes
=
axes
),
axes
=
axes
,
fct
=
fct
,
lastsize
=
lastsize
)
err
=
_l2error
(
a
.
real
,
b
)
if
err
>
fmaxerr
:
fmaxerr
=
err
print
(
"fmaxerr:"
,
fmaxerr
,
shape
,
axes
)
b
=
pypocketfft
.
ifftn
(
pypocketfft
.
fftn
(
a
.
astype
(
np
.
complex64
),
axes
=
axes
),
axes
=
axes
,
fct
=
fct
)
err
=
_l2error
(
a
.
astype
(
np
.
complex64
),
b
)
if
err
>
cmaxerrf
:
cmaxerrf
=
err
print
(
"cmaxerrf:"
,
cmaxerrf
,
shape
,
axes
)
b
=
pypocketfft
.
irfftn
(
pypocketfft
.
rfftn
(
a
.
real
.
astype
(
np
.
float32
),
axes
=
axes
),
axes
=
axes
,
fct
=
fct
,
lastsize
=
lastsize
)
err
=
_l2error
(
a
.
real
.
astype
(
np
.
float32
),
b
)
if
err
>
fmaxerrf
:
fmaxerrf
=
err
print
(
"fmaxerrf:"
,
fmaxerrf
,
shape
,
axes
)
b
=
pypocketfft
.
hartley
(
pypocketfft
.
hartley
(
a
.
real
,
axes
=
axes
),
axes
=
axes
,
fct
=
fct
)
err
=
_l2error
(
a
.
real
,
b
)
if
err
>
hmaxerr
:
hmaxerr
=
err