Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Martin Reinecke
pypocketfft
Commits
367ace56
Commit
367ace56
authored
Apr 28, 2019
by
Martin Reinecke
Browse files
evolution
parent
b0b3677a
Changes
3
Hide whitespace changes
Inline
Side-by-side
README.md
0 → 100644
View file @
367ace56
pypocketfft
===========
This package provides Fast Fourier and Hartley transforms with a simple
Python interface.
Features
--------
-
supports fully complex and half-complex (i.e. complex-to-real and
real-to-complex) FFTs
-
supports multidimensional arrays and selection of the axes to be transformed.
-
supports single and double precision
-
makes use of CPU vector instructions when performing 2D and higher-dimensional
transforms
-
does not have persistent transform plans, which makes the interface simpler
-
supports prime-length transforms without degrading to O(N
**
2) performance
pocketfft.cc
View file @
367ace56
...
...
@@ -1601,7 +1601,7 @@ template<typename T> void copy_and_norm(T *c, T *p1, size_t n, T0 fct)
public:
template
<
typename
T
>
void
forward
(
T
c
[],
T0
fact
)
template
<
typename
T
>
NOINLINE
void
forward
(
T
c
[],
T0
fact
)
{
if
(
length
==
1
)
{
c
[
0
]
*=
fact
;
return
;
}
size_t
n
=
length
;
...
...
@@ -1633,7 +1633,7 @@ template<typename T> void forward(T c[], T0 fact)
copy_and_norm
(
c
,
p1
,
n
,
fact
);
}
template
<
typename
T
>
void
backward
(
T
c
[],
T0
fact
)
template
<
typename
T
>
NOINLINE
void
backward
(
T
c
[],
T0
fact
)
{
if
(
length
==
1
)
{
c
[
0
]
*=
fact
;
return
;
}
size_t
n
=
length
;
...
...
@@ -1834,13 +1834,13 @@ template<typename T0> class fftblue
plan
.
forward
((
cmplx
<
T0
>
*
)
bkf
,
1.
);
}
template
<
typename
T
>
void
backward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
backward
(
T
c
[],
T0
fct
)
{
fft
<
true
>
(
c
,
fct
);
}
template
<
typename
T
>
void
forward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
forward
(
T
c
[],
T0
fct
)
{
fft
<
false
>
(
c
,
fct
);
}
template
<
typename
T
>
void
backward_r
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
backward_r
(
T
c
[],
T0
fct
)
{
arr
<
T
>
tmp
(
2
*
n
);
tmp
[
0
]
=
c
[
0
];
...
...
@@ -1857,7 +1857,7 @@ template<typename T0> class fftblue
c
[
m
]
=
tmp
[
2
*
m
];
}
template
<
typename
T
>
void
forward_r
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
forward_r
(
T
c
[],
T0
fct
)
{
arr
<
T
>
tmp
(
2
*
n
);
for
(
size_t
m
=
0
;
m
<
n
;
++
m
)
...
...
@@ -1901,13 +1901,13 @@ template<typename T0> class pocketfft_c
packplan
=
unique_ptr
<
cfftp
<
T0
>>
(
new
cfftp
<
T0
>
(
length
));
}
template
<
typename
T
>
void
backward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
backward
(
T
c
[],
T0
fct
)
{
packplan
?
packplan
->
backward
((
cmplx
<
T
>
*
)
c
,
fct
)
:
blueplan
->
backward
(
c
,
fct
);
}
template
<
typename
T
>
void
forward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
forward
(
T
c
[],
T0
fct
)
{
packplan
?
packplan
->
forward
((
cmplx
<
T
>
*
)
c
,
fct
)
:
blueplan
->
forward
(
c
,
fct
);
...
...
@@ -1946,13 +1946,13 @@ template<typename T0> class pocketfft_r
packplan
=
unique_ptr
<
rfftp
<
T0
>>
(
new
rfftp
<
T0
>
(
length
));
}
template
<
typename
T
>
void
backward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
backward
(
T
c
[],
T0
fct
)
{
packplan
?
packplan
->
backward
(
c
,
fct
)
:
blueplan
->
backward_r
(
c
,
fct
);
}
template
<
typename
T
>
void
forward
(
T
c
[],
T0
fct
)
template
<
typename
T
>
NOINLINE
void
forward
(
T
c
[],
T0
fct
)
{
packplan
?
packplan
->
forward
(
c
,
fct
)
:
blueplan
->
forward_r
(
c
,
fct
);
...
...
@@ -1970,6 +1970,8 @@ using stride_t = vector<int64_t>;
struct
diminfo
{
size_t
n
;
int64_t
s
;
};
struct
diminfo_io
{
size_t
n
;
int64_t
s_i
,
s_o
;
};
class
multiarr
{
private:
...
...
@@ -1987,49 +1989,51 @@ class multiarr
int64_t
stride
(
size_t
i
)
const
{
return
dim
[
i
].
s
;
}
};
class
multi_iter
class
multi_iter
_io
{
public:
vector
<
diminfo
>
dim
;
vector
<
diminfo
_io
>
dim
;
shape_t
pos
;
int64_t
ofs_
;
size_t
len
;
int64_t
str
;
int64_t
ofs_
i
,
ofs_o
;
size_t
len
_i
,
len_o
;
int64_t
str
_i
,
str_o
;
size_t
rem
;
bool
done_
;
public:
multi_iter
(
const
multiarr
&
arr
,
size_t
idim
)
:
pos
(
arr
.
ndim
()
-
1
,
0
),
ofs_
(
0
),
len
(
arr
.
size
(
idim
)),
str
(
arr
.
stride
(
idim
)),
rem
(
1
),
done_
(
false
)
multi_iter_io
(
const
multiarr
&
arr_i
,
const
multiarr
&
arr_o
,
size_t
idim
)
:
pos
(
arr_i
.
ndim
()
-
1
,
0
),
ofs_i
(
0
),
ofs_o
(
0
),
len_i
(
arr_i
.
size
(
idim
)),
len_o
(
arr_o
.
size
(
idim
)),
str_i
(
arr_i
.
stride
(
idim
)),
str_o
(
arr_o
.
stride
(
idim
)),
rem
(
1
)
{
dim
.
reserve
(
arr
.
ndim
()
-
1
);
for
(
size_t
i
=
0
;
i
<
arr
.
ndim
();
++
i
)
dim
.
reserve
(
arr
_i
.
ndim
()
-
1
);
for
(
size_t
i
=
0
;
i
<
arr
_i
.
ndim
();
++
i
)
if
(
i
!=
idim
)
{
dim
.
push_back
({
arr
.
size
(
i
),
arr
.
stride
(
i
)});
done_
=
done_
||
(
arr
.
size
(
i
)
==
0
);
rem
*=
arr
.
size
(
i
);
dim
.
push_back
({
arr_i
.
size
(
i
),
arr_i
.
stride
(
i
),
arr_o
.
stride
(
i
)});
rem
*=
arr_i
.
size
(
i
);
}
}
void
advance
()
{
if
(
--
rem
==
0
)
{
done_
=
true
;
return
;
}
if
(
--
rem
==
0
)
return
;
for
(
int
i
=
pos
.
size
()
-
1
;
i
>=
0
;
--
i
)
{
++
pos
[
i
];
ofs_
+=
dim
[
i
].
s
;
ofs_i
+=
dim
[
i
].
s_i
;
ofs_o
+=
dim
[
i
].
s_o
;
if
(
pos
[
i
]
<
dim
[
i
].
n
)
return
;
pos
[
i
]
=
0
;
ofs_
-=
dim
[
i
].
n
*
dim
[
i
].
s
;
ofs_i
-=
dim
[
i
].
n
*
dim
[
i
].
s_i
;
ofs_o
-=
dim
[
i
].
n
*
dim
[
i
].
s_o
;
}
done_
=
true
;
}
bool
done
()
const
{
return
done_
;
}
int64_t
offset
()
const
{
return
ofs_
;
}
size_t
length
()
const
{
return
len
;
}
int64_t
stride
()
const
{
return
str
;
}
int64_t
offset_in
()
const
{
return
ofs_i
;
}
int64_t
offset_out
()
const
{
return
ofs_o
;
}
size_t
length_in
()
const
{
return
len_i
;
}
size_t
length_out
()
const
{
return
len_o
;
}
int64_t
stride_in
()
const
{
return
str_i
;
}
int64_t
stride_out
()
const
{
return
str_o
;
}
size_t
remaining
()
const
{
return
rem
;
}
};
...
...
@@ -2071,22 +2075,24 @@ template<> struct VTYPE<float>
};
#endif
size_t
prod
(
const
shape_t
&
shape
)
{
size_t
res
=
1
;
for
(
auto
sz
:
shape
)
res
*=
sz
;
return
res
;
}
template
<
typename
T
>
arr
<
char
>
alloc_tmp
(
const
shape_t
&
shape
,
size_t
axsize
,
size_t
elemsize
)
{
size_t
fullsize
=
1
;
for
(
auto
sz
:
shape
)
fullsize
*=
sz
;
auto
othersize
=
fullsize
/
axsize
;
auto
othersize
=
prod
(
shape
)
/
axsize
;
auto
tmpsize
=
axsize
*
((
othersize
>=
VTYPE
<
T
>::
vlen
)
?
VTYPE
<
T
>::
vlen
:
1
);
return
arr
<
char
>
(
tmpsize
*
elemsize
);
}
template
<
typename
T
>
arr
<
char
>
alloc_tmp
(
const
shape_t
&
shape
,
const
shape_t
&
axes
,
size_t
elemsize
)
{
size_t
fullsize
=
1
;
for
(
auto
sz
:
shape
)
fullsize
*=
sz
;
size_t
fullsize
=
prod
(
shape
);
size_t
tmpsize
=
0
;
for
(
size_t
i
=
0
;
i
<
axes
.
size
();
++
i
)
{
...
...
@@ -2097,18 +2103,19 @@ template<typename T> arr<char> alloc_tmp(const shape_t &shape,
return
arr
<
char
>
(
tmpsize
*
elemsize
);
}
template
<
size_t
vlen
>
struct
multioffset
template
<
size_t
vlen
>
struct
multioffset
_io
{
int64_t
ofs
[
vlen
];
multioffset
(
multi_iter
&
it
)
int64_t
ofs
_i
[
vlen
],
ofs_o
[
vlen
];
multioffset
_io
(
multi_iter
_io
&
it
)
{
for
(
size_t
i
=
0
;
i
<
vlen
;
++
i
)
{
ofs
[
i
]
=
it
.
offset
();
it
.
advance
();
}
{
ofs
_i
[
i
]
=
it
.
offset
_in
();
ofs_o
[
i
]
=
it
.
offset_out
();
it
.
advance
();
}
}
int64_t
operator
[](
size_t
i
)
const
{
return
ofs
[
i
];
}
int64_t
in
(
size_t
i
)
const
{
return
ofs_i
[
i
];
}
int64_t
out
(
size_t
i
)
const
{
return
ofs_o
[
i
];
}
};
template
<
typename
T
>
void
pocketfft_general_c
(
const
shape_t
&
shape
,
template
<
typename
T
>
NOINLINE
void
pocketfft_general_c
(
const
shape_t
&
shape
,
const
stride_t
&
stride_in
,
const
stride_t
&
stride_out
,
const
shape_t
&
axes
,
bool
forward
,
const
cmplx
<
T
>
*
data_in
,
cmplx
<
T
>
*
data_out
,
T
fct
)
...
...
@@ -2124,50 +2131,49 @@ template<typename T> void pocketfft_general_c(const shape_t &shape,
for
(
size_t
iax
=
0
;
iax
<
axes
.
size
();
++
iax
)
{
multi_iter
it
_in
(
a_in
,
axes
[
iax
]),
it_out
(
a_out
,
axes
[
iax
]);
size_t
len
=
it
_in
.
length
();
int64_t
s_i
=
it
_in
.
stride
(),
s_o
=
it
_out
.
stride
();
multi_iter
_io
it
(
a_in
,
a_out
,
axes
[
iax
]);
size_t
len
=
it
.
length
_in
();
int64_t
s_i
=
it
.
stride
_in
(),
s_o
=
it
.
stride
_out
();
if
((
!
plan
)
||
(
len
!=
plan
->
length
()))
plan
.
reset
(
new
pocketfft_c
<
T
>
(
len
));
if
(
vlen
>
1
)
while
(
it
_in
.
remaining
()
>=
vlen
)
while
(
it
.
remaining
()
>=
vlen
)
{
multioffset
<
vlen
>
p
_i
(
it_in
),
p_o
(
it_ou
t
);
multioffset
_io
<
vlen
>
p
(
i
t
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
tdatav
[
i
].
r
[
j
]
=
data_in
[
p
_i
[
j
]
+
i
*
s_i
].
r
;
tdatav
[
i
].
i
[
j
]
=
data_in
[
p
_i
[
j
]
+
i
*
s_i
].
i
;
tdatav
[
i
].
r
[
j
]
=
data_in
[
p
.
in
(
j
)
+
i
*
s_i
].
r
;
tdatav
[
i
].
i
[
j
]
=
data_in
[
p
.
out
(
j
)
+
i
*
s_i
].
i
;
}
forward
?
plan
->
forward
((
vtype
*
)
tdatav
,
fct
)
:
plan
->
backward
((
vtype
*
)
tdatav
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
+
i
*
s_o
].
Set
(
tdatav
[
i
].
r
[
j
],
tdatav
[
i
].
i
[
j
]);
data_out
[
p
.
out
(
j
)
+
i
*
s_o
].
Set
(
tdatav
[
i
].
r
[
j
],
tdatav
[
i
].
i
[
j
]);
}
while
(
it
_in
.
remaining
()
>
0
)
while
(
it
.
remaining
()
>
0
)
{
if
((
data_in
==
data_out
)
&&
s_i
==
1
)
// fully in-place
forward
?
plan
->
forward
((
T
*
)(
data_in
+
it
_in
.
offset
()),
fct
)
:
plan
->
backward
((
T
*
)(
data_in
+
it
_in
.
offset
()),
fct
);
forward
?
plan
->
forward
((
T
*
)(
data_in
+
it
.
offset
_in
()),
fct
)
:
plan
->
backward
((
T
*
)(
data_in
+
it
.
offset
_in
()),
fct
);
else
if
(
s_o
==
1
)
// compute FFT in output location
{
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
data_out
[
it
_out
.
offset
()
+
i
]
=
data_in
[
it
_in
.
offset
()
+
i
*
s_i
];
forward
?
plan
->
forward
((
T
*
)(
data_out
+
it
_out
.
offset
()),
fct
)
:
plan
->
backward
((
T
*
)(
data_out
+
it
_out
.
offset
()),
fct
);
data_out
[
it
.
offset
_out
()
+
i
]
=
data_in
[
it
.
offset
_in
()
+
i
*
s_i
];
forward
?
plan
->
forward
((
T
*
)(
data_out
+
it
.
offset
_out
()),
fct
)
:
plan
->
backward
((
T
*
)(
data_out
+
it
.
offset
_out
()),
fct
);
}
else
{
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
tdata
[
i
]
=
data_in
[
it
_in
.
offset
()
+
i
*
s_i
];
tdata
[
i
]
=
data_in
[
it
.
offset
_in
()
+
i
*
s_i
];
forward
?
plan
->
forward
((
T
*
)
tdata
,
fct
)
:
plan
->
backward
((
T
*
)
tdata
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
data_out
[
it
_out
.
offset
()
+
i
*
s_o
]
=
tdata
[
i
];
data_out
[
it
.
offset
_out
()
+
i
*
s_o
]
=
tdata
[
i
];
}
it_in
.
advance
();
it_out
.
advance
();
it
.
advance
();
}
// after the first dimension, take data from output array
a_in
=
a_out
;
...
...
@@ -2177,7 +2183,7 @@ template<typename T> void pocketfft_general_c(const shape_t &shape,
}
}
template
<
typename
T
>
void
pocketfft_general_hartley
(
const
shape_t
&
shape
,
template
<
typename
T
>
NOINLINE
void
pocketfft_general_hartley
(
const
shape_t
&
shape
,
const
stride_t
&
stride_in
,
const
stride_t
&
stride_out
,
const
shape_t
&
axes
,
const
T
*
data_in
,
T
*
data_out
,
T
fct
)
{
...
...
@@ -2192,49 +2198,48 @@ template<typename T> void pocketfft_general_hartley(const shape_t &shape,
for
(
size_t
iax
=
0
;
iax
<
axes
.
size
();
++
iax
)
{
multi_iter
it
_in
(
a_in
,
axes
[
iax
]),
it_out
(
a_out
,
axes
[
iax
]);
size_t
len
=
it
_in
.
length
();
int64_t
s_i
=
it
_in
.
stride
(),
s_o
=
it
_out
.
stride
();
multi_iter
_io
it
(
a_in
,
a_out
,
axes
[
iax
]);
size_t
len
=
it
.
length
_in
();
int64_t
s_i
=
it
.
stride
_in
(),
s_o
=
it
.
stride
_out
();
if
((
!
plan
)
||
(
len
!=
plan
->
length
()))
plan
.
reset
(
new
pocketfft_r
<
T
>
(
len
));
if
(
vlen
>
1
)
while
(
it
_in
.
remaining
()
>=
vlen
)
while
(
it
.
remaining
()
>=
vlen
)
{
multioffset
<
vlen
>
p
_i
(
it_in
),
p_o
(
it_ou
t
);
multioffset
_io
<
vlen
>
p
(
i
t
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
i
][
j
]
=
data_in
[
p
_i
[
j
]
+
i
*
s_i
];
tdatav
[
i
][
j
]
=
data_in
[
p
.
in
(
j
)
+
i
*
s_i
];
plan
->
forward
((
vtype
*
)
tdatav
,
fct
);
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
]
=
tdatav
[
0
][
j
];
data_out
[
p
.
out
(
j
)
]
=
tdatav
[
0
][
j
];
size_t
i
=
1
,
i1
=
1
,
i2
=
len
-
1
;
for
(
i
=
1
;
i
<
len
-
1
;
i
+=
2
,
++
i1
,
--
i2
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
data_out
[
p
_o
[
j
]
+
i1
*
s_o
]
=
tdatav
[
i
][
j
]
+
tdatav
[
i
+
1
][
j
];
data_out
[
p
_o
[
j
]
+
i2
*
s_o
]
=
tdatav
[
i
][
j
]
-
tdatav
[
i
+
1
][
j
];
data_out
[
p
.
out
(
j
)
+
i1
*
s_o
]
=
tdatav
[
i
][
j
]
+
tdatav
[
i
+
1
][
j
];
data_out
[
p
.
out
(
j
)
+
i2
*
s_o
]
=
tdatav
[
i
][
j
]
-
tdatav
[
i
+
1
][
j
];
}
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
+
i1
*
s_o
]
=
tdatav
[
i
][
j
];
data_out
[
p
.
out
(
j
)
+
i1
*
s_o
]
=
tdatav
[
i
][
j
];
}
while
(
it
_in
.
remaining
()
>
0
)
while
(
it
.
remaining
()
>
0
)
{
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
tdata
[
i
]
=
data_in
[
it
_in
.
offset
()
+
i
*
s_i
];
tdata
[
i
]
=
data_in
[
it
.
offset
_in
()
+
i
*
s_i
];
plan
->
forward
((
T
*
)
tdata
,
fct
);
// Hartley order
data_out
[
it
_out
.
offset
()]
=
tdata
[
0
];
data_out
[
it
.
offset
_out
()]
=
tdata
[
0
];
size_t
i
=
1
,
i1
=
1
,
i2
=
len
-
1
;
for
(
i
=
1
;
i
<
len
-
1
;
i
+=
2
,
++
i1
,
--
i2
)
{
data_out
[
it
_out
.
offset
()
+
i1
*
s_o
]
=
tdata
[
i
]
+
tdata
[
i
+
1
];
data_out
[
it
_out
.
offset
()
+
i2
*
s_o
]
=
tdata
[
i
]
-
tdata
[
i
+
1
];
data_out
[
it
.
offset
_out
()
+
i1
*
s_o
]
=
tdata
[
i
]
+
tdata
[
i
+
1
];
data_out
[
it
.
offset
_out
()
+
i2
*
s_o
]
=
tdata
[
i
]
-
tdata
[
i
+
1
];
}
if
(
i
<
len
)
data_out
[
it_out
.
offset
()
+
i1
*
s_o
]
=
tdata
[
i
];
it_in
.
advance
();
it_out
.
advance
();
data_out
[
it
.
offset_out
()
+
i1
*
s_o
]
=
tdata
[
i
];
it
.
advance
();
}
// after the first dimension, take data from output array
a_in
=
a_out
;
...
...
@@ -2244,7 +2249,7 @@ template<typename T> void pocketfft_general_hartley(const shape_t &shape,
}
}
template
<
typename
T
>
void
pocketfft_general_r2c
(
const
shape_t
&
shape
,
template
<
typename
T
>
NOINLINE
void
pocketfft_general_r2c
(
const
shape_t
&
shape
,
const
stride_t
&
stride_in
,
const
stride_t
&
stride_out
,
size_t
axis
,
const
T
*
data_in
,
cmplx
<
T
>
*
data_out
,
T
fct
)
{
...
...
@@ -2256,31 +2261,31 @@ template<typename T> void pocketfft_general_r2c(const shape_t &shape,
multiarr
a_in
(
shape
,
stride_in
),
a_out
(
shape
,
stride_out
);
pocketfft_r
<
T
>
plan
(
shape
[
axis
]);
multi_iter
it
_in
(
a_in
,
axis
),
it_out
(
a_out
,
axis
);
multi_iter
_io
it
(
a_in
,
a_out
,
axis
);
size_t
len
=
shape
[
axis
];
int64_t
s_i
=
it
_in
.
stride
(),
s_o
=
it
_out
.
stride
();
int64_t
s_i
=
it
.
stride
_in
(),
s_o
=
it
.
stride
_out
();
if
(
vlen
>
1
)
while
(
it
_in
.
remaining
()
>=
vlen
)
while
(
it
.
remaining
()
>=
vlen
)
{
multioffset
<
vlen
>
p
_i
(
it_in
),
p_o
(
it_ou
t
);
multioffset
_io
<
vlen
>
p
(
i
t
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
i
][
j
]
=
data_in
[
p
_i
[
j
]
+
i
*
s_i
];
tdatav
[
i
][
j
]
=
data_in
[
p
.
in
(
j
)
+
i
*
s_i
];
plan
.
forward
((
vtype
*
)
tdatav
,
fct
);
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
].
Set
(
tdatav
[
0
][
j
]);
data_out
[
p
.
out
(
j
)
].
Set
(
tdatav
[
0
][
j
]);
size_t
i
=
1
,
ii
=
1
;
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
+
ii
*
s_o
].
Set
(
tdatav
[
i
][
j
],
tdatav
[
i
+
1
][
j
]);
data_out
[
p
.
out
(
j
)
+
ii
*
s_o
].
Set
(
tdatav
[
i
][
j
],
tdatav
[
i
+
1
][
j
]);
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
+
ii
*
s_o
].
Set
(
tdatav
[
i
][
j
]);
data_out
[
p
.
out
(
j
)
+
ii
*
s_o
].
Set
(
tdatav
[
i
][
j
]);
}
while
(
it
_in
.
remaining
()
>
0
)
while
(
it
.
remaining
()
>
0
)
{
const
T
*
d_i
=
data_in
+
it
_in
.
offset
();
cmplx
<
T
>
*
d_o
=
data_out
+
it
_out
.
offset
();
const
T
*
d_i
=
data_in
+
it
.
offset
_in
();
cmplx
<
T
>
*
d_o
=
data_out
+
it
.
offset
_out
();
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
tdata
[
i
]
=
d_i
[
i
*
s_i
];
plan
.
forward
(
tdata
,
fct
);
...
...
@@ -2290,11 +2295,10 @@ template<typename T> void pocketfft_general_r2c(const shape_t &shape,
d_o
[
ii
*
s_o
].
Set
(
tdata
[
i
],
tdata
[
i
+
1
]);
if
(
i
<
len
)
d_o
[
ii
*
s_o
].
Set
(
tdata
[
i
]);
it_in
.
advance
();
it_out
.
advance
();
it
.
advance
();
}
}
template
<
typename
T
>
void
pocketfft_general_c2r
(
const
shape_t
&
shape_out
,
template
<
typename
T
>
NOINLINE
void
pocketfft_general_c2r
(
const
shape_t
&
shape_out
,
const
stride_t
&
stride_in
,
const
stride_t
&
stride_out
,
size_t
axis
,
const
cmplx
<
T
>
*
data_in
,
T
*
data_out
,
T
fct
)
{
...
...
@@ -2306,34 +2310,34 @@ template<typename T> void pocketfft_general_c2r(const shape_t &shape_out,
multiarr
a_in
(
shape_out
,
stride_in
),
a_out
(
shape_out
,
stride_out
);
pocketfft_r
<
T
>
plan
(
shape_out
[
axis
]);
multi_iter
it
_in
(
a_in
,
axis
),
it_out
(
a_out
,
axis
);
multi_iter
_io
it
(
a_in
,
a_out
,
axis
);
size_t
len
=
shape_out
[
axis
];
int64_t
s_i
=
it
_in
.
stride
(),
s_o
=
it
_out
.
stride
();
int64_t
s_i
=
it
.
stride
_in
(),
s_o
=
it
.
stride
_out
();
if
(
vlen
>
1
)
while
(
it
_in
.
remaining
()
>=
vlen
)
while
(
it
.
remaining
()
>=
vlen
)
{
multioffset
<
vlen
>
p
_i
(
it_in
),
p_o
(
it_ou
t
);
multioffset
_io
<
vlen
>
p
(
i
t
);
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
0
][
j
]
=
data_in
[
p
_i
[
j
]
].
r
;
tdatav
[
0
][
j
]
=
data_in
[
p
.
in
(
j
)
].
r
;
size_t
i
=
1
,
ii
=
1
;
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
tdatav
[
i
][
j
]
=
data_in
[
p
_i
[
j
]
+
ii
*
s_i
].
r
;
tdatav
[
i
+
1
][
j
]
=
data_in
[
p
_i
[
j
]
+
ii
*
s_i
].
i
;
tdatav
[
i
][
j
]
=
data_in
[
p
.
in
(
j
)
+
ii
*
s_i
].
r
;
tdatav
[
i
+
1
][
j
]
=
data_in
[
p
.
in
(
j
)
+
ii
*
s_i
].
i
;
}
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
i
][
j
]
=
data_in
[
p
_i
[
j
]
+
ii
*
s_i
].
r
;
tdatav
[
i
][
j
]
=
data_in
[
p
.
in
(
j
)
+
ii
*
s_i
].
r
;
plan
.
backward
(
tdatav
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
data_out
[
p
_o
[
j
]
+
i
*
s_o
]
=
tdatav
[
i
][
j
];
data_out
[
p
.
out
(
j
)
+
i
*
s_o
]
=
tdatav
[
i
][
j
];
}
while
(
!
it
_in
.
done
()
)
while
(
it
.
remaining
()
>
0
)
{
const
cmplx
<
T
>
*
d_i
=
data_in
+
it
_in
.
offset
();
T
*
d_o
=
data_out
+
it
_out
.
offset
();
const
cmplx
<
T
>
*
d_i
=
data_in
+
it
.
offset
_in
();
T
*
d_o
=
data_out
+
it
.
offset
_out
();
tdata
[
0
]
=
d_i
[
0
].
r
;
size_t
i
=
1
,
ii
=
1
;
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
...
...
@@ -2346,8 +2350,7 @@ template<typename T> void pocketfft_general_c2r(const shape_t &shape_out,
plan
.
backward
(
tdata
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
d_o
[
i
*
s_o
]
=
tdata
[
i
];
it_in
.
advance
();
it_out
.
advance
();
it
.
advance
();
}
}
...
...
@@ -2508,38 +2511,37 @@ template<typename T>py::array complex2hartley(const py::array &in,
size_t
axis
=
axes
.
back
();
multiarr
a_tmp
(
dims_tmp
,
stride_tmp
),
a_out
(
dims_out
,
stride_out
);
multi_iter
it
_tmp
(
a_tmp
,
axis
),
it_out
(
a_out
,
axis
);
multi_iter
_io
it
(
a_tmp
,
a_out
,
axis
);
const
cmplx
<
T
>
*
tdata
=
(
const
cmplx
<
T
>
*
)
tmp
.
data
();
T
*
odata
=
(
T
*
)
out
.
mutable_data
();
vector
<
bool
>
swp
(
ndim
-
1
,
false
);
for
(
auto
i
:
axes
)
if
(
i
!=
axis
)
swp
[
i
<
axis
?
i
:
i
-
1
]
=
true
;
while
(
!
it
_tmp
.
done
()
)
while
(
it
.
remaining
()
>
0
)
{
auto
tofs
=
it
_tmp
.
offset
();
auto
ofs
=
it
_out
.
offset
();
auto
tofs
=
it
.
offset
_in
();
auto
ofs
=
it
.
offset
_out
();
int64_t
rofs
=
0
;
for
(
size_t
i
=
0
;
i
<
it
_out
.
pos
.
size
();
++
i
)
for
(
size_t
i
=
0
;
i
<
it
.
pos
.
size
();
++
i
)
{
if
(
!
swp
[
i
])
rofs
+=
it
_out
.
pos
[
i
]
*
it
_out
.
dim
[
i
].
s
;
rofs
+=
it
.
pos
[
i
]
*
it
.
dim
[
i
].
s
_o
;
else
{
auto
x
=
(
it
_out
.
pos
[
i
]
==
0
)
?
0
:
it
_out
.
dim
[
i
].
n
-
it
_out
.
pos
[
i
];
rofs
+=
x
*
it
_out
.
dim
[
i
].
s
;
auto
x
=
(
it
.
pos
[
i
]
==
0
)
?
0
:
it
.
dim
[
i
].
n
-
it
.
pos
[
i
];
rofs
+=
x
*
it
.
dim
[
i
].
s
_o
;
}
}
for
(
size_t
i
=
0
;
i
<
it
_tmp
.
length
();
++
i
)
for
(
size_t
i
=
0
;
i
<
it
.
length
_in
();
++
i
)
{
auto
re
=
tdata
[
tofs
+
i
*
it
_tmp
.
stride
()].
r
;
auto
im
=
tdata
[
tofs
+
i
*
it
_tmp
.
stride
()].
i
;
auto
rev_i
=
(
i
==
0
)
?
0
:
it
_out
.
length
()
-
i
;
odata
[
ofs
+
i
*
it
_out
.
stride
()]
=
re
+
im
;
odata
[
rofs
+
rev_i
*
it
_out
.
stride
()]
=
re
-
im
;
auto
re
=
tdata
[
tofs
+
i
*
it
.
stride
_in
()].
r
;
auto
im
=
tdata
[
tofs
+
i
*
it
.
stride
_in
()].
i
;
auto
rev_i
=
(
i
==
0
)
?
0
:
it
.
length
_out
()
-
i
;
odata
[
ofs
+
i
*
it
.
stride
_out
()]
=
re
+
im
;
odata
[
rofs
+
rev_i
*
it
.
stride
_out
()]
=
re
-
im
;
}
it_tmp
.
advance
();
it_out
.
advance
();
it
.
advance
();
}
return
out
;
}
...
...
stress.py
0 → 100644
View file @
367ace56
import
numpy
as
np
import
pypocketfft
def
_l2error
(
a
,
b
):
return
np
.
sqrt
(
np
.
sum
(
np
.
abs
(
a
-
b
)
**
2
)
/
np
.
sum
(
np
.
abs
(
a
)
**
2
))
cmaxerr
=
0.
fmaxerr
=
0.
cmaxerrf
=
0.
fmaxerrf
=
0.
hmaxerr
=
0.
hmaxerrf
=
0.
def
test
():
global
cmaxerr
,
fmaxerr
,
hmaxerr
,
cmaxerrf
,
fmaxerrf
,
hmaxerrf
ndim
=
np
.
random
.
randint
(
1
,
5
)
axlen
=
int
((
2
**
20
)
**
(
1.
/
ndim
))
shape
=
np
.
random
.
randint
(
1
,
axlen
,
ndim
)
axes
=
np
.
arange
(
ndim
)
np
.
random
.
shuffle
(
axes
)
nax
=
np
.
random
.
randint
(
1
,
ndim
+
1
)
axes
=
axes
[:
nax
]
lastsize
=
shape
[
axes
[
-
1
]]
fct
=
1.
/
np
.
prod
(
np
.
take
(
shape
,
axes
))
a
=
np
.
random
.
rand
(
*
shape
)
-
0.5
+
1j
*
np
.
random
.
rand
(
*
shape
)
-
0.5j
b
=
pypocketfft
.
ifftn
(
pypocketfft
.
fftn
(
a
,
axes
=
axes
),
axes
=
axes
,
fct
=
fct
)
err
=
_l2error
(
a
,
b
)
if
err
>
cmaxerr
:
cmaxerr
=
err
print
(
"cmaxerr:"
,
cmaxerr
,
shape
,
axes
)