Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
On Thursday, 7th July from 1 to 3 pm there will be a maintenance with a short downtime of GitLab.
Open sidebar
Martin Reinecke
pypocketfft
Commits
87832dd2
Commit
87832dd2
authored
Jun 18, 2019
by
Martin Reinecke
Browse files
cleaner design of constant and nonconstant arrays
parent
4e1f2c3b
Changes
2
Hide whitespace changes
Inline
Side-by-side
pocketfft_hdronly.h
View file @
87832dd2
...
...
@@ -2200,43 +2200,51 @@ template<typename T0> class pocketfft_r
// multi-D infrastructure
//
template
<
typename
T
>
class
nd
arr
class
arr
_info
{
private:
char
*
d
;
const
char
*
cd
;
protected:
shape_t
shp
;
stride_t
str
;
public:
ndarr
(
const
void
*
data_
,
const
shape_t
&
shape_
,
const
stride_t
&
stride_
)
:
d
(
nullptr
),
cd
(
reinterpret_cast
<
const
char
*>
(
data_
)),
shp
(
shape_
),
str
(
stride_
)
{}
ndarr
(
void
*
data_
,
const
shape_t
&
shape_
,
const
stride_t
&
stride_
)
:
d
(
reinterpret_cast
<
char
*>
(
data_
)),
cd
(
reinterpret_cast
<
const
char
*>
(
data_
)),
shp
(
shape_
),
str
(
stride_
)
{}
arr_info
(
const
shape_t
&
shape_
,
const
stride_t
&
stride_
)
:
shp
(
shape_
),
str
(
stride_
)
{}
size_t
ndim
()
const
{
return
shp
.
size
();
}
size_t
size
()
const
{
return
util
::
prod
(
shp
);
}
const
shape_t
&
shape
()
const
{
return
shp
;
}
size_t
shape
(
size_t
i
)
const
{
return
shp
[
i
];
}
const
stride_t
&
stride
()
const
{
return
str
;
}
const
ptrdiff_t
&
stride
(
size_t
i
)
const
{
return
str
[
i
];
}
T
&
operator
[](
ptrdiff_t
ofs
)
{
return
*
reinterpret_cast
<
T
*>
(
d
+
ofs
);
}
};
template
<
typename
T
>
class
cndarr
:
public
arr_info
{
protected:
const
char
*
d
;
public:
cndarr
(
const
void
*
data_
,
const
shape_t
&
shape_
,
const
stride_t
&
stride_
)
:
arr_info
(
shape_
,
stride_
),
d
(
reinterpret_cast
<
const
char
*>
(
data_
))
{}
const
T
&
operator
[](
ptrdiff_t
ofs
)
const
{
return
*
reinterpret_cast
<
const
T
*>
(
cd
+
ofs
);
}
T
get
(
ptrdiff_t
ofs
)
const
{
return
*
reinterpret_cast
<
const
T
*>
(
cd
+
ofs
);
}
void
set
(
ptrdiff_t
ofs
,
const
T
&
val
)
const
{
*
reinterpret_cast
<
T
*>
(
d
+
ofs
)
=
val
;
}
{
return
*
reinterpret_cast
<
const
T
*>
(
d
+
ofs
);
}
};
template
<
typename
T
>
class
ndarr
:
public
cndarr
<
T
>
{
public:
ndarr
(
void
*
data_
,
const
shape_t
&
shape_
,
const
stride_t
&
stride_
)
:
cndarr
<
T
>::
cndarr
(
const_cast
<
const
void
*>
(
data_
),
shape_
,
stride_
)
{}
T
&
operator
[](
ptrdiff_t
ofs
)
{
return
*
reinterpret_cast
<
T
*>
(
const_cast
<
char
*>
(
cndarr
<
T
>::
d
+
ofs
));
}
};
template
<
size_t
N
,
typename
Ti
,
typename
To
>
class
multi_iter
template
<
size_t
N
>
class
multi_iter
{
private:
shape_t
pos
;
const
ndarr
<
Ti
>
&
iarr
;
ndarr
<
To
>
&
oarr
;
const
arr_info
&
iarr
,
&
oarr
;
ptrdiff_t
p_ii
,
p_i
[
N
],
str_i
,
p_oi
,
p_o
[
N
],
str_o
;
size_t
idim
,
rem
;
...
...
@@ -2257,7 +2265,7 @@ template<size_t N, typename Ti, typename To> class multi_iter
}
public:
multi_iter
(
const
nd
arr
<
Ti
>
&
iarr_
,
ndarr
<
To
>
&
oarr_
,
size_t
idim_
)
multi_iter
(
const
arr
_info
&
iarr_
,
const
arr_info
&
oarr_
,
size_t
idim_
)
:
pos
(
iarr_
.
ndim
(),
0
),
iarr
(
iarr_
),
oarr
(
oarr_
),
p_ii
(
0
),
str_i
(
iarr
.
stride
(
idim_
)),
p_oi
(
0
),
str_o
(
oarr
.
stride
(
idim_
)),
idim
(
idim_
),
rem
(
iarr
.
size
()
/
iarr
.
shape
(
idim
))
...
...
@@ -2297,30 +2305,27 @@ template<size_t N, typename Ti, typename To> class multi_iter
}
rem
-=
n
;
}
const
Ti
&
in
(
size_t
i
)
const
{
return
iarr
[
p_i
[
0
]
+
ptrdiff_t
(
i
)
*
str_i
]
;
}
const
Ti
&
in
(
size_t
j
,
size_t
i
)
const
{
return
iarr
[
p_i
[
j
]
+
ptrdiff_t
(
i
)
*
str_i
]
;
}
To
&
out
(
size_t
i
)
{
return
oarr
[
p_o
[
0
]
+
ptrdiff_t
(
i
)
*
str_o
]
;
}
To
&
out
(
size_t
j
,
size_t
i
)
{
return
oarr
[
p_o
[
j
]
+
ptrdiff_t
(
i
)
*
str_o
]
;
}
ptrdiff_t
iofs
(
size_t
i
)
const
{
return
p_i
[
0
]
+
ptrdiff_t
(
i
)
*
str_i
;
}
ptrdiff_t
iofs
(
size_t
j
,
size_t
i
)
const
{
return
p_i
[
j
]
+
ptrdiff_t
(
i
)
*
str_i
;
}
ptrdiff_t
oofs
(
size_t
i
)
const
{
return
p_o
[
0
]
+
ptrdiff_t
(
i
)
*
str_o
;
}
ptrdiff_t
oofs
(
size_t
j
,
size_t
i
)
const
{
return
p_o
[
j
]
+
ptrdiff_t
(
i
)
*
str_o
;
}
size_t
length_in
()
const
{
return
iarr
.
shape
(
idim
);
}
size_t
length_out
()
const
{
return
oarr
.
shape
(
idim
);
}
ptrdiff_t
stride_in
()
const
{
return
str_i
;
}
ptrdiff_t
stride_out
()
const
{
return
str_o
;
}
size_t
remaining
()
const
{
return
rem
;
}
bool
inplace
()
const
{
return
&
iarr
[
0
]
==&
oarr
[
0
];
}
bool
contiguous_in
()
const
{
return
str_i
==
sizeof
(
Ti
);
}
bool
contiguous_out
()
const
{
return
str_o
==
sizeof
(
To
);
}
};
template
<
typename
T
>
class
simple_iter
class
simple_iter
{
private:
shape_t
pos
;
const
nd
arr
<
T
>
&
arr
;
const
arr
_info
&
arr
;
ptrdiff_t
p
;
size_t
rem
;
public:
simple_iter
(
const
nd
arr
<
T
>
&
arr_
)
simple_iter
(
const
arr
_info
&
arr_
)
:
pos
(
arr_
.
ndim
(),
0
),
arr
(
arr_
),
p
(
0
),
rem
(
arr_
.
size
())
{}
void
advance
()
{
...
...
@@ -2339,11 +2344,11 @@ template<typename T> class simple_iter
size_t
remaining
()
const
{
return
rem
;
}
};
template
<
typename
T
>
class
rev_iter
class
rev_iter
{
private:
shape_t
pos
;
ndarr
<
T
>
&
arr
;
const
arr_info
&
arr
;
vector
<
char
>
rev_axis
;
vector
<
char
>
rev_jump
;
size_t
last_axis
,
last_size
;
...
...
@@ -2352,7 +2357,7 @@ template<typename T> class rev_iter
size_t
rem
;
public:
rev_iter
(
ndarr
<
T
>
&
arr_
,
const
shape_t
&
axes
)
rev_iter
(
const
arr_info
&
arr_
,
const
shape_t
&
axes
)
:
pos
(
arr_
.
ndim
(),
0
),
arr
(
arr_
),
rev_axis
(
arr_
.
ndim
(),
0
),
rev_jump
(
arr_
.
ndim
(),
1
),
p
(
0
),
rp
(
0
)
{
...
...
@@ -2447,7 +2452,7 @@ template<typename T> arr<char> alloc_tmp(const shape_t &shape,
#endif
template
<
typename
T
>
NOINLINE
void
general_c
(
const
ndarr
<
cmplx
<
T
>>
&
in
,
ndarr
<
cmplx
<
T
>>
&
out
,
const
c
ndarr
<
cmplx
<
T
>>
&
in
,
ndarr
<
cmplx
<
T
>>
&
out
,
const
shape_t
&
axes
,
bool
forward
,
T
fct
,
size_t
POCKETFFT_NTHREADS
)
{
unique_ptr
<
pocketfft_c
<
T
>>
plan
;
...
...
@@ -2464,7 +2469,8 @@ template<typename T> NOINLINE void general_c(
#endif
{
auto
storage
=
alloc_tmp
<
T
>
(
in
.
shape
(),
len
,
sizeof
(
cmplx
<
T
>
));
multi_iter
<
vlen
,
cmplx
<
T
>
,
cmplx
<
T
>>
it
(
iax
==
0
?
in
:
out
,
out
,
axes
[
iax
]);
const
auto
&
tin
(
iax
==
0
?
in
:
out
);
multi_iter
<
vlen
>
it
(
tin
,
out
,
axes
[
iax
]);
#ifndef POCKETFFT_NO_VECTORS
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
...
...
@@ -2474,34 +2480,37 @@ template<typename T> NOINLINE void general_c(
auto
tdatav
=
reinterpret_cast
<
cmplx
<
vtype
>
*>
(
storage
.
data
());
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
tdatav
[
i
].
r
[
j
]
=
it
.
in
(
j
,
i
).
r
;
tdatav
[
i
].
i
[
j
]
=
it
.
in
(
j
,
i
).
i
;
}
{
tdatav
[
i
].
r
[
j
]
=
tin
[
it
.
iofs
(
j
,
i
)].
r
;
tdatav
[
i
].
i
[
j
]
=
tin
[
it
.
iofs
(
j
,
i
)].
i
;
}
forward
?
plan
->
forward
(
tdatav
,
fct
)
:
plan
->
backward
(
tdatav
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
it
.
o
ut
(
j
,
i
).
Set
(
tdatav
[
i
].
r
[
j
],
tdatav
[
i
].
i
[
j
]);
out
[
it
.
o
ofs
(
j
,
i
)
]
.
Set
(
tdatav
[
i
].
r
[
j
],
tdatav
[
i
].
i
[
j
]);
}
#endif
while
(
it
.
remaining
()
>
0
)
{
it
.
advance
(
1
);
auto
tdata
=
reinterpret_cast
<
cmplx
<
T
>
*>
(
storage
.
data
());
if
(
it
.
inplace
(
)
&&
it
.
contiguous_out
(
))
// fully in-place
forward
?
plan
->
forward
(
(
&
it
.
o
ut
(
0
)
)
,
fct
)
:
plan
->
backward
(
(
&
it
.
o
ut
(
0
)
)
,
fct
);
else
if
(
it
.
contiguous_out
(
))
// compute FFT in output location
if
(
(
&
tin
[
0
]
==&
out
[
0
]
)
&&
(
it
.
stride_out
()
==
sizeof
(
cmplx
<
T
>
)
))
// fully in-place
forward
?
plan
->
forward
(
&
out
[
it
.
o
ofs
(
0
)
]
,
fct
)
:
plan
->
backward
(
&
out
[
it
.
o
ofs
(
0
)
]
,
fct
);
else
if
(
it
.
stride_out
()
==
sizeof
(
cmplx
<
T
>
))
// compute FFT in output location
{
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
it
.
o
ut
(
i
)
=
it
.
i
n
(
i
);
forward
?
plan
->
forward
(
(
&
it
.
o
ut
(
0
)
)
,
fct
)
:
plan
->
backward
(
(
&
it
.
o
ut
(
0
)
)
,
fct
);
out
[
it
.
o
ofs
(
i
)
]
=
tin
[
it
.
i
ofs
(
i
)
]
;
forward
?
plan
->
forward
(
&
out
[
it
.
o
ofs
(
0
)
]
,
fct
)
:
plan
->
backward
(
&
out
[
it
.
o
ofs
(
0
)
]
,
fct
);
}
else
{
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
tdata
[
i
]
=
it
.
i
n
(
i
);
tdata
[
i
]
=
tin
[
it
.
i
ofs
(
i
)
]
;
forward
?
plan
->
forward
(
tdata
,
fct
)
:
plan
->
backward
(
tdata
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
it
.
o
ut
(
i
)
=
tdata
[
i
];
out
[
it
.
o
ofs
(
i
)
]
=
tdata
[
i
];
}
}
}
// end of parallel region
...
...
@@ -2510,7 +2519,7 @@ template<typename T> NOINLINE void general_c(
}
template
<
typename
T
>
NOINLINE
void
general_hartley
(
const
ndarr
<
T
>
&
in
,
ndarr
<
T
>
&
out
,
const
shape_t
&
axes
,
T
fct
,
const
c
ndarr
<
T
>
&
in
,
ndarr
<
T
>
&
out
,
const
shape_t
&
axes
,
T
fct
,
size_t
POCKETFFT_NTHREADS
)
{
unique_ptr
<
pocketfft_r
<
T
>>
plan
;
...
...
@@ -2527,7 +2536,8 @@ template<typename T> NOINLINE void general_hartley(
#endif
{
auto
storage
=
alloc_tmp
<
T
>
(
in
.
shape
(),
len
,
sizeof
(
T
));
multi_iter
<
vlen
,
T
,
T
>
it
(
iax
==
0
?
in
:
out
,
out
,
axes
[
iax
]);
const
auto
&
tin
(
iax
==
0
?
in
:
out
);
multi_iter
<
vlen
>
it
(
tin
,
out
,
axes
[
iax
]);
#ifndef POCKETFFT_NO_VECTORS
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
...
...
@@ -2537,20 +2547,20 @@ template<typename T> NOINLINE void general_hartley(
auto
tdatav
=
reinterpret_cast
<
vtype
*>
(
storage
.
data
());
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
i
][
j
]
=
it
.
i
n
(
j
,
i
);
tdatav
[
i
][
j
]
=
tin
[
it
.
i
ofs
(
j
,
i
)
]
;
plan
->
forward
(
tdatav
,
fct
);
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
it
.
o
ut
(
j
,
0
)
=
tdatav
[
0
][
j
];
out
[
it
.
o
ofs
(
j
,
0
)
]
=
tdatav
[
0
][
j
];
size_t
i
=
1
,
i1
=
1
,
i2
=
len
-
1
;
for
(
i
=
1
;
i
<
len
-
1
;
i
+=
2
,
++
i1
,
--
i2
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
it
.
o
ut
(
j
,
i1
)
=
tdatav
[
i
][
j
]
+
tdatav
[
i
+
1
][
j
];
it
.
o
ut
(
j
,
i2
)
=
tdatav
[
i
][
j
]
-
tdatav
[
i
+
1
][
j
];
out
[
it
.
o
ofs
(
j
,
i1
)
]
=
tdatav
[
i
][
j
]
+
tdatav
[
i
+
1
][
j
];
out
[
it
.
o
ofs
(
j
,
i2
)
]
=
tdatav
[
i
][
j
]
-
tdatav
[
i
+
1
][
j
];
}
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
it
.
o
ut
(
j
,
i1
)
=
tdatav
[
i
][
j
];
out
[
it
.
o
ofs
(
j
,
i1
)
]
=
tdatav
[
i
][
j
];
}
#endif
while
(
it
.
remaining
()
>
0
)
...
...
@@ -2558,15 +2568,18 @@ template<typename T> NOINLINE void general_hartley(
it
.
advance
(
1
);
auto
tdata
=
reinterpret_cast
<
T
*>
(
storage
.
data
());
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
tdata
[
i
]
=
it
.
i
n
(
i
);
tdata
[
i
]
=
tin
[
it
.
i
ofs
(
i
)
]
;
plan
->
forward
(
tdata
,
fct
);
// Hartley order
it
.
o
ut
(
0
)
=
tdata
[
0
];
out
[
it
.
o
ofs
(
0
)
]
=
tdata
[
0
];
size_t
i
=
1
,
i1
=
1
,
i2
=
len
-
1
;
for
(
i
=
1
;
i
<
len
-
1
;
i
+=
2
,
++
i1
,
--
i2
)
{
it
.
out
(
i1
)
=
tdata
[
i
]
+
tdata
[
i
+
1
];
it
.
out
(
i2
)
=
tdata
[
i
]
-
tdata
[
i
+
1
];
}
{
out
[
it
.
oofs
(
i1
)]
=
tdata
[
i
]
+
tdata
[
i
+
1
];
out
[
it
.
oofs
(
i2
)]
=
tdata
[
i
]
-
tdata
[
i
+
1
];
}
if
(
i
<
len
)
it
.
o
ut
(
i1
)
=
tdata
[
i
];
out
[
it
.
o
ofs
(
i1
)
]
=
tdata
[
i
];
}
}
// end of parallel region
fct
=
T
(
1
);
// factor has been applied, use 1 for remaining axes
...
...
@@ -2574,7 +2587,7 @@ template<typename T> NOINLINE void general_hartley(
}
template
<
typename
T
>
NOINLINE
void
general_r2c
(
const
ndarr
<
T
>
&
in
,
ndarr
<
cmplx
<
T
>>
&
out
,
size_t
axis
,
T
fct
,
const
c
ndarr
<
T
>
&
in
,
ndarr
<
cmplx
<
T
>>
&
out
,
size_t
axis
,
T
fct
,
size_t
POCKETFFT_NTHREADS
)
{
pocketfft_r
<
T
>
plan
(
in
.
shape
(
axis
));
...
...
@@ -2585,7 +2598,7 @@ template<typename T> NOINLINE void general_r2c(
#endif
{
auto
storage
=
alloc_tmp
<
T
>
(
in
.
shape
(),
len
,
sizeof
(
T
));
multi_iter
<
vlen
,
T
,
cmplx
<
T
>
>
it
(
in
,
out
,
axis
);
multi_iter
<
vlen
>
it
(
in
,
out
,
axis
);
#ifndef POCKETFFT_NO_VECTORS
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
...
...
@@ -2595,17 +2608,17 @@ template<typename T> NOINLINE void general_r2c(
auto
tdatav
=
reinterpret_cast
<
vtype
*>
(
storage
.
data
());
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
i
][
j
]
=
i
t
.
in
(
j
,
i
);
tdatav
[
i
][
j
]
=
i
n
[
it
.
iofs
(
j
,
i
)
]
;
plan
.
forward
(
tdatav
,
fct
);
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
it
.
o
ut
(
j
,
0
).
Set
(
tdatav
[
0
][
j
]);
out
[
it
.
o
ofs
(
j
,
0
)
]
.
Set
(
tdatav
[
0
][
j
]);
size_t
i
=
1
,
ii
=
1
;
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
it
.
o
ut
(
j
,
ii
).
Set
(
tdatav
[
i
][
j
],
tdatav
[
i
+
1
][
j
]);
out
[
it
.
o
ofs
(
j
,
ii
)
]
.
Set
(
tdatav
[
i
][
j
],
tdatav
[
i
+
1
][
j
]);
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
it
.
o
ut
(
j
,
ii
).
Set
(
tdatav
[
i
][
j
]);
out
[
it
.
o
ofs
(
j
,
ii
)
]
.
Set
(
tdatav
[
i
][
j
]);
}
#endif
while
(
it
.
remaining
()
>
0
)
...
...
@@ -2613,19 +2626,19 @@ template<typename T> NOINLINE void general_r2c(
it
.
advance
(
1
);
auto
tdata
=
reinterpret_cast
<
T
*>
(
storage
.
data
());
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
tdata
[
i
]
=
i
t
.
in
(
i
);
tdata
[
i
]
=
i
n
[
it
.
iofs
(
i
)
]
;
plan
.
forward
(
tdata
,
fct
);
it
.
o
ut
(
0
).
Set
(
tdata
[
0
]);
out
[
it
.
o
ofs
(
0
)
]
.
Set
(
tdata
[
0
]);
size_t
i
=
1
,
ii
=
1
;
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
it
.
o
ut
(
ii
).
Set
(
tdata
[
i
],
tdata
[
i
+
1
]);
out
[
it
.
o
ofs
(
ii
)
]
.
Set
(
tdata
[
i
],
tdata
[
i
+
1
]);
if
(
i
<
len
)
it
.
o
ut
(
ii
).
Set
(
tdata
[
i
]);
out
[
it
.
o
ofs
(
ii
)
]
.
Set
(
tdata
[
i
]);
}
}
// end of parallel region
}
template
<
typename
T
>
NOINLINE
void
general_c2r
(
const
ndarr
<
cmplx
<
T
>>
&
in
,
ndarr
<
T
>
&
out
,
size_t
axis
,
T
fct
,
const
c
ndarr
<
cmplx
<
T
>>
&
in
,
ndarr
<
T
>
&
out
,
size_t
axis
,
T
fct
,
size_t
POCKETFFT_NTHREADS
)
{
pocketfft_r
<
T
>
plan
(
out
.
shape
(
axis
));
...
...
@@ -2636,7 +2649,7 @@ template<typename T> NOINLINE void general_c2r(
#endif
{
auto
storage
=
alloc_tmp
<
T
>
(
out
.
shape
(),
len
,
sizeof
(
T
));
multi_iter
<
vlen
,
cmplx
<
T
>
,
T
>
it
(
in
,
out
,
axis
);
multi_iter
<
vlen
>
it
(
in
,
out
,
axis
);
#ifndef POCKETFFT_NO_VECTORS
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
...
...
@@ -2645,43 +2658,49 @@ template<typename T> NOINLINE void general_c2r(
it
.
advance
(
vlen
);
auto
tdatav
=
reinterpret_cast
<
vtype
*>
(
storage
.
data
());
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
0
][
j
]
=
i
t
.
in
(
j
,
0
).
r
;
tdatav
[
0
][
j
]
=
i
n
[
it
.
iofs
(
j
,
0
)
]
.
r
;
{
size_t
i
=
1
,
ii
=
1
;
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
{
tdatav
[
i
][
j
]
=
it
.
in
(
j
,
ii
).
r
;
tdatav
[
i
+
1
][
j
]
=
it
.
in
(
j
,
ii
).
i
;
}
{
tdatav
[
i
][
j
]
=
in
[
it
.
iofs
(
j
,
ii
)].
r
;
tdatav
[
i
+
1
][
j
]
=
in
[
it
.
iofs
(
j
,
ii
)].
i
;
}
if
(
i
<
len
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
i
][
j
]
=
i
t
.
in
(
j
,
ii
).
r
;
tdatav
[
i
][
j
]
=
i
n
[
it
.
iofs
(
j
,
ii
)
]
.
r
;
}
plan
.
backward
(
tdatav
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
it
.
o
ut
(
j
,
i
)
=
tdatav
[
i
][
j
];
out
[
it
.
o
ofs
(
j
,
i
)
]
=
tdatav
[
i
][
j
];
}
#endif
while
(
it
.
remaining
()
>
0
)
{
it
.
advance
(
1
);
auto
tdata
=
reinterpret_cast
<
T
*>
(
storage
.
data
());
tdata
[
0
]
=
i
t
.
in
(
0
).
r
;
tdata
[
0
]
=
i
n
[
it
.
iofs
(
0
)
]
.
r
;
{
size_t
i
=
1
,
ii
=
1
;
for
(;
i
<
len
-
1
;
i
+=
2
,
++
ii
)
{
tdata
[
i
]
=
it
.
in
(
ii
).
r
;
tdata
[
i
+
1
]
=
it
.
in
(
ii
).
i
;
}
{
tdata
[
i
]
=
in
[
it
.
iofs
(
ii
)].
r
;
tdata
[
i
+
1
]
=
in
[
it
.
iofs
(
ii
)].
i
;
}
if
(
i
<
len
)
tdata
[
i
]
=
i
t
.
in
(
ii
).
r
;
tdata
[
i
]
=
i
n
[
it
.
iofs
(
ii
)
]
.
r
;
}
plan
.
backward
(
tdata
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
it
.
o
ut
(
i
)
=
tdata
[
i
];
out
[
it
.
o
ofs
(
i
)
]
=
tdata
[
i
];
}
}
// end of parallel region
}
template
<
typename
T
>
NOINLINE
void
general_r
(
const
ndarr
<
T
>
&
in
,
ndarr
<
T
>
&
out
,
size_t
axis
,
bool
forward
,
T
fct
,
const
c
ndarr
<
T
>
&
in
,
ndarr
<
T
>
&
out
,
size_t
axis
,
bool
forward
,
T
fct
,
size_t
POCKETFFT_NTHREADS
)
{
constexpr
auto
vlen
=
VLEN
<
T
>::
val
;
...
...
@@ -2692,7 +2711,7 @@ template<typename T> NOINLINE void general_r(
#endif
{
auto
storage
=
alloc_tmp
<
T
>
(
in
.
shape
(),
len
,
sizeof
(
T
));
multi_iter
<
vlen
,
T
,
T
>
it
(
in
,
out
,
axis
);
multi_iter
<
vlen
>
it
(
in
,
out
,
axis
);
#ifndef POCKETFFT_NO_VECTORS
if
(
vlen
>
1
)
while
(
it
.
remaining
()
>=
vlen
)
...
...
@@ -2702,34 +2721,35 @@ template<typename T> NOINLINE void general_r(
auto
tdatav
=
reinterpret_cast
<
vtype
*>
(
storage
.
data
());
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
tdatav
[
i
][
j
]
=
i
t
.
in
(
j
,
i
);
tdatav
[
i
][
j
]
=
i
n
[
it
.
iofs
(
j
,
i
)
]
;
forward
?
plan
.
forward
(
tdatav
,
fct
)
:
plan
.
backward
(
tdatav
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
for
(
size_t
j
=
0
;
j
<
vlen
;
++
j
)
it
.
o
ut
(
j
,
i
)
=
tdatav
[
i
][
j
];
out
[
it
.
o
ofs
(
j
,
i
)
]
=
tdatav
[
i
][
j
];
}
#endif
while
(
it
.
remaining
()
>
0
)
{
it
.
advance
(
1
);
auto
tdata
=
reinterpret_cast
<
T
*>
(
storage
.
data
());
if
(
it
.
inplace
(
)
&&
it
.
contiguous_out
(
))
// fully in-place
forward
?
plan
.
forward
(
&
it
.
o
ut
(
0
),
fct
)
:
plan
.
backward
(
&
it
.
o
ut
(
0
),
fct
);
else
if
(
it
.
contiguous_out
(
))
// compute FFT in output location
if
(
(
&
in
[
0
]
==&
out
[
0
]
)
&&
(
it
.
stride_out
()
==
sizeof
(
T
)
))
// fully in-place
forward
?
plan
.
forward
(
&
out
[
it
.
o
ofs
(
0
)
]
,
fct
)
:
plan
.
backward
(
&
out
[
it
.
o
ofs
(
0
)
]
,
fct
);
else
if
(
it
.
stride_out
()
==
sizeof
(
T
))
// compute FFT in output location
{
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
it
.
out
(
i
)
=
it
.
in
(
i
);
forward
?
plan
.
forward
(
&
it
.
out
(
0
),
fct
)
:
plan
.
backward
(
&
it
.
out
(
0
),
fct
);
out
[
it
.
oofs
(
i
)]
=
in
[
it
.
iofs
(
i
)];
forward
?
plan
.
forward
(
&
out
[
it
.
oofs
(
0
)],
fct
)
:
plan
.
backward
(
&
out
[
it
.
oofs
(
0
)],
fct
);
}
else
{
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
tdata
[
i
]
=
i
t
.
in
(
i
);
tdata
[
i
]
=
i
n
[
it
.
iofs
(
i
)
]
;
forward
?
plan
.
forward
(
tdata
,
fct
)
:
plan
.
backward
(
tdata
,
fct
);
for
(
size_t
i
=
0
;
i
<
len
;
++
i
)
it
.
o
ut
(
i
)
=
tdata
[
i
];
out
[
it
.
o
ofs
(
i
)
]
=
tdata
[
i
];
}
}
}
// end of parallel region
...
...
@@ -2744,8 +2764,8 @@ template<typename T> void c2c(const shape_t &shape, const stride_t &stride_in,
{
if
(
util
::
prod
(
shape
)
==
0
)
return
;
util
::
sanity_check
(
shape
,
stride_in
,
stride_out
,
data_in
==
data_out
,
axes
);
ndarr
<
cmplx
<
T
>>
ain
(
data_in
,
shape
,
stride_in
)
,
aout
(
data_out
,
shape
,
stride_out
);
c
ndarr
<
cmplx
<
T
>>
ain
(
data_in
,
shape
,
stride_in
)
;
ndarr
<
cmplx
<
T
>>
aout
(
data_out
,
shape
,
stride_out
);
general_c
(
ain
,
aout
,
axes
,
forward
,
fct
,
nthreads
);
}
...
...
@@ -2755,7 +2775,7 @@ template<typename T> void r2c(const shape_t &shape_in,
{
if
(
util
::
prod
(
shape_in
)
==
0
)
return
;
util
::
sanity_check
(
shape_in
,
stride_in
,
stride_out
,
false
,
axis
);
ndarr
<
T
>
ain
(
data_in
,
shape_in
,
stride_in
);
c
ndarr
<
T
>
ain
(
data_in
,
shape_in
,
stride_in
);
shape_t
shape_out
(
shape_in
);
shape_out
[
axis
]
=
shape_in
[
axis
]
/
2
+
1
;
ndarr
<
cmplx
<
T
>>
aout
(
data_out
,
shape_out
,
stride_out
);
...
...
@@ -2787,7 +2807,7 @@ template<typename T> void c2r(const shape_t &shape_out,
util
::
sanity_check
(
shape_out
,
stride_in
,
stride_out
,
false
,
axis
);
shape_t
shape_in
(
shape_out
);
shape_in
[
axis
]
=
shape_out
[
axis
]
/
2
+
1
;
ndarr
<
cmplx
<
T
>>
ain
(
data_in
,
shape_in
,
stride_in
);
c
ndarr
<
cmplx
<
T
>>
ain
(
data_in
,
shape_in
,
stride_in
);
ndarr
<
T
>
aout
(
data_out
,
shape_out
,
stride_out
);
general_c2r
(
ain
,
aout
,
axis
,
fct
,
nthreads
);
}
...
...
@@ -2822,7 +2842,8 @@ template<typename T> void r2r_fftpack(const shape_t &shape,
{
if
(
util
::
prod
(
shape
)
==
0
)
return
;
util
::
sanity_check
(
shape
,
stride_in
,
stride_out
,
data_in
==
data_out
,
axis
);
ndarr
<
T
>
ain
(
data_in
,
shape
,
stride_in
),
aout
(
data_out
,
shape
,
stride_out
);
cndarr
<
T
>
ain
(
data_in
,
shape
,
stride_in
);
ndarr
<
T
>
aout
(
data_out
,
shape
,
stride_out
);
general_r
(
ain
,
aout
,
axis
,
forward
,
fct
,
nthreads
);
}
...
...
@@ -2832,7 +2853,8 @@ template<typename T> void r2r_hartley(const shape_t &shape,
{
if
(
util
::
prod
(
shape
)
==
0
)
return
;
util
::
sanity_check
(
shape
,
stride_in
,
stride_out
,
data_in
==
data_out
,
axes
);
ndarr
<
T
>
ain
(
data_in
,
shape
,
stride_in
),
aout
(
data_out
,
shape
,
stride_out
);
cndarr
<
T
>
ain
(
data_in
,
shape
,
stride_in
);
ndarr
<
T
>
aout
(
data_out
,
shape
,
stride_out
);
general_hartley
(
ain
,
aout
,
axes
,
fct
,
nthreads
);
}
...
...
pypocketfft.cc
View file @
87832dd2
...
...
@@ -138,11 +138,11 @@ template<typename T> py::array sym_rfftn_internal(const py::array &in,
// now fill in second half
using
namespace
pocketfft
::
detail
;
ndarr
<
complex
<
T
>>
ares
(
res
.
mutable_data
(),
dims
,
s_out
);
rev_iter
<
complex
<
T
>>
iter
(
ares
,
axes
);
rev_iter
iter
(
ares
,
axes
);
while
(
iter
.
remaining
()
>
0
)
{
auto
v
=
ares
.
get
(
iter
.
ofs
()
)
;
ares
.
set
(
iter
.
rev_ofs
()
,
conj
(
v
)
)
;
auto
v
=
ares
[
iter
.
ofs
()
]
;
ares
[
iter
.
rev_ofs
()
]
=
conj
(
v
);
iter
.
advance
();
}
}
...
...
@@ -282,19 +282,19 @@ template<typename T>py::array complex2hartley(const py::array &in,
using
namespace
pocketfft
::
detail
;
auto
dims_out
(
copy_shape
(
in
));
py
::
array
out
=
inplace
?
in
:
py
::
array_t
<
T
>
(
dims_out
);
ndarr
<
cmplx
<
T
>>
atmp
(
tmp
.
data
(),
copy_shape
(
tmp
),
copy_strides
(
tmp
));
c
ndarr
<
cmplx
<
T
>>
atmp
(
tmp
.
data
(),
copy_shape
(
tmp
),
copy_strides
(
tmp
));
ndarr
<
T
>
aout
(
out
.
mutable_data
(),
copy_shape
(
out
),
copy_strides
(
out
));
auto
axes
=
makeaxes
(
in
,
axes_
);
{
py
::
gil_scoped_release
release
;
simple_iter
<
cmplx
<
T
>>
iin
(
atmp
);
rev_iter
<
T
>
iout
(
aout
,
axes
);
simple_iter
iin
(
atmp
);
rev_iter
iout
(
aout
,
axes
);
if
(
iin
.
remaining
()
!=
iout
.
remaining
())
throw
runtime_error
(
"oops"
);
while
(
iin
.
remaining
()
>
0
)
{
auto
v
=
atmp
.
get
(
iin
.
ofs
()
)
;
aout
.
set
(
iout
.
ofs
()
,
v
.
r
+
v
.
i
)
;
aout
.
set
(
iout
.
rev_ofs
()
,
v
.
r
-
v
.
i
)
;
auto
v
=
atmp
[
iin
.
ofs
()
]
;
aout
[
iout
.
ofs
()
]
=
v
.
r
+
v
.
i
;
aout
[
iout
.
rev_ofs
()
]
=
v
.
r
-
v
.
i
;
iin
.
advance
();
iout
.
advance
();
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment