Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pypocketfft
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
4
Issues
4
List
Boards
Labels
Service Desk
Milestones
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Martin Reinecke
pypocketfft
Commits
7e4b3513
Commit
7e4b3513
authored
May 02, 2019
by
Martin Reinecke
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
smaller memory consumption during twiddle generation
parent
7023b907
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
66 additions
and
63 deletions
+66
-63
pocketfft.cc
pocketfft.cc
+66
-63
No files found.
pocketfft.cc
View file @
7e4b3513
...
...
@@ -72,14 +72,58 @@ template<typename T> struct arr
size_t
size
()
const
{
return
sz
;
}
};
template
<
typename
T
>
struct
cmplx
{
T
r
,
i
;
cmplx
()
{}
cmplx
(
T
r_
,
T
i_
)
:
r
(
r_
),
i
(
i_
)
{}
void
Set
(
T
r_
,
T
i_
)
{
r
=
r_
;
i
=
i_
;
}
void
Set
(
T
r_
)
{
r
=
r_
;
i
=
T
(
0
);
}
cmplx
&
operator
+=
(
const
cmplx
&
other
)
{
r
+=
other
.
r
;
i
+=
other
.
i
;
return
*
this
;
}
template
<
typename
T2
>
cmplx
&
operator
*=
(
T2
other
)
{
r
*=
other
;
i
*=
other
;
return
*
this
;
}
cmplx
operator
+
(
const
cmplx
&
other
)
const
{
return
cmplx
(
r
+
other
.
r
,
i
+
other
.
i
);
}
cmplx
operator
-
(
const
cmplx
&
other
)
const
{
return
cmplx
(
r
-
other
.
r
,
i
-
other
.
i
);
}
template
<
typename
T2
>
auto
operator
*
(
const
T2
&
other
)
const
->
cmplx
<
decltype
(
r
*
other
)
>
{
return
{
r
*
other
,
i
*
other
};
}
template
<
typename
T2
>
auto
operator
*
(
const
cmplx
<
T2
>
&
other
)
const
->
cmplx
<
decltype
(
r
+
other
.
r
)
>
{
return
{
r
*
other
.
r
-
i
*
other
.
i
,
r
*
other
.
i
+
i
*
other
.
r
};
}
template
<
bool
bwd
,
typename
T2
>
auto
special_mul
(
const
cmplx
<
T2
>
&
other
)
const
->
cmplx
<
decltype
(
r
+
other
.
r
)
>
{
if
(
bwd
)
return
{
r
*
other
.
r
-
i
*
other
.
i
,
r
*
other
.
i
+
i
*
other
.
r
};
else
return
{
r
*
other
.
r
+
i
*
other
.
i
,
i
*
other
.
r
-
r
*
other
.
i
};
}
};
template
<
typename
T
>
void
PMC
(
cmplx
<
T
>
&
a
,
cmplx
<
T
>
&
b
,
const
cmplx
<
T
>
&
c
,
const
cmplx
<
T
>
&
d
)
{
a
=
c
+
d
;
b
=
c
-
d
;
}
template
<
typename
T
>
cmplx
<
T
>
conj
(
const
cmplx
<
T
>
&
a
)
{
return
{
a
.
r
,
-
a
.
i
};
}
template
<
typename
T
>
void
ROT90
(
cmplx
<
T
>
&
a
)
{
auto
tmp_
=
a
.
r
;
a
.
r
=-
a
.
i
;
a
.
i
=
tmp_
;
}
template
<
typename
T
>
void
ROTM90
(
cmplx
<
T
>
&
a
)
{
auto
tmp_
=-
a
.
r
;
a
.
r
=
a
.
i
;
a
.
i
=
tmp_
;
}
//
// twiddle factor section
//
class
sincos_2pibyn
template
<
typename
T
>
class
sincos_2pibyn
{
private:
arr
<
double
>
data
;
arr
<
T
>
data
;
// adapted from https://stackoverflow.com/questions/42792939/
// CAUTION: this function only works for arguments in the range
...
...
@@ -110,15 +154,20 @@ class sincos_2pibyn
res
[
1
]
=
s
;
}
NOINLINE
void
calc_first_octant
(
size_t
den
,
double
*
restrict
res
)
NOINLINE
void
calc_first_octant
(
size_t
den
,
T
*
restrict
res
)
{
size_t
n
=
(
den
+
4
)
>>
3
;
if
(
n
==
0
)
return
;
res
[
0
]
=
1.
;
res
[
1
]
=
0.
;
if
(
n
==
1
)
return
;
size_t
l1
=
(
size_t
)
sqrt
(
n
);
arr
<
double
>
tmp
(
2
*
l1
);
for
(
size_t
i
=
1
;
i
<
l1
;
++
i
)
my_sincosm1pi
((
2.
*
i
)
/
den
,
&
res
[
2
*
i
]);
{
my_sincosm1pi
((
2.
*
i
)
/
den
,
&
tmp
[
2
*
i
]);
res
[
2
*
i
]
=
tmp
[
2
*
i
]
+
1.
;
res
[
2
*
i
+
1
]
=
tmp
[
2
*
i
+
1
];
}
size_t
start
=
l1
;
while
(
start
<
n
)
{
...
...
@@ -130,19 +179,17 @@ class sincos_2pibyn
if
(
start
+
end
>
n
)
end
=
n
-
start
;
for
(
size_t
i
=
1
;
i
<
end
;
++
i
)
{
double
csx
[
2
]
=
{
res
[
2
*
i
],
res
[
2
*
i
+
1
]};
double
csx
[
2
]
=
{
tmp
[
2
*
i
],
tmp
[
2
*
i
+
1
]};
res
[
2
*
(
start
+
i
)]
=
((
cs
[
0
]
*
csx
[
0
]
-
cs
[
1
]
*
csx
[
1
]
+
cs
[
0
])
+
csx
[
0
])
+
1.
;
res
[
2
*
(
start
+
i
)
+
1
]
=
(
cs
[
0
]
*
csx
[
1
]
+
cs
[
1
]
*
csx
[
0
])
+
cs
[
1
]
+
csx
[
1
];
}
start
+=
l1
;
}
for
(
size_t
i
=
1
;
i
<
l1
;
++
i
)
res
[
2
*
i
]
+=
1.
;
}
NOINLINE
void
calc_first_quadrant
(
size_t
n
,
double
*
restrict
res
)
NOINLINE
void
calc_first_quadrant
(
size_t
n
,
T
*
restrict
res
)
{
double
*
restrict
p
=
res
+
n
;
T
*
restrict
p
=
res
+
n
;
calc_first_octant
(
n
<<
1
,
p
);
size_t
ndone
=
(
n
+
2
)
>>
2
;
size_t
i
=
0
,
idx1
=
0
,
idx2
=
2
*
ndone
-
2
;
...
...
@@ -160,10 +207,10 @@ class sincos_2pibyn
}
}
NOINLINE
void
calc_first_half
(
size_t
n
,
double
*
restrict
res
)
NOINLINE
void
calc_first_half
(
size_t
n
,
T
*
restrict
res
)
{
int
ndone
=
(
n
+
1
)
>>
1
;
double
*
p
=
res
+
n
-
1
;
T
*
p
=
res
+
n
-
1
;
calc_first_octant
(
n
<<
2
,
p
);
int
i4
=
0
,
in
=
n
,
i
=
0
;
for
(;
i4
<=
in
-
i4
;
++
i
,
i4
+=
4
)
// octant 0
...
...
@@ -187,7 +234,7 @@ class sincos_2pibyn
}
}
NOINLINE
void
fill_first_quadrant
(
size_t
n
,
double
*
restrict
res
)
NOINLINE
void
fill_first_quadrant
(
size_t
n
,
T
*
restrict
res
)
{
const
double
hsqt2
=
0.707106781186547524400844362104849
;
size_t
quart
=
n
>>
2
;
...
...
@@ -200,7 +247,7 @@ class sincos_2pibyn
}
}
NOINLINE
void
fill_first_half
(
size_t
n
,
double
*
restrict
res
)
NOINLINE
void
fill_first_half
(
size_t
n
,
T
*
restrict
res
)
{
size_t
half
=
n
>>
1
;
if
((
n
&
3
)
==
0
)
...
...
@@ -217,7 +264,7 @@ class sincos_2pibyn
}
}
NOINLINE
void
fill_second_half
(
size_t
n
,
double
*
restrict
res
)
NOINLINE
void
fill_second_half
(
size_t
n
,
T
*
restrict
res
)
{
if
((
n
&
1
)
==
0
)
for
(
size_t
i
=
0
;
i
<
n
;
++
i
)
...
...
@@ -230,7 +277,7 @@ class sincos_2pibyn
}
}
NOINLINE
void
sincos_2pibyn_half
(
size_t
n
,
double
*
restrict
res
)
NOINLINE
void
sincos_2pibyn_half
(
size_t
n
,
T
*
restrict
res
)
{
if
((
n
&
3
)
==
0
)
{
...
...
@@ -255,7 +302,7 @@ class sincos_2pibyn
if
(
!
half
)
fill_second_half
(
n
,
data
.
data
());
}
double
operator
[](
size_t
idx
)
const
{
return
data
[
idx
];
}
T
operator
[](
size_t
idx
)
const
{
return
data
[
idx
];
}
};
NOINLINE
size_t
largest_prime_factor
(
size_t
n
)
...
...
@@ -313,50 +360,6 @@ NOINLINE size_t good_size(size_t n)
return
bestfac
;
}
template
<
typename
T
>
struct
cmplx
{
T
r
,
i
;
cmplx
()
{}
cmplx
(
T
r_
,
T
i_
)
:
r
(
r_
),
i
(
i_
)
{}
void
Set
(
T
r_
,
T
i_
)
{
r
=
r_
;
i
=
i_
;
}
void
Set
(
T
r_
)
{
r
=
r_
;
i
=
T
(
0
);
}
cmplx
&
operator
+=
(
const
cmplx
&
other
)
{
r
+=
other
.
r
;
i
+=
other
.
i
;
return
*
this
;
}
template
<
typename
T2
>
cmplx
&
operator
*=
(
T2
other
)
{
r
*=
other
;
i
*=
other
;
return
*
this
;
}
cmplx
operator
+
(
const
cmplx
&
other
)
const
{
return
cmplx
(
r
+
other
.
r
,
i
+
other
.
i
);
}
cmplx
operator
-
(
const
cmplx
&
other
)
const
{
return
cmplx
(
r
-
other
.
r
,
i
-
other
.
i
);
}
template
<
typename
T2
>
auto
operator
*
(
const
T2
&
other
)
const
->
cmplx
<
decltype
(
r
*
other
)
>
{
return
{
r
*
other
,
i
*
other
};
}
template
<
typename
T2
>
auto
operator
*
(
const
cmplx
<
T2
>
&
other
)
const
->
cmplx
<
decltype
(
r
+
other
.
r
)
>
{
return
{
r
*
other
.
r
-
i
*
other
.
i
,
r
*
other
.
i
+
i
*
other
.
r
};
}
template
<
bool
bwd
,
typename
T2
>
auto
special_mul
(
const
cmplx
<
T2
>
&
other
)
const
->
cmplx
<
decltype
(
r
+
other
.
r
)
>
{
if
(
bwd
)
return
{
r
*
other
.
r
-
i
*
other
.
i
,
r
*
other
.
i
+
i
*
other
.
r
};
else
return
{
r
*
other
.
r
+
i
*
other
.
i
,
i
*
other
.
r
-
r
*
other
.
i
};
}
};
template
<
typename
T
>
void
PMC
(
cmplx
<
T
>
&
a
,
cmplx
<
T
>
&
b
,
const
cmplx
<
T
>
&
c
,
const
cmplx
<
T
>
&
d
)
{
a
=
c
+
d
;
b
=
c
-
d
;
}
template
<
typename
T
>
cmplx
<
T
>
conj
(
const
cmplx
<
T
>
&
a
)
{
return
{
a
.
r
,
-
a
.
i
};
}
template
<
typename
T
>
void
ROT90
(
cmplx
<
T
>
&
a
)
{
auto
tmp_
=
a
.
r
;
a
.
r
=-
a
.
i
;
a
.
i
=
tmp_
;
}
template
<
typename
T
>
void
ROTM90
(
cmplx
<
T
>
&
a
)
{
auto
tmp_
=-
a
.
r
;
a
.
r
=
a
.
i
;
a
.
i
=
tmp_
;
}
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
#define WA(x,i) wa[(i)-1+(x)*(ido-1)]
...
...
@@ -910,7 +913,7 @@ template<bool bwd, typename T> NOINLINE void pass_all(T c[], T0 fact)
NOINLINE
void
comp_twiddle
()
{
sincos_2pibyn
twid
(
length
,
false
);
sincos_2pibyn
<
T0
>
twid
(
length
,
false
);
size_t
l1
=
1
;
size_t
memofs
=
0
;
for
(
size_t
k
=
0
;
k
<
nfct
;
++
k
)
...
...
@@ -1705,7 +1708,7 @@ size_t twsize() const
NOINLINE
void
comp_twiddle
()
{
sincos_2pibyn
twid
(
length
,
true
);
sincos_2pibyn
<
T0
>
twid
(
length
,
true
);
size_t
l1
=
1
;
T0
*
ptr
=
mem
.
data
();
for
(
size_t
k
=
0
;
k
<
nfct
;
++
k
)
...
...
@@ -1807,7 +1810,7 @@ template<typename T0> class fftblue
bk
(
mem
.
data
()),
bkf
(
mem
.
data
()
+
2
*
n
)
{
/* initialize b_k */
sincos_2pibyn
tmp
(
2
*
n
,
false
);
sincos_2pibyn
<
T0
>
tmp
(
2
*
n
,
false
);
bk
[
0
]
=
1
;
bk
[
1
]
=
0
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment