Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Martin Reinecke
pypocketfft
Commits
7e4b3513
Commit
7e4b3513
authored
May 02, 2019
by
Martin Reinecke
Browse files
smaller memory consumption during twiddle generation
parent
7023b907
Changes
1
Hide whitespace changes
Inline
Side-by-side
pocketfft.cc
View file @
7e4b3513
...
...
@@ -72,14 +72,58 @@ template<typename T> struct arr
size_t
size
()
const
{
return
sz
;
}
};
template
<
typename
T
>
struct
cmplx
{
T
r
,
i
;
cmplx
()
{}
cmplx
(
T
r_
,
T
i_
)
:
r
(
r_
),
i
(
i_
)
{}
void
Set
(
T
r_
,
T
i_
)
{
r
=
r_
;
i
=
i_
;
}
void
Set
(
T
r_
)
{
r
=
r_
;
i
=
T
(
0
);
}
cmplx
&
operator
+=
(
const
cmplx
&
other
)
{
r
+=
other
.
r
;
i
+=
other
.
i
;
return
*
this
;
}
template
<
typename
T2
>
cmplx
&
operator
*=
(
T2
other
)
{
r
*=
other
;
i
*=
other
;
return
*
this
;
}
cmplx
operator
+
(
const
cmplx
&
other
)
const
{
return
cmplx
(
r
+
other
.
r
,
i
+
other
.
i
);
}
cmplx
operator
-
(
const
cmplx
&
other
)
const
{
return
cmplx
(
r
-
other
.
r
,
i
-
other
.
i
);
}
template
<
typename
T2
>
auto
operator
*
(
const
T2
&
other
)
const
->
cmplx
<
decltype
(
r
*
other
)
>
{
return
{
r
*
other
,
i
*
other
};
}
template
<
typename
T2
>
auto
operator
*
(
const
cmplx
<
T2
>
&
other
)
const
->
cmplx
<
decltype
(
r
+
other
.
r
)
>
{
return
{
r
*
other
.
r
-
i
*
other
.
i
,
r
*
other
.
i
+
i
*
other
.
r
};
}
template
<
bool
bwd
,
typename
T2
>
auto
special_mul
(
const
cmplx
<
T2
>
&
other
)
const
->
cmplx
<
decltype
(
r
+
other
.
r
)
>
{
if
(
bwd
)
return
{
r
*
other
.
r
-
i
*
other
.
i
,
r
*
other
.
i
+
i
*
other
.
r
};
else
return
{
r
*
other
.
r
+
i
*
other
.
i
,
i
*
other
.
r
-
r
*
other
.
i
};
}
};
template
<
typename
T
>
void
PMC
(
cmplx
<
T
>
&
a
,
cmplx
<
T
>
&
b
,
const
cmplx
<
T
>
&
c
,
const
cmplx
<
T
>
&
d
)
{
a
=
c
+
d
;
b
=
c
-
d
;
}
template
<
typename
T
>
cmplx
<
T
>
conj
(
const
cmplx
<
T
>
&
a
)
{
return
{
a
.
r
,
-
a
.
i
};
}
template
<
typename
T
>
void
ROT90
(
cmplx
<
T
>
&
a
)
{
auto
tmp_
=
a
.
r
;
a
.
r
=-
a
.
i
;
a
.
i
=
tmp_
;
}
template
<
typename
T
>
void
ROTM90
(
cmplx
<
T
>
&
a
)
{
auto
tmp_
=-
a
.
r
;
a
.
r
=
a
.
i
;
a
.
i
=
tmp_
;
}
//
// twiddle factor section
//
class
sincos_2pibyn
template
<
typename
T
>
class
sincos_2pibyn
{
private:
arr
<
double
>
data
;
arr
<
T
>
data
;
// adapted from https://stackoverflow.com/questions/42792939/
// CAUTION: this function only works for arguments in the range
...
...
@@ -110,15 +154,20 @@ class sincos_2pibyn
res
[
1
]
=
s
;
}
NOINLINE
void
calc_first_octant
(
size_t
den
,
double
*
restrict
res
)
NOINLINE
void
calc_first_octant
(
size_t
den
,
T
*
restrict
res
)
{
size_t
n
=
(
den
+
4
)
>>
3
;
if
(
n
==
0
)
return
;
res
[
0
]
=
1.
;
res
[
1
]
=
0.
;
if
(
n
==
1
)
return
;
size_t
l1
=
(
size_t
)
sqrt
(
n
);
arr
<
double
>
tmp
(
2
*
l1
);
for
(
size_t
i
=
1
;
i
<
l1
;
++
i
)
my_sincosm1pi
((
2.
*
i
)
/
den
,
&
res
[
2
*
i
]);
{
my_sincosm1pi
((
2.
*
i
)
/
den
,
&
tmp
[
2
*
i
]);
res
[
2
*
i
]
=
tmp
[
2
*
i
]
+
1.
;
res
[
2
*
i
+
1
]
=
tmp
[
2
*
i
+
1
];
}
size_t
start
=
l1
;
while
(
start
<
n
)
{
...
...
@@ -130,19 +179,17 @@ class sincos_2pibyn
if
(
start
+
end
>
n
)
end
=
n
-
start
;
for
(
size_t
i
=
1
;
i
<
end
;
++
i
)
{
double
csx
[
2
]
=
{
res
[
2
*
i
],
res
[
2
*
i
+
1
]};
double
csx
[
2
]
=
{
tmp
[
2
*
i
],
tmp
[
2
*
i
+
1
]};
res
[
2
*
(
start
+
i
)]
=
((
cs
[
0
]
*
csx
[
0
]
-
cs
[
1
]
*
csx
[
1
]
+
cs
[
0
])
+
csx
[
0
])
+
1.
;
res
[
2
*
(
start
+
i
)
+
1
]
=
(
cs
[
0
]
*
csx
[
1
]
+
cs
[
1
]
*
csx
[
0
])
+
cs
[
1
]
+
csx
[
1
];
}
start
+=
l1
;
}
for
(
size_t
i
=
1
;
i
<
l1
;
++
i
)
res
[
2
*
i
]
+=
1.
;
}
NOINLINE
void
calc_first_quadrant
(
size_t
n
,
double
*
restrict
res
)
NOINLINE
void
calc_first_quadrant
(
size_t
n
,
T
*
restrict
res
)
{
double
*
restrict
p
=
res
+
n
;
T
*
restrict
p
=
res
+
n
;
calc_first_octant
(
n
<<
1
,
p
);
size_t
ndone
=
(
n
+
2
)
>>
2
;
size_t
i
=
0
,
idx1
=
0
,
idx2
=
2
*
ndone
-
2
;
...
...
@@ -160,10 +207,10 @@ class sincos_2pibyn
}
}
NOINLINE
void
calc_first_half
(
size_t
n
,
double
*
restrict
res
)
NOINLINE
void
calc_first_half
(
size_t
n
,
T
*
restrict
res
)
{
int
ndone
=
(
n
+
1
)
>>
1
;
double
*
p
=
res
+
n
-
1
;
T
*
p
=
res
+
n
-
1
;
calc_first_octant
(
n
<<
2
,
p
);
int
i4
=
0
,
in
=
n
,
i
=
0
;
for
(;
i4
<=
in
-
i4
;
++
i
,
i4
+=
4
)
// octant 0
...
...
@@ -187,7 +234,7 @@ class sincos_2pibyn
}
}
NOINLINE
void
fill_first_quadrant
(
size_t
n
,
double
*
restrict
res
)
NOINLINE
void
fill_first_quadrant
(
size_t
n
,
T
*
restrict
res
)
{
const
double
hsqt2
=
0.707106781186547524400844362104849
;
size_t
quart
=
n
>>
2
;
...
...
@@ -200,7 +247,7 @@ class sincos_2pibyn
}
}
NOINLINE
void
fill_first_half
(
size_t
n
,
double
*
restrict
res
)
NOINLINE
void
fill_first_half
(
size_t
n
,
T
*
restrict
res
)
{
size_t
half
=
n
>>
1
;
if
((
n
&
3
)
==
0
)
...
...
@@ -217,7 +264,7 @@ class sincos_2pibyn
}
}
NOINLINE
void
fill_second_half
(
size_t
n
,
double
*
restrict
res
)
NOINLINE
void
fill_second_half
(
size_t
n
,
T
*
restrict
res
)
{
if
((
n
&
1
)
==
0
)
for
(
size_t
i
=
0
;
i
<
n
;
++
i
)
...
...
@@ -230,7 +277,7 @@ class sincos_2pibyn
}
}
NOINLINE
void
sincos_2pibyn_half
(
size_t
n
,
double
*
restrict
res
)
NOINLINE
void
sincos_2pibyn_half
(
size_t
n
,
T
*
restrict
res
)
{
if
((
n
&
3
)
==
0
)
{
...
...
@@ -255,7 +302,7 @@ class sincos_2pibyn
if
(
!
half
)
fill_second_half
(
n
,
data
.
data
());
}
double
operator
[](
size_t
idx
)
const
{
return
data
[
idx
];
}
T
operator
[](
size_t
idx
)
const
{
return
data
[
idx
];
}
};
NOINLINE
size_t
largest_prime_factor
(
size_t
n
)
...
...
@@ -313,50 +360,6 @@ NOINLINE size_t good_size(size_t n)
return
bestfac
;
}
template
<
typename
T
>
struct
cmplx
{
T
r
,
i
;
cmplx
()
{}
cmplx
(
T
r_
,
T
i_
)
:
r
(
r_
),
i
(
i_
)
{}
void
Set
(
T
r_
,
T
i_
)
{
r
=
r_
;
i
=
i_
;
}
void
Set
(
T
r_
)
{
r
=
r_
;
i
=
T
(
0
);
}
cmplx
&
operator
+=
(
const
cmplx
&
other
)
{
r
+=
other
.
r
;
i
+=
other
.
i
;
return
*
this
;
}
template
<
typename
T2
>
cmplx
&
operator
*=
(
T2
other
)
{
r
*=
other
;
i
*=
other
;
return
*
this
;
}
cmplx
operator
+
(
const
cmplx
&
other
)
const
{
return
cmplx
(
r
+
other
.
r
,
i
+
other
.
i
);
}
cmplx
operator
-
(
const
cmplx
&
other
)
const
{
return
cmplx
(
r
-
other
.
r
,
i
-
other
.
i
);
}
template
<
typename
T2
>
auto
operator
*
(
const
T2
&
other
)
const
->
cmplx
<
decltype
(
r
*
other
)
>
{
return
{
r
*
other
,
i
*
other
};
}
template
<
typename
T2
>
auto
operator
*
(
const
cmplx
<
T2
>
&
other
)
const
->
cmplx
<
decltype
(
r
+
other
.
r
)
>
{
return
{
r
*
other
.
r
-
i
*
other
.
i
,
r
*
other
.
i
+
i
*
other
.
r
};
}
template
<
bool
bwd
,
typename
T2
>
auto
special_mul
(
const
cmplx
<
T2
>
&
other
)
const
->
cmplx
<
decltype
(
r
+
other
.
r
)
>
{
if
(
bwd
)
return
{
r
*
other
.
r
-
i
*
other
.
i
,
r
*
other
.
i
+
i
*
other
.
r
};
else
return
{
r
*
other
.
r
+
i
*
other
.
i
,
i
*
other
.
r
-
r
*
other
.
i
};
}
};
template
<
typename
T
>
void
PMC
(
cmplx
<
T
>
&
a
,
cmplx
<
T
>
&
b
,
const
cmplx
<
T
>
&
c
,
const
cmplx
<
T
>
&
d
)
{
a
=
c
+
d
;
b
=
c
-
d
;
}
template
<
typename
T
>
cmplx
<
T
>
conj
(
const
cmplx
<
T
>
&
a
)
{
return
{
a
.
r
,
-
a
.
i
};
}
template
<
typename
T
>
void
ROT90
(
cmplx
<
T
>
&
a
)
{
auto
tmp_
=
a
.
r
;
a
.
r
=-
a
.
i
;
a
.
i
=
tmp_
;
}
template
<
typename
T
>
void
ROTM90
(
cmplx
<
T
>
&
a
)
{
auto
tmp_
=-
a
.
r
;
a
.
r
=
a
.
i
;
a
.
i
=
tmp_
;
}
#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
#define WA(x,i) wa[(i)-1+(x)*(ido-1)]
...
...
@@ -910,7 +913,7 @@ template<bool bwd, typename T> NOINLINE void pass_all(T c[], T0 fact)
NOINLINE
void
comp_twiddle
()
{
sincos_2pibyn
twid
(
length
,
false
);
sincos_2pibyn
<
T0
>
twid
(
length
,
false
);
size_t
l1
=
1
;
size_t
memofs
=
0
;
for
(
size_t
k
=
0
;
k
<
nfct
;
++
k
)
...
...
@@ -1705,7 +1708,7 @@ size_t twsize() const
NOINLINE
void
comp_twiddle
()
{
sincos_2pibyn
twid
(
length
,
true
);
sincos_2pibyn
<
T0
>
twid
(
length
,
true
);
size_t
l1
=
1
;
T0
*
ptr
=
mem
.
data
();
for
(
size_t
k
=
0
;
k
<
nfct
;
++
k
)
...
...
@@ -1807,7 +1810,7 @@ template<typename T0> class fftblue
bk
(
mem
.
data
()),
bkf
(
mem
.
data
()
+
2
*
n
)
{
/* initialize b_k */
sincos_2pibyn
tmp
(
2
*
n
,
false
);
sincos_2pibyn
<
T0
>
tmp
(
2
*
n
,
false
);
bk
[
0
]
=
1
;
bk
[
1
]
=
0
;
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment