Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Martin Reinecke
ducc
Commits
57745eea
Commit
57745eea
authored
Jun 25, 2020
by
Martin Reinecke
Browse files
significant speedup
parent
73736903
Pipeline
#77274
passed with stages
in 13 minutes and 1 second
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
python/totalconvolve.h
View file @
57745eea
...
...
@@ -434,7 +434,14 @@ template<typename T> class Interpolator
auto
idx
=
getIdx
(
ptg
);
execStatic
(
idx
.
size
(),
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
{
vector
<
T
>
wt
(
supp
),
wp
(
supp
);
union
{
native_simd
<
T
>
simd
[
64
/
vl
];
T
scalar
[
64
];
}
kdata
;
T
*
wt
(
kdata
.
scalar
),
*
wp
(
kdata
.
scalar
+
supp
);
size_t
nvec
=
(
2
*
supp
+
vl
-
1
)
/
vl
;
for
(
size_t
i
=
0
;
i
<
nvec
;
++
i
)
kdata
.
simd
[
i
]
=
0
;
vector
<
T
>
psiarr
(
2
*
kmax
+
1
);
#ifdef SIMD_INTERPOL
vector
<
native_simd
<
T
>>
psiarr2
((
2
*
kmax
+
1
+
vl
-
1
)
/
vl
);
...
...
@@ -446,11 +453,13 @@ template<typename T> class Interpolator
T
f0
=
T
(
0.5
*
supp
+
ptg
(
i
,
0
)
*
xdtheta
);
size_t
i0
=
size_t
(
f0
+
T
(
1
));
for
(
size_t
t
=
0
;
t
<
supp
;
++
t
)
wt
[
t
]
=
kernel
(
(
t
+
i0
-
f0
)
*
delta
-
1
)
;
wt
[
t
]
=
(
t
+
i0
-
f0
)
*
delta
-
1
;
T
f1
=
T
(
0.5
)
*
supp
+
ptg
(
i
,
1
)
*
xdphi
;
size_t
i1
=
size_t
(
f1
+
1.
);
for
(
size_t
t
=
0
;
t
<
supp
;
++
t
)
wp
[
t
]
=
kernel
((
t
+
i1
-
f1
)
*
delta
-
1
);
wp
[
t
]
=
(
t
+
i1
-
f1
)
*
delta
-
1
;
for
(
size_t
t
=
0
;
t
<
nvec
;
++
t
)
kdata
.
simd
[
t
]
=
kernel
(
kdata
.
simd
[
t
]);
psiarr
[
0
]
=
1.
;
double
psi
=
ptg
(
i
,
2
);
double
cpsi
=
cos
(
psi
),
spsi
=
sin
(
psi
);
...
...
@@ -484,25 +493,25 @@ template<typename T> class Interpolator
{
#ifdef SPECIAL_CASING
case
1
:
interpol_help0
<
1
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
1
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
case
2
:
interpol_help0
<
2
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
2
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
case
3
:
interpol_help0
<
3
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
3
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
case
4
:
interpol_help0
<
4
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
4
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
case
5
:
interpol_help0
<
5
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
5
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
case
6
:
interpol_help0
<
6
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
6
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
case
7
:
interpol_help0
<
7
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
7
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
#endif
default:
...
...
@@ -549,25 +558,25 @@ template<typename T> class Interpolator
{
#ifdef SPECIAL_CASING
case
1
:
interpol_help0
<
1
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
1
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
case
2
:
interpol_help0
<
2
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
2
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
case
3
:
interpol_help0
<
3
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
3
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
case
4
:
interpol_help0
<
4
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
4
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
case
5
:
interpol_help0
<
5
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
5
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
case
6
:
interpol_help0
<
6
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
6
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
case
7
:
interpol_help0
<
7
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
interpol_help0
<
7
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
res
,
i
);
break
;
#endif
default:
...
...
@@ -629,7 +638,14 @@ template<typename T> class Interpolator
execStatic
(
idx
.
size
(),
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
{
size_t
b_theta
=
99999999999999
,
b_phi
=
9999999999999999
;
vector
<
T
>
wt
(
supp
),
wp
(
supp
);
union
{
native_simd
<
T
>
simd
[
64
/
vl
];
T
scalar
[
64
];
}
kdata
;
T
*
wt
(
kdata
.
scalar
),
*
wp
(
kdata
.
scalar
+
supp
);
size_t
nvec
=
(
2
*
supp
+
vl
-
1
)
/
vl
;
for
(
size_t
i
=
0
;
i
<
nvec
;
++
i
)
kdata
.
simd
[
i
]
=
0
;
vector
<
T
>
psiarr
(
2
*
kmax
+
1
);
#ifdef SIMD_INTERPOL
vector
<
native_simd
<
T
>>
psiarr2
((
2
*
kmax
+
1
+
vl
-
1
)
/
vl
);
...
...
@@ -641,11 +657,13 @@ template<typename T> class Interpolator
T
f0
=
T
(
0.5
)
*
supp
+
ptg
(
i
,
0
)
*
xdtheta
;
size_t
i0
=
size_t
(
f0
+
1.
);
for
(
size_t
t
=
0
;
t
<
supp
;
++
t
)
wt
[
t
]
=
kernel
(
(
t
+
i0
-
f0
)
*
delta
-
1
)
;
wt
[
t
]
=
(
t
+
i0
-
f0
)
*
delta
-
1
;
T
f1
=
T
(
0.5
)
*
supp
+
ptg
(
i
,
1
)
*
xdphi
;
size_t
i1
=
size_t
(
f1
+
1.
);
for
(
size_t
t
=
0
;
t
<
supp
;
++
t
)
wp
[
t
]
=
kernel
((
t
+
i1
-
f1
)
*
delta
-
1
);
wp
[
t
]
=
(
t
+
i1
-
f1
)
*
delta
-
1
;
for
(
size_t
t
=
0
;
t
<
nvec
;
++
t
)
kdata
.
simd
[
t
]
=
kernel
(
kdata
.
simd
[
t
]);
psiarr
[
0
]
=
1.
;
double
psi
=
ptg
(
i
,
2
);
double
cpsi
=
cos
(
psi
),
spsi
=
sin
(
psi
);
...
...
@@ -696,25 +714,25 @@ template<typename T> class Interpolator
{
#ifdef SPECIAL_CASING
case
1
:
deinterpol_help0
<
1
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
1
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
case
2
:
deinterpol_help0
<
2
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
2
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
case
3
:
deinterpol_help0
<
3
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
3
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
case
4
:
deinterpol_help0
<
4
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
4
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
case
5
:
deinterpol_help0
<
5
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
5
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
case
6
:
deinterpol_help0
<
6
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
6
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
case
7
:
deinterpol_help0
<
7
,
1
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
7
,
1
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
#endif
default:
...
...
@@ -759,25 +777,25 @@ template<typename T> class Interpolator
{
#ifdef SPECIAL_CASING
case
1
:
deinterpol_help0
<
1
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
1
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
case
2
:
deinterpol_help0
<
2
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
2
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
case
3
:
deinterpol_help0
<
3
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
3
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
case
4
:
deinterpol_help0
<
4
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
4
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
case
5
:
deinterpol_help0
<
5
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
5
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
case
6
:
deinterpol_help0
<
6
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
6
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
case
7
:
deinterpol_help0
<
7
,
3
>
(
wt
.
data
(),
wp
.
data
()
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
deinterpol_help0
<
7
,
3
>
(
wt
,
wp
,
p
,
d0
,
d1
,
psiarr2
.
data
(),
data
,
i
);
break
;
#endif
default:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment