Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Simon Perkins
ducc
Commits
9e9294d4
Commit
9e9294d4
authored
Mar 11, 2020
by
Martin Reinecke
Browse files
allow multi-threaded adjoint interpolation
parent
92e73fd9
Changes
2
Hide whitespace changes
Inline
Side-by-side
interpol_ng/demo.py
View file @
9e9294d4
...
...
@@ -91,7 +91,7 @@ ptg=np.random.uniform(0.,1.,3*1000000).reshape(1000000,3)
ptg
[:,
0
]
*=
np
.
pi
ptg
[:,
1
]
*=
2
*
np
.
pi
ptg
[:,
2
]
*=
2
*
np
.
pi
foo
=
interpol_ng
.
PyInterpolator
(
slmT
,
blmT
,
lmax
,
kmax
,
epsilon
=
1e-6
,
nthreads
=
1
)
foo
=
interpol_ng
.
PyInterpolator
(
slmT
,
blmT
,
lmax
,
kmax
,
epsilon
=
1e-6
,
nthreads
=
2
)
bar
=
foo
.
interpol
(
ptg
)
fake
=
np
.
random
.
uniform
(
0.
,
1.
,
ptg
.
shape
[
0
])
foo2
=
interpol_ng
.
PyInterpolator
(
lmax
,
kmax
,
epsilon
=
1e-6
,
nthreads
=
2
)
...
...
interpol_ng/interpol_ng.cc
View file @
9e9294d4
...
...
@@ -283,8 +283,15 @@ template<typename T> class Interpolator
double
xdtheta
=
(
ntheta
-
1
)
/
pi
,
xdphi
=
nphi
/
(
2
*
pi
);
auto
idx
=
getIdx
(
ptg
);
execStatic
(
idx
.
size
(),
1
,
0
,
[
&
](
Scheduler
&
sched
)
// not parallel yet
constexpr
size_t
cellsize
=
16
;
size_t
nct
=
ntheta
/
cellsize
+
5
,
ncp
=
nphi
/
cellsize
+
5
;
mav
<
std
::
mutex
,
2
>
locks
({
nct
,
ncp
});
execStatic
(
idx
.
size
(),
nthreads
,
0
,
[
&
](
Scheduler
&
sched
)
{
size_t
b_theta
=
99999999999999
,
b_phi
=
9999999999999999
;
vector
<
T
>
wt
(
supp
),
wp
(
supp
);
vector
<
T
>
psiarr
(
2
*
kmax
+
1
);
while
(
auto
rng
=
sched
.
getNext
())
for
(
auto
ind
=
rng
.
lo
;
ind
<
rng
.
hi
;
++
ind
)
...
...
@@ -311,11 +318,36 @@ template<typename T> class Interpolator
cnpsi
=
cnpsi
*
cpsi
-
snpsi
*
spsi
;
snpsi
=
tmp
;
}
size_t
b_theta_new
=
i0
/
cellsize
,
b_phi_new
=
i1
/
cellsize
;
if
((
b_theta_new
!=
b_theta
)
||
(
b_phi_new
!=
b_phi
))
{
if
(
b_theta
<
locks
.
shape
(
0
))
// unlock
{
locks
.
v
(
b_theta
,
b_phi
).
unlock
();
locks
.
v
(
b_theta
,
b_phi
+
1
).
unlock
();
locks
.
v
(
b_theta
+
1
,
b_phi
).
unlock
();
locks
.
v
(
b_theta
+
1
,
b_phi
+
1
).
unlock
();
}
b_theta
=
b_theta_new
;
b_phi
=
b_phi_new
;
locks
.
v
(
b_theta
,
b_phi
).
lock
();
locks
.
v
(
b_theta
,
b_phi
+
1
).
lock
();
locks
.
v
(
b_theta
+
1
,
b_phi
).
lock
();
locks
.
v
(
b_theta
+
1
,
b_phi
+
1
).
lock
();
}
for
(
size_t
j
=
0
;
j
<
supp
;
++
j
)
for
(
size_t
k
=
0
;
k
<
supp
;
++
k
)
for
(
size_t
l
=
0
;
l
<
2
*
kmax
+
1
;
++
l
)
cube
.
v
(
i0
+
j
,
i1
+
k
,
l
)
+=
val
*
wt
[
j
]
*
wp
[
k
]
*
psiarr
[
l
];
}
if
(
b_theta
<
locks
.
shape
(
0
))
// unlock
{
locks
.
v
(
b_theta
,
b_phi
).
unlock
();
locks
.
v
(
b_theta
,
b_phi
+
1
).
unlock
();
locks
.
v
(
b_theta
+
1
,
b_phi
).
unlock
();
locks
.
v
(
b_theta
+
1
,
b_phi
+
1
).
unlock
();
}
});
}
void
getSlmx
(
const
Alm
<
complex
<
T
>>
&
blmT
,
Alm
<
complex
<
T
>>
&
slmT
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment