Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Simon Perkins
ducc
Commits
54db3189
Commit
54db3189
authored
Apr 24, 2020
by
Martin Reinecke
Browse files
temporary
parent
f53dd11d
Changes
5
Show whitespace changes
Inline
Side-by-side
pyinterpol_ng/demo.py
View file @
54db3189
...
...
@@ -39,14 +39,17 @@ def convolve(alm1, alm2, lmax):
return
job
.
map2alm
(
map
)[
0
]
*
np
.
sqrt
(
4
*
np
.
pi
)
lmax
=
60
kmax
=
13
lmax
=
2048
kmax
=
8
ncomp
=
1
separate
=
True
separate
=
False
nptg
=
10000000
ncomp2
=
ncomp
if
separate
else
1
epsilon
=
1e-4
ofactor
=
2
nthreads
=
0
ofactor
=
1.5
nthreads
=
0
# use as many threads as available
ncomp2
=
ncomp
if
separate
else
1
# get random sky a_lm
# the a_lm arrays follow the same conventions as those in healpy
...
...
@@ -68,38 +71,39 @@ nph = 2*lmax+1
# compute a convolved map at a fixed psi and compare it to a map convolved
# "by hand"
ptg
=
np
.
zeros
((
nth
,
nph
,
3
))
ptg
[:,:,
0
]
=
(
np
.
pi
*
(
0.5
+
np
.
arange
(
nth
))
/
nth
).
reshape
((
-
1
,
1
))
ptg
[:,:,
1
]
=
(
2
*
np
.
pi
*
(
0.5
+
np
.
arange
(
nph
))
/
nph
).
reshape
((
1
,
-
1
))
ptg
[:,:,
2
]
=
np
.
pi
*
0.2
t0
=
time
.
time
()
# do the actual interpolation
bar
=
foo
.
interpol
(
ptg
.
reshape
((
-
1
,
3
))).
reshape
((
nth
,
nph
,
ncomp2
))
print
(
"interpolation time: "
,
time
.
time
()
-
t0
)
plt
.
subplot
(
2
,
2
,
1
)
plt
.
imshow
(
bar
[:,:,
0
])
bar2
=
np
.
zeros
((
nth
,
nph
))
blmfull
=
np
.
zeros
(
slm
.
shape
)
+
0j
blmfull
[
0
:
blm
.
shape
[
0
],:]
=
blm
for
ith
in
range
(
nth
):
rbeamth
=
pyinterpol_ng
.
rotate_alm
(
blmfull
[:,
0
],
lmax
,
ptg
[
ith
,
0
,
2
],
ptg
[
ith
,
0
,
0
],
0
)
for
iph
in
range
(
nph
):
rbeam
=
pyinterpol_ng
.
rotate_alm
(
rbeamth
,
lmax
,
0
,
0
,
ptg
[
ith
,
iph
,
1
])
bar2
[
ith
,
iph
]
=
convolve
(
slm
[:,
0
],
rbeam
,
lmax
).
real
plt
.
subplot
(
2
,
2
,
2
)
plt
.
imshow
(
bar2
)
plt
.
subplot
(
2
,
2
,
3
)
plt
.
imshow
(
bar2
-
bar
[:,:,
0
])
plt
.
show
()
ptg
=
np
.
random
.
uniform
(
0.
,
1.
,
3
*
1000000
).
reshape
(
1000000
,
3
)
#
ptg = np.zeros((nth,nph,3))
#
ptg[:,:,0] = (np.pi*(0.5+np.arange(nth))/nth).reshape((-1,1))
#
ptg[:,:,1] = (2*np.pi*(0.5+np.arange(nph))/nph).reshape((1,-1))
#
ptg[:,:,2] = np.pi*0.2
#
t0=time.time()
#
# do the actual interpolation
#
bar=foo.interpol(ptg.reshape((-1,3))).reshape((nth,nph,ncomp2))
#
print("interpolation time: ", time.time()-t0)
#
plt.subplot(2,2,1)
#
plt.imshow(bar[:,:,0])
#
bar2 = np.zeros((nth,nph))
#
blmfull = np.zeros(slm.shape)+0j
#
blmfull[0:blm.shape[0],:] = blm
#
for ith in range(nth):
#
rbeamth=pyinterpol_ng.rotate_alm(blmfull[:,0], lmax, ptg[ith,0,2],ptg[ith,0,0],0)
#
for iph in range(nph):
#
rbeam=pyinterpol_ng.rotate_alm(rbeamth, lmax, 0, 0, ptg[ith,iph,1])
#
bar2[ith,iph] = convolve(slm[:,0], rbeam, lmax).real
#
plt.subplot(2,2,2)
#
plt.imshow(bar2)
#
plt.subplot(2,2,3)
#
plt.imshow(bar2-bar[:,:,0])
#
plt.show()
ptg
=
np
.
random
.
uniform
(
0.
,
1.
,
3
*
nptg
).
reshape
(
nptg
,
3
)
ptg
[:,
0
]
*=
np
.
pi
ptg
[:,
1
]
*=
2
*
np
.
pi
ptg
[:,
2
]
*=
2
*
np
.
pi
#foo = pyinterpol_ng.PyInterpolator(slm,blm,separate,lmax, kmax, epsilon=1e-6, nthreads=2)
t0
=
time
.
time
()
bar
=
foo
.
interpol
(
ptg
)
del
foo
print
(
"interpolation time: "
,
time
.
time
()
-
t0
)
fake
=
np
.
random
.
uniform
(
0.
,
1.
,
(
ptg
.
shape
[
0
],
ncomp2
))
foo2
=
pyinterpol_ng
.
PyInterpolator
(
lmax
,
kmax
,
ncomp2
,
epsilon
=
epsilon
,
ofactor
=
ofactor
,
nthreads
=
nthreads
)
...
...
pyinterpol_ng/interpol_ng.h
View file @
54db3189
...
...
@@ -22,6 +22,97 @@
namespace
mr
{
#if 0
namespace detail_fft {
using std::vector;
template<typename T, typename T0> aligned_array<T> alloc_tmp_conv
(const fmav_info &info, size_t axis, size_t len)
{
auto othersize = info.size()/info.shape(axis);
constexpr auto vlen = native_simd<T0>::size();
auto tmpsize = len*((othersize>=vlen) ? vlen : 1);
return aligned_array<T>(tmpsize);
}
template<typename Tplan, typename T, typename T0, typename Exec>
MRUTIL_NOINLINE void general_convolve(const fmav<T> &in, fmav<T> &out,
const size_t axis, const vector<T0> &kernel, size_t nthreads,
const Exec &exec, const bool allow_inplace=true)
{
std::shared_ptr<Tplan> plan1, plan2;
size_t l_in=in.shape(axis), l_out=out.shape(axis);
size_t l_min=std::min(l_in, l_out), l_max=std::max(l_in, l_out);
MR_assert(kernel.size()==l_min/2+1, "bad kernel size");
plan1 = get_plan<Tplan>(l_in);
plan2 = get_plan<Tplan>(l_out);
execParallel(
util::thread_count(nthreads, in, axis, native_simd<T0>::size()),
[&](Scheduler &sched) {
constexpr auto vlen = native_simd<T0>::size();
auto storage = alloc_tmp_conv<T,T0>(in, axis, l_max); //FIXME!
multi_iter<vlen> it(in, out, axis, sched.num_threads(), sched.thread_num());
#ifndef MRUTIL_NO_SIMD
if (vlen>1)
while (it.remaining()>=vlen)
{
it.advance(vlen);
auto tdatav = reinterpret_cast<add_vec_t<T> *>(storage.data());
exec(it, in, out, tdatav, *plan1, *plan2, kernel);
}
#endif
while (it.remaining()>0)
{
it.advance(1);
auto buf = allow_inplace && it.stride_out() == 1 ?
&out.vraw(it.oofs(0)) : reinterpret_cast<T *>(storage.data());
exec(it, in, out, buf, *plan1, *plan2, kernel);
}
}); // end of parallel region
}
struct ExecConvR1
{
template <typename T0, typename T, size_t vlen> void operator() (
const multi_iter<vlen> &it, const fmav<T0> &in, fmav<T0> &out,
T * buf, const pocketfft_r<T0> &plan1, const pocketfft_r<T0> &plan2,
const vector<T0> &kernel) const
{
size_t l_in = plan1.length(),
l_out = plan2.length(),
l_min = std::min(l_in, l_out);
copy_input(it, in, buf);
plan1.exec(buf, T0(1), true);
buf[0] *= kernel[0];
for (size_t i=1; i<l_min; ++i)
{ buf[2*i-1]*=kernel[i]; buf[2*i] *=kernel[i]; }
for (size_t i=l_in; i<l_out; ++i) buf[i] = T(0);
plan2.exec(buf, T0(1), false);
copy_output(it, buf, out);
}
};
template<typename T> void convolve_1d(const fmav<T> &in,
fmav<T> &out, size_t axis, const vector<T> &kernel, size_t nthreads=1)
{
// util::sanity_check_onetype(in, out, in.data()==out.data(), axes);
MR_assert(axis<in.ndim(), "bad axis number");
MR_assert(in.ndim()==out.ndim(), "dimensionality mismatch");
if (in.data()==out.data())
MR_assert(in.strides()==out.strides(), "strides mismatch");
for (size_t i=0; i<in.ndim(); ++i)
if (i!=axis)
MR_assert(in.shape(i)==out.shape(i), "shape mismatch");
if (in.size()==0) return;
general_convolve<pocketfft_r<T>>(in, out, axis, kernel, nthreads,
ExecConvR1());
}
}
#endif
namespace
detail_interpol_ng
{
using
namespace
std
;
...
...
@@ -49,6 +140,7 @@ template<typename T> class Interpolator
for
(
size_t
j
=
0
;
j
<
nphi0
;
++
j
)
tmp0
.
v
(
i
,
j
)
=
arr
(
i
,
j
);
// extend to second half
// FIXME: merge with loop above to avoid edundant memory reads.
for
(
size_t
i
=
1
,
i2
=
nphi0
-
1
;
i
+
1
<
ntheta0
;
++
i
,
--
i2
)
for
(
size_t
j
=
0
,
j2
=
nphi0
/
2
;
j
<
nphi0
;
++
j
,
++
j2
)
{
...
...
@@ -56,7 +148,9 @@ template<typename T> class Interpolator
tmp0
.
v
(
i2
,
j
)
=
sfct
*
tmp0
(
i
,
j2
);
}
// FFT to frequency domain on minimal grid
// one bad FFT axis
r2r_fftpack
(
ftmp0
,
ftmp0
,{
0
,
1
},
true
,
true
,
T
(
1.
/
(
nphi0
*
nphi0
)),
nthreads
);
// correct amplitude at Nyquist frequency
for
(
size_t
i
=
0
;
i
<
nphi0
;
++
i
)
{
...
...
@@ -70,7 +164,9 @@ template<typename T> class Interpolator
auto
tmp1
=
tmp
.
template
subarray
<
2
>({
0
,
0
},{
nphi
,
nphi0
});
fmav
<
T
>
ftmp1
(
tmp1
);
// zero-padded FFT in theta direction
// one bad FFT axis
r2r_fftpack
(
ftmp1
,
ftmp1
,{
0
},
false
,
false
,
T
(
1
),
nthreads
);
auto
tmp2
=
tmp
.
template
subarray
<
2
>({
0
,
0
},{
ntheta
,
nphi
});
fmav
<
T
>
ftmp2
(
tmp2
);
fmav
<
T
>
farr
(
arr
);
...
...
pyinterpol_ng/pyinterpol_ng.cc
View file @
54db3189
...
...
@@ -242,16 +242,28 @@ PYBIND11_MODULE(pyinterpol_ng, m)
m
.
doc
()
=
pyinterpol_ng_DS
;
py
::
class_
<
PyInterpolator
<
fptype
>>
(
m
,
"PyInterpolator"
,
pyinterpolator_DS
)
using
inter_d
=
PyInterpolator
<
double
>
;
py
::
class_
<
inter_d
>
(
m
,
"PyInterpolator"
,
pyinterpolator_DS
)
.
def
(
py
::
init
<
const
py
::
array
&
,
const
py
::
array
&
,
bool
,
int64_t
,
int64_t
,
fptype
,
fptype
,
int
>
(),
initnormal_DS
,
"sky"
_a
,
"beam"
_a
,
"separate"
_a
,
"lmax"
_a
,
"kmax"
_a
,
"epsilon"
_a
,
"ofactor"
_a
=
fptype
(
1.5
),
"nthreads"
_a
=
0
)
.
def
(
py
::
init
<
int64_t
,
int64_t
,
int64_t
,
fptype
,
fptype
,
int
>
(),
initadjoint_DS
,
"lmax"
_a
,
"kmax"
_a
,
"ncomp"
_a
,
"epsilon"
_a
,
"ofactor"
_a
=
fptype
(
1.5
),
"nthreads"
_a
=
0
)
.
def
(
"interpol"
,
&
PyInterpolator
<
fptype
>::
pyinterpol
,
interpol_DS
,
"ptg"
_a
)
.
def
(
"deinterpol"
,
&
PyInterpolator
<
fptype
>::
pydeinterpol
,
deinterpol_DS
,
"ptg"
_a
,
"data"
_a
)
.
def
(
"getSlm"
,
&
PyInterpolator
<
fptype
>::
pygetSlm
,
getSlm_DS
,
"beam"
_a
)
.
def
(
"support"
,
&
PyInterpolator
<
fptype
>::
support
);
.
def
(
"interpol"
,
&
inter_d
::
pyinterpol
,
interpol_DS
,
"ptg"
_a
)
.
def
(
"deinterpol"
,
&
inter_d
::
pydeinterpol
,
deinterpol_DS
,
"ptg"
_a
,
"data"
_a
)
.
def
(
"getSlm"
,
&
inter_d
::
pygetSlm
,
getSlm_DS
,
"beam"
_a
)
.
def
(
"support"
,
&
inter_d
::
support
);
// using inter_f = PyInterpolator<float>;
// py::class_<inter_f> (m, "PyInterpolator_f", pyinterpolator_DS)
// .def(py::init<const py::array &, const py::array &, bool, int64_t, int64_t, fptype, fptype, int>(),
// initnormal_DS, "sky"_a, "beam"_a, "separate"_a, "lmax"_a, "kmax"_a, "epsilon"_a, "ofactor"_a=fptype(1.5),
// "nthreads"_a=0)
// .def(py::init<int64_t, int64_t, int64_t, fptype, fptype, int>(), initadjoint_DS,
// "lmax"_a, "kmax"_a, "ncomp"_a, "epsilon"_a, "ofactor"_a=fptype(1.5), "nthreads"_a=0)
// .def ("interpol", &inter_f::pyinterpol, interpol_DS, "ptg"_a)
// .def ("deinterpol", &inter_f::pydeinterpol, deinterpol_DS, "ptg"_a, "data"_a)
// .def ("getSlm", &inter_f::pygetSlm, getSlm_DS, "beam"_a)
// .def ("support", &inter_f::support);
#if 1
m
.
def
(
"rotate_alm"
,
&
pyrotate_alm
<
fptype
>
,
"alm"
_a
,
"lmax"
_a
,
"psi"
_a
,
"theta"
_a
,
"phi"
_a
);
...
...
pypocketfft/demos/stress.py
View file @
54db3189
...
...
@@ -2,8 +2,15 @@ import numpy as np
import
pypocketfft
#def _l2error(a, b, axes):
# return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))/np.log2(np.max([2,np.prod(np.take(a.shape,axes))]))
def
_l2error
(
a
,
b
,
axes
):
return
np
.
sqrt
(
np
.
sum
(
np
.
abs
(
a
-
b
)
**
2
)
/
np
.
sum
(
np
.
abs
(
a
)
**
2
))
/
np
.
log2
(
np
.
max
([
2
,
np
.
prod
(
np
.
take
(
a
.
shape
,
axes
))]))
x1
=
np
.
sqrt
(
np
.
sum
(
np
.
abs
(
a
-
b
)
**
2
)
/
np
.
sum
(
np
.
abs
(
a
)
**
2
))
/
np
.
log2
(
np
.
max
([
2
,
np
.
prod
(
np
.
take
(
a
.
shape
,
axes
))]))
a
=
a
*
np
.
array
([
1.
])
b
=
b
*
np
.
array
([
1.
])
x2
=
np
.
sqrt
(
np
.
sum
(
np
.
abs
(
a
-
b
)
**
2
)
/
np
.
sum
(
np
.
abs
(
a
)
**
2
))
/
np
.
log2
(
np
.
max
([
2
,
np
.
prod
(
np
.
take
(
a
.
shape
,
axes
))]))
print
(
x1
,
x2
,
x1
-
x2
)
return
x2
def
fftn
(
a
,
axes
=
None
,
inorm
=
0
,
out
=
None
,
nthreads
=
1
):
...
...
src/mr_util/math/fft.h
View file @
54db3189
...
...
@@ -604,21 +604,21 @@ template<typename T, typename T0> aligned_array<T> alloc_tmp
auto
tmpsize
=
axsize
*
((
othersize
>=
vlen
)
?
vlen
:
1
);
return
aligned_array
<
T
>
(
tmpsize
);
}
template
<
typename
T
,
typename
T0
>
aligned_array
<
T
>
alloc_tmp
(
const
fmav_info
&
info
,
const
shape_t
&
axes
)
{
size_t
fullsize
=
info
.
size
();
size_t
tmpsize
=
0
;
for
(
size_t
i
=
0
;
i
<
axes
.
size
();
++
i
)
{
auto
axsize
=
info
.
shape
(
axes
[
i
]);
auto
othersize
=
fullsize
/
axsize
;
constexpr
auto
vlen
=
native_simd
<
T0
>::
size
();
auto
sz
=
axsize
*
((
othersize
>=
vlen
)
?
vlen
:
1
);
if
(
sz
>
tmpsize
)
tmpsize
=
sz
;
}
return
aligned_array
<
T
>
(
tmpsize
);
}
//
template<typename T, typename T0> aligned_array<T> alloc_tmp
//
(const fmav_info &info, const shape_t &axes)
//
{
//
size_t fullsize=info.size();
//
size_t tmpsize=0;
//
for (size_t i=0; i<axes.size(); ++i)
//
{
//
auto axsize = info.shape(axes[i]);
//
auto othersize = fullsize/axsize;
//
constexpr auto vlen = native_simd<T0>::size();
//
auto sz = axsize*((othersize>=vlen) ? vlen : 1);
//
if (sz>tmpsize) tmpsize=sz;
//
}
//
return aligned_array<T>(tmpsize);
//
}
template
<
typename
T
,
size_t
vlen
>
void
copy_input
(
const
multi_iter
<
vlen
>
&
it
,
const
fmav
<
Cmplx
<
T
>>
&
src
,
Cmplx
<
native_simd
<
T
>>
*
MRUTIL_RESTRICT
dst
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment