Commit 9e9294d4 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

allow multi-threaded adjoint interpolation

parent 92e73fd9
Pipeline #70635 passed with stages
in 12 minutes and 42 seconds
......@@ -91,7 +91,7 @@ ptg=np.random.uniform(0.,1.,3*1000000).reshape(1000000,3)
ptg[:,0]*=np.pi
ptg[:,1]*=2*np.pi
ptg[:,2]*=2*np.pi
foo = interpol_ng.PyInterpolator(slmT,blmT,lmax, kmax, epsilon=1e-6, nthreads=1)
foo = interpol_ng.PyInterpolator(slmT,blmT,lmax, kmax, epsilon=1e-6, nthreads=2)
bar=foo.interpol(ptg)
fake = np.random.uniform(0.,1., ptg.shape[0])
foo2 = interpol_ng.PyInterpolator(lmax, kmax, epsilon=1e-6, nthreads=2)
......
......@@ -283,8 +283,15 @@ template<typename T> class Interpolator
double xdtheta = (ntheta-1)/pi,
xdphi = nphi/(2*pi);
auto idx = getIdx(ptg);
execStatic(idx.size(), 1, 0, [&](Scheduler &sched) // not parallel yet
constexpr size_t cellsize=16;
size_t nct = ntheta/cellsize+5,
ncp = nphi/cellsize+5;
mav<std::mutex,2> locks({nct,ncp});
execStatic(idx.size(), nthreads, 0, [&](Scheduler &sched)
{
size_t b_theta=99999999999999, b_phi=9999999999999999;
vector<T> wt(supp), wp(supp);
vector<T> psiarr(2*kmax+1);
while (auto rng=sched.getNext()) for(auto ind=rng.lo; ind<rng.hi; ++ind)
......@@ -311,11 +318,36 @@ template<typename T> class Interpolator
cnpsi=cnpsi*cpsi - snpsi*spsi;
snpsi=tmp;
}
size_t b_theta_new = i0/cellsize,
b_phi_new = i1/cellsize;
if ((b_theta_new!=b_theta) || (b_phi_new!=b_phi))
{
if (b_theta<locks.shape(0)) // unlock
{
locks.v(b_theta,b_phi).unlock();
locks.v(b_theta,b_phi+1).unlock();
locks.v(b_theta+1,b_phi).unlock();
locks.v(b_theta+1,b_phi+1).unlock();
}
b_theta = b_theta_new;
b_phi = b_phi_new;
locks.v(b_theta,b_phi).lock();
locks.v(b_theta,b_phi+1).lock();
locks.v(b_theta+1,b_phi).lock();
locks.v(b_theta+1,b_phi+1).lock();
}
for (size_t j=0; j<supp; ++j)
for (size_t k=0; k<supp; ++k)
for (size_t l=0; l<2*kmax+1; ++l)
cube.v(i0+j,i1+k,l) += val*wt[j]*wp[k]*psiarr[l];
}
if (b_theta<locks.shape(0)) // unlock
{
locks.v(b_theta,b_phi).unlock();
locks.v(b_theta,b_phi+1).unlock();
locks.v(b_theta+1,b_phi).unlock();
locks.v(b_theta+1,b_phi+1).unlock();
}
});
}
void getSlmx (const Alm<complex<T>> &blmT, Alm<complex<T>> &slmT)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment