Scheduled maintenance on Monday 2019-06-24 between 10:00-11:00 CEST

Commit ec1cb325 authored by Martin Reinecke's avatar Martin Reinecke

drop GIL when possible

parent f62af027
......@@ -191,6 +191,8 @@ template<typename T> pyarr_c<T> complex2hartley
auto res = makeArray<T>({nu,nv});
auto grid2 = res.mutable_data();
{
py::gil_scoped_release release;
#pragma omp parallel for
for (size_t u=0; u<nu; ++u)
{
......@@ -204,6 +206,7 @@ template<typename T> pyarr_c<T> complex2hartley
grid[i2].real()-grid[i2].imag());
}
}
}
return res;
}
......@@ -216,6 +219,8 @@ template<typename T> pyarr_c<complex<T>> hartley2complex
auto res=makeArray<complex<T>>({nu, nv});
auto grid2 = res.mutable_data();
{
py::gil_scoped_release release;
#pragma omp parallel for
for (size_t u=0; u<nu; ++u)
{
......@@ -230,17 +235,20 @@ template<typename T> pyarr_c<complex<T>> hartley2complex
grid2[i1] = complex<T>(v1+v2, v1-v2);
}
}
}
return res;
}
template<typename T> void hartley2_2D(const pyarr_c<T> &in, pyarr_c<T> &out)
{
size_t nu=in.shape(0), nv=in.shape(1);
pocketfft::r2r_hartley({nu, nv},
{in.strides(0), in.strides(1)},
{out.strides(0), out.strides(1)}, {0,1},
in.data(), out.mutable_data(), T(1), 0);
pocketfft::stride_t s_i{in.strides(0), in.strides(1)},
s_o{out.strides(0), out.strides(1)};
auto d_i = in.data();
auto ptmp = out.mutable_data();
{
py::gil_scoped_release release;
pocketfft::r2r_hartley({nu, nv}, s_i, s_o, {0,1}, d_i, ptmp, T(1), 0);
#pragma omp parallel for
for(size_t i=1; i<(nu+1)/2; ++i)
for(size_t j=1; j<(nv+1)/2; ++j)
......@@ -255,6 +263,7 @@ template<typename T> void hartley2_2D(const pyarr_c<T> &in, pyarr_c<T> &out)
ptmp[(nu-i)*nv+nv-j] = T(0.5)*(b+c+d-a);
}
}
}
/* Compute correction factors for the ES gridding kernel
This implementation follows eqs. (3.8) to (3.10) of Barnett et al. 2018 */
......@@ -305,14 +314,18 @@ template<typename T> class Baselines
checkArray(freq_, "freq", {0});
nrows = coord_.shape(0);
nchan = freq_.shape(0);
myassert(nrows*nchan<(size_t(1)<<32), "too many entries in MS");
auto freq = freq_.data();
auto cood = coord_.data();
{
py::gil_scoped_release release;
f_over_c.resize(nchan);
for (size_t i=0; i<nchan; ++i)
f_over_c[i] = freq_.data()[i]/speedOfLight;
f_over_c[i] = freq[i]/speedOfLight;
coord.resize(nrows);
auto cood = coord_.data();
for (size_t i=0; i<coord.size(); ++i)
coord[i] = UVW<T>(cood[3*i], cood[3*i+1], cood[3*i+2]);
myassert(nrows*nchan<(size_t(1)<<32), "too many entries in MS");
}
}
UVW<T> effectiveCoord(uint32_t index) const
......@@ -337,9 +350,12 @@ template<typename T> class Baselines
auto res=makeArray<T2>({nvis});
auto vis = res.mutable_data();
{
py::gil_scoped_release release;
#pragma omp parallel for
for (size_t i=0; i<nvis; ++i)
vis[i] = ms[idx[i]];
}
return res;
}
......@@ -354,9 +370,12 @@ template<typename T> class Baselines
auto res = provideArray<T2>(ms_in, {nrows, nchan});
auto ms = res.mutable_data();
{
py::gil_scoped_release release;
#pragma omp parallel for
for (size_t i=0; i<nvis; ++i)
ms[idx[i]] = vis[i];
}
return res;
}
};
......@@ -382,6 +401,8 @@ template<typename T> class GridderConfig
beta(2.3*w),
cfu(nx_dirty), cfv(ny_dirty)
{
{
py::gil_scoped_release release;
myassert((nx_dirty&1)==0, "nx_dirty must be even");
myassert((ny_dirty&1)==0, "ny_dirty must be even");
myassert(epsilon>0, "epsilon must be positive");
......@@ -399,6 +420,7 @@ template<typename T> class GridderConfig
for (size_t i=1; i<ny_dirty/2; ++i)
cfv[ny_dirty/2-i] = cfv[ny_dirty/2+i] = tmp[i];
}
}
size_t Nu() const { return nu; }
size_t Nv() const { return nv; }
size_t W() const { return w; }
......@@ -412,6 +434,8 @@ template<typename T> class GridderConfig
hartley2_2D<T>(grid, tmp);
auto res = makeArray<T>({nx_dirty, ny_dirty});
auto pout = res.mutable_data();
{
py::gil_scoped_release release;
for (size_t i=0; i<nx_dirty; ++i)
for (size_t j=0; j<ny_dirty; ++j)
{
......@@ -421,6 +445,7 @@ template<typename T> class GridderConfig
if (j2>=nv) j2-=nv;
pout[ny_dirty*i + j] = ptmp[nv*i2+j2]*cfu[i]*cfv[j];
}
}
return res;
}
pyarr_c<complex<T>> grid2dirty_c(const pyarr_c<complex<T>> &grid) const
......@@ -433,6 +458,8 @@ template<typename T> class GridderConfig
grid.data(), tmp.mutable_data(), T(1), 0);
auto res = makeArray<complex<T>>({nx_dirty, ny_dirty});
auto pout = res.mutable_data();
{
py::gil_scoped_release release;
for (size_t i=0; i<nx_dirty; ++i)
for (size_t j=0; j<ny_dirty; ++j)
{
......@@ -442,6 +469,7 @@ template<typename T> class GridderConfig
if (j2>=nv) j2-=nv;
pout[ny_dirty*i + j] = ptmp[nv*i2+j2]*cfu[i]*cfv[j];
}
}
return res;
}
pyarr_c<T> dirty2grid(const pyarr_c<T> &dirty) const
......@@ -450,6 +478,8 @@ template<typename T> class GridderConfig
auto pdirty = dirty.data();
auto tmp = makeArray<T>({nu, nv});
auto ptmp = tmp.mutable_data();
{
py::gil_scoped_release release;
for (size_t i=0; i<nu*nv; ++i)
ptmp[i] = 0.;
for (size_t i=0; i<nx_dirty; ++i)
......@@ -461,6 +491,7 @@ template<typename T> class GridderConfig
if (j2>=nv) j2-=nv;
ptmp[nv*i2+j2] = pdirty[ny_dirty*i + j]*cfu[i]*cfv[j];
}
}
hartley2_2D<T>(tmp, tmp);
return tmp;
}
......@@ -470,6 +501,9 @@ template<typename T> class GridderConfig
auto pdirty = dirty.data();
auto tmp = makeArray<complex<T>>({nu, nv});
auto ptmp = tmp.mutable_data();
pocketfft::stride_t strides{tmp.strides(0),tmp.strides(1)};
{
py::gil_scoped_release release;
for (size_t i=0; i<nu*nv; ++i)
ptmp[i] = 0.;
for (size_t i=0; i<nx_dirty; ++i)
......@@ -481,9 +515,9 @@ template<typename T> class GridderConfig
if (j2>=nv) j2-=nv;
ptmp[nv*i2+j2] = pdirty[ny_dirty*i + j]*cfu[i]*cfv[j];
}
pocketfft::c2c({nu,nv},{tmp.strides(0),tmp.strides(1)},
{tmp.strides(0), tmp.strides(1)}, {0,1}, pocketfft::FORWARD,
tmp.data(), tmp.mutable_data(), T(1), 0);
pocketfft::c2c({nu,nv}, strides, strides, {0,1}, pocketfft::FORWARD,
ptmp, ptmp, T(1), 0);
}
return tmp;
}
inline void getpix(T u_in, T v_in, T &u, T &v, int &iu0, int &iv0) const
......@@ -605,6 +639,8 @@ template<typename T> pyarr_c<complex<T>> vis2grid_c(const Baselines<T> &baseline
size_t nu=gconf.Nu(), nv=gconf.Nv();
auto res = makeArray<complex<T>>({nu, nv});
auto grid = res.mutable_data();
{
py::gil_scoped_release release;
for (size_t i=0; i<nu*nv; ++i) grid[i] = 0.;
T beta = gconf.Beta();
size_t w = gconf.W();
......@@ -633,6 +669,7 @@ template<typename T> pyarr_c<complex<T>> vis2grid_c(const Baselines<T> &baseline
}
}
} // end of parallel region
}
return res;
}
......@@ -656,6 +693,8 @@ template<typename T> pyarr_c<complex<T>> ms2grid_c(const Baselines<T> &baselines
size_t nu=gconf.Nu(), nv=gconf.Nv();
auto res = makeArray<complex<T>>({nu, nv});
auto grid = res.mutable_data();
{
py::gil_scoped_release release;
for (size_t i=0; i<nu*nv; ++i) grid[i] = 0.;
T beta = gconf.Beta();
size_t w = gconf.W();
......@@ -684,6 +723,7 @@ template<typename T> pyarr_c<complex<T>> ms2grid_c(const Baselines<T> &baselines
}
}
} // end of parallel region
}
return res;
}
......@@ -705,6 +745,8 @@ template<typename T> pyarr_c<complex<T>> grid2vis_c(const Baselines<T> &baseline
auto res = makeArray<complex<T>>({nvis});
auto vis = res.mutable_data();
{
py::gil_scoped_release release;
T beta = gconf.Beta();
size_t w = gconf.W();
......@@ -734,6 +776,7 @@ template<typename T> pyarr_c<complex<T>> grid2vis_c(const Baselines<T> &baseline
vis[ipart] = r*emb;
}
}
}
return res;
}
......@@ -756,6 +799,8 @@ template<typename T> pyarr_c<complex<T>> grid2ms_c(const Baselines<T> &baselines
auto res = provideArray<complex<T>>(ms_in,
{baselines.Nrows(), baselines.Nchannels()});
auto ms = res.mutable_data();
{
py::gil_scoped_release release;
T beta = gconf.Beta();
size_t w = gconf.W();
......@@ -785,6 +830,7 @@ template<typename T> pyarr_c<complex<T>> grid2ms_c(const Baselines<T> &baselines
ms[idx[ipart]] += r*emb;
}
}
}
return res;
}
......@@ -809,6 +855,8 @@ template<typename T> pyarr_c<uint32_t> getIndices(const Baselines<T> &baselines,
nbv = (gconf.Nv()+1+side-1) >> logsquare;
vector<uint32_t> acc(nbu*nbv+1, 0);
vector<uint32_t> tmp(nrow*(chend-chbegin));
{
py::gil_scoped_release release;
for (size_t irow=0, idx=0; irow<nrow; ++irow)
for (int ichan=chbegin; ichan<chend; ++ichan)
if (!flags[irow*nchan+ichan])
......@@ -828,8 +876,11 @@ template<typename T> pyarr_c<uint32_t> getIndices(const Baselines<T> &baselines,
for (size_t i=1; i<acc.size(); ++i)
acc[i] += acc[i-1];
}
auto res = makeArray<uint32_t>({acc.back()});
auto iout = res.mutable_data();
{
py::gil_scoped_release release;
for (size_t irow=0, idx=0; irow<nrow; ++irow)
for (int ichan=chbegin; ichan<chend; ++ichan)
if (!flags[irow*nchan+ichan])
......@@ -838,6 +889,7 @@ template<typename T> pyarr_c<uint32_t> getIndices(const Baselines<T> &baselines,
if ((uvw.w>=wmin) && (uvw.w<wmax))
iout[acc[tmp[idx++]]++] = irow*nchan+ichan;
}
}
return res;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment