Commit 2b4d3adf authored by Martin Reinecke's avatar Martin Reinecke
Browse files

seems to work

parent 3dcf3ba0
......@@ -263,15 +263,11 @@ struct RowChan
template<typename T> class EC_Kernel
{
protected:
size_t supp;
T beta, emb_;
T beta;
public:
EC_Kernel(size_t supp_)
: supp(supp_), beta(2.3*supp_), emb_(exp(-beta)) {}
EC_Kernel(size_t supp) : beta(2.3*supp) {}
T operator()(T v) const { return exp(beta*(sqrt(T(1)-v*v)-T(1))); }
T val_no_emb(T v) const { return exp(beta*sqrt(T(1)-v*v)); }
T emb() const { return emb_; }
};
template<typename T> class EC_Kernel_with_correction: public EC_Kernel<T>
......@@ -280,12 +276,12 @@ template<typename T> class EC_Kernel_with_correction: public EC_Kernel<T>
static constexpr T pi = T(3.141592653589793238462643383279502884197L);
int p;
vector<double> x, wgt, psi;
using EC_Kernel<T>::supp;
size_t supp;
public:
using EC_Kernel<T>::operator();
EC_Kernel_with_correction(size_t supp_, size_t nthreads)
: EC_Kernel<T>(supp_), p(int(1.5*supp_+2))
: EC_Kernel<T>(supp_), p(int(1.5*supp_+2)), supp(supp_)
{
legendre_prep(2*p,x,wgt,nthreads);
psi=x;
......@@ -593,6 +589,9 @@ template<typename T, typename T2=complex<T>> class Helper
int bu0, bv0; // start index of the current buffer
vector<T2> rbuf, wbuf;
bool do_w_gridding;
T w0, xdw;
size_t nexp;
size_t nvecs;
void dump() const
......@@ -637,20 +636,24 @@ template<typename T, typename T2=complex<T>> class Helper
T2 *p0w;
T kernel[64] __attribute__ ((aligned(64)));
Helper(const GridderConfig<T> &gconf_, const T2 *grid_r_, T2 *grid_w_)
Helper(const GridderConfig<T> &gconf_, const T2 *grid_r_, T2 *grid_w_, T w0_=-1, T dw_=-1)
: gconf(gconf_), nu(gconf.Nu()), nv(gconf.Nv()), nsafe(gconf.Nsafe()),
supp(gconf.Supp()), beta(gconf.Beta()), grid_r(grid_r_),
grid_w(grid_w_), su(2*nsafe+(1<<logsquare)), sv(2*nsafe+(1<<logsquare)),
bu0(-1000000), bv0(-1000000),
rbuf(su*sv*(grid_r!=nullptr),T(0)),
wbuf(su*sv*(grid_w!=nullptr),T(0)),
nvecs(VLEN<T>::val*((2*supp+VLEN<T>::val-1)/VLEN<T>::val))
do_w_gridding(dw_>0),
w0(w0_),
xdw(T(1)/dw_),
nexp(2*supp + do_w_gridding),
nvecs(VLEN<T>::val*((nexp+VLEN<T>::val-1)/VLEN<T>::val))
{}
~Helper() { if (grid_w) dump(); }
int lineJump() const { return sv; }
void prep(T u_in, T v_in)
T Wfac() const { return kernel[2*supp]; }
void prep(T u_in, T v_in, T w_in)
{
T u, v;
gconf.getpix(u_in, v_in, u, v, iu0, iv0);
......@@ -662,12 +665,12 @@ template<typename T, typename T2=complex<T>> class Helper
kernel[i ] = x0+i*xsupp;
kernel[i+supp] = y0+i*xsupp;
}
for (size_t i=2*supp; i<nvecs; ++i)
if (do_w_gridding)
kernel[2*supp] = min(T(1), xdw*xsupp*abs(w0-w_in));
for (size_t i=nexp; i<nvecs; ++i)
kernel[i]=0;
for (size_t i=0; i<nvecs; ++i)
kernel[i] = exp(beta*sqrt(T(1)-kernel[i]*kernel[i]));
// for (auto &k : kernel)
// k = exp(beta*sqrt(T(1)-k*k));
kernel[i] = exp(beta*(sqrt(T(1)-kernel[i]*kernel[i])-T(1)));
if ((iu0<bu0) || (iv0<bv0) || (iu0+supp>bu0+su) || (iv0+supp>bv0+sv))
{
if (grid_w) { dump(); fill(wbuf.begin(), wbuf.end(), T(0)); }
......@@ -700,6 +703,8 @@ template<class T, class T2, class T3> class VisServ
have_wgt = wgt.size()!=0;
if (have_wgt) checkShape(wgt.shape(), {nvis});
}
VisServ(const VisServ &orig, const const_mav<uint32_t,1> &newidx)
: VisServ(orig.baselines, newidx, orig.vis, orig.wgt) {}
size_t Nvis() const { return nvis; }
const Baselines<T> &getBaselines() const { return baselines; }
UVW<T> getCoord(size_t i) const
......@@ -734,6 +739,8 @@ template<class T, class T2, class T3> class MsServ
have_wgt = wgt.size()!=0;
if (have_wgt) checkShape(wgt.shape(), {nrows, nchan});
}
MsServ(const MsServ &orig, const const_mav<uint32_t,1> &newidx)
: MsServ(orig.baselines, newidx, orig.ms, orig.wgt) {}
size_t Nvis() const { return nvis; }
const Baselines<T> &getBaselines() const { return baselines; }
UVW<T> getCoord(size_t i) const
......@@ -758,19 +765,18 @@ template<class T, class T2, class T3> class MsServ
};
template<typename T, typename Serv> void x2grid_c
(const GridderConfig<T> &gconf, const Serv &srv, mav<complex<T>,2> &grid)
(const GridderConfig<T> &gconf, const Serv &srv, mav<complex<T>,2> &grid, T w0=-1, T dw=-1)
{
size_t nu=gconf.Nu(), nv=gconf.Nv();
checkShape(grid.shape(), {nu, nv});
myassert(grid.contiguous(), "grid is not contiguous");
T beta = gconf.Beta();
size_t supp = gconf.Supp();
size_t nthreads = gconf.Nthreads();
bool do_w_gridding=dw>0;
#pragma omp parallel num_threads(nthreads)
{
Helper<T> hlp(gconf, nullptr, grid.data());
T emb = exp(-2*beta);
Helper<T> hlp(gconf, nullptr, grid.data(), w0, dw);
int jump = hlp.lineJump();
const T * ku = hlp.kernel;
const T * kv = hlp.kernel+supp;
......@@ -782,9 +788,10 @@ template<typename T, typename Serv> void x2grid_c
{
UVW<T> coord = srv.getCoord(ipart);
auto flip = coord.FixW();
hlp.prep(coord.u, coord.v);
hlp.prep(coord.u, coord.v, coord.w);
auto * ptr = hlp.p0w;
auto v(srv.getVis(ipart)*emb);
auto v(srv.getVis(ipart));
if (do_w_gridding) v*=hlp.Wfac();
if (flip) v=conj(v);
for (size_t cu=0; cu<supp; ++cu)
{
......@@ -800,9 +807,9 @@ template<typename T, typename Serv> void x2grid_c
template<typename T> void vis2grid_c
(const Baselines<T> &baselines, const GridderConfig<T> &gconf,
const const_mav<uint32_t,1> &idx, const const_mav<complex<T>,1> &vis,
mav<complex<T>,2> &grid, const const_mav<T,1> &wgt)
mav<complex<T>,2> &grid, const const_mav<T,1> &wgt, T w0=-1, T dw=-1)
{
x2grid_c(gconf, VisServ<T,const_mav<complex<T>,1>,const_mav<T,1>> (baselines, idx, vis, wgt), grid);
x2grid_c(gconf, VisServ<T,const_mav<complex<T>,1>,const_mav<T,1>> (baselines, idx, vis, wgt), grid, w0, dw);
}
template<typename T> void ms2grid_c
......@@ -814,20 +821,20 @@ template<typename T> void ms2grid_c
}
template<typename T, typename Serv> void grid2x_c
(const GridderConfig<T> &gconf, const const_mav<complex<T>,2> &grid, const Serv &srv)
(const GridderConfig<T> &gconf, const const_mav<complex<T>,2> &grid, const Serv &srv,
T w0=-1, T dw=-1)
{
size_t nu=gconf.Nu(), nv=gconf.Nv();
checkShape(grid.shape(), {nu, nv});
myassert(grid.contiguous(), "grid is not contiguous");
T beta = gconf.Beta();
size_t supp = gconf.Supp();
size_t nthreads = gconf.Nthreads();
bool do_w_gridding=dw>=0;
// Loop over sampling points
#pragma omp parallel num_threads(nthreads)
{
Helper<T> hlp(gconf, grid.data(), nullptr);
T emb = exp(-2*beta);
Helper<T> hlp(gconf, grid.data(), nullptr, w0, dw);
int jump = hlp.lineJump();
const T * ku = hlp.kernel;
const T * kv = hlp.kernel+supp;
......@@ -838,7 +845,7 @@ template<typename T, typename Serv> void grid2x_c
{
UVW<T> coord = srv.getCoord(ipart);
auto flip = coord.FixW();
hlp.prep(coord.u, coord.v);
hlp.prep(coord.u, coord.v, coord.w);
complex<T> r = 0;
const auto * ptr = hlp.p0r;
for (size_t cu=0; cu<supp; ++cu)
......@@ -850,16 +857,17 @@ template<typename T, typename Serv> void grid2x_c
ptr += jump;
}
if (flip) r=conj(r);
srv.setVis(ipart, r*emb);
if (do_w_gridding) r*=hlp.Wfac();
srv.setVis(ipart, r);
}
}
}
template<typename T> void grid2vis_c
(const Baselines<T> &baselines, const GridderConfig<T> &gconf,
const const_mav<uint32_t,1> &idx, const const_mav<complex<T>,2> &grid,
mav<complex<T>,1> &vis, const const_mav<T,1> &wgt)
mav<complex<T>,1> &vis, const const_mav<T,1> &wgt, T w0=-1, T dw=-1)
{
grid2x_c(gconf, grid, VisServ<T,mav<complex<T>,1>,const_mav<T,1>> (baselines, idx, vis, wgt));
grid2x_c(gconf, grid, VisServ<T,mav<complex<T>,1>,const_mav<T,1>> (baselines, idx, vis, wgt), w0, dw);
}
template<typename T> void grid2ms_c
......@@ -885,14 +893,12 @@ template<typename T> void apply_holo
ogrid.fill(0);
T beta = gconf.Beta();
size_t supp = gconf.Supp();
// Loop over sampling points
#pragma omp parallel num_threads(nthreads)
{
Helper<T> hlp(gconf, grid.data(), ogrid.data());
T emb = exp(-2*beta);
int jump = hlp.lineJump();
const T * ku = hlp.kernel;
const T * kv = hlp.kernel+supp;
......@@ -902,7 +908,7 @@ template<typename T> void apply_holo
{
UVW<T> coord = baselines.effectiveCoord(idx(ipart));
coord.FixW();
hlp.prep(coord.u, coord.v);
hlp.prep(coord.u, coord.v, 0);
complex<T> r = 0;
const auto * ptr = hlp.p0r;
for (size_t cu=0; cu<supp; ++cu)
......@@ -913,7 +919,6 @@ template<typename T> void apply_holo
r += tmp*ku[cu];
ptr += jump;
}
r*=emb*emb;
if (have_wgt)
{
auto twgt = wgt(ipart);
......@@ -946,7 +951,6 @@ template<typename T> void get_correlations
myassert(size_t(abs(dv))<supp, "|dv| must be smaller than Supp");
size_t nthreads = gconf.Nthreads();
T beta = gconf.Beta();
ogrid.fill(0);
size_t u0, u1, v0, v1;
......@@ -963,7 +967,6 @@ template<typename T> void get_correlations
#pragma omp parallel num_threads(nthreads)
{
Helper<T,T> hlp(gconf, nullptr, ogrid.data());
T emb = exp(-2*beta);
int jump = hlp.lineJump();
const T * ku = hlp.kernel;
const T * kv = hlp.kernel+supp;
......@@ -973,9 +976,9 @@ template<typename T> void get_correlations
{
UVW<T> coord = baselines.effectiveCoord(idx(ipart));
coord.FixW();
hlp.prep(coord.u, coord.v);
hlp.prep(coord.u, coord.v, 0);
auto * wptr = hlp.p0w + u0*jump;
auto f0 = emb*emb;
auto f0 = T(1);
if (have_wgt)
{
auto twgt = wgt(ipart);
......@@ -1091,9 +1094,10 @@ template<typename T, typename Serv> void dirty2x(
}
template<typename T, typename Serv> void wstack_common(
const GridderConfig<T> &gconf, const Serv &srv, T &wmin, vector<T> &wval,
const GridderConfig<T> &gconf, const Serv &srv, T &wmin,
double &dw, size_t &nplanes, vector<size_t> &nvis_plane, vector<int> &minplane, size_t verbosity)
{
cout << "enter" << endl;
auto nx_dirty=gconf.Nxdirty();
auto ny_dirty=gconf.Nydirty();
auto psx=gconf.Pixsize_x();
......@@ -1104,7 +1108,7 @@ template<typename T, typename Serv> void wstack_common(
// determine w values for every visibility, and min/max w;
wmin=T(1e38);
T wmax=T(-1e38);
wval.resize(nvis);
vector<T> wval(nvis);
#pragma omp parallel for num_threads(nthreads) reduction(min:wmin) reduction(max:wmax)
for (size_t ipart=0; ipart<nvis; ++ipart)
......@@ -1145,6 +1149,7 @@ template<typename T, typename Serv> void wstack_common(
for (size_t i=0; i<nplanes; ++i)
nvis_plane[i]+=nvploc[i];
}
cout << "done" << endl;
}
template<typename T, typename Serv> void x2dirty_wstack(
......@@ -1159,13 +1164,12 @@ template<typename T, typename Serv> void x2dirty_wstack(
size_t nthreads = gconf.Nthreads();
EC_Kernel_with_correction<T> kernel(w_supp, nthreads);
T wmin;
vector<T> wval;
double dw;
size_t nplanes;
vector<size_t> nvis_plane;
vector<int> minplane;
if (verbosity>0) cout << "Gridding using improved w-stacking" << endl;
wstack_common(gconf, srv, wmin, wval, dw, nplanes, nvis_plane, minplane, verbosity);
wstack_common(gconf, srv, wmin, dw, nplanes, nvis_plane, minplane, verbosity);
dirty.fill(0);
vector<complex<T>> vis_loc_;
......@@ -1192,17 +1196,15 @@ template<typename T, typename Serv> void x2dirty_wstack(
if ((int(iw)>=minplane[ipart]) && (iw<minplane[ipart]+w_supp))
mapper[cnt++] = ipart;
T tmpval=T(2)/(w_supp*dw);
#pragma omp parallel for num_threads(nthreads)
for (size_t i=0; i<nvis_plane[iw]; ++i)
{
auto ipart = mapper[i];
T x=min(T(1), tmpval*abs(wcur-wval[ipart]));
vis_loc[i] = srv.getVis(ipart)*kernel(x);
vis_loc[i] = srv.getVis(ipart);
idx_loc[i] = srv.getIdx(ipart);
}
grid.fill(0);
vis2grid_c(srv.getBaselines(), gconf, const_mav<uint32_t,1>(idx_loc), const_mav<complex<T>,1>(vis_loc), grid, const_mav<T,1>(nullptr,{0}));
vis2grid_c(srv.getBaselines(), gconf, const_mav<uint32_t,1>(idx_loc), const_mav<complex<T>,1>(vis_loc), grid, const_mav<T,1>(nullptr,{0}), wcur, T(dw));
gconf.grid2dirty_c(grid, tdirty);
gconf.apply_wscreen(tdirty, tdirty, wcur, false);
#pragma omp parallel for num_threads(nthreads)
......@@ -1233,13 +1235,12 @@ template<typename T, typename Serv> void dirty2x_wstack(
size_t nthreads = gconf.Nthreads();
EC_Kernel_with_correction<T> kernel(w_supp, nthreads);
T wmin;
vector<T> wval;
double dw;
size_t nplanes;
vector<size_t> nvis_plane;
vector<int> minplane;
if (verbosity>0) cout << "Degridding using improved w-stacking" << endl;
wstack_common(gconf, srv, wmin, wval, dw, nplanes, nvis_plane, minplane, verbosity);
wstack_common(gconf, srv, wmin, dw, nplanes, nvis_plane, minplane, verbosity);
for (size_t i=0; i<nvis; ++i)
srv.setVis(i, 0.);
......@@ -1285,16 +1286,11 @@ template<typename T, typename Serv> void dirty2x_wstack(
tdirty2(i,j) = tdirty(i,j);
gconf.apply_wscreen(tdirty2, tdirty2, wcur, true);
gconf.dirty2grid_c(tdirty2, grid);
grid2vis_c(srv.getBaselines(), gconf, const_mav<uint32_t,1>(idx_loc), const_mav<complex<T>,2>(grid), vis_loc, const_mav<T,1>(nullptr,{0}));
grid2vis_c(srv.getBaselines(), gconf, const_mav<uint32_t,1>(idx_loc), const_mav<complex<T>,2>(grid), vis_loc, const_mav<T,1>(nullptr,{0}), wcur, T(dw));
T tmpval=T(2)/(w_supp*dw);
#pragma omp parallel for num_threads(nthreads)
for (size_t i=0; i<nvis_plane[iw]; ++i)
{
auto ipart = mapper[i];
T x=min(T(1), tmpval*abs(wcur-wval[ipart]));
srv.addVis(ipart, vis_loc[i]*kernel(x));
}
srv.addVis(mapper[i], vis_loc[i]);
}
}
template<typename T> void dirty2vis_wstack(const Baselines<T> &baselines,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment