Commit 004be60c authored by Martin Reinecke's avatar Martin Reinecke
Browse files

first steps

parent 3d9ce29d
......@@ -99,8 +99,7 @@ uint32_t morton2peano2D_32(uint32_t v, int bits)
void myassert(bool cond, const char *msg)
{
if (cond) return;
cerr << msg << endl;
throw 42;
throw runtime_error(msg);
}
template<typename It, typename Comp> class IdxComp__
......@@ -185,84 +184,69 @@ a_i_c peanoindex(const a_d_c &uv_, int nu, int nv)
class Helper
{
private:
int nu, nv, nspread, nbuf;
double r2lamb, fac;
vector<double> kernel;
protected:
int nu, nv;
public:
vector<double> ku, kv;
int iu, iv, idxu0, idxv0;
complex<double> val;
int w;
protected:
double beta;
int nsafe, su;
public:
int sv;
Helper(int nu_, int nv_, int nspread_, double r2lamb_)
: nu(nu_), nv(nv_), nspread(nspread_), nbuf(2*nspread_), r2lamb(r2lamb_),
fac(pi/r2lamb), kernel(nspread+1), ku(nbuf), kv(nbuf)
{
// Precompute gridding kernel
for (size_t i=0; i<kernel.size(); ++i)
kernel[i] = exp(-pi/r2lamb*i*i);
}
void update(double u_in, double v_in, complex<double> vis)
vector<double> kernel;
int iu0, iv0; // start index of the current visibility
int bu0, bv0; // start index of the current buffer
void update(double u_in, double v_in)
{
auto u = fmodulo(u_in, 1.)*nu;
iu = min(nu-1, int(u));
auto du = u-iu;
idxu0 = (iu-nspread+1+nu)%nu;
iu0 = int(u-w*0.5 + 1 + nu) - nu;
if (iu0+w>nu+nsafe) iu0 = nu+nsafe-w;
auto v = fmodulo(v_in, 1.)*nv;
iv = min(nv-1, int(v));
auto dv = v-iv;
idxv0 = (iv-nspread+1+nv)%nv;
val = vis*exp(-fac*(du*du + dv*dv));
auto fu0 = exp(2*fac*du);
auto fv0 = exp(2*fac*dv);
auto fu = 1.;
auto fv = 1.;
for (int i=0; i<nspread; ++i)
// cout << "uv0 " << u << " " << v << endl;
iv0 = int(v-w*0.5 + 1 + nv) - nv;
if (iv0+w>nv+nsafe) iv0 = nv+nsafe-w;
// cout << "iuv0 " << iu0 << " " << iv0 << endl;
double xw=2./w;
for (int i=0; i<w; ++i)
{
ku[nspread-i-1] = kernel[i]/fu;
kv[nspread-i-1] = kernel[i]/fv;
fu *= fu0;
fv *= fv0;
ku[nspread+i] = kernel[i+1]*fu;
kv[nspread+i] = kernel[i+1]*fv;
kernel[i ] = xw*(iu0+i-u);
kernel[i+w] = xw*(iv0+i-v);
}
for (int i=0; i<2*w; ++i)
{
kernel[i] = exp(beta*(sqrt(1-kernel[i]*kernel[i])-1.));
// cout << kernel[i] << endl;
}
}
};
bool need_to_move() const
{ return (iu0<bu0) || (iv0<bv0) || (iu0+w>bu0+su) || (iv0+w>bv0+sv); }
class Buffer
{
protected:
int nu, nv, nspread, su;
public:
int sv;
protected:
int u0, v0;
bool need_to_move(int iu, int iv) const
void update_position()
{
return (abs(iu-u0)>su-nspread) || (abs(iv-v0)>sv-nspread);
bu0=max(-nsafe, min(nu+nsafe-su, iu0+nsafe-su/2));
bv0=max(-nsafe, min(nv+nsafe-sv, iv0+nsafe-sv/2));
//cout << "buv0 " << bu0 << " " << bv0 << endl;
}
void update_position(int iu, int iv)
protected:
Helper(int nu_, int nv_, double epsilon)
: nu(nu_), nv(nv_),
w(int(-log10(epsilon)+1.9999)), beta(2.3*w), nsafe((w+1)/2),
su(min(3*w, nu)), sv(min(3*w, nv)),
kernel(2*w),
bu0(-1000000), bv0(-1000000)
{
int safe_u = su-nspread, safe_v = sv-nspread;
u0=max(safe_u, min(nu-1-safe_u, iu));
v0=max(safe_v, min(nv-1-safe_v, iv));
if (min(nu,nv)<2*nsafe) throw runtime_error("grid dimensions too small");
// cout << "beta=" << beta << endl;
// cout << "w=" << w << endl;
// cout << "suv=" << su << " " << sv << endl;
}
public:
Buffer(int nu_, int nv_, int nspread_)
: nu(nu_), nv(nv_), nspread(nspread_),
su(nspread+min(3*nspread, nu)), sv(3*nspread+min(nspread, nv)),
u0(-1000000), v0(-1000000)
{}
};
class WriteBuffer: public Buffer
class WriteHelper: public Helper
{
private:
vector<complex<double>> data;
......@@ -270,17 +254,17 @@ class WriteBuffer: public Buffer
void dump()
{
if (u0<0) return;
if (bu0<-nsafe) return; // nothing written into buffer yet
#pragma omp critical
{
int idxu = (u0-su+1+nu)%nu;
int idxv0 = (v0-sv+1+nv)%nv;
for (int iu=0; iu<2*su; ++iu)
int idxu = (bu0+nu)%nu;
int idxv0 = (bv0+nv)%nv;
for (int iu=0; iu<su; ++iu)
{
int idxv = idxv0;
for (int iv=0; iv<2*sv; ++iv)
for (int iv=0; iv<sv; ++iv)
{
grid[idxu*nv + idxv] += data[iu*2*sv + iv];
grid[idxu*nv + idxv] += data[iu*sv + iv];
if (++idxv>=nv) idxv=0;
}
if (++idxu>=nu) idxu=0;
......@@ -290,24 +274,24 @@ class WriteBuffer: public Buffer
public:
complex<double> *p0;
WriteBuffer(int nu_, int nv_, int nspread_, complex<double> *grid_)
: Buffer(nu_, nv_, nspread_), data(4*su*sv,0.), grid(grid_) {}
~WriteBuffer() { dump(); }
WriteHelper(int nu_, int nv_, double epsilon, complex<double> *grid_)
: Helper(nu_, nv_, epsilon), data(su*sv, 0.), grid(grid_) {}
~WriteHelper() { dump(); }
void prep_write(int iu, int iv)
/* iu = [0; nu-1]; iv = [0; nv-1] */
void prep_write(double u_in, double v_in)
{
if (need_to_move(iu, iv))
update(u_in, v_in);
if (need_to_move())
{
dump();
update_position(iu, iv);
update_position();
fill(data.begin(), data.end(), 0.);
}
p0 = data.data() + 2*sv*(iu-u0+su-nspread) + iv-v0+sv-nspread;
p0 = data.data() + sv*(iu0-bu0) + iv0-bv0;
}
};
class ReadBuffer: public Buffer
class ReadHelper: public Helper
{
private:
vector<complex<double>> data;
......@@ -315,14 +299,14 @@ class ReadBuffer: public Buffer
void load()
{
int idxu = (u0-su+1+nu)%nu;
int idxv0 = (v0-sv+1+nv)%nv;
for (int iu=0; iu<2*su; ++iu)
int idxu = (bu0+nu)%nu;
int idxv0 = (bv0+nv)%nv;
for (int iu=0; iu<su; ++iu)
{
int idxv = idxv0;
for (int iv=0; iv<2*sv; ++iv)
for (int iv=0; iv<sv; ++iv)
{
data[iu*2*sv + iv] = grid[idxu*nv + idxv];
data[iu*sv + iv] = grid[idxu*nv + idxv];
if (++idxv>=nv) idxv=0;
}
if (++idxu>=nu) idxu=0;
......@@ -331,23 +315,23 @@ class ReadBuffer: public Buffer
public:
const complex<double> *p0;
ReadBuffer(int nu_, int nv_, int nspread_, const complex<double> *grid_)
: Buffer(nu_, nv_, nspread_), data(4*su*sv,0.), grid(grid_) {}
ReadHelper(int nu_, int nv_, double epsilon, const complex<double> *grid_)
: Helper(nu_, nv_, epsilon), data(su*sv,0.), grid(grid_), p0(nullptr) {}
void prep_read(int iu, int iv)
/* iu = [0; nu-1]; iv = [0; nv-1] */
void prep_read(double u_in, double v_in)
{
if (need_to_move(iu, iv))
update(u_in, v_in);
if (need_to_move())
{
update_position(iu, iv);
update_position();
load();
}
p0 = data.data() + 2*sv*(iu-u0+su-nspread) + iv-v0+sv-nspread;
p0 = data.data() + sv*(iu0-bu0) + iv0-bv0;
}
};
a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_,
int nu, int nv, int nspread, double r2lamb)
int nu, int nv, double epsilon)
{
myassert(uv_.ndim()==2, "uv array must be 2D");
myassert(uv_.shape(1)==2, "uv.shape[1] must be 2");
......@@ -364,23 +348,20 @@ a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_,
#pragma omp parallel
{
Helper hlp(nu, nv, nspread, r2lamb);
WriteBuffer buf(nu, nv, nspread, grid);
int delta = 2*(buf.sv-nspread);
WriteHelper hlp(nu, nv, epsilon, grid);
// Loop over sampling points
#pragma omp for schedule(dynamic,10000)
for (int ipart=0; ipart<nvis; ++ipart)
{
hlp.update(uv[2*ipart], uv[2*ipart+1], vis[ipart]);
buf.prep_write(hlp.iu, hlp.iv);
auto ptr = buf.p0;
for (int cu=0; cu<2*nspread; ++cu)
hlp.prep_write(uv[2*ipart], uv[2*ipart+1]);
auto ptr = hlp.p0;
for (int cu=0; cu<hlp.w; ++cu)
{
complex<double> tmp = hlp.val*hlp.ku[cu];
for (int cv=0; cv<2*nspread; ++cv)
*ptr++ += tmp*hlp.kv[cv];
ptr+=delta;
complex<double> tmp = vis[ipart]*hlp.kernel[cu];
for (int cv=0; cv<hlp.w; ++cv)
*ptr++ += tmp*hlp.kernel[hlp.w+cv];
ptr+=hlp.sv-hlp.w;
}
}
} // end of parallel region
......@@ -412,7 +393,7 @@ a_d_c to_grid_post (const a_c_c &grid_)
}
a_c_c from_grid (const a_d_c &uv_, const a_c_c &grid_,
int nu, int nv, int nspread, double r2lamb)
int nu, int nv, double epsilon)
{
myassert(uv_.ndim()==2, "uv array must be 2D");
myassert(uv_.shape(1)==2, "uv.shape[1] must be 2");
......@@ -431,26 +412,23 @@ a_c_c from_grid (const a_d_c &uv_, const a_c_c &grid_,
// Loop over sampling points
#pragma omp parallel
{
Helper hlp(nu, nv, nspread, r2lamb);
ReadBuffer buf(nu, nv, nspread, grid);
int delta = 2*(buf.sv-nspread);
ReadHelper hlp(nu, nv, epsilon, grid);
#pragma omp for schedule(dynamic,10000)
for (int ipart=0; ipart<nvis; ++ipart)
{
hlp.update(uv[2*ipart], uv[2*ipart+1], 1.);
hlp.prep_read(uv[2*ipart], uv[2*ipart+1]);
complex<double> r = 0.;
buf.prep_read(hlp.iu, hlp.iv);
auto ptr = buf.p0;
for (int cu=0; cu<2*nspread; ++cu)
auto ptr = hlp.p0;
for (int cu=0; cu<hlp.w; ++cu)
{
complex<double> tmp = 0.;
for (int cv=0; cv<2*nspread; ++cv)
tmp += (*ptr++) * hlp.kv[cv];
r+=tmp*hlp.ku[cu];
ptr += delta;
for (int cv=0; cv<hlp.w; ++cv)
tmp += (*ptr++) * hlp.kernel[hlp.w+cv];
r+=tmp*hlp.kernel[cu];
ptr += hlp.sv-hlp.w;
}
vis[ipart] = r*hlp.val;
vis[ipart] = r;
}
}
return res;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment