Commit c10d721a authored by Martin Reinecke
Browse files

tweaks

parent 7f820d1e
......@@ -257,7 +257,7 @@ class Buffer
public:
Buffer(int nu_, int nv_, int nspread_)
: nu(nu_), nv(nv_), nspread(nspread_),
su(nspread+min(nspread, nu)), sv(nspread+min(nspread, nv)),
su(nspread+min(3*nspread, nu)), sv(3*nspread+min(nspread, nv)),
u0(-1000000), v0(-1000000)
{}
};
......@@ -366,6 +366,7 @@ a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_,
{
Helper hlp(nu, nv, nspread, r2lamb);
WriteBuffer buf(nu, nv, nspread, grid);
int delta = 2*(buf.sv-nspread);
// Loop over sampling points
#pragma omp for schedule(dynamic,10000)
......@@ -373,11 +374,13 @@ a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_,
{
hlp.update(uv[2*ipart], uv[2*ipart+1], vis[ipart]);
buf.prep_write(hlp.iu, hlp.iv);
auto ptr = buf.p0;
for (int cu=0; cu<2*nspread; ++cu)
{
complex<double> tmp = hlp.val*hlp.ku[cu];
for (int cv=0; cv<2*nspread; ++cv)
buf.p0[cu*2*buf.sv + cv] += tmp*hlp.kv[cv];
*ptr++ += tmp*hlp.kv[cv];
ptr+=delta;
}
}
} // end of parallel region
......@@ -430,18 +433,22 @@ a_c_c from_grid (const a_d_c &uv_, const a_c_c &grid_,
{
Helper hlp(nu, nv, nspread, r2lamb);
ReadBuffer buf(nu, nv, nspread, grid);
int delta = 2*(buf.sv-nspread);
#pragma omp for schedule(dynamic,10000)
for (int ipart=0; ipart<nvis; ++ipart)
{
hlp.update(uv[2*ipart], uv[2*ipart+1], 1.);
complex<double> r = 0.;
buf.prep_read(hlp.iu, hlp.iv);
auto ptr = buf.p0;
for (int cu=0; cu<2*nspread; ++cu)
{
complex<double> tmp = 0.;
for (int cv=0; cv<2*nspread; ++cv)
tmp += buf.p0[cu*2*buf.sv + cv]*hlp.kv[cv];
tmp += (*ptr++) * hlp.kv[cv];
r+=tmp*hlp.ku[cu];
ptr += delta;
}
vis[ipart] = r*hlp.val;
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment