Commit f21a205b authored by Martin Reinecke's avatar Martin Reinecke
Browse files

more tweaks

parent 4475d484
Pipeline #81409 passed with stages
in 12 minutes and 20 seconds
...@@ -56,8 +56,6 @@ inline complex<float> hsum_cmplx(native_simd<float> vr, native_simd<float> vi) ...@@ -56,8 +56,6 @@ inline complex<float> hsum_cmplx(native_simd<float> vr, native_simd<float> vi)
auto t2 = _mm_hadd_ps(_mm256_extractf128_ps(t1, 0), _mm256_extractf128_ps(t1, 1)); auto t2 = _mm_hadd_ps(_mm256_extractf128_ps(t1, 0), _mm256_extractf128_ps(t1, 1));
t2 += _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(1,0,3,2)); t2 += _mm_shuffle_ps(t2, t2, _MM_SHUFFLE(1,0,3,2));
return complex<float>(t2[0], t2[1]); return complex<float>(t2[0], t2[1]);
//FIXME perhaps some shuffling?
return complex<float>(t2[0]+t2[2], t2[1]+t2[3]);
} }
#endif #endif
...@@ -78,9 +76,11 @@ template<typename T> void complex2hartley ...@@ -78,9 +76,11 @@ template<typename T> void complex2hartley
MR_assert(grid.conformable(grid2), "shape mismatch"); MR_assert(grid.conformable(grid2), "shape mismatch");
size_t nu=grid.shape(0), nv=grid.shape(1); size_t nu=grid.shape(0), nv=grid.shape(1);
execStatic(nu, nthreads, 0, [&](Scheduler &sched) execParallel(nthreads, [&](Scheduler &sched)
{ {
while (auto rng=sched.getNext()) for(auto u=rng.lo; u<rng.hi; ++u) auto tid = sched.thread_num();
auto [lo, hi] = calcShare(nthreads, tid, nu);
for(auto u=lo; u<hi; ++u)
{ {
size_t xu = (u==0) ? 0 : nu-u; size_t xu = (u==0) ? 0 : nu-u;
for (size_t v=0; v<nv; ++v) for (size_t v=0; v<nv; ++v)
...@@ -99,9 +99,11 @@ template<typename T> void hartley2complex ...@@ -99,9 +99,11 @@ template<typename T> void hartley2complex
MR_assert(grid.conformable(grid2), "shape mismatch"); MR_assert(grid.conformable(grid2), "shape mismatch");
size_t nu=grid.shape(0), nv=grid.shape(1); size_t nu=grid.shape(0), nv=grid.shape(1);
execStatic(nu, nthreads, 0, [&](Scheduler &sched) execParallel(nthreads, [&](Scheduler &sched)
{ {
while (auto rng=sched.getNext()) for(auto u=rng.lo; u<rng.hi; ++u) auto tid = sched.thread_num();
auto [lo, hi] = calcShare(nthreads, tid, nu);
for(auto u=lo; u<hi; ++u)
{ {
size_t xu = (u==0) ? 0 : nu-u; size_t xu = (u==0) ? 0 : nu-u;
for (size_t v=0; v<nv; ++v) for (size_t v=0; v<nv; ++v)
...@@ -133,9 +135,12 @@ template<typename T> void hartley2_2D(mav<T,2> &arr, size_t vlim, ...@@ -133,9 +135,12 @@ template<typename T> void hartley2_2D(mav<T,2> &arr, size_t vlim,
} }
else else
r2r_separable_hartley(farr, farr, {0,1}, T(1), nthreads); r2r_separable_hartley(farr, farr, {0,1}, T(1), nthreads);
execStatic((nu+1)/2-1, nthreads, 0, [&](Scheduler &sched)
execParallel(nthreads, [&](Scheduler &sched)
{ {
while (auto rng=sched.getNext()) for(auto i=rng.lo+1; i<rng.hi+1; ++i) auto tid = sched.thread_num();
auto [lo, hi] = calcShare(nthreads, tid, (nu+1)/2-1);
for(auto i=lo+1; i<hi+1; ++i)
for(size_t j=1; j<(nv+1)/2; ++j) for(size_t j=1; j<(nv+1)/2; ++j)
{ {
T a = arr(i,j); T a = arr(i,j);
...@@ -235,30 +240,30 @@ constexpr int logsquare=4; ...@@ -235,30 +240,30 @@ constexpr int logsquare=4;
template<typename T> class Params template<typename T> class Params
{ {
private: private:
bool gridding; bool gridding;
TimerHierarchy timers; TimerHierarchy timers;
const mav<complex<T>,2> &ms_in; const mav<complex<T>,2> &ms_in;
mav<complex<T>,2> &ms_out; mav<complex<T>,2> &ms_out;
const mav<T,2> &dirty_in; const mav<T,2> &dirty_in;
mav<T,2> &dirty_out; mav<T,2> &dirty_out;
const mav<T,2> &wgt; const mav<T,2> &wgt;
const mav<uint8_t,2> &mask; const mav<uint8_t,2> &mask;
double pixsize_x, pixsize_y; double pixsize_x, pixsize_y;
size_t nxdirty, nydirty; size_t nxdirty, nydirty;
double epsilon; double epsilon;
bool do_wgridding; bool do_wgridding;
size_t nthreads; size_t nthreads;
size_t verbosity; size_t verbosity;
bool negate_v, divide_by_n; bool negate_v, divide_by_n;
Baselines bl; Baselines bl;
VVR ranges; VVR ranges;
double wmin_d, wmax_d; double wmin_d, wmax_d;
size_t nvis; size_t nvis;
double wmin, dw; double wmin, dw;
size_t nplanes; size_t nplanes;
double nm1min; double nm1min;
vector<uint8_t> active; vector<uint8_t> active;
size_t nu, nv; size_t nu, nv;
double ofactor; double ofactor;
...@@ -288,9 +293,11 @@ template<typename T> class Params ...@@ -288,9 +293,11 @@ template<typename T> class Params
checkShape(dirty.shape(), {nxdirty, nydirty}); checkShape(dirty.shape(), {nxdirty, nydirty});
auto cfu = krn->corfunc(nxdirty/2+1, 1./nu, nthreads); auto cfu = krn->corfunc(nxdirty/2+1, 1./nu, nthreads);
auto cfv = krn->corfunc(nydirty/2+1, 1./nv, nthreads); auto cfv = krn->corfunc(nydirty/2+1, 1./nv, nthreads);
execStatic(nxdirty, nthreads, 0, [&](Scheduler &sched) execParallel(nthreads, [&](Scheduler &sched)
{ {
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i) auto tid = sched.thread_num();
auto [lo, hi] = calcShare(nthreads, tid, nxdirty);
for (auto i=lo; i<hi; ++i)
{ {
int icfu = abs(int(nxdirty/2)-int(i)); int icfu = abs(int(nxdirty/2)-int(i));
for (size_t j=0; j<nydirty; ++j) for (size_t j=0; j<nydirty; ++j)
...@@ -311,13 +318,15 @@ template<typename T> class Params ...@@ -311,13 +318,15 @@ template<typename T> class Params
checkShape(dirty.shape(), {nxdirty,nydirty}); checkShape(dirty.shape(), {nxdirty,nydirty});
double x0 = -0.5*nxdirty*pixsize_x, double x0 = -0.5*nxdirty*pixsize_x,
y0 = -0.5*nydirty*pixsize_y; y0 = -0.5*nydirty*pixsize_y;
execStatic(nxdirty/2+1, nthreads, 0, [&](Scheduler &sched) execParallel(nthreads, [&](Scheduler &sched)
{ {
auto tid = sched.thread_num();
auto [lo, hi] = calcShare(nthreads, tid, nxdirty/2+1);
using vtype = native_simd<T>; using vtype = native_simd<T>;
constexpr size_t vlen=vtype::size(); constexpr size_t vlen=vtype::size();
size_t nvec = (nydirty/2+1+(vlen-1))/vlen; size_t nvec = (nydirty/2+1+(vlen-1))/vlen;
vector<vtype> ph(nvec), sp(nvec), cp(nvec); vector<vtype> ph(nvec), sp(nvec), cp(nvec);
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i) for (auto i=lo; i<hi; ++i)
{ {
T fx = T(x0+i*pixsize_x); T fx = T(x0+i*pixsize_x);
fx *= fx; fx *= fx;
...@@ -398,10 +407,25 @@ template<typename T> class Params ...@@ -398,10 +407,25 @@ template<typename T> class Params
auto cfu = krn->corfunc(nxdirty/2+1, 1./nu, nthreads); auto cfu = krn->corfunc(nxdirty/2+1, 1./nu, nthreads);
auto cfv = krn->corfunc(nydirty/2+1, 1./nv, nthreads); auto cfv = krn->corfunc(nydirty/2+1, 1./nv, nthreads);
// FIXME: maybe we don't have to fill everything and can save some time // FIXME: maybe we don't have to fill everything and can save some time
grid.fill(0); // grid.fill(0);
execStatic(nxdirty, nthreads, 0, [&](Scheduler &sched) execParallel(nthreads, [&](Scheduler &sched)
{ {
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i) auto tid = sched.thread_num();
auto [lo, hi] = calcShare(nthreads, tid, nu);
for (auto i=lo; i<hi; ++i)
{
size_t lo2=0, hi2=nv;
if ((i<nxdirty/2) || (i>=nu-nxdirty/2))
{ lo2=nydirty/2; hi2=nv-nydirty/2+1; }
for (auto j=lo2; j<hi2; ++j)
grid.v(i,j) = 0;
}
});
execParallel(nthreads, [&](Scheduler &sched)
{
auto tid = sched.thread_num();
auto [lo, hi] = calcShare(nthreads, tid, nxdirty);
for (auto i=lo; i<hi; ++i)
{ {
int icfu = abs(int(nxdirty/2)-int(i)); int icfu = abs(int(nxdirty/2)-int(i));
for (size_t j=0; j<nydirty; ++j) for (size_t j=0; j<nydirty; ++j)
...@@ -422,17 +446,32 @@ template<typename T> class Params ...@@ -422,17 +446,32 @@ template<typename T> class Params
checkShape(dirty.shape(), {nxdirty, nydirty}); checkShape(dirty.shape(), {nxdirty, nydirty});
checkShape(grid.shape(), {nu, nv}); checkShape(grid.shape(), {nu, nv});
// FIXME: maybe we don't have to fill everything and can save some time // FIXME: maybe we don't have to fill everything and can save some time
grid.fill(0); // grid.fill(0);
execParallel(nthreads, [&](Scheduler &sched)
{
auto tid = sched.thread_num();
auto [lo, hi] = calcShare(nthreads, tid, nu);
for (auto i=lo; i<hi; ++i)
{
size_t lo2=0, hi2=nv;
if ((i<nxdirty/2) || (i>=nu-nxdirty/2))
{ lo2=nydirty/2; hi2=nv-nydirty/2+1; }
for (auto j=lo2; j<hi2; ++j)
grid.v(i,j) = 0;
}
});
double x0 = -0.5*nxdirty*pixsize_x, double x0 = -0.5*nxdirty*pixsize_x,
y0 = -0.5*nydirty*pixsize_y; y0 = -0.5*nydirty*pixsize_y;
execStatic(nxdirty/2+1, nthreads, 0, [&](Scheduler &sched) execParallel(nthreads, [&](Scheduler &sched)
{ {
auto tid = sched.thread_num();
auto [lo, hi] = calcShare(nthreads, tid, nxdirty/2+1);
using vtype = native_simd<T>; using vtype = native_simd<T>;
constexpr size_t vlen=vtype::size(); constexpr size_t vlen=vtype::size();
size_t nvec = (nydirty/2+1+(vlen-1))/vlen; size_t nvec = (nydirty/2+1+(vlen-1))/vlen;
vector<vtype> ph(nvec), sp(nvec), cp(nvec); vector<vtype> ph(nvec), sp(nvec), cp(nvec);
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i) for(auto i=lo; i<hi; ++i)
{ {
T fx = T(x0+i*pixsize_x); T fx = T(x0+i*pixsize_x);
fx *= fx; fx *= fx;
...@@ -511,784 +550,799 @@ template<typename T> class Params ...@@ -511,784 +550,799 @@ template<typename T> class Params
iv0 = min(int(v+vshift)-int(nv), maxiv0); iv0 = min(int(v+vshift)-int(nv), maxiv0);
} }
void report() void report()
{
if (verbosity==0) return;
cout << (gridding ? "Gridding" : "Degridding")
<< ": nthreads=" << nthreads << ", "
<< "dirty=(" << nxdirty << "x" << nydirty << "), "
<< "grid=(" << nu << "x" << nv;
if (nplanes>0) cout << "x" << nplanes;
cout << "), nvis=" << nvis
<< ", supp=" << supp
<< ", eps=" << (epsilon * ((nplanes==0) ? 2 : 3))
<< endl;
cout << " w=[" << wmin_d << "; " << wmax_d << "], min(n-1)=" << nm1min << ", dw=" << dw
<< ", wmax/dw=" << wmax_d/dw << ", nranges=" << ranges.size() << endl;
}
void scanData()
{
timers.push("Initial scan");
size_t nrow=bl.Nrows(),
nchan=bl.Nchannels();
bool have_wgt=wgt.size()!=0;
if (have_wgt) checkShape(wgt.shape(),{nrow,nchan});
bool have_ms=ms_in.size()!=0;
if (have_ms) checkShape(ms_in.shape(), {nrow,nchan});
bool have_mask=mask.size()!=0;
if (have_mask) checkShape(mask.shape(), {nrow,nchan});
active.resize(nrow*nchan, 0);
nvis=0;
wmin_d=1e300;
wmax_d=-1e300;
mutex mut;
execParallel(nthreads, [&](Scheduler &sched)
{
double lwmin_d=1e300, lwmax_d=-1e300;
size_t lnvis=0;
auto tid = sched.thread_num();
auto [lo, hi] = calcShare(nthreads, tid, nrow);
for(auto irow=lo; irow<hi; ++irow)
for (size_t ichan=0, idx=irow*nchan; ichan<nchan; ++ichan, ++idx)
if (((!have_ms ) || (norm(ms_in(irow,ichan))!=0)) &&
((!have_wgt) || (wgt(irow,ichan)!=0)) &&
((!have_mask) || (mask(irow,ichan)!=0)))
{
++lnvis;
active[irow*nchan+ichan] = 1;
auto uvw = bl.effectiveCoord(irow,ichan);
double w = abs(uvw.w);
lwmin_d = min(lwmin_d, w);
lwmax_d = max(lwmax_d, w);
}
{
lock_guard<mutex> lock(mut);
wmin_d = min(wmin_d, lwmin_d);
wmax_d = max(wmax_d, lwmax_d);
nvis += lnvis;
}
});
timers.pop();
}
auto getNuNv()
{
timers.push("parameter calculation");
double x0 = -0.5*nxdirty*pixsize_x,
y0 = -0.5*nydirty*pixsize_y;
nm1min = sqrt(max(1.-x0*x0-y0*y0,0.))-1.;
if (x0*x0+y0*y0>1.)
nm1min = -sqrt(abs(1.-x0*x0-y0*y0))-1.;
auto idx = getAvailableKernels<T>(epsilon);
double mincost = 1e300;
constexpr double nref_fft=2048;
constexpr double costref_fft=0.0693;
size_t minnu=0, minnv=0, minidx=KernelDB.size();
constexpr size_t vlen = native_simd<T>::size();
for (size_t i=0; i<idx.size(); ++i)
{
const auto &krn(KernelDB[idx[i]]);
auto supp = krn.W;
auto nvec = (supp+vlen-1)/vlen;
auto ofactor = krn.ofactor;
size_t nu=2*good_size_complex(size_t(nxdirty*ofactor*0.5)+1);
size_t nv=2*good_size_complex(size_t(nydirty*ofactor*0.5)+1);
double logterm = log(nu*nv)/log(nref_fft*nref_fft);
double fftcost = nu/nref_fft*nv/nref_fft*logterm*costref_fft;
double gridcost = 2.2e-10*nvis*(supp*nvec*vlen + ((2*nvec+1)*(supp+3)*vlen));
if (do_wgridding)
{ {
double dw = 0.5/ofactor/abs(nm1min); if (verbosity==0) return;
size_t nplanes = size_t((wmax_d-wmin_d)/dw+supp); cout << (gridding ? "Gridding" : "Degridding")
fftcost *= nplanes; << ": nthreads=" << nthreads << ", "
gridcost *= supp; << "dirty=(" << nxdirty << "x" << nydirty << "), "
<< "grid=(" << nu << "x" << nv;
if (nplanes>0) cout << "x" << nplanes;
cout << "), nvis=" << nvis
<< ", supp=" << supp
<< ", eps=" << (epsilon * ((nplanes==0) ? 2 : 3))
<< endl;
cout << " w=[" << wmin_d << "; " << wmax_d << "], min(n-1)=" << nm1min << ", dw=" << dw
<< ", wmax/dw=" << wmax_d/dw << ", nranges=" << ranges.size() << endl;
} }
double cost = fftcost+gridcost;
if (cost<mincost) void scanData()
{ {
mincost=cost; timers.push("Initial scan");
minnu=nu; size_t nrow=bl.Nrows(),
minnv=nv; nchan=bl.Nchannels();
minidx = idx[i]; bool have_wgt=wgt.size()!=0;
if (have_wgt) checkShape(wgt.shape(),{nrow,nchan});
bool have_ms=ms_in.size()!=0;
if (have_ms) checkShape(ms_in.shape(), {nrow,nchan});
bool have_mask=mask.size()!=0;
if (have_mask) checkShape(mask.shape(), {nrow,nchan});
active.resize(nrow*nchan, 0);
nvis=0;
wmin_d=1e300;
wmax_d=-1e300;
mutex mut;
execParallel(nthreads, [&](Scheduler &sched)
{
double lwmin_d=1e300, lwmax_d=-1e300;
size_t lnvis=0;
auto tid = sched.thread_num();
auto [lo, hi] = calcShare(nthreads, tid, nrow);
for(auto irow=lo; irow<hi; ++irow)
for (size_t ichan=0, idx=irow*nchan; ichan<nchan; ++ichan, ++idx)
if (((!have_ms ) || (norm(ms_in(irow,ichan))!=0)) &&
((!have_wgt) || (wgt(irow,ichan)!=0)) &&
((!have_mask) || (mask(irow,ichan)!=0)))
{
++lnvis;
active[irow*nchan+ichan] = 1;
auto uvw = bl.effectiveCoord(irow,ichan);
double w = abs(uvw.w);
lwmin_d = min(lwmin_d, w);
lwmax_d = max(lwmax_d, w);
}
{
lock_guard<mutex> lock(mut);
wmin_d = min(wmin_d, lwmin_d);
wmax_d = max(wmax_d, lwmax_d);
nvis += lnvis;
}
});
timers.pop();
} }
}
timers.pop();
nu = minnu;
nv = minnv;
return minidx;
}
void countRanges() auto getNuNv()
{
timers.push("range count");
size_t nrow=bl.Nrows(),
nchan=bl.Nchannels();
dw = 0.5/ofactor/abs(nm1min);
nplanes = size_t((wmax_d-wmin_d)/dw+supp);
wmin = (wmin_d+wmax_d)*0.5 - 0.5*(nplanes-1)*dw;
struct bufvec
{
VVR v;
uint64_t dummy[8];
};
auto Sorter = [](const visrange &a, const visrange &b) { return a.uvwidx()<b.uvwidx(); };
vector<bufvec> lranges(nthreads);
execParallel(nthreads, [&](Scheduler &sched)
{
auto tid = sched.thread_num();
auto &myranges(lranges[tid].v);
auto [lo, hi] = calcShare(nthreads, tid, nrow);
for(auto irow=lo; irow<hi; ++irow)
{ {
bool on=false; timers.push("parameter calculation");
int iulast, ivlast, plast; double x0 = -0.5*nxdirty*pixsize_x,
size_t chan0=0; y0 = -0.5*nydirty*pixsize_y;
for (size_t ichan=0; ichan<nchan; ++ichan) nm1min = sqrt(max(1.-x0*x0-y0*y0,0.))-1.;
if (x0*x0+y0*y0>1.)
nm1min = -sqrt(abs(1.-x0*x0-y0*y0))-1.;
auto idx = getAvailableKernels<T>(epsilon);
double mincost = 1e300;
constexpr double nref_fft=2048;
constexpr double costref_fft=0.0693;
size_t minnu=0, minnv=0, minidx=KernelDB.size();
constexpr size_t vlen = native_simd<T>::size();
for (size_t i=0; i<idx.size(); ++i)
{ {
if (active[irow*nchan+ichan]) const auto &krn(KernelDB[idx[i]]);
auto supp = krn.W;
auto nvec = (supp+vlen-1)/vlen;
auto ofactor = krn.ofactor;
size_t nu=2*good_size_complex(size_t(nxdirty*ofactor*0.5)+1);
size_t nv=2*good_size_complex(size_t(nydirty*ofactor*0.5)+1);
double logterm = log(nu*nv)/log(nref_fft*nref_fft);
double fftcost = nu/nref_fft*nv/nref_fft*logterm*costref_fft;
double gridcost = 2.2e-10*nvis*(supp*nvec*vlen + ((2*nvec+1)*(supp+3)*vlen));
if (do_wgridding)
{ {
auto uvw = bl.effectiveCoord(irow, ichan); double dw = 0.5/ofactor/abs(nm1min);
if (uvw.w<0) uvw.Flip(); size_t nplanes = size_t((wmax_d-wmin_d)/dw+supp);
double u, v; fftcost *= nplanes;
int iu0, iv0, iw; gridcost *= supp;
getpix(uvw.u, uvw.v, u, v, iu0, iv0);
iu0 = (iu0+nsafe)>>logsquare;
iv0 = (iv0+nsafe)>>logsquare;
iw = max(0,int(1+(abs(uvw.w)-(0.5*supp*dw)-wmin)/dw));
if (!on) // new active region
{
on=true;
iulast=iu0; ivlast=iv0; plast=iw; chan0=ichan;
}
else if ((iu0!=iulast) || (iv0!=ivlast) || (iw!=plast)) // change of active region
{
myranges.emplace_back(iulast, ivlast, plast, irow, chan0, ichan);
iulast=iu0; ivlast=iv0; plast=iw; chan0=ichan;
}
} }
else if (on) // end of active region double cost = fftcost+gridcost;
if (cost<mincost)
{ {
myranges.emplace_back(iulast, ivlast, plast, irow, chan0, ichan); mincost=cost;
on=false; minnu=nu;
minnv=nv;
minidx = idx[i];
} }
} }
if (on) // end of active region at last channel timers.pop();
myranges.emplace_back(iulast, ivlast, plast, irow, chan0, nchan); nu = minnu;
nv = minnv;
return minidx;
} }
sort(myranges.begin(), myranges.end(), Sorter);
});
// free mask memory void countRanges()
vector<uint8_t>().swap(active);
timers.poppush("range merging");
size_t nth = nthreads;
while (nth>1)
{
auto nmerge=nth/2;
execParallel(nmerge, [&](Scheduler &sched)
{ {
auto tid = sched.thread_num(); timers.push("range count");
auto tid_partner = nth-1-tid; size_t nrow=bl.Nrows(),
VVR tmp; nchan=bl.Nchannels();
tmp.reserve(lranges[tid].v.size() + lranges[tid_partner].v.size());
merge(lranges[tid].v.begin(), lranges[tid].v.end(),
lranges[tid_partner].v.begin(), lranges[tid_partner].v.end(),
back_inserter(tmp), Sorter);
lranges[tid].v.swap(tmp);
VVR().swap(lranges[tid_partner].v);
});
nth-=nmerge;
}
ranges.swap(lranges[0].v);
timers.pop();
}
void apply_global_corrections(mav<T,2> &dirty) dw = 0.5/ofactor/abs(nm1min);
{ nplanes = size_t((wmax_d-wmin_d)/dw+supp);
timers.push("global corrections"); wmin = (wmin_d+wmax_d)*0.5 - 0.5*(nplanes-1)*dw;
double x0 = -0.5*nxdirty*pixsize_x,
y0 = -0.5*nydirty*pixsize_y; struct bufvec
auto cfu = krn->corfunc(nxdirty/2+1, 1./nu, nthreads);
auto cfv = krn->corfunc(nydirty/2+1, 1./nv, nthreads);
execStatic(nxdirty/2+1, nthreads, 0, [&](Scheduler &sched)
{
while (auto rng=sched.getNext()) for(auto i=rng.lo; i<rng.hi; ++i)
{
auto fx = T(x0+i*pixsize_x);
fx *= fx;
for (size_t j=0; j<=nydirty/2; ++j)
{ {
auto fy = T(y0+j*pixsize_y); VVR v;
fy*=fy; uint64_t dummy[8];
T fct = 0; };
auto tmp = 1-fx-fy; auto Sorter = [](const visrange &a, const visrange &b) { return a.uvwidx()<b.uvwidx(); };