Commit f113decb authored by Martin Reinecke
Browse files

add timing report

parent bacdff6d
Pipeline #80556 passed with stages
in 18 minutes and 14 seconds
......@@ -227,6 +227,7 @@ template<typename T> class GridderConfig
// FIXME: this should probably be done more cleanly
public:
TimerHierarchy &timers;
shared_ptr<HornerKernel<T>> krn;
protected:
......@@ -252,10 +253,11 @@ template<typename T> class GridderConfig
public:
GridderConfig(size_t nxdirty, size_t nydirty, size_t nu_, size_t nv_,
size_t kidx, double epsilon_, double pixsize_x, double pixsize_y,
const Baselines &baselines, size_t nthreads_)
const Baselines &baselines, size_t nthreads_, TimerHierarchy &timers_)
: nx_dirty(nxdirty), ny_dirty(nydirty), nu(nu_), nv(nv_),
epsilon(epsilon_),
ofactor(min(double(nu)/nxdirty, double(nv)/nydirty)),
timers(timers_),
krn(selectKernel<T>(ofactor, epsilon,kidx)),
psx(pixsize_x), psy(pixsize_y),
supp(krn->support()), nsafe((supp+1)/2),
......@@ -367,19 +369,22 @@ template<typename T> class GridderConfig
});
}
void grid2dirty(const mav<T,2> &grid,
mav<T,2> &dirty) const
void grid2dirty(const mav<T,2> &grid, mav<T,2> &dirty) const
{
timers.push("FFT");
checkShape(grid.shape(), {nu,nv});
mav<T,2> tmav({nu,nv});
tmav.apply(grid, [](T&a, T b) {a=b;});
hartley2_2D<T>(tmav, vlim, uv_side_fast, nthreads);
timers.poppush("grid correction");
grid2dirty_post(tmav, dirty);
timers.pop();
}
void grid2dirty_c_overwrite_wscreen_add
(mav<complex<T>,2> &grid, mav<T,2> &dirty, T w) const
{
timers.push("FFT");
checkShape(grid.shape(), {nu,nv});
fmav<complex<T>> inout(grid);
if (2*vlim<nv)
......@@ -395,7 +400,9 @@ template<typename T> class GridderConfig
}
else
c2c(inout, inout, {0,1}, BACKWARD, T(1), nthreads);
timers.poppush("wscreen+grid correction");
grid2dirty_post2(grid, dirty, w);
timers.pop();
}
void dirty2grid_pre(const mav<T,2> &dirty,
......@@ -482,14 +489,19 @@ template<typename T> class GridderConfig
// Converts `dirty` into `grid` and records the time spent in the timer
// hierarchy under "grid correction" and "FFT".
//   dirty: input array, consumed by dirty2grid_pre().
//   grid : output array; filled by dirty2grid_pre() and then transformed
//          in place by hartley2_2D().
void dirty2grid(const mav<T,2> &dirty,
mav<T,2> &grid) const
{
timers.push("grid correction");
dirty2grid_pre(dirty, grid);
// close the "grid correction" section and open "FFT" in a single step
timers.poppush("FFT");
// note the negated uv_side_fast flag compared to grid2dirty()
hartley2_2D<T>(grid, vlim, !uv_side_fast, nthreads);
timers.pop();
}
void dirty2grid_c_wscreen(const mav<T,2> &dirty,
mav<complex<T>,2> &grid, T w) const
{
timers.push("wscreen+grid correction");
dirty2grid_pre2(dirty, grid, w);
timers.poppush("FFT");
fmav<complex<T>> inout(grid);
if (2*vlim<nv)
{
......@@ -504,6 +516,7 @@ template<typename T> class GridderConfig
}
else
c2c(inout, inout, {0,1}, FORWARD, T(1), nthreads);
timers.pop();
}
[[gnu::always_inline]] void getpix(double u_in, double v_in, double &u, double &v, int &iu0, int &iv0) const
......@@ -812,6 +825,7 @@ template<bool wgrid, typename T, typename Serv> void x2grid_c
(const GridderConfig<T> &gconf, Serv &srv, mav<complex<T>,2> &grid,
double w0=-1, double dw=-1)
{
gconf.timers.push("gridding proper");
checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()});
switch(gconf.Supp())
......@@ -831,6 +845,7 @@ template<bool wgrid, typename T, typename Serv> void x2grid_c
case 16: x2grid_c_helper<16, wgrid>(gconf, srv, grid, w0, dw); break;
default: MR_fail("must not happen");
}
gconf.timers.pop();
}
template<size_t SUPP, bool wgrid, typename T, typename Serv> [[gnu::hot]] void grid2x_c_helper
......@@ -882,6 +897,7 @@ template<bool wgrid, typename T, typename Serv> void grid2x_c
(const GridderConfig<T> &gconf, const mav<complex<T>,2> &grid,
Serv &srv, double w0=-1, double dw=-1)
{
gconf.timers.push("degridding proper");
checkShape(grid.shape(), {gconf.Nu(), gconf.Nv()});
switch(gconf.Supp())
......@@ -901,11 +917,13 @@ template<bool wgrid, typename T, typename Serv> void grid2x_c
case 16: grid2x_c_helper<16, wgrid>(gconf, grid, srv, w0, dw); break;
default: MR_fail("must not happen");
}
gconf.timers.pop();
}
template<typename T> void apply_global_corrections(const GridderConfig<T> &gconf,
mav<T,2> &dirty, double dw, bool divide_by_n)
{
gconf.timers.push("global corrections");
auto nx_dirty=gconf.Nxdirty();
auto ny_dirty=gconf.Nydirty();
size_t nthreads = gconf.Nthreads();
......@@ -958,11 +976,13 @@ template<typename T> void apply_global_corrections(const GridderConfig<T> &gconf
}
}
});
gconf.timers.pop();
}
template<typename T, typename Serv> class WgridHelper
{
private:
GridderConfig<T> &gconf;
Serv &srv;
double wmin, dw;
size_t nplanes, supp, nthreads;
......@@ -972,9 +992,10 @@ template<typename T, typename Serv> class WgridHelper
int curplane;
vector<idx_t> subidx;
template<typename T2> static void update_idx(vector<T2> &v, const vector<T2> &add,
template<typename T2> void update_idx(vector<T2> &v, const vector<T2> &add,
const vector<T2> &del, size_t nthreads)
{
gconf.timers.push("update_idx");
MR_assert(v.size()>=del.size(), "must not happen");
vector<T2> res;
res.reserve((v.size()+add.size())-del.size());
......@@ -1033,13 +1054,15 @@ template<typename T, typename Serv> class WgridHelper
#endif
MR_assert(res.size()==(v.size()+add.size())-del.size(), "must not happen");
v.swap(res);
gconf.timers.pop();
}
public:
WgridHelper(const GridderConfig<T> &gconf, Serv &srv_, double wmin_, double wmax, size_t verbosity_)
: srv(srv_), wmin(wmin_), supp(gconf.Supp()), nthreads(gconf.Nthreads()),
WgridHelper(GridderConfig<T> &gconf_, Serv &srv_, double wmin_, double wmax, size_t verbosity_)
: gconf(gconf_), srv(srv_), wmin(wmin_), supp(gconf.Supp()), nthreads(gconf.Nthreads()),
verbosity(verbosity_), curplane(-1)
{
gconf.timers.push("computing minplane");
size_t nvis = srv.Nvis();
double x0 = -0.5*gconf.Nxdirty()*gconf.Pixsize_x(),
y0 = -0.5*gconf.Nydirty()*gconf.Pixsize_y();
......@@ -1092,6 +1115,7 @@ template<typename T, typename Serv> class WgridHelper
minplane[p0[i]][cnt.v(tid,p0[i])++]=idx_t(i);
});
#endif
gconf.timers.pop();
}
typename Serv::Tsub getSubserv() const
......@@ -1138,7 +1162,7 @@ template<typename T> void report(const GridderConfig<T> &gconf, size_t nvis,
}
template<typename T, typename Serv> void x2dirty(
const GridderConfig<T> &gconf, Serv &srv, mav<T,2> &dirty,
GridderConfig<T> &gconf, Serv &srv, mav<T,2> &dirty,
bool do_wstacking, double wmin, double wmax, size_t verbosity)
{
if (do_wstacking)
......@@ -1171,7 +1195,7 @@ template<typename T, typename Serv> void x2dirty(
}
template<typename T, typename Serv> void dirty2x(
const GridderConfig<T> &gconf, const mav<T,2> &dirty,
GridderConfig<T> &gconf, const mav<T,2> &dirty,
Serv &srv, bool do_wstacking, double wmin, double wmax, size_t verbosity)
{
if (do_wstacking)
......@@ -1208,8 +1232,9 @@ template<typename T, typename Serv> void dirty2x(
template<typename T> auto getNuNv(double epsilon,
bool do_wstacking, double wmin, double wmax, size_t nvis,
size_t nxdirty, size_t nydirty, double pixsize_x, double pixsize_y)
size_t nxdirty, size_t nydirty, double pixsize_x, double pixsize_y, TimerHierarchy &timers)
{
timers.push("parameter calculation");
double x0 = -0.5*nxdirty*pixsize_x,
y0 = -0.5*nydirty*pixsize_y;
double nmin = sqrt(max(1.-x0*x0-y0*y0,0.))-1.;
......@@ -1248,12 +1273,14 @@ template<typename T> auto getNuNv(double epsilon,
minidx = idx[i];
}
}
timers.pop();
return make_tuple(minnu, minnv, minidx);
}
template<typename T> vector<idx_t> getIndices(const Baselines &baselines,
const GridderConfig<T> &gconf, const mav<uint8_t,2> &mask)
{
gconf.timers.push("Index generation");
size_t nrow=baselines.Nrows(),
nchan=baselines.Nchannels(),
nsafe=gconf.Nsafe(),
......@@ -1307,12 +1334,14 @@ template<typename T> vector<idx_t> getIndices(const Baselines &baselines,
if (tmp[idx]!=(~idx_t(0)))
res[acc.v(tid, tmp[idx])++] = baselines.getIdx(irow, ichan);
});
gconf.timers.pop();
return res;
}
template<typename T> auto scanData(const Baselines &baselines, const mav<complex<T>,2> &ms,
const mav<T, 2> &wgt, const mav<uint8_t, 2> &mask, size_t nthreads)
const mav<T, 2> &wgt, const mav<uint8_t, 2> &mask, size_t nthreads, TimerHierarchy &timers)
{
timers.push("Initial scan");
size_t nrow=baselines.Nrows(),
nchan=baselines.Nchannels();
bool have_wgt=wgt.size()!=0;
......@@ -1352,6 +1381,7 @@ template<typename T> auto scanData(const Baselines &baselines, const mav<complex
nvis += lnvis;
}
});
timers.pop();
return make_tuple(wmin, wmax, nvis, mask_out);
}
......@@ -1361,28 +1391,28 @@ template<typename T> void ms2dirty(const mav<double,2> &uvw,
bool do_wstacking, size_t nthreads, mav<T,2> &dirty, size_t verbosity,
bool negate_v=false)
{
SimpleTimer timer;
TimerHierarchy timers("gridding");
Baselines baselines(uvw, freq, negate_v);
// adjust for increased error when gridding in 2 or 3 dimensions
epsilon /= do_wstacking ? 3 : 2;
auto [wmin, wmax, nvis, mask_out] = scanData(baselines, ms, wgt, mask, nthreads);
auto [wmin, wmax, nvis, mask_out] = scanData(baselines, ms, wgt, mask, nthreads, timers);
if (nvis==0)
{ dirty.fill(0); return; }
size_t kidx = KernelDB.size();
if (nu*nv==0)
{
auto [nu2, nv2, kidx2] = getNuNv<T>(epsilon, do_wstacking, wmin, wmax, nvis, dirty.shape(0), dirty.shape(1), pixsize_x, pixsize_y);
auto [nu2, nv2, kidx2] = getNuNv<T>(epsilon, do_wstacking, wmin, wmax, nvis, dirty.shape(0), dirty.shape(1), pixsize_x, pixsize_y, timers);
nu = nu2;
nv = nv2;
kidx = kidx2;
}
GridderConfig<T> gconf(dirty.shape(0), dirty.shape(1), nu, nv, kidx, epsilon, pixsize_x, pixsize_y, baselines, nthreads);
GridderConfig<T> gconf(dirty.shape(0), dirty.shape(1), nu, nv, kidx, epsilon, pixsize_x, pixsize_y, baselines, nthreads, timers);
auto idx = getIndices(baselines, gconf, mask_out);
auto idx2 = mav<idx_t,1>(idx.data(),{idx.size()});
auto serv = makeMsServ(baselines,idx2,ms,wgt);
x2dirty(gconf, serv, dirty, do_wstacking, wmin, wmax, verbosity);
if (verbosity>0)
cout << "Wall clock time for gridding: " << timer() << "s" << endl;
timers.report(cout);
}
template<typename T> void dirty2ms(const mav<double,2> &uvw,
......@@ -1391,30 +1421,30 @@ template<typename T> void dirty2ms(const mav<double,2> &uvw,
double epsilon, bool do_wstacking, size_t nthreads, mav<complex<T>,2> &ms,
size_t verbosity, bool negate_v=false)
{
SimpleTimer timer;
TimerHierarchy timers("degridding");
Baselines baselines(uvw, freq, negate_v);
// adjust for increased error when gridding in 2 or 3 dimensions
epsilon /= do_wstacking ? 3 : 2;
mav<complex<T>,2> null_ms(nullptr, {0,0}, false);
ms.fill(0);
auto [wmin, wmax, nvis, mask_out] = scanData(baselines, null_ms, wgt, mask, nthreads);
auto [wmin, wmax, nvis, mask_out] = scanData(baselines, null_ms, wgt, mask, nthreads, timers);
if (nvis==0)
return;
size_t kidx = KernelDB.size();
if (nu*nv==0)
{
auto [nu2, nv2, kidx2] = getNuNv<T>(epsilon, do_wstacking, wmin, wmax, nvis, dirty.shape(0), dirty.shape(1), pixsize_x, pixsize_y);
auto [nu2, nv2, kidx2] = getNuNv<T>(epsilon, do_wstacking, wmin, wmax, nvis, dirty.shape(0), dirty.shape(1), pixsize_x, pixsize_y, timers);
nu = nu2;
nv = nv2;
kidx = kidx2;
}
GridderConfig<T> gconf(dirty.shape(0), dirty.shape(1), nu, nv, kidx, epsilon, pixsize_x, pixsize_y, baselines, nthreads);
GridderConfig<T> gconf(dirty.shape(0), dirty.shape(1), nu, nv, kidx, epsilon, pixsize_x, pixsize_y, baselines, nthreads, timers);
auto idx = getIndices(baselines, gconf, mask_out);
auto idx2 = mav<idx_t,1>(idx.data(),{idx.size()});
auto serv = makeMsServ(baselines,idx2,ms,wgt);
dirty2x(gconf, dirty, serv, do_wstacking, wmin, wmax, verbosity);
if (verbosity>0)
cout << "Wall clock time for degridding: " << timer() << "s" << endl;
timers.report(cout);
}
} // namespace detail_gridder
......
......@@ -24,16 +24,22 @@
#include <algorithm>
#include <chrono>
#include <cmath>
#include <iomanip>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include "ducc0/infra/error_handling.h"
namespace ducc0 {
namespace detail_timers {
using namespace std;
class SimpleTimer
{
private:
using clock = std::chrono::steady_clock;
using clock = chrono::steady_clock;
clock::time_point starttime;
public:
......@@ -42,27 +48,105 @@ class SimpleTimer
// Restarts the measurement: the current time becomes the new start time.
void reset()
  {
  starttime = clock::now();
  }
// Returns the time in seconds that has passed since `starttime`
// (chrono::duration<double> uses a period of one second).
double operator()() const
  { return chrono::duration<double>(clock::now() - starttime).count(); }
};
class TimerHierarchy
{
private:
using clock = std::chrono::steady_clock;
using clock = chrono::steady_clock;
class tstack_node
{
private:
using maptype = map<string,tstack_node>;
using Tipair = pair<maptype::const_iterator,double>;
public:
tstack_node *parent;
string name;
double accTime;
std::map<std::string,tstack_node> child;
maptype child;
// Constructor taking only the parent link: accTime starts at zero and
// `name` is left default-constructed.
tstack_node(tstack_node *parent_)
: parent(parent_), accTime(0.) {}
private:
// Returns the time accumulated in this node plus, recursively, the time
// accumulated in all of its descendants.
double full_acc() const
  {
  double total = accTime;
  for (auto it=child.cbegin(); it!=child.cend(); ++it)
    total += it->second.full_acc();
  return total;
  }
double add_timings(const std::string &prefix,
std::map<std::string, double> &res) const
// Returns the length of the longest node name found in the subtree rooted
// at this node (this node's own name included).
size_t max_namelen() const
  {
  size_t longest = name.length();
  for (auto it=child.cbegin(); it!=child.cend(); ++it)
    longest = max(longest, it->second.max_namelen());
  return longest;
  }
// Writes `val` to `os` in fixed-point notation.
//   val : value to print; expected to be finite.
//   pre : minimum field width of the integer part (right-aligned, padded
//         with the stream's current fill character).
//   post: number of decimals; with post==0 only the integer part is written.
// The value is rounded half away from zero at the last requested decimal.
// On return the stream's fill character is ' ' (only touched if post>0).
static void floatformat(double val, size_t pre, size_t post, std::ostream &os)
  {
  // 10^post, the factor that shifts the requested decimals left of the point
  long long fct=1;
  for (size_t i=0; i<post; ++i) fct*=10;

  // Round the scaled magnitude once and split it afterwards. Rounding the
  // fraction on its own loses the carry into the integer part: 0.99996 with
  // four decimals would come out as "0.10000" instead of "1.0000".
  const bool negative = val<0;
  const auto scaled = static_cast<long long>((negative ? -val : val)*fct+0.5);

  std::string ipart = std::to_string(scaled/fct);
  // the sign is attached by hand so that it survives an integer part of 0,
  // but values that round to zero are printed without it
  if (negative && (scaled!=0)) ipart.insert(0, 1, '-');

  os << std::setw(static_cast<int>(pre)) << ipart;
  if (post==0) return;
  os << "." << std::setw(static_cast<int>(post)) << std::setfill('0')
     << scaled%fct << std::setfill(' ');
  }
static void printline(const string &indent, int twidth, int slen,
const string &name, double val, double total,
ostream &os)
{
os << indent << "+- " << name << setw(slen+1-name.length()) << ":";
floatformat(100*val/total, 3, 2, os);
os << "% (";
floatformat(val, twidth-5, 4, os);
os << "s)\n";
}
void report(const string &indent, int twidth, int slen, ostream &os) const
{
double total=full_acc();
vector<Tipair> tmp;
for (auto it=child.cbegin(); it!=child.cend(); ++it)
tmp.push_back(make_pair(it, it->second.full_acc()));
if (tmp.size()>0)
{
sort(tmp.begin(),tmp.end(),
[](const Tipair &a, const Tipair &b){ return a.second>b.second; });
double tsum=0;
os << indent << "|\n";
for (unsigned i=0; i<tmp.size(); ++i)
{
printline(indent, twidth, slen, tmp[i].first->first, tmp[i].second, total, os);
(tmp[i].first->second).report(indent+"| ",twidth,slen,os);
tsum+=tmp[i].second;
}
printline(indent, twidth, slen, "<unaccounted>", total-tsum, total, os);
if (indent!="") os << indent << "\n";
}
}
public:
// Creates a node called `name_` with no accumulated time and no children.
// `parent_` is the enclosing node; it defaults to nullptr.
tstack_node(const string &name_, tstack_node *parent_=nullptr)
  : parent(parent_),
    name(name_),
    accTime(0.)
  {}
// Writes the complete timing report for the subtree rooted at this node:
// a header line with the total time, followed by the indented tree.
// All numbers are produced by floatformat(), so the stream's precision
// setting is neither used nor modified.
void report(ostream &os) const
  {
  // the name column must fit the longest node name and "<unaccounted>"
  const auto slen = max(string("<unaccounted>").size(), max_namelen());
  const double total = full_acc();

  os << "\nTotal wall clock time for " << name << ": ";
  floatformat(total, 1, 4, os);
  os << "s\n";

  // Number of digits in front of the decimal point. Totals below 1s
  // (including 0, for which log10 is not finite) need a single digit.
  const int logtime = (total>=1.) ? int(log10(total))+1 : 1;
  // +5 accounts for the decimal point and the four decimals
  report("", logtime+5, int(slen), os);
  }
// Adds `dt` to the time accumulated in this node.
void addTime(double dt)
  {
  accTime += dt;
  }
double add_timings(const string &prefix,
map<string, double> &res) const
{
double t_own = accTime;
for (const auto &nd: child)
......@@ -79,26 +163,25 @@ class TimerHierarchy
// Books the time that has passed since `last_time` on the currently active
// node and moves `last_time` forward to now.
void adjust_time()
  {
  const auto tnow = clock::now();
  // booked exactly once; a second, direct update of accTime would count
  // the same interval twice
  curnode->addTime(chrono::duration<double>(tnow - last_time).count());
  last_time = tnow;
  }
void push_internal(const std::string &name)
void push_internal(const string &name)
{
auto it=curnode->child.find(name);
if (it==curnode->child.end())
{
MR_assert(name.find(':') == std::string::npos, "reserved character");
it = curnode->child.insert(make_pair(name,tstack_node(curnode))).first;
MR_assert(name.find(':') == string::npos, "reserved character");
it = curnode->child.insert(make_pair(name,tstack_node(name, curnode))).first;
}
curnode=&(it->second);
}
public:
TimerHierarchy()
: last_time(clock::now()), root(nullptr), curnode(&root) {}
void push(const std::string &name)
TimerHierarchy(const string &name="<root>")
: last_time(clock::now()), root(name, nullptr), curnode(&root) {}
void push(const string &name)
{
adjust_time();
push_internal(name);
......@@ -109,20 +192,27 @@ class TimerHierarchy
curnode = curnode->parent;
MR_assert(curnode!=nullptr, "tried to pop from empty timer stack");
}
void poppush(const std::string &name)
void poppush(const string &name)
{
pop();
push_internal(name);
}
std::map<std::string, double> get_timings()
map<string, double> get_timings()
{
adjust_time();
std::map<std::string, double> res;
map<string, double> res;
root.add_timings("root", res);
return res;
}
// Writes the timing report of the whole hierarchy to `os`.
// This is a const member and does not call adjust_time(), so only time
// that has already been booked shows up in the report.
void report(ostream &os) const
  {
  root.report(os);
  }
};
}
using detail_timers::SimpleTimer;
using detail_timers::TimerHierarchy;
}
#endif
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment