Commit c351cbf2 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

start restructuring

parent 1ce04c30
#ifndef MRUTIL_GL_INTEGRATOR_H
#define MRUTIL_GL_INTEGRATOR_H
#include <cmath>
#include "mr_util/error_handling.h"
#include "mr_util/threading.h"
namespace mr {
namespace gl_integrator {
namespace detail {
using namespace std;
using namespace mr::threading;
/** Gauss-Legendre quadrature on the interval [-1,1].
    The constructor computes the nodes and weights of an n-point rule by
    Newton iteration on the Legendre polynomial P_n; integrate() then
    evaluates the rule for a given integrand. */
class GL_Integrator
  {
  private:
    int m;               // number of nonnegative nodes: (n+1)/2
    vector<double> x, w; // nonnegative nodes (ascending) and their weights

    /** Numerically stable evaluation of 1-x^2 (avoids cancellation
        for |x| close to 1). */
    static inline double one_minus_x2 (double x)
      { return (abs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; }

  public:
    /** Set up an n-point rule; the independent root searches may be
        distributed over nthreads threads. */
    GL_Integrator(int n, size_t nthreads=1)
      {
      MR_assert(n>=1, "number of points must be at least 1");
      constexpr double pi = 3.141592653589793238462643383279502884197;
      constexpr double eps = 3e-14; // Newton convergence threshold
      m = (n+1)>>1;
      x.resize(m);
      w.resize(m);
      // scaling factors for the initial root guesses
      const double t0 = 1 - (1-1./n) / (8.*n*n);
      const double t1 = 1./(4.*n+2.);
      execDynamic(m, nthreads, 1, [&](Scheduler &sched)
        {
        while (auto rng=sched.getNext()) for(auto i=rng.lo+1; i<rng.hi+1; ++i)
          {
          // initial guess for the i-th root of P_n
          double x0 = cos(pi * ((i<<2)-1) * t1) * t0;
          int dobreak=0;
          int j=0;
          double dpdx;
          while(1)
            {
            // evaluate P_n(x0) via the three-term recurrence
            double P_1 = 1.0;
            double P0 = x0;
            double dx, x1;
            for (int k=2; k<=n; k++)
              {
              double P_2 = P_1;
              P_1 = P0;
              // P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k;
              P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2);
              }
            dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0); // P_n'(x0)
            /* Newton step */
            x1 = x0 - P0/dpdx;
            dx = x0-x1;
            x0 = x1;
            if (dobreak) break; // one extra polish iteration after convergence
            if (abs(dx)<=eps) dobreak=1;
            MR_assert(++j<100, "convergence problem");
            }
          // store node and the corresponding Gauss-Legendre weight
          x[m-i] = x0;
          w[m-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx);
          }
        });
      if (n&1) x[0] = 0.; // odd rule: center node is exactly zero
      }

    /** Approximate the integral of f over [-1,1] with the stored rule. */
    template<typename Func> double integrate(Func f)
      {
      double res=0;
      // fixed: istart was declared 'double' and then used as a size_t
      // loop start index, forcing a silent float->integer conversion
      size_t istart=0;
      if (x[0]==0.)
        {
        res = f(x[0])*w[0]; // center node contributes only once
        istart=1;
        }
      for (size_t i=istart; i<x.size(); ++i)
        res += (f(x[i])+f(-x[i]))*w[i];
      return res;
      }

    /** Approximate the integral over [-1,1] of an integrand known to be
        symmetric (even); f is evaluated only at the nonnegative nodes. */
    template<typename Func> auto integrateSymmetric(Func f) -> decltype(f(0.))
      {
      using T = decltype(f(0.));
      T res=f(x[0])*w[0];
      if (x[0]==0.) res *= 0.5; // center node must not be doubled below
      for (size_t i=1; i<x.size(); ++i)
        res += f(x[i])*w[i];
      return res*2;
      }
  };
}
using detail::GL_Integrator;
}}
#endif
#ifndef MRUTIL_MAV_H
#define MRUTIL_MAV_H
#include <cstdlib>
#include <cstddef>
#include <array>
#include <iostream>
namespace mr {
namespace mav {
namespace detail {
using namespace std;
// "mav" stands for "multidimensional array view"
// "mav" stands for "multidimensional array view": a small non-owning view
// over strided 1D or 2D data, storing a pointer, a shape and per-axis strides.
template<typename T, size_t ndim> class mav
  {
  static_assert((ndim>0) && (ndim<3), "only supports 1D and 2D arrays");

  private:
    T *d;                       // first element; memory is NOT owned
    array<size_t, ndim> shp;    // extent of each axis
    array<ptrdiff_t, ndim> str; // stride (in elements) of each axis

  public:
    /** View with explicit shape and strides. */
    mav(T *d_, const array<size_t,ndim> &shp_,
        const array<ptrdiff_t,ndim> &str_)
      : d(d_), shp(shp_), str(str_) {}

    /** View over contiguous C-ordered data; strides derived from shape. */
    mav(T *d_, const array<size_t,ndim> &shp_)
      : d(d_), shp(shp_)
      {
      str[ndim-1]=1;
      // fixed: loop counter was named 'd', shadowing the data-member pointer
      for (size_t i=2; i<=ndim; ++i)
        str[ndim-i] = str[ndim-i+1]*shp[ndim-i+1];
      }

    /** 1D element access (alias for operator()). */
    T &operator[](size_t i) const
      { return operator()(i); }

    /** 1D element access. */
    T &operator()(size_t i) const
      {
      static_assert(ndim==1, "ndim must be 1");
      return d[str[0]*i];
      }

    /** 2D element access. */
    T &operator()(size_t i, size_t j) const
      {
      static_assert(ndim==2, "ndim must be 2");
      return d[str[0]*i + str[1]*j];
      }

    /** Extent of axis i. */
    size_t shape(size_t i) const { return shp[i]; }
    /** Full shape array. */
    const array<size_t,ndim> &shape() const { return shp; }

    /** Total number of elements (product of all extents). */
    size_t size() const
      {
      size_t res=1;
      for (auto v: shp) res*=v;
      return res;
      }

    /** Stride (in elements) of axis i. */
    ptrdiff_t stride(size_t i) const { return str[i]; }
    /** Raw data pointer (not owned). */
    T *data() const
      { return d; }

    /** True if the innermost axis is contiguous; stride 0 is also accepted
        (broadcast-style views). */
    bool last_contiguous() const
      { return (str[ndim-1]==1) || (str[ndim-1]==0); }

    /** Print a performance warning (to stdout) if the innermost axis is
        strided. */
    void check_storage(const char *name) const
      {
      if (!last_contiguous())
        cout << "Array '" << name << "': last dimension is not contiguous.\n"
                "This may slow down computation significantly!\n";
      }

    /** True if the whole view is C-contiguous. */
    bool contiguous() const
      {
      ptrdiff_t stride=1;
      for (size_t i=0; i<ndim; ++i)
        {
        if (str[ndim-1-i]!=stride) return false;
        stride *= shp[ndim-1-i];
        }
      return true;
      }

    /** Assign val to every element of the view. */
    void fill(const T &val) const
      {
      // FIXME: special cases for contiguous arrays and/or zeroing?
      if (ndim==1)
        for (size_t i=0; i<shp[0]; ++i)
          d[str[0]*i]=val;
      else if (ndim==2)
        for (size_t i=0; i<shp[0]; ++i)
          for (size_t j=0; j<shp[1]; ++j)
            d[str[0]*i + str[1]*j] = val;
      }
  };
template<typename T, size_t ndim> using const_mav = mav<const T, ndim>;
/** Return a read-only view of the same data as mav.
    Fixed: the original constructed the const view from data+shape only,
    which silently replaced the source's strides with contiguous ones and
    therefore indexed the wrong elements for non-contiguous views. The
    strides are now copied over explicitly. */
template<typename T, size_t ndim> const_mav<T, ndim> cmav (const mav<T, ndim> &mav)
  {
  array<ptrdiff_t, ndim> str;
  for (size_t i=0; i<ndim; ++i)
    str[i] = mav.stride(i);
  return const_mav<T, ndim>(mav.data(), mav.shape(), str);
  }
/** Construct an empty placeholder view: null data pointer, all extents zero. */
template<typename T, size_t ndim> const_mav<T, ndim> nullmav()
  {
  array<size_t,ndim> shp{}; // value-initialized: every extent is 0
  return const_mav<T, ndim>(nullptr, shp);
  }
}
using detail::mav;
using detail::const_mav;
using detail::cmav;
using detail::nullmav;
}}
#endif
#ifndef MRUTIL_THREADING_H
#define MRUTIL_THREADING_H
#include <cstdlib>
#include <mutex>
#include <condition_variable>
#include <thread>
#include <queue>
#include <atomic>
#include <functional>
#include <vector>
#ifdef MRUTIL_THREADING_PTHREADS
# include <pthread.h>
#endif
namespace mr {
namespace threading {
namespace detail {
using namespace std;
// Index of the current thread within its thread_map() team (0 on the main thread).
thread_local size_t thread_id = 0;
// Size of the team the current thread belongs to (1 outside thread_map()).
thread_local size_t num_threads = 1;
// NOTE(review): these are non-inline namespace-scope definitions in a header;
// including this header from more than one translation unit presumably
// violates the ODR — verify (C++17 'inline thread_local' would avoid this).
// Upper bound on useful parallelism; never less than 1 even if
// hardware_concurrency() reports 0 (unknown).
static const size_t max_threads = max(1u, thread::hardware_concurrency());
/** Countdown latch: wait() blocks until count_down() has been called the
    number of times given at construction. */
class latch
  {
  private:
    atomic<size_t> pending_;
    mutex mtx_;
    condition_variable done_;

  public:
    /** Initialize with the number of count_down() calls to wait for. */
    latch(size_t n): pending_(n) {}

    /** Decrement the counter; wakes all waiters once it reaches zero. */
    void count_down()
      {
      unique_lock<mutex> guard(mtx_);
      if (--pending_ == 0)
        done_.notify_all();
      }

    /** Block until the counter has reached zero. */
    void wait()
      {
      unique_lock<mutex> guard(mtx_);
      done_.wait(guard, [this]{ return is_ready(); });
      }

    /** True once the counter has reached zero. */
    bool is_ready() { return pending_ == 0; }
  };
/** Minimal thread-safe FIFO work queue with shutdown support. */
template <typename T> class concurrent_queue
  {
  queue<T> q_;
  mutex mut_;
  condition_variable item_added_;
  bool shutdown_;
  using lock_t = unique_lock<mutex>;

  public:
    concurrent_queue(): shutdown_(false) {}

    /** Enqueue val and wake one waiting consumer.
        Throws runtime_error if the queue has been shut down. */
    void push(T val)
      {
      {
      lock_t lock(mut_);
      if (shutdown_)
        throw runtime_error("Item added to queue after shutdown");
      q_.push(move(val));
      }
      // notify outside the lock so the woken consumer can grab it immediately
      item_added_.notify_one();
      }

    /** Block until an item is available (store it in val, return true) or
        the queue is shut down and empty (return false). */
    bool pop(T & val)
      {
      lock_t lock(mut_);
      item_added_.wait(lock, [this] { return (!q_.empty() || shutdown_); });
      if (q_.empty())
        return false; // We are shutting down
      val = std::move(q_.front());
      q_.pop();
      return true;
      }

    /** Mark the queue as shut down and wake all waiting consumers. */
    void shutdown()
      {
      {
      lock_t lock(mut_);
      shutdown_ = true;
      }
      item_added_.notify_all();
      }

    /** Re-enable the queue after a shutdown().
        Fixed: the flag is now written under the mutex — the original
        unprotected write raced with concurrent pop()/push() calls. */
    void restart()
      {
      lock_t lock(mut_);
      shutdown_ = false;
      }
  };
/** Fixed-size pool of worker threads fed from a shared work queue. */
class thread_pool
  {
  concurrent_queue<function<void()>> work_queue_;
  vector<thread> threads_;

  /// Loop run by every worker: execute jobs until the queue shuts down.
  void worker_main()
    {
    function<void()> job;
    while (work_queue_.pop(job))
      job();
    }

  /// Launch one thread per slot; on failure tear the pool down and rethrow.
  void create_threads()
    {
    for (auto &slot : threads_)
      {
      try
        { slot = thread([this]{ worker_main(); }); }
      catch (...)
        {
        shutdown();
        throw;
        }
      }
    }

  public:
    /** Create a pool with the given number of worker threads. */
    explicit thread_pool(size_t nthreads):
      threads_(nthreads)
      { create_threads(); }
    /** Default: one worker per hardware thread. */
    thread_pool(): thread_pool(max_threads) {}
    ~thread_pool() { shutdown(); }

    /** Hand a job to the pool. */
    void submit(function<void()> work)
      { work_queue_.push(move(work)); }

    /** Stop accepting work, drain remaining jobs and join all workers. */
    void shutdown()
      {
      work_queue_.shutdown();
      for (auto &t : threads_)
        if (t.joinable())
          t.join();
      }

    /** Bring a shut-down pool back into service. */
    void restart()
      {
      work_queue_.restart();
      create_threads();
      }
  };
/** Return the process-wide worker pool (lazily constructed on first use).
    When pthreads support is compiled in, fork handlers are registered once
    so the pool is shut down before fork() and restarted in both parent and
    child afterwards. */
thread_pool & get_pool()
  {
  static thread_pool pool;
// fixed: the guard tested MR_THREADING_PTHREADS, but the rest of this header
// (the pthread.h include) uses MRUTIL_THREADING_PTHREADS, so the fork
// handlers were never compiled in.
#ifdef MRUTIL_THREADING_PTHREADS
  static once_flag f;
  call_once(f,
    []{
    pthread_atfork(
      +[]{ get_pool().shutdown(); }, // prepare
      +[]{ get_pool().restart(); },  // parent
      +[]{ get_pool().restart(); }   // child
      );
    });
#endif
  return pool;
  }
/** Map a function f over nthreads */
template <typename Func>
void thread_map(size_t nthreads, Func f)
{
if (nthreads == 0)
nthreads = max_threads;
if (nthreads == 1)
{ f(); return; }
auto & pool = get_pool();
latch counter(nthreads);
exception_ptr ex;
mutex ex_mut;
for (size_t i=0; i<nthreads; ++i)
{
pool.submit(
[&f, &counter, &ex, &ex_mut, i, nthreads] {
thread_id = i;
num_threads = nthreads;
try { f(); }
catch (...)
{
lock_guard<mutex> lock(ex_mut);
ex = current_exception();
}
counter.count_down();
});
}
counter.wait();
if (ex)
rethrow_exception(ex);
}
/** Distributes nwork work items among threads according to one of three
    scheduling modes:
    - SINGLE:  the whole range is handed out once, to the calling thread;
    - STATIC:  fixed-size chunks are assigned round-robin by thread id;
    - DYNAMIC: threads grab chunks from a shared counter under a mutex,
      optionally with a "guided" shrinking chunk size (fact_max > 0).
    Worker code repeatedly calls getNext() until it returns an empty Range. */
class Scheduler
{
private:
size_t nthreads_; // number of participating threads
mutex mut_; // protects cur_ in DYNAMIC mode
size_t nwork_; // total number of work items
size_t cur_; // next unassigned work item (DYNAMIC mode)
size_t chunksize_; // items handed out per getNext() call
double fact_max_; // guided-scheduling factor (0 => plain dynamic)
vector<size_t> nextstart; // per-thread next chunk start (STATIC mode)
typedef enum { SINGLE, STATIC, DYNAMIC } SchedMode;
SchedMode mode;
bool single_done; // SINGLE: has the one-and-only range been handed out?
// Half-open index range [lo,hi); an empty range converts to false.
struct Range
{
size_t lo, hi;
Range() : lo(0), hi(0) {}
Range(size_t lo_, size_t hi_) : lo(lo_), hi(hi_) {}
operator bool() const { return hi>lo; }
};
public:
size_t nthreads() const { return nthreads_; }
mutex &mut() { return mut_; }
// Run f on the calling thread; getNext() yields [0,nwork) exactly once.
template<typename Func> void execSingle(size_t nwork, Func f)
{
mode = SINGLE;
single_done = false;
nwork_ = nwork;
f(*this);
}
// Static round-robin scheduling; chunksize==0 means one chunk per thread.
template<typename Func> void execStatic(size_t nwork,
size_t nthreads, size_t chunksize, Func f)
{
mode = STATIC;
nthreads_ = (nthreads==0) ? max_threads : nthreads;
nwork_ = nwork;
chunksize_ = (chunksize<1) ? (nwork_+nthreads_-1)/nthreads_
: chunksize;
// one chunk already covers everything: no need to spin up threads
if (chunksize_>=nwork_) return execSingle(nwork_, move(f));
nextstart.resize(nthreads_);
for (size_t i=0; i<nextstart.size(); ++i)
nextstart[i] = i*chunksize_;
thread_map(nthreads_, [&]() {f(*this);});
}
// Dynamic (or guided, if fact_max>0) scheduling with a minimum chunk size.
template<typename Func> void execDynamic(size_t nwork,
size_t nthreads, size_t chunksize_min, double fact_max, Func f)
{
mode = DYNAMIC;
nthreads_ = (nthreads==0) ? max_threads : nthreads;
nwork_ = nwork;
chunksize_ = (chunksize_min<1) ? 1 : chunksize_min;
// too little work for dynamic assignment: fall back to static scheduling
if (chunksize_*nthreads_>=nwork_)
return execStatic(nwork, nthreads, 0, move(f));
fact_max_ = fact_max;
cur_ = 0;
thread_map(nthreads_, [&]() {f(*this);});
}
// Hand the calling thread its next range of work; an empty Range means done.
Range getNext()
{
switch (mode)
{
case SINGLE:
{
if (single_done) return Range();
single_done=true;
return Range(0, nwork_);
}
case STATIC:
{
// each thread walks its own precomputed sequence of chunk starts,
// advancing by nthreads_*chunksize_ (round-robin interleaving)
if (nextstart[thread_id]>=nwork_) return Range();
size_t lo=nextstart[thread_id];
size_t hi=min(lo+chunksize_,nwork_);
nextstart[thread_id] += nthreads_*chunksize_;
return Range(lo, hi);
}
case DYNAMIC:
{
unique_lock<mutex> lck(mut_);
if (cur_>=nwork_) return Range();
auto rem = nwork_-cur_;
// guided mode: chunk size shrinks with the remaining work, but never
// below chunksize_; fact_max_==0 makes tmp 0, i.e. plain dynamic
size_t tmp = size_t((fact_max_*rem)/nthreads_);
auto sz = min(rem, max(chunksize_, tmp));
size_t lo=cur_;
cur_+=sz;
size_t hi=cur_;
return Range(lo, hi);
}
}
return Range();
}
};
template<typename Func> void execSingle(size_t nwork, Func f)
{
Scheduler sched;
sched.execSingle(nwork, move(f));
}
template<typename Func> void execStatic(size_t nwork,
size_t nthreads, size_t chunksize, Func f)
{
Scheduler sched;
sched.execStatic(nwork, nthreads, chunksize, move(f));
}
template<typename Func> void execDynamic(size_t nwork,
size_t nthreads, size_t chunksize_min, Func f)
{
Scheduler sched;
sched.execDynamic(nwork, nthreads, chunksize_min, 0., move(f));
}
template<typename Func> void execGuided(size_t nwork,
size_t nthreads, size_t chunksize_min, double fact_max, Func f)
{
Scheduler sched;
sched.execDynamic(nwork, nthreads, chunksize_min, fact_max, move(f));
}
} // end of namespace detail
using detail::Scheduler;
using detail::execSingle;
using detail::execStatic;
using detail::execDynamic;
using detail::execGuided;
} // end of namespace threading
} // end of namespace mr
#endif
#ifndef MRUTIL_VECSUPPORT_H
#define MRUTIL_VECSUPPORT_H
#include <cstdlib>
#include <cmath>
namespace mr {
namespace vecsupport {
namespace detail {
using namespace std;
// Select the native SIMD register width (in bytes) for the target ISA.
#if (defined(__AVX512F__))
constexpr size_t vbytes = 64;
#elif (defined(__AVX__))
constexpr size_t vbytes = 32;
#elif (defined(__SSE2__))
constexpr size_t vbytes = 16;
#elif (defined(__VSX__))
constexpr size_t vbytes = 16;
#endif
// NOTE(review): there is no #else branch, so vbytes stays undefined on
// targets without one of these ISAs and the vtp default template argument
// below will fail to compile there — presumably a scalar fallback is
// intended; verify.
template<typename T, size_t len=vbytes/sizeof(T)> class vtp
{
protected:
using Tv __attribute__ ((vector_size (len*sizeof(T)))) = T;
static_assert((len>0) && ((len&(len-1))==0), "bad vector length");
Tv v;
void from_scalar(T other)
{ for (size_t i=0; i<len; ++i) v[i]=other; }
public:
static constexpr size_t vlen=len;
vtp () {}
vtp(T other)
{ from_scalar(other); }
vtp(const Tv &other)
: v(other) {}
vtp(const vtp &other) = default;
vtp &operator=(T other)
{ from_scalar(other); return *this; }
vtp operator-() const { return vtp(-v); }
vtp operator+(vtp other) const { return vtp(v+other.v); }
vtp operator-(vtp other) const { return vtp(v-other.v); }
vtp operator*(vtp other) const { return vtp(v*other.v); }
vtp operator/(vtp other) const { return