Commit d1523bc1 authored by Martin Reinecke's avatar Martin Reinecke
Browse files

import

parents
# never store the git version file
git_version.py
# custom
setup.cfg
.idea
.DS_Store
*.pyc
*.html
.document
.svn/
*.csv
.pytest_cache/
*.png
# from https://github.com/github/gitignore/blob/master/Python.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.c
*.o
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/build/
docs/source/mod
# PyBuilder
target/
# IPython Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# dotenv
.env
# virtualenv
venv/
ENV/
# Spyder project settings
.spyderproject
# Rope project settings
.ropeproject
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <iostream>
#include <algorithm>
using namespace std;
namespace py = pybind11;
namespace {
constexpr double pi = 3.141592653589793238462643383279502884197;
static const uint16_t utab[] = {
#define Z(a) 0x##a##0, 0x##a##1, 0x##a##4, 0x##a##5
#define Y(a) Z(a##0), Z(a##1), Z(a##4), Z(a##5)
#define X(a) Y(a##0), Y(a##1), Y(a##4), Y(a##5)
X(0),X(1),X(4),X(5)
#undef X
#undef Y
#undef Z
};
uint32_t coord2morton2D_32 (uint32_t x, uint32_t y)
{
typedef uint32_t I;
return (I)(utab[x&0xff]) | ((I)(utab[(x>>8)&0xff])<<16)
| ((I)(utab[y&0xff])<<1) | ((I)(utab[(y>>8)&0xff])<<17);
}
static const uint8_t m2p2D_1[4][4] = {
{ 4, 1, 11, 2},{0,15, 5, 6},{10,9,3,12},{14,7,13,8}};
static uint8_t m2p2D_3[4][64];
static const uint8_t p2m2D_1[4][4] = {
{ 4, 1, 3, 10},{0,6,7,13},{15,9,8,2},{11,14,12,5}};
static uint8_t p2m2D_3[4][64];
static int peano2d_done=0;
static void init_peano2d (void)
{
peano2d_done=1;
for (int d=0; d<4; ++d)
for (uint32_t p=0; p<64; ++p)
{
unsigned rot = d;
uint32_t v = p<<26;
uint32_t res = 0;
for (int i=0; i<3; ++i)
{
unsigned tab=m2p2D_1[rot][v>>30];
v<<=2;
res = (res<<2) | (tab&0x3);
rot = tab>>2;
}
m2p2D_3[d][p]=res|(rot<<6);
}
for (int d=0; d<4; ++d)
for (uint32_t p=0; p<64; ++p)
{
unsigned rot = d;
uint32_t v = p<<26;
uint32_t res = 0;
for (int i=0; i<3; ++i)
{
unsigned tab=p2m2D_1[rot][v>>30];
v<<=2;
res = (res<<2) | (tab&0x3);
rot = tab>>2;
}
p2m2D_3[d][p]=res|(rot<<6);
}
}
uint32_t morton2peano2D_32(uint32_t v, int bits)
{
if (!peano2d_done) init_peano2d();
unsigned rot = 0;
uint32_t res = 0;
v<<=32-(bits<<1);
int i=0;
for (; i<bits-2; i+=3)
{
unsigned tab=m2p2D_3[rot][v>>26];
v<<=6;
res = (res<<6) | (tab&0x3fu);
rot = tab>>6;
}
for (; i<bits; ++i)
{
unsigned tab=m2p2D_1[rot][v>>30];
v<<=2;
res = (res<<2) | (tab&0x3);
rot = tab>>2;
}
return res;
}
void myassert(bool cond, const char *msg)
{
if (cond) return;
cerr << msg << endl;
throw 42;
}
template<typename It, typename Comp> class IdxComp__
{
private:
It begin;
Comp comp;
public:
IdxComp__ (It begin_, Comp comp_): begin(begin_), comp(comp_) {}
bool operator() (std::size_t a, std::size_t b) const
{ return comp(*(begin+a),*(begin+b)); }
};
/*! Performs an indirect sort on the supplied iterator range and returns in
\a idx a \a vector containing the indices of the smallest, second smallest,
third smallest, etc. element, according to \a comp. */
template<typename It, typename T2, typename Comp>
inline void buildIndex (It begin, It end, std::vector<T2> &idx, Comp comp)
{
using namespace std;
T2 num=end-begin;
idx.resize(num);
for (T2 i=0; i<num; ++i) idx[i] = i;
sort (idx.begin(),idx.end(),IdxComp__<It,Comp>(begin,comp));
}
/*! Performs an indirect sort on the supplied iterator range and returns in
\a idx a \a vector containing the indices of the smallest, second smallest,
third smallest, etc. element. */
template<typename It, typename T2> inline void buildIndex (It begin, It end,
std::vector<T2> &idx)
{
using namespace std;
typedef typename iterator_traits<It>::value_type T;
buildIndex(begin,end,idx,less<T>());
}
using a_i_c = py::array_t<int, py::array::c_style | py::array::forcecast>;
using a_d_c = py::array_t<double, py::array::c_style | py::array::forcecast>;
using a_c_c = py::array_t<complex<double>,
py::array::c_style | py::array::forcecast>;
a_i_c peanoindex(const a_d_c &uv_, int nu, int nv)
{
myassert(uv_.ndim()==2, "uv array must be 2D");
myassert(uv_.shape(1)==2, "uv.shape[1] must be 2");
int nvis = uv_.shape(0);
auto uv = uv_.data();
int npmax = max(nu, nv);
int nbits = 0;
for (int istart = npmax-1; istart!=0; istart>>=1, ++nbits);
vector<int> ipeano(nvis);
for (int i=0; i<nvis; ++i)
{
auto u = uv[2*i]*nu; if (u<0) u+=nu;
auto iu = min(nu-1, int(u));
auto v = uv[2*i+1]*nv; if (v<0) v+=nv;
auto iv = min(nv-1, int(v));
ipeano[i] = morton2peano2D_32(coord2morton2D_32(iu,iv),nbits);
}
vector<int> newind;
buildIndex(ipeano.begin(), ipeano.end(), newind);
int odim[] = {nvis};
a_i_c res(odim);
auto iout = res.mutable_data();
for (int i=0; i<nvis; ++i)
iout[i] = newind[i];
return res;
}
class Helper
{
private:
int nu, nv, nspread, nbuf;
double r2lamb, fac;
vector<double> kernel;
public:
vector<double> ku, kv;
int iu, iv, idxu0, idxv0;
complex<double> val;
Helper(int nu_, int nv_, int nspread_, double r2lamb_)
: nu(nu_), nv(nv_), nspread(nspread_), nbuf(2*nspread_), r2lamb(r2lamb_),
fac(pi/r2lamb), kernel(nspread+1), ku(nbuf), kv(nbuf)
{
// Precompute gridding kernel
for (size_t i=0; i<kernel.size(); ++i)
kernel[i] = exp(-pi/r2lamb*i*i);
}
void update(double u_in, double v_in, complex<double> vis)
{
auto u = u_in*nu; if (u<0) u+=nu;
iu = min(nu-1, int(u));
auto du = u-iu;
idxu0 = (iu-nspread+1+nu)%nu;
auto v = v_in*nv; if (v<0) v+=nv;
iv = min(nv-1, int(v));
auto dv = v-iv;
idxv0 = (iv-nspread+1+nv)%nv;
val = vis*exp(-fac*(du*du + dv*dv));
auto fu0 = exp(2*fac*du);
auto fv0 = exp(2*fac*dv);
auto fu = 1.;
auto fv = 1.;
for (int i=0; i<nspread; ++i)
{
ku[nspread-i-1] = kernel[i]/fu;
kv[nspread-i-1] = kernel[i]/fv;
fu *= fu0;
fv *= fv0;
ku[nspread+i] = kernel[i+1]*fu;
kv[nspread+i] = kernel[i+1]*fv;
}
}
};
class Buffer
{
protected:
int nu, nv, nspread, su;
public:
int sv;
protected:
int u0, v0;
bool need_to_move(int iu, int iv) const
{
return (abs(iu-u0)>su-nspread) || (abs(iv-v0)>sv-nspread);
}
void update_position(int iu, int iv)
{
int safe_u = su-nspread, safe_v = sv-nspread;
u0=max(safe_u, min(nu-1-safe_u, iu));
v0=max(safe_v, min(nv-1-safe_v, iv));
}
public:
Buffer(int nu_, int nv_, int nspread_)
: nu(nu_), nv(nv_), nspread(nspread_),
su(nspread+min(nspread, nu)), sv(nspread+min(nspread, nv)),
u0(-1000000), v0(-1000000)
{}
};
class WriteBuffer: public Buffer
{
private:
vector<complex<double>> data;
complex<double> *grid;
void dump()
{
if (u0<0) return;
#pragma omp critical
{
int idxu = (u0-su+1+nu)%nu;
int idxv0 = (v0-sv+1+nv)%nv;
for (int iu=0; iu<2*su; ++iu)
{
int idxv = idxv0;
for (int iv=0; iv<2*sv; ++iv)
{
grid[idxu*nv + idxv] += data[iu*2*sv + iv];
if (++idxv>=nv) idxv=0;
}
if (++idxu>=nu) idxu=0;
}
}
}
public:
complex<double> *p0;
WriteBuffer(int nu_, int nv_, int nspread_, complex<double> *grid_)
: Buffer(nu_, nv_, nspread_), data(4*su*sv,0.), grid(grid_) {}
~WriteBuffer() { dump(); }
void prep_write(int iu, int iv)
/* iu = [0; nu-1]; iv = [0; nv-1] */
{
if (need_to_move(iu, iv))
{
dump();
update_position(iu, iv);
fill(data.begin(), data.end(), 0.);
}
p0 = data.data() + 2*sv*(iu-u0+su-nspread) + iv-v0+sv-nspread;
}
};
class ReadBuffer: public Buffer
{
private:
vector<complex<double>> data;
const complex<double> *grid;
void load()
{
int idxu = (u0-su+1+nu)%nu;
int idxv0 = (v0-sv+1+nv)%nv;
for (int iu=0; iu<2*su; ++iu)
{
int idxv = idxv0;
for (int iv=0; iv<2*sv; ++iv)
{
data[iu*2*sv + iv] = grid[idxu*nv + idxv];
if (++idxv>=nv) idxv=0;
}
if (++idxu>=nu) idxu=0;
}
}
public:
const complex<double> *p0;
ReadBuffer(int nu_, int nv_, int nspread_, const complex<double> *grid_)
: Buffer(nu_, nv_, nspread_), data(4*su*sv,0.), grid(grid_) {}
void prep_read(int iu, int iv)
/* iu = [0; nu-1]; iv = [0; nv-1] */
{
if (need_to_move(iu, iv))
{
update_position(iu, iv);
load();
}
p0 = data.data() + 2*sv*(iu-u0+su-nspread) + iv-v0+sv-nspread;
}
};
a_c_c to_grid (const a_d_c &uv_, const a_c_c &vis_,
int nu, int nv, int nspread, double r2lamb)
{
myassert(uv_.ndim()==2, "uv array must be 2D");
myassert(uv_.shape(1)==2, "uv.shape[1] must be 2");
int nvis = uv_.shape(0);
myassert(vis_.ndim()==1, "vis array must be 1D");
myassert(vis_.shape(0)==nvis, "array size mismatch");
auto uv = uv_.data();
auto vis = vis_.data();
int odim[] = {nu,nv};
a_c_c res(odim);
auto grid = res.mutable_data();
for (int i=0; i<nu*nv; ++i) grid[i] = 0.;
#pragma omp parallel
{
Helper hlp(nu, nv, nspread, r2lamb);
WriteBuffer buf(nu, nv, nspread, grid);
// Loop over sampling points
#pragma omp for schedule(dynamic,10000)
for (int ipart=0; ipart<nvis; ++ipart)
{
hlp.update(uv[2*ipart], uv[2*ipart+1], vis[ipart]);
buf.prep_write(hlp.iu, hlp.iv);
for (int cu=0; cu<2*nspread; ++cu)
{
complex<double> tmp = hlp.val*hlp.ku[cu];
for (int cv=0; cv<2*nspread; ++cv)
buf.p0[cu*2*buf.sv + cv] += tmp*hlp.kv[cv];
}
}
} // end of parallel region
return res;
}
a_c_c from_grid (const a_d_c &uv_, const a_c_c &grid_,
int nu, int nv, int nspread, double r2lamb)
{
myassert(uv_.ndim()==2, "uv array must be 2D");
myassert(uv_.shape(1)==2, "uv.shape[1] must be 2");
myassert(grid_.ndim()==2, "grid array must be 2D");
int nvis = uv_.shape(0);
auto uv = uv_.data();
auto grid = grid_.data();
myassert(nu==grid_.shape(0), "oops");
myassert(nv==grid_.shape(1), "oops");
int odim[] = {nvis};
a_c_c res(odim);
auto vis = res.mutable_data();
// Loop over sampling points
#pragma omp parallel
{
Helper hlp(nu, nv, nspread, r2lamb);
ReadBuffer buf(nu, nv, nspread, grid);
#pragma omp for schedule(dynamic,10000)
for (int ipart=0; ipart<nvis; ++ipart)
{
hlp.update(uv[2*ipart], uv[2*ipart+1], 1.);
complex<double> r = 0.;
buf.prep_read(hlp.iu, hlp.iv);
for (int cu=0; cu<2*nspread; ++cu)
{
complex<double> tmp = 0.;
for (int cv=0; cv<2*nspread; ++cv)
tmp += buf.p0[cu*2*buf.sv + cv]*hlp.kv[cv];
r+=tmp*hlp.ku[cu];
}
vis[ipart] = r*hlp.val;
}
}
return res;
}
} // unnamed namespace
PYBIND11_MODULE(nifty_gridder, m)
{
using namespace pybind11::literals;
m.def("peanoindex",&peanoindex);
m.def("to_grid",&to_grid);
m.def("from_grid",&from_grid);
}
from setuptools import setup, Extension, Distribution
import setuptools.command.build_ext
import sys
import sysconfig
import os
import os.path
import distutils.sysconfig
import itertools
from glob import iglob
def _get_distutils_build_directory():
"""
Returns the directory distutils uses to build its files.
We need this directory since we build extensions which have to link
other ones.
"""
pattern = "lib.{platform}-{major}.{minor}"
return os.path.join(
'build', pattern.format(platform=sysconfig.get_platform(),
major=sys.version_info[0],
minor=sys.version_info[1]))
class _deferred_pybind11_include(object):
def __init__(self, user=False):
self.user = user
def __str__(self):
import pybind11
return pybind11.get_include(self.user)
def _strip_inc(input):
res = []
for i in input:
if "inc.c" not in i:
res.append(i)
return res
def _get_cc_files(directory):
path = directory
iterable_sources = (iglob(os.path.join(root, '*.cc'))
for root, dirs, files in os.walk(path))
source_files = itertools.chain.from_iterable(iterable_sources)
return list(source_files)
def _remove_strict_prototype_option_from_distutils_config():
strict_prototypes = '-Wstrict-prototypes'
config = distutils.sysconfig.get_config_vars()
for key, value in config.items():
if strict_prototypes in str(value):
config[key] = config[key].replace(strict_prototypes, '')
_remove_strict_prototype_option_from_distutils_config()
extra_compile_args = []
openmp_libs = []
extra_cc_compile_args = []
include_dirs = ['./', _deferred_pybind11_include(),
_deferred_pybind11_include(True)]
library_dirs = [_get_distutils_build_directory()]
python_module_link_args = []
base_library_link_args = []
if sys.platform == 'darwin':
extra_cc_compile_args.append('--std=c++14')
extra_cc_compile_args.append('--stdlib=libc++')
extra_compile_args.append('-mmacosx-version-min=10.9')
vars = distutils.sysconfig.get_config_vars()
vars['LDSHARED'] = vars['LDSHARED'].replace('-bundle', '')
python_module_link_args.append('-bundle')
builder = setuptools.command.build_ext.build_ext(Distribution())
base_library_link_args.append('-dynamiclib')
else:
extra_compile_args += ['-fopenmp', '-march=native', '-O3', '-ffast-math']
python_module_link_args += ['-fopenmp', '-march=native']
extra_cc_compile_args.append('--std=c++14')