Commit 8169eaab authored by Martin Reinecke's avatar Martin Reinecke
Browse files

first import of libsharp

parent a0f9efc1
ACLOCAL_AMFLAGS = -I m4
lib_LTLIBRARIES = libmrutil.la
libmrutil_la_SOURCES = \
libsharp2/sharp_utils.h \
libsharp2/sharp_utils.cc \
libsharp2/sharp.cc \
libsharp2/sharp_almhelpers.cc \
libsharp2/sharp_core.cc \
libsharp2/sharp_geomhelpers.cc \
libsharp2/sharp_ylmgen_c.cc \
libsharp2/sharp_internal.h \
libsharp2/sharp_vecsupport.h \
libsharp2/sharp_ylmgen_c.h \
mr_util/cmplx.h \
mr_util/error_handling.cc \
mr_util/error_handling.h \
mr_util/fft.h \
mr_util/gl_integrator.h \
mr_util/mav.h \
mr_util/string_utils.cc \
mr_util/string_utils.h \
mr_util/system.cc \
mr_util/system.h \
mr_util/threading.h \
mr_util/timers.h \
mr_util/unity_roots.h \
mr_util/useful_macros.h
# format is "current:revision:age"
# any change: increase revision
# any interface change: increase current, revision=0
# any backward-compatible change: increase age
# any backward-incompatible change: age=0
# ==> age <= current
libmrutil_la_LDFLAGS = -version-info 0:0:0 -lpthread
AM_CXXFLAGS = @AM_CXXFLAGS@
if HAVE_MULTIARCH
libavx_la_SOURCES = libsharp2/sharp_core_inc.cc
libavx2_la_SOURCES = libsharp2/sharp_core_inc.cc
libfma_la_SOURCES = libsharp2/sharp_core_inc.cc
libfma4_la_SOURCES = libsharp2/sharp_core_inc.cc
libavx512f_la_SOURCES = libsharp2/sharp_core_inc.cc
noinst_LTLIBRARIES = libavx.la libavx2.la libfma.la libfma4.la libavx512f.la
libavx_la_CXXFLAGS = ${AM_CXXFLAGS} -mavx -DARCH=avx
libavx2_la_CXXFLAGS = ${AM_CXXFLAGS} -mavx2 -DARCH=avx2
libfma_la_CXXFLAGS = ${AM_CXXFLAGS} -mfma -DARCH=fma
libfma4_la_CXXFLAGS = ${AM_CXXFLAGS} -mfma4 -DARCH=fma4
libavx512f_la_CXXFLAGS = ${AM_CXXFLAGS} -mavx512f -DARCH=avx512f
libmrutil_la_LIBADD = libavx.la libavx2.la libfma.la libfma4.la libavx512f.la
endif
nobase_include_HEADERS = \
libsharp2/sharp.h \
libsharp2/sharp_mpi.h \
libsharp2/sharp_geomhelpers.h \
libsharp2/sharp_almhelpers.h \
libsharp2/sharp_cxx.h
EXTRA_DIST = \
runtest.sh fortran/sharp.f90 fortran/test_sharp.f90 libsharp2/sharp_mpi.cc
check_PROGRAMS = sharp2_testsuite
sharp2_testsuite_SOURCES = test/sharp2_testsuite.cc
sharp2_testsuite_LDADD = libmrutil.la
TESTS = runtest.sh
pkgconfigdir = $(libdir)/pkgconfig
nodist_pkgconfig_DATA = @PACKAGE_NAME@.pc
DISTCLEANFILES=@PACKAGE_NAME@.pc @PACKAGE_NAME@.pc.in @PACKAGE_NAME@-uninstalled.pc @PACKAGE_NAME@-uninstalled.sh
AC_INIT([libmrutil], [1.0.0])
AM_INIT_AUTOMAKE([foreign subdir-objects -Wall -Werror])
AM_MAINTAINER_MODE([enable])
dnl
dnl Needed for linking on Windows.
dnl Protect with m4_ifdef because AM_PROG_AR is required in
dnl autoconf >= 1.12 when using -Wall, but the macro is
dnl absent in old versions of autoconf.
dnl
m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
LT_INIT
AC_CONFIG_MACRO_DIR([m4])
dnl
dnl Enable silent build rules if this version of Automake supports them
dnl
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AC_PROG_CXX
AX_CXX_COMPILE_STDCXX([17])
AC_PROG_LIBTOOL
tmpval=`echo $CXXFLAGS | grep -c '\-DMULTIARCH'`
AM_CONDITIONAL([HAVE_MULTIARCH], [test $tmpval -gt 0])
PACKAGE_LIBS="-lmrutil"
dnl
dnl Create pkgconfig .pc file.
dnl
AX_CREATE_PKGCONFIG_INFO(,,,,[])
AC_SUBST([AM_CXXFLAGS])
AC_CONFIG_FILES([Makefile])
AC_OUTPUT
/*
* This file is part of libsharp2.
*
* libsharp2 is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp2; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* libsharp2 is being developed at the Max-Planck-Institut fuer Astrophysik */
/*! \file sharp.c
* Spherical transform library
*
* Copyright (C) 2006-2019 Max-Planck-Society
* \author Martin Reinecke \author Dag Sverre Seljebotn
*/
#include <cmath>
#include <cstring>
#include <atomic>
#include "mr_util/fft.h"
#include "libsharp2/sharp_ylmgen_c.h"
#include "libsharp2/sharp_internal.h"
#include "libsharp2/sharp_utils.h"
#include "libsharp2/sharp_almhelpers.h"
#include "libsharp2/sharp_geomhelpers.h"
#include "mr_util/threading.h"
#include "mr_util/useful_macros.h"
#include "mr_util/error_handling.h"
#include "mr_util/timers.h"
typedef complex<double> dcmplx;
typedef complex<float> fcmplx;
static const double sqrt_one_half = 0.707106781186547572737310929369;
static const double sqrt_two = 1.414213562373095145474621858739;
static int chunksize_min=500, nchunks_max=10;
static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize)
{
*chunksize = (ndata+nchunks_max-1)/nchunks_max;
if (*chunksize>=chunksize_min) // use max number of chunks
*chunksize = ((*chunksize+nmult-1)/nmult)*nmult;
else // need to adjust chunksize and nchunks
{
*nchunks = (ndata+chunksize_min-1)/chunksize_min;
*chunksize = (ndata+(*nchunks)-1)/(*nchunks);
if (*nchunks>1)
*chunksize = ((*chunksize+nmult-1)/nmult)*nmult;
}
*nchunks = (ndata+(*chunksize)-1)/(*chunksize);
}
MRUTIL_NOINLINE int sharp_get_mlim (int lmax, int spin, double sth, double cth)
{
double ofs=lmax*0.01;
if (ofs<100.) ofs=100.;
double b = -2*spin*fabs(cth);
double t1 = lmax*sth+ofs;
double c = (double)spin*spin-t1*t1;
double discr = b*b-4*c;
if (discr<=0) return lmax;
double res=(-b+sqrt(discr))/2.;
if (res>lmax) res=lmax;
return (int)(res+0.5);
}
typedef struct
{
double phi0_;
dcmplx *shiftarr;
int s_shift;
mr::detail_fft::rfftp<double> *plan;
int length;
int norot;
} ringhelper;
static void ringhelper_init (ringhelper *self)
{
static ringhelper rh_null = { 0, NULL, 0, NULL, 0, 0 };
*self = rh_null;
}
static void ringhelper_destroy (ringhelper *self)
{
delete self->plan;
DEALLOC(self->shiftarr);
ringhelper_init(self);
}
MRUTIL_NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
{
self->norot = (fabs(phi0)<1e-14);
if (!(self->norot))
if ((mmax!=self->s_shift-1) || (!FAPPROX(phi0,self->phi0_,1e-12)))
{
RESIZE (self->shiftarr,dcmplx,mmax+1);
self->s_shift = mmax+1;
self->phi0_ = phi0;
// FIXME: improve this by using sincos2pibyn(nph) etc.
for (int m=0; m<=mmax; ++m)
self->shiftarr[m] = dcmplx(cos(m*phi0),sin(m*phi0));
// double *tmp=(double *) self->shiftarr;
// sincos_multi (mmax+1, phi0, &tmp[1], &tmp[0], 2);
}
// if (!self->plan) self->plan=pocketfft_make_plan_r(nph);
if (nph!=(int)self->length)
{
if (self->plan) delete self->plan;
self->plan=new mr::detail_fft::rfftp<double>(nph);
self->length=nph;
}
}
static int ringinfo_compare (const void *xa, const void *xb)
{
const sharp_ringinfo *a=(const sharp_ringinfo *)xa, *b=(const sharp_ringinfo *)xb;
return (a->sth < b->sth) ? -1 : (a->sth > b->sth) ? 1 : 0;
}
static int ringpair_compare (const void *xa, const void *xb)
{
const sharp_ringpair *a=(const sharp_ringpair *)xa, *b=(const sharp_ringpair *)xb;
// return (a->r1.sth < b->r1.sth) ? -1 : (a->r1.sth > b->r1.sth) ? 1 : 0;
if (a->r1.nph==b->r1.nph)
return (a->r1.phi0 < b->r1.phi0) ? -1 :
((a->r1.phi0 > b->r1.phi0) ? 1 :
(a->r1.cth>b->r1.cth ? -1 : 1));
return (a->r1.nph<b->r1.nph) ? -1 : 1;
}
void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval,
const ptrdiff_t *mstart, int flags, sharp_alm_info **alm_info)
{
sharp_alm_info *info = RALLOC(sharp_alm_info,1);
info->lmax = lmax;
info->nm = nm;
info->mval = RALLOC(int,nm);
info->mvstart = RALLOC(ptrdiff_t,nm);
info->stride = stride;
info->flags = flags;
for (int mi=0; mi<nm; ++mi)
{
info->mval[mi] = mval[mi];
info->mvstart[mi] = mstart[mi];
}
*alm_info = info;
}
void sharp_make_alm_info (int lmax, int mmax, int stride,
const ptrdiff_t *mstart, sharp_alm_info **alm_info)
{
int *mval=RALLOC(int,mmax+1);
for (int i=0; i<=mmax; ++i)
mval[i]=i;
sharp_make_general_alm_info (lmax, mmax+1, stride, mval, mstart, 0, alm_info);
DEALLOC(mval);
}
ptrdiff_t sharp_alm_index (const sharp_alm_info *self, int l, int mi)
{
MR_assert(!(self->flags & SHARP_PACKED),
"sharp_alm_index not applicable with SHARP_PACKED alms");
return self->mvstart[mi]+self->stride*l;
}
ptrdiff_t sharp_alm_count(const sharp_alm_info *self)
{
ptrdiff_t result=0;
for (int im=0; im!=self->nm; ++im)
{
int m=self->mval[im];
ptrdiff_t x=(self->lmax + 1 - m);
if ((m!=0)&&((self->flags&SHARP_PACKED)!=0)) result+=2*x;
else result+=x;
}
return result;
}
void sharp_destroy_alm_info (sharp_alm_info *info)
{
DEALLOC (info->mval);
DEALLOC (info->mvstart);
DEALLOC (info);
}
void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
const int *stride, const double *phi0, const double *theta,
const double *wgt, sharp_geom_info **geom_info)
{
sharp_geom_info *info = RALLOC(sharp_geom_info,1);
sharp_ringinfo *infos = RALLOC(sharp_ringinfo,nrings);
int pos=0;
info->pair=RALLOC(sharp_ringpair,nrings);
info->npairs=0;
info->nphmax=0;
*geom_info = info;
for (int m=0; m<nrings; ++m)
{
infos[m].theta = theta[m];
infos[m].cth = cos(theta[m]);
infos[m].sth = sin(theta[m]);
infos[m].weight = (wgt != NULL) ? wgt[m] : 1.;
infos[m].phi0 = phi0[m];
infos[m].ofs = ofs[m];
infos[m].stride = stride[m];
infos[m].nph = nph[m];
if (info->nphmax<nph[m]) info->nphmax=nph[m];
}
qsort(infos,nrings,sizeof(sharp_ringinfo),ringinfo_compare);
while (pos<nrings)
{
info->pair[info->npairs].r1=infos[pos];
if ((pos<nrings-1) && FAPPROX(infos[pos].cth,-infos[pos+1].cth,1e-12))
{
if (infos[pos].cth>0) // make sure northern ring is in r1
info->pair[info->npairs].r2=infos[pos+1];
else
{
info->pair[info->npairs].r1=infos[pos+1];
info->pair[info->npairs].r2=infos[pos];
}
++pos;
}
else
info->pair[info->npairs].r2.nph=-1;
++pos;
++info->npairs;
}
DEALLOC(infos);
qsort(info->pair,info->npairs,sizeof(sharp_ringpair),ringpair_compare);
}
ptrdiff_t sharp_map_size(const sharp_geom_info *info)
{
ptrdiff_t result = 0;
for (int m=0; m<info->npairs; ++m)
{
result+=info->pair[m].r1.nph;
result+=(info->pair[m].r2.nph>=0) ? (info->pair[m].r2.nph) : 0;
}
return result;
}
void sharp_destroy_geom_info (sharp_geom_info *geom_info)
{
DEALLOC (geom_info->pair);
DEALLOC (geom_info);
}
/* This currently requires all m values from 0 to nm-1 to be present.
It might be worthwhile to relax this criterion such that holes in the m
distribution are permissible. */
static int sharp_get_mmax (int *mval, int nm)
{
//FIXME: if gaps are allowed, we have to search the maximum m in the array
int *mcheck=RALLOC(int,nm);
SET_ARRAY(mcheck,0,nm,0);
for (int i=0; i<nm; ++i)
{
int m_cur=mval[i];
MR_assert((m_cur>=0) && (m_cur<nm), "not all m values are present");
MR_assert(mcheck[m_cur]==0, "duplicate m value");
mcheck[m_cur]=1;
}
DEALLOC(mcheck);
return nm-1;
}
MRUTIL_NOINLINE static void ringhelper_phase2ring (ringhelper *self,
const sharp_ringinfo *info, double *data, int mmax, const dcmplx *phase,
int pstride, int flags)
{
int nph = info->nph;
ringhelper_update (self, nph, mmax, info->phi0);
double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1.;
if (flags&SHARP_REAL_HARMONICS)
wgt *= sqrt_one_half;
if (nph>=2*mmax+1)
{
if (self->norot)
for (int m=0; m<=mmax; ++m)
{
data[2*m]=phase[m*pstride].real()*wgt;
data[2*m+1]=phase[m*pstride].imag()*wgt;
}
else
for (int m=0; m<=mmax; ++m)
{
dcmplx tmp = phase[m*pstride]*self->shiftarr[m];
data[2*m]=tmp.real()*wgt;
data[2*m+1]=tmp.imag()*wgt;
}
for (int m=2*(mmax+1); m<nph+2; ++m)
data[m]=0.;
}
else
{
data[0]=phase[0].real()*wgt;
SET_ARRAY(data,1,nph+2,0.);
int idx1=1, idx2=nph-1;
for (int m=1; m<=mmax; ++m)
{
dcmplx tmp = phase[m*pstride]*wgt;
if(!self->norot) tmp*=self->shiftarr[m];
if (idx1<(nph+2)/2)
{
data[2*idx1]+=tmp.real();
data[2*idx1+1]+=tmp.imag();
}
if (idx2<(nph+2)/2)
{
data[2*idx2]+=tmp.real();
data[2*idx2+1]-=tmp.imag();
}
if (++idx1>=nph) idx1=0;
if (--idx2<0) idx2=nph-1;
}
}
data[1]=data[0];
self->plan->exec(&(data[1]), 1., false);
}
MRUTIL_NOINLINE static void ringhelper_ring2phase (ringhelper *self,
const sharp_ringinfo *info, double *data, int mmax, dcmplx *phase,
int pstride, int flags)
{
int nph = info->nph;
#if 1
int maxidx = mmax; /* Enable this for traditional Healpix compatibility */
#else
int maxidx = IMIN(nph-1,mmax);
#endif
ringhelper_update (self, nph, mmax, -info->phi0);
double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1;
if (flags&SHARP_REAL_HARMONICS)
wgt *= sqrt_two;
self->plan->exec (&(data[1]), 1., true);
data[0]=data[1];
data[1]=data[nph+1]=0.;
if (maxidx<=nph/2)
{
if (self->norot)
for (int m=0; m<=maxidx; ++m)
phase[m*pstride] = dcmplx(data[2*m], data[2*m+1]) * wgt;
else
for (int m=0; m<=maxidx; ++m)
phase[m*pstride] =
dcmplx(data[2*m], data[2*m+1]) * self->shiftarr[m] * wgt;
}
else
{
for (int m=0; m<=maxidx; ++m)
{
int idx=m%nph;
dcmplx val;
if (idx<(nph-idx))
val = dcmplx(data[2*idx], data[2*idx+1]) * wgt;
else
val = dcmplx(data[2*(nph-idx)], -data[2*(nph-idx)+1]) * wgt;
if (!self->norot)
val *= self->shiftarr[m];
phase[m*pstride]=val;
}
}
for (int m=maxidx+1;m<=mmax; ++m)
phase[m*pstride]=0.;
}
MRUTIL_NOINLINE static void clear_map (const sharp_geom_info *ginfo, void *map,
int flags)
{
if (flags & SHARP_NO_FFT)
{
for (int j=0;j<ginfo->npairs;++j)
{
if (flags&SHARP_DP)
{
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
((dcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
((dcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
}
else
{
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
((fcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
((fcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
}
}
}
else
{
if (flags&SHARP_DP)
{
for (int j=0;j<ginfo->npairs;++j)
{
double *dmap=(double *)map;
if (ginfo->pair[j].r1.stride==1)
memset(&dmap[ginfo->pair[j].r1.ofs],0,
ginfo->pair[j].r1.nph*sizeof(double));
else
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
dmap[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
if ((ginfo->pair[j].r2.nph>0)&&(ginfo->pair[j].r2.stride==1))
memset(&dmap[ginfo->pair[j].r2.ofs],0,
ginfo->pair[j].r2.nph*sizeof(double));
else
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
dmap[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
}
}
else
{
for (int j=0;j<ginfo->npairs;++j)
{
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=0;
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=0;
}
}
}
}
MRUTIL_NOINLINE static void clear_alm (const sharp_alm_info *ainfo, void *alm,
int flags)
{
#define CLEARLOOP(real_t,body) \
{ \
real_t *talm = (real_t *)alm; \
for (int l=m;l<=ainfo->lmax;++l) \
body \
}
for (int mi=0;mi<ainfo->nm;++mi)
{
int m=ainfo->mval[mi];
ptrdiff_t mvstart = ainfo->mvstart[mi];
ptrdiff_t stride = ainfo->stride;
if (!(ainfo->flags&SHARP_PACKED))
mvstart*=2;
if ((ainfo->flags&SHARP_PACKED)&&(m==0))
{
if (flags&SHARP_DP)
CLEARLOOP(double, talm[mvstart+l*stride] = 0.;)
else
CLEARLOOP(float, talm[mvstart+l*stride] = 0.;)
}
else
{
stride*=2;
if (flags&SHARP_DP)
CLEARLOOP(double,talm[mvstart+l*stride]=talm[mvstart+l*stride+1]=0.;)
else
CLEARLOOP(float,talm[mvstart+l*stride]=talm[mvstart+l*stride+1]=0.;)
}
#undef CLEARLOOP
}
}
MRUTIL_NOINLINE static void init_output (sharp_job *job)
{
if (job->flags&SHARP_ADD) return;