diff --git a/COMPILE b/COMPILE
index 5b1c5b2c81c05d8acbfa73b9328637e97b3351d4..a89866223aa034d304808addbd7dd5baf9ee85b5 100644
--- a/COMPILE
+++ b/COMPILE
@@ -15,18 +15,18 @@ flags.
 Fast math
 ---------
 
-Specifying "-ffast-math" is important for all compilers, since it allows the
-compiler to fuse multiplications and additions into FMA instructions, which is
-forbidden by the C99 standard. Since FMAs are a central aspect of the algorithm,
-they are needed for optimum performance.
+Specifying "-ffast-math" or "-ffp-contract=fast" is important for all compilers,
+since it allows the compiler to fuse multiplications and additions into FMA
+instructions, which is forbidden by the C99 standard. Since FMAs are a central
+aspect of the algorithm, they are needed for optimum performance.
 
 If you are calling libsharp from other code which requires strict adherence
 to the C99 standard, you should still be able to compile libsharp with
 "-ffast-math" without any problems.
 
 
-Runtime CPU selection with gcc
-------------------------------
+Runtime CPU selection with gcc and clang
+----------------------------------------
 
 When using a recent gcc (6.0 and newer) or a recent clang (successfully tested
 with versions 6 and 7) on an x86_64 platform, the build machinery can compile
@@ -44,9 +44,11 @@ resulting binary will most likely not run on other computers, though.
 OpenMP
 ------
 
-OpenMP should be switched on for maximum performance, and at runtime
-OMP_NUM_THREADS should be set to the number of hardware threads (not physical
-cores) of the system.
+OpenMP is enabled by default if the selected compiler supports it.
+It can be disabled at configuration time by specifying "--disable-openmp" at the
+configure command line.
+At runtime OMP_NUM_THREADS should be set to the number of hardware threads
+(not physical cores) of the system.
 (Usually this is  already the default setting when OMP_NUM_THREADS is not
 specified.)
 
@@ -65,19 +67,19 @@ Example configure invocations
 =============================
 
 GCC, OpenMP, portable binary:
-CFLAGS="-DMULTIARCH -std=c99 -O3 -ffast-math -fopenmp" ./configure
+CFLAGS="-DMULTIARCH -std=c99 -O3 -ffast-math" ./configure
 
 GCC, no OpenMP, portable binary:
-CFLAGS="-DMULTIARCH -std=c99 -O3 -ffast-math" ./configure
+CFLAGS="-DMULTIARCH -std=c99 -O3 -ffast-math" ./configure --disable-openmp
 
 Clang, OpenMP, portable binary:
-CC=clang CFLAGS="-DMULTIARCH -std=c99 -O3 -ffast-math -fopenmp" ./configure
+CC=clang CFLAGS="-DMULTIARCH -std=c99 -O3 -ffast-math" ./configure
 
 Intel C compiler, OpenMP, nonportable binary:
-CC=icc CFLAGS="-std=c99 -O3 -march=native -ffast-math -fopenmp -D__PURE_INTEL_C99_HEADERS__" ./configure
+CC=icc CFLAGS="-std=c99 -O3 -march=native -ffast-math -D__PURE_INTEL_C99_HEADERS__" ./configure
 
-MPI support, nonportable binary:
-CC=mpicc CFLAGS="-DUSE_MPI -std=c99 -O3 -march=native -ffast-math" ./configure
+MPI support, OpenMP, portable binary:
+CC=mpicc CFLAGS="-DUSE_MPI -DMULTIARCH -std=c99 -O3 -ffast-math" ./configure
 
 Additional GCC flags for pedantic warning and debugging:
 
diff --git a/Makefile.am b/Makefile.am
index 0dfc6e138325481efd218b0dd41b3697ad6c9c37..8b7d2430fdb8bb6b23c070bcc30aea4ca901cc38 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -3,10 +3,10 @@ ACLOCAL_AMFLAGS = -I m4
 lib_LTLIBRARIES = libsharp.la
 
 libsharp_la_SOURCES = \
-  c_utils/c_utils.c \
-  c_utils/c_utils.h \
   pocketfft/pocketfft.c \
   pocketfft/pocketfft.h \
+  libsharp/sharp_utils.c \
+  libsharp/sharp_utils.h \
   libsharp/sharp.c \
   libsharp/sharp_almhelpers.c \
   libsharp/sharp_core.c \
@@ -18,6 +18,16 @@ libsharp_la_SOURCES = \
   libsharp/sharp_vecsupport.h \
   libsharp/sharp_ylmgen_c.h
 
+# format is "current:revision:age"
+# any change: increase revision
+# any interface change: increase current, revision=0
+# any backward-compatible change: increase age
+# any backward-incompatible change: age=0
+# ==> age <= current
+libsharp_la_LDFLAGS = -version-info 0:0:0
+
+AM_CFLAGS = @AM_CFLAGS@
+
 if HAVE_MULTIARCH
 
 libavx_la_SOURCES = libsharp/sharp_core_inc.c
@@ -38,23 +48,22 @@ libsharp_la_LIBADD = libavx.la libavx2.la libfma.la libfma4.la libavx512f.la
 
 endif
 
-include_HEADERS = \
+nobase_include_HEADERS = \
   libsharp/sharp.h \
+  libsharp/sharp_mpi.h \
   libsharp/sharp_geomhelpers.h \
   libsharp/sharp_almhelpers.h \
   libsharp/sharp_cxx.h
 
 EXTRA_DIST = \
-  runtest.sh
+  runtest.sh fortran/sharp.f90 fortran/test_sharp.f90 libsharp/sharp_mpi.c
 
 check_PROGRAMS = sharp_testsuite
-sharp_testsuite_SOURCES = libsharp/sharp_testsuite.c c_utils/memusage.c c_utils/memusage.h c_utils/walltime_c.c c_utils/walltime_c.h
-sharp_testsuite_LDADD = libsharp.la
+sharp_testsuite_SOURCES = test/sharp_testsuite.c test/memusage.c test/memusage.h
+sharp_testsuite_LDADD = libsharp.la -lm
 
 TESTS = runtest.sh
 
-AM_CFLAGS = -I$(top_srcdir)/c_utils -I$(top_srcdir)/libsharp @AM_CFLAGS@
-
 pkgconfigdir = $(libdir)/pkgconfig
 nodist_pkgconfig_DATA = @PACKAGE_NAME@.pc
 
diff --git a/c_utils/walltime_c.c b/c_utils/walltime_c.c
deleted file mode 100644
index 8f4ac0c4786687d1b360ebc28dc9670408eac9ef..0000000000000000000000000000000000000000
--- a/c_utils/walltime_c.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- *  This file is part of libc_utils.
- *
- *  libc_utils is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libc_utils is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libc_utils; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/*
- *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
-
-/*
- *  Functionality for reading wall clock time
- *
- *  Copyright (C) 2010-2016 Max-Planck-Society
- *  Author: Martin Reinecke
- */
-
-#if defined (_OPENMP)
-#include <omp.h>
-#elif defined (USE_MPI)
-#include "mpi.h"
-#elif defined (_WIN32)
-#include <Windows.h>
-#else
-#include <sys/time.h>
-#include <stdlib.h>
-#endif
-
-#include "walltime_c.h"
-
-double wallTime(void)
-  {
-#if defined (_OPENMP)
-  return omp_get_wtime();
-#elif defined (USE_MPI)
-  return MPI_Wtime();
-#elif defined (_WIN32)
-  static double inv_freq = -1.;
-  if (inv_freq<0)
-    {
-    LARGE_INTEGER freq;
-    QueryPerformanceFrequency(&freq);
-    inv_freq = 1. / double(freq.QuadPart);
-    }
-  LARGE_INTEGER count;
-  QueryPerformanceCounter(&count);
-  return count.QuadPart*inv_freq;
-#else
-  struct timeval t;
-  gettimeofday(&t, NULL);
-  return t.tv_sec + 1e-6*t.tv_usec;
-#endif
-  }
diff --git a/c_utils/walltime_c.h b/c_utils/walltime_c.h
deleted file mode 100644
index c291f17de3a5b29e1f68aad7ba27ef561b76e918..0000000000000000000000000000000000000000
--- a/c_utils/walltime_c.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- *  This file is part of libc_utils.
- *
- *  libc_utils is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License as published by
- *  the Free Software Foundation; either version 2 of the License, or
- *  (at your option) any later version.
- *
- *  libc_utils is distributed in the hope that it will be useful,
- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *  GNU General Public License for more details.
- *
- *  You should have received a copy of the GNU General Public License
- *  along with libc_utils; if not, write to the Free Software
- *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
- */
-
-/*
- *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
-
-/*! \file walltime_c.h
- *  Functionality for reading wall clock time
- *
- *  Copyright (C) 2010 Max-Planck-Society
- *  \author Martin Reinecke
- */
-
-#ifndef PLANCK_WALLTIME_C_H
-#define PLANCK_WALLTIME_C_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*! Returns an approximation of the current wall time (in seconds).
-    The first available of the following timers will be used:
-    <ul>
-    <li> \a omp_get_wtime(), if OpenMP is available
-    <li> \a MPI_Wtime(), if MPI is available
-    <li> \a gettimeofday() otherwise
-    </ul>
-    \note Only useful for measuring time differences.
-    \note This function has an execution time between 10 and 100 nanoseconds. */
-double wallTime(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/configure.ac b/configure.ac
index a619d3fe0610aad98d3c4dbabcd8fb1b10597157..b020181ed3eb757f5137637dc5f2ed1ca93c0fda 100644
--- a/configure.ac
+++ b/configure.ac
@@ -14,12 +14,6 @@ m4_ifdef([AM_PROG_AR], [AM_PROG_AR])
 LT_INIT
 AC_CONFIG_MACRO_DIR([m4])
 
-dnl
-dnl By default, install the headers into a subdirectory of
-dnl ${prefix}/include to avoid possible header filename collisions.
-dnl
-includedir="${includedir}/${PACKAGE_NAME}"
-
 dnl
 dnl Enable silent build rules if this version of Automake supports them
 dnl
@@ -27,22 +21,23 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 
 
 AC_PROG_CC_C99
-
-# adding the lib to the files to link
-LIBS="-lm"
-
+AC_OPENMP
 AC_PROG_LIBTOOL
 
 tmpval=`echo $CFLAGS | grep -c '\-DMULTIARCH'`
 AM_CONDITIONAL([HAVE_MULTIARCH], [test $tmpval -gt 0])
 
+AM_CFLAGS="$AM_CFLAGS $OPENMP_CFLAGS"
+
+PACKAGE_LIBS="-lsharp"
+PACKAGE_CFLAGS="$PACKAGE_CFLAGS $OPENMP_CFLAGS"
+PACKAGE_LDFLAGS="$PACKAGE_LDFLAGS $OPENMP_CFLAGS"
+
 dnl
 dnl Create pkgconfig .pc file.
 dnl
 AX_CREATE_PKGCONFIG_INFO(,,,,[])
-AC_SUBST([LIBS])
 AC_SUBST([AM_CFLAGS])
-AC_SUBST([AM_LDFLAGS])
 
 AC_CONFIG_FILES([Makefile])
 AC_OUTPUT
diff --git a/fortran/sharp.f90 b/fortran/sharp.f90
new file mode 100644
index 0000000000000000000000000000000000000000..d6b5eee3cd8e99946739afcf4e211958a09ca20f
--- /dev/null
+++ b/fortran/sharp.f90
@@ -0,0 +1,242 @@
+module sharp
+  use iso_c_binding
+  implicit none
+  ! alm_info flags
+  integer, parameter :: SHARP_PACKED = 1
+
+  ! sharp job types
+  enum, bind(c)
+      enumerator :: SHARP_YtW = 0
+      enumerator :: SHARP_Y = 1
+      enumerator :: SHARP_Yt = 2
+      enumerator :: SHARP_WY = 3
+      enumerator :: SHARP_ALM2MAP_DERIV1 = 4
+   end enum
+
+  ! sharp job flags
+  integer, parameter :: SHARP_DP             = ISHFT(1, 4)
+  integer, parameter :: SHARP_ADD            = ISHFT(1, 5)
+  integer, parameter :: SHARP_REAL_HARMONICS = ISHFT(1, 6)
+  integer, parameter :: SHARP_NO_FFT         = ISHFT(1, 7)
+
+  type sharp_geom_info
+     type(c_ptr) :: handle
+     integer(c_intptr_t) :: n_local
+  end type sharp_geom_info
+
+  type sharp_alm_info
+     type(c_ptr) :: handle
+     integer(c_intptr_t) :: n_local
+  end type sharp_alm_info
+
+  interface
+
+     ! alm_info
+     subroutine sharp_make_general_alm_info( &
+         lmax, nm, stride, mval, mvstart, flags, alm_info) bind(c)
+       use iso_c_binding
+       integer(c_int), value, intent(in)    :: lmax, nm, stride, flags
+       integer(c_int), intent(in)           :: mval(nm)
+       integer(c_intptr_t), intent(in)     :: mvstart(nm)
+       type(c_ptr), intent(out)             :: alm_info
+     end subroutine sharp_make_general_alm_info
+
+     subroutine c_sharp_make_mmajor_real_packed_alm_info( &
+         lmax, stride, nm, ms, alm_info) bind(c, name='sharp_make_mmajor_real_packed_alm_info')
+       use iso_c_binding
+       integer(c_int), value, intent(in)    :: lmax, nm, stride
+       integer(c_int), intent(in), optional :: ms(nm)
+       type(c_ptr), intent(out)             :: alm_info
+     end subroutine c_sharp_make_mmajor_real_packed_alm_info
+
+     function c_sharp_alm_count(alm_info) bind(c, name='sharp_alm_count')
+       use iso_c_binding
+       integer(c_intptr_t)           :: c_sharp_alm_count
+       type(c_ptr), value, intent(in) :: alm_info
+     end function c_sharp_alm_count
+
+     subroutine c_sharp_destroy_alm_info(alm_info) bind(c, name='sharp_destroy_alm_info')
+       use iso_c_binding
+       type(c_ptr), value                   :: alm_info
+     end subroutine c_sharp_destroy_alm_info
+
+     ! geom_info
+     subroutine sharp_make_subset_healpix_geom_info ( &
+          nside, stride, nrings, rings, weight, geom_info) bind(c)
+       use iso_c_binding
+       integer(c_int), value, intent(in)    :: nside, stride, nrings
+       integer(c_int), intent(in), optional :: rings(nrings)
+       real(c_double), intent(in), optional :: weight(2 * nside)
+       type(c_ptr), intent(out)             :: geom_info
+     end subroutine sharp_make_subset_healpix_geom_info
+
+     subroutine c_sharp_destroy_geom_info(geom_info) bind(c, name='sharp_destroy_geom_info')
+       use iso_c_binding
+       type(c_ptr), value                   :: geom_info
+     end subroutine c_sharp_destroy_geom_info
+
+     function c_sharp_map_size(info) bind(c, name='sharp_map_size')
+       use iso_c_binding
+       integer(c_intptr_t) :: c_sharp_map_size
+       type(c_ptr), value   :: info
+     end function c_sharp_map_size
+
+
+     ! execute
+     subroutine c_sharp_execute(type, spin, alm, map, geom_info, alm_info, &
+                                flags, time, opcnt) bind(c, name='sharp_execute')
+       use iso_c_binding
+       integer(c_int), value                        :: type, spin, flags
+       type(c_ptr), value                           :: alm_info, geom_info
+       real(c_double), intent(out), optional        :: time
+       integer(c_long_long), intent(out), optional  :: opcnt
+       type(c_ptr), intent(in)                      :: alm(*), map(*)
+     end subroutine c_sharp_execute
+
+     subroutine c_sharp_execute_mpi(comm, type, spin, alm, map, geom_info, alm_info, &
+                                    flags, time, opcnt) bind(c, name='sharp_execute_mpi_fortran')
+       use iso_c_binding
+       integer(c_int), value                        :: comm, type, spin, flags
+       type(c_ptr), value                           :: alm_info, geom_info
+       real(c_double), intent(out), optional        :: time
+       integer(c_long_long), intent(out), optional  :: opcnt
+       type(c_ptr), intent(in)                      :: alm(*), map(*)
+     end subroutine c_sharp_execute_mpi
+  end interface
+
+  interface sharp_execute
+     module procedure sharp_execute_d
+  end interface
+
+contains
+  ! alm info
+
+  ! if ms is not passed, we default to using m=0..lmax.
+  subroutine sharp_make_mmajor_real_packed_alm_info(lmax, ms, alm_info)
+    use iso_c_binding
+    integer(c_int), value, intent(in)    :: lmax
+    integer(c_int), intent(in), optional :: ms(:)
+    type(sharp_alm_info), intent(out)    :: alm_info
+    !--
+    integer(c_int), allocatable          :: ms_copy(:)
+    integer(c_int)                       :: nm
+
+    if (present(ms)) then
+       nm = size(ms)
+       allocate(ms_copy(nm))
+       ms_copy = ms
+       call c_sharp_make_mmajor_real_packed_alm_info(lmax, 1, nm, ms_copy, alm_info=alm_info%handle)
+       deallocate(ms_copy)
+    else
+       call c_sharp_make_mmajor_real_packed_alm_info(lmax, 1, lmax + 1, alm_info=alm_info%handle)
+    end if
+    alm_info%n_local = c_sharp_alm_count(alm_info%handle)
+  end subroutine sharp_make_mmajor_real_packed_alm_info
+
+  subroutine sharp_destroy_alm_info(alm_info)
+    use iso_c_binding
+    type(sharp_alm_info), intent(inout) :: alm_info
+    call c_sharp_destroy_alm_info(alm_info%handle)
+    alm_info%handle = c_null_ptr
+  end subroutine sharp_destroy_alm_info
+
+
+  ! geom info
+  subroutine sharp_make_healpix_geom_info(nside, rings, weight, geom_info)
+    integer(c_int), value                :: nside
+    integer(c_int), optional             :: rings(:)
+    real(c_double), intent(in), optional :: weight(2 * nside)
+    type(sharp_geom_info), intent(out)   :: geom_info
+    !--
+    integer(c_int) :: nrings
+    integer(c_int), allocatable :: rings_copy(:)
+
+    if (present(rings)) then
+       nrings = size(rings)
+       allocate(rings_copy(nrings))
+       rings_copy = rings
+       call sharp_make_subset_healpix_geom_info(nside, 1, nrings, rings_copy, &
+                                                weight, geom_info%handle)
+       deallocate(rings_copy)
+    else
+       call sharp_make_subset_healpix_geom_info(nside, 1, nrings=4 * nside - 1, &
+                                                weight=weight, geom_info=geom_info%handle)
+    end if
+    geom_info%n_local = c_sharp_map_size(geom_info%handle)
+  end subroutine sharp_make_healpix_geom_info
+
+  subroutine sharp_destroy_geom_info(geom_info)
+    use iso_c_binding
+    type(sharp_geom_info), intent(inout) :: geom_info
+    call c_sharp_destroy_geom_info(geom_info%handle)
+    geom_info%handle = c_null_ptr
+  end subroutine sharp_destroy_geom_info
+
+
+  ! Currently the only mode supported is stacked (not interleaved) maps.
+  !
+  ! Note that passing the exact dimension of alm/map is necessary, it
+  ! prevents the caller from doing too crazy slicing prior to pass array
+  ! in...
+  !
+  ! Usage:
+  !
+  ! The alm array must have shape exactly alm(alm_info%n_local, nmaps)
+  ! The maps array must have shape exactly map(map_info%n_local, nmaps).
+  subroutine sharp_execute_d(type, spin, nmaps, alm, alm_info, map, geom_info, &
+                             add, time, opcnt, comm)
+    use iso_c_binding
+    use mpi
+    implicit none
+    integer(c_int), value                        :: type, spin, nmaps
+    integer(c_int), optional                     :: comm
+    logical, value, optional                     :: add  ! should add instead of replace out
+
+    type(sharp_alm_info)                         :: alm_info
+    type(sharp_geom_info)                        :: geom_info
+    real(c_double), intent(out), optional        :: time
+    integer(c_long_long), intent(out), optional  :: opcnt
+    real(c_double), target, intent(inout)        :: alm(0:alm_info%n_local - 1, 1:nmaps)
+    real(c_double), target, intent(inout)        :: map(0:geom_info%n_local - 1, 1:nmaps)
+    !--
+    integer(c_int)         :: mod_flags, ntrans, k
+    type(c_ptr), target    :: alm_ptr(nmaps)
+    type(c_ptr), target    :: map_ptr(nmaps)
+
+    mod_flags = SHARP_DP
+    if (present(add) .and. add) then
+       mod_flags = or(mod_flags, SHARP_ADD)
+    end if
+
+    if (spin == 0) then
+       ntrans = nmaps
+    else
+       ntrans = nmaps / 2
+    end if
+    if (ntrans/=1) print *, "ERROR: ntrans /= 1"
+
+    ! Set up pointer table to access maps
+    alm_ptr(:) = c_null_ptr
+    map_ptr(:) = c_null_ptr
+    do k = 1, nmaps
+       if (alm_info%n_local > 0) alm_ptr(k) = c_loc(alm(0, k))
+       if (geom_info%n_local > 0) map_ptr(k) = c_loc(map(0, k))
+    end do
+
+    if (present(comm)) then
+      call c_sharp_execute_mpi(comm, type, spin, alm_ptr, map_ptr, &
+          geom_info=geom_info%handle, &
+          alm_info=alm_info%handle, &
+          flags=mod_flags, &
+          time=time, &
+          opcnt=opcnt)
+    else
+      call c_sharp_execute(type, spin, alm_ptr, map_ptr, &
+          geom_info=geom_info%handle, &
+          alm_info=alm_info%handle, &
+          flags=mod_flags, &
+          time=time, &
+          opcnt=opcnt)
+   end if
+  end subroutine sharp_execute_d
+end module
diff --git a/fortran/test_sharp.f90 b/fortran/test_sharp.f90
new file mode 100644
index 0000000000000000000000000000000000000000..78e6c169d254c714956a042dfcd5372643ca26ed
--- /dev/null
+++ b/fortran/test_sharp.f90
@@ -0,0 +1,56 @@
+program test_sharp
+  use mpi
+  use sharp
+  use iso_c_binding, only : c_ptr, c_double
+  implicit none
+
+  integer, parameter :: lmax = 2, nside = 2
+  type(sharp_alm_info) :: alm_info
+  type(sharp_geom_info) :: geom_info
+
+  real(c_double), dimension(0:(lmax + 1)**2 - 1, 1:1) :: alm
+  real(c_double), dimension(0:12*nside**2 - 1, 1:1) :: map
+
+  integer(c_int), dimension(1:lmax + 1) :: ms
+  integer(c_int), dimension(1:4 * nside - 1) :: rings
+  integer(c_int) :: nm, m, nrings, iring
+  integer :: nodecount, rank, ierr
+
+  call MPI_Init(ierr)
+  call MPI_Comm_size(MPI_COMM_WORLD, nodecount, ierr)
+  call MPI_Comm_rank(MPI_COMM_WORLD, rank, ierr)
+
+  nm = 0
+  do m = rank, lmax, nodecount
+     nm = nm + 1
+     ms(nm) = m
+  end do
+
+  nrings = 0
+  do iring = rank + 1, 4 * nside - 1, nodecount
+     nrings = nrings + 1
+     rings(nrings) = iring
+  end do
+
+  alm = 0
+  map = 0
+  if (rank == 0) then
+    alm(0, 1) = 1
+  end if
+
+  print *, ms(1:nm)
+  call sharp_make_mmajor_real_packed_alm_info(lmax, ms=ms(1:nm), alm_info=alm_info)
+  print *, 'alm_info%n_local', alm_info%n_local
+  call sharp_make_healpix_geom_info(nside, rings=rings(1:nrings), geom_info=geom_info)
+  print *, 'geom_info%n_local', geom_info%n_local
+  print *, 'execute'
+  call sharp_execute(SHARP_Y, 0, 1, alm, alm_info, map, geom_info, comm=MPI_COMM_WORLD)
+
+  print *, alm
+  print *, map
+
+  call sharp_destroy_alm_info(alm_info)
+  call sharp_destroy_geom_info(geom_info)
+  print *, 'DONE'
+  call MPI_Finalize(ierr)
+end program test_sharp
diff --git a/libsharp/sharp.c b/libsharp/sharp.c
index 456b69ccefd73e877cc7c62fe099a9fa2e3f833a..a4345f2df3d8f479ce9db69b75eb3f7dca675fe2 100644
--- a/libsharp/sharp.c
+++ b/libsharp/sharp.c
@@ -16,28 +16,23 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp.c
  *  Spherical transform library
  *
- *  Copyright (C) 2006-2016 Max-Planck-Society
+ *  Copyright (C) 2006-2019 Max-Planck-Society
  *  \author Martin Reinecke \author Dag Sverre Seljebotn
  */
 
 #include <math.h>
 #include <string.h>
 #include "pocketfft/pocketfft.h"
-#include "sharp_ylmgen_c.h"
-#include "sharp_internal.h"
-#include "c_utils.h"
-#include "walltime_c.h"
-#include "sharp_almhelpers.h"
-#include "sharp_geomhelpers.h"
+#include "libsharp/sharp_ylmgen_c.h"
+#include "libsharp/sharp_internal.h"
+#include "libsharp/sharp_utils.h"
+#include "libsharp/sharp_almhelpers.h"
+#include "libsharp/sharp_geomhelpers.h"
 
 typedef complex double dcmplx;
 typedef complex float  fcmplx;
@@ -81,7 +76,7 @@ typedef struct
   double phi0_;
   dcmplx *shiftarr;
   int s_shift;
-  rfft_plan plan;
+  pocketfft_plan_r plan;
   int length;
   int norot;
   } ringhelper;
@@ -94,7 +89,7 @@ static void ringhelper_init (ringhelper *self)
 
 static void ringhelper_destroy (ringhelper *self)
   {
-  if (self->plan) destroy_rfft_plan(self->plan);
+  if (self->plan) pocketfft_delete_plan_r(self->plan);
   DEALLOC(self->shiftarr);
   ringhelper_init(self);
   }
@@ -114,11 +109,11 @@ NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, dou
 //      double *tmp=(double *) self->shiftarr;
 //      sincos_multi (mmax+1, phi0, &tmp[1], &tmp[0], 2);
       }
-//  if (!self->plan) self->plan=make_rfft_plan(nph);
+//  if (!self->plan) self->plan=pocketfft_make_plan_r(nph);
   if (nph!=(int)self->length)
     {
-    if (self->plan) destroy_rfft_plan(self->plan);
-    self->plan=make_rfft_plan(nph);
+    if (self->plan) pocketfft_delete_plan_r(self->plan);
+    self->plan=pocketfft_make_plan_r(nph);
     self->length=nph;
     }
   }
@@ -335,7 +330,7 @@ NOINLINE static void ringhelper_phase2ring (ringhelper *self,
       }
     }
   data[1]=data[0];
-  rfft_backward (self->plan, &(data[1]), 1.);
+  pocketfft_backward_r (self->plan, &(data[1]), 1.);
   }
 
 NOINLINE static void ringhelper_ring2phase (ringhelper *self,
@@ -354,7 +349,7 @@ NOINLINE static void ringhelper_ring2phase (ringhelper *self,
   if (flags&SHARP_REAL_HARMONICS)
     wgt *= sqrt_two;
 
-  rfft_forward (self->plan, &(data[1]), 1.);
+  pocketfft_forward_r (self->plan, &(data[1]), 1.);
   data[0]=data[1];
   data[1]=data[nph+1]=0.;
 
@@ -765,7 +760,7 @@ NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
     }
   else
     {
-#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
+#pragma omp parallel
 {
     ringhelper helper;
     ringhelper_init(&helper);
@@ -810,7 +805,7 @@ NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
     }
   else
     {
-#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
+#pragma omp parallel
 {
     ringhelper helper;
     ringhelper_init(&helper);
@@ -840,7 +835,7 @@ NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
 
 NOINLINE static void sharp_execute_job (sharp_job *job)
   {
-  double timer=wallTime();
+  double timer=sharp_wallTime();
   job->opcnt=0;
   int lmax = job->ainfo->lmax,
       mmax=sharp_get_mmax(job->ainfo->mval, job->ainfo->nm);
@@ -876,7 +871,7 @@ NOINLINE static void sharp_execute_job (sharp_job *job)
 /* map->phase where necessary */
     map2phase (job, mmax, llim, ulim);
 
-#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
+#pragma omp parallel
 {
     sharp_job ljob = *job;
     ljob.opcnt=0;
@@ -914,7 +909,7 @@ NOINLINE static void sharp_execute_job (sharp_job *job)
 
   DEALLOC(job->norm_l);
   dealloc_phase (job);
-  job->time=wallTime()-timer;
+  job->time=sharp_wallTime()-timer;
   }
 
 static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
diff --git a/libsharp/sharp.h b/libsharp/sharp.h
index 0e43ba9d73f8f7174769ce5a8b2685c0199be964..1fe1566132a768f339e5c76f2707d8fdbadb074a 100644
--- a/libsharp/sharp.h
+++ b/libsharp/sharp.h
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp.h
  *  Portable interface for the spherical transform library.
@@ -29,8 +25,8 @@
  *  \author Martin Reinecke \author Dag Sverre Seljebotn
  */
 
-#ifndef PLANCK_SHARP_H
-#define PLANCK_SHARP_H
+#ifndef SHARP_SHARP_H
+#define SHARP_SHARP_H
 
 #include <stddef.h>
 
@@ -199,7 +195,6 @@ typedef enum { SHARP_DP              = 1<<4,
                SHARP_NO_FFT          = 1<<7,
 
                SHARP_USE_WEIGHTS     = 1<<20,    /* internal use only */
-               SHARP_NO_OPENMP       = 1<<21,    /* internal use only */
              } sharp_jobflags;
 
 /*! Performs a libsharp SHT job. The interface deliberately does not use
@@ -258,6 +253,10 @@ int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
 
 /*! \} */
 
+int sharp_get_mlim (int lmax, int spin, double sth, double cth);
+int sharp_veclen(void);
+const char *sharp_architecture(void);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/libsharp/sharp_almhelpers.c b/libsharp/sharp_almhelpers.c
index 12ce600eb7b4607d5bda7d4e37e2d72723e6ab9d..59b564c32af61865878ec0e09b6168cf2c1a486c 100644
--- a/libsharp/sharp_almhelpers.c
+++ b/libsharp/sharp_almhelpers.c
@@ -16,21 +16,17 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp_almhelpers.c
  *  Spherical transform library
  *
- *  Copyright (C) 2008-2016 Max-Planck-Society
+ *  Copyright (C) 2008-2019 Max-Planck-Society
  *  \author Martin Reinecke
  */
 
-#include "sharp_almhelpers.h"
-#include "c_utils.h"
+#include "libsharp/sharp_almhelpers.h"
+#include "libsharp/sharp_utils.h"
 
 void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
   sharp_alm_info **alm_info)
diff --git a/libsharp/sharp_almhelpers.h b/libsharp/sharp_almhelpers.h
index 06bee8fa21085fe8edc86675a202632dbfdc363e..3dc85f5176f516096f6e383be6ed138b40b0c0b4 100644
--- a/libsharp/sharp_almhelpers.h
+++ b/libsharp/sharp_almhelpers.h
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp_almhelpers.h
  *  SHARP helper function for the creation of a_lm data structures
@@ -29,10 +25,10 @@
  *  \author Martin Reinecke
  */
 
-#ifndef PLANCK_SHARP_ALMHELPERS_H
-#define PLANCK_SHARP_ALMHELPERS_H
+#ifndef SHARP_ALMHELPERS_H
+#define SHARP_ALMHELPERS_H
 
-#include "sharp.h"
+#include "libsharp/sharp.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/libsharp/sharp_core.c b/libsharp/sharp_core.c
index f54a058b0e72e80a389bc420d3f59f3c4c463c7f..1dec17a505e9c233d508370cf3459616e192074c 100644
--- a/libsharp/sharp_core.c
+++ b/libsharp/sharp_core.c
@@ -1,6 +1,33 @@
+/*
+ *  This file is part of libsharp.
+ *
+ *  libsharp is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  libsharp is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with libsharp; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
+
+/*! \file sharp_core.c
+ *  Spherical transform library
+ *
+ *  Copyright (C) 2019 Max-Planck-Society
+ *  \author Martin Reinecke
+ */
+
 #define ARCH default
 #define GENERIC_ARCH
-#include "sharp_core_inc.c"
+#include "libsharp/sharp_core_inc.c"
 #undef GENERIC_ARCH
 #undef ARCH
 
@@ -42,7 +69,9 @@ int XCONCATX2(sharp_veclen,arch) (void); \
 int XCONCATX2(sharp_max_nvec,arch) (int spin); \
 const char *XCONCATX2(sharp_architecture,arch) (void);
 
+#if (!defined(__APPLE__))
 DECL(avx512f)
+#endif
 DECL(fma4)
 DECL(fma)
 DECL(avx2)
@@ -62,7 +91,9 @@ static void assign_funcs(void)
     architecture_ = XCONCATX2(sharp_architecture,arch); \
     return; \
     }
+#if (!defined(__APPLE__))
 DECL2(avx512f)
+#endif
 DECL2(fma4)
 DECL2(fma)
 DECL2(avx2)
@@ -74,6 +105,7 @@ DECL2(avx)
   architecture_ = sharp_architecture_default;
   }
 
+#pragma GCC visibility push(hidden)
 
 void inner_loop (sharp_job *job, const int *ispair,const double *cth,
   const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
@@ -82,16 +114,20 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
   if (!inner_loop_) assign_funcs();
   inner_loop_(job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
   }
-int sharp_veclen(void)
-  {
-  if (!veclen_) assign_funcs();
-  return veclen_();
-  }
+
 int sharp_max_nvec(int spin)
   {
   if (!max_nvec_) assign_funcs();
   return max_nvec_(spin);
   }
+
+#pragma GCC visibility pop
+
+int sharp_veclen(void)
+  {
+  if (!veclen_) assign_funcs();
+  return veclen_();
+  }
 const char *sharp_architecture(void)
   {
   if (!architecture_) assign_funcs();
diff --git a/libsharp/sharp_core_inc.c b/libsharp/sharp_core_inc.c
index b875877cf7ee17fa79883c9610b52e8c5a2678b7..612a778804ed9bce76399179a06f59c15372b2cb 100644
--- a/libsharp/sharp_core_inc.c
+++ b/libsharp/sharp_core_inc.c
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp_core_inc.c
  *  Computational core
@@ -29,6 +25,9 @@
  *  \author Martin Reinecke
  */
 
+// FIXME: special ugly workaround for problems on OSX
+#if (!defined(__APPLE__)) || (!defined(__AVX512F__))
+
 #if (defined(MULTIARCH) || defined(GENERIC_ARCH))
 
 #define XCONCATX(a,b) a##_##b
@@ -38,10 +37,17 @@
 #include <complex.h>
 #include <math.h>
 #include <string.h>
-#include "sharp_vecsupport.h"
-#include "sharp.h"
-#include "sharp_internal.h"
-#include "c_utils.h"
+#include "libsharp/sharp_vecsupport.h"
+#include "libsharp/sharp.h"
+#include "libsharp/sharp_internal.h"
+#include "libsharp/sharp_utils.h"
+
+#pragma GCC visibility push(hidden)
+
+// In the following, we explicitly allow the compiler to contract floating
+// point operations, like multiply-and-add.
+// Unfortunately, most compilers don't act on this pragma yet.
+#pragma STDC FP_CONTRACT ON
 
 typedef complex double dcmplx;
 
@@ -1204,4 +1210,8 @@ const char *XARCH(sharp_architecture)(void)
   return xstr(ARCH);
   }
 
+#pragma GCC visibility pop
+
+#endif
+
 #endif
diff --git a/libsharp/sharp_cxx.h b/libsharp/sharp_cxx.h
index b2415e02e41fbfdfd0f9428343eaaf08655b698b..60995ed23b10414732d3253092459a9d6303c934 100644
--- a/libsharp/sharp_cxx.h
+++ b/libsharp/sharp_cxx.h
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp_cxx.h
  *  Spherical transform library
@@ -29,13 +25,13 @@
  *  \author Martin Reinecke
  */
 
-#ifndef PLANCK_SHARP_CXX_H
-#define PLANCK_SHARP_CXX_H
+#ifndef SHARP_CXX_H
+#define SHARP_CXX_H
 
 #include <complex>
-#include "sharp.h"
-#include "sharp_geomhelpers.h"
-#include "sharp_almhelpers.h"
+#include "libsharp/sharp.h"
+#include "libsharp/sharp_geomhelpers.h"
+#include "libsharp/sharp_almhelpers.h"
 
 class sharp_base
   {
@@ -89,9 +85,6 @@ class sharp_base
       if (ainfo) sharp_destroy_alm_info(ainfo);
       sharp_make_triangular_alm_info (lmax, mmax, 1, &ainfo);
       }
-
-    const sharp_geom_info* get_geom_info() const { return ginfo; }
-    const sharp_alm_info* get_alm_info() const { return ainfo; }
   };
 
 template<typename T> struct cxxjobhelper__ {};
diff --git a/libsharp/sharp_geomhelpers.c b/libsharp/sharp_geomhelpers.c
index 0f6af399430c8f1f9815f19b6aa6e2862b51046b..cd7c6fa51624b95c46579d17f8b9f39c8fc130da 100644
--- a/libsharp/sharp_geomhelpers.c
+++ b/libsharp/sharp_geomhelpers.c
@@ -16,23 +16,19 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp_geomhelpers.c
  *  Spherical transform library
  *
- *  Copyright (C) 2006-2018 Max-Planck-Society
+ *  Copyright (C) 2006-2019 Max-Planck-Society
  *  \author Martin Reinecke
  */
 
 #include <math.h>
-#include "sharp_geomhelpers.h"
-#include "sharp_legendre_roots.h"
-#include "c_utils.h"
+#include "libsharp/sharp_geomhelpers.h"
+#include "libsharp/sharp_legendre_roots.h"
+#include "libsharp/sharp_utils.h"
 #include "pocketfft/pocketfft.h"
 
 void sharp_make_subset_healpix_geom_info (int nside, int stride, int nrings,
@@ -159,9 +155,9 @@ void sharp_make_fejer1_geom_info (int nrings, int ppring, double phi0,
     weight[2*k  ]=2./(1.-4.*k*k)*sin((k*pi)/nrings);
     }
   if ((nrings&1)==0) weight[nrings-1]=0.;
-  rfft_plan plan = make_rfft_plan(nrings);
-  rfft_backward(plan,weight,1.);
-  destroy_rfft_plan(plan);
+  pocketfft_plan_r plan = pocketfft_make_plan_r(nrings);
+  pocketfft_backward_r(plan,weight,1.);
+  pocketfft_delete_plan_r(plan);
 
   for (int m=0; m<(nrings+1)/2; ++m)
     {
@@ -206,9 +202,9 @@ void sharp_make_cc_geom_info (int nrings, int ppring, double phi0,
   for (int k=1; k<=(n/2-1); ++k)
     weight[2*k-1]=2./(1.-4.*k*k) + dw;
   weight[2*(n/2)-1]=(n-3.)/(2*(n/2)-1) -1. -dw*((2-(n&1))*n-1);
-  rfft_plan plan = make_rfft_plan(n);
-  rfft_backward(plan,weight,1.);
-  destroy_rfft_plan(plan);
+  pocketfft_plan_r plan = pocketfft_make_plan_r(n);
+  pocketfft_backward_r(plan,weight,1.);
+  pocketfft_delete_plan_r(plan);
   weight[n]=weight[0];
 
   for (int m=0; m<(nrings+1)/2; ++m)
@@ -254,9 +250,9 @@ void sharp_make_fejer2_geom_info (int nrings, int ppring, double phi0,
   for (int k=1; k<=(n/2-1); ++k)
     weight[2*k-1]=2./(1.-4.*k*k);
   weight[2*(n/2)-1]=(n-3.)/(2*(n/2)-1) -1.;
-  rfft_plan plan = make_rfft_plan(n);
-  rfft_backward(plan,weight,1.);
-  destroy_rfft_plan(plan);
+  pocketfft_plan_r plan = pocketfft_make_plan_r(n);
+  pocketfft_backward_r(plan,weight,1.);
+  pocketfft_delete_plan_r(plan);
   for (int m=0; m<nrings; ++m)
     weight[m]=weight[m+1];
 
diff --git a/libsharp/sharp_geomhelpers.h b/libsharp/sharp_geomhelpers.h
index a59af9b97011fc1f617137e5906a61bad1fd5520..d0a4f33ff483451db742c2379d345ff2a1543f4e 100644
--- a/libsharp/sharp_geomhelpers.h
+++ b/libsharp/sharp_geomhelpers.h
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp_geomhelpers.h
  *  SHARP helper function for the creation of grid geometries
@@ -29,10 +25,10 @@
  *  \author Martin Reinecke
  */
 
-#ifndef PLANCK_SHARP_GEOMHELPERS_H
-#define PLANCK_SHARP_GEOMHELPERS_H
+#ifndef SHARP_GEOMHELPERS_H
+#define SHARP_GEOMHELPERS_H
 
-#include "sharp.h"
+#include "libsharp/sharp.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/libsharp/sharp_internal.h b/libsharp/sharp_internal.h
index 979ae5820c00b78a27caa22bb906e4dec0c8dc68..89c4429089d7c37e02387f99e79dd2b63f9cc3c3 100644
--- a/libsharp/sharp_internal.h
+++ b/libsharp/sharp_internal.h
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp_internal.h
  *  Internally used functionality for the spherical transform library.
@@ -29,16 +25,16 @@
  *  \author Martin Reinecke \author Dag Sverre Seljebotn
  */
 
-#ifndef PLANCK_SHARP_INTERNAL_H
-#define PLANCK_SHARP_INTERNAL_H
+#ifndef SHARP_INTERNAL_H
+#define SHARP_INTERNAL_H
 
 #ifdef __cplusplus
 #error This header file cannot be included from C++, only from C
 #endif
 
 #include <complex.h>
-#include "sharp.h"
-#include "sharp_ylmgen_c.h"
+#include "libsharp/sharp.h"
+#include "libsharp/sharp_ylmgen_c.h"
 
 typedef struct
   {
@@ -58,14 +54,10 @@ typedef struct
   unsigned long long opcnt;
   } sharp_job;
 
-int sharp_get_mlim (int lmax, int spin, double sth, double cth);
-
 void inner_loop (sharp_job *job, const int *ispair,const double *cth,
   const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
   const int *mlim);
 
-int sharp_veclen(void);
 int sharp_max_nvec(int spin);
-const char *sharp_architecture(void);
 
 #endif
diff --git a/libsharp/sharp_legendre_roots.c b/libsharp/sharp_legendre_roots.c
index 44d8507649319a0376da6f8080ca11880e382e1a..648cfca234d1b5c12cde90758f2b9a4b8aeae140 100644
--- a/libsharp/sharp_legendre_roots.c
+++ b/libsharp/sharp_legendre_roots.c
@@ -7,8 +7,8 @@
     - tweaked Newton iteration to obtain higher accuracy */
 
 #include <math.h>
-#include "sharp_legendre_roots.h"
-#include "c_utils.h"
+#include "libsharp/sharp_legendre_roots.h"
+#include "libsharp/sharp_utils.h"
 
 static inline double one_minus_x2 (double x)
   { return (fabs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; }
diff --git a/libsharp/sharp_legendre_roots.h b/libsharp/sharp_legendre_roots.h
index 19f8ec801a2c439be8e18378a71581aab6ad77e5..4220fceb12b259a68cc27b0de04119b87a265ed4 100644
--- a/libsharp/sharp_legendre_roots.h
+++ b/libsharp/sharp_legendre_roots.h
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp_legendre_roots.h
  *
diff --git a/libsharp/sharp_mpi.c b/libsharp/sharp_mpi.c
index a90ec69713d34fcd031ee69690a23e4466de2c15..2be016abb781b2a89ab433a32efab28cf0964e00 100644
--- a/libsharp/sharp_mpi.c
+++ b/libsharp/sharp_mpi.c
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp_mpi.c
  *  Functionality only needed for MPI-parallel transforms
@@ -31,7 +27,7 @@
 
 #ifdef USE_MPI
 
-#include "sharp_mpi.h"
+#include "libsharp/sharp_mpi.h"
 
 typedef struct
   {
@@ -215,7 +211,7 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
     { sharp_execute_job (job); return; }
 
   MPI_Barrier(comm);
-  double timer=wallTime();
+  double timer=sharp_wallTime();
   job->opcnt=0;
   sharp_mpi_info minfo;
   sharp_make_mpi_info(comm, job, &minfo);
@@ -270,7 +266,7 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
 
     map2alm_comm (job, &minfo);
 
-#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
+#pragma omp parallel
 {
     sharp_job ljob = *job;
     sharp_Ylmgen_C generator;
@@ -310,7 +306,7 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
     dealloc_phase (job);
     }
   sharp_destroy_mpi_info(&minfo);
-  job->time=wallTime()-timer;
+  job->time=sharp_wallTime()-timer;
   }
 
 void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
diff --git a/libsharp/sharp_mpi.h b/libsharp/sharp_mpi.h
index 5de37bf4540779ab913f399f15325c7329b216f2..01950321d2e8f6f709b395b7b90d0cb7d524f349 100644
--- a/libsharp/sharp_mpi.h
+++ b/libsharp/sharp_mpi.h
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp_mpi.h
  *  Interface for the spherical transform library with MPI support.
@@ -29,11 +25,11 @@
  *  \author Martin Reinecke \author Dag Sverre Seljebotn
  */
 
-#ifndef PLANCK_SHARP_MPI_H
-#define PLANCK_SHARP_MPI_H
+#ifndef SHARP_MPI_H
+#define SHARP_MPI_H
 
 #include <mpi.h>
-#include "sharp.h"
+#include "libsharp/sharp.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/c_utils/c_utils.c b/libsharp/sharp_utils.c
similarity index 57%
rename from c_utils/c_utils.c
rename to libsharp/sharp_utils.c
index 9344a6d9471f197f09248fc1fdea6d0aefbc33ca..c73edf88dd3f1c3a0ab2d57c882ef61e9a6d98d7 100644
--- a/c_utils/c_utils.c
+++ b/libsharp/sharp_utils.c
@@ -1,46 +1,38 @@
 /*
- *  This file is part of libc_utils.
+ *  This file is part of libsharp.
  *
- *  libc_utils is free software; you can redistribute it and/or modify
+ *  libsharp is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
- *  libc_utils is distributed in the hope that it will be useful,
+ *  libsharp is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
- *  along with libc_utils; if not, write to the Free Software
+ *  along with libsharp; if not, write to the Free Software
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*
  *  Convenience functions
  *
- *  Copyright (C) 2008-2017 Max-Planck-Society
+ *  Copyright (C) 2008-2019 Max-Planck-Society
  *  Author: Martin Reinecke
  */
 
 #include <stdio.h>
-#include "c_utils.h"
+#include "libsharp/sharp_utils.h"
 
-void util_fail_ (const char *file, int line, const char *func, const char *msg)
+void sharp_fail_ (const char *file, int line, const char *func, const char *msg)
   {
   fprintf(stderr,"%s, %i (%s):\n%s\n",file,line,func,msg);
   exit(1);
   }
-void util_warn_ (const char *file, int line, const char *func, const char *msg)
-  {
-  fprintf(stderr,"%s, %i (%s):\n%s\n",file,line,func,msg);
-  }
 
 /* This function tries to avoid allocations with a total size close to a high
    power of two (called the "critical stride" here), by adding a few more bytes
@@ -57,7 +49,7 @@ static size_t manipsize(size_t sz)
 
 #ifdef __SSE__
 #include <xmmintrin.h>
-void *util_malloc_ (size_t sz)
+void *sharp_malloc_ (size_t sz)
   {
   void *res;
   if (sz==0) return NULL;
@@ -65,10 +57,10 @@ void *util_malloc_ (size_t sz)
   UTIL_ASSERT(res,"_mm_malloc() failed");
   return res;
   }
-void util_free_ (void *ptr)
+void sharp_free_ (void *ptr)
   { if ((ptr)!=NULL) _mm_free(ptr); }
 #else
-void *util_malloc_ (size_t sz)
+void *sharp_malloc_ (size_t sz)
   {
   void *res;
   if (sz==0) return NULL;
@@ -76,6 +68,41 @@ void *util_malloc_ (size_t sz)
   UTIL_ASSERT(res,"malloc() failed");
   return res;
   }
-void util_free_ (void *ptr)
+void sharp_free_ (void *ptr)
   { if ((ptr)!=NULL) free(ptr); }
 #endif
+
+#if defined (_OPENMP)
+#include <omp.h>
+#elif defined (USE_MPI)
+#include "mpi.h"
+#elif defined (_WIN32)
+#include <Windows.h>
+#else
+#include <sys/time.h>
+#include <stdlib.h>
+#endif
+
+double sharp_wallTime(void)
+  {
+#if defined (_OPENMP)
+  return omp_get_wtime();
+#elif defined (USE_MPI)
+  return MPI_Wtime();
+#elif defined (_WIN32)
+  static double inv_freq = -1.;
+  if (inv_freq<0)
+    {
+    LARGE_INTEGER freq;
+    QueryPerformanceFrequency(&freq);
+    inv_freq = 1. / double(freq.QuadPart);
+    }
+  LARGE_INTEGER count;
+  QueryPerformanceCounter(&count);
+  return count.QuadPart*inv_freq;
+#else
+  struct timeval t;
+  gettimeofday(&t, NULL);
+  return t.tv_sec + 1e-6*t.tv_usec;
+#endif
+  }
diff --git a/c_utils/c_utils.h b/libsharp/sharp_utils.h
similarity index 64%
rename from c_utils/c_utils.h
rename to libsharp/sharp_utils.h
index 01c64ad48b39b0332f20718440caf8a5625cd4c0..d2a1c74f6ad40a1bbbe71bd4d8b64ba10b7549cc 100644
--- a/c_utils/c_utils.h
+++ b/libsharp/sharp_utils.h
@@ -16,22 +16,18 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file c_utils.h
  *  Convenience functions
  *
- *  Copyright (C) 2008-2017 Max-Planck-Society
+ *  Copyright (C) 2008-2019 Max-Planck-Society
  *  \author Martin Reinecke
  *  \note This file should only be included from .c files, NOT from .h files.
  */
 
-#ifndef PLANCK_C_UTILS_H
-#define PLANCK_C_UTILS_H
+#ifndef SHARP_UTILS_H
+#define SHARP_UTILS_H
 
 #include <math.h>
 #include <stdlib.h>
@@ -41,10 +37,9 @@
 extern "C" {
 #endif
 
-void util_fail_ (const char *file, int line, const char *func, const char *msg);
-void util_warn_ (const char *file, int line, const char *func, const char *msg);
-void *util_malloc_ (size_t sz);
-void util_free_ (void *ptr);
+void sharp_fail_ (const char *file, int line, const char *func, const char *msg);
+void *sharp_malloc_ (size_t sz);
+void sharp_free_ (void *ptr);
 
 #if defined (__GNUC__)
 #define UTIL_FUNC_NAME__ __func__
@@ -57,43 +52,32 @@ void util_free_ (void *ptr);
     source file name and line number of the call, as well as \a msg;
     then exit the program with an error status. */
 #define UTIL_ASSERT(cond,msg) \
-  if(!(cond)) util_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg)
+  if(!(cond)) sharp_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg)
 /*! \def UTIL_WARN(cond,msg)
     If \a cond is false, print an warning containing function name,
     source file name and line number of the call, as well as \a msg. */
-#define UTIL_WARN(cond,msg) \
-  if(!(cond)) util_warn_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg)
-/*! \def UTIL_FAIL(msg)
-    Print an error message containing function name,
-    source file name and line number of the call, as well as \a msg;
-    then exit the program with an error status. */
 #define UTIL_FAIL(msg) \
-  util_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg)
+  sharp_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg)
 
 /*! \def ALLOC(ptr,type,num)
     Allocate space for \a num objects of type \a type. Make sure that the
     allocation succeeded, else stop the program with an error. Return the
     resulting pointer in \a ptr. */
 #define ALLOC(ptr,type,num) \
-  do { (ptr)=(type *)util_malloc_((num)*sizeof(type)); } while (0)
+  do { (ptr)=(type *)sharp_malloc_((num)*sizeof(type)); } while (0)
 /*! \def RALLOC(type,num)
     Allocate space for \a num objects of type \a type. Make sure that the
     allocation succeeded, else stop the program with an error. Cast the
     resulting pointer to \a (type*). */
 #define RALLOC(type,num) \
-  ((type *)util_malloc_((num)*sizeof(type)))
+  ((type *)sharp_malloc_((num)*sizeof(type)))
 /*! \def DEALLOC(ptr)
     Deallocate \a ptr. It must have been allocated using \a ALLOC or
     \a RALLOC. */
 #define DEALLOC(ptr) \
-  do { util_free_(ptr); (ptr)=NULL; } while(0)
+  do { sharp_free_(ptr); (ptr)=NULL; } while(0)
 #define RESIZE(ptr,type,num) \
-  do { util_free_(ptr); ALLOC(ptr,type,num); } while(0)
-#define GROW(ptr,type,sz_old,sz_new) \
-  do { \
-    if ((sz_new)>(sz_old)) \
-      { RESIZE(ptr,type,2*(sz_new));sz_old=2*(sz_new); } \
-  } while(0)
+  do { sharp_free_(ptr); ALLOC(ptr,type,num); } while(0)
 /*! \def SET_ARRAY(ptr,i1,i2,val)
     Set the entries \a ptr[i1] ... \a ptr[i2-1] to \a val. */
 #define SET_ARRAY(ptr,i1,i2,val) \
@@ -101,14 +85,6 @@ void util_free_ (void *ptr);
     ptrdiff_t cnt_; \
     for (cnt_=(i1);cnt_<(i2);++cnt_) (ptr)[cnt_]=(val); \
     } while(0)
-/*! \def COPY_ARRAY(src,dest,i1,i2)
-    Copy the entries \a src[i1] ... \a src[i2-1] to
-    \a dest[i1] ... \a dest[i2-1]. */
-#define COPY_ARRAY(src,dest,i1,i2) \
-  do { \
-    ptrdiff_t cnt_; \
-    for (cnt_=(i1);cnt_<(i2);++cnt_) (dest)[cnt_]=(src)[cnt_]; \
-    } while(0)
 
 #define ALLOC2D(ptr,type,num1,num2) \
   do { \
@@ -123,8 +99,6 @@ void util_free_ (void *ptr);
 
 #define FAPPROX(a,b,eps) \
   (fabs((a)-(b))<((eps)*fabs(b)))
-#define ABSAPPROX(a,b,eps) \
-  (fabs((a)-(b))<(eps))
 #define IMAX(a,b) \
   (((a)>(b)) ? (a) : (b))
 #define IMIN(a,b) \
@@ -133,12 +107,16 @@ void util_free_ (void *ptr);
 #define SWAP(a,b,type) \
   do { type tmp_=(a); (a)=(b); (b)=tmp_; } while(0)
 
-#define CHECK_STACK_ALIGN(align) \
-  do { \
-    double foo; \
-    UTIL_WARN((((size_t)(&foo))&(align-1))==0, \
-      "WARNING: stack not sufficiently aligned!"); \
-    } while(0)
+/*! Returns an approximation of the current wall time (in seconds).
+    The first available of the following timers will be used:
+    <ul>
+    <li> \a omp_get_wtime(), if OpenMP is available
+    <li> \a MPI_Wtime(), if MPI is available
+    <li> \a gettimeofday() otherwise
+    </ul>
+    \note Only useful for measuring time differences.
+    \note This function has an execution time between 10 and 100 nanoseconds. */
+double sharp_wallTime(void);
 
 #ifdef __cplusplus
 }
diff --git a/libsharp/sharp_vecsupport.h b/libsharp/sharp_vecsupport.h
index e4bfc4fc94da5d0e65725c4e8f5a74fee4220f32..7894ef63429f02326e53b49162f1571ae5151d76 100644
--- a/libsharp/sharp_vecsupport.h
+++ b/libsharp/sharp_vecsupport.h
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*  \file sharp_vecsupport.h
  *  Convenience functions for vector arithmetics
diff --git a/libsharp/sharp_ylmgen_c.c b/libsharp/sharp_ylmgen_c.c
index f408eea9595bffbc674b7fea251852e21f5bac6a..791908ce49ba143a4ab542517d4c590ee440ebeb 100644
--- a/libsharp/sharp_ylmgen_c.c
+++ b/libsharp/sharp_ylmgen_c.c
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*
  *  Helper code for efficient calculation of Y_lm(theta,phi=0)
@@ -31,8 +27,10 @@
 
 #include <math.h>
 #include <stdlib.h>
-#include "sharp_ylmgen_c.h"
-#include "c_utils.h"
+#include "libsharp/sharp_ylmgen_c.h"
+#include "libsharp/sharp_utils.h"
+
+#pragma GCC visibility push(hidden)
 
 static inline void normalize (double *val, int *scale, double xfmax)
   {
@@ -256,3 +254,5 @@ double *sharp_Ylmgen_get_d1norm (int lmax)
     res[l] = (l<1) ? 0. : 0.5*sqrt(l*(l+1.)*(2*l+1.)/(4*pi));
   return res;
   }
+
+#pragma GCC visibility pop
diff --git a/libsharp/sharp_ylmgen_c.h b/libsharp/sharp_ylmgen_c.h
index 130d7970d72d94e705972b7c63dd201288982973..1fa1cfa730762f3e9e860f3fb6ce29314f7beae8 100644
--- a/libsharp/sharp_ylmgen_c.h
+++ b/libsharp/sharp_ylmgen_c.h
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file sharp_ylmgen_c.h
  *  Code for efficient calculation of Y_lm(phi=0,theta)
diff --git a/pocketfft/pocketfft.c b/pocketfft/pocketfft.c
index de1af3e2367bbb3cf93921b96a78ec5e53ca098b..56c9dc5dc14c6282fa09d2162be10c8a7ba9854b 100644
--- a/pocketfft/pocketfft.c
+++ b/pocketfft/pocketfft.c
@@ -6,14 +6,14 @@
 /*
  *  Main implementation file.
  *
- *  Copyright (C) 2004-2018 Max-Planck-Society
+ *  Copyright (C) 2004-2019 Max-Planck-Society
  *  \author Martin Reinecke
  */
 
 #include <math.h>
 #include <string.h>
 
-#include "pocketfft.h"
+#include "pocketfft/pocketfft.h"
 
 #define RALLOC(type,num) \
   ((type *)malloc((num)*sizeof(type)))
@@ -2057,16 +2057,16 @@ static int rfftblue_forward(fftblue_plan plan, double c[], double fct)
   return 0;
   }
 
-typedef struct cfft_plan_i
+typedef struct pocketfft_plan_c_i
   {
   cfftp_plan packplan;
   fftblue_plan blueplan;
-  } cfft_plan_i;
+  } pocketfft_plan_c_i;
 
-cfft_plan make_cfft_plan (size_t length)
+pocketfft_plan_c pocketfft_make_plan_c (size_t length)
   {
   if (length==0) return NULL;
-  cfft_plan plan = RALLOC(cfft_plan_i,1);
+  pocketfft_plan_c plan = RALLOC(pocketfft_plan_c_i,1);
   if (!plan) return NULL;
   plan->blueplan=0;
   plan->packplan=0;
@@ -2092,7 +2092,7 @@ cfft_plan make_cfft_plan (size_t length)
   return plan;
   }
 
-void destroy_cfft_plan (cfft_plan plan)
+void pocketfft_delete_plan_c (pocketfft_plan_c plan)
   {
   if (plan->blueplan)
     destroy_fftblue_plan(plan->blueplan);
@@ -2101,7 +2101,8 @@ void destroy_cfft_plan (cfft_plan plan)
   DEALLOC(plan);
   }
 
-WARN_UNUSED_RESULT int cfft_backward(cfft_plan plan, double c[], double fct)
+WARN_UNUSED_RESULT
+int pocketfft_backward_c(pocketfft_plan_c plan, double c[], double fct)
   {
   if (plan->packplan)
     return cfftp_backward(plan->packplan,c,fct);
@@ -2109,7 +2110,8 @@ WARN_UNUSED_RESULT int cfft_backward(cfft_plan plan, double c[], double fct)
   return cfftblue_backward(plan->blueplan,c,fct);
   }
 
-WARN_UNUSED_RESULT int cfft_forward(cfft_plan plan, double c[], double fct)
+WARN_UNUSED_RESULT
+int pocketfft_forward_c(pocketfft_plan_c plan, double c[], double fct)
   {
   if (plan->packplan)
     return cfftp_forward(plan->packplan,c,fct);
@@ -2117,16 +2119,16 @@ WARN_UNUSED_RESULT int cfft_forward(cfft_plan plan, double c[], double fct)
   return cfftblue_forward(plan->blueplan,c,fct);
   }
 
-typedef struct rfft_plan_i
+typedef struct pocketfft_plan_r_i
   {
   rfftp_plan packplan;
   fftblue_plan blueplan;
-  } rfft_plan_i;
+  } pocketfft_plan_r_i;
 
-rfft_plan make_rfft_plan (size_t length)
+pocketfft_plan_r pocketfft_make_plan_r (size_t length)
   {
   if (length==0) return NULL;
-  rfft_plan plan = RALLOC(rfft_plan_i,1);
+  pocketfft_plan_r plan = RALLOC(pocketfft_plan_r_i,1);
   if (!plan) return NULL;
   plan->blueplan=0;
   plan->packplan=0;
@@ -2152,7 +2154,7 @@ rfft_plan make_rfft_plan (size_t length)
   return plan;
   }
 
-void destroy_rfft_plan (rfft_plan plan)
+void pocketfft_delete_plan_r (pocketfft_plan_r plan)
   {
   if (plan->blueplan)
     destroy_fftblue_plan(plan->blueplan);
@@ -2161,19 +2163,20 @@ void destroy_rfft_plan (rfft_plan plan)
   DEALLOC(plan);
   }
 
-size_t rfft_length(rfft_plan plan)
+size_t pocketfft_length_r(pocketfft_plan_r plan)
   {
   if (plan->packplan) return plan->packplan->length;
   return plan->blueplan->n;
   }
 
-size_t cfft_length(cfft_plan plan)
+size_t pocketfft_length_c(pocketfft_plan_c plan)
   {
   if (plan->packplan) return plan->packplan->length;
   return plan->blueplan->n;
   }
 
-WARN_UNUSED_RESULT int rfft_backward(rfft_plan plan, double c[], double fct)
+WARN_UNUSED_RESULT
+int pocketfft_backward_r(pocketfft_plan_r plan, double c[], double fct)
   {
   if (plan->packplan)
     return rfftp_backward(plan->packplan,c,fct);
@@ -2181,7 +2184,8 @@ WARN_UNUSED_RESULT int rfft_backward(rfft_plan plan, double c[], double fct)
     return rfftblue_backward(plan->blueplan,c,fct);
   }
 
-WARN_UNUSED_RESULT int rfft_forward(rfft_plan plan, double c[], double fct)
+WARN_UNUSED_RESULT
+int pocketfft_forward_r(pocketfft_plan_r plan, double c[], double fct)
   {
   if (plan->packplan)
     return rfftp_forward(plan->packplan,c,fct);
diff --git a/pocketfft/pocketfft.h b/pocketfft/pocketfft.h
index 9eb398542124e2d8d8c05289eb9264b33332e91f..b1a02eeca93803a5e7b8e4d73d879ac3c02b8350 100644
--- a/pocketfft/pocketfft.h
+++ b/pocketfft/pocketfft.h
@@ -6,7 +6,7 @@
 /*! \file pocketfft.h
  *  Public interface of the pocketfft library
  *
- *  Copyright (C) 2008-2018 Max-Planck-Society
+ *  Copyright (C) 2008-2019 Max-Planck-Society
  *  \author Martin Reinecke
  */
 
@@ -15,20 +15,36 @@
 
 #include <stdlib.h>
 
-struct cfft_plan_i;
-typedef struct cfft_plan_i * cfft_plan;
-cfft_plan make_cfft_plan (size_t length);
-void destroy_cfft_plan (cfft_plan plan);
-int cfft_backward(cfft_plan plan, double c[], double fct);
-int cfft_forward(cfft_plan plan, double c[], double fct);
-size_t cfft_length(cfft_plan plan);
-
-struct rfft_plan_i;
-typedef struct rfft_plan_i * rfft_plan;
-rfft_plan make_rfft_plan (size_t length);
-void destroy_rfft_plan (rfft_plan plan);
-int rfft_backward(rfft_plan plan, double c[], double fct);
-int rfft_forward(rfft_plan plan, double c[], double fct);
-size_t rfft_length(rfft_plan plan);
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct pocketfft_plan_c_i;
+typedef struct pocketfft_plan_c_i * pocketfft_plan_c;
+pocketfft_plan_c pocketfft_make_plan_c (size_t length);
+void pocketfft_delete_plan_c (pocketfft_plan_c plan);
+int pocketfft_backward_c(pocketfft_plan_c plan, double c[], double fct);
+int pocketfft_forward_c(pocketfft_plan_c plan, double c[], double fct);
+size_t pocketfft_length_c(pocketfft_plan_c plan);
+
+struct pocketfft_plan_r_i;
+typedef struct pocketfft_plan_r_i * pocketfft_plan_r;
+pocketfft_plan_r pocketfft_make_plan_r (size_t length);
+void pocketfft_delete_plan_r (pocketfft_plan_r plan);
+/*! Computes a real backward FFT on \a c, using \a plan
+    and assuming the FFTPACK storage scheme:
+    - on entry, \a c has the form <tt>r0, r1, i1, r2, i2, ...</tt>
+    - on exit, it has the form <tt>r0, r1, ..., r[length-1]</tt>. */
+int pocketfft_backward_r(pocketfft_plan_r plan, double c[], double fct);
+/*! Computes a real forward FFT on \a c, using \a plan
+    and assuming the FFTPACK storage scheme:
+    - on entry, \a c has the form <tt>r0, r1, ..., r[length-1]</tt>;
+    - on exit, it has the form <tt>r0, r1, i1, r2, i2, ...</tt> */
+int pocketfft_forward_r(pocketfft_plan_r plan, double c[], double fct);
+size_t pocketfft_length_r(pocketfft_plan_r plan);
+
+#ifdef __cplusplus
+}
+#endif
 
 #endif
diff --git a/python/pysharp.cc b/python/pysharp.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1b1d28e64389909c48bad025d8036dc971da6020
--- /dev/null
+++ b/python/pysharp.cc
@@ -0,0 +1,208 @@
+/*
+ *  This file is part of libsharp.
+ *
+ *  libsharp is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  libsharp is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with libsharp; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ *  For more information about HEALPix, see http://healpix.sourceforge.net
+ */
+
+/*
+ *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
+ */
+
+/*
+ *  Copyright (C) 2017 Max-Planck-Society
+ *  Author: Martin Reinecke
+ */
+
+#include <pybind11/pybind11.h>
+#include <pybind11/numpy.h>
+#include <iostream>
+#include <vector>
+#include <complex>
+#include <string>
+
+#include "libsharp/sharp_cxx.h"
+#include "libsharp/sharp_legendre_roots.h"
+
+using namespace std;
+
+namespace py = pybind11;
+
+namespace {
+
+template<typename T> using pyarr = py::array_t<T>;
+template<typename T> using pyarr_c
+  = py::array_t<T, py::array::c_style | py::array::forcecast>;
+
+using a_d = py::array_t<double>;
+using a_c = py::array_t<complex<double>>;
+using a_i = py::array_t<int64_t>;
+using a_d_c = py::array_t<double, py::array::c_style | py::array::forcecast>;
+using a_c_c = py::array_t<complex<double>,
+  py::array::c_style | py::array::forcecast>;
+
+void myassert(bool cond, const char *msg)
+  { if (!cond) throw runtime_error(msg); }
+
+template<typename T> class py_sharpjob
+  {
+  private:
+    sharp_cxxjob<T> job;
+    int64_t lmax_, mmax_, npix_;
+
+  public:
+    py_sharpjob () : lmax_(0), mmax_(0), npix_(0) {}
+
+    void set_Gauss_geometry(int64_t nrings, int64_t nphi)
+      {
+      myassert((nrings>0)&&(nphi>0),"bad grid dimensions");
+      npix_=nrings*nphi;
+      job.set_Gauss_geometry(nrings, nphi);
+      }
+    void set_Healpix_geometry(int64_t nside)
+      {
+      myassert(nside>0,"bad Nside value");
+      npix_=12*nside*nside;
+      job.set_Healpix_geometry(nside);
+      }
+    void set_triangular_alm_info (int64_t lmax, int64_t mmax)
+      {
+      myassert(mmax>=0,"negative mmax");
+      myassert(mmax<=lmax,"mmax must not be larger than lmax");
+      lmax_=lmax; mmax_=mmax;
+      job.set_triangular_alm_info (lmax,mmax);
+      }
+
+    int64_t n_alm() const
+      { return ((mmax_+1)*(mmax_+2))/2 + (mmax_+1)*(lmax_-mmax_); }
+
+    a_d_c alm2map (const a_c_c &alm) const
+      {
+      myassert(npix_>0,"no map geometry specified");
+      myassert (alm.size()==n_alm(),
+        "incorrect size of a_lm array");
+      a_d_c map(npix_);
+      auto mr=map.mutable_unchecked<1>();
+      auto ar=alm.unchecked<1>();
+      job.alm2map(&ar[0],&mr[0],false);
+      return map;
+      }
+    a_c_c alm2map_adjoint (const a_d_c &map) const
+      {
+      myassert(npix_>0,"no map geometry specified");
+      myassert (map.size()==npix_,"incorrect size of map array");
+      a_c_c alm(n_alm());
+      auto mr=map.unchecked<1>();
+      auto ar=alm.mutable_unchecked<1>();
+      job.alm2map_adjoint(&mr[0],&ar[0],false);
+      return alm;
+      }
+    a_c_c map2alm (const a_d_c &map) const
+      {
+      myassert(npix_>0,"no map geometry specified");
+      myassert (map.size()==npix_,"incorrect size of map array");
+      a_c_c alm(n_alm());
+      auto mr=map.unchecked<1>();
+      auto ar=alm.mutable_unchecked<1>();
+      job.map2alm(&mr[0],&ar[0],false);
+      return alm;
+      }
+    a_d_c alm2map_spin (const a_c_c &alm, int64_t spin) const
+      {
+      myassert(npix_>0,"no map geometry specified");
+      auto ar=alm.unchecked<2>();
+      myassert((ar.shape(0)==2)&&(ar.shape(1)==n_alm()),
+        "incorrect size of a_lm array");
+      a_d_c map(vector<size_t>{2,size_t(npix_)});
+      auto mr=map.mutable_unchecked<2>();
+      job.alm2map_spin(&ar(0,0),&ar(1,0),&mr(0,0),&mr(1,0),spin,false);
+      return map;
+      }
+    a_c_c map2alm_spin (const a_d_c &map, int64_t spin) const
+      {
+      myassert(npix_>0,"no map geometry specified");
+      auto mr=map.unchecked<2>();
+      myassert ((mr.shape(0)==2)&&(mr.shape(1)==npix_),
+        "incorrect size of map array");
+      a_c_c alm(vector<size_t>{2,size_t(n_alm())});
+      auto ar=alm.mutable_unchecked<2>();
+      job.map2alm_spin(&mr(0,0),&mr(1,0),&ar(0,0),&ar(1,0),spin,false);
+      return alm;
+      }
+  };
+
+a_d_c GL_weights(int64_t nlat, int64_t nlon)
+  {
+  constexpr double twopi=6.283185307179586476925286766559005768394;
+  a_d_c res(nlat);
+  auto rr=res.mutable_unchecked<1>();
+  vector<double> dummy_roots(nlat);
+  sharp_legendre_roots(nlat, dummy_roots.data(), &rr[0]);
+  for (size_t i=0; i<size_t(rr.shape(0)); ++i)
+    rr[i]*=twopi/nlon;
+  return res;
+  }
+
+a_d_c GL_thetas(int64_t nlat)
+  {
+  a_d_c res(nlat);
+  auto rr=res.mutable_unchecked<1>();
+  vector<double> dummy_weights(nlat);
+  sharp_legendre_roots(nlat, &rr[0], dummy_weights.data());
+  for (size_t i=0; i<size_t(rr.shape(0)); ++i)
+    rr[i]=acos(-rr[i]);
+  return res;
+  }
+
+
+const char *pysharp_DS = R"""(
+Python interface for libsharp
+
+All angles are interpreted as radians.
+The theta coordinate is measured as co-latitude, ranging from 0 (North Pole)
+to pi (South Pole).
+
+Error conditions are reported by raising exceptions.
+)""";
+
+
+} // unnamed namespace
+
+PYBIND11_MODULE(pysharp, m)
+  {
+  using namespace pybind11::literals;
+
+  m.doc() = pysharp_DS;
+
+  py::class_<py_sharpjob<double>> (m, "sharpjob_d")
+    .def(py::init<>())
+    .def("set_Gauss_geometry", &py_sharpjob<double>::set_Gauss_geometry,
+      "nrings"_a,"nphi"_a)
+    .def("set_Healpix_geometry", &py_sharpjob<double>::set_Healpix_geometry,
+      "nside"_a)
+    .def("set_triangular_alm_info",
+      &py_sharpjob<double>::set_triangular_alm_info, "lmax"_a, "mmax"_a)
+    .def("n_alm", &py_sharpjob<double>::n_alm)
+    .def("alm2map", &py_sharpjob<double>::alm2map,"alm"_a)
+    .def("alm2map_adjoint", &py_sharpjob<double>::alm2map_adjoint,"map"_a)
+    .def("map2alm", &py_sharpjob<double>::map2alm,"map"_a)
+    .def("alm2map_spin", &py_sharpjob<double>::alm2map_spin,"alm"_a,"spin"_a)
+    .def("map2alm_spin", &py_sharpjob<double>::map2alm_spin,"map"_a,"spin"_a)
+    ;
+
+  m.def("GL_weights",&GL_weights, "nlat"_a, "nlon"_a);
+  m.def("GL_thetas",&GL_thetas, "nlat"_a);
+  }
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..153af067ee3ccb09900ad18c6143290a53d1b29b
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,70 @@
+from setuptools import setup, Extension, Distribution
+import setuptools.command.build_ext
+
+import sys
+import sysconfig
+import distutils.sysconfig
+
+
+# FIXME this has to be set from outside!
+sharp_libpath='/home/martin/codes/sharp2/.libs'
+
+class _deferred_pybind11_include(object):
+    def __init__(self, user=False):
+        self.user = user
+
+    def __str__(self):
+        import pybind11
+        return pybind11.get_include(self.user)
+
+
+def _remove_strict_prototype_option_from_distutils_config():
+    strict_prototypes = '-Wstrict-prototypes'
+    config = distutils.sysconfig.get_config_vars()
+    for key, value in config.items():
+        if strict_prototypes in str(value):
+            config[key] = config[key].replace(strict_prototypes, '')
+
+
+_remove_strict_prototype_option_from_distutils_config()
+
+
+extra_cc_compile_args = []
+include_dirs = ['../',
+                _deferred_pybind11_include(),
+                _deferred_pybind11_include(True)]
+
+python_module_link_args = []
+
+if sys.platform == 'darwin':
+    extra_cc_compile_args += ['--std=c++11', '--stdlib=libc++',
+                              '-mmacosx-version-min=10.9']
+    vars = distutils.sysconfig.get_config_vars()
+    vars['LDSHARED'] = vars['LDSHARED'].replace('-bundle', '')
+    python_module_link_args += ['-bundle']
+else:
+    extra_cc_compile_args += ['--std=c++11']
+    python_module_link_args += ["-Wl,-rpath,$ORIGIN"]
+
+
+def get_extension_modules():
+    return [Extension('pysharp',
+                      sources=['pysharp.cc'],
+                      include_dirs=include_dirs,
+                      extra_compile_args=extra_cc_compile_args,
+                      libraries=["sharp"],
+                      library_dirs=[sharp_libpath],
+                      extra_link_args=python_module_link_args)]
+
+
+setup(name='pysharp',
+      version='0.0.1',
+      description='Python bindings for libsharp',
+      include_package_data=True,
+      author='Martin Reinecke',
+      author_email='martin@mpa-garching.mpg.de',
+      packages=[],
+      setup_requires=['numpy>=1.10.4', 'pybind11>=2.2.1'],
+      ext_modules=get_extension_modules(),
+      install_requires=['numpy>=1.10.4', 'pybind11>=2.2.1']
+      )
diff --git a/c_utils/memusage.c b/test/memusage.c
similarity index 85%
rename from c_utils/memusage.c
rename to test/memusage.c
index a1c25c0fa4913ff2f0dceb26b0fb3d86dfb95136..3db8e13fdc9896479b7eafd016f9519ff0e1a758 100644
--- a/c_utils/memusage.c
+++ b/test/memusage.c
@@ -16,22 +16,18 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*
  *  Functionality for measuring memory consumption
  *
- *  Copyright (C) 2012 Max-Planck-Society
+ *  Copyright (C) 2012-2019 Max-Planck-Society
  *  Author: Martin Reinecke
  */
 
 #include <stdio.h>
 #include <string.h>
-#include "memusage.h"
+#include "test/memusage.h"
 
 double residentSetSize(void)
   {
@@ -57,7 +53,7 @@ double VmHWM(void)
     if (!strncmp(word, "VmHWM:", 6))
       {
       if (fscanf(f,"%lf%2s",&res,word)<0)
-	{ fclose(f); return -1.0; }
+        { fclose(f); return -1.0; }
       if (strncmp(word, "kB", 2))
         { fclose(f); return -1.0; }
       res *=1024;
diff --git a/c_utils/memusage.h b/test/memusage.h
similarity index 82%
rename from c_utils/memusage.h
rename to test/memusage.h
index fa0ac4346f106b61ab8ee989a23b5c7ab7e4473c..023821a00e0a236e4bea907a6964b16b26b2a682 100644
--- a/c_utils/memusage.h
+++ b/test/memusage.h
@@ -16,21 +16,17 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*! \file memusage.h
  *  Functionality for measuring memory consumption
  *
- *  Copyright (C) 2012 Max-Planck-Society
+ *  Copyright (C) 2012-2019 Max-Planck-Society
  *  \author Martin Reinecke
  */
 
-#ifndef PLANCK_MEMUSAGE_H
-#define PLANCK_MEMUSAGE_H
+#ifndef SHARP_MEMUSAGE_H
+#define SHARP_MEMUSAGE_H
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/libsharp/sharp_testsuite.c b/test/sharp_testsuite.c
similarity index 98%
rename from libsharp/sharp_testsuite.c
rename to test/sharp_testsuite.c
index f22a91ecb12f86fbc7137d43dde23c7474493b4d..c9986be3de981a8933d73dba62afb2fafafd577e 100644
--- a/libsharp/sharp_testsuite.c
+++ b/test/sharp_testsuite.c
@@ -16,11 +16,7 @@
  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
-/*
- *  libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
- *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
- *  (DLR).
- */
+/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
 
 /*  \file sharp_testsuite.c
  *
@@ -30,19 +26,19 @@
 
 #include <stdio.h>
 #include <string.h>
+#include <complex.h>
 #ifdef USE_MPI
 #include "mpi.h"
-#include "sharp_mpi.h"
+#include "libsharp/sharp_mpi.h"
 #endif
 #ifdef _OPENMP
 #include <omp.h>
 #endif
-#include "sharp.h"
-#include "sharp_internal.h"
-#include "sharp_geomhelpers.h"
-#include "sharp_almhelpers.h"
-#include "c_utils.h"
-#include "memusage.h"
+#include "libsharp/sharp.h"
+#include "libsharp/sharp_geomhelpers.h"
+#include "libsharp/sharp_almhelpers.h"
+#include "libsharp/sharp_utils.h"
+#include "test/memusage.h"
 
 static void OpenMP_status(void)
   {