Commit 5fedd515 authored by Martin Reinecke
Browse files

Merge branch 'fft_work' into 'master'

Fft work

See merge request mtr/cxxbase!10
parents fd7a243c f7778607
Pipeline #75158 passed with stages
in 8 minutes and 58 seconds
......@@ -25,7 +25,7 @@ if sys.platform == 'darwin':
python_module_link_args+=['-bundle']
else:
extra_compile_args += ['--std=c++17', '-march=native', '-O3', '-ffast-math']
python_module_link_args += ['-march=native', '-Wl,-rpath,$ORIGIN', '-ffast-math']
python_module_link_args += ['-march=native', '-Wl,-rpath,$ORIGIN']
# if you don't want debugging info, add "-s" to python_module_link_args
......
......@@ -27,7 +27,7 @@ elif sys.platform == 'win32':
extra_compile_args = ['/Ox', '/EHsc', '/std:c++17']
else:
extra_compile_args += ['-Wfatal-errors', '-Wfloat-conversion','-W', '-Wall', '-Wstrict-aliasing=2', '-Wwrite-strings', '-Wredundant-decls', '-Woverloaded-virtual', '-Wcast-qual', '-Wcast-align', '-Wpointer-arith']
python_module_link_args += ['-march=native', '-ffast-math', '-Wl,-rpath,$ORIGIN']
python_module_link_args += ['-march=native', '-Wl,-rpath,$ORIGIN']
# if you don't want debugging info, add "-s" to python_module_link_args
......
......@@ -27,7 +27,7 @@ elif sys.platform == 'win32':
extra_compile_args = ['/Ox', '/EHsc', '/std:c++17']
else:
extra_compile_args += ['-Wfatal-errors', '-Wfloat-conversion', '-W', '-Wall', '-Wstrict-aliasing=2', '-Wwrite-strings', '-Wredundant-decls', '-Woverloaded-virtual', '-Wcast-qual', '-Wcast-align', '-Wpointer-arith']
python_module_link_args += ['-march=native', '-ffast-math', '-Wl,-rpath,$ORIGIN']
python_module_link_args += ['-march=native', '-Wl,-rpath,$ORIGIN']
# if you don't want debugging info, add "-s" to python_module_link_args
......
......@@ -3,6 +3,8 @@ import pypocketfft
from time import time
import matplotlib.pyplot as plt
np.random.seed(42)
def _l2error(a, b):
return np.sqrt(np.sum(np.abs(a-b)**2)/np.sum(np.abs(a)**2))
......@@ -130,11 +132,12 @@ def bench_nd(ndim, nmax, nthr, ntry, tp, funcs, nrepeat, ttl="", filename="",
funcs = (measure_pypocketfft, measure_fftw)
ttl = "pypocketfft/FFTW()"
ntry=100
nthr = 1
nice_sizes = True
bench_nd(1, 8192, nthr, 100, "c16", funcs, 10, ttl, "1d.png", nice_sizes)
bench_nd(2, 2048, nthr, 100, "c16", funcs, 2, ttl, "2d.png", nice_sizes)
bench_nd(3, 256, nthr, 100, "c16", funcs, 2, ttl, "3d.png", nice_sizes)
bench_nd(1, 8192, nthr, 100, "c8", funcs, 10, ttl, "1d_single.png", nice_sizes)
bench_nd(2, 2048, nthr, 100, "c8", funcs, 2, ttl, "2d_single.png", nice_sizes)
bench_nd(3, 256, nthr, 100, "c8", funcs, 2, ttl, "3d_single.png", nice_sizes)
bench_nd(1, 8192, nthr, ntry, "c16", funcs, 10, ttl, "1d.png", nice_sizes)
bench_nd(2, 2048, nthr, ntry, "c16", funcs, 2, ttl, "2d.png", nice_sizes)
bench_nd(3, 256, nthr, ntry, "c16", funcs, 2, ttl, "3d.png", nice_sizes)
bench_nd(1, 8192, nthr, ntry, "c8", funcs, 10, ttl, "1d_single.png", nice_sizes)
bench_nd(2, 2048, nthr, ntry, "c8", funcs, 2, ttl, "2d_single.png", nice_sizes)
bench_nd(3, 256, nthr, ntry, "c8", funcs, 2, ttl, "3d_single.png", nice_sizes)
......@@ -28,7 +28,7 @@ elif sys.platform == 'win32':
extra_compile_args = ['/Ox', '/EHsc', '/std:c++17']
else:
extra_compile_args += ['-Wfatal-errors', '-Wfloat-conversion', '-Wsign-conversion', '-Wconversion' ,'-W', '-Wall', '-Wstrict-aliasing=2', '-Wwrite-strings', '-Wredundant-decls', '-Woverloaded-virtual', '-Wcast-qual', '-Wcast-align', '-Wpointer-arith']
python_module_link_args += ['-march=native', '-ffast-math', '-Wl,-rpath,$ORIGIN']
python_module_link_args += ['-Wl,-rpath,$ORIGIN']
# if you don't want debugging info, add "-s" to python_module_link_args
......
......@@ -71,7 +71,8 @@ dtypes = [np.float32, np.float64,
@pmp("inorm", [0, 1, 2])
@pmp("dtype", dtypes)
def test1D(len, inorm, dtype):
a = np.random.rand(len)-0.5 + 1j*np.random.rand(len)-0.5j
rng = np.random.default_rng(np.random.SeedSequence(42))
a = rng.random(len)-0.5 + 1j*rng.random(len)-0.5j
a = a.astype(ctype[dtype])
eps = tol[dtype]
assert_(_l2error(a, ifftn(fftn(a, inorm=inorm), inorm=2-inorm)) < eps)
......@@ -91,7 +92,8 @@ def test1D(len, inorm, dtype):
@pmp("nthreads", (0, 1, 2))
@pmp("inorm", [0, 1, 2])
def test_fftn(shp, nthreads, inorm):
a = np.random.rand(*shp)-0.5 + 1j*np.random.rand(*shp)-0.5j
rng = np.random.default_rng(np.random.SeedSequence(42))
a = rng.random(shp)-0.5 + 1j*rng.random(shp)-0.5j
assert_(_l2error(a, ifftn(fftn(a, nthreads=nthreads, inorm=inorm),
nthreads=nthreads, inorm=2-inorm)) < 1e-15)
a = a.astype(np.complex64)
......@@ -103,7 +105,8 @@ def test_fftn(shp, nthreads, inorm):
@pmp("axes", ((0,), (1,), (0, 1), (1, 0)))
@pmp("inorm", [0, 1, 2])
def test_fftn2D(shp, axes, inorm):
a = np.random.rand(*shp)-0.5 + 1j*np.random.rand(*shp)-0.5j
rng = np.random.default_rng(np.random.SeedSequence(42))
a = rng.random(shp)-0.5 + 1j*rng.random(shp)-0.5j
assert_(_l2error(a, ifftn(fftn(a, axes=axes, inorm=inorm),
axes=axes, inorm=2-inorm)) < 1e-15)
a = a.astype(np.complex64)
......@@ -113,7 +116,8 @@ def test_fftn2D(shp, axes, inorm):
@pmp("shp", shapes)
def test_rfftn(shp):
a = np.random.rand(*shp)-0.5
rng = np.random.default_rng(np.random.SeedSequence(42))
a = rng.random(shp)-0.5
tmp1 = rfftn(a)
tmp2 = fftn(a)
part = tuple(slice(0,tmp1.shape[i]) for i in range(tmp1.ndim))
......@@ -128,7 +132,7 @@ def test_rfftn(shp):
# @pmp("shp", shapes)
# def test_rfft_scipy(shp):
# for i in range(len(shp)):
# a = np.random.rand(*shp)-0.5
# a = rng.random(shp)-0.5
# assert_(_l2error(pyfftw.interfaces.scipy_fftpack.rfft(a, axis=i),
# rfft_scipy(a, axis=i)) < 1e-15)
# assert_(_l2error(pyfftw.interfaces.scipy_fftpack.irfft(a, axis=i),
......@@ -138,7 +142,8 @@ def test_rfftn(shp):
@pmp("shp", shapes2D)
@pmp("axes", ((0,), (1,), (0, 1), (1, 0)))
def test_rfftn2D(shp, axes):
a = np.random.rand(*shp)-0.5
rng = np.random.default_rng(np.random.SeedSequence(42))
a = rng.random(shp)-0.5
tmp1 = rfftn(a,axes=axes)
tmp2 = fftn(a,axes=axes)
part = tuple(slice(0,tmp1.shape[i]) for i in range(tmp1.ndim))
......@@ -152,7 +157,8 @@ def test_rfftn2D(shp, axes):
@pmp("shp", shapes)
def test_identity(shp):
a = np.random.rand(*shp)-0.5 + 1j*np.random.rand(*shp)-0.5j
rng = np.random.default_rng(np.random.SeedSequence(42))
a = rng.random(shp)-0.5 + 1j*rng.random(shp)-0.5j
assert_(_l2error(ifftn(fftn(a), inorm=2), a) < 1.5e-15)
assert_(_l2error(ifftn(fftn(a.real), inorm=2), a.real) < 1.5e-15)
assert_(_l2error(fftn(ifftn(a.real), inorm=2), a.real) < 1.5e-15)
......@@ -165,7 +171,8 @@ def test_identity(shp):
@pmp("shp", shapes)
def test_identity_r(shp):
a = np.random.rand(*shp)-0.5
rng = np.random.default_rng(np.random.SeedSequence(42))
a = rng.random(shp)-0.5
b = a.astype(np.float32)
for ax in range(a.ndim):
n = a.shape[ax]
......@@ -177,14 +184,16 @@ def test_identity_r(shp):
@pmp("shp", shapes)
def test_identity_r2(shp):
a = np.random.rand(*shp)-0.5 + 1j*np.random.rand(*shp)-0.5j
rng = np.random.default_rng(np.random.SeedSequence(42))
a = rng.random(shp)-0.5 + 1j*rng.random(shp)-0.5j
a = rfftn(irfftn(a))
assert_(_l2error(rfftn(irfftn(a), inorm=2), a) < 1e-15)
@pmp("shp", shapes2D+shapes3D)
def test_genuine_hartley(shp):
a = np.random.rand(*shp)-0.5
rng = np.random.default_rng(np.random.SeedSequence(42))
a = rng.random(shp)-0.5
v1 = pypocketfft.genuine_hartley(a)
v2 = fftn(a.astype(np.complex128))
v2 = v2.real+v2.imag
......@@ -193,14 +202,16 @@ def test_genuine_hartley(shp):
@pmp("shp", shapes)
def test_hartley_identity(shp):
a = np.random.rand(*shp)-0.5
rng = np.random.default_rng(np.random.SeedSequence(42))
a = rng.random(shp)-0.5
v1 = pypocketfft.separable_hartley(pypocketfft.separable_hartley(a))/a.size
assert_(_l2error(a, v1) < 1e-15)
@pmp("shp", shapes)
def test_genuine_hartley_identity(shp):
a = np.random.rand(*shp)-0.5
rng = np.random.default_rng(np.random.SeedSequence(42))
a = rng.random(shp)-0.5
v1 = pypocketfft.genuine_hartley(pypocketfft.genuine_hartley(a))/a.size
assert_(_l2error(a, v1) < 1e-15)
v1 = a.copy()
......@@ -212,7 +223,8 @@ def test_genuine_hartley_identity(shp):
@pmp("shp", shapes2D+shapes3D)
@pmp("axes", ((0,), (1,), (0, 1), (1, 0)))
def test_genuine_hartley_2D(shp, axes):
a = np.random.rand(*shp)-0.5
rng = np.random.default_rng(np.random.SeedSequence(42))
a = rng.random(shp)-0.5
assert_(_l2error(pypocketfft.genuine_hartley(pypocketfft.genuine_hartley(
a, axes=axes), axes=axes, inorm=2), a) < 1e-15)
......@@ -222,7 +234,8 @@ def test_genuine_hartley_2D(shp, axes):
@pmp("type", [1, 2, 3, 4])
@pmp("dtype", dtypes)
def testdcst1D(len, inorm, type, dtype):
a = (np.random.rand(len)-0.5).astype(dtype)
rng = np.random.default_rng(np.random.SeedSequence(42))
a = (rng.random(len)-0.5).astype(dtype)
eps = tol[dtype]
itp = (0, 1, 3, 2, 4)
itype = itp[type]
......
......@@ -27,7 +27,7 @@ elif sys.platform == 'win32':
extra_compile_args = ['/Ox', '/EHsc', '/std:c++17']
else:
extra_compile_args += ['-Wfatal-errors', '-Wfloat-conversion', '-W', '-Wall', '-Wstrict-aliasing=2', '-Wwrite-strings', '-Wredundant-decls', '-Woverloaded-virtual', '-Wcast-qual', '-Wcast-align', '-Wpointer-arith']
python_module_link_args += ['-march=native', '-ffast-math', '-Wl,-rpath,$ORIGIN']
python_module_link_args += ['-march=native', '-Wl,-rpath,$ORIGIN']
# if you don't want debugging info, add "-s" to python_module_link_args
......
......@@ -14,7 +14,8 @@ def test_GL(params):
job = pysharp.sharpjob_d()
nalm = ((mmax+1)*(mmax+2))//2 + (mmax+1)*(lmax-mmax)
nalm_r = nalm*2-lmax-1
alm_r = np.random.uniform(-1., 1., nalm_r)
rng = np.random.default_rng(np.random.SeedSequence(42))
alm_r = rng.uniform(-1., 1., nalm_r)
alm = np.empty(nalm, dtype=np.complex128)
alm[0:lmax+1] = alm_r[0:lmax+1]
alm[lmax+1:] = np.sqrt(0.5)*(alm_r[lmax+1::2] + 1j*alm_r[lmax+2::2])
......@@ -32,7 +33,8 @@ def test_fejer1(params):
job = pysharp.sharpjob_d()
nalm = ((mmax+1)*(mmax+2))//2 + (mmax+1)*(lmax-mmax)
nalm_r = nalm*2-lmax-1
alm_r = np.random.uniform(-1., 1., nalm_r)
rng = np.random.default_rng(np.random.SeedSequence(42))
alm_r = rng.uniform(-1., 1., nalm_r)
alm = np.empty(nalm, dtype=np.complex128)
alm[0:lmax+1] = alm_r[0:lmax+1]
alm[lmax+1:] = np.sqrt(0.5)*(alm_r[lmax+1::2] + 1j*alm_r[lmax+2::2])
......@@ -50,7 +52,8 @@ def test_dh(params):
job = pysharp.sharpjob_d()
nalm = ((mmax+1)*(mmax+2))//2 + (mmax+1)*(lmax-mmax)
nalm_r = nalm*2-lmax-1
alm_r = np.random.uniform(-1., 1., nalm_r)
rng = np.random.default_rng(np.random.SeedSequence(42))
alm_r = rng.uniform(-1., 1., nalm_r)
alm = np.empty(nalm, dtype=np.complex128)
alm[0:lmax+1] = alm_r[0:lmax+1]
alm[lmax+1:] = np.sqrt(0.5)*(alm_r[lmax+1::2] + 1j*alm_r[lmax+2::2])
......
#include <cstdlib>
#include <cstring>
#include <limits>
#include <numeric>
#include <unordered_map>

#include "mr_util/infra/communication.h"
#include "mr_util/infra/error_handling.h"
namespace mr {
namespace detail_communication {
using namespace std;
/*! Raises an MR_assert failure when \a a and \a b are the same address.
    Called by the routines below that require distinct input and output
    buffers. */
void assert_unequal (const void *a, const void *b)
{
  MR_assert (a!=b, "input and output buffers must not be identical");
}
#ifdef MRUTIL_USE_MPI
/*! Table translating C++ element types to MPI datatype handles.
    All entries are registered in the constructor via \c add<T>()
    (presumably inherited from TypeMapper -- the base class is not visible
    in this file). */
class Typemap: public TypeMapper<MPI_Datatype>
{
public:
/*! Registers the element types this back end can transfer. */
Typemap()
{
add<double>(MPI_DOUBLE);
add<float>(MPI_FLOAT);
add<int>(MPI_INT);
add<long>(MPI_LONG);
add<char>(MPI_CHAR);
// NOTE(review): unsigned char is mapped to MPI_BYTE, not MPI_UNSIGNED_CHAR.
// The MPI standard only allows bitwise reductions on MPI_BYTE, while op2mop
// offers Min/Max/Sum/Prod only -- confirm that this mapping is intended.
add<unsigned char>(MPI_BYTE);
// etc.
}
};
/*! The one table instance consulted by ndt2mpi(). */
Typemap typemap;

/*! Looks up the MPI datatype that \c typemap holds for \a type. */
MPI_Datatype ndt2mpi (type_index type)
{
  return typemap[type];
}
/*! Converts a Communicator reduction operation into the matching MPI_Op
    handle. Any value other than Min, Max, Sum or Prod ends in MR_fail. */
MPI_Op op2mop (Communicator::redOp op)
{
  if (op==Communicator::Min) return MPI_MIN;
  if (op==Communicator::Max) return MPI_MAX;
  if (op==Communicator::Sum) return MPI_SUM;
  if (op==Communicator::Prod) return MPI_PROD;
  MR_fail ("unsupported reduction operation");
}
//static
void Communication::init()
{
MPI_Init(0,0);
MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_ARE_FATAL);
}
//static
/*! Reports whether MPI_Initialized() sets its flag, i.e. whether the MPI
    runtime has been started. */
bool Communication::initialized()
{
  int mpi_up = 0;
  MPI_Initialized(&mpi_up);
  return mpi_up!=0;
}
//static
/*! Shuts the MPI runtime down by calling MPI_Finalize(). */
void Communication::finalize()
{
  MPI_Finalize();
}
//static
/*! Terminates the program with status 1.
    MPI_Abort() on MPI_COMM_WORLD is used once MPI is initialized; before
    that, plain exit() is the only option. */
void Communication::abort()
{
  if (!initialized())
    exit(1);
  MPI_Abort(MPI_COMM_WORLD, 1);
}
/*! Wraps the MPI communicator \a comm and caches this process's rank and
    the number of ranks in it. */
Communicator::Communicator(CommType comm)
  : comm_(comm)
{
  MPI_Comm_rank(comm_, &rank_);
  MPI_Comm_size(comm_, &num_ranks_);
}
/*! Default communicator: delegates to the CommType constructor with
    MPI_COMM_WORLD. */
Communicator::Communicator()
  : Communicator(MPI_COMM_WORLD)
{}
/*! Frees the wrapped communicator unless it is MPI_COMM_WORLD.
    NOTE(review): the class declares a defaulted copy constructor, so two
    copies of a non-world communicator hold the same handle and both
    destructors would call MPI_Comm_free on it -- confirm that such copies
    never occur. */
Communicator::~Communicator()
{
  if (comm_==MPI_COMM_WORLD) return;
  MPI_Comm_free(&comm_);
}
/*! Calls MPI_Barrier() on the wrapped communicator. */
void Communicator::barrier() const
{
  MPI_Barrier(comm_);
}
/*! Creates a new communicator via MPI_Comm_split().
    \param color value handed to MPI as the color argument; it must fit
           into an int, because that is the type MPI expects.
    This rank's index in the current communicator is passed as the key
    argument. */
Communicator Communicator::split(size_t color) const
{
  // Reject values that the conversion to int would silently alter.
  MR_assert (color<=size_t(numeric_limits<int>::max()),
    "color value too large for MPI");
  MPI_Comm subcomm;
  MPI_Comm_split (comm_, static_cast<int>(color), rank_, &subcomm);
  return Communicator(subcomm);
}
void Communicator::sendrecvRawVoid (const void *sendbuf, size_t sendcnt,
size_t dest, void *recvbuf, size_t recvcnt, size_t src, type_index type) const
{
if ((sendcnt>0)&&(recvcnt>0)) assert_unequal(sendbuf,recvbuf);
MPI_Datatype dtype = ndt2mpi(type);
MPI_Sendrecv (const_cast<void *>(sendbuf),sendcnt,dtype,dest,0,
recvbuf,recvcnt,dtype,src,0,comm_,MPI_STATUS_IGNORE);
}
void Communicator::sendrecv_replaceRawVoid (void *data, type_index type, size_t num,
size_t dest, size_t src) const
{
MPI_Sendrecv_replace (data,num,ndt2mpi(type),dest,0,src,0,comm_,
MPI_STATUS_IGNORE);
}
void Communicator::allreduceRawVoid (const void *in, void *out, type_index type,
size_t num, redOp op) const
{
void *in2 = (in==out) ? MPI_IN_PLACE : const_cast<void *>(in);
MPI_Allreduce (in2,out,num,ndt2mpi(type),op2mop(op),comm_);
}
void Communicator::allgatherRawVoid (const void *in, void *out, type_index type,
size_t num) const
{
if (num>0) assert_unequal(in,out);
MPI_Datatype tp = ndt2mpi(type);
MPI_Allgather (const_cast<void *>(in),num,tp,out,num,tp,comm_);
}
void Communicator::allgathervRawVoid (const void *in, int numin, void *out,
const int *numout, const int *disout, type_index type) const
{
if (numin>0) assert_unequal(in,out);
MR_assert(numin==numout[rank_],"inconsistent arguments");
MPI_Datatype tp = ndt2mpi(type);
MPI_Allgatherv (const_cast<void *>(in),numin,tp,out,const_cast<int *>(numout),
const_cast<int *>(disout),tp,comm_);
}
void Communicator::all2allRawVoid (const void *in, void *out, type_index type,
size_t num) const
{
void *in2 = (in==out) ? MPI_IN_PLACE : const_cast<void *>(in);
MR_assert (num%num_ranks_==0,
"array size is not divisible by number of ranks");
MPI_Datatype tp = ndt2mpi(type);
MPI_Alltoall (in2,num/num_ranks_,tp,out,num/num_ranks_,tp,comm_);
}
/*! Variable-size all-to-all exchange via MPI_Alltoallv(); \a numin /
    \a disin and \a numout / \a disout are forwarded as the send and receive
    counts and displacements.
    The buffers are required to differ whenever the last rank's
    displacements and counts add up to a positive value. */
void Communicator::all2allvRawVoid (const void *in, const int *numin,
  const int *disin, void *out, const int *numout, const int *disout, type_index type)
  const
{
  const int last = num_ranks_-1;
  // Widen before adding: the sum of four int values can exceed the int range.
  const long long commsz = static_cast<long long>(disin[last])+numin[last]
                          +disout[last]+numout[last];
  if (commsz>0) assert_unequal(in,out);
  MPI_Datatype tp = ndt2mpi(type);
  MPI_Alltoallv (const_cast<void *>(in), const_cast<int *>(numin),
    const_cast<int *>(disin), tp, out, const_cast<int *>(numout),
    const_cast<int *>(disout), tp, comm_);
}
void Communicator::bcastRawVoid (void *data, type_index type, size_t num, int root) const
{ MPI_Bcast (data,num,ndt2mpi(type),root,comm_); }
#else
//static
/*! Serial build: nothing has to be started, so this is a no-op. */
void Communication::init()
{}
//static
/*! Serial build: always reports true. */
bool Communication::initialized()
{
  return true;
}
//static
/*! Serial build: nothing has to be shut down, so this is a no-op. */
void Communication::finalize()
{}
//static
/*! Serial build: terminates the program through exit() with status 1. */
void Communication::abort()
{
  exit(1);
}
/*! Serial build: the communicator consists of exactly one rank, number 0. */
Communicator::Communicator()
  : rank_(0),
    num_ranks_(1)
{}
/*! Serial build: the destructor has nothing to release. */
Communicator::~Communicator()
{}
/*! Serial build: with a single rank there is nobody to wait for. */
void Communicator::barrier() const
{}
/*! Serial build: the color is ignored and a copy of this communicator is
    returned. */
Communicator Communicator::split(size_t /*color*/) const
{
  return Communicator(*this);
}
/*! Serial build: a send/receive with oneself is a plain copy of
    \a sendcnt elements from \a sendbuf to \a recvbuf.
    Both ranks must be 0, both counts must agree, and the buffers must
    differ when the count is non-zero. */
void Communicator::sendrecvRawVoid (const void *sendbuf, size_t sendcnt,
  size_t dest, void *recvbuf, size_t recvcnt, size_t src, type_index type) const
{
  MR_assert ((dest==0) && (src==0), "inconsistent call");
  MR_assert (sendcnt==recvcnt, "inconsistent call");
  if (sendcnt>0)
    assert_unequal(sendbuf,recvbuf);
  const auto nbytes = sendcnt*typesize(type);
  memcpy (recvbuf, sendbuf, nbytes);
}
/*! Serial build: the data stays where it is; only the rank arguments are
    checked, both of which must be 0. */
void Communicator::sendrecv_replaceRawVoid (void *, type_index, size_t, size_t dest,
  size_t src) const
{
  MR_assert ((dest==0) && (src==0), "inconsistent call");
}
/*! Serial build: the reduction operation is ignored and \a num elements are
    copied from \a in to \a out, unless both point to the same buffer. */
void Communicator::allreduceRawVoid (const void *in, void *out, type_index type,
  size_t num, redOp /*op*/) const
{
  if (in!=out)
    memcpy (out, in, num*typesize(type));
}
/*! Serial build: copies \a num elements from \a in to \a out; the buffers
    must differ when \a num is non-zero. */
void Communicator::allgatherRawVoid (const void *in, void *out, type_index type,
  size_t num) const
{
  if (num>0)
    assert_unequal(in,out);
  memcpy (out, in, num*typesize(type));
}
/*! Serial build: copies \a num elements from \a in to \a out, unless both
    point to the same buffer. */
void Communicator::all2allRawVoid (const void *in, void *out, type_index type,
  size_t num) const
{
  if (in!=out)
    memcpy (out, in, num*typesize(type));
}
/*! Serial build: copies \a numin elements from \a in to \a out, starting at
    element offset \a disout[0]. \a numin must equal \a numout[0], and the
    buffers must differ when \a numin is positive. */
void Communicator::allgathervRawVoid (const void *in, int numin, void *out,
  const int *numout, const int *disout, type_index type) const
{
  if (numin>0)
    assert_unequal(in,out);
  MR_assert(numin==numout[0],"inconsistent call");
  const auto elemsize = typesize(type);
  char *target = static_cast<char *>(out) + disout[0]*elemsize;
  memcpy (target, in, numin*elemsize);
}
/*! Serial build: copies \a numin[0] elements from element offset
    \a disin[0] of \a in to element offset \a disout[0] of \a out.
    \a numin[0] must equal \a numout[0], and the buffers must differ when
    that count is positive. */
void Communicator::all2allvRawVoid (const void *in, const int *numin,
  const int *disin, void *out, const int *numout, const int *disout, type_index type)
  const
{
  if (numin[0]>0)
    assert_unequal(in,out);
  MR_assert (numin[0]==numout[0],"message size mismatch");
  const size_t elemsize = typesize(type);
  const char *from = static_cast<const char *>(in) + disin[0]*elemsize;
  char *to = static_cast<char *>(out) + disout[0]*elemsize;
  memcpy (to, from, numin[0]*elemsize);
}
/*! Serial build: the single rank already holds the data, so nothing is
    done. */
void Communicator::bcastRawVoid (void *, type_index, size_t, int) const
{
}
#endif
}}
/*
* This file is part of the MR utility library.
*
* This code is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This code is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this code; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* Copyright (C) 2009-2020 Max-Planck-Society
* \author Martin Reinecke
*/
#ifndef MRUTIL_COMMUNICATION_H
#define MRUTIL_COMMUNICATION_H
#define MRUTIL_USE_MPI
#include <vector>
#ifdef MRUTIL_USE_MPI
#include <mpi.h>
#endif
#include "mr_util/infra/types.h"
namespace mr {
namespace detail_communication {
using namespace std;
/*! Static entry points controlling the life cycle of the communication
    layer. The implementation file provides one variant for builds with
    MRUTIL_USE_MPI defined and one for serial builds. */
class Communication
{
public:
/*! Starts the communication layer: the MPI build calls MPI_Init() and makes
    errors on MPI_COMM_WORLD fatal; the serial build does nothing. */
static void init();
/*! MPI build: returns the flag reported by MPI_Initialized().
    Serial build: always returns true. */
static bool initialized();
/*! MPI build: calls MPI_Finalize(). Serial build: does nothing. */
static void finalize();
/*! Terminates the program with status 1. The MPI build uses MPI_Abort() on
    MPI_COMM_WORLD once MPI is initialized, and exit() otherwise. */
static void abort();
};
class Communicator
{
public:
enum redOp { Sum, Min, Max, Prod };
#ifdef MRUTIL_USE_MPI
using CommType = MPI_Comm;
#else
using CommType = struct{};
#endif
private:
CommType comm_;
int rank_, num_ranks_;
Communicator(CommType comm);
void sendrecvRawVoid (const void *sendbuf, size_t sendcnt,
size_t dest, void *recvbuf, size_t recvcnt, size_t src, type_index type) const;
void sendrecv_replaceRawVoid (void *data, type_index type, size_t num,
size_t dest, size_t src) const;
void allreduceRawVoid (const void *in, void *out, type_index type, size_t num,
redOp op) const;
void allgatherRawVoid (const void *in, void *out, type_index type, size_t num)
const;
void allgathervRawVoid (const void *in, int numin, void *out,
const int *numout, const int *disout, type_index type) const;
/*! NB: \a num refers to the <i>total</i> number of items in the arrays;
the individual message size is \a num/num_ranks(). */
void all2allRawVoid (const void *in, void *out, type_index type, size_t num) const;
void all2allvRawVoid (const void *in, const int *numin, const int *disin,
void *out, const int *numout, const int *disout, type_index type) const;
void bcastRawVoid (void *data, type_index type, size_t num, int root) const;
public:
Communicator();
~Communicator();
Communicator(const Communicator &other) = default;
int num_ranks() const { return num_ranks_; }
int rank() const { return rank_; }
bool master() const { return rank_==0; }
CommType comm() const { return comm_; }
void barrier() const;
Communicator split(size_t subgroup) const;