Commit f4ad8895 authored by Martin Reinecke

cleanup

parent b6b06fd8
Makefile.am
@@ -59,12 +59,10 @@ endif
nobase_include_HEADERS = \
libsharp2/sharp.h \
libsharp2/sharp_mpi.h \
libsharp2/sharp_geomhelpers.h \
libsharp2/sharp_almhelpers.h
-EXTRA_DIST = \
-runtest.sh fortran/sharp.f90 fortran/test_sharp.f90 libsharp2/sharp_mpi.cc
+EXTRA_DIST = runtest.sh
check_PROGRAMS = sharp2_testsuite
sharp2_testsuite_SOURCES = test/sharp2_testsuite.cc
libsharp2/sharp.c
@@ -928,32 +928,3 @@ void sharp_set_chunksize_min(int new_chunksize_min)
{ chunksize_min=new_chunksize_min; }
void sharp_set_nchunks_max(int new_nchunks_max)
{ nchunks_max=new_nchunks_max; }
-#ifdef USE_MPI
-#include "sharp_mpi.c"
-int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
-void *alm, void *map, const sharp_geom_info *geom_info,
-const sharp_alm_info *alm_info, int flags, double *time,
-unsigned long long *opcnt)
-{
-MPI_Comm comm = *(MPI_Comm*)pcomm;
-sharp_execute_mpi((MPI_Comm)comm, type, spin, alm, map, geom_info, alm_info,
-flags, time, opcnt);
-return 0;
-}
-#else
-int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
-void *alm, void *map, const sharp_geom_info *geom_info,
-const sharp_alm_info *alm_info, int flags, double *time,
-unsigned long long *opcnt)
-{
-/* Suppress unused warning: */
-(void)pcomm; (void)type; (void)spin; (void)alm; (void)map; (void)geom_info;
-(void)alm_info; (void)flags; (void)time; (void)opcnt;
-return SHARP_ERROR_NO_MPI;
-}
-#endif
libsharp2/sharp.h
@@ -226,28 +226,6 @@ void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
void sharp_set_chunksize_min(int new_chunksize_min);
void sharp_set_nchunks_max(int new_nchunks_max);
-typedef enum { SHARP_ERROR_NO_MPI = 1,
-/*!< libsharp2 not compiled with MPI support */
-} sharp_errors;
-/*! Works like sharp_execute_mpi, but is always present whether or not libsharp2
-is compiled with USE_MPI. This is primarily useful for wrapper code etc.
-Note that \a pcomm actually has the type MPI_Comm*; it is declared as void*
-to avoid pulling in MPI headers. That is, the comm argument passed to
-sharp_execute_mpi is *(MPI_Comm*)pcomm.
-Other parameters are the same as for sharp_execute_mpi.
-Returns 0 if successful, or SHARP_ERROR_NO_MPI if MPI is not available
-(in which case nothing is done).
-*/
-int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
-void *alm, void *map, const sharp_geom_info *geom_info,
-const sharp_alm_info *alm_info, int flags, double *time,
-unsigned long long *opcnt);
-/*! \} */
int sharp_get_mlim (int lmax, int spin, double sth, double cth);
libsharp2/sharp_mpi.c
/*
* This file is part of libsharp2.
*
* libsharp2 is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp2; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* libsharp2 is being developed at the Max-Planck-Institut fuer Astrophysik */
/*! \file sharp_mpi.c
* Functionality only needed for MPI-parallel transforms
*
* Copyright (C) 2012-2019 Max-Planck-Society
* \author Martin Reinecke \author Dag Sverre Seljebotn
*/
#ifdef USE_MPI
#include "libsharp2/sharp_mpi.h"
typedef struct
{
int ntasks; /* number of tasks */
int mytask; /* own task number */
MPI_Comm comm; /* communicator to use */
int *nm; /* number of m values on every task */
int *ofs_m; /* accumulated nm */
int nmtotal; /* total number of m values (must be mmax+1) */
int *mval; /* array containing all m values of task 0, task 1 etc. */
int mmax;
int nph;
int *npair; /* number of ring pairs on every task */
int *ofs_pair; /* accumulated npair */
int npairtotal; /* total number of ring pairs */
double *theta; /* theta of first ring of every pair on task 0, task 1 etc. */
int *ispair; /* is this really a pair? */
int *almcount, *almdisp, *mapcount, *mapdisp; /* for all2all communication */
} sharp_mpi_info;
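/* Illustrative example (hypothetical values, for orientation only): with
three tasks holding nm = {3,2,4} m values each, ofs_m is the exclusive
prefix sum {0,3,5,9} and nmtotal = 9; npair/ofs_pair follow the same
pattern for ring pairs. */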
static void sharp_make_mpi_info (MPI_Comm comm, const sharp_job *job,
sharp_mpi_info *minfo)
{
minfo->comm = comm;
MPI_Comm_size (comm, &minfo->ntasks);
MPI_Comm_rank (comm, &minfo->mytask);
minfo->nm=RALLOC(int,minfo->ntasks);
MPI_Allgather ((int *)(&job->ainfo->nm),1,MPI_INT,minfo->nm,1,MPI_INT,comm);
minfo->ofs_m=RALLOC(int,minfo->ntasks+1);
minfo->ofs_m[0]=0;
for (int i=1; i<=minfo->ntasks; ++i)
minfo->ofs_m[i] = minfo->ofs_m[i-1]+minfo->nm[i-1];
minfo->nmtotal=minfo->ofs_m[minfo->ntasks];
minfo->mval=RALLOC(int,minfo->nmtotal);
MPI_Allgatherv(job->ainfo->mval, job->ainfo->nm, MPI_INT, minfo->mval,
minfo->nm, minfo->ofs_m, MPI_INT, comm);
minfo->mmax=sharp_get_mmax(minfo->mval,minfo->nmtotal);
minfo->npair=RALLOC(int,minfo->ntasks);
MPI_Allgather ((int *)(&job->ginfo->npairs), 1, MPI_INT, minfo->npair, 1,
MPI_INT, comm);
minfo->ofs_pair=RALLOC(int,minfo->ntasks+1);
minfo->ofs_pair[0]=0;
for (int i=1; i<=minfo->ntasks; ++i)
minfo->ofs_pair[i] = minfo->ofs_pair[i-1]+minfo->npair[i-1];
minfo->npairtotal=minfo->ofs_pair[minfo->ntasks];
double *theta_tmp=RALLOC(double,job->ginfo->npairs);
int *ispair_tmp=RALLOC(int,job->ginfo->npairs);
for (int i=0; i<job->ginfo->npairs; ++i)
{
theta_tmp[i]=job->ginfo->pair[i].r1.theta;
ispair_tmp[i]=job->ginfo->pair[i].r2.nph>0;
}
minfo->theta=RALLOC(double,minfo->npairtotal);
minfo->ispair=RALLOC(int,minfo->npairtotal);
MPI_Allgatherv(theta_tmp, job->ginfo->npairs, MPI_DOUBLE, minfo->theta,
minfo->npair, minfo->ofs_pair, MPI_DOUBLE, comm);
MPI_Allgatherv(ispair_tmp, job->ginfo->npairs, MPI_INT, minfo->ispair,
minfo->npair, minfo->ofs_pair, MPI_INT, comm);
DEALLOC(theta_tmp);
DEALLOC(ispair_tmp);
minfo->nph=2*job->nmaps;
minfo->almcount=RALLOC(int,minfo->ntasks);
minfo->almdisp=RALLOC(int,minfo->ntasks+1);
minfo->mapcount=RALLOC(int,minfo->ntasks);
minfo->mapdisp=RALLOC(int,minfo->ntasks+1);
minfo->almdisp[0]=minfo->mapdisp[0]=0;
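/* Counts and displacements for MPI_Alltoallv, in units of MPI_DOUBLE (each
dcmplx contributes two doubles): almcount[i] covers this task's m values
paired with task i's ring pairs, mapcount[i] covers task i's m values paired
with this task's ring pairs. They serve as the send/receive counts of the
MPI_Alltoallv calls below, swapped between the two transform directions. */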
for (int i=0; i<minfo->ntasks; ++i)
{
minfo->almcount[i] = 2*minfo->nph*minfo->nm[minfo->mytask]*minfo->npair[i];
minfo->almdisp[i+1] = minfo->almdisp[i]+minfo->almcount[i];
minfo->mapcount[i] = 2*minfo->nph*minfo->nm[i]*minfo->npair[minfo->mytask];
minfo->mapdisp[i+1] = minfo->mapdisp[i]+minfo->mapcount[i];
}
}
static void sharp_destroy_mpi_info (sharp_mpi_info *minfo)
{
DEALLOC(minfo->nm);
DEALLOC(minfo->ofs_m);
DEALLOC(minfo->mval);
DEALLOC(minfo->npair);
DEALLOC(minfo->ofs_pair);
DEALLOC(minfo->theta);
DEALLOC(minfo->ispair);
DEALLOC(minfo->almcount);
DEALLOC(minfo->almdisp);
DEALLOC(minfo->mapcount);
DEALLOC(minfo->mapdisp);
}
static void sharp_communicate_alm2map (const sharp_mpi_info *minfo, dcmplx **ph)
{
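/* almdisp/mapdisp count MPI_DOUBLE elements, hence the division by 2 when
sizing dcmplx buffers */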
dcmplx *phas_tmp = RALLOC(dcmplx,minfo->mapdisp[minfo->ntasks]/2);
MPI_Alltoallv (*ph,minfo->almcount,minfo->almdisp,MPI_DOUBLE,phas_tmp,
minfo->mapcount,minfo->mapdisp,MPI_DOUBLE,minfo->comm);
DEALLOC(*ph);
ALLOC(*ph,dcmplx,minfo->nph*minfo->npair[minfo->mytask]*minfo->nmtotal);
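/* Reorder the received data: phas_tmp is packed contiguously per sending
task, while *ph uses the full (mmax+1)-wide m layout expected downstream. */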
for (int task=0; task<minfo->ntasks; ++task)
for (int th=0; th<minfo->npair[minfo->mytask]; ++th)
for (int mi=0; mi<minfo->nm[task]; ++mi)
{
int m = minfo->mval[mi+minfo->ofs_m[task]];
int o1 = minfo->nph*(th*(minfo->mmax+1) + m);
int o2 = minfo->mapdisp[task]/2+minfo->nph*(mi+th*minfo->nm[task]);
for (int i=0; i<minfo->nph; ++i)
(*ph)[o1+i] = phas_tmp[o2+i];
}
DEALLOC(phas_tmp);
}
static void sharp_communicate_map2alm (const sharp_mpi_info *minfo, dcmplx **ph)
{
dcmplx *phas_tmp = RALLOC(dcmplx,minfo->mapdisp[minfo->ntasks]/2);
for (int task=0; task<minfo->ntasks; ++task)
for (int th=0; th<minfo->npair[minfo->mytask]; ++th)
for (int mi=0; mi<minfo->nm[task]; ++mi)
{
int m = minfo->mval[mi+minfo->ofs_m[task]];
int o1 = minfo->mapdisp[task]/2+minfo->nph*(mi+th*minfo->nm[task]);
int o2 = minfo->nph*(th*(minfo->mmax+1) + m);
for (int i=0; i<minfo->nph; ++i)
phas_tmp[o1+i] = (*ph)[o2+i];
}
DEALLOC(*ph);
ALLOC(*ph,dcmplx,minfo->nph*minfo->nm[minfo->mytask]*minfo->npairtotal);
MPI_Alltoallv (phas_tmp,minfo->mapcount,minfo->mapdisp,MPI_DOUBLE,
*ph,minfo->almcount,minfo->almdisp,MPI_DOUBLE,minfo->comm);
DEALLOC(phas_tmp);
}
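/* For MAP2ALM the phase array holds all m values (nmfull) for the local ring
pairs; for ALM2MAP it holds only the local m values, but for all ring pairs
(nthetafull), since the data is redistributed only after the inner loop. */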
static void alloc_phase_mpi (sharp_job *job, int nm, int ntheta,
int nmfull, int nthetafull)
{
ptrdiff_t phase_size = (job->type==SHARP_MAP2ALM) ?
(ptrdiff_t)(nmfull)*ntheta : (ptrdiff_t)(nm)*nthetafull;
job->phase=RALLOC(dcmplx,2*job->nmaps*phase_size);
job->s_m=2*job->nmaps;
job->s_th = job->s_m * ((job->type==SHARP_MAP2ALM) ? nmfull : nm);
}
static void alm2map_comm (sharp_job *job, const sharp_mpi_info *minfo)
{
if (job->type != SHARP_MAP2ALM)
{
sharp_communicate_alm2map (minfo,&job->phase);
job->s_th=job->s_m*minfo->nmtotal;
}
}
static void map2alm_comm (sharp_job *job, const sharp_mpi_info *minfo)
{
if (job->type == SHARP_MAP2ALM)
{
sharp_communicate_map2alm (minfo,&job->phase);
job->s_th=job->s_m*minfo->nm[minfo->mytask];
}
}
static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
{
int ntasks;
MPI_Comm_size(comm, &ntasks);
if (ntasks==1) /* fall back to scalar implementation */
{ sharp_execute_job (job); return; }
MPI_Barrier(comm);
double timer=sharp_wallTime();
job->opcnt=0;
sharp_mpi_info minfo;
sharp_make_mpi_info(comm, job, &minfo);
if (minfo.npairtotal>minfo.ntasks*300)
{
int nsub=(minfo.npairtotal+minfo.ntasks*200-1)/(minfo.ntasks*200);
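/* Process the rings in nsub sub-jobs of roughly 200 ring pairs per task;
each sub-job picks every nsub-th ring pair (offset isub) in the loop below,
which bounds the size of the per-sub-job ring data and phase buffers. */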
for (int isub=0; isub<nsub; ++isub)
{
sharp_job ljob=*job;
// When creating a_lm, every sub-job produces a complete set of
// coefficients; they need to be added up.
if ((isub>0)&&(job->type==SHARP_MAP2ALM)) ljob.flags|=SHARP_ADD;
sharp_geom_info lginfo;
lginfo.pair=RALLOC(sharp_ringpair,(job->ginfo->npairs/nsub)+1);
lginfo.npairs=0;
lginfo.nphmax = job->ginfo->nphmax;
while (lginfo.npairs*nsub+isub<job->ginfo->npairs)
{
lginfo.pair[lginfo.npairs]=job->ginfo->pair[lginfo.npairs*nsub+isub];
++lginfo.npairs;
}
ljob.ginfo=&lginfo;
sharp_execute_job_mpi (&ljob,comm);
job->opcnt+=ljob.opcnt;
DEALLOC(lginfo.pair);
}
}
else
{
int lmax = job->ainfo->lmax;
job->norm_l = sharp_Ylmgen_get_norm (lmax, job->spin);
/* clear output arrays if requested */
init_output (job);
alloc_phase_mpi (job,job->ainfo->nm,job->ginfo->npairs,minfo.mmax+1,
minfo.npairtotal);
double *cth = RALLOC(double,minfo.npairtotal),
*sth = RALLOC(double,minfo.npairtotal);
int *mlim = RALLOC(int,minfo.npairtotal);
for (int i=0; i<minfo.npairtotal; ++i)
{
cth[i] = cos(minfo.theta[i]);
sth[i] = sin(minfo.theta[i]);
mlim[i] = sharp_get_mlim(lmax, job->spin, sth[i], cth[i]);
}
/* map->phase where necessary */
map2phase (job, minfo.mmax, 0, job->ginfo->npairs);
map2alm_comm (job, &minfo);
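/* Each OpenMP thread works on a private copy of the job; the per-thread
operation counts are merged in the critical section at the end of the
parallel region. */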
#pragma omp parallel
{
sharp_job ljob = *job;
sharp_Ylmgen_C generator;
sharp_Ylmgen_init (&generator,lmax,minfo.mmax,ljob.spin);
alloc_almtmp(&ljob,lmax);
#pragma omp for schedule(dynamic,1)
for (int mi=0; mi<job->ainfo->nm; ++mi)
{
/* alm->alm_tmp where necessary */
alm2almtmp (&ljob, lmax, mi);
/* inner conversion loop */
inner_loop (&ljob, minfo.ispair, cth, sth, 0, minfo.npairtotal,
&generator, mi, mlim);
/* alm_tmp->alm where necessary */
almtmp2alm (&ljob, lmax, mi);
}
sharp_Ylmgen_destroy(&generator);
dealloc_almtmp(&ljob);
#pragma omp critical
job->opcnt+=ljob.opcnt;
} /* end of parallel region */
alm2map_comm (job, &minfo);
/* phase->map where necessary */
phase2map (job, minfo.mmax, 0, job->ginfo->npairs);
DEALLOC(mlim);
DEALLOC(cth);
DEALLOC(sth);
DEALLOC(job->norm_l);
dealloc_phase (job);
}
sharp_destroy_mpi_info(&minfo);
job->time=sharp_wallTime()-timer;
}
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
void *alm, void *map, const sharp_geom_info *geom_info,
const sharp_alm_info *alm_info, int flags, double *time,
unsigned long long *opcnt)
{
sharp_job job;
sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
flags);
sharp_execute_job_mpi (&job, comm);
if (time!=NULL) *time = job.time;
if (opcnt!=NULL) *opcnt = job.opcnt;
}
/* This function is declared only in this C file, so that the symbol is
available to the Fortran wrappers; it is deliberately not declared in the
C header, since it should not be visible to C code. */
void sharp_execute_mpi_fortran(MPI_Fint comm, sharp_jobtype type, int spin,
void *alm, void *map, const sharp_geom_info *geom_info,
const sharp_alm_info *alm_info, int flags, double *time,
unsigned long long *opcnt);
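/* MPI_Comm_f2c converts the integer communicator handle used by Fortran
into the C-side MPI_Comm. */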
void sharp_execute_mpi_fortran(MPI_Fint comm, sharp_jobtype type, int spin,
void *alm, void *map, const sharp_geom_info *geom_info,
const sharp_alm_info *alm_info, int flags, double *time,
unsigned long long *opcnt)
{
sharp_execute_mpi(MPI_Comm_f2c(comm), type, spin, alm, map, geom_info,
alm_info, flags, time, opcnt);
}
#endif
libsharp2/sharp_mpi.h
/*
* This file is part of libsharp2.
*
* libsharp2 is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp2 is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp2; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* libsharp2 is being developed at the Max-Planck-Institut fuer Astrophysik */
/*! \file sharp_mpi.h
* Interface for the spherical transform library with MPI support.
*
* Copyright (C) 2011-2019 Max-Planck-Society
* \author Martin Reinecke \author Dag Sverre Seljebotn
*/
#ifndef SHARP_MPI_H
#define SHARP_MPI_H
#include <mpi.h>
#include "libsharp2/sharp.h"
#ifdef __cplusplus
extern "C" {
#endif
/*! Performs an MPI parallel libsharp2 SHT job. The interface deliberately does
not use the C99 "complex" data type, in order to be callable from C89 and C++.
\param comm the MPI communicator to be used for this SHT
\param type the type of SHT
\param spin the spin of the quantities to be transformed
\param alm contains pointers to the a_lm coefficients. If \a spin==0,
alm[0] points to the a_lm of the SHT. If \a spin>0, alm[0] and alm[1]
point to the two a_lm sets of the SHT. The exact data type of \a alm
depends on whether the SHARP_DP flag is set.
\param map contains pointers to the maps. If \a spin==0,
map[0] points to the map of the SHT. If \a spin>0, or \a type is
SHARP_ALM2MAP_DERIV1, map[0] and map[1] point to the two maps of the SHT.
The exact data type of \a map depends on whether the SHARP_DP flag is set.
\param geom_info A \c sharp_geom_info object compatible with the provided
\a map arrays. The total map geometry is the union of all \a geom_info
objects over the participating MPI tasks.
\param alm_info A \c sharp_alm_info object compatible with the provided
\a alm arrays. All \c m values from 0 to some \c mmax<=lmax must be present
exactly once in the union of all \a alm_info objects over the participating
MPI tasks.
\param flags See sharp_jobflags. In particular, if SHARP_DP is set, then
\a alm is expected to have the type "complex double **" and \a map is
expected to have the type "double **"; otherwise, the expected
types are "complex float **" and "float **", respectively.
\param time If not NULL, the wall clock time required for this SHT
(in seconds) will be written here.
\param opcnt If not NULL, a conservative estimate of the total floating point
operation count for this SHT will be written here. */
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,
void *alm, void *map, const sharp_geom_info *geom_info,
const sharp_alm_info *alm_info, int flags, double *time,
unsigned long long *opcnt);
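/* Illustrative usage sketch (not part of this header; all setup names are
hypothetical): a double-precision, spin-0 map2alm transform over
MPI_COMM_WORLD. The distributed geometry and a_lm descriptions (ginfo,
ainfo) and the buffers my_map, my_alm would typically be built beforehand
with the helpers from sharp_geomhelpers.h and sharp_almhelpers.h:

#include <complex.h>
double complex *alm_arr[1] = { my_alm };   // a_lm coefficients (SHARP_DP)
double *map_arr[1] = { my_map };           // map samples owned by this task
double time;
unsigned long long opcnt;
sharp_execute_mpi (MPI_COMM_WORLD, SHARP_MAP2ALM, 0, alm_arr, map_arr,
ginfo, ainfo, SHARP_DP, &time, &opcnt);
*/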
#ifdef __cplusplus
}
#endif
#endif