Commit 42dd0b07 authored by Andreas Marek

Merge branch 'python' into 'master_pre_stage'

Add python interface for ELPA

See merge request !8
parents b3e69553 a7da2bbf
@@ -125,6 +125,26 @@ distcheck-mpi:
# python tests
python-intel-intel-mpi-openmp:
tags:
- python
artifacts:
when: on_success
expire_in: 2 month
script:
- ./ci_test_scripts/run_ci_tests.sh -c "CC=\"mpiicc\" CFLAGS=\"-O3 -xAVX\" FC=\"mpiifort\" FCFLAGS=\"-O3 -xAVX\" SCALAPACK_LDFLAGS=\"$MKL_ANACONDA_INTEL_SCALAPACK_LDFLAGS_MPI_OMP \" SCALAPACK_FCFLAGS=\"$MKL_ANACONDA_INTEL_SCALAPACK_FCFLAGS_MPI_OMP \" --enable-option-checking=fatal --with-mpi=yes --enable-openmp --disable-gpu --enable-avx --enable-python --enable-python-tests" -j 8 -t 2 -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE -s $SKIP_STEP -i $INTERACTIVE_RUN
python-distcheck:
tags:
- python
script:
- ./configure CC="mpiicc" CFLAGS="-O3 -xAVX" FC="mpiifort" FCFLAGS="-O3 -xAVX" SCALAPACK_LDFLAGS="$MKL_ANACONDA_INTEL_SCALAPACK_LDFLAGS_MPI_OMP" SCALAPACK_FCFLAGS="$MKL_ANACONDA_INTEL_SCALAPACK_FCFLAGS_MPI_OMP" --enable-option-checking=fatal --with-mpi=yes --enable-openmp --disable-gpu --enable-avx --enable-python --enable-python-tests || { cat config.log; exit 1; }
# stupid 'make distcheck' leaves behind write-protected files that the stupid gitlab runner cannot remove
- make distcheck DISTCHECK_CONFIGURE_FLAGS="CC=\"mpiicc\" CFLAGS=\"-O3 -xAVX\" FC=\"mpiifort\" FCFLAGS=\"-O3 -xAVX\" SCALAPACK_LDFLAGS=\"$MKL_ANACONDA_INTEL_SCALAPACK_LDFLAGS_MPI_OMP \" SCALAPACK_FCFLAGS=\"$MKL_ANACONDA_INTEL_SCALAPACK_FCFLAGS_MPI_OMP \" --enable-option-checking=fatal --with-mpi=yes --enable-openmp --disable-gpu --enable-avx --enable-python --enable-python-tests" TASKS=2 TEST_FLAGS="150 50 16" || { chmod u+rwX -R . ; exit 1 ; }
# test_project_1stage_legacy_api_gnu
test_project_1stage_legacy_api_gnu:
tags:
......
@@ -615,7 +615,22 @@ endif
#test_c_cannon@SUFFIX@_LDADD = $(test_program_ldadd) $(FCLIBS)
#test_c_cannon@SUFFIX@_CFLAGS = $(test_program_cflags)
# python wrapper
pyelpadir = $(pythondir)/pyelpa
if WITH_PYTHON
pyelpa_PYTHON = python/pyelpa/__init__.py python/pyelpa/distributedmatrix.py
pyelpa_LTLIBRARIES = wrapper.la
else
pyelpa_PYTHON =
pyelpa_LTLIBRARIES =
endif
nodist_wrapper_la_SOURCES = python/pyelpa/wrapper.c
wrapper_la_LDFLAGS = -module -avoid-version -shared $(AM_LDFLAGS)
wrapper_la_LIBADD = libelpa@SUFFIX@.la
wrapper_la_CFLAGS = $(PYTHON_INCLUDE) $(NUMPY_INCLUDE) $(AM_CFLAGS)
python/pyelpa/wrapper.c: python/pyelpa/wrapper.pyx
cython $< -o $@
# test scripts
TASKS ?= 2
@@ -635,6 +650,25 @@ TESTS = $(check_SCRIPTS)
@echo '$(wrapper)' ./$^ '$$TEST_FLAGS' >> $@
@chmod +x $@
if WITH_PYTHON_TESTS
check_SCRIPTS += test_python.sh
endif
test_python.sh:
@echo '#!/bin/bash' > $@
# this is kind of hacky... is there a better way to get wrapper.so?
@echo 'export PYTHONPATH=./python-copy:$$PYTHONPATH' >> $@
@echo 'cp -r $(abs_top_srcdir)/python python-copy || exit 1' >> $@
@echo 'chmod u+rwX -R python-copy || exit 1' >> $@
@echo 'cp .libs/wrapper.so python-copy/pyelpa/ || exit 1' >> $@
# the dlopen flags are needed for MKL to work properly...
# the os.RTLD_* constants are only available in the os module from Python 3.3 on
@echo "$(wrapper) $(PYTHON) -c 'import sys, os; sys.setdlopenflags(os.RTLD_NOW | os.RTLD_GLOBAL); import pytest; sys.exit(pytest.main([\"./python-copy\", \"-p\", \"no:cacheprovider\"]))'" >> $@
@echo 'exit_code=$$?' >> $@
@echo 'rm -rf python-copy || exit 1' >> $@
@echo 'exit $$exit_code' >> $@
@chmod +x $@
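# For reference, the pytest one-liner written into test_python.sh above expands to
# roughly the following (a sketch, assuming wrapper.so has already been copied into
# ./python-copy/pyelpa by the preceding lines):
#
#     import sys, os
#     # default to RTLD_NOW | RTLD_GLOBAL so MKL's shared libraries can resolve
#     # their symbols (os.RTLD_* exists only from Python 3.3 on)
#     sys.setdlopenflags(os.RTLD_NOW | os.RTLD_GLOBAL)
#     import pytest
#     # run the copied test tree without leaving a pytest cache behind
#     sys.exit(pytest.main(["./python-copy", "-p", "no:cacheprovider"]))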
include doxygen.am
CLEANFILES = \
@@ -654,7 +688,9 @@ CLEANFILES = \
real* \
complex* \
double_instance* \
*.i
*.i \
python/pyelpa/wrapper.c \
test_python.sh
clean-local:
-rm -rf modules/* private_modules/* test_modules/* .fortran_dependencies/*
@@ -817,6 +853,15 @@ EXTRA_DIST += \
test/shared/test_scalapack_template.F90
endif
# python wrapper files
EXTRA_DIST += python/pyelpa/__init__.py \
python/pyelpa/distributedmatrix.py \
python/pyelpa/wrapper.pyx \
python/tests/test_elpa_import.py \
python/tests/test_mpi4py.py \
python/tests/test_numroc.py \
python/tests/test_with_mpi.py
LIBTOOL_DEPS = @LIBTOOL_DEPS@
libtool: $(LIBTOOL_DEPS)
$(SHELL) ./config.status libtool
......
if [ "$(hostname)" == "freya01" ]; then module purge && source /mpcdf/soft/try_new_modules.sh && module load git intel/17.0 gcc/7 impi/2017.3 mkl/2017.3 autoconf automake libtool pkg-config && unset SLURM_MPI_TYPE I_MPI_SLURM_EXT I_MPI_PMI_LIBRARY I_MPI_PMI2 I_MPI_HYDRA_BOOTSTRAP; fi
if [ "$(hostname)" == "freya01" ]; then module purge && source /mpcdf/soft/try_new_modules.sh && module load git intel/17.0 gcc/7 impi/2017.3 mkl/2017.3 autoconf automake libtool pkg-config anaconda/3 && unset SLURM_MPI_TYPE I_MPI_SLURM_EXT I_MPI_PMI_LIBRARY I_MPI_PMI2 I_MPI_HYDRA_BOOTSTRAP; fi
if [ "$(hostname)" == "buildtest-rzg" ]; then module load impi/5.1.3 intel/16.0 gcc/6.3 mkl/11.3 autotools pkg-config; fi
@@ -55,6 +55,10 @@ export MKL_GFORTRAN_SCALAPACK_NO_MPI_OMP_BASELINE="-L$MKL_HOME/lib/intel64 -lmkl
export MKL_GFORTRAN_SCALAPACK_FCFLAGS_NO_MPI_OMP="-I$MKL_HOME/include/intel64/lp64"
export MKL_GFORTRAN_SCALAPACK_LDFLAGS_NO_MPI_OMP="$MKL_GFORTRAN_SCALAPACK_NO_MPI_OMP_BASELINE -Wl,-rpath,$MKL_HOME/lib/intel64"
export MKL_ANACONDA_INTEL_SCALAPACK_MPI_OMP_BASELINE="-L$ANACONDA_HOME/lib -lmkl_scalapack_lp64 -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lmkl_blacs_intelmpi_lp64 -liomp5 -lpthread"
export MKL_ANACONDA_INTEL_SCALAPACK_FCFLAGS_MPI_OMP="-I$MKL_HOME/include/intel64/lp64"
export MKL_ANACONDA_INTEL_SCALAPACK_LDFLAGS_MPI_OMP="$MKL_ANACONDA_INTEL_SCALAPACK_MPI_OMP_BASELINE -Wl,-rpath,$ANACONDA_HOME/lib"
export ASAN_OPTIONS=suppressions=./ci_test_scripts/no_asan_for_mpi.supp,fast_unwind_on_malloc=0
export LSAN_OPTIONS=suppressions=./ci_test_scripts/no_lsan_for_mpi.supp
......
@@ -336,6 +336,52 @@ print(" # stupid 'make distcheck' leaves behind write-protected files that th
print(' - make distcheck DISTCHECK_CONFIGURE_FLAGS="FC=mpiifort FCFLAGS=\\"-xHost\\" CFLAGS=\\"-march=native\\" SCALAPACK_LDFLAGS=\\"$MKL_INTEL_SCALAPACK_LDFLAGS_MPI_NO_OMP\\" SCALAPACK_FCFLAGS=\\"$MKL_INTEL_SCALAPACK_FCFLAGS_MPI_NO_OMP\\" --with-mpi=yes --disable-sse-assembly --disable-sse --disable-avx --disable-avx2" TASKS=2 TEST_FLAGS="150 50 16" || { chmod u+rwX -R . ; exit 1 ; }')
print("\n\n")
# add python tests
python_ci_tests = [
"# python tests",
"python-intel-intel-mpi-openmp:",
" tags:",
" - python",
" artifacts:",
" when: on_success",
" expire_in: 2 month",
" script:",
' - ./ci_test_scripts/run_ci_tests.sh -c "'
'CC=\\"mpiicc\\" CFLAGS=\\"-O3 -xAVX\\" '
'FC=\\"mpiifort\\" FCFLAGS=\\"-O3 -xAVX\\" '
'SCALAPACK_LDFLAGS=\\"$MKL_ANACONDA_INTEL_SCALAPACK_LDFLAGS_MPI_OMP \\" '
'SCALAPACK_FCFLAGS=\\"$MKL_ANACONDA_INTEL_SCALAPACK_FCFLAGS_MPI_OMP \\" '
'--enable-option-checking=fatal --with-mpi=yes --enable-openmp '
'--disable-gpu --enable-avx --enable-python --enable-python-tests'
'" -j 8 -t 2 -m $MATRIX_SIZE -n $NUMBER_OF_EIGENVECTORS -b $BLOCK_SIZE '
'-s $SKIP_STEP -i $INTERACTIVE_RUN',
"\n",
"python-distcheck:",
" tags:",
" - python",
" script:",
' - ./configure '
'CC="mpiicc" CFLAGS="-O3 -xAVX" '
'FC="mpiifort" FCFLAGS="-O3 -xAVX" '
'SCALAPACK_LDFLAGS="$MKL_ANACONDA_INTEL_SCALAPACK_LDFLAGS_MPI_OMP" '
'SCALAPACK_FCFLAGS="$MKL_ANACONDA_INTEL_SCALAPACK_FCFLAGS_MPI_OMP" '
'--enable-option-checking=fatal --with-mpi=yes --enable-openmp '
'--disable-gpu --enable-avx --enable-python --enable-python-tests'
' || { cat config.log; exit 1; }',
" # stupid 'make distcheck' leaves behind write-protected files that "
"the stupid gitlab runner cannot remove",
' - make distcheck DISTCHECK_CONFIGURE_FLAGS="'
'CC=\\"mpiicc\\" CFLAGS=\\"-O3 -xAVX\\" '
'FC=\\"mpiifort\\" FCFLAGS=\\"-O3 -xAVX\\" '
'SCALAPACK_LDFLAGS=\\"$MKL_ANACONDA_INTEL_SCALAPACK_LDFLAGS_MPI_OMP \\" '
'SCALAPACK_FCFLAGS=\\"$MKL_ANACONDA_INTEL_SCALAPACK_FCFLAGS_MPI_OMP \\" '
'--enable-option-checking=fatal --with-mpi=yes --enable-openmp '
'--disable-gpu --enable-avx --enable-python --enable-python-tests'
'" TASKS=2 TEST_FLAGS="150 50 16" || { chmod u+rwX -R . ; exit 1 ; }',
"\n",
]
print("\n".join(python_ci_tests))
# construct the builds of the "test_projects"
......
@@ -1299,6 +1299,81 @@ else
AC_MSG_RESULT([no])
fi
AC_MSG_CHECKING(whether --enable-python is specified)
AC_ARG_ENABLE([python],
AS_HELP_STRING([--enable-python],
[build and install python wrapper, default no.]),
[
if test x"$enableval" = x"yes"; then
enable_python=yes
else
enable_python=no
fi
],
[enable_python=no])
AC_MSG_RESULT([${enable_python}])
AM_CONDITIONAL([WITH_PYTHON],[test x"$enable_python" = x"yes"])
if test x"${enable_python}" = x"yes"; then
AC_DEFINE([WITH_PYTHON], [1], [build and install python wrapper])
# check for python and dependencies
AM_PATH_PYTHON([3.6])
AC_ARG_VAR([PYTHON_INCLUDE], [Include flags for python, bypassing python-config])
AC_ARG_VAR([PYTHON_CONFIG], [Path to python-config])
AS_IF([test -z "$PYTHON_INCLUDE"], [
AS_IF([test -z "$PYTHON_CONFIG"], [
AC_PATH_PROGS([PYTHON_CONFIG],
[python$PYTHON_VERSION-config python-config],
[no],
[`dirname $PYTHON`])
AS_IF([test "$PYTHON_CONFIG" = no], [AC_MSG_ERROR([cannot find python-config for $PYTHON.])])
])
AC_MSG_CHECKING([python include flags])
PYTHON_INCLUDE=`$PYTHON_CONFIG --includes`
AC_MSG_RESULT([$PYTHON_INCLUDE])
])
AC_MSG_CHECKING([numpy module])
AS_IF([$PYTHON -c "import numpy"], [AC_MSG_RESULT([found.])],
[AC_MSG_ERROR([cannot find numpy.])])
AC_MSG_CHECKING([mpi4py module])
AS_IF([$PYTHON -c "import mpi4py"], [AC_MSG_RESULT([found.])],
[AC_MSG_ERROR([cannot find mpi4py.])])
AC_MSG_CHECKING([cython module])
AS_IF([$PYTHON -c "import cython"], [AC_MSG_RESULT([found.])],
[AC_MSG_ERROR([cannot find cython.])])
AC_CHECK_PROG([cython_found], [cython], [yes], [no])
if test x"$cython_found" != x"yes" ; then
AC_MSG_ERROR([cython not found.])
fi
AC_ARG_VAR([NUMPY_INCLUDE], [Include flags for numpy])
AC_MSG_CHECKING([numpy include flags])
NUMPY_INCLUDE=-I`$PYTHON -c "import numpy; print(numpy.get_include())"`
AS_IF([test "$NUMPY_INCLUDE" = "-I"], [AC_MSG_ERROR([cannot get numpy include path.])])
AC_MSG_RESULT([$NUMPY_INCLUDE])
fi
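# The checks above make ./configure fail early when the python prerequisites are
# missing; the equivalent probes, run by hand when debugging a failing configure,
# would look roughly like this (a sketch, not part of configure.ac):
#
#     import numpy      # AC_MSG_CHECKING([numpy module])
#     import mpi4py     # AC_MSG_CHECKING([mpi4py module])
#     import cython     # AC_MSG_CHECKING([cython module])
#     # configure prefixes this path with -I to obtain NUMPY_INCLUDE
#     print(numpy.get_include())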
AC_MSG_CHECKING(whether --enable-python-tests is specified)
AC_ARG_ENABLE([python-tests],
AS_HELP_STRING([--enable-python-tests],
[enable python tests, default no.]),
[
if test x"$enableval" = x"yes"; then
enable_python_tests=yes
else
enable_python_tests=no
fi
],
[enable_python_tests=no])
AC_MSG_RESULT([${enable_python_tests}])
AM_CONDITIONAL([WITH_PYTHON_TESTS],[test x"$enable_python_tests" = x"yes"])
if test x"${enable_python_tests}" = x"yes"; then
if test x"${enable_python}" = x"no"; then
AC_MSG_ERROR([Python tests can only be enabled if python is enabled.])
fi
AC_CHECK_PROG([pytest_found], [pytest], [yes], [no])
if test x"$pytest_found" != x"yes" ; then
AC_MSG_ERROR([pytest not found.])
fi
fi
AC_OUTPUT
......
@@ -98,7 +98,7 @@ endif
# $1 program
define program_dependencies
$(_f90_only_verbose){ $(foreach argument,$(_$p_use_mods) $(_$p_def_mods) $(foreach l,$(call recursive_lib_deps,$p),$(_$l_use_mods) $(_$l_def_mods)),echo $(argument); ) } | \
$(_f90_only_verbose){ $(foreach argument,$(_$p_use_mods) $(_$p_def_mods) $(foreach l,$(call recursive_lib_deps,$p),$(_$l_use_mods) $(_$l_def_mods)),echo $(argument); ) true; } | \
$(top_srcdir)/fdep/fortran_dependencies.pl $p >> $@ || { rm $@; exit 1; }
endef
......
#!/usr/bin/env python
import numpy as np
from pyelpa import DistributedMatrix
import sys
# set some parameters for matrix layout
na = 1000
nev = 200
nblk = 16
# create distributed matrix
a = DistributedMatrix.from_comm_world(na, nev, nblk)
# set matrix a by looping over indices
# this is the easiest but also slowest way
for global_row, global_col in a.global_indices():
a.set_data_for_global_index(global_row, global_col,
global_row*global_col)
print("Call ELPA eigenvectors")
sys.stdout.flush()
# now compute nev of na eigenvectors and eigenvalues
data = a.compute_eigenvectors()
eigenvalues = data['eigenvalues']
eigenvectors = data['eigenvectors']
print("Done")
# now eigenvectors.data contains the local part of the eigenvector matrix
# which is stored in a block-cyclic distributed layout and eigenvalues contains
# all computed eigenvalues on all cores
# set a again because it has changed after calling elpa
# this time set it by looping over blocks, this is more efficient
for global_row, global_col, row_block_size, col_block_size in \
a.global_block_indices():
# set block with product of indices
x = np.arange(global_row, global_row + row_block_size)[:, None] * \
np.arange(global_col, global_col + col_block_size)[None, :]
a.set_block_for_global_index(global_row, global_col,
row_block_size, col_block_size, x)
print("Call ELPA eigenvalues")
sys.stdout.flush()
# now compute nev of na eigenvalues
eigenvalues = a.compute_eigenvalues()
print("Done")
# now eigenvalues contains all computed eigenvalues on all cores
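# the example above must be started through MPI; a typical launch (launcher name
# and process count are only an illustration) is something like
#     mpiexec -n 4 python3 <this script>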
"""pyelpa -- python wrapper for ELPA
This wrapper uses cython to wrap the C API of ELPA (Eigenvalue SoLvers for
Petaflop-Applications) so that it can be called from python.
Examples:
1. Use the Elpa object to access the eigenvectors/eigenvalues wrapper:
>>> import numpy as np
... from pyelpa import ProcessorLayout, DistributedMatrix, Elpa
... from mpi4py import MPI
... import sys
...
... # set some parameters for matrix layout
... na = 1000
... nev = 200
... nblk = 16
...
... # initialize processor layout, needed for calling ELPA
... comm = MPI.COMM_WORLD
... layout_p = ProcessorLayout(comm)
...
... # create arrays
... a = DistributedMatrix(layout_p, na, nev, nblk)
... eigenvectors = DistributedMatrix(layout_p, na, nev, nblk)
... eigenvalues = np.zeros(na, dtype=np.float64)
...
... # initialize elpa
... e = Elpa.from_distributed_matrix(a)
...
... # set input matrix (a.data) on this core (a is stored in a block-cyclic
... # distributed layout; local size: a.na_rows x a.na_cols)
... # Caution: using this, the global matrix will not be symmetric; this is just
... # an example to show how to access the data
... a.data[:, :] = np.random.rand(a.na_rows, a.na_cols).astype(np.float64)
...
... # now compute nev of na eigenvectors and eigenvalues
... e.eigenvectors(a.data, eigenvalues, eigenvectors.data)
...
... # now eigenvectors.data contains the local part of the eigenvector matrix
... # which is stored in a block-cyclic distributed layout
...
... # now eigenvalues contains all computed eigenvalues on all cores
...
... # now compute nev of na eigenvalues
... e.eigenvalues(a.data, eigenvalues)
...
... # now eigenvalues contains all computed eigenvalues on all cores
2. Use the functions provided by the DistributedMatrix object:
>>> import numpy as np
... from pyelpa import DistributedMatrix
...
... # set some parameters for matrix layout
... na = 1000
... nev = 200
... nblk = 16
...
... a = DistributedMatrix.from_comm_world(na, nev, nblk)
... # use a diagonal matrix as input
... matrix = np.diagflat(np.arange(na)**2)
... # set from global matrix
... a.set_data_from_global_matrix(matrix)
...
... data = a.compute_eigenvectors()
... eigenvalues = data['eigenvalues']
... eigenvectors = data['eigenvectors']
... # now eigenvectors.data contains the local part of the eigenvector matrix
... # which is stored in a block-cyclic distributed layout
...
... # now eigenvalues contains all computed eigenvalues on all cores
"""
from .wrapper import Elpa
from .distributedmatrix import ProcessorLayout, DistributedMatrix
__all__ = ['ProcessorLayout', 'DistributedMatrix', 'Elpa']
"""distributedmatrix.py -- classes for distributed matrices
This file contains the python classes to use with the wrapper.
"""
import numpy as np
from functools import wraps
from .wrapper import Elpa
class ProcessorLayout:
"""Create rectangular processor layout for use with distributed matrices"""
def __init__(self, comm):
"""Initialize processor layout.
Args:
comm: MPI communicator from mpi4py
"""
nprocs = comm.Get_size()
rank = comm.Get_rank()
for np_cols in range(int(np.sqrt(nprocs)), 0, -1):
if nprocs % np_cols == 0:
break
#if nprocs == 1:
# np_cols = 1
np_rows = nprocs//np_cols
# column major distribution of processors
my_pcol = rank // np_rows
my_prow = rank % np_rows
self.np_cols, self.np_rows = np_cols, np_rows
self.my_pcol, self.my_prow = my_pcol, my_prow
self.comm = comm
self.comm_f = comm.py2f()
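# illustration (comment only): for nprocs = 6 the loop above chooses np_cols = 2
# and np_rows = 3; the column-major mapping then assigns
#   rank:    0  1  2  3  4  5
#   my_prow: 0  1  2  0  1  2
#   my_pcol: 0  0  0  1  1  1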
class DistributedMatrix:
"""Class for generating a distributed block-cyclic matrix
The data attribute contains the array in the correct size for the local
processor.
"""
def __init__(self, processor_layout, na, nev, nblk, dtype=np.float64):
"""Initialize distributed matrix for a given processor layout.
Args:
processor_layout (ProcessorLayout): has to be created from MPI
communicator
na (int): dimension of matrix
nev (int): number of eigenvectors/eigenvalues to be computed
nblk (int): block size of distributed matrix
dtype: data type of matrix
"""
self.na = na
self.nev = nev
self.nblk = nblk
self.processor_layout = processor_layout
# get local size
self.na_rows = self.numroc(na, nblk, processor_layout.my_prow, 0,
processor_layout.np_rows)
self.na_cols = self.numroc(na, nblk, processor_layout.my_pcol, 0,
processor_layout.np_cols)
# create array
self.data = np.empty((self.na_rows, self.na_cols),
dtype=dtype, order='F')
self.elpa = None
@classmethod
def from_communicator(cls, comm, na, nev, nblk, dtype=np.float64):
"""Initialize distributed matrix from a MPI communicator.
Args:
comm: MPI communicator from mpi4py
na (int): dimension of matrix
nev (int): number of eigenvectors/eigenvalues to be computed
nblk (int): block size of distributed matrix
dtype: data type of matrix
"""
processor_layout = ProcessorLayout(comm)
return cls(processor_layout, na, nev, nblk, dtype)
@classmethod
def from_comm_world(cls, na, nev, nblk, dtype=np.float64):
"""Initialize distributed matrix from the MPI_COMM_WORLD communicator.
Args:
na (int): dimension of matrix
nev (int): number of eigenvectors/eigenvalues to be computed
nblk (int): block size of distributed matrix
dtype: data type of matrix
"""
from mpi4py import MPI
comm = MPI.COMM_WORLD
processor_layout = ProcessorLayout(comm)
return cls(processor_layout, na, nev, nblk, dtype)
@classmethod
def like(cls, matrix):
"""Get a DistributedMatrix with the same parameters as matrix"""
return cls(matrix.processor_layout, matrix.na, matrix.nev, matrix.nblk,
matrix.data.dtype)
def get_local_index(self, global_row, global_col):
"""compute local row and column indices from global ones
Returns a tuple of the local row and column indices
"""
local_row = self.indxg2l(global_row, self.nblk,
self.processor_layout.my_prow, 0,
self.processor_layout.np_rows)
local_col = self.indxg2l(global_col, self.nblk,
self.processor_layout.my_pcol, 0,
self.processor_layout.np_cols)
return local_row, local_col
def get_global_index(self, local_row, local_col):
"""compute global row and column indices from local ones
Returns a tuple of the global row and column indices
"""
global_row = self.indxl2g(local_row, self.nblk,
self.processor_layout.my_prow, 0,
self.processor_layout.np_rows)
global_col = self.indxl2g(local_col, self.nblk,
self.processor_layout.my_pcol, 0,
self.processor_layout.np_cols)
return global_row, global_col
def is_local_index(self, global_row, global_col):
"""check if global index is stored on current processor"""
return self.is_local_row(global_row) and self.is_local_col(global_col)
def is_local_row(self, global_row):
"""check if global row is stored on this processor"""
process_row = self.indxg2p(global_row, self.nblk,
self.processor_layout.my_prow, 0,
self.processor_layout.np_rows)
return process_row == self.processor_layout.my_prow
def is_local_col(self, global_col):
"""check if global column is stored on this processor"""
process_col = self.indxg2p(global_col, self.nblk,
self.processor_layout.my_pcol, 0,
self.processor_layout.np_cols)
return process_col == self.processor_layout.my_pcol
@staticmethod
def indxg2l(indxglob, nb, iproc, isrcproc, nprocs):
"""compute local index from global index indxglob
original netlib scalapack source:
.. code-block:: fortran
INDXG2L = NB*((INDXGLOB-1)/(NB*NPROCS))+MOD(INDXGLOB-1,NB)+1
"""
# adapt to python 0-based indexing
return nb*(indxglob//(nb*nprocs)) + indxglob%nb
@staticmethod
def indxl2g(indxloc, nb, iproc, isrcproc, nprocs):
"""compute global index from local index indxloc
original netlib scalapack source:
.. code-block:: fortran
INDXL2G = NPROCS*NB*((INDXLOC-1)/NB) + MOD(INDXLOC-1,NB) +
MOD(NPROCS+IPROC-ISRCPROC, NPROCS)*NB + 1
"""
# adapt to python 0-based indexing
return nprocs*nb*(indxloc//nb) + indxloc%nb + \
((nprocs+iproc-isrcproc)%nprocs)*nb
@staticmethod
def indxg2p(indxglob, nb, iproc, isrcproc, nprocs):
"""compute process coordinate for global index
original netlib scalapack source:
.. code-block:: fortran
INDXG2P = MOD( ISRCPROC + (INDXGLOB - 1) / NB, NPROCS )
"""
# adapt to python 0-based indexing
return (isrcproc + indxglob // nb) % nprocs
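# worked example for the three index helpers (nb=16, nprocs=4, isrcproc=0;
# iproc only enters indxl2g): global index 100 lies in global block 100//16 = 6, so
#   indxg2p(100, 16, iproc, 0, 4) = (0 + 6) % 4             = 2   (owning process)
#   indxg2l(100, 16, iproc, 0, 4) = 16*(100//64) + 100 % 16 = 20  (local index there)
#   indxl2g(20, 16, 2, 0, 4)      = 64*1 + 4 + 2*16         = 100 (round trip)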
@staticmethod
def numroc(n, nb, iproc, isrcproc, nprocs):
"""Get local dimensions of distributed block-cyclic matrix.
Programmed after scalapack source (tools/numroc.f on netlib).
"""
mydist = (nprocs + iproc - isrcproc) % nprocs
nblocks = n // nb
result = (nblocks // nprocs) * nb
extrablks = nblocks % nprocs
if mydist < extrablks:
result += nb
elif mydist == extrablks:
result += n % nb
return int(result)
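# illustration: n=1000, nb=16, nprocs=4, isrcproc=0 gives 62 full blocks plus a
# remainder of 8, so numroc returns 256, 256, 248 and 240 for iproc = 0..3,
# which sums to 1000 as it must.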
def _initialized_elpa(function):
# wrapper to ensure one-time initialization of Elpa object
@wraps(function)
def wrapped_function(self):
if self.elpa is None:
self.elpa = Elpa.from_distributed_matrix(self)
return function(self)
return wrapped_function
@_initialized_elpa
def compute_eigenvectors(self):
"""Compute eigenvalues and eigenvectors
The eigenvectors are stored in columns.
This function returns a dictionary with entries 'eigenvalues' and
'eigenvectors'.
After computing the eigenvectors, the original content of the matrix is
lost.
"""
eigenvectors = DistributedMatrix.like(self)
eigenvalues = np.zeros(self.na, dtype=np.float64)
# call ELPA
self.elpa.eigenvectors(self.data, eigenvalues, eigenvectors.data)
return {'eigenvalues': eigenvalues, 'eigenvectors': eigenvectors}
@_initialized_elpa
def compute_eigenvalues(self):
"""Compute only the eigenvalues.
This function returns the eigenvalues as an array.
After computing the eigenvalues, the original content of the matrix is
lost.
"""
eigenvalues = np.zeros(self.na, dtype=np.float64)
# call ELPA
self.elpa.eigenvalues(self.data, eigenvalues)
return eigenvalues
def set_data_from_global_matrix(self, matrix):
"""Set local part of the global matrix"""
for local_row in range(self.na_rows):
for local_col in range(self.na_cols):
global_row, global_col = self.get_global_index(local_row,
local_col)
self.data[local_row, local_col] = matrix[global_row,
global_col]
def dot(self, vector):
"""Compute dot product of matrix with vector.
This blocked implementation is much faster than the naive
implementation.
"""
if len(vector.shape) > 1 or vector.shape[0] != self.na:
raise ValueError("Error: shape of vector {} incompatible with "
"matrix of size {:d}x{:d}.".format(
vector.shape, self.na, self.na))
from mpi4py import MPI
summation = np.zeros_like(vector)
# loop only over blocks here
for local_row in range(0, self.na_rows, self.nblk):
for local_col in range(0, self.na_cols, self.nblk):
# do not go beyond the end of the matrix
row_block_size = min(local_row + self.nblk,
self.na_rows) - local_row
col_block_size = min(local_col + self.nblk,
self.na_cols) - local_col
global_row, global_col = self.get_global_index(local_row,
local_col)
# use numpy for faster dot product of local block
summation[global_row:global_row+row_block_size] += \
np.dot(self.data[local_row:local_row + row_block_size,
local_col:local_col + col_block_size],
vector[global_col:global_col+col_block_size])
result = np.zeros_like(vector)
self.processor_layout.comm.Allreduce(summation, result, op=MPI.SUM)
return result
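# illustrative consistency check (assuming the same global matrix was set on all
# ranks via a.set_data_from_global_matrix(matrix)):
#   v = np.arange(a.na, dtype=np.float64)   # identical vector on every rank
#   assert np.allclose(a.dot(v), matrix @ v)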
def _dot_naive(self, vector):
"""Compute naive dot product of matrix with vector.
Kept here as an example and for testing purposes.
"""
from mpi4py import MPI
summation = np.zeros_like(vector)
for local_row in range(self.na_rows):
for local_col in range(self.na_cols):
global_row, global_col = self.get_global_index(local_row,
local_col)
summation[global_row] += self.data[local_row, local_col] *\
vector[global_col]
result = np.zeros_like(vector)
self.processor_layout.comm.Allreduce(summation, result, op=MPI.SUM)
return result
def get_column(self, global_col):
"""Return global column"""
from mpi4py import MPI
column = np.zeros(self.na, dtype=self.data.dtype)
temporary = np.zeros_like(column)
if self.is_local_col(global_col):
for global_row in range(self.na):
if not self.is_local_row(global_row):
continue
local_row, local_col = self.get_local_index(global_row,
global_col)
temporary[global_row] = self.data[local_row, local_col]
# this could be done more efficiently with a gather
self.processor_layout.comm.Allreduce(temporary, column, op=MPI.SUM)
return column
def get_row(self, global_row):
"""Return global row"""
from mpi4py import MPI
row = np.zeros(self.na, dtype=self.data.dtype)
temporary = np.zeros_like(row)
if self.is_local_row(global_row):
for global_col in range(self.na):
if not self.is_local_col(global_col):
continue
local_row, local_col = self.get_local_index(global_row,
global_col)
temporary[global_col] = self.data[local_row, local_col]
# this could be done more efficiently with a gather
self.processor_layout.comm.Allreduce(temporary, row, op=MPI.SUM)
return row
def global_indices(self):
"""Return iterator over global indices of matrix.
Use together with set_data_for_global_index and get_data_for_global_index.
"""
for local_row in range(self.na_rows):