elpa / elpa · Commits

Commit cf009642
authored Jan 16, 2017 by Pavel Kus

interface for already banded matrix added

parent 1dccb3c6
Changes 4 · Pipelines 1
Makefile.am

@@ -315,6 +315,7 @@ dist_files_DATA = \
   test/Fortran/test_real2_default.F90 \
   test/Fortran/test_real2_qr.F90 \
   test/Fortran/test_real2_api.F90 \
+  test/Fortran/test_real2_banded.F90 \
   test/Fortran/test_real.F90 \
   test/Fortran/test_real_with_c.F90 \
   test/Fortran/test_toeplitz.F90 \
@@ -345,6 +346,7 @@ noinst_PROGRAMS = \
   elpa2_test_real_default@SUFFIX@ \
   elpa2_test_real_qr@SUFFIX@ \
   elpa2_test_real_api@SUFFIX@ \
+  elpa2_test_real_banded@SUFFIX@ \
   elpa2_test_complex@SUFFIX@ \
   elpa2_test_complex_default@SUFFIX@ \
   elpa2_test_complex_api@SUFFIX@ \
@@ -532,6 +534,11 @@ elpa2_test_real_api@SUFFIX@_LDADD = $(build_lib)
 elpa2_test_real_api@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
 EXTRA_elpa2_test_real_api@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
+
+elpa2_test_real_banded@SUFFIX@_SOURCES = test/Fortran/test_real2_banded.F90
+elpa2_test_real_banded@SUFFIX@_LDADD = $(build_lib)
+elpa2_test_real_banded@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
+EXTRA_elpa2_test_real_banded@SUFFIX@_DEPENDENCIES = test/Fortran/elpa_print_headers.X90
 elpa1_test_complex@SUFFIX@_SOURCES = test/Fortran/test_complex.F90
 elpa1_test_complex@SUFFIX@_LDADD = $(build_lib)
 elpa1_test_complex@SUFFIX@_FCFLAGS = $(AM_FCFLAGS) @FC_MODOUT@private_modules @FC_MODINC@private_modules
@@ -731,6 +738,7 @@ check_SCRIPTS = \
   elpa2_test_real_qr@SUFFIX@.sh \
   elpa2_test_complex_default@SUFFIX@.sh \
   elpa2_test_real_api@SUFFIX@.sh \
+  elpa2_test_real_banded@SUFFIX@.sh \
   elpa2_test_complex_api@SUFFIX@.sh \
   elpa_driver_real@SUFFIX@.sh \
   elpa_driver_complex@SUFFIX@.sh \
src/elpa2_template.X90

 #if REALCASE == 1
     function solve_evp_real_2stage_PRECISION (na, nev, a, lda, ev, q, ldq, nblk,        &
                                  matrixCols, mpi_comm_rows, mpi_comm_cols,              &
-                                 mpi_comm_all, THIS_ELPA_KERNEL_API, useQR, useGPU) result(success)
+                                 mpi_comm_all, THIS_ELPA_KERNEL_API, useQR, useGPU, bandwidth) result(success)
 #elif COMPLEXCASE == 1
     function solve_evp_complex_2stage_PRECISION(na, nev, a, lda, ev, q, ldq, nblk,      &
                                  matrixCols, mpi_comm_rows, mpi_comm_cols,              &
-                                 mpi_comm_all, THIS_ELPA_KERNEL_API, useGPU) result(success)
+                                 mpi_comm_all, THIS_ELPA_KERNEL_API, useGPU, bandwidth) result(success)
 #endif

 #ifdef HAVE_DETAILED_TIMINGS
       use timings

@@ -23,8 +23,11 @@
       logical, intent(in), optional             :: useGPU
 #if REALCASE == 1
       logical, intent(in), optional             :: useQR
 #endif
       logical                                   :: useQRActual, useQREnvironment
+
+      integer(kind=c_int), intent(in), optional :: bandwidth
+
       integer(kind=c_int), intent(in), optional :: THIS_ELPA_KERNEL_API
       integer(kind=c_int)                       :: THIS_ELPA_KERNEL

@@ -190,44 +193,61 @@
        endif
      endif

-    ! Choose bandwidth, must be a multiple of nblk, set to a value >= 32
-    ! On older systems (IBM Bluegene/P, Intel Nehalem) a value of 32 was optimal.
-    ! For Intel(R) Xeon(R) E5 v2 and v3, better use 64 instead of 32!
-    ! For IBM Bluegene/Q this is not clear at the moment. We have to keep an eye
-    ! on this and maybe allow a run-time optimization here
-    if (do_useGPU) then
-      nbw = nblk
+    if(present(bandwidth)) then
+      nbw = bandwidth
+
+      if ((nbw == 0) .or. (mod(nbw, nblk) .ne. 0)) then
+        if (wantDebug) then
+          write(error_unit,*) "Specified bandwidth has to be a multiple of blocksize"
+        endif
+        print *, "Specified bandwidth has to be a multiple of blocksize"
+        success = .false.
+        return
+      endif
+
+      ttts = MPI_Wtime()
     else
+
+      ! Choose bandwidth, must be a multiple of nblk, set to a value >= 32
+      ! On older systems (IBM Bluegene/P, Intel Nehalem) a value of 32 was optimal.
+      ! For Intel(R) Xeon(R) E5 v2 and v3, better use 64 instead of 32!
+      ! For IBM Bluegene/Q this is not clear at the moment. We have to keep an eye
+      ! on this and maybe allow a run-time optimization here
+      if (do_useGPU) then
+        nbw = nblk
+      else
 #if REALCASE == 1
-      nbw = (63/nblk+1)*nblk
+        nbw = (63/nblk+1)*nblk
 #elif COMPLEXCASE == 1
-      nbw = (31/nblk+1)*nblk
+        nbw = (31/nblk+1)*nblk
 #endif
-    endif
+      endif

-    num_blocks = (na-1)/nbw + 1
+      num_blocks = (na-1)/nbw + 1

-    allocate(tmat(nbw,nbw,num_blocks), stat=istat, errmsg=errorMessage)
-    if (istat .ne. 0) then
-      print *,solve_evp_NUMBER_2stage_PRECISION_STR//": error when allocating tmat "//errorMessage
-      stop
-    endif
+      allocate(tmat(nbw,nbw,num_blocks), stat=istat, errmsg=errorMessage)
+      if (istat .ne. 0) then
+        print *,solve_evp_NUMBER_2stage_PRECISION_STR//": error when allocating tmat "//errorMessage
+        stop
+      endif

-    ! Reduction full -> band
+      ! Reduction full -> band

-    ttt0 = MPI_Wtime()
-    ttts = ttt0
+      ttt0 = MPI_Wtime()
+      ttts = ttt0
 #if REALCASE == 1
-    call bandred_real_PRECISION(na, a, a_dev, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
-                                tmat, tmat_dev, wantDebug, do_useGPU, success, useQRActual)
+      call bandred_real_PRECISION(na, a, a_dev, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
+                                  tmat, tmat_dev, wantDebug, do_useGPU, success, useQRActual)
 #elif COMPLEXCASE == 1
-    call bandred_complex_PRECISION(na, a, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
-                                   tmat, wantDebug, do_useGPU, success)
+      call bandred_complex_PRECISION(na, a, lda, nblk, nbw, matrixCols, num_blocks, mpi_comm_rows, mpi_comm_cols, &
+                                     tmat, wantDebug, do_useGPU, success)
 #endif
-    if (.not.(success)) return
-    ttt1 = MPI_Wtime()
-    if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
-       write(error_unit,*) 'Time '//bandred_NUMBER_PRECISION_STR//' :',ttt1-ttt0
+      if (.not.(success)) return
+      ttt1 = MPI_Wtime()
+      if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
+         write(error_unit,*) 'Time '//bandred_NUMBER_PRECISION_STR//' :',ttt1-ttt0
+
+    end if ! matrix not already banded on input

     ! Reduction band -> tridiagonal

@@ -327,32 +347,36 @@
       stop
     endif

-    ! Backtransform stage 2
-
-    ttt0 = MPI_Wtime()
+    if(present(bandwidth)) then
+      time_evp_back = ttt1-ttts
+    else
+
+      ! Backtransform stage 2
+
+      ttt0 = MPI_Wtime()
 #if REALCASE == 1
-    call trans_ev_band_to_full_real_PRECISION(na, nev, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, &
-                                              q, q_dev, ldq, matrixCols, num_blocks,             &
-                                              mpi_comm_rows, mpi_comm_cols, do_useGPU, useQRActual)
+      call trans_ev_band_to_full_real_PRECISION(na, nev, nblk, nbw, a, a_dev, lda, tmat, tmat_dev, &
+                                                q, q_dev, ldq, matrixCols, num_blocks,             &
+                                                mpi_comm_rows, mpi_comm_cols, do_useGPU, useQRActual)
 #elif COMPLEXCASE == 1
-    call trans_ev_band_to_full_complex_PRECISION(na, nev, nblk, nbw, a, lda, tmat, &
-                                                 q, ldq, matrixCols, num_blocks,   &
-                                                 mpi_comm_rows, mpi_comm_cols, do_useGPU)
+      call trans_ev_band_to_full_complex_PRECISION(na, nev, nblk, nbw, a, lda, tmat, &
+                                                   q, ldq, matrixCols, num_blocks,   &
+                                                   mpi_comm_rows, mpi_comm_cols, do_useGPU)
 #endif
-    ttt1 = MPI_Wtime()
-    if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
-       write(error_unit,*) 'Time '//trans_ev_band_to_full_NUMBER_PRECISION_STR//' :',ttt1-ttt0
-    time_evp_back = ttt1-ttts
+      ttt1 = MPI_Wtime()
+      if (my_prow==0 .and. my_pcol==0 .and. elpa_print_times) &
+         write(error_unit,*) 'Time '//trans_ev_band_to_full_NUMBER_PRECISION_STR//' :',ttt1-ttt0
+      time_evp_back = ttt1-ttts

-    deallocate(tmat, stat=istat, errmsg=errorMessage)
-    if (istat .ne. 0) then
-      print *,solve_evp_NUMBER_2stage_PRECISION_STR//": error when deallocating tmat"//errorMessage
-      stop
-    endif
+      deallocate(tmat, stat=istat, errmsg=errorMessage)
+      if (istat .ne. 0) then
+        print *,solve_evp_NUMBER_2stage_PRECISION_STR//": error when deallocating tmat"//errorMessage
+        stop
+      endif
+    endif ! not present(bandwidth)

     call timer%stop(solve_evp_NUMBER_2stage_PRECISION_STR)

 1   format(a,f10.3)

   end function solve_evp_NUMBER_2stage_PRECISION
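The change above hinges on Fortran's optional dummy arguments: when the caller supplies bandwidth, present(bandwidth) is true, the value is required to be a nonzero multiple of nblk, and the full-to-band reduction and the corresponding back transform are skipped. The following is a minimal, self-contained sketch of that dispatch pattern, not ELPA code; the function name chosen_bandwidth is hypothetical, while the default (63/nblk+1)*nblk and the error text are taken from the real-case branch above.

program optional_bandwidth_sketch
   implicit none

   print *, chosen_bandwidth(nblk=16)                 ! no bandwidth given: internal choice (63/16+1)*16 = 64
   print *, chosen_bandwidth(nblk=16, bandwidth=32)   ! caller-supplied bandwidth, a multiple of nblk

contains

   integer function chosen_bandwidth(nblk, bandwidth)
      integer, intent(in)           :: nblk
      integer, intent(in), optional :: bandwidth

      if (present(bandwidth)) then
         ! same constraint as in solve_evp_*_2stage: a nonzero multiple of the block size
         if (bandwidth == 0 .or. mod(bandwidth, nblk) /= 0) then
            stop "Specified bandwidth has to be a multiple of blocksize"
         endif
         chosen_bandwidth = bandwidth
      else
         chosen_bandwidth = (63/nblk + 1) * nblk      ! real-case default from the template
      endif
   end function chosen_bandwidth

end program optional_bandwidth_sketch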
test/Fortran/test_real2_banded.F90  (new file, 0 → 100644)
! This file is part of ELPA.
!
! The ELPA library was originally created by the ELPA consortium,
! consisting of the following organizations:
!
! - Max Planck Computing and Data Facility (MPCDF), formerly known as
! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
! Informatik,
! - Technische Universität München, Lehrstuhl für Informatik mit
! Schwerpunkt Wissenschaftliches Rechnen ,
! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
! and
! - IBM Deutschland GmbH
!
!
! More information can be found here:
! http://elpa.mpcdf.mpg.de/
!
! ELPA is free software: you can redistribute it and/or modify
! it under the terms of the version 3 of the license of the
! GNU Lesser General Public License as published by the Free
! Software Foundation.
!
! ELPA is distributed in the hope that it will be useful,
! but WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
! GNU Lesser General Public License for more details.
!
! You should have received a copy of the GNU Lesser General Public License
! along with ELPA. If not, see <http://www.gnu.org/licenses/>
!
! ELPA reflects a substantial effort on the part of the original
! ELPA consortium, and we ask you to respect the spirit of the
! license that we chose: i.e., please contribute any changes you
! may have back to the original ELPA library distribution, and keep
! any derivatives of ELPA under the same license that we chose for
! the original distribution, the GNU Lesser General Public License.
!
!
#include "config-f90.h"
!>
!> Fortran test program to demonstrate the use of
!> the ELPA 2 real case library.
!> If "HAVE_REDIRECT" was defined at build time
!> the stdout and stderr output of each MPI task
!> can be redirected to files if the environment
!> variable "REDIRECT_ELPA_TEST_OUTPUT" is set
!> to "true".
!>
!> By calling executable [arg1] [arg2] [arg3] [arg4]
!> one can define the matrix size (arg1), the number of
!> eigenvectors to compute (arg2), and the blocking (arg3).
!> If these values are not set, the default values (4000, 1500, 16)
!> are chosen.
!> If these values are set, the 4th argument can be
!> "output", which specifies that the eigenvectors are written to
!> an ASCII file.
!>
!> The real ELPA 2 kernel is set as the default kernel.
!> However, this can be overridden by setting
!> the environment variable "REAL_ELPA_KERNEL" to an
!> appropriate value.
!>
program test_real2_double_precision

!-------------------------------------------------------------------------------
! Standard eigenvalue problem - REAL version
!
! This program demonstrates the use of the ELPA module
! together with standard scalapack routines
!
! Copyright of the original code rests with the authors inside the ELPA
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
!
!-------------------------------------------------------------------------------
   use precision
   use ELPA1
   use ELPA2
   use mod_check_for_gpu, only : check_for_gpu
   use elpa_utilities, only : error_unit
#ifdef WITH_OPENMP
   use test_util
#endif
   use mod_read_input_parameters
   use mod_check_correctness
   use mod_setup_mpi
   use mod_blacs_infrastructure
   use mod_prepare_matrix
   use elpa_mpi
#ifdef HAVE_REDIRECT
   use redirect
#endif
#ifdef HAVE_DETAILED_TIMINGS
   use timings
#endif
   use output_types

   implicit none

   !-------------------------------------------------------------------------------
   ! Please set system size parameters below!
   ! na:   System size
   ! nev:  Number of eigenvectors to be calculated
   ! nblk: Blocking factor in block cyclic distribution
   !-------------------------------------------------------------------------------
   integer(kind=ik)            :: nblk
   integer(kind=ik)            :: na, nev

   integer(kind=ik)            :: np_rows, np_cols, na_rows, na_cols

   integer(kind=ik)            :: myid, nprocs, my_prow, my_pcol, mpi_comm_rows, mpi_comm_cols
   integer(kind=ik)            :: i, mpierr, my_blacs_ctxt, sc_desc(9), info, nprow, npcol

   integer(kind=ik), external  :: numroc

   real(kind=rk8), allocatable :: a(:,:), z(:,:), tmp1(:,:), tmp2(:,:), as(:,:), ev(:)

   integer(kind=ik)            :: iseed(4096) ! Random seed, size should be sufficient for every generator

   integer(kind=ik)            :: STATUS
#ifdef WITH_OPENMP
   integer(kind=ik)            :: omp_get_max_threads, required_mpi_thread_level, provided_mpi_thread_level
#endif
   logical                     :: successELPA, success
   integer(kind=ik)            :: numberOfDevices
   logical                     :: gpuAvailable
   type(output_t)              :: write_to_file
   character(len=8)            :: task_suffix
   integer(kind=ik)            :: j
   integer(kind=ik)            :: global_row, global_col, local_row, local_col
   integer(kind=ik)            :: bandwidth

#define DOUBLE_PRECISION_REAL 1

   successELPA  = .true.
   gpuAvailable = .false.

   call read_input_parameters(na, nev, nblk, write_to_file)

   !-------------------------------------------------------------------------------
   ! MPI Initialization
   call setup_mpi(myid, nprocs)

   gpuAvailable = check_for_gpu(myid, numberOfDevices)

   STATUS = 0

#define REALCASE
#include "elpa_print_headers.X90"

#ifdef HAVE_DETAILED_TIMINGS

   ! initialise the timing functionality

#ifdef HAVE_LIBPAPI
   call timer%measure_flops(.true.)
#endif

   call timer%measure_allocated_memory(.true.)
   call timer%measure_virtual_memory(.true.)
   call timer%measure_max_allocated_memory(.true.)

   call timer%set_print_options(&
#ifdef HAVE_LIBPAPI
                print_flop_count=.true., &
                print_flop_rate=.true., &
#endif
                print_allocated_memory = .true. , &
                print_virtual_memory= .true. , &
                print_max_allocated_memory = .true.)

   call timer%enable()

   call timer%start("program: test_real2_double_precision")
#endif

   !-------------------------------------------------------------------------------
   ! Selection of number of processor rows/columns
   ! We try to set up the grid square-like, i.e. start the search for possible
   ! divisors of nprocs with a number next to the square root of nprocs
   ! and decrement it until a divisor is found.

   do np_cols = NINT(SQRT(REAL(nprocs))), 2, -1
      if (mod(nprocs, np_cols) == 0) exit
   enddo
   ! at the end of the above loop, nprocs is always divisible by np_cols

   np_rows = nprocs/np_cols

   if (myid == 0) then
      print *
      print '(a)','Standard eigenvalue problem - REAL version'
      print *
      print '(3(a,i0))','Matrix size=',na,', Number of eigenvectors=',nev,', Block size=',nblk
      print '(3(a,i0))','Number of processor rows=',np_rows,', cols=',np_cols,', total=',nprocs
      print *
   endif

   !-------------------------------------------------------------------------------
   ! Set up BLACS context and MPI communicators
   !
   ! The BLACS context is only necessary for using Scalapack.
   !
   ! For ELPA, the MPI communicators along rows/cols are sufficient,
   ! and the grid setup may be done in an arbitrary way as long as it is
   ! consistent (i.e. 0<=my_prow<np_rows, 0<=my_pcol<np_cols and every
   ! process has a unique (my_prow,my_pcol) pair).

   call set_up_blacsgrid(mpi_comm_world, my_blacs_ctxt, np_rows, np_cols, &
                         nprow, npcol, my_prow, my_pcol)

   if (myid == 0) then
      print '(a)','| Past BLACS_Gridinfo.'
   end if

   ! All ELPA routines need MPI communicators for communicating within
   ! rows or columns of processes, these are set in elpa_get_communicators.

   mpierr = elpa_get_communicators(mpi_comm_world, my_prow, my_pcol, &
                                   mpi_comm_rows, mpi_comm_cols)

   if (myid == 0) then
      print '(a)','| Past split communicator setup for rows and columns.'
   end if

   call set_up_blacs_descriptor(na, nblk, my_prow, my_pcol, np_rows, np_cols, &
                                na_rows, na_cols, sc_desc, my_blacs_ctxt, info)

   if (myid == 0) then
      print '(a)','| Past scalapack descriptor setup.'
   end if

   !-------------------------------------------------------------------------------
   ! Allocate matrices and set up a test matrix for the eigenvalue problem
#ifdef HAVE_DETAILED_TIMINGS
   call timer%start("set up matrix")
#endif
   allocate(a (na_rows,na_cols))
   allocate(z (na_rows,na_cols))
   allocate(as(na_rows,na_cols))

   allocate(ev(na))

   call prepare_matrix_double(na, myid, sc_desc, iseed, a, z, as)

   ! set values outside of the bandwidth to zero
   bandwidth = nblk

   do local_row = 1, na_rows
      global_row = index_l2g(local_row, nblk, my_prow, np_rows)
      do local_col = 1, na_cols
         global_col = index_l2g(local_col, nblk, my_pcol, np_cols)
         if (ABS(global_row - global_col) > bandwidth) then
            a(local_row, local_col)  = 0.0
            as(local_row, local_col) = 0.0
         end if
      end do
   end do

#ifdef HAVE_DETAILED_TIMINGS
   call timer%stop("set up matrix")
#endif

   ! set print flag in elpa1
   elpa_print_times = .true.

   !-------------------------------------------------------------------------------
   ! Calculate eigenvalues/eigenvectors

   if (myid == 0) then
      print '(a)','| Entering two-stage ELPA solver ... '
      print *
   end if

#ifdef WITH_MPI
   call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
#endif
   successELPA = elpa_solve_evp_real_2stage_double(na, nev, a, na_rows, ev, z, na_rows, nblk, na_cols, &
                                   mpi_comm_rows, mpi_comm_cols, mpi_comm_world, bandwidth = bandwidth)

   if (.not.(successELPA)) then
      write(error_unit,*) "solve_evp_real_2stage produced an error! Aborting..."
#ifdef WITH_MPI
      call MPI_ABORT(mpi_comm_world, 1, mpierr)
#endif
   endif

   if (myid == 0) then
      print '(a)','| Two-step ELPA solver complete.'
      print *
   end if

   if (myid == 0) print *,'Time transform to tridi :',time_evp_fwd
   if (myid == 0) print *,'Time solve tridi        :',time_evp_solve
   if (myid == 0) print *,'Time transform back EVs :',time_evp_back
   if (myid == 0) print *,'Total time (sum above)  :',time_evp_back+time_evp_solve+time_evp_fwd

   if (write_to_file%eigenvectors) then
      write(unit = task_suffix, fmt = '(i8.8)') myid
      open(17, file="EVs_real2_out_task_"//task_suffix(1:8)//".txt", form='formatted', status='new')
      write(17,*) "Part of eigenvectors: na_rows=",na_rows,"of na=",na," na_cols=",na_cols," of na=",na

      do i = 1, na_rows
         do j = 1, na_cols
            write(17,*) "row=",i," col=",j," element of eigenvector=",z(i,j)
         enddo
      enddo
      close(17)
   endif

   if (write_to_file%eigenvalues) then
      if (myid == 0) then
         open(17, file="Eigenvalues_real2_out.txt", form='formatted', status='new')
         do i = 1, na
            write(17,*) i, ev(i)
         enddo
         close(17)
      endif
   endif

   !-------------------------------------------------------------------------------
   ! Test correctness of result (using plain scalapack routines)
   allocate(tmp1(na_rows,na_cols))
   allocate(tmp2(na_rows,na_cols))

   status = check_correctness(na, nev, as, z, ev, sc_desc, myid, tmp1, tmp2)

   deallocate(a)
   deallocate(as)

   deallocate(z)
   deallocate(tmp1)
   deallocate(tmp2)
   deallocate(ev)

#ifdef HAVE_DETAILED_TIMINGS
   call timer%stop("program: test_real2_double_precision")
   print *," "
   print *,"Timings program: test_real2_double_precision"
   call timer%print("program: test_real2_double_precision")
   print *," "
   print *,"End timings program: test_real2_double_precision"
#endif

#ifdef WITH_MPI
   call blacs_gridexit(my_blacs_ctxt)
   call mpi_finalize(mpierr)
#endif

   call EXIT(STATUS)

end
!-------------------------------------------------------------------------------
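To make the structure the test prepares concrete: the loop above zeroes every entry whose global row and column indices differ by more than bandwidth, so the solver receives a symmetric matrix that is already banded. The following standalone serial sketch (hypothetical, not part of the commit, with made-up size n=8 and bandwidth=2) shows the same structure without the block-cyclic distribution.

program banded_structure_sketch
   implicit none
   integer, parameter :: n = 8, bandwidth = 2
   double precision   :: a(n, n)
   integer            :: i, j

   call random_number(a)
   a = 0.5d0 * (a + transpose(a))                     ! symmetrize the random matrix

   do j = 1, n
      do i = 1, n
         if (abs(i - j) > bandwidth) a(i, j) = 0.0d0  ! zero everything outside the band
      end do
   end do

   do i = 1, n
      print '(8f7.3)', a(i, :)                        ! only the diagonal and 2 off-diagonals per side remain
   end do
end program banded_structure_sketch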
test/shared/blacs_infrastructure.F90

@@ -156,4 +156,9 @@ module mod_blacs_infrastructure
     end subroutine

+    integer function index_l2g(idx_loc, nblk, iproc, nprocs)
+      index_l2g = nprocs * nblk * ((idx_loc-1)/nblk) + mod(idx_loc-1, nblk) + mod(nprocs+iproc, nprocs) * nblk + 1
+      return
+    end
+
 end module
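The new index_l2g helper converts a local index of a block-cyclically distributed dimension into the corresponding global index. A small standalone check of the formula (the program name and the example values nblk=16, nprocs=4, iproc=1 are assumptions, not part of the commit): process 1 owns global blocks 2, 6, 10, ..., so its local row 17, the first row of its second local block, should map to global row 81.

program check_index_l2g
   implicit none
   integer :: idx_loc, nblk, iproc, nprocs, idx_glob

   idx_loc = 17   ! local index on this process
   nblk    = 16   ! block size of the block-cyclic distribution
   iproc   = 1    ! process coordinate in this dimension
   nprocs  = 4    ! number of processes in this dimension

   ! same formula as index_l2g in mod_blacs_infrastructure above
   idx_glob = nprocs * nblk * ((idx_loc-1)/nblk) + mod(idx_loc-1, nblk) &
              + mod(nprocs+iproc, nprocs) * nblk + 1

   print *, 'local index', idx_loc, 'on process', iproc, 'is global index', idx_glob   ! prints 81
end program check_index_l2g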