Commit 5ab1742b authored by Carolin Penke's avatar Carolin Penke
Browse files

working version for skew-symmetric matrices

parent 7a51ebd1
......@@ -63,6 +63,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa_generalized/cannon.c \
#src/elpa_generalized/test_c_bindings.c \
src/helpers/matrix_plot.F90 \
src/general/dssmv.F90 \
src/general/dssr2.F90 \
src/elpa_index.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa_c_interface.c
......@@ -101,6 +103,7 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2/qr/elpa_pdgeqrf_template.F90 \
src/elpa2/elpa2_bandred_template.F90 \
src/elpa2/elpa2_symm_matrix_allreduce_real_template.F90 \
src/elpa2/elpa2_ssymm_matrix_allreduce_real_template.F90 \
src/elpa2/elpa2_trans_ev_band_to_full_template.F90 \
src/elpa2/elpa2_tridiag_band_template.F90 \
src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90 \
......@@ -703,6 +706,7 @@ EXTRA_DIST = \
src/elpa1/elpa_reduce_add_vectors.F90 \
src/elpa1/elpa_solve_tridi_impl_public.F90 \
src/elpa1/elpa_transpose_vectors.F90 \
src/elpa1/elpa_transpose_vectors_ss.F90 \
src/elpa2/GPU/ev_tridi_band_gpu_c_v2_complex_template.cu \
src/elpa2/GPU/ev_tridi_band_gpu_c_v2_real_template.cu \
src/elpa2/compute_hh_trafo.F90 \
......
......@@ -1179,6 +1179,9 @@ fi
AM_CONDITIONAL([WANT_SINGLE_PRECISION_REAL],[test x"$want_single_precision" = x"yes"])
AM_CONDITIONAL([WANT_SINGLE_PRECISION_COMPLEX],[test x"$want_single_precision" = x"yes"])
#always define SKEWSYMMETRIC for the moment
AC_DEFINE([SKEWSYMMETRIC],[1],[build for skewsyemmtric case])
AC_SUBST([MPI_BINARY])
AC_SUBST([WITH_MKL])
AC_SUBST([WITH_BLACS])
......
......@@ -105,7 +105,9 @@ module elpa1_compute
public :: elpa_reduce_add_vectors_real_double
public :: elpa_reduce_add_vectors_real
public :: elpa_transpose_vectors_real_double
public :: elpa_transpose_vectors_ss_real_double
public :: elpa_transpose_vectors_real
public :: elpa_transpose_vectors_ss_real
interface hh_transform_real
module procedure hh_transform_real_double
......@@ -118,11 +120,16 @@ module elpa1_compute
interface elpa_transpose_vectors_real
module procedure elpa_transpose_vectors_real_double
end interface
interface elpa_transpose_vectors_ss_real
module procedure elpa_transpose_vectors_ss_real_double
end interface
#ifdef WANT_SINGLE_PRECISION_REAL
public :: hh_transform_real_single
public :: elpa_reduce_add_vectors_real_single
public :: elpa_transpose_vectors_real_single
public :: elpa_transpose_vectors_ss_real_single
#endif
public :: hh_transform_complex_double
......@@ -130,7 +137,9 @@ module elpa1_compute
public :: elpa_reduce_add_vectors_complex_double
public :: elpa_reduce_add_vectors_complex
public :: elpa_transpose_vectors_complex_double
public :: elpa_transpose_vectors_ss_complex_double
public :: elpa_transpose_vectors_complex
public :: elpa_transpose_vectors_ss_complex
interface hh_transform_complex
module procedure hh_transform_complex_double
......@@ -143,11 +152,16 @@ module elpa1_compute
interface elpa_transpose_vectors_complex
module procedure elpa_transpose_vectors_complex_double
end interface
interface elpa_transpose_vectors_ss_complex
module procedure elpa_transpose_vectors_ss_complex_double
end interface
#ifdef WANT_SINGLE_PRECISION_COMPLEX
public :: hh_transform_complex_single
public :: elpa_reduce_add_vectors_complex_single
public :: elpa_transpose_vectors_complex_single
public :: elpa_transpose_vectors_ss_complex_single
#endif
contains
......@@ -159,6 +173,7 @@ module elpa1_compute
#include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#include "elpa_transpose_vectors_ss.F90"
#include "elpa_reduce_add_vectors.F90"
#undef DOUBLE_PRECISION
#undef REALCASE
......@@ -170,6 +185,7 @@ module elpa1_compute
#include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#include "elpa_transpose_vectors_ss.F90"
#include "elpa_reduce_add_vectors.F90"
#undef SINGLE_PRECISION
#undef REALCASE
......@@ -181,6 +197,7 @@ module elpa1_compute
#define DOUBLE_PRECISION 1
#include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#include "elpa_transpose_vectors_ss.F90"
#include "elpa_reduce_add_vectors.F90"
#undef COMPLEXCASE
#undef DOUBLE_PRECISION
......@@ -191,6 +208,7 @@ module elpa1_compute
#define SINGLE_PRECISION 1
#include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#include "elpa_transpose_vectors_ss.F90"
#include "elpa_reduce_add_vectors.F90"
#undef COMPLEXCASE
#undef SINGLE_PRECISION
......
......@@ -74,6 +74,7 @@ module elpa2_impl
contains
#define REALCASE 1
#define DOUBLE_PRECISION 1
#include "../general/precision_macros.h"
!-------------------------------------------------------------------------------
......
......@@ -1416,11 +1416,7 @@
endif
! Transpose umc -> umr (stored in vmr, second half)
#if SKEWSYMMETRIC == 1
call elpa_transpose_vectors_ss&
#else
call elpa_transpose_vectors_&
#endif
call elpa_transpose_vectors_&
&MATH_DATATYPE&
&_&
&PRECISION &
......@@ -1453,9 +1449,10 @@
#if REALCASE == 1
#if SKEWSYMMETRIC == 1
0.5_rk, &
#endif
#else
-0.5_rk, &
#endif
#endif
#if COMPLEXCASE == 1
(-0.5_rk, 0.0_rk), &
#endif
......
......@@ -112,7 +112,7 @@
call obj%timer%stop("symm_matrix_allreduce" // PRECISION_SUFFIX)
end subroutine symm_matrix_allreduce_&
end subroutine ssymm_matrix_allreduce_&
&PRECISION
......
......@@ -49,6 +49,7 @@
! consortium. The copyright of any additional modifications shall rest
! with their original authors, but shall adhere to the licensing terms
! distributed along with the original code in the file "COPYING".
function elpa_solve_evp_&
&MATH_DATATYPE&
&_&
......@@ -129,6 +130,9 @@
do_trans_to_band, do_trans_to_full
integer(kind=ik) :: nrThreads
#if SKEWSYMMETRIC ==1
integer(kind=ik) :: global_index
#endif
#if REALCASE == 1
#undef GPU_KERNEL
#undef GENERIC_KERNEL
......
......@@ -119,6 +119,7 @@ static int na_is_valid(elpa_index_t index, int n, int new_value);
static int nev_is_valid(elpa_index_t index, int n, int new_value);
static int bw_is_valid(elpa_index_t index, int n, int new_value);
static int gpu_is_valid(elpa_index_t index, int n, int new_value);
static int skewsymmetric_is_valid(elpa_index_t index, int n, int new_value);
static int is_positive(elpa_index_t index, int n, int new_value);
......@@ -194,6 +195,8 @@ static const elpa_index_int_entry_t int_entries[] = {
number_of_solvers, solver_enumerate, solver_is_valid, elpa_solver_name, PRINT_YES),
INT_ENTRY("gpu", "Use GPU acceleration", 0, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
cardinality_bool, enumerate_identity, gpu_is_valid, NULL, PRINT_YES),
INT_ENTRY("is_skewsymmetric", "Matrix is skewsymmetic", 0, ELPA_AUTOTUNE_NOT_TUNABLE, 0,
cardinality_bool, enumerate_identity, skewsymmetric_is_valid, NULL, PRINT_YES),
//default of gpu ussage for individual phases is 1. However, it is only evaluated, if GPU is used at all, which first has to be determined
//by the parameter gpu and presence of the device
INT_ENTRY("gpu_tridiag", "Use GPU acceleration for ELPA1 tridiagonalization", 1, ELPA_AUTOTUNE_MEDIUM, ELPA_AUTOTUNE_DOMAIN_ANY,
......@@ -750,6 +753,10 @@ static int gpu_is_valid(elpa_index_t index, int n, int new_value) {
return new_value == 0 || new_value == 1;
}
static int skewsymmetric_is_valid(elpa_index_t index, int n, int new_value) {
return new_value == 0 || new_value == 1;
}
static int band_to_full_cardinality(elpa_index_t index) {
return 10;
}
......
SUBROUTINE DSSMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
*
!
IMPLICIT NONE
*
* .. Scalar Arguments ..
!
! .. Scalar Arguments ..
CHARACTER UPLO
INTEGER N, LDA, INCX, INCY
DOUBLE PRECISION ALPHA, BETA
* ..
* .. Array Arguments ..
! ..
! .. Array Arguments ..
DOUBLE PRECISION A( LDA, * ), X( * ), Y( * )
* ..
*
* Purpose
* =======
*
* DSSMV performs the matrix-vector operation
*
* y := alpha*A*x + beta*y,
*
* where alpha and beta are scalars, x and y are n element vectors and
* A is an n by n skew-symmetric matrix.
*
* Arguments
* ==========
*
* UPLO - CHARACTER*1.
* On entry, UPLO specifies whether the upper or lower
* triangular part of the array A is to be referenced as
* follows:
*
* UPLO = 'U' or 'u' Only the upper triangular part of A
* is to be referenced.
*
* UPLO = 'L' or 'l' Only the lower triangular part of A
* is to be referenced.
*
* Unchanged on exit.
*
* N - INTEGER.
* On entry, N specifies the order of the matrix A.
* N must be at least zero.
* Unchanged on exit.
*
* ALPHA - DOUBLE PRECISION.
* On entry, ALPHA specifies the scalar alpha.
* Unchanged on exit.
*
* A - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
* Before entry with UPLO = 'U' or 'u', the leading n by n
* upper triangular part of the array A must contain the upper
* triangular part of the skew-symmetric matrix and the
* strictly lower triangular part of A is not referenced.
* Before entry with UPLO = 'L' or 'l', the leading n by n
* lower triangular part of the array A must contain the lower
* triangular part of the skew-symmetric matrix and the
* strictly upper triangular part of A is not referenced.
* Unchanged on exit.
*
* LDA - INTEGER.
* On entry, LDA specifies the first dimension of A as declared
* in the calling (sub) program. LDA must be at least
* max( 1, n ).
* Unchanged on exit.
*
* X - DOUBLE PRECISION array of dimension at least
* ( 1 + ( n - 1 )*abs( INCX ) ).
* Before entry, the incremented array X must contain the n
* element vector x.
* Unchanged on exit.
*
* INCX - INTEGER.
* On entry, INCX specifies the increment for the elements of
* X. INCX must not be zero.
* Unchanged on exit.
*
* BETA - DOUBLE PRECISION.
* On entry, BETA specifies the scalar beta. When BETA is
* supplied as zero then Y need not be set on input.
* Unchanged on exit.
*
* Y - DOUBLE PRECISION array of dimension at least
* ( 1 + ( n - 1 )*abs( INCY ) ).
* Before entry, the incremented array Y must contain the n
* element vector y. On exit, Y is overwritten by the updated
* vector y.
*
* INCY - INTEGER.
* On entry, INCY specifies the increment for the elements of
* Y. INCY must not be zero.
* Unchanged on exit.
*
* Further Details
* ===============
*
* Level 2 BLAS-like routine.
*
* Written by Meiyue Shao, Lawrence Berkeley National Laboratory.
* Last change: October 2014
*
* =====================================================================
*
* .. Parameters ..
! ..
!
! Purpose
! =======
!
! DSSMV performs the matrix-vector operation
!
! y := alpha*A*x + beta*y,
!
! where alpha and beta are scalars, x and y are n element vectors and
! A is an n by n skew-symmetric matrix.
!
! Arguments
! ==========
!
! UPLO - CHARACTER*1.
! On entry, UPLO specifies whether the upper or lower
! triangular part of the array A is to be referenced as
! follows:
!
! UPLO = 'U' or 'u' Only the upper triangular part of A
! is to be referenced.
!
! UPLO = 'L' or 'l' Only the lower triangular part of A
! is to be referenced.
!
! Unchanged on exit.
!
! N - INTEGER.
! On entry, N specifies the order of the matrix A.
! N must be at least zero.
! Unchanged on exit.
!
! ALPHA - DOUBLE PRECISION.
! On entry, ALPHA specifies the scalar alpha.
! Unchanged on exit.
!
! A - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
! Before entry with UPLO = 'U' or 'u', the leading n by n
! upper triangular part of the array A must contain the upper
! triangular part of the skew-symmetric matrix and the
! strictly lower triangular part of A is not referenced.
! Before entry with UPLO = 'L' or 'l', the leading n by n
! lower triangular part of the array A must contain the lower
! triangular part of the skew-symmetric matrix and the
! strictly upper triangular part of A is not referenced.
! Unchanged on exit.
!
! LDA - INTEGER.
! On entry, LDA specifies the first dimension of A as declared
! in the calling (sub) program. LDA must be at least
! max( 1, n ).
! Unchanged on exit.
!
! X - DOUBLE PRECISION array of dimension at least
! ( 1 + ( n - 1 )*abs( INCX ) ).
! Before entry, the incremented array X must contain the n
! element vector x.
! Unchanged on exit.
!
! INCX - INTEGER.
! On entry, INCX specifies the increment for the elements of
! X. INCX must not be zero.
! Unchanged on exit.
!
! BETA - DOUBLE PRECISION.
! On entry, BETA specifies the scalar beta. When BETA is
! supplied as zero then Y need not be set on input.
! Unchanged on exit.
!
! Y - DOUBLE PRECISION array of dimension at least
! ( 1 + ( n - 1 )*abs( INCY ) ).
! Before entry, the incremented array Y must contain the n
! element vector y. On exit, Y is overwritten by the updated
! vector y.
!
! INCY - INTEGER.
! On entry, INCY specifies the increment for the elements of
! Y. INCY must not be zero.
! Unchanged on exit.
!
! Further Details
! ===============
!
! Level 2 BLAS-like routine.
!
! Written by Meiyue Shao, Lawrence Berkeley National Laboratory.
! Last change: October 2014
!
! =====================================================================
!
! .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
INTEGER NB
PARAMETER ( NB = 64 )
* ..
* .. Local Scalars ..
! ..
! .. Local Scalars ..
INTEGER II, JJ, IC, IY, JC, JX, KX, KY, INFO
DOUBLE PRECISION TEMP
LOGICAL UPPER
* .. Local Arrays ..
! .. Local Arrays ..
DOUBLE PRECISION WORK( NB )
* ..
* .. External Functions ..
! ..
! .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
! ..
! .. External Subroutines ..
EXTERNAL DGEMV, DSSMV_SM, XERBLA
* ..
* .. Intrinsic Functions ..
! ..
! .. Intrinsic Functions ..
INTRINSIC MAX
* ..
* .. Executable Statements ..
*
* Test the input parameters.
*
! ..
! .. Executable Statements ..
!
! Test the input parameters.
!
UPPER = LSAME( UPLO, 'U' )
INFO = 0
IF ( .NOT. UPPER .AND. .NOT. LSAME( UPLO, 'L' ) ) THEN
......@@ -144,14 +144,14 @@
CALL XERBLA( 'DSSMV ', INFO )
RETURN
END IF
*
* Quick return if possible.
*
IF ( ( N .EQ. 0 ) .OR. ( ( ALPHA .EQ. ZERO ) .AND.
$ ( BETA .EQ. ONE ) ) ) RETURN
*
* Set up the start points in X and Y.
*
!
! Quick return if possible.
!
IF ( ( N .EQ. 0 ) .OR. ( ( ALPHA .EQ. ZERO ) .AND. &
& ( BETA .EQ. ONE ) ) ) RETURN
!
! Set up the start points in X and Y.
!
IF ( INCX .GT. 0 ) THEN
KX = 1
ELSE
......@@ -162,13 +162,13 @@
ELSE
KY = 1 - (N-1)*INCY
END IF
*
* Start the operations.
*
!
! Start the operations.
!
IF ( UPPER ) THEN
*
* Form y when A is stored in the upper triangle.
*
!
! Form y when A is stored in the upper triangle.
!
TEMP = BETA
DO JJ = 1, N, NB
JC = MIN( NB, N-JJ+1 )
......@@ -178,27 +178,27 @@
IC = MIN( NB, N-II+1 )
IY = KY + (II-1)*INCY
IF ( INCY .LT. 0 ) IY = IY + (IC-1)*INCY
*
* Call DSSMV_SM for diagonal blocks,
* and DGEMV for off-diagonal blocks.
*
!
! Call DSSMV_SM for diagonal blocks,
! and DGEMV for off-diagonal blocks.
!
IF ( II .GT. JJ ) THEN
CALL DGEMV( 'T', NB, IC, -ALPHA, A( JJ, II ), LDA,
$ X( JX ), INCX, TEMP, Y( IY ), INCY )
CALL DGEMV( 'T', NB, IC, -ALPHA, A( JJ, II ), LDA, &
& X( JX ), INCX, TEMP, Y( IY ), INCY )
ELSE IF ( II .LT. JJ ) THEN
CALL DGEMV( 'N', NB, JC, ALPHA, A( II, JJ ), LDA,
$ X( JX ), INCX, TEMP, Y( IY ), INCY )
CALL DGEMV( 'N', NB, JC, ALPHA, A( II, JJ ), LDA, &
& X( JX ), INCX, TEMP, Y( IY ), INCY )
ELSE
CALL DSSMV_SM( UPPER, JC, ALPHA, A( JJ, JJ ), LDA,
$ X( JX ), INCX, TEMP, Y( IY ), INCY, WORK )
CALL DSSMV_SM( UPPER, JC, ALPHA, A( JJ, JJ ), LDA, &
& X( JX ), INCX, TEMP, Y( IY ), INCY, WORK )
END IF
END DO
TEMP = ONE
END DO
ELSE
*
* Form y when A is stored in the lower triangle.
*
!
! Form y when A is stored in the lower triangle.
!
TEMP = BETA
DO JJ = 1, N, NB
JC = MIN( NB, N-JJ+1 )
......@@ -208,72 +208,72 @@
IC = MIN( NB, N-II+1 )
IY = KY + (II-1)*INCY
IF ( INCY .LT. 0 ) IY = IY + (IC-1)*INCY
*
* Call DSSMV_SM for diagonal blocks,
* and DGEMV for off-diagonal blocks.
*
!
! Call DSSMV_SM for diagonal blocks,
! and DGEMV for off-diagonal blocks.
!
IF ( II .LT. JJ ) THEN
CALL DGEMV( 'T', JC, NB, -ALPHA, A( JJ, II ), LDA,
$ X( JX ), INCX, TEMP, Y( IY ), INCY )
CALL DGEMV( 'T', JC, NB, -ALPHA, A( JJ, II ), LDA, &
& X( JX ), INCX, TEMP, Y( IY ), INCY )
ELSE IF ( II .GT. JJ ) THEN
CALL DGEMV( 'N', IC, NB, ALPHA, A( II, JJ ), LDA,
$ X( JX ), INCX, TEMP, Y( IY ), INCY )
CALL DGEMV( 'N', IC, NB, ALPHA, A( II, JJ ), LDA, &
& X( JX ), INCX, TEMP, Y( IY ), INCY )
ELSE
CALL DSSMV_SM( UPPER, JC, ALPHA, A( JJ, JJ ), LDA,
$ X( JX ), INCX, TEMP, Y( IY ), INCY, WORK )
CALL DSSMV_SM( UPPER, JC, ALPHA, A( JJ, JJ ), LDA, &
& X( JX ), INCX, TEMP, Y( IY ), INCY, WORK )
END IF
END DO
TEMP = ONE
END DO
END IF
*
!
RETURN
*
* End of DSSMV.
*
!
! End of DSSMV.
!
END
*
*
*
SUBROUTINE DSSMV_SM( UPPER, N, ALPHA, A, LDA, X, INCX, BETA, Y,
$ INCY, WORK )
*
* The length of WORK is at least n.
*
!
!
!
SUBROUTINE DSSMV_SM( UPPER, N, ALPHA, A, LDA, X, INCX, BETA, Y, &
& INCY, WORK )
!
! The length of WORK is at least n.
!
IMPLICIT NONE
*
* .. Scalar Arguments ..
!