Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
ab3f568f
Commit
ab3f568f
authored
Jun 16, 2015
by
Andreas Marek
Browse files
Re-integrate some changes from NVIDIA
parent
0adedef7
Changes
11
Expand all
Hide whitespace changes
Inline
Side-by-side
configure.ac
View file @
ab3f568f
...
...
@@ -428,7 +428,7 @@ AC_MSG_RESULT([${fortran_can_check_environment}])
dnl check whether GPU version is requested
CUDA_INSTALL_PATH="/usr/local/cuda/"
#
CUDA_INSTALL_PATH="/usr/local/cuda/"
#CUDA_SDK_INSTALL_PATH="/usr/local/NVIDIA_GPU_Computing_SDK"
AC_MSG_CHECKING(whether GPU support is requested)
...
...
m4/libtool.m4
View file @
ab3f568f
This diff is collapsed.
Click to expand it.
m4/ltoptions.m4
View file @
ab3f568f
# Helper functions for option handling. -*- Autoconf -*-
#
# Copyright (C) 2004
,
2005, 2007
,
200
8
, 20
09
Free Software
Foundation,
# Inc.
# Copyright (C) 2004
-
2005, 2007
-
200
9
, 20
11-2015
Free Software
#
Foundation,
Inc.
# Written by Gary V. Vaughan, 2004
#
# This file is free software; the Free Software Foundation gives
# unlimited permission to copy and/or distribute it, with or without
# modifications, as long as this notice is preserved.
# serial
7
ltoptions.m4
# serial
8
ltoptions.m4
# This is to help aclocal find these macros, as it can't see m4_define.
AC_DEFUN([LTOPTIONS_VERSION], [m4_if([1])])
...
...
@@ -29,7 +29,7 @@ m4_define([_LT_SET_OPTION],
[m4_define(_LT_MANGLE_OPTION([$1], [$2]))dnl
m4_ifdef(_LT_MANGLE_DEFUN([$1], [$2]),
_LT_MANGLE_DEFUN([$1], [$2]),
[m4_warning([Unknown $1 option
`
$2'])])[]dnl
[m4_warning([Unknown $1 option
'
$2'])])[]dnl
])
...
...
@@ -75,13 +75,15 @@ m4_if([$1],[LT_INIT],[
dnl
dnl If no reference was made to various pairs of opposing options, then
dnl we run the default mode handler for the pair. For example, if neither
dnl
`
shared' nor
`
disable-shared' was passed, we enable building of shared
dnl
'
shared' nor
'
disable-shared' was passed, we enable building of shared
dnl archives by default:
_LT_UNLESS_OPTIONS([LT_INIT], [shared disable-shared], [_LT_ENABLE_SHARED])
_LT_UNLESS_OPTIONS([LT_INIT], [static disable-static], [_LT_ENABLE_STATIC])
_LT_UNLESS_OPTIONS([LT_INIT], [pic-only no-pic], [_LT_WITH_PIC])
_LT_UNLESS_OPTIONS([LT_INIT], [fast-install disable-fast-install],
[_LT_ENABLE_FAST_INSTALL])
[_LT_ENABLE_FAST_INSTALL])
_LT_UNLESS_OPTIONS([LT_INIT], [aix-soname=aix aix-soname=both aix-soname=svr4],
[_LT_WITH_AIX_SONAME([aix])])
])
])# _LT_SET_OPTIONS
...
...
@@ -112,7 +114,7 @@ AU_DEFUN([AC_LIBTOOL_DLOPEN],
[_LT_SET_OPTION([LT_INIT], [dlopen])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the
`
dlopen' option into LT_INIT's first parameter.])
put the
'
dlopen' option into LT_INIT's first parameter.])
])
dnl aclocal-1.4 backwards compatibility:
...
...
@@ -148,7 +150,7 @@ AU_DEFUN([AC_LIBTOOL_WIN32_DLL],
_LT_SET_OPTION([LT_INIT], [win32-dll])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the
`
win32-dll' option into LT_INIT's first parameter.])
put the
'
win32-dll' option into LT_INIT's first parameter.])
])
dnl aclocal-1.4 backwards compatibility:
...
...
@@ -157,9 +159,9 @@ dnl AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [])
# _LT_ENABLE_SHARED([DEFAULT])
# ----------------------------
# implement the --enable-shared flag, and supports the
`
shared' and
#
`
disable-shared' LT_INIT options.
# DEFAULT is either
`
yes' or
`
no'. If omitted, it defaults to
`
yes'.
# implement the --enable-shared flag, and support the
'
shared' and
#
'
disable-shared' LT_INIT options.
# DEFAULT is either
'
yes' or
'
no'. If omitted, it defaults to
'
yes'.
m4_define([_LT_ENABLE_SHARED],
[m4_define([_LT_ENABLE_SHARED_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([shared],
...
...
@@ -172,14 +174,14 @@ AC_ARG_ENABLE([shared],
*)
enable_shared=no
# Look at the argument we got. We use all the common list separators.
lt_save_ifs=
"
$IFS
"
; IFS=
"${
IFS
}
$PATH_SEPARATOR,
"
lt_save_ifs=$IFS; IFS=
$
IFS$PATH_SEPARATOR,
for pkg in $enableval; do
IFS=
"
$lt_save_ifs
"
IFS=$lt_save_ifs
if test "X$pkg" = "X$p"; then
enable_shared=yes
fi
done
IFS=
"
$lt_save_ifs
"
IFS=$lt_save_ifs
;;
esac],
[enable_shared=]_LT_ENABLE_SHARED_DEFAULT)
...
...
@@ -211,9 +213,9 @@ dnl AC_DEFUN([AM_DISABLE_SHARED], [])
# _LT_ENABLE_STATIC([DEFAULT])
# ----------------------------
# implement the --enable-static flag, and support the
`
static' and
#
`
disable-static' LT_INIT options.
# DEFAULT is either
`
yes' or
`
no'. If omitted, it defaults to
`
yes'.
# implement the --enable-static flag, and support the
'
static' and
#
'
disable-static' LT_INIT options.
# DEFAULT is either
'
yes' or
'
no'. If omitted, it defaults to
'
yes'.
m4_define([_LT_ENABLE_STATIC],
[m4_define([_LT_ENABLE_STATIC_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([static],
...
...
@@ -226,14 +228,14 @@ AC_ARG_ENABLE([static],
*)
enable_static=no
# Look at the argument we got. We use all the common list separators.
lt_save_ifs=
"
$IFS
"
; IFS=
"${
IFS
}
$PATH_SEPARATOR,
"
lt_save_ifs=$IFS; IFS=
$
IFS$PATH_SEPARATOR,
for pkg in $enableval; do
IFS=
"
$lt_save_ifs
"
IFS=$lt_save_ifs
if test "X$pkg" = "X$p"; then
enable_static=yes
fi
done
IFS=
"
$lt_save_ifs
"
IFS=$lt_save_ifs
;;
esac],
[enable_static=]_LT_ENABLE_STATIC_DEFAULT)
...
...
@@ -265,9 +267,9 @@ dnl AC_DEFUN([AM_DISABLE_STATIC], [])
# _LT_ENABLE_FAST_INSTALL([DEFAULT])
# ----------------------------------
# implement the --enable-fast-install flag, and support the
`
fast-install'
# and
`
disable-fast-install' LT_INIT options.
# DEFAULT is either
`
yes' or
`
no'. If omitted, it defaults to
`
yes'.
# implement the --enable-fast-install flag, and support the
'
fast-install'
# and
'
disable-fast-install' LT_INIT options.
# DEFAULT is either
'
yes' or
'
no'. If omitted, it defaults to
'
yes'.
m4_define([_LT_ENABLE_FAST_INSTALL],
[m4_define([_LT_ENABLE_FAST_INSTALL_DEFAULT], [m4_if($1, no, no, yes)])dnl
AC_ARG_ENABLE([fast-install],
...
...
@@ -280,14 +282,14 @@ AC_ARG_ENABLE([fast-install],
*)
enable_fast_install=no
# Look at the argument we got. We use all the common list separators.
lt_save_ifs=
"
$IFS
"
; IFS=
"${
IFS
}
$PATH_SEPARATOR,
"
lt_save_ifs=$IFS; IFS=
$
IFS$PATH_SEPARATOR,
for pkg in $enableval; do
IFS=
"
$lt_save_ifs
"
IFS=$lt_save_ifs
if test "X$pkg" = "X$p"; then
enable_fast_install=yes
fi
done
IFS=
"
$lt_save_ifs
"
IFS=$lt_save_ifs
;;
esac],
[enable_fast_install=]_LT_ENABLE_FAST_INSTALL_DEFAULT)
...
...
@@ -304,14 +306,14 @@ AU_DEFUN([AC_ENABLE_FAST_INSTALL],
[_LT_SET_OPTION([LT_INIT], m4_if([$1], [no], [disable-])[fast-install])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
the
`
fast-install' option into LT_INIT's first parameter.])
the
'
fast-install' option into LT_INIT's first parameter.])
])
AU_DEFUN([AC_DISABLE_FAST_INSTALL],
[_LT_SET_OPTION([LT_INIT], [disable-fast-install])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you put
the
`
disable-fast-install' option into LT_INIT's first parameter.])
the
'
disable-fast-install' option into LT_INIT's first parameter.])
])
dnl aclocal-1.4 backwards compatibility:
...
...
@@ -319,11 +321,64 @@ dnl AC_DEFUN([AC_ENABLE_FAST_INSTALL], [])
dnl AC_DEFUN([AM_DISABLE_FAST_INSTALL], [])
# _LT_WITH_AIX_SONAME([DEFAULT])
# ----------------------------------
# implement the --with-aix-soname flag, and support the 'aix-soname=aix'
# and 'aix-soname=both' and 'aix-soname=svr4' LT_INIT options. DEFAULT
# is either 'aix', 'both' or 'svr4'. If omitted, it defaults to 'aix'.
#
# The flag is only evaluated when $host matches the power*-*-aix pattern
# for AIX 5 through 9 and $enable_shared is 'yes'; in every other case
# with_aix_soname is forced to 'aix'.
#
# When the flag is given, its value must be one of aix, svr4 or both;
# anything else aborts configure through AC_MSG_ERROR. When the flag is
# not given, the value is taken from the cache variable
# lt_cv_with_aix_soname, which falls back to DEFAULT.
#
# Side effects: sets with_aix_soname and shared_archive_member_spec.
# shared_archive_member_spec stays empty unless with_aix_soname differs
# from 'aix'; then it is 'shr_64' when OBJECT_MODE (default 32) is 64,
# and 'shr' otherwise. The variable is declared through _LT_DECL.
m4_define([_LT_WITH_AIX_SONAME],
[m4_define([_LT_WITH_AIX_SONAME_DEFAULT], [m4_if($1, svr4, svr4, m4_if($1, both, both, aix))])dnl
shared_archive_member_spec=
case $host,$enable_shared in
power*-*-aix[[5-9]]*,yes)
AC_MSG_CHECKING([which variant of shared library versioning to provide])
AC_ARG_WITH([aix-soname],
[AS_HELP_STRING([--with-aix-soname=aix|svr4|both],
[shared library versioning (aka "SONAME") variant to provide on AIX, @<:@default=]_LT_WITH_AIX_SONAME_DEFAULT[@:>@.])],
[case $withval in
aix|svr4|both)
;;
*)
AC_MSG_ERROR([Unknown argument to --with-aix-soname])
;;
esac
lt_cv_with_aix_soname=$with_aix_soname],
[AC_CACHE_VAL([lt_cv_with_aix_soname],
[lt_cv_with_aix_soname=]_LT_WITH_AIX_SONAME_DEFAULT)
with_aix_soname=$lt_cv_with_aix_soname])
AC_MSG_RESULT([$with_aix_soname])
if test aix != "$with_aix_soname"; then
# For the AIX way of multilib, we name the shared archive member
# based on the bitwidth used, traditionally 'shr.o' or 'shr_64.o',
# and 'shr.imp' or 'shr_64.imp', respectively, for the Import File.
# Even when GNU compilers ignore OBJECT_MODE but need '-maix64' flag,
# the AIX toolchain works better with OBJECT_MODE set (default 32).
if test 64 = "${OBJECT_MODE-32}"; then
shared_archive_member_spec=shr_64
else
shared_archive_member_spec=shr
fi
fi
;;
*)
with_aix_soname=aix
;;
esac
_LT_DECL([], [shared_archive_member_spec], [0],
[Shared archive member basename, for filename based shared library versioning on AIX])dnl
])# _LT_WITH_AIX_SONAME
LT_OPTION_DEFINE([LT_INIT], [aix-soname=aix], [_LT_WITH_AIX_SONAME([aix])])
LT_OPTION_DEFINE([LT_INIT], [aix-soname=both], [_LT_WITH_AIX_SONAME([both])])
LT_OPTION_DEFINE([LT_INIT], [aix-soname=svr4], [_LT_WITH_AIX_SONAME([svr4])])
# _LT_WITH_PIC([MODE])
# --------------------
# implement the --with-pic flag, and support the
`
pic-only' and
`
no-pic'
# implement the --with-pic flag, and support the
'
pic-only' and
'
no-pic'
# LT_INIT options.
# MODE is either
`
yes' or
`
no'. If omitted, it defaults to
`
both'.
# MODE is either
'
yes' or
'
no'. If omitted, it defaults to
'
both'.
m4_define([_LT_WITH_PIC],
[AC_ARG_WITH([pic],
[AS_HELP_STRING([--with-pic@<:@=PKGS@:>@],
...
...
@@ -334,19 +389,17 @@ m4_define([_LT_WITH_PIC],
*)
pic_mode=default
# Look at the argument we got. We use all the common list separators.
lt_save_ifs=
"
$IFS
"
; IFS=
"${
IFS
}
$PATH_SEPARATOR,
"
lt_save_ifs=$IFS; IFS=
$
IFS$PATH_SEPARATOR,
for lt_pkg in $withval; do
IFS=
"
$lt_save_ifs
"
IFS=$lt_save_ifs
if test "X$lt_pkg" = "X$lt_p"; then
pic_mode=yes
fi
done
IFS=
"
$lt_save_ifs
"
IFS=$lt_save_ifs
;;
esac],
[pic_mode=default])
test -z "$pic_mode" && pic_mode=m4_default([$1], [default])
[pic_mode=m4_default([$1], [default])])
_LT_DECL([], [pic_mode], [0], [What type of objects to build])dnl
])# _LT_WITH_PIC
...
...
@@ -359,7 +412,7 @@ AU_DEFUN([AC_LIBTOOL_PICMODE],
[_LT_SET_OPTION([LT_INIT], [pic-only])
AC_DIAGNOSE([obsolete],
[$0: Remove this warning and the call to _LT_SET_OPTION when you
put the
`
pic-only' option into LT_INIT's first parameter.])
put the
'
pic-only' option into LT_INIT's first parameter.])
])
dnl aclocal-1.4 backwards compatibility:
...
...
m4/ltsugar.m4
View file @
ab3f568f
# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*-
#
# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
# Copyright (C) 2004-2005, 2007-2008, 2011-2015 Free Software
# Foundation, Inc.
# Written by Gary V. Vaughan, 2004
#
# This file is free software; the Free Software Foundation gives
...
...
@@ -33,7 +34,7 @@ m4_define([_lt_join],
# ------------
# Manipulate m4 lists.
# These macros are necessary as long as we still need to support
# Autoconf-2.59 which quotes differently.
# Autoconf-2.59
,
which quotes differently.
m4_define([lt_car], [[$1]])
m4_define([lt_cdr],
[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
...
...
@@ -44,7 +45,7 @@ m4_define([lt_unquote], $1)
# lt_append(MACRO-NAME, STRING, [SEPARATOR])
# ------------------------------------------
# Redefine MACRO-NAME to hold its former content plus
`
SEPARATOR'
`
STRING'.
# Redefine MACRO-NAME to hold its former content plus
'
SEPARATOR'
'
STRING'.
# Note that neither SEPARATOR nor STRING are expanded; they are appended
# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
# No SEPARATOR is output if MACRO-NAME was previously undefined (different
...
...
m4/ltversion.m4
View file @
ab3f568f
# ltversion.m4 -- version numbers -*- Autoconf -*-
#
# Copyright (C) 2004 Free Software Foundation, Inc.
# Copyright (C) 2004
, 2011-2015
Free Software Foundation, Inc.
# Written by Scott James Remnant, 2004
#
# This file is free software; the Free Software Foundation gives
...
...
@@ -9,15 +9,15 @@
# @configure_input@
# serial
3337
ltversion.m4
# serial
4179
ltversion.m4
# This file is part of GNU Libtool
m4_define([LT_PACKAGE_VERSION], [2.4.
2
])
m4_define([LT_PACKAGE_REVISION], [
1.3337
])
m4_define([LT_PACKAGE_VERSION], [2.4.
6
])
m4_define([LT_PACKAGE_REVISION], [
2.4.6
])
AC_DEFUN([LTVERSION_VERSION],
[macro_version='2.4.
2
'
macro_revision='
1.3337
'
[macro_version='2.4.
6
'
macro_revision='
2.4.6
'
_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
_LT_DECL(, macro_revision, 0)
])
m4/lt~obsolete.m4
View file @
ab3f568f
# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*-
#
# Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
# Copyright (C) 2004-2005, 2007, 2009, 2011-2015 Free Software
# Foundation, Inc.
# Written by Scott James Remnant, 2004.
#
# This file is free software; the Free Software Foundation gives
...
...
@@ -11,7 +12,7 @@
# These exist entirely to fool aclocal when bootstrapping libtool.
#
# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
,
# which have later been changed to m4_define as they aren't part of the
# exported API, or moved to Autoconf or Automake where they belong.
#
...
...
@@ -25,7 +26,7 @@
# included after everything else. This provides aclocal with the
# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
# because those macros already exist, or will be overwritten later.
# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
#
# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
# Yes, that means every name once taken will need to remain here until
...
...
src/elpa2.F90
View file @
ab3f568f
This diff is collapsed.
Click to expand it.
src/ev_tridi_band_gpu_c_v2.cu
View file @
ab3f568f
...
...
@@ -16,6 +16,7 @@ static __device__ __forceinline__ double shfl_xor(double r, int mask)
return
__hiloint2double
(
hi
,
lo
);
}
#if 0
static __device__ __forceinline__ cuDoubleComplex shfl_xor_complex(cuDoubleComplex r, int mask)
{
double real = cuCreal(r) ;
...
...
@@ -35,7 +36,7 @@ static __device__ __forceinline__ cuDoubleComplex shfl_xor_complex(cuDoubleComp
return make_cuDoubleComplex(real, imag);
}
#endif
// Perform the equivalent of "__shfl_down" on an 8-byte value
...
...
src/interface_cuda.F90
View file @
ab3f568f
...
...
@@ -67,6 +67,13 @@ module cuda_routines
integer
(
C_INT
)
::
istat
end
function
cuda_setdevice
! Query the number of CUDA devices through the CUDA runtime.
!
! Binds directly to the C routine cudaGetDeviceCount.
!   n     - (out) receives the device count reported by the runtime
!   istat - function result: status code returned by the C routine;
!           callers treat any non-zero value as an error
function cuda_getdevicecount(n) result(istat) &
         bind(C, name="cudaGetDeviceCount")
  use iso_c_binding
  ! Use the C_INT kind so the dummy argument is interoperable with the
  ! C int pointer parameter regardless of the compiler's default integer
  ! size (e.g. when building with 8-byte default integers). With the usual
  ! 4-byte default integer this is the same kind callers already pass.
  integer(C_INT), intent(out) :: n
  integer(C_INT)              :: istat
end function cuda_getdevicecount
function
cuda_ProfilerStart
()
result
(
istat
)&
bind
(
C
,
name
=
"cudaProfilerStart"
)
...
...
test/test_complex2.F90
View file @
ab3f568f
...
...
@@ -149,7 +149,7 @@ program test_complex2
#ifdef WITH_GPU_VERSION
character
(
len
=
1024
)
::
envname
integer
::
istat
,
devnum
integer
::
istat
,
devnum
,
numdevs
#endif
write_to_file
=
.false.
...
...
@@ -165,13 +165,23 @@ program test_complex2
! MPI Initialization
call
setup_mpi
(
myid
,
nprocs
)
#ifdef WITH_GPU_VERSION
devnum
=
0
istat
=
cuda_getdevicecount
(
numdevs
)
if
(
istat
.ne.
0
)
then
print
*
,
"Error in cuda_getdevicecount"
stop
endif
if
(
myid
==
0
)
then
print
*
print
'(3(a,i0))'
,
'Found '
,
numdevs
,
' GPUs'
endif
devnum
=
mod
(
myid
,
numdevs
)
istat
=
cuda_setdevice
(
devnum
)
if
(
istat
.ne.
0
)
then
print
*
,
"Cannot set CudaDevice"
stop
endif
print
'(3(a,i0))'
,
'MPI rank '
,
myid
,
' uses GPU #'
,
devnum
#endif
STATUS
=
0
...
...
@@ -345,8 +355,8 @@ program test_complex2
! Calculate eigenvalues/eigenvectors
call
mpi_barrier
(
mpi_comm_world
,
mpierr
)
! for correct timings only
success
=
solve_evp_complex_2stage
(
na
,
nev
,
a
,
na_rows
,
ev
,
z
,
na_rows
,
nblk
,
&
na_rows
,
na_cols
,
mpi_comm_rows
,
mpi_comm_cols
,
mpi_comm_world
)
success
=
solve_evp_complex_2stage
(
na
,
nev
,
a
,
na_rows
,
ev
,
z
,
na_rows
,
na_cols
,
nblk
,
&
mpi_comm_rows
,
mpi_comm_cols
,
mpi_comm_world
)
if
(
.not.
(
success
))
then
write
(
error_unit
,
*
)
"solve_evp_complex_2stage produced an error! Aborting..."
...
...
test/test_real2.F90
View file @
ab3f568f
...
...
@@ -143,7 +143,7 @@ program test_real2
#ifdef WITH_GPU_VERSION
character
(
len
=
1024
)
::
envname
integer
::
istat
,
devnum
integer
::
istat
,
devnum
,
numdevs
#endif
write_to_file
=
.false.
success
=
.true.
...
...
@@ -160,13 +160,26 @@ program test_real2
call
setup_mpi
(
myid
,
nprocs
)
#ifdef WITH_GPU_VERSION
devnum
=
0
! call getenv("CUDA_PROXY_PIPE_DIRECTORY", envname)
istat
=
cuda_getdevicecount
(
numdevs
)
if
(
istat
.ne.
0
)
then
print
*
,
"error in cuda_getdevicecount"
stop
endif
if
(
myid
==
0
)
then
print
*
print
'(3(a,i0))'
,
'Found '
,
numdevs
,
' GPUs'
endif
devnum
=
mod
(
myid
,
numdevs
)
istat
=
cuda_setdevice
(
devnum
)
if
(
istat
.ne.
0
)
then
print
*
,
"Cannot set CudaDevice"
stop
endif
print
'(3(a,i0))'
,
'MPI rank '
,
myid
,
' uses GPU #'
,
devnum
#endif
STATUS
=
0
...
...
@@ -341,7 +354,7 @@ program test_real2
end
if
call
mpi_barrier
(
mpi_comm_world
,
mpierr
)
! for correct timings only
success
=
solve_evp_real_2stage
(
na
,
nev
,
a
,
na_rows
,
ev
,
z
,
na_rows
,
nblk
,
na_rows
,
na_cols
,
&
success
=
solve_evp_real_2stage
(
na
,
nev
,
a
,
na_rows
,
ev
,
z
,
na_rows
,
nblk
,
&
mpi_comm_rows
,
mpi_comm_cols
,
mpi_comm_world
)
if
(
.not.
(
success
))
then
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment