Commit ab3f568f authored by Andreas Marek's avatar Andreas Marek

Re-integrate some changes from NVIDIA

parent 0adedef7
......@@ -428,7 +428,7 @@ AC_MSG_RESULT([${fortran_can_check_environment}])
dnl check whether GPU version is requested
CUDA_INSTALL_PATH="/usr/local/cuda/"
#CUDA_INSTALL_PATH="/usr/local/cuda/"
#CUDA_SDK_INSTALL_PATH="/usr/local/NVIDIA_GPU_Computing_SDK"
AC_MSG_CHECKING(whether GPU support is requested)
......
This diff is collapsed.
This diff is collapsed.
# ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*-
#
# Copyright (C) 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
# Copyright (C) 2004-2005, 2007-2008, 2011-2015 Free Software
# Foundation, Inc.
# Written by Gary V. Vaughan, 2004
#
# This file is free software; the Free Software Foundation gives
......@@ -33,7 +34,7 @@ m4_define([_lt_join],
# ------------
# Manipulate m4 lists.
# These macros are necessary as long as we still need to support
# Autoconf-2.59 which quotes differently.
# Autoconf-2.59, which quotes differently.
m4_define([lt_car], [[$1]])
m4_define([lt_cdr],
[m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
......@@ -44,7 +45,7 @@ m4_define([lt_unquote], $1)
# lt_append(MACRO-NAME, STRING, [SEPARATOR])
# ------------------------------------------
# Redefine MACRO-NAME to hold its former content plus `SEPARATOR'`STRING'.
# Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'.
# Note that neither SEPARATOR nor STRING are expanded; they are appended
# to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
# No SEPARATOR is output if MACRO-NAME was previously undefined (different
......
# ltversion.m4 -- version numbers -*- Autoconf -*-
#
# Copyright (C) 2004 Free Software Foundation, Inc.
# Copyright (C) 2004, 2011-2015 Free Software Foundation, Inc.
# Written by Scott James Remnant, 2004
#
# This file is free software; the Free Software Foundation gives
......@@ -9,15 +9,15 @@
# @configure_input@
# serial 3337 ltversion.m4
# serial 4179 ltversion.m4
# This file is part of GNU Libtool
m4_define([LT_PACKAGE_VERSION], [2.4.2])
m4_define([LT_PACKAGE_REVISION], [1.3337])
m4_define([LT_PACKAGE_VERSION], [2.4.6])
m4_define([LT_PACKAGE_REVISION], [2.4.6])
AC_DEFUN([LTVERSION_VERSION],
[macro_version='2.4.2'
macro_revision='1.3337'
[macro_version='2.4.6'
macro_revision='2.4.6'
_LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
_LT_DECL(, macro_revision, 0)
])
# lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*-
#
# Copyright (C) 2004, 2005, 2007, 2009 Free Software Foundation, Inc.
# Copyright (C) 2004-2005, 2007, 2009, 2011-2015 Free Software
# Foundation, Inc.
# Written by Scott James Remnant, 2004.
#
# This file is free software; the Free Software Foundation gives
......@@ -11,7 +12,7 @@
# These exist entirely to fool aclocal when bootstrapping libtool.
#
# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN)
# In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN),
# which have later been changed to m4_define as they aren't part of the
# exported API, or moved to Autoconf or Automake where they belong.
#
......@@ -25,7 +26,7 @@
# included after everything else. This provides aclocal with the
# AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
# because those macros already exist, or will be overwritten later.
# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
# We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
#
# Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
# Yes, that means every name once taken will need to remain here until
......
This diff is collapsed.
......@@ -16,6 +16,7 @@ static __device__ __forceinline__ double shfl_xor(double r, int mask)
return __hiloint2double(hi, lo);
}
#if 0
static __device__ __forceinline__ cuDoubleComplex shfl_xor_complex(cuDoubleComplex r, int mask)
{
double real = cuCreal(r) ;
......@@ -35,7 +36,7 @@ static __device__ __forceinline__ cuDoubleComplex shfl_xor_complex(cuDoubleComp
return make_cuDoubleComplex(real, imag);
}
#endif
// Perform the equivalent of "__shfl_down" on an 8-byte value
......
......@@ -67,6 +67,13 @@ module cuda_routines
integer(C_INT) :: istat
end function cuda_setdevice
! Interface body binding to the C symbol "cudaGetDeviceCount".
!   n     : written by the C routine (passed by reference); declared with the
!           C-interoperable kind C_INT so its storage matches a C int even when
!           the compiler's default integer kind is changed by build flags.
!   istat : integer status returned by the C routine; the callers compare it
!           against 0 to detect failure.
function cuda_getdevicecount(n) result(istat) &
  bind(C, name="cudaGetDeviceCount")
  use, intrinsic :: iso_c_binding, only: C_INT
  implicit none
  integer(C_INT), intent(out) :: n
  integer(C_INT) :: istat
end function cuda_getdevicecount
function cuda_ProfilerStart() result(istat)&
bind (C, name="cudaProfilerStart")
......
......@@ -149,7 +149,7 @@ program test_complex2
#ifdef WITH_GPU_VERSION
character(len=1024) :: envname
integer :: istat, devnum
integer :: istat, devnum, numdevs
#endif
write_to_file = .false.
......@@ -165,13 +165,23 @@ program test_complex2
! MPI Initialization
call setup_mpi(myid, nprocs)
#ifdef WITH_GPU_VERSION
! Select a GPU for this MPI rank: query the device count, then assign
! devices round-robin by rank (devnum = myid modulo device count).
istat = cuda_getdevicecount(numdevs)
if (istat .ne. 0) then
  print *,"Error in cuda_getdevicecount"
  ! non-zero stop code so the failure is visible in the exit status
  stop 1
endif
! mod(myid, numdevs) below is undefined for a zero device count,
! so refuse to continue instead of dividing by zero.
if (numdevs .lt. 1) then
  print *,"No CUDA devices found"
  stop 1
endif
if (myid==0) then
  print *
  print '(3(a,i0))','Found ', numdevs, ' GPUs'
endif
devnum = mod(myid, numdevs)
istat = cuda_setdevice(devnum)
if (istat .ne. 0) then
  print *,"Cannot set CudaDevice"
  stop 1
endif
! every rank reports which device it was mapped to
print '(3(a,i0))', 'MPI rank ', myid, ' uses GPU #', devnum
#endif
STATUS = 0
......@@ -345,8 +355,8 @@ program test_complex2
! Calculate eigenvalues/eigenvectors
call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
success = solve_evp_complex_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, &
na_rows, na_cols, mpi_comm_rows, mpi_comm_cols, mpi_comm_world)
success = solve_evp_complex_2stage(na, nev, a, na_rows, ev, z, na_rows, na_cols, nblk, &
mpi_comm_rows, mpi_comm_cols, mpi_comm_world)
if (.not.(success)) then
write(error_unit,*) "solve_evp_complex_2stage produced an error! Aborting..."
......
......@@ -143,7 +143,7 @@ program test_real2
#ifdef WITH_GPU_VERSION
character(len=1024) :: envname
integer :: istat, devnum
integer :: istat, devnum, numdevs
#endif
write_to_file = .false.
success = .true.
......@@ -160,13 +160,26 @@ program test_real2
call setup_mpi(myid, nprocs)
#ifdef WITH_GPU_VERSION
! Select a GPU for this MPI rank: query the device count, then assign
! devices round-robin by rank (devnum = myid modulo device count).
! call getenv("CUDA_PROXY_PIPE_DIRECTORY", envname)
istat = cuda_getdevicecount(numdevs)
if (istat .ne. 0) then
  print *,"error in cuda_getdevicecount"
  ! non-zero stop code so the failure is visible in the exit status
  stop 1
endif
! mod(myid, numdevs) below is undefined for a zero device count,
! so refuse to continue instead of dividing by zero.
if (numdevs .lt. 1) then
  print *,"No CUDA devices found"
  stop 1
endif
if (myid==0) then
  print *
  print '(3(a,i0))','Found ', numdevs, ' GPUs'
endif
devnum = mod(myid, numdevs)
istat = cuda_setdevice(devnum)
if (istat .ne. 0) then
  print *,"Cannot set CudaDevice"
  stop 1
endif
! every rank reports which device it was mapped to
print '(3(a,i0))', 'MPI rank ', myid, ' uses GPU #', devnum
#endif
STATUS = 0
......@@ -341,7 +354,7 @@ program test_real2
end if
call mpi_barrier(mpi_comm_world, mpierr) ! for correct timings only
success = solve_evp_real_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, na_rows, na_cols, &
success = solve_evp_real_2stage(na, nev, a, na_rows, ev, z, na_rows, nblk, &
mpi_comm_rows, mpi_comm_cols, mpi_comm_world)
if (.not.(success)) then
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment