Commit 636a9b07 authored by Andreas Marek
Browse files

Merge branch 'master_pre_stage' into 'master'

Master pre stage

See merge request !74
parents e5cdc060 924b2848
This diff is collapsed.
......@@ -654,6 +654,7 @@ for cc, fc, m, o, p, a, b, g, instr, addr, na in product(
sorted(address_sanitize_flag.keys()),
sorted(matrix_size.keys())):
cov = "no-coverage"
nev = 150
......@@ -744,7 +745,7 @@ for cc, fc, m, o, p, a, b, g, instr, addr, na in product(
continue
#at the moment gpu testing only on AVX machines or minskys
if (g == "with-gpu" and (instr !="avx" and instr !="power8")):
if (g == "with-gpu" and (instr !="avx512" and instr !="power8")):
continue
# #on KNL do only intel tests
......
......@@ -136,7 +136,7 @@ then
if [ "$gpuJob" == "yes" ]
then
cp $HOME/runners/job_script_templates/run_${CLUSTER}_1node_2GPU.sh .
echo "if \[ \$SLURM_PROCID -eq 0 \]" >> ./run_${CLUSTER}_1node_GPU.sh
echo "if \[ \$SLURM_PROCID -eq 0 \]" >> ./run_${CLUSTER}_1node_2GPU.sh
echo "then" >> ./run_${CLUSTER}_1node_2GPU.sh
echo "echo \"process \$SLURM_PROCID running configure\"" >> ./run_${CLUSTER}_1node_2GPU.sh
echo "#decouple from SLURM (maybe this could be removed)" >> ./run_${CLUSTER}_1node_2GPU.sh
......
......@@ -84,6 +84,36 @@ module mod_check_for_gpu
stop
endif
success = .true.
numberOfDevices = -1
#ifdef WITH_NVIDIA_GPU_VERSION
! call getenv("CUDA_PROXY_PIPE_DIRECTORY", envname)
success = cuda_getdevicecount(numberOfDevices)
#endif
#ifdef WITH_AMD_GPU_VERSION
! call getenv("CUDA_PROXY_PIPE_DIRECTORY", envname)
success = hip_getdevicecount(numberOfDevices)
#endif
if (.not.(success)) then
#ifdef WITH_NVIDIA_GPU_VERSION
print *,"error in cuda_getdevicecount"
#endif
#ifdef WITH_AMD_GPU_VERSION
print *,"error in hip_getdevicecount"
#endif
stop 1
endif
#ifdef WITH_INTEL_GPU_VERSION
gpuAvailable = .false.
numberOfDevices = -1
numberOfDevices = 1
print *,"Manually setting",numberOfDevices," of GPUs"
if (numberOfDevices .ge. 1) then
gpuAvailable = .true.
endif
#endif
if (obj%is_set("use_gpu_id") == 1) then
call obj%get("use_gpu_id", use_gpu_id, error)
if (use_gpu_id == -99) then
......@@ -110,6 +140,12 @@ module mod_check_for_gpu
endif
endif
if (use_gpu_id+1 .gt. numberOfDevices) then
print *,"Task=",myid," wants to use GPU id=",use_gpu_id," allowed (0:#GPUs-1)"
print *,"However, there are only ",numberOfDevices," on the node"
stop 1
endif
success = .true.
#ifdef WITH_NVIDIA_GPU_VERSION
success = cuda_setdevice(use_gpu_id)
......
......@@ -280,7 +280,7 @@ module gpu_c_kernel
call launch_my_pack_c_cuda_kernel_complex_single(row_count, n_offset, max_idx,stripe_width,a_dim2, stripe_count, l_nev, &
a_dev, row_group_dev)
#endif
#ifdef WITH_NVIDIA_GPU_VERSION
#ifdef WITH_AMD_GPU_VERSION
call launch_my_pack_c_hip_kernel_complex_single(row_count, n_offset, max_idx,stripe_width,a_dim2, stripe_count, l_nev, &
a_dev, row_group_dev)
#endif
......
......@@ -689,7 +689,8 @@ program test
#if (TEST_GPU_SET_ID == 1) && (TEST_INTEL_GPU == 0)
! simple test
! Can (and should) fail often
gpuID = mod(myid,2)
!gpuID = mod(myid,2)
gpuID = mod(myid,1)
print *,"Task",myid,"wants to use GPU",gpuID
call e%set("use_gpu_id", int(gpuID,kind=c_int), error_elpa)
assert_elpa_ok(error_elpa)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment