Commit 411e07e1 authored by Max Lindqvist

Fixed maxlinpc_gpu_check.sh

parent 995e2c16
@@ -47,50 +47,55 @@ cd $FOLDER_NAME
# ------------ HIP ------------ #
echo "=== Compiling hello_hip_gpu ==="
#hipcc -o hello_hip_gpu hello_hip_gpu.cpp
# hipcc -o cpp/hello_hip_gpu cpp/hello_hip_gpu.cpp
echo "=== Running ./hello_hip_gpu ==="
#./hello_hip_gpu > hello_hip_gpu.out
# ./cpp/hello_hip_gpu > cpp/hello_hip_gpu.out
# ------------ CUDA ------------ #
echo "=== Compiling hello_cuda_gpu ==="
nvcc -o hello_cuda_gpu hello_cuda_gpu.c
nvcc -o cuda/hello_cuda cuda/hello.cu
echo "=== Running ./hello_cuda_gpu ==="
./hello_cuda_gpu > hello_cuda_gpu.out
echo "=== Running ./cuda/hello_cuda_gpu ==="
./cuda/hello_cuda > cuda/hello.out
# ------------ OpenMP ------------ #
echo "=== Compiling hello_openmp_gpu_c ==="
# nvcc -Xcompiler -fopenmp hello_openmp_gpu.c -o hello_openmp_gpu
clang -fopenmp -fopenmp-targets=nvptx64 -L/usr/lib/llvm-18/lib -Wl,-rpath,/usr/lib/llvm-18/lib -o hello_openmp_gpu_c hello_openmp_gpu_c.c
clang -fopenmp -fopenmp-targets=nvptx64 -L/usr/lib/llvm-18/lib -Wl,-rpath,/usr/lib/llvm-18/lib -o C/hello_openmp_gpu C/hello_openmp_gpu.c
clang -fopenmp -fopenmp-targets=nvptx64 -L/usr/lib/llvm-18/lib -Wl,-rpath,/usr/lib/llvm-18/lib -o C/matmul_openmp_gpu C/matmul_openmp_gpu.c
echo "=== Running ./hello_openmp_gpu_c ==="
./hello_openmp_gpu_c > hello_openmp_gpu_c.out
./C/hello_openmp_gpu > C/hello_openmp_gpu.out
# ./C/matmul_openmp_gpu > ./C/matmul_openmp_gpu.out
echo "=== Compiling hello_openmp_gpu_fortran ==="
nvfortran -gpu=cc75 -O3 -fast -acc -mp=gpu -o hello_openmp_gpu_fortran hello_openmp_gpu_fortran.f90
nvfortran -gpu=cc75 -O3 -fast -acc -mp=gpu -o fortran/hello_openmp_gpu_fortran fortran/hello_openmp_gpu_fortran.f90
echo "=== Running ./fortran/hello_openmp_gpu_fortran ==="
./fortran/hello_openmp_gpu_fortran > fortran/hello_openmp_gpu_fortran.out
echo "=== Running ./hello_openmp_gpu_fortran ==="
./hello_openmp_gpu_fortran > hello_openmp_gpu_fortran.out
# # ------------ OpenMP + pyccel ------------ #
export LD_LIBRARY_PATH="/usr/lib/llvm-18/lib:$LD_LIBRARY_PATH"
echo "=== Compiling hello_openmp_gpu_pyccel ==="
pyccel --language c --openmp --compiler compiler_clang_maxlinpc.json --verbose hello_openmp_gpu_pyccel.py
#pyccel --language c --openmp --compiler compiler_clang_maxlinpc.json --verbose hello_openmp_gpu_pyccel.py
#pyccel --language fortran --openmp --compiler compiler_nvfortran_maxlinpc.json --verbose hello_openmp_gpu_pyccel.py
echo "=== Running ./hello_openmp_gpu_pyccel ==="
nsys profile ./hello_openmp_gpu_pyccel > hello_openmp_gpu_pyccel.out
#echo "=== Running ./hello_openmp_gpu_pyccel ==="
#nsys profile ./hello_openmp_gpu_pyccel > hello_openmp_gpu_pyccel.out
echo "=== Compiling pyccel_kernels ==="
pyccel --language fortran --openmp --compiler compiler_nvfortran_maxlinpc.json --verbose pyccel_kernels.py
echo "=== Running compute_gpu_pyccel.py ==="
#echo "=== Compiling pyccel_kernels ==="
#pyccel --language fortran --openmp --compiler compiler_nvfortran_maxlinpc.json --verbose pyccel_kernels.py
#echo "=== Running compute_gpu_pyccel.py ==="
# python compute_gpu_pyccel.py
nsys profile --stats=true python compute_gpu_pyccel.py
#nsys profile --stats=true python compute_gpu_pyccel.py
# ------------ Cleanup ------------ #
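For reference, the hello_openmp_gpu_pyccel.py program compiled above is not shown in this diff. A minimal sketch of such a check, assuming only the pyccel.stdlib.internal.openmp bindings and the #$ omp directive style used in pyccel_kernels below, could look like this (illustrative only, not the repository file):

# Illustrative sketch only -- not the repository's hello_openmp_gpu_pyccel.py.
from pyccel.stdlib.internal.openmp import omp_get_num_devices

def hello_gpu(x: 'float[:]'):
    n: int = x.shape[0]
    # Report how many offload devices the OpenMP runtime can see.
    print("Number of available GPUs: ", omp_get_num_devices())
    # Trivial offloaded loop, same directive style as the kernels in this commit.
    #$ omp target teams distribute parallel for schedule(static)
    for i in range(n):
        x[i] = 2.0 * x[i]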
@@ -16,7 +16,8 @@ all: $(OUTPUT_SO)
# Rule to run pyccel and generate the shared library
$(OUTPUT_SO): $(PYTHON_FILE)
	$(PYCCEL) --language $(LANGUAGE) --openmp --compiler $(COMPILER) --libdir $(LIBDIR) --verbose $(PYTHON_FILE)
	# $(PYCCEL) --language $(LANGUAGE) --openmp --compiler $(COMPILER) --libdir $(LIBDIR) --verbose $(PYTHON_FILE)
	$(PYCCEL) --language $(LANGUAGE) --openmp --compiler $(COMPILER) --verbose $(PYTHON_FILE)
#--------------------------------------
@@ -10,86 +10,86 @@ from pyccel.stdlib.internal.openmp import (
def set_pi(pi: float) -> None:
    pi = 3.14159
# def print_cpu_gpu_thread_info(
#     N : int,
#     data : 'int[:,:]',
#     ):
#     # Get the number of available GPU devices.
#     num_devices = omp_get_num_devices()
#     print("Number of available GPUs: ", num_devices)
#     temp : int = 0
#     # data[:,:] = np.empty((N, 4), dtype=int)
#     # CPU
#     print('Loop with CPU:')
#     #$ omp parallel for
#     for i in range(N):
#         tid = omp_get_thread_num() # Thread id within the team.
#         nthreads = omp_get_num_threads() # Number of threads.
#         team = omp_get_team_num() # Team number.
#         nteams = omp_get_num_teams()
def print_cpu_gpu_thread_info(
    N : int,
    data : 'int[:,:]',
    ):
    # Get the number of available GPU devices.
    num_devices = omp_get_num_devices()
    print("Number of available GPUs: ", num_devices)
    temp : int = 0
    # data[:,:] = np.empty((N, 4), dtype=int)
    # CPU
    print('Loop with CPU:')
    #$ omp parallel for
    for i in range(N):
        tid = omp_get_thread_num() # Thread id within the team.
        nthreads = omp_get_num_threads() # Number of threads.
        team = omp_get_team_num() # Team number.
        nteams = omp_get_num_teams()
        data[i, 0] = team
        data[i, 1] = nteams
        data[i, 2] = tid
        data[i, 3] = nthreads
        temp = temp + 1
    for i in range(N):
        print("CPU: Iteration", i, "processed by team", data[i, 0], "/", data[i, 1], ", thread", data[i, 2], "/", data[i, 3])
    print('----------------------------------\n\n\n')
    # GPU
    print('Loop with GPU:')
    #
    #$ omp target teams distribute parallel for schedule(static) private(temp)
    for i in range(N):
        tid = omp_get_thread_num() # Thread id within the team.
        nthreads = omp_get_num_threads() # Number of threads.
        team = omp_get_team_num() # Team number.
        nteams = omp_get_num_teams()
        # print("GPU: Iteration", i, "processed by team", team, "/", nteams, "thread", tid, "/", nthreads, "threads")
        # data[i, 0] = team
        # data[i, 1] = nteams
        # data[i, 2] = tid
        # data[i, 3] = nthreads
        # temp = temp + 1
    # for i in range(N):
    #     print("CPU: Iteration", i, "processed by team", data[i, 0], "/", data[i, 1], ", thread", data[i, 2], "/", data[i, 3])
    # print('----------------------------------\n\n\n')
#     # GPU
#     print('Loop with GPU:')
#     #
#     #$ omp target teams distribute parallel for schedule(static) private(temp)
#     for i in range(N):
#         tid = omp_get_thread_num() # Thread id within the team.
#         nthreads = omp_get_num_threads() # Number of threads.
#         team = omp_get_team_num() # Team number.
#         nteams = omp_get_num_teams()
#         # print("GPU: Iteration", i, "processed by team", team, "/", nteams, "thread", tid, "/", nthreads, "threads")
#         # data[i, 0] = team
#         # data[i, 1] = nteams
#         # data[i, 2] = tid
#         # data[i, 3] = nthreads
#         # temp = temp + 1
#     # for i in range(N):
#     #     print("GPU: Iteration", i, "processed by team", data[i, 0], "/", data[i, 1], ", thread", data[i, 2], "/", data[i, 3])
#     print("GPU: Iteration", i, "processed by team", data[i, 0], "/", data[i, 1], ", thread", data[i, 2], "/", data[i, 3])
def axpy(a: 'float', x: 'float[:]', y: 'float[:]'):
    N: int = x.shape[0]
    for i in range(N):
        y[i] = a * x[i] + y[i]
# def axpy_gpu(a: float, x: 'float[:]', y: 'float[:]'):
#     N: int = x.shape[0]
#     #$ omp target teams distribute parallel for schedule(static)
#     for i in range(N):
#         y[i] = a * x[i] + y[i]
def axpy_gpu(a: float, x: 'float[:]', y: 'float[:]'):
    N: int = x.shape[0]
    #$ omp target teams distribute parallel for schedule(static)
    for i in range(N):
        y[i] = a * x[i] + y[i]
# def heavy_compute_cpu(x: 'float[:]', y: 'float[:]'):
#     N: int = x.shape[0]
#     temp: float = 0.0
#     for i in range(N):
#         temp = x[i]
#         # A heavy inner loop to increase arithmetic intensity
#         for j in range(1000):
#             temp = np.sqrt(temp + 1.0)
#         y[i] = temp
# # GPU version: offloaded using OpenMP target directive
# def heavy_compute_gpu(x: 'float[:]', y: 'float[:]'):
#     N: int = x.shape[0]
#     temp: float = 0.0
#     #$ omp target teams distribute parallel for schedule(static)
#     for i in range(N):
#         temp = x[i]
#         for j in range(1000):
#             temp = np.sqrt(temp + 1.0)
#         y[i] = temp
def heavy_compute_cpu(x: 'float[:]', y: 'float[:]'):
    N: int = x.shape[0]
    temp: float = 0.0
    for i in range(N):
        temp = x[i]
        # A heavy inner loop to increase arithmetic intensity
        for j in range(1000):
            temp = np.sqrt(temp + 1.0)
        y[i] = temp
# GPU version: offloaded using OpenMP target directive
def heavy_compute_gpu(x: 'float[:]', y: 'float[:]'):
    N: int = x.shape[0]
    temp: float = 0.0
    #$ omp target teams distribute parallel for schedule(static)
    for i in range(N):
        temp = x[i]
        for j in range(1000):
            temp = np.sqrt(temp + 1.0)
        y[i] = temp
def matmul_cpu(A: 'float[:,:]', B: 'float[:,:]', C: 'float[:,:]'):
    N: int = A.shape[0]
@@ -101,16 +101,16 @@ def matmul_cpu(A: 'float[:,:]', B: 'float[:,:]', C: 'float[:,:]'):
                s += A[i, k] * B[k, j]
            C[i, j] = s
# def matmul_gpu(A: 'float[:,:]', B: 'float[:,:]', C: 'float[:,:]'):
#     N: int = A.shape[0]
#     s: float = 0.0
#     #$ omp target teams distribute parallel for collapse(2) map(tofrom:C)
#     for i in range(N):
#         for j in range(N):
#             s = 0.0
#             for k in range(N):
#                 s += A[i, k] * B[k, j]
#             C[i, j] = s
def matmul_gpu(A: 'float[:,:]', B: 'float[:,:]', C: 'float[:,:]'):
    N: int = A.shape[0]
    s: float = 0.0
    #$ omp target teams distribute parallel for collapse(2) map(tofrom:C)
    for i in range(N):
        for j in range(N):
            s = 0.0
            for k in range(N):
                s += A[i, k] * B[k, j]
            C[i, j] = s
def test_matmul():
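The driver compute_gpu_pyccel.py profiled by the script is not part of this diff either. A hypothetical sketch of how the kernels above might be exercised once pyccel has compiled pyccel_kernels.py (the import name and problem sizes are assumptions):

# Hypothetical driver sketch -- compute_gpu_pyccel.py itself is not shown in this diff.
import numpy as np
from pyccel_kernels import axpy, axpy_gpu, matmul_cpu, matmul_gpu  # assumes the pyccel build succeeded

n = 1_000_000
a = 2.5
x = np.random.rand(n)
y_cpu = np.random.rand(n)
y_gpu = y_cpu.copy()

axpy(a, x, y_cpu)      # plain CPU loop
axpy_gpu(a, x, y_gpu)  # OpenMP target offload
assert np.allclose(y_cpu, y_gpu)

m = 256
A = np.random.rand(m, m)
B = np.random.rand(m, m)
C_cpu = np.zeros((m, m))
C_gpu = np.zeros((m, m))
matmul_cpu(A, B, C_cpu)
matmul_gpu(A, B, C_gpu)
assert np.allclose(C_cpu, C_gpu)
print("CPU and GPU kernels agree")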