Commit 7e9049f3 authored by Max Lindqvist

Testing...

parent fad3c356
Pipeline #246261 failed
@@ -72,16 +72,22 @@ echo "=== Running ./hello_openmp_gpu_fortran ==="
echo "=== Loading environment with pyccel installed ==="
source ~/virtual_envs/env_pyccel/bin/activate
# # ------------ OpenMP + pyccel ------------ #
make clean
unset LD_LIBRARY_PATH
# export LD_LIBRARY_PATH="/usr/lib/llvm-18/lib:$LD_LIBRARY_PATH"
# export LD_LIBRARY_PATH="/mpcdf/soft/RHEL_9/sub/amd-llvm_5_3/modules/libs:$LD_LIBRARY_PATH"
export LD_LIBRARY_PATH="/mpcdf/soft/RHEL_9/packages/x86_64/python-waterboa/2024.06/lib:$LD_LIBRARY_PATH"
echo $LD_LIBRARY_PATH
echo "=== Compiling pyccel_kernels ==="
# pyccel --language c --openmp --compiler compiler_clang_viper.json --verbose pyccel_kernels.py
# pyccel --language fortran --openmp --libdir /mpcdf/soft/RHEL_9/packages/x86_64/python-waterboa/2024.06/lib --compiler compiler_flang_viper.json --verbose pyccel_kernels.py
pyccel --language fortran --openmp --compiler compiler_flang_viper.json --verbose pyccel_kernels.py
# pyccel --language fortran --openmp --verbose pyccel_kernels.py
python hello_openmp_gpu_pyccel.py
# echo "=== Compiling hello_openmp_gpu_pyccel ==="
#pyccel --language c --openmp --compiler compiler_clang_viper.json --verbose hello_openmp_gpu_pyccel.py
echo "=== Running hello_openmp_gpu_pyccel.py ==="
python hello_openmp_gpu_pyccel.py > hello_openmp_gpu_pyccel.out
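The driver hello_openmp_gpu_pyccel.py itself is not part of this diff. As a rough, hypothetical sketch only (assuming it simply imports the compiled pyccel_kernels extension and exercises the kernels shown further down), it could look like:

# Hypothetical driver sketch, not part of this commit.
# Assumes pyccel has already built pyccel_kernels into an importable shared library.
import numpy as np
from pyccel_kernels import print_cpu_gpu_thread_info, axpy, axpy_gpu

# Report how many GPUs the OpenMP runtime sees and which teams/threads run each iteration.
print_cpu_gpu_thread_info()

# Small correctness check: the offloaded axpy should match the host version.
n = 1_000_000
x = np.random.rand(n)
y_cpu = np.random.rand(n)
y_gpu = y_cpu.copy()
axpy(2.0, x, y_cpu)        # host loop
axpy_gpu(2.0, x, y_gpu)    # OpenMP target offload
print("max difference:", np.max(np.abs(y_cpu - y_gpu)))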
compiler_flang_viper.json
{
    "exec": "amdflang",
    "mpi_exec": "mpicc",
    "exec": "flang",
    "mpi_exec": "mpirun",
    "language": "fortran",
    "module_output_flag": "-J",
    "debug_flags": [
@@ -20,9 +20,15 @@
"openmp": {
"flags": [
"-fopenmp",
"--offload-arch=gfx942"
"--offload-arch=gfx942",
"-L/${AMDLLVM_HOME}/lib/llvm/lib",
"-L/viper/u2/system/soft/RHEL_9/packages/x86_64/gcc/14.1.0/lib64"
],
"libs": []
"libs": [
"-lomptarget",
"-lflang",
"-lgfortran"
]
},
"openacc": {
"flags": [
@@ -52,6 +58,7 @@
"/mpcdf/soft/RHEL_9/packages/x86_64/python-waterboa/2024.06/include/python3.12",
"/u/maxlin/virtual_envs/env_pyccel/lib/python3.12/site-packages/numpy/_core/include"
],
"libs":[],
"shared_suffix": ".cpython-312-x86_64-linux-gnu.so",
"dependencies": [
"/mpcdf/soft/RHEL_9/packages/x86_64/python-waterboa/2024.06/lib/libpython3.12.so"
Makefile
#--------------------------------------
# CONFIGURATION
#--------------------------------------
PYCCEL = pyccel
COMPILER_JSON = compiler_flang_viper.json
PYTHON_FILE = pyccel_kernels.py
OUTPUT_SO = pyccel_kernels.cpython-312-x86_64-linux-gnu.so
# Default rule: build the shared library
all: $(OUTPUT_SO)
# Rule to run pyccel and generate the shared library
$(OUTPUT_SO): $(PYTHON_FILE)
	$(PYCCEL) --language fortran --openmp --compiler $(COMPILER_JSON) --verbose $(PYTHON_FILE)
#--------------------------------------
@@ -13,3 +24,5 @@ clean:
	find ./ -type f -name '*.lock' -delete
	find ./ -type f -name '*.so' -delete
	find ./ -type f -name '*.o' -delete
.PHONY: all clean
\ No newline at end of file
pyccel_kernels.py
@@ -10,104 +10,105 @@ from pyccel.stdlib.internal.openmp import (
def set_pi(pi: float) -> None:
    pi = 3.14159

# def print_cpu_gpu_thread_info():
#     # Get the number of available GPU devices.
#     num_devices = omp_get_num_devices()
#     print("Number of available GPUs: ", num_devices)
#     print()
#     N : int = 10
#     temp : int = 0
#     data = np.empty((N, 4), dtype=int)
#     # CPU
#     print('Loop with CPU:')
#     #$ omp parallel for
#     for i in range(N):
#         tid = omp_get_thread_num()        # Thread id within the team.
#         nthreads = omp_get_num_threads()  # Number of threads.
#         team = omp_get_team_num()         # Team number.
#         nteams = omp_get_num_teams()
#         data[i, 0] = team
#         data[i, 1] = nteams
#         data[i, 2] = tid
#         data[i, 3] = nthreads
#     for i in range(N):
#         print("CPU: Iteration", i, "processed by team", data[i, 0], "/", data[i, 1], ", thread", data[i, 2], "/", data[i, 3])
#     print('----------------------------------\n\n\n')
#     # GPU
#     print('Loop with GPU:')
#     #$ omp target teams distribute parallel for schedule(static) private(temp)
#     for i in range(N):
#         tid = omp_get_thread_num()        # Thread id within the team.
#         nthreads = omp_get_num_threads()  # Number of threads.
#         team = omp_get_team_num()         # Team number.
#         nteams = omp_get_num_teams()
#         #print("GPU: Iteration", i, "processed by team", team, "/", nteams, "thread", tid, "/", nthreads, "threads")
#         data[i, 0] = team
#         data[i, 1] = nteams
#         data[i, 2] = tid
#         data[i, 3] = nthreads
#         temp = temp + 1
#     for i in range(N):
#         print("GPU: Iteration", i, "processed by team", data[i, 0], "/", data[i, 1], ", thread", data[i, 2], "/", data[i, 3])

# def axpy(a: 'float', x: 'float[:]', y: 'float[:]'):
#     N: int = x.shape[0]
#     for i in range(N):
#         y[i] = a * x[i] + y[i]

# def axpy_gpu(a: float, x: 'float[:]', y: 'float[:]'):
#     N: int = x.shape[0]
#     #$ omp target teams distribute parallel for schedule(static)
#     for i in range(N):
#         y[i] = a * x[i] + y[i]

# def heavy_compute_cpu(x: 'float[:]', y: 'float[:]'):
#     N: int = x.shape[0]
#     temp: float = 0.0
#     for i in range(N):
#         temp = x[i]
#         # A heavy inner loop to increase arithmetic intensity
#         for j in range(1000):
#             temp = np.sqrt(temp + 1.0)
#         y[i] = temp

# # GPU version: offloaded using OpenMP target directive
# def heavy_compute_gpu(x: 'float[:]', y: 'float[:]'):
#     N: int = x.shape[0]
#     temp: float = 0.0
#     #$ omp target teams distribute parallel for schedule(static)
#     for i in range(N):
#         temp = x[i]
#         for j in range(1000):
#             temp = np.sqrt(temp + 1.0)
#         y[i] = temp

# def matmul_cpu(A: 'float[:,:]', B: 'float[:,:]', C: 'float[:,:]'):
#     N: int = A.shape[0]
#     s: float = 0.0
#     for i in range(N):
#         for j in range(N):
#             s = 0.0
#             for k in range(N):
#                 s += A[i, k] * B[k, j]
#             C[i, j] = s

# def matmul_gpu(A: 'float[:,:]', B: 'float[:,:]', C: 'float[:,:]'):
#     N: int = A.shape[0]
#     s: float = 0.0
#     #$ omp target teams distribute parallel for collapse(2)
#     for i in range(N):
#         for j in range(N):
#             s = 0.0
#             for k in range(N):
#                 s += A[i, k] * B[k, j]
#             C[i, j] = s
def print_cpu_gpu_thread_info():
    # Get the number of available GPU devices.
    num_devices = omp_get_num_devices()
    print("Number of available GPUs: ", num_devices)
    print()

    N : int = 10
    temp : int = 0
    data = np.empty((N, 4), dtype=int)

    # CPU
    print('Loop with CPU:')
    #$ omp parallel for
    for i in range(N):
        tid = omp_get_thread_num()        # Thread id within the team.
        nthreads = omp_get_num_threads()  # Number of threads.
        team = omp_get_team_num()         # Team number.
        nteams = omp_get_num_teams()
        data[i, 0] = team
        data[i, 1] = nteams
        data[i, 2] = tid
        data[i, 3] = nthreads
        temp = temp + 1

    for i in range(N):
        print("CPU: Iteration", i, "processed by team", data[i, 0], "/", data[i, 1], ", thread", data[i, 2], "/", data[i, 3])
    print('----------------------------------\n\n\n')

    # GPU
    print('Loop with GPU:')
    #$ omp target teams distribute parallel for schedule(static) private(temp)
    for i in range(N):
        tid = omp_get_thread_num()        # Thread id within the team.
        nthreads = omp_get_num_threads()  # Number of threads.
        team = omp_get_team_num()         # Team number.
        nteams = omp_get_num_teams()
        #print("GPU: Iteration", i, "processed by team", team, "/", nteams, "thread", tid, "/", nthreads, "threads")
        data[i, 0] = team
        data[i, 1] = nteams
        data[i, 2] = tid
        data[i, 3] = nthreads
        temp = temp + 1

    for i in range(N):
        print("GPU: Iteration", i, "processed by team", data[i, 0], "/", data[i, 1], ", thread", data[i, 2], "/", data[i, 3])

def axpy(a: 'float', x: 'float[:]', y: 'float[:]'):
    N: int = x.shape[0]
    for i in range(N):
        y[i] = a * x[i] + y[i]

def axpy_gpu(a: float, x: 'float[:]', y: 'float[:]'):
    N: int = x.shape[0]
    #$ omp target teams distribute parallel for schedule(static)
    for i in range(N):
        y[i] = a * x[i] + y[i]

def heavy_compute_cpu(x: 'float[:]', y: 'float[:]'):
    N: int = x.shape[0]
    temp: float = 0.0
    for i in range(N):
        temp = x[i]
        # A heavy inner loop to increase arithmetic intensity
        for j in range(1000):
            temp = np.sqrt(temp + 1.0)
        y[i] = temp

# GPU version: offloaded using OpenMP target directive
def heavy_compute_gpu(x: 'float[:]', y: 'float[:]'):
    N: int = x.shape[0]
    temp: float = 0.0
    #$ omp target teams distribute parallel for schedule(static)
    for i in range(N):
        temp = x[i]
        for j in range(1000):
            temp = np.sqrt(temp + 1.0)
        y[i] = temp

def matmul_cpu(A: 'float[:,:]', B: 'float[:,:]', C: 'float[:,:]'):
    N: int = A.shape[0]
    s: float = 0.0
    for i in range(N):
        for j in range(N):
            s = 0.0
            for k in range(N):
                s += A[i, k] * B[k, j]
            C[i, j] = s

def matmul_gpu(A: 'float[:,:]', B: 'float[:,:]', C: 'float[:,:]'):
    N: int = A.shape[0]
    s: float = 0.0
    #$ omp target teams distribute parallel for collapse(2)
    for i in range(N):
        for j in range(N):
            s = 0.0
            for k in range(N):
                s += A[i, k] * B[k, j]
            C[i, j] = s
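A minimal sketch of how the CPU/GPU kernel pairs above could be timed from the driver script (not part of this commit; it assumes pyccel_kernels has already been built with the Makefile above and is importable from the current directory):

# Hypothetical timing harness for the host and offloaded matrix multiply.
import time
import numpy as np
from pyccel_kernels import matmul_cpu, matmul_gpu

n = 1024
A = np.random.rand(n, n)
B = np.random.rand(n, n)
C = np.zeros((n, n))

t0 = time.perf_counter()
matmul_cpu(A, B, C)
t1 = time.perf_counter()
matmul_gpu(A, B, C)    # first call also pays for device initialization and data transfer
t2 = time.perf_counter()

print(f"matmul_cpu: {t1 - t0:.3f} s, matmul_gpu: {t2 - t1:.3f} s")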