Commit 7e9049f3 authored by Max Lindqvist

Testing...

parent fad3c356
Pipeline #246261 failed
@@ -72,16 +72,22 @@ echo "=== Running ./hello_openmp_gpu_fortran ==="
echo "=== Loading environment with pyccel installed ==="
source ~/virtual_envs/env_pyccel/bin/activate
# # ------------ OpenMP + pyccel ------------ #
make clean
unset LD_LIBRARY_PATH
# export LD_LIBRARY_PATH="/usr/lib/llvm-18/lib:$LD_LIBRARY_PATH"
# export LD_LIBRARY_PATH="/mpcdf/soft/RHEL_9/sub/amd-llvm_5_3/modules/libs:$LD_LIBRARY_PATH"
export LD_LIBRARY_PATH="/mpcdf/soft/RHEL_9/packages/x86_64/python-waterboa/2024.06/lib:$LD_LIBRARY_PATH"
echo $LD_LIBRARY_PATH
echo "=== Compiling pyccel_kernels ==="
# pyccel --language c --openmp --compiler compiler_clang_viper.json --verbose pyccel_kernels.py
# pyccel --language fortran --openmp --libdir /mpcdf/soft/RHEL_9/packages/x86_64/python-waterboa/2024.06/lib --compiler compiler_flang_viper.json --verbose pyccel_kernels.py
pyccel --language fortran --openmp --compiler compiler_flang_viper.json --verbose pyccel_kernels.py
# pyccel --language fortran --openmp --verbose pyccel_kernels.py
python hello_openmp_gpu_pyccel.py
# echo "=== Compiling hello_openmp_gpu_pyccel ==="
#pyccel --language c --openmp --compiler compiler_clang_viper.json --verbose hello_openmp_gpu_pyccel.py
echo "=== Running hello_openmp_gpu_pyccel.py ==="
python hello_openmp_gpu_pyccel.py > hello_openmp_gpu_pyccel.out
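The driver hello_openmp_gpu_pyccel.py itself is not part of this diff. As a rough, hypothetical sketch only (assuming it simply imports the compiled pyccel_kernels extension and exercises the kernels shown further down), it could look like:

# Hypothetical driver sketch, not part of this commit.
# Assumes pyccel has already built pyccel_kernels into an importable shared library.
import numpy as np
from pyccel_kernels import print_cpu_gpu_thread_info, axpy, axpy_gpu

# Report how many GPUs the OpenMP runtime sees and which teams/threads run each iteration.
print_cpu_gpu_thread_info()

# Small correctness check: the offloaded axpy should match the host version.
n = 1_000_000
x = np.random.rand(n)
y_cpu = np.random.rand(n)
y_gpu = y_cpu.copy()
axpy(2.0, x, y_cpu)        # host loop
axpy_gpu(2.0, x, y_gpu)    # OpenMP target offload
print("max difference:", np.max(np.abs(y_cpu - y_gpu)))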
compiler_flang_viper.json
{
    "exec": "amdflang",
    "mpi_exec": "mpicc",
    "exec": "flang",
    "mpi_exec": "mpirun",
    "language": "fortran",
    "module_output_flag": "-J",
    "debug_flags": [
@@ -20,9 +20,15 @@
"openmp": {
"flags": [
"-fopenmp",
"--offload-arch=gfx942"
"--offload-arch=gfx942",
"-L/${AMDLLVM_HOME}/lib/llvm/lib",
"-L/viper/u2/system/soft/RHEL_9/packages/x86_64/gcc/14.1.0/lib64"
],
"libs": []
"libs": [
"-lomptarget",
"-lflang",
"-lgfortran"
]
},
"openacc": {
"flags": [
@@ -52,6 +58,7 @@
"/mpcdf/soft/RHEL_9/packages/x86_64/python-waterboa/2024.06/include/python3.12",
"/u/maxlin/virtual_envs/env_pyccel/lib/python3.12/site-packages/numpy/_core/include"
],
"libs":[],
"shared_suffix": ".cpython-312-x86_64-linux-gnu.so",
"dependencies": [
"/mpcdf/soft/RHEL_9/packages/x86_64/python-waterboa/2024.06/lib/libpython3.12.so"
Makefile
#--------------------------------------
# CONFIGURATION
#--------------------------------------
PYCCEL = pyccel
COMPILER_JSON = compiler_flang_viper.json
PYTHON_FILE = pyccel_kernels.py
OUTPUT_SO = pyccel_kernels.cpython-312-x86_64-linux-gnu.so
# Default rule: build the shared library
all: $(OUTPUT_SO)
# Rule to run pyccel and generate the shared library
$(OUTPUT_SO): $(PYTHON_FILE)
	$(PYCCEL) --language fortran --openmp --compiler $(COMPILER_JSON) --verbose $(PYTHON_FILE)
#--------------------------------------
@@ -13,3 +24,5 @@ clean:
	find ./ -type f -name '*.lock' -delete
	find ./ -type f -name '*.so' -delete
	find ./ -type f -name '*.o' -delete
.PHONY: all clean
\ No newline at end of file
pyccel_kernels.py
@@ -10,104 +10,105 @@ from pyccel.stdlib.internal.openmp import (
def set_pi(pi: float) -> None:
    pi = 3.14159

# def print_cpu_gpu_thread_info():
#     # Get the number of available GPU devices.
#     num_devices = omp_get_num_devices()
#     print("Number of available GPUs: ", num_devices)
#     print()
#     N : int = 10
#     temp : int = 0
#     data = np.empty((N, 4), dtype=int)
#     # CPU
#     print('Loop with CPU:')
#     #$ omp parallel for
#     for i in range(N):
#         tid = omp_get_thread_num()        # Thread id within the team.
#         nthreads = omp_get_num_threads()  # Number of threads.
#         team = omp_get_team_num()         # Team number.
#         nteams = omp_get_num_teams()
#         data[i, 0] = team
#         data[i, 1] = nteams
#         data[i, 2] = tid
#         data[i, 3] = nthreads
#     for i in range(N):
#         print("CPU: Iteration", i, "processed by team", data[i, 0], "/", data[i, 1], ", thread", data[i, 2], "/", data[i, 3])
#     print('----------------------------------\n\n\n')
#     # GPU
#     print('Loop with GPU:')
#     #$ omp target teams distribute parallel for schedule(static) private(temp)
#     for i in range(N):
#         tid = omp_get_thread_num()        # Thread id within the team.
#         nthreads = omp_get_num_threads()  # Number of threads.
#         team = omp_get_team_num()         # Team number.
#         nteams = omp_get_num_teams()
#         #print("GPU: Iteration", i, "processed by team", team, "/", nteams, "thread", tid, "/", nthreads, "threads")
#         data[i, 0] = team
#         data[i, 1] = nteams
#         data[i, 2] = tid
#         data[i, 3] = nthreads
#         temp = temp + 1
#     for i in range(N):
#         print("GPU: Iteration", i, "processed by team", data[i, 0], "/", data[i, 1], ", thread", data[i, 2], "/", data[i, 3])

# def axpy(a: 'float', x: 'float[:]', y: 'float[:]'):
#     N: int = x.shape[0]
#     for i in range(N):
#         y[i] = a * x[i] + y[i]

# def axpy_gpu(a: float, x: 'float[:]', y: 'float[:]'):
#     N: int = x.shape[0]
#     #$ omp target teams distribute parallel for schedule(static)
#     for i in range(N):
#         y[i] = a * x[i] + y[i]

# def heavy_compute_cpu(x: 'float[:]', y: 'float[:]'):
#     N: int = x.shape[0]
#     temp: float = 0.0
#     for i in range(N):
#         temp = x[i]
#         # A heavy inner loop to increase arithmetic intensity
#         for j in range(1000):
#             temp = np.sqrt(temp + 1.0)
#         y[i] = temp

# # GPU version: offloaded using OpenMP target directive
# def heavy_compute_gpu(x: 'float[:]', y: 'float[:]'):
#     N: int = x.shape[0]
#     temp: float = 0.0
#     #$ omp target teams distribute parallel for schedule(static)
#     for i in range(N):
#         temp = x[i]
#         for j in range(1000):
#             temp = np.sqrt(temp + 1.0)
#         y[i] = temp

# def matmul_cpu(A: 'float[:,:]', B: 'float[:,:]', C: 'float[:,:]'):
#     N: int = A.shape[0]
#     s: float = 0.0
#     for i in range(N):
#         for j in range(N):
#             s = 0.0
#             for k in range(N):
#                 s += A[i, k] * B[k, j]
#             C[i, j] = s

# def matmul_gpu(A: 'float[:,:]', B: 'float[:,:]', C: 'float[:,:]'):
#     N: int = A.shape[0]
#     s: float = 0.0
#     #$ omp target teams distribute parallel for collapse(2)
#     for i in range(N):
#         for j in range(N):
#             s = 0.0
#             for k in range(N):
#                 s += A[i, k] * B[k, j]
#             C[i, j] = s
def print_cpu_gpu_thread_info():
    # Get the number of available GPU devices.
    num_devices = omp_get_num_devices()
    print("Number of available GPUs: ", num_devices)
    print()

    N : int = 10
    temp : int = 0
    data = np.empty((N, 4), dtype=int)

    # CPU
    print('Loop with CPU:')
    #$ omp parallel for
    for i in range(N):
        tid = omp_get_thread_num()        # Thread id within the team.
        nthreads = omp_get_num_threads()  # Number of threads.
        team = omp_get_team_num()         # Team number.
        nteams = omp_get_num_teams()
        data[i, 0] = team
        data[i, 1] = nteams
        data[i, 2] = tid
        data[i, 3] = nthreads
        temp = temp + 1

    for i in range(N):
        print("CPU: Iteration", i, "processed by team", data[i, 0], "/", data[i, 1], ", thread", data[i, 2], "/", data[i, 3])
    print('----------------------------------\n\n\n')

    # GPU
    print('Loop with GPU:')
    #$ omp target teams distribute parallel for schedule(static) private(temp)
    for i in range(N):
        tid = omp_get_thread_num()        # Thread id within the team.
        nthreads = omp_get_num_threads()  # Number of threads.
        team = omp_get_team_num()         # Team number.
        nteams = omp_get_num_teams()
        #print("GPU: Iteration", i, "processed by team", team, "/", nteams, "thread", tid, "/", nthreads, "threads")
        data[i, 0] = team
        data[i, 1] = nteams
        data[i, 2] = tid
        data[i, 3] = nthreads
        temp = temp + 1

    for i in range(N):
        print("GPU: Iteration", i, "processed by team", data[i, 0], "/", data[i, 1], ", thread", data[i, 2], "/", data[i, 3])

def axpy(a: 'float', x: 'float[:]', y: 'float[:]'):
    N: int = x.shape[0]
    for i in range(N):
        y[i] = a * x[i] + y[i]

def axpy_gpu(a: float, x: 'float[:]', y: 'float[:]'):
    N: int = x.shape[0]
    #$ omp target teams distribute parallel for schedule(static)
    for i in range(N):
        y[i] = a * x[i] + y[i]

def heavy_compute_cpu(x: 'float[:]', y: 'float[:]'):
    N: int = x.shape[0]
    temp: float = 0.0
    for i in range(N):
        temp = x[i]
        # A heavy inner loop to increase arithmetic intensity
        for j in range(1000):
            temp = np.sqrt(temp + 1.0)
        y[i] = temp

# GPU version: offloaded using OpenMP target directive
def heavy_compute_gpu(x: 'float[:]', y: 'float[:]'):
    N: int = x.shape[0]
    temp: float = 0.0
    #$ omp target teams distribute parallel for schedule(static)
    for i in range(N):
        temp = x[i]
        for j in range(1000):
            temp = np.sqrt(temp + 1.0)
        y[i] = temp

def matmul_cpu(A: 'float[:,:]', B: 'float[:,:]', C: 'float[:,:]'):
    N: int = A.shape[0]
    s: float = 0.0
    for i in range(N):
        for j in range(N):
            s = 0.0
            for k in range(N):
                s += A[i, k] * B[k, j]
            C[i, j] = s

def matmul_gpu(A: 'float[:,:]', B: 'float[:,:]', C: 'float[:,:]'):
    N: int = A.shape[0]
    s: float = 0.0
    #$ omp target teams distribute parallel for collapse(2)
    for i in range(N):
        for j in range(N):
            s = 0.0
            for k in range(N):
                s += A[i, k] * B[k, j]
            C[i, j] = s
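A minimal sketch of how the CPU/GPU kernel pairs above could be timed from the driver script (not part of this commit; it assumes pyccel_kernels has already been built with the Makefile above and is importable from the current directory):

# Hypothetical timing harness for the host and offloaded matrix multiply.
import time
import numpy as np
from pyccel_kernels import matmul_cpu, matmul_gpu

n = 1024
A = np.random.rand(n, n)
B = np.random.rand(n, n)
C = np.zeros((n, n))

t0 = time.perf_counter()
matmul_cpu(A, B, C)
t1 = time.perf_counter()
matmul_gpu(A, B, C)    # first call also pays for device initialization and data transfer
t2 = time.perf_counter()

print(f"matmul_cpu: {t1 - t0:.3f} s, matmul_gpu: {t2 - t1:.3f} s")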