From 751aedeba15a0018924c8baf4de90b11fc282d48 Mon Sep 17 00:00:00 2001 From: Repo Updater <noreply@mpcdf.mpg.de> Date: Wed, 6 Oct 2021 07:08:16 +0200 Subject: [PATCH] 62e5e3b4 some adaptations of Diffusion and Cython --- notebooks/2b--Cython.ipynb | 53 ++++++++------ notebooks/2d--Diffusion.ipynb | 133 ++++++++++++++++++---------------- 2 files changed, 102 insertions(+), 84 deletions(-) diff --git a/notebooks/2b--Cython.ipynb b/notebooks/2b--Cython.ipynb index 0dc9768..647474c 100644 --- a/notebooks/2b--Cython.ipynb +++ b/notebooks/2b--Cython.ipynb @@ -11,7 +11,7 @@ "# Cython\n", "**Python for HPC course**\n", "\n", - "Sebastian Ohlmann, Klaus Reuter\n", + "2018 - 2021 Sebastian Ohlmann, Klaus Reuter\n", "\n", "Max Planck Computing and Data Facility, Garching" ] @@ -45,16 +45,16 @@ } }, "source": [ - "### Cython Overview\n", - "* \"Cython is Python with C data types\"\n", - " * Cython is a superset of the Python language\n", - " * Cython is a source-to-source compiler\n", + "### Cython?\n", "\n", + "* Cython is Python extended with C data types $\\to$ Cython is a superset of the Python language\n", + "* Cython is a source-to-source compiler\n", "\n", - "### Cython Workflow\n", - "1. move performance-critical code from Python to Cython (`.pyx`)\n", - "2. Cython compiler translates `.pyx` code into `.c` code\n", - "3. C code is finally compiled into a Python module (Linux `.so`) by a C compiler (e.g. `gcc`)\n" + "### Workflow\n", + "\n", + "1. create Cython source file (`.pyx`), e.g. by moving performance-critical code from Python to Cython\n", + "2. apply Cython compiler which translates `.pyx` code into `.c` code\n", + "3. C code is finally compiled into a Python module (Linux `.so`) by a C compiler (e.g. 
`gcc`)\n" ] }, { @@ -65,8 +65,9 @@ } }, "source": [ - "### Cython Advantages\n", - "* achieve performance close to native C/C++ or Fortran code\n", + "### Advantages\n", + "\n", + "* achieve performance close to native C/C++ or Fortran code while keeping Python-like code\n", " * Cython code is compiled, not interpreted\n", " * compiler optimizations can be applied (e.g. vectorization)\n", " * OpenMP thread parallelization becomes possible\n", @@ -82,9 +83,10 @@ } }, "source": [ - "### Cython Compilation\n", - "* in principle, shell scripts or Makefiles can be used, but avoid this\n", - "* better: use a simple `setup.py` to compile your Cython code reliably\n", + "### Compilation\n", + "\n", + "* in principle, shell scripts or Makefiles can be used (but avoid this)\n", + "* better: use a simple `setup.py` script to compile and install your Cython code properly\n", "* see the simple example at `cython/hello_world`" ] }, @@ -157,10 +159,11 @@ } }, "source": [ - "### Integration with Jupyter notebooks\n", - "* Cython code can be compiled and used directly from a Jupyter notebook (`%load_ext Cython`)\n", - "* use the cell magic `%%cython` to compile a Jupyter cell\n", - "* using `-c=` is is possible to specify compiler optimization flags" + "### Cython integration with Jupyter notebooks\n", + "\n", + "* Cython code can be compiled and used directly from a Jupyter notebook after loading (`%load_ext Cython`)\n", + "* use the cell magic `%%cython` to compile a Jupyter cell with Cython code\n", + "* using `-c=` allows specifying compiler optimization flags; for linker flags use `--link-args`" ] }, { @@ -356,7 +359,7 @@ "source": [ "## Interfacing C/C++ code with Cython\n", "\n", - "### $\\rightarrow$ see `Interfacing_with_C_and_F.ipynb`" + "### $\\rightarrow$ continue with the `Interfacing_with_C_and_F.ipynb` notebook" ] }, { @@ -368,9 +371,10 @@ }, "source": [ "## Cython summary\n", + "\n", "* Cython speeds up Python code by converting it into C and compiling it\n", "* 
Workflow\n", - " * start with existing Python code, move it into `.pyx` file, create basic `setup.py`\n", + " * start with existing (critical) Python code, move it to a `.pyx` file, create basic `setup.py`\n", " * introduce basic type declarations, e.g. `cdef int a`\n", " * introduce NumPy array declarations, e.g. \n", " `np.ndarray[np.float64_t, ndim=2] grid`\n", @@ -380,6 +384,13 @@ " * http://cython.org/ for in-depth information, in particular\n", " * http://cython.readthedocs.io/en/latest/src/tutorial/numpy.html" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -399,7 +410,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.8.8" } }, "nbformat": 4, diff --git a/notebooks/2d--Diffusion.ipynb b/notebooks/2d--Diffusion.ipynb index c152ebe..6d75981 100644 --- a/notebooks/2d--Diffusion.ipynb +++ b/notebooks/2d--Diffusion.ipynb @@ -246,8 +246,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 192 ms, sys: 1.37 ms, total: 194 ms\n", - "Wall time: 192 ms\n" + "CPU times: user 192 ms, sys: 3.83 ms, total: 196 ms\n", + "Wall time: 195 ms\n" ] } ], @@ -299,8 +299,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 4.32 ms, sys: 67 µs, total: 4.39 ms\n", - "Wall time: 3.41 ms\n" + "CPU times: user 4.39 ms, sys: 29 µs, total: 4.42 ms\n", + "Wall time: 3.51 ms\n" ] } ], @@ -339,8 +339,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 2.9 ms, sys: 0 ns, total: 2.9 ms\n", - "Wall time: 2.38 ms\n" + "CPU times: user 4.59 ms, sys: 48 µs, total: 4.64 ms\n", + "Wall time: 3.64 ms\n" ] } ], @@ -362,8 +362,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 14.3 s, sys: 4.85 s, total: 19.1 s\n", - "Wall time: 19.1 s\n" + "CPU times: user 13.7 s, sys: 5.24 s, total: 19 s\n", + "Wall time: 19 s\n" ] } ], @@ -481,8 +481,8 @@ "name": 
"stdout", "output_type": "stream", "text": [ - "CPU times: user 12.1 s, sys: 3.87 ms, total: 12.1 s\n", - "Wall time: 12.1 s\n" + "CPU times: user 12.2 s, sys: 5.63 ms, total: 12.2 s\n", + "Wall time: 12.2 s\n" ] } ], @@ -553,8 +553,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 12 s, sys: 7.75 ms, total: 12 s\n", - "Wall time: 12 s\n" + "CPU times: user 12.5 s, sys: 18.8 ms, total: 12.5 s\n", + "Wall time: 12.5 s\n" ] } ], @@ -633,8 +633,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 1.72 ms, sys: 210 µs, total: 1.93 ms\n", - "Wall time: 1.93 ms\n" + "CPU times: user 694 µs, sys: 90 µs, total: 784 µs\n", + "Wall time: 788 µs\n" ] } ], @@ -656,8 +656,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 297 µs, sys: 36 µs, total: 333 µs\n", - "Wall time: 243 µs\n" + "CPU times: user 228 µs, sys: 19 µs, total: 247 µs\n", + "Wall time: 185 µs\n" ] } ], @@ -698,8 +698,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 9.91 s, sys: 11.8 ms, total: 9.92 s\n", - "Wall time: 9.92 s\n" + "CPU times: user 10.1 s, sys: 19.3 ms, total: 10.1 s\n", + "Wall time: 10.1 s\n" ] } ], @@ -770,8 +770,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 16.5 s, sys: 3.72 ms, total: 16.5 s\n", - "Wall time: 16.5 s\n" + "CPU times: user 16.1 s, sys: 7.69 ms, total: 16.1 s\n", + "Wall time: 16.1 s\n" ] } ], @@ -891,8 +891,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 2.48 ms, sys: 183 µs, total: 2.66 ms\n", - "Wall time: 2.05 ms\n" + "CPU times: user 2.58 ms, sys: 0 ns, total: 2.58 ms\n", + "Wall time: 2.03 ms\n" ] } ], @@ -4726,8 +4726,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 2.12 s, sys: 368 µs, total: 2.12 s\n", - "Wall time: 2.12 s\n" + "CPU times: user 1.93 s, sys: 4 ms, total: 1.93 s\n", + "Wall time: 1.93 s\n" ] } ], @@ -4831,8 +4831,8 @@ "name": "stdout", "output_type": "stream", 
"text": [ - "CPU times: user 3.37 s, sys: 22 µs, total: 3.37 s\n", - "Wall time: 3.37 s\n" + "CPU times: user 3.36 s, sys: 54 µs, total: 3.37 s\n", + "Wall time: 3.36 s\n" ] } ], @@ -4878,7 +4878,7 @@ "output_type": "stream", "text": [ "rm -f diff_mpi.exe diff.exe *.o *.mod\n", - "gfortran -O3 -march=native -fopt-info-vec -fopenmp -fno-strict-aliasing -o diff.exe diff.F90\n", + "f77 -O3 -march=native -fopt-info-vec -fopenmp -o diff.exe diff.F90\n", "diff.F90:30:0: optimized: loop vectorized using 32 byte vectors\n", "diff.F90:30:0: optimized: loop versioned for vectorization because of possible aliasing\n", "diff.F90:19:0: optimized: loop vectorized using 32 byte vectors\n", @@ -4931,7 +4931,7 @@ " 90 %\n", " 95 %\n", " 100 %\n", - " main loop time = 1.2827631459999793 \n" + " main loop time = 1.2748106609797105 \n" ] } ], @@ -5046,8 +5046,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 2.19 s, sys: 7.93 ms, total: 2.2 s\n", - "Wall time: 1.11 s\n" + "CPU times: user 2.07 s, sys: 434 µs, total: 2.07 s\n", + "Wall time: 1.04 s\n" ] } ], @@ -5096,7 +5096,7 @@ " 90 %\n", " 95 %\n", " 100 %\n", - " main loop time = 1.2201892590001080 \n" + " main loop time = 1.1477611069567502 \n" ] } ], @@ -5130,7 +5130,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 63, "metadata": { "slideshow": { "slide_type": "-" @@ -5143,7 +5143,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 64, "metadata": { "slideshow": { "slide_type": "subslide" @@ -5151,7 +5151,7 @@ }, "outputs": [], "source": [ - "@jit(nopython=True, fastmath=True)\n", + "@jit(nopython=True)\n", "def apply_periodic_bc_python_numba(grid, n_points):\n", " \"\"\"Explicitly apply periodic boundary conditions, via Python loops.\"\"\"\n", " for j in range(n_points + 2):\n", @@ -5164,7 +5164,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 65, "metadata": { "slideshow": { "slide_type": "-" @@ -5172,7 +5172,7 @@ }, 
"outputs": [], "source": [ - "@jit(nopython=True, fastmath=True) # parallel=True turns out to slow down the code a bit\n", + "@jit(nopython=True)\n", "def evolve_python_numba(grid, grid_tmp, n_points, dt, D):\n", " apply_periodic_bc_python_numba(grid, n_points)\n", " for i in range(1, n_points+1):\n", @@ -5185,7 +5185,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 66, "metadata": { "scrolled": true, "slideshow": { @@ -5200,7 +5200,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 67, "metadata": { "scrolled": true, "slideshow": { @@ -5212,8 +5212,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 3.91 s, sys: 3.84 ms, total: 3.91 s\n", - "Wall time: 3.91 s\n" + "CPU times: user 3.69 s, sys: 271 µs, total: 3.69 s\n", + "Wall time: 3.7 s\n" ] } ], @@ -5225,7 +5225,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 68, "metadata": { "slideshow": { "slide_type": "subslide" @@ -5233,7 +5233,7 @@ }, "outputs": [], "source": [ - "@jit(nopython=True, fastmath=True, parallel=True)\n", + "@jit(nopython=True, parallel=True)\n", "def evolve_python_numba_parallel(grid, grid_tmp, n_points, dt, D):\n", " apply_periodic_bc_python_numba(grid, n_points)\n", " for i in prange(1, n_points+1):\n", @@ -5246,7 +5246,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 69, "metadata": { "slideshow": { "slide_type": "fragment" @@ -5260,7 +5260,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 70, "metadata": { "slideshow": { "slide_type": "-" @@ -5271,8 +5271,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 3.98 s, sys: 126 ms, total: 4.11 s\n", - "Wall time: 1.96 s\n" + "CPU times: user 3.76 s, sys: 95.6 ms, total: 3.85 s\n", + "Wall time: 1.84 s\n" ] } ], @@ -5284,7 +5284,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 71, "metadata": { "slideshow": { "slide_type": "skip" @@ 
-5326,7 +5326,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 72, "metadata": { "slideshow": { "slide_type": "subslide" @@ -5334,7 +5334,7 @@ }, "outputs": [], "source": [ - "@jit(nopython=True, fastmath=True, parallel=True)\n", + "@jit(nopython=True, parallel=True)\n", "def apply_periodic_bc_numba(grid, n_points):\n", " \"\"\"Explicitly apply periodic boundary conditions, using NumPy ranges.\"\"\"\n", " grid[ 0, :] = grid[-2, :]\n", @@ -5345,7 +5345,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 73, "metadata": { "slideshow": { "slide_type": "fragment" @@ -5353,7 +5353,7 @@ }, "outputs": [], "source": [ - "@jit(nopython=True, fastmath=True)\n", + "@jit(nopython=True)\n", "def evolve_np_slicing_numba(grid, grid_tmp, n_points, dt, D):\n", " \"\"\"Time step based on an explicitly coded Laplacian using array slicing.\"\"\"\n", " apply_periodic_bc_numba(grid, n_points)\n", @@ -5365,7 +5365,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 74, "metadata": { "slideshow": { "slide_type": "fragment" @@ -5379,7 +5379,7 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 75, "metadata": { "slideshow": { "slide_type": "fragment" @@ -5390,8 +5390,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 9.37 s, sys: 342 ms, total: 9.71 s\n", - "Wall time: 8.5 s\n" + "CPU times: user 9.02 s, sys: 267 ms, total: 9.29 s\n", + "Wall time: 7.97 s\n" ] } ], @@ -5403,7 +5403,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 76, "metadata": { "slideshow": { "slide_type": "subslide" @@ -5411,7 +5411,7 @@ }, "outputs": [], "source": [ - "@jit(nopython=True, fastmath=True, parallel=True)\n", + "@jit(nopython=True, parallel=True)\n", "def evolve_np_slicing_numba_parallel(grid, grid_tmp, n_points, dt, D):\n", " \"\"\"Time step based on an explicitly coded Laplacian using array slicing.\"\"\"\n", " apply_periodic_bc_numba(grid, 
n_points)\n", @@ -5423,7 +5423,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 77, "metadata": { "slideshow": { "slide_type": "fragment" @@ -5437,7 +5437,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 78, "metadata": { "slideshow": { "slide_type": "fragment" @@ -5448,8 +5448,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 4.75 s, sys: 423 ms, total: 5.17 s\n", - "Wall time: 2.03 s\n" + "CPU times: user 4.06 s, sys: 425 ms, total: 4.49 s\n", + "Wall time: 1.75 s\n" ] } ], @@ -5461,7 +5461,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 79, "metadata": { "slideshow": { "slide_type": "skip" @@ -5515,8 +5515,15 @@ "* due to knowledge about the structure of the stencil, the code can be highly optimized and may outperform cython or Numba\n", "* install via conda or pip, find more information at \n", " https://i10git.cs.fau.de/pycodegen/pystencils\n", - "* optional exercise: implement the computation using pystencils" + "* optional exercise: implement the diffusion computation using pystencils" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { -- GitLab