From 28408c3a2d4fc74551e63befa671d103dd3728dd Mon Sep 17 00:00:00 2001 From: Repo Updater <noreply@mpcdf.mpg.de> Date: Sun, 3 Oct 2021 21:26:41 +0200 Subject: [PATCH] 72e3d505 some polishing, add some references to NumPy/SciPy --- notebooks/1b--Introduction.ipynb | 6 +-- notebooks/1c--Python_Refresher.ipynb | 16 +++--- notebooks/1d--BasicHPC.ipynb | 9 ++-- notebooks/2a--NumPy.ipynb | 76 +++++++++++----------------- 4 files changed, 45 insertions(+), 62 deletions(-) diff --git a/notebooks/1b--Introduction.ipynb b/notebooks/1b--Introduction.ipynb index 7a3c4fd..1de2b48 100644 --- a/notebooks/1b--Introduction.ipynb +++ b/notebooks/1b--Introduction.ipynb @@ -133,7 +133,7 @@ "## About this Python for HPC tutorial\n", "\n", "* Practical hands-on approach with code examples\n", - "* Presentation is mainly based on Jupyter notebooks\n", + "* Presentation is based on Jupyter notebooks\n", "* Course material available for download at \n", " https://gitlab.mpcdf.mpg.de/mpcdf/python-for-hpc-exercises" ] @@ -169,7 +169,7 @@ "\n", "* Use the link communicated by email to access a Jupyter service on the MPCDF HPC cloud\n", "* The course material *and* software are provided via an interactive JupyterLab interface\n", - "* Each instance provides up to 6 virtual CPU cores and up to 12 GB RAM (0.3 cores and 1.75 GB guaranteed)\n", + "* Each instance provides up to 6 virtual CPU cores and up to 12 GB RAM (less guaranteed)\n", "* Please keep the following points in mind\n", " * Use the JupyterLab menu **File $\\to$ Shut down** to free resources when finished\n", " * A session is terminated after 12h\n", @@ -185,7 +185,7 @@ }, "source": [ "### Option 2: MPCDF Python infrastructure on the HPC systems\n", - "* Python (2.7, **3.8**) is provided via the Anaconda Python Distribution\n", + "* Python (**3.8**) is provided via the Anaconda Python Distribution\n", "* software is accessible via environment modules, e.g. 
\n", " `module purge` \n", " `module load gcc/10 impi/2019.9` \n", diff --git a/notebooks/1c--Python_Refresher.ipynb b/notebooks/1c--Python_Refresher.ipynb index 55e0afe..0e937e5 100644 --- a/notebooks/1c--Python_Refresher.ipynb +++ b/notebooks/1c--Python_Refresher.ipynb @@ -28,7 +28,7 @@ "source": [ "### Python: History and Status\n", "* First version released in 1991 by G. van Rossum\n", - "* Implementations: **cPython**, PyPy, Jython, Pyston, ...\n", + "* Implementations: **cPython**, PyPy, Pyston, ...\n", "* Language versions: 2.7 (legacy, ✞2020), now 3.6 - 3.9 \n", " (to migrate legacy code, the packages `2to3`, `six`, `future` are helpful)" ] @@ -110,7 +110,7 @@ " * select \"Cell\" $\\to$ \"Run all\" from the menu to run the complete notebook\n", "* code, figures and documentation can be mixed in the same notebook\n", " * plot via matplotlib, document using markdown, typeset formulas via LaTeX\n", - "* launch via `jupyter notebook` from a terminal, then work in your browser\n", + "* locally, launch via `jupyter notebook` from a terminal, then work in your browser\n", "* notebook export to Python source file works via \"File\" $\\to$ \"Download as\"\n", "* https://jupyter.org" ] @@ -543,7 +543,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 1, "metadata": { "slideshow": { "slide_type": "-" @@ -565,7 +565,7 @@ " sq = a*a\n", " if verbose:\n", " print(sq)\n", - " return a*a\n", + " return sq\n", "\n", "b = square(2)\n", "c = square(b, verbose=True)" @@ -642,7 +642,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 2, "metadata": { "slideshow": { "slide_type": "subslide" @@ -664,7 +664,7 @@ " a.append(\"two\")\n", "\n", "def modify_list__localvar(a):\n", - " a = a + [\"three\"] # local variable a is created during the assignment\n", + " a = a + [\"three\"] # local variable!\n", "\n", "def modify_list__range(a):\n", " a[:] = a + [\"four\"]\n", @@ -1019,7 +1019,7 @@ "cell_type": "markdown", "metadata": { 
"slideshow": { - "slide_type": "fragment" + "slide_type": "skip" } }, "source": [ @@ -1245,7 +1245,7 @@ "source": [ "### Python Standard Library\n", "* Python comes with a huge standard library (\"batteries included\")\n", - "* noteworthy modules: `os`, `sys`, `math`, `multiprocessing`\n", + "* noteworthy modules: `os`, `sys`, `math`, `multiprocessing`, ...\n", "* building blocks for most programming tasks already exist\n", "* well-tested and well documented \n", " https://docs.python.org/3.9/library/index.html" diff --git a/notebooks/1d--BasicHPC.ipynb b/notebooks/1d--BasicHPC.ipynb index 2c4823d..3c44686 100644 --- a/notebooks/1d--BasicHPC.ipynb +++ b/notebooks/1d--BasicHPC.ipynb @@ -9,9 +9,10 @@ }, "source": [ "# Basics of High Performance Computing\n", - "**Python for HPC course**\n", "\n", - "Sebastian Ohlmann, Klaus Reuter\n", + "**Python for HPC**\n", + "\n", + "2018-2021 Sebastian Ohlmann, Klaus Reuter\n", "\n", "Max Planck Computing and Data Facility, Garching" ] @@ -374,7 +375,7 @@ "cell_type": "markdown", "metadata": { "slideshow": { - "slide_type": "slide" + "slide_type": "skip" } }, "source": [ @@ -427,7 +428,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.3" + "version": "3.8.8" } }, "nbformat": 4, diff --git a/notebooks/2a--NumPy.ipynb b/notebooks/2a--NumPy.ipynb index 66a0139..97e01fc 100644 --- a/notebooks/2a--NumPy.ipynb +++ b/notebooks/2a--NumPy.ipynb @@ -12,7 +12,8 @@ "\n", "**Python for HPC**\n", "\n", - "Rafael Lago, Sebastian Ohlmann, Klaus Reuter\n", + "2018-2021 Sebastian Ohlmann, Klaus Reuter \n", + "2020 Rafael Lago\n", "\n", "Max Planck Computing and Data Facility, Garching" ] @@ -26,43 +27,11 @@ }, "source": [ "## Outline\n", + "\n", "* NumPy\n", "* SciPy examples\n", - "* Input/Output with NumPy arrays\n", - "* Extra: image processing examples" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "### Python 
Overview - <span style=\"color:green\">Pros</span>\n", - "\n", - "* easy to use; argueably the best entry-level programming language\n", - "* quite portable between OS\n", - "* object oriented\n", - "* many available libraries for various applications" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "### Python Overview - <span style=\"color:red\">Cons</span>\n", - "\n", - "* dynamically typed: easy to misuse classes/types\n", - "* \"too easy\": leads to occasionally poorly optimized code (a.k.a. lazy coding)\n", - "* <span style=\"color:red\">slower</span> than C/Fortran\n", - "\n", - "$\\to$ let us address this so called \"slowness\" problem" + "* Input/output with NumPy arrays\n", + "* Bonus: Image processing examples" ] }, { @@ -785,7 +754,7 @@ "\n", "\n", "* `x[1:] + x[:-1]` will create a temporary array\n", - "* we can avoid it, but is it worthy?" + "* we can avoid it, but does that pay off?" ] }, { @@ -901,7 +870,7 @@ "\n", "* Tradeoff: high development productivity versus performance\n", "* NumPy code *can be* as fast as C or Fortran code, but often it is a factor of 2-4 slower\n", - "* Hotspots in C/Fortran + Python Interface (Wed. Morning Session)" + "* $\\to$ Implement hotspots in C/Fortran + Python Interface" ] }, { @@ -1307,16 +1276,23 @@ } }, "source": [ - "## SciPy Summary\n", + "## NumPy/SciPy Summary\n", + "\n", "* NumPy: efficient handling of arrays\n", - "* SciPy: more advanced mathematical and numerical routines; uses NumPy under the hood too!\n", - " * very features rich\n", + "* SciPy: more advanced mathematical and numerical routines; uses NumPy under the hood *plus* other C/F libraries\n", "* Performance tips:\n", " * Work on full arrays (slicing, NumPy routines...)\n", - " * SciPy `linalg` is more comprehensive than NumPy's\n", - " * identify Hotspots and optimize them:\n", - " * Profile\n", - " * code in Cython or C/Fortran + Python Interface (Wed. 
Morning Session)\n" + " * identify hotspots and optimize them\n", + " * profile\n", + " * code in Cython or C/Fortran + Python interface -- or try Numba\n", + "\n", + "Further reading\n", + "\n", + "* Harris, C.R., Millman, K.J., van der Walt, S.J. et al. *Array programming with NumPy.* **Nature** 585, 357–362 (2020). (https://doi.org/10.1038/s41586-020-2649-2)\n", + "* Bressert, E. (2012). SciPy and NumPy (1st edition.). O'Reilly Media, Inc. (https://ebooks.mpdl.mpg.de/ebooks/Record/EB001944176)\n", + "* There are numerous books on the topic available: https://ebooks.mpdl.mpg.de/ebooks/Search/Results?type=AllFields&lookfor=numpy\n", + "\n", + "(MPG.eBooks work from any Max Planck IP address.)" ] }, { @@ -1652,12 +1628,18 @@ }, "source": [ "## Summary on HDF5 IO of NumPy data\n", + "\n", "* easy to use with `h5py` (alternative module would be `pytables`)\n", "* hierarchical structure enables you to store complex data in a single file\n", "* HDF5 files are future-proof and portable data files\n", "* data can be shared & used by other programs\n", "\n", - "$\\to$ our recommendation for NumPy-related I/O!" + "$\\to$ our recommendation for NumPy-related I/O!\n", + "\n", + "Further reading:\n", + "\n", + "* Collette, A. (2013). Python and HDF5 (1st edition.). O'Reilly Media, Inc. \n", + " (https://ebooks.mpdl.mpg.de/ebooks/Record/EB001941719)" ] }, { @@ -1853,7 +1835,7 @@ }, "source": [ "## Example 2: Get edges\n", - "* Use following formulae:\n", + "* Use the following formula:\n", "$$\\tilde{f}_{i,j} = 8 f_{i,j} - (f_{i-1,j} + f_{i+1,j} + f_{i,j-1} + f_{i,j+1} + f_{i-1,j-1} + f_{i-1,j+1} + f_{i+1,j+1} + f_{i+1,j-1})$$\n", "* Subtract mean of surrounding cells\n", "* Also use diagonals\n", -- GitLab