Commit 43cd6f92 authored by Repo Updater's avatar Repo Updater
b7df38db re-enable jupyterlab for Binder and beyond

parent b490a66f
@@ -78,9 +78,5 @@ commercial Python distributions -- and use `conda` or `mamba` together with the
### Jupyter presentation via RISE
* Presentation of the Jupyter notebook cells as slides is possible via the [RISE](http://rise.readthedocs.io/en/latest/index.html) extension.
* Installation
* `pip install RISE`
* `jupyter-nbextension install rise --py --sys-prefix`
* `jupyter-nbextension enable rise --py --sys-prefix`
* To enter presentation mode, simply press <Alt+r> from within a notebook.
@@ -4,7 +4,7 @@ channels:
- conda-forge
- nodefaults
dependencies:
- python=3.11
- pip
- libopenblas=*=*openmp*
- numpy
@@ -26,5 +26,6 @@ dependencies:
- mpi4py
- hdf5=*=mpi*
- h5py=*=mpi*
- jupyterlab
- notebook<7
- rise
#!/bin/bash -l
#
# Example job script for using MPI4PY on Raven@MPCDF.
#
#SBATCH -o ./job.out.%j
#SBATCH -e ./job.err.%j
@@ -8,17 +8,17 @@
#SBATCH -J PYTHON_MPI
#SBATCH --mail-type=none
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=72
#SBATCH --cpus-per-task=1
#SBATCH --time=00:10:00 # run time in h:m:s, up to 24h possible
module purge
module load gcc/13 openmpi/4.1
module load anaconda/3/2023.03
module load mpi4py/3.1.5
# avoid overbooking of the cores which would occur via NumPy/MKL threading
# as the parallelization takes place via MPI tasks
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1}
srun python3 ./python_mpi4py.py
#!/bin/bash -l
#
# Example job script for using Python-Multiprocessing on Raven@MPCDF.
#
#SBATCH -o ./job.out.%j
#SBATCH -e ./job.err.%j
@@ -9,12 +9,12 @@
#SBATCH --mail-type=none
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1 # only start 1 task via srun because Python multiprocessing starts more tasks internally
#SBATCH --cpus-per-task=72 # assign all the cores to that first task to make room for Python's multiprocessing tasks
#SBATCH --time=00:10:00
module purge
module load gcc/13 openmpi/4.1
module load anaconda/3/2023.03
# avoid overbooking of the cores which might occur via NumPy/MKL threading,
# as the parallelization takes place via processes/multiprocessing
@@ -22,4 +22,4 @@ export OMP_NUM_THREADS=1
# pass the number of available cores via the command line, and
# use that number to spawn as many workers from multiprocessing
srun python3 ./python_multiprocessing.py $SLURM_CPUS_PER_TASK
#!/bin/bash -l
#
# Example job script for using Python with Multithreading (OpenMP, Numba, numexpr) on Raven@MPCDF.
#
#SBATCH -o ./job.out.%j
#SBATCH -e ./job.err.%j
@@ -9,12 +9,12 @@
#SBATCH --mail-type=none
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1 # only start 1 task via srun because Python multiprocessing starts more tasks internally
#SBATCH --cpus-per-task=72 # assign all the cores to that first task to make room for Python's multiprocessing tasks
#SBATCH --time=00:10:00
module purge
module load gcc/13 openmpi/4.1
module load anaconda/3/2023.03
# set the correct number of threads for various thread-parallel modules
export OMP_NUM_THREADS=$SLURM_CPUS_PER_TASK
@@ -23,4 +23,4 @@ export NUMEXPR_NUM_THREADS=$SLURM_CPUS_PER_TASK
# pin OpenMP threads to cores
export OMP_PLACES=cores
srun python3 ./python_multithreading.py
@@ -12,10 +12,10 @@
#SBATCH --time=0:10:00 # run time, up to 24h
module purge
module load gcc/13 openmpi/4.1
module load anaconda/3/2023.03
# avoid overbooking of the cores which might occur via NumPy/MKL threading
export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK:-1}
srun python3 ./python_hello_world.py
%% Cell type:markdown id: tags:
# Introduction
**Python for HPC course**
Sebastian Kehl, Sebastian Ohlmann, Klaus Reuter
_26-28 November 2024_
HPC Application Support Group
Max Planck Computing and Data Facility, Garching
%% Cell type:markdown id: tags:
## Why Python in HPC?
* Efficiency with respect to development effort:
$\rightarrow$ Python helps you to get things done quickly and focus on your science.
* Efficiency with respect to hardware utilization:
$\rightarrow$ Python-based codes may perform well if implemented properly.
* Python can achieve good overall efficiency for small- to medium-scale codes
* And finally ... Python is already there, so we have to deal with it!
%% Cell type:markdown id: tags:
## Examples for "real" HPC Codes using Python
### [GPAW](https://wiki.fysik.dtu.dk/gpaw/)
* Density-functional theory code for material science
* Implemented as a Python module (C core, Python UI)
* Parallelization: MPI + OpenMP, scales to O(10k) cores
### [ESPResSo++](http://www.espresso-pp.de)
* Molecular dynamics package for soft matter simulation,
MPI for Polymer Research, Mainz
* Implemented as a Python module (C++ core, Python UI)
* Parallelization: Domain decomposition using MPI
%% Cell type:markdown id: tags:
## Use cases often relevant to our audience
* prototype implementations of numerical or AI methods that should run (much) faster when going towards production
* data analysis scripts that should run faster and/or operate in parallel
* IO-handling and processing of large numerical data
$\to$ very often, efficient hardware utilization and good speedups can be gained without too much effort by applying the right techniques and tricks
%% Cell type:markdown id: tags:
## Scope of this tutorial
* Learn about the tools and techniques to write efficient Python code for scientific computing
![gain vs effort](fig/gain_vs_effort.svg)
%% Cell type:markdown id: tags:
## Course Outline
* Introduction
* Refresher of Python and the Python Ecosystem
* Basics of High Performance Computing
* Scientific Computing with Python: NumPy and SciPy
* Performance: Cython, JIT (Numba, JAX), C/Fortran interfacing
* Parallelism: multithreading, multiprocessing, GPU computing, Dask, mpi4py
* Software Engineering, Packaging, Profiling, Visualization
%% Cell type:markdown id: tags:
### *Python for HPC* complements advanced HPC courses
* We cannot cover traditional HPC topics in depth, and would highly recommend the following courses
* Node-level performance engineering
* Parallel programming with MPI
* Parallel programming with OpenMP
* Advanced parallel programming with OpenMP and MPI
* Watch out for these courses which are regularly offered by HLRS, LRZ, NHR/RRZE
%% Cell type:markdown id: tags:
## About this Python for HPC tutorial
* Practical hands-on approach with code examples
* Presentation is based on Jupyter notebooks
* Course material available for download at
https://gitlab.mpcdf.mpg.de/mpcdf/python-for-hpc-exercises
%% Cell type:markdown id: tags:
## Software prerequisites for the Jupyter notebooks and exercises
* Python >=3.6
* Jupyter
* NumPy, SciPy
* matplotlib
* Cython
* gcc, gfortran
* Numba
* (CuPy, JAX)
* (Dask, Dask-MPI, MPI, mpi4py)
%% Cell type:markdown id: tags:
### Software Option 1: Cloud-based MPCDF Jupyter notebooks
* Use the link communicated by email to access a Jupyter service on the MPCDF HPC cloud
* The course material *and* software are provided via an interactive JupyterLab interface
* Each instance provides access to several virtual CPU cores and a few GB of RAM
* Please keep the following points in mind
* Use the JupyterLab menu **File $\to$ Shut down** to free resources when finished
* A session is terminated after 12h
* The storage is only temporary, i.e. download your modified notebooks and data in time via the GUI
%% Cell type:markdown id: tags:
### Software Option 2: Python infrastructure on the MPCDF HPC systems
* Python is provided via conda-based Python distributions (now based on 'conda-forge', historically Anaconda)
* software is accessible via environment modules, e.g.
`module purge`
`module load gcc/12 impi/2021.9`
`module load anaconda/3/2023.03`
`module load mpi4py/3.1.4`
%% Cell type:markdown id: tags:
### Software Option 3: Python environment for your local computer
* On a recent Linux system, install all the necessary packages via the package manager
* Alternatively, install [Miniforge](https://conda-forge.org/miniforge/) and use the `conda` package manager and the file `environment.yml` that is provided together with the course material to add all necessary packages
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
# A Python refresher
**Python for HPC course**
Max Planck Computing and Data Facility, Garching
%% Cell type:markdown id: tags:
### Python: History and Status
* First version released in 1991 by G. van Rossum
* [Implementations](https://wiki.python.org/moin/PythonImplementations): **CPython**, PyPy, Pyston, ...
* Language versions: 2.7 (legacy, ✞2020), now 3 (relevant versions are 3.8 - 3.13)
(to migrate legacy code, the packages `2to3`, `six`, `future` are helpful)
%% Cell type:markdown id: tags:
### Python Language Key Features
* high-level language, extensive standard library and ecosystem
* no type declarations, types are tracked at runtime
$\rightarrow$ code is interpreted in most implementations (or just-in-time compiled)
$\rightarrow$ certain [compiler optimizations](https://en.wikipedia.org/wiki/Optimizing_compiler) are not possible $\rightarrow$ performance overhead
* imperative, procedural, functional, or object-oriented programming styles possible
* syntax is designed to make programs comparatively easy to read
e.g. indentation (typically 4 spaces per level) defines logical blocks
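As a minimal illustration (a made-up snippet, not from the course exercises), indentation alone delimits the blocks:

``` python
# each indentation level (4 spaces) opens a new logical block
evens = []
for i in range(6):
    if i % 2 == 0:        # body of the for loop
        evens.append(i)   # body of the if branch
print(evens)              # [0, 2, 4]
```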
%% Cell type:markdown id: tags:
### Execution of Python programs
#### Scripts
* Python programs are text files (UTF-8) with the suffix '.py'
```python
#!/usr/bin/env python3
print("Hello World!")
```
* execution from a shell prompt
```bash
$ python3 hello.py
Hello World!
$ chmod +x hello.py
$ ./hello.py
Hello World!
```
%% Cell type:markdown id: tags:
#### Jupyter notebooks
* web-based interactive development environment and document
* code, figures and documentation can be mixed in the same notebook
* plot via matplotlib, document using markdown, typeset formulas via LaTeX
* code blocks are organized in cells
* press "Shift + Enter" to execute the current cell
* select "Cell" $\to$ "Run all" from the menu to run the complete notebook
* notebook export to Python source file works via "File" $\to$ "Download as"
* [papermill](https://papermill.readthedocs.io/en/latest/) allows running notebooks in 'headless' mode without user interaction (e.g. in a batch job)
* locally, launch via `jupyter notebook` or `jupyter lab` from a terminal
* https://jupyter.org
%% Cell type:markdown id: tags:
### Basic data types and data structures
* numbers: integer (*arbitrary* precision), float (double precision), complex `2 + 3j` (double precision)
* boolean: `True`, `False`
* strings: `"foo"`
* collections (selection)
* lists: `[1, "bar"]`, indexed
* tuples: `(2, "foobar")`, indexed
* sets: `{"apple", "banana"}`, not indexed
* dictionaries: `{"germany" : "berlin", "france" : "paris"}`, indexed
* https://docs.python.org/3.11/tutorial/introduction.html
https://docs.python.org/3.11/tutorial/datastructures.html
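A short hypothetical snippet exercising these types (the variable names are illustrative only):

``` python
# numbers, strings, and the basic collections in action
n = 2**100                                    # integers have arbitrary precision
z = 2 + 3j                                    # complex literal
fruits = ["apple", "banana"]                  # list, mutable and indexed
point = (1.0, 2.0)                            # tuple, immutable and indexed
basket = {"apple", "banana", "apple"}         # set: duplicates collapse
capitals = {"germany": "berlin", "france": "paris"}  # dictionary
print(len(basket))         # 2
print(capitals["france"])  # paris
```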
%% Cell type:markdown id: tags:
### Variables, mutable vs. immutable objects
* a variable is a named reference to an object, e.g.
```python
a = 5
```
* in Python everything is an object, i.e. contains data along with attributes and/or methods
* immutable objects may not change once created: integer, float, boolean, string, tuple
* mutable objects may change in place: list, set, dictionary, most user-defined classes
* if unsure, check via the object ID (memory location)
%% Cell type:code id: tags:
``` python
word = "Hello"
print(id(word)) # Python-internal object id
word = word + " World!"
print(id(word)) # id has changed
```
%% Output
140587166103472
140587166090288
%% Cell type:markdown id: tags:
### Strings $\to$ `"abc"`
* strings are immutable, any "modification" of a string implies the creation of a new object
* indexing and slicing same as with lists (see below)
* numerous built-in string-methods are available
* Python 3 strings are represented in Unicode, encoding and decoding to other formats may be necessary
* https://docs.python.org/3/library/string.html
%% Cell type:code id: tags:
``` python
# examples for string methods
msg = "Hello World!"
print(msg.endswith("ld!"))
print(msg.upper()) # returns new string, keeps `msg` unchanged
print(msg.replace('Hello', 'Goodbye')) # returns new string, keeps `msg` unchanged
print(msg + " " + msg)
# string formatting example
a = 3.14
b = 2
print("%f and %d" % (a, b)) # old C-style % formatting
print("{} and {}".format(a, b)) # placeholders {} and format()
print(f"{a} and {b}") # f strings, since Python 3.6
```
%% Output
True
HELLO WORLD!
Goodbye World!
Hello World! Hello World!
3.140000 and 2
3.14 and 2
3.14 and 2
%% Cell type:markdown id: tags:
### Lists $\to$ `[a, b, c]`
* a list is a collection of objects which is ordered and mutable
* a list may contain objects of different type
* list elements are not necessarily laid out contiguously in memory
%% Cell type:code id: tags:
``` python
my_list = [1, 'apple', 2, 'bananas']
my_list[2] = 3
my_list.append(4) # in-place modification!
my_list += [5, 'berries'] # append a list to another list
my_list.reverse() # in-place modification!
my_alias = my_list
my_list.remove('bananas') # in-place modification!
print('bananas' in my_alias)
```
%% Output
False
%% Cell type:markdown id: tags:
### Indexing of lists, tuples, NumPy arrays, etc.
%% Cell type:code id: tags:
``` python
a = list(range(10))
print(a)
print(a[0]) # list/tuple/string indices start at 0
print(a[-1]) # negative indices start from the end
print(a[0:3]) # range: start, stop
print(a[::2]) # step
print(a[::-2]) # negative step
```
%% Output
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
0
9
[0, 1, 2]
[0, 2, 4, 6, 8]
[9, 7, 5, 3, 1]
%% Cell type:markdown id: tags:
### Tuples $\to$ `a, b, c`
* a tuple is a collection which is ordered and immutable
%% Cell type:code id: tags:raises-exception
``` python
tup1 = "banana", 1, "apple"
tup2 = "orange", # a single-element tuple
tup3 = tup1 + tup2
# tuple object cannot be modified once instantiated
tup1[0] = 0
```
%% Output
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[5], line 7
4 tup3 = tup1 + tup2
6 # tuple object cannot be modified once instantiated
----> 7 tup1[0] = 0
TypeError: 'tuple' object does not support item assignment
%% Cell type:markdown id: tags:
### Sets $\to$ `{'a', 'b', 3}`
* a set is a collection which is unordered, mutable, and not indexed
* no duplicate members exist in sets $\to$ great to check uniqueness efficiently
* efficient mathematical set operations are provided
%% Cell type:code id: tags:
``` python
set1 = {"apple", "banana", "apple", 47, 11, 47}
print(set1) # --> elements are unique
set2 = {"banana", 11}
print(set2.issubset(set1))
set2.add(3.1415)
print(set2.issubset(set1))
print(set2.intersection(set1))
```
%% Output
{'banana', 11, 'apple', 47}
True
False
{'banana', 11}
%% Cell type:markdown id: tags:
### Dictionaries $\to$ `{'a':1, 'b':2, 'c':3}`
* a dictionary is a collection which is ~~un~~ordered, mutable and indexed
* dictionaries contain pairs of keys and values
* no duplicate keys are allowed, keys must be immutable
* since Python 3.7 the language standard defines `dict` to [retain the insertion order](https://docs.python.org/3.7/library/stdtypes.html#typesmapping)
%% Cell type:code id: tags:
``` python
phonebook = {"joe": 1493, "mike": 4711}
phonebook["nancy"] = 1421
# iterate over keys
for key in phonebook:
print(key)
# iterate over key-value pairs
for key, val in phonebook.items():
print(f"{key}: {val}")
print(len(phonebook))
del phonebook["joe"]
print("joe" in phonebook)
```
%% Output
joe
mike
nancy
joe: 1493
mike: 4711
nancy: 1421
3
False
%% Cell type:markdown id: tags:
### Control flow
* branching
`if` ... `elif` ... `else`
* loops
* `for` $-$ used to repeat a block of code a fixed number of times, used in combination with an iterable object (`range()`, list, etc.)
* `while` $-$ used to repeat a block of code as long as a condition is satisfied
* loop modifier statements
* `break`
* `continue`
* placeholder, useful when necessary for syntactic reasons
* `pass`
* https://docs.python.org/3.11/tutorial/controlflow.html
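These statements combine naturally in a few lines (a made-up example, not part of the exercises):

``` python
# find the first odd multiple of 3 in a range
found = None
for n in range(10, 20):
    if n % 2 == 0:
        continue          # skip even numbers
    if n % 3 == 0:
        found = n
        break             # stop at the first match
else:
    pass                  # for/else: would only run if the loop never breaks
print(found)              # 15
```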
%% Cell type:markdown id: tags:
### Exceptions and Error Handling
* errors during execution `raise` exceptions
* exceptions are handled using `try` ... `except` [... `else`...`finally`] blocks
* advantage: error handling becomes possible across nested function calls, between modules, etc.
* Python knows numerous built-in exceptions, cf. https://docs.python.org/3/library/exceptions.html, user-defined exceptions can be implemented in addition
%% Cell type:code id: tags:
``` python
try:
y = 1/0
except ZeroDivisionError:
print("Division by zero.")
```
%% Output
Division by zero.
%% Cell type:markdown id: tags:
### Functions
%% Cell type:code id: tags:
``` python
def square(a, verbose=False):
"""Return the square of the input parameter a.
"""
sq = a*a
if verbose:
print(sq)
return sq
b = square(2)
c = square(b, verbose=True)
```
%% Output
16
%% Cell type:markdown id: tags:
#### Variable scopes
* global variables are defined at script or module-level
* local variables are defined at function level
* name resolution order: local, enclosing function, global, built-in
* in case global variables need to be modified locally, use the `global` keyword
%% Cell type:code id: tags:
``` python
def echo():
print(s)
def func1():
# local 's' is a different object than global 's'
s = "Mouse"
def func2():
global s
s = "Dog"
s = "Cat"
echo()
func1()
echo()
func2()
echo()
```
%% Output
Cat
Cat
Dog
%% Cell type:markdown id: tags:
#### Function parameters
* parameters are passed by assignment
* mutable objects are effectively passed by reference
* immutable objects are effectively passed by value
%% Cell type:code id: tags:
``` python
# Pitfalls with function parameter handling,
# mind local function variables when dealing with mutable function parameters
def modify_list_method(a):
a.append("two")
def modify_list_localvar(a):
a = a + ["three"] # lhs 'a' is a local variable!
def modify_list_slicing(a):
a[:] = a + ["four"]
l = ["one"]
modify_list_method(l)
modify_list_localvar(l)
modify_list_slicing(l)
print(f"l = {l}")
```
%% Output
l = ['one', 'two', 'four']
%% Cell type:markdown id: tags:
### Classes
* classes bundle data and functionality together and are key to object-oriented programming
* Python supports classes and related functionality, among others
* methods
* class and instance variables
* inheritance
* unintended side effects may be caused by mutable class variables (see example)
* https://docs.python.org/3.11/tutorial/classes.html
%% Cell type:code id: tags:
``` python
class myClass():
"""A simple example class."""
shared = [] # class variable, shared by all instances
def __init__(self): # constructor
self.private = [] # per-instance variable
def show(self):
print("shared = {}, private = {}".format(self.shared, self.private))
x = myClass()
x.shared.append("cat")
x.private.append("dog")
x.show()
y = myClass()
y.shared.append("cat")
y.private.append("dog")
y.show()
```
%% Output
shared = ['cat'], private = ['dog']
shared = ['cat', 'cat'], private = ['dog']
%% Cell type:markdown id: tags:
### Decorators
* decorators wrap existing functions and modify their behaviour
* decorators take a function as argument and return a wrapped function
* use cases: profilers, loggers, just-in-time compilers, parallelizers, ...
* https://peps.python.org/pep-0318/
%% Cell type:code id: tags:
``` python
def do_twice(func):
"""A simple decorator, calling 'func' twice.
"""
def wrapper(name):
func(name)
func(name)
return wrapper
@do_twice
def say_hello(name):
print("Hello {}!".format(name))
say_hello("Peter")
```
%% Output
Hello Peter!
Hello Peter!
%% Cell type:code id: tags:
``` python
# a decorator with an argument requires one more level of indirection
def do_n_times(n):
def decorator(func):
def wrapper(name):
for i in range(n):
func(name)
return wrapper
return decorator
@do_n_times(3)
def say_hello(name):
print("Hello {}!".format(name))
say_hello("Peter")
```
%% Output
Hello Peter!
Hello Peter!
Hello Peter!
%% Cell type:markdown id: tags:
### Functional programming
* functional approach: transform data via functions that only take input and return output
* Python basic concept: iterators representing streams of elements, e.g. `range()`
* Python builtins: `map()`, `filter()`, `enumerate()`, `sorted()`, `zip()`
* https://docs.python.org/3/howto/functional.html
%% Cell type:code id: tags:
``` python
def my_transform(x):
return x*x
# map returns an iterator, applying a function to each element of an iterator
seq = map(my_transform, range(1,10))
for i in seq:
print(i, end=' ')
```
%% Output
1 4 9 16 25 36 49 64 81
%% Cell type:markdown id: tags:
#### lambda functions
* anonymous functions that execute a single expression
* *syntactic sugar* to avoid explicit function definitions
%% Cell type:code id: tags:
``` python
# lambda functions are often used in combination with Python's functional builtins
seq = map(lambda x : x*x, range(1,10))
for i in seq:
print(i, end=' ')
```
%% Output
1 4 9 16 25 36 49 64 81
%% Cell type:markdown id: tags:
#### Generators
* custom iterators can be written easily using generator functions (`yield`) and generator expressions
* e.g. useful to process large amounts of data in a memory-efficient way, element by element
%% Cell type:code id: tags:
``` python
# naive approach: build a full list in memory
def first_n_sq(n):
num = 1
nums = []
while num <= n:
nums.append(num * num)
num += 1
return nums
sum(first_n_sq(30))
```
%% Output
9455
%% Cell type:code id: tags:
``` python
# better: generator function **yield**ing individual elements
def first_n_sq_gen(n):
num = 1
while num <= n:
yield num * num
num += 1
sum(first_n_sq_gen(30))
```
%% Output
9455
%% Cell type:code id: tags:
``` python
# better: generator expression
first_n_sq_gen_expr = (i*i for i in range(1,31))
sum(first_n_sq_gen_expr)
```
%% Output
9455
%% Cell type:markdown id: tags:
### Python modules
* Python allows structuring code into modules
* each module has its own namespace
* use `import` to get access to a module from your code
* don't use `from module_name import *` $\to$ clutters namespace
* use `dir(module_name)` to see what's inside of an imported module
* more on modules later when we talk about packaging ...
* https://docs.python.org/3/tutorial/modules.html
%% Cell type:code id: tags:
``` python
import math
math.exp(1.0)
```
%% Output
2.718281828459045
%% Cell type:code id: tags:
``` python
import numpy as np
np.exp(1.0)
```
%% Output
2.718281828459045
%% Cell type:code id: tags:
``` python
from numpy import log, exp
log(exp(1.0))
```
%% Output
1.0
%% Cell type:code id: tags:
``` python
print(dir(math))
```
%% Output
['__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'acos', 'acosh', 'asin', 'asinh', 'atan', 'atan2', 'atanh', 'ceil', 'comb', 'copysign', 'cos', 'cosh', 'degrees', 'dist', 'e', 'erf', 'erfc', 'exp', 'expm1', 'fabs', 'factorial', 'floor', 'fmod', 'frexp', 'fsum', 'gamma', 'gcd', 'hypot', 'inf', 'isclose', 'isfinite', 'isinf', 'isnan', 'isqrt', 'lcm', 'ldexp', 'lgamma', 'log', 'log10', 'log1p', 'log2', 'modf', 'nan', 'nextafter', 'perm', 'pi', 'pow', 'prod', 'radians', 'remainder', 'sin', 'sinh', 'sqrt', 'tan', 'tanh', 'tau', 'trunc', 'ulp']
%% Cell type:markdown id: tags:
### Python's built-in help system
* invoke `help(obj)` to get information about `obj`
* essentially, the docstring of `obj` is printed
%% Cell type:code id: tags:
``` python
help(math.gamma)
```
%% Output
Help on built-in function gamma in module math:
gamma(x, /)
Gamma function at x.
%% Cell type:markdown id: tags:
### Python Standard Library
* Python comes with a huge standard library ("batteries included")
* noteworthy modules: `os`, `sys`, `math`, `multiprocessing`, ...
* building blocks for most programming tasks already exist
* well-tested and well documented
https://docs.python.org/3.11/library/index.html
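A few standard-library one-liners as a taste (the path components are hypothetical):

``` python
import math
import os
import sys

p = os.path.join("results", "run_001", "data.txt")  # portable path handling
print(p)
print(math.isclose(math.sin(math.pi), 0.0, abs_tol=1e-12))  # robust float comparison
print(sys.version_info.major)  # 3
```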
%% Cell type:markdown id: tags:
### PEP8, style guide for Python code
* PEP8 is a style guide to write clean, readable, and maintainable Python code
* indentation using 4 *spaces*
* 79 characters maximum line width
* UTF8 source file encoding
* comments, docstrings
* naming conventions
* ...
* https://www.python.org/dev/peps/pep-0008/
* convert existing code into a PEP8 compliant format using `autopep8` or `black`
%% Cell type:markdown id: tags:
### Basic file IO
%% Cell type:code id: tags:
``` python
# naive way: write to a text file
file_name = "/tmp/dummy.txt"
fh = open(file_name, 'w')
fh.write("Hello\n")
fh.close()
```
%% Cell type:code id: tags:
``` python
# better: write to a text file, close file handle implicitly (ContextManager)
with open(file_name, 'a') as fh:
fh.write("World\n")
```
%% Cell type:markdown id: tags:
#### Background information on the `with` statement and context managers
* https://docs.python.org/3/reference/compound_stmts.html#the-with-statement
* https://docs.python.org/3/reference/datamodel.html#with-statement-context-managers
* https://docs.python.org/3/library/contextlib.html#contextlib.contextmanager
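Custom context managers can be written with very little code via `contextlib` — a minimal sketch (the `tagged` manager is a made-up example):

``` python
from contextlib import contextmanager

@contextmanager
def tagged(name):
    """Run setup before the with-block and teardown after it."""
    print(f"<{name}>")        # setup
    try:
        yield
    finally:
        print(f"</{name}>")   # teardown runs even if the block raises

with tagged("section"):
    print("body")
```

The `finally` clause is what guarantees the cleanup, mirroring how `open()` closes the file handle on exit.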
%% Cell type:code id: tags:
``` python
# read the complete file into a list in memory
with open(file_name, 'r') as fh:
lines = fh.readlines()
print(lines)
```
%% Output
['Hello\n', 'World\n']
%% Cell type:code id: tags:
``` python
# when reading, the file handle `fh` is actually an iterator on the lines of the file
with open(file_name, 'r') as fh:
for line in fh:
print(line, end='')
```
%% Output
Hello
World
%% Cell type:markdown id: tags:
### Parameter files
* scientific codes are often initialized using parameter files
* parameter files can be edited by hand
* code does not need to be changed, reads in parameter file
* recommended
* YAML $\to$ human-readable, compatible with many other tools
* less recommended
* configparser (".cfg", contained in standard library)
* JSON (".json", contained in standard library)
* custom formats
%% Cell type:code id: tags:
``` python
# let's assume our code uses a nested dict for parameter handling
par = {}
par["general"] = {}
par["general"]["time_steps"] = 1000
par["general"]["resolution"] = (1024,1024,512)
par["species"] = {}
par["species"]["H"] = False
par["species"]["N"] = True
```
%% Cell type:markdown id: tags:
#### YAML
* available via the `pyyaml` package
* https://pyyaml.org/wiki/PyYAMLDocumentation
%% Cell type:code id: tags:
``` python
# write the par object to a well-formatted YAML file
import yaml
with open("par.yaml", 'w') as fp:
yaml.safe_dump(par, fp, default_flow_style=False, indent=4)
```
%% Cell type:code id: tags:
``` python
# read the parameters back from YAML
with open("par.yaml", 'r') as fp:
par_y = yaml.safe_load(fp)
```
%% Cell type:markdown id: tags:
```yaml
# par.yaml
general:
resolution:
- 1024
- 1024
- 512
time_steps: 1000
species:
H: false
N: true
```
%% Cell type:markdown id: tags:
### References and Further Reading
* [A Whirlwind Tour of Python](https://jakevdp.github.io/WhirlwindTourOfPython/) by Jake VanderPlas, free O'Reilly report
* [Dive Into Python](https://diveintopython3.problemsolving.io) by Mark Pilgrim, free online book
* [Python for Beginners](https://www.python.org/about/gettingstarted/)
* [The Python Tutorial](https://docs.python.org/3/tutorial/index.html)
* [Python Documentation](https://docs.python.org/3/index.html)
* [What’s New in Python](https://docs.python.org/3/whatsnew/index.html)
* [Python Enhancement Proposals (PEP)](https://peps.python.org/pep-0000/)
%% Cell type:markdown id: tags:
# NumPy and SciPy
**Python for HPC**
Max Planck Computing and Data Facility, Garching
%% Cell type:markdown id: tags:
## Outline
* NumPy
* SciPy examples
* Input/output with NumPy arrays
* Bonus: Image processing examples
%% Cell type:markdown id: tags:
## Performance Problem - Loops
* Looping in Python (and most interpreted languages) can be **very expensive**
* Numerical codes perform mostly array computation
* Example: Discrete Fourier Transform
$$ X_k = \sum_{n=0}^{N-1}x_n\cdot e^{\frac{-2i\pi}{N}kn} $$
%% Cell type:code id: tags:
``` python
import cmath
import random
def DFT_loop(x):
N = len(x)
X = [0 for n in range(N)]
for k in range (N):
for n in range(N):
X[k] += x[n]*cmath.exp(-2j * cmath.pi * k * n / N)
return X
X = [random.random() for i in range(512)]
F = DFT_loop(X)
```
%% Cell type:markdown id: tags:
### Solution - Numpy Arrays
<div class="row">
<div class="col-md-6" markdown="1">
* Possible solution: precompute the loop coefficients into arrays and then use linear algebra operations
* <span style="color:red">Larger memory footprint</span>, but <span style="color:green">higher performance</span>
* much faster than loops
</div>
<div class="col-md-6" markdown="1">
<center>
<img src="fig/numpy_fft.svg" style="width: 85%">
</center>
</div>
</div>
$$ X_k = \sum_{n=0}^{N-1}x_n\cdot e^{\frac{-2i\pi}{N}kn} $$
%% Cell type:code id: tags:
``` python
import numpy as np
def DFT_arrays(x):
x = np.asarray(x, dtype=float)
N = x.shape[0]
n = np.arange(N)
k = n.reshape((N, 1))
M = np.exp(-2j * np.pi * k * n / N)
return np.dot(M, x)
X = np.random.random(512)
F = DFT_arrays(X)
```
%% Cell type:markdown id: tags:
### Better Solution - External Call
* low-level external libraries will almost always be *much* better!
* calls highly optimized low-level (e.g. C, Fortran) functions that exploit OpenMP, cache-friendly memory access, vectorization, etc.
* NumPy and SciPy already interface FFT (and many other) libraries
* How does the performance compare?
$$ X_k = \sum_{n=0}^{N-1}x_n\cdot e^{kn(-2i\pi /N)} $$
%% Cell type:code id: tags:
``` python
X = np.random.random(512)
%timeit Fl = DFT_loop(X)
%timeit Fa = DFT_arrays(X)
%timeit Fe = np.fft.fft(X)
```
%% Cell type:markdown id: tags:
## Bread & Butter: NumPy & SciPy
* NumPy:
* efficient handling of array operations via NumPy arrays
* executes loops in compiled C code (*much* faster!)
* SciPy:
* more advanced mathematical and numerical routines
* provides several flavours of (sparse) matrix computation
* Both make calls to BLAS and LAPACK routines
* `numpy.linalg` and `scipy.linalg`
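For instance, a linear system can be solved via `numpy.linalg`, which dispatches to an optimized LAPACK routine under the hood (a minimal sketch with made-up numbers):

``` python
import numpy as np

# solve A x = b via LAPACK
A = np.array([[3.0, 1.0],
              [1.0, 2.0]])
b = np.array([9.0, 8.0])
x = np.linalg.solve(A, b)
print(x)                      # [2. 3.]
print(np.allclose(A @ x, b))  # True
```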
%% Cell type:markdown id: tags:
## NumPy arrays
* [NumPy N-dimensional array](https://numpy.org/doc/stable/reference/arrays.ndarray.html) `ndarray`
* fundamental container class to store items of the same type and size
* stores the values in a plain, contiguous C array in memory
* preserves the intuitive syntax of Python
* provides numerous methods for math operations on arrays
* great opportunity for performance optimization of (math) operations on NumPy arrays
* may use CPU performance features such as prefetching, caching, vectorization
* may call high-performance libraries such as MKL (Math Kernel Library by Intel), Lapack, BLAS under the hood
%% Cell type:code id: tags:
``` python
import numpy as np
my_array = np.array([[1,2,3,4], [5,6,7,8]])
print(my_array)
print(type(my_array))
```
%% Cell type:markdown id: tags:
### NumPy Arrays - Constructors
<br>
* often require a "shape" tuple (the parentheses are mandatory)
* accept multiple dimensions
%% Cell type:code id: tags:
``` python
x = np.zeros((3,4))
x = np.ones((2,3,4))
x = np.full((2,2),7.2)
# random arrays
x = np.random.random((2,2)) # 2x2 matrix with random reals
x = np.random.randint(10,size=5) # 5 random integers between 0 and 9
# use uninitialized memory
x = np.empty((3,2))
# start, stop, stride
x = np.arange(10,55,5)
print("Discrete Range:" + str(x))
# first element, last element, total number of elements
x = np.linspace(0,1,9)
print("Continuous Range:" + str(x))
```
%% Cell type:markdown id: tags:
### NumPy Arrays - Datatypes & Compatibility
* datatypes map to native C types
* datatype can be specified via the `dtype=` parameter of constructor
* operations automatically <span style="color:red">cast compatible types</span>
* defaults (on most Linux/x86_64 systems): `np.float64` and `np.int64`
* https://docs.scipy.org/doc/numpy/user/basics.types.html
%% Cell type:code id: tags:
``` python
import numpy as np
x = np.array([1,2,3])
y = np.array([1.,2.,3.])
print("x and y type: %s and %s" % (x.dtype, y.dtype))
r = x+y
print("x+y type: %s" % (r.dtype))
z = np.array([1+2j,2+3j,3+4j])
sz = z.astype(np.complex64)
print("z and sz type: %s and %s" % (z.dtype, sz.dtype))
```
%% Cell type:markdown id: tags:
### NumPy Arrays - Functions
Arrays have a large selection of predefined operations. There are too many to list here!
%% Cell type:code id: tags:
``` python
x = np.random.randint(10,size=8)
print("* Unsorted array: ", x)
x.sort()
print("* Sorted array: ", x)
# compute some properties
y = np.random.random((10,3))
print("* Max, min, mean, std, sum: ", y.max(), y.min(), y.mean(), y.std(), y.sum())
# may also specify over which axis
print("* Max of each column: ", y.max(axis=0))
print("* Max of each row: ", y.max(axis=1))
# comparing two arrays
z = y + 1e-16
print("* Equal: ", np.array_equal(y,z))
print("* Close: ", np.allclose(y, z, 1e-8))
```
%% Cell type:markdown id: tags:
### NumPy Arrays - Functions Performance
* prefer to use *built-in* functions if they are there: <span style="color:green">faster</span> and <span style="color:green">cleaner</span>
%% Cell type:code id: tags:
``` python
# use numpy functions
def mean_loop(a):
result = 0
for i in range(len(a)):
result += a[i]
return result/len(a)
x = np.arange(2**20)
%timeit mean_loop(x)
%timeit np.mean(x)
```
%% Cell type:markdown id: tags:
### NumPy Arrays - Attributes
* rather advanced
* useful if you are writing interfaces between Python and C/Fortran
* useful for <span style="color:green">asserting contiguity</span> of data
%% Cell type:code id: tags:
``` python
print(y.ndim) # number of dimensions
print(y.size) # number of elements
print(y.dtype) # information about the data type
print(y.flags) # information about memory layout
print(y.itemsize) # size of one array element in bytes
print(y.nbytes) # total size of the array in bytes
```
%% Cell type:markdown id: tags:
### NumPy Arrays - Indexing
* syntax identical to standard python: `x[sel]`
* does <span style="color:red">not guarantee contiguity</span> of data
* multidimensional indexing `x[sel1, sel2, sel3]` shorthand for `x[(sel1, sel2, sel3)]`
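%% Cell type:markdown id: tags:
A quick way to see the contiguity point (a minimal sketch, using the `flags` attribute from the previous slide):
%% Cell type:code id: tags:
``` python
import numpy as np
x = np.arange(12)
print(x.flags['C_CONTIGUOUS'])        # True: freshly created array
print(x[::2].flags['C_CONTIGUOUS'])   # False: strided selection is a non-contiguous view
print(x[2:5].flags['C_CONTIGUOUS'])   # True: a plain slice with step 1 stays contiguous
```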
%% Cell type:markdown id: tags:
### Numpy Arrays - Basic Indexing
* single element indexing:
* `x[0]`
* `x[0,1]` (equivalent to `x[0][1]` but more efficient!)
* slicing and striding
* selection object is a `slice` (`start:stop:step`) or a tuple of `slice` objects, integers, `newaxis`, `Ellipsis`
* basic indexing generally creates <span style="color:red">views</span> into the original array
%% Cell type:code id: tags:
``` python
x = np.arange(20)
# basic slice syntax as for python lists is start:end:step
# for multidimension [start1:end1:step1, start2:end2:step2, ...]
print("From 2 to 9, in steps 2:", x[2:9:2])
print("All except 3 last elements:", x[:-3])
# revert array
print("Backwards:", x[::-1])
# Reshape into multi-D arrays
y = x.reshape((5,4))
print("Shape of y:", y.shape)
print("Slice of y:", y[1:,:2])
# new dimensions can be added
z = np.arange(3)
print("z:")
print(z[:,np.newaxis])
```
%% Cell type:markdown id: tags:
### NumPy Arrays - Broadcasting
* some dimension conversions are automatic, some are not!
* careful with <span style="color:red">readability</span> of the code
%% Cell type:markdown id: tags:
<center>
<img src="fig/numpy_broadcast.svg" style="width: 60%">
</center>
%% Cell type:markdown id: tags:
### NumPy Arrays - Broadcasting
* careful with <span style="color:red">readability</span> of the code
%% Cell type:code id: tags:
``` python
X = np.arange(12).reshape(4,3)
Y = np.arange(3)/10
Z = np.arange(4)/10
print("X is a 4x3 matrix:\n",X)
print("\nX+Y is valid:\n",X+Y)
# X+Z is invalid! We need to add a new dimension:
print("\nX+Z[:,None] is valid:\n",X+Z[:,None])
```
%% Cell type:markdown id: tags:
### NumPy Array - Advanced Indexing
* the selection object (as in `x[obj]`) can be:
* a non-tuple sequence object (i.e., list or range) or ndarray of indices
* or a tuple with at least one non-tuple sequence object or ndarray
* a boolean array $\to$ must have the same shape
%% Cell type:code id: tags:
``` python
x = np.arange(10)*10
print(x)
# use integer list as indices
print("X with integer indices: ", x[[2,5,8]])
print("X with boolean indices: ", x[[False, False, True, False, False, True, False, False, True, False]])
# create boolean array
y = x**2 - 750
index = (x < y)
print(index)
print("X with boolean indices: ", x[index])
print("X with negated indices: ", x[~index])
print("X with oneliner: ", x[x<y])
# Set x to 100 wherever x < y - equivalent to:
# for i in range(len(x)):
#     if x[i] < y[i]:
#         x[i] = 100
# Indexing can be used to assign values to indexed arrays
x[x < y] = 100
print("Oneliner x: ", x)
```
%% Cell type:markdown id: tags:
### NumPy Arrays vs Views
* Basic slicing on arrays usually returns a view on the original array
* Data is **not** copied $\to$ changing the original array also changes the view
* Views may be <span style="color:red">non-contiguous data</span>
* use `copy()` for copying the data over
* Advanced indexing directly returns a copy of the indexed data
%% Cell type:code id: tags:
``` python
x = np.arange(15)
print("Array x: ", x)
y = x[1:8:2]
print("View y: ", y)
print(y.flags)
x[1] = 20
print("View y after changing x: ", y)
y[0] *= 10
print("Original x after changing view y: ", x)
c = y.copy()
print("Copy of y: ", c)
print(c.flags)
```
%% Cell type:markdown id: tags:
### NumPy Arrays vs Views: Performance
* strided data access may <span style="color:red">hinder performance</span>.
* temporary array **may** be preferable, but: additional cost due to copy!
* careful with passing views as argument
%% Cell type:code id: tags:
``` python
import numpy as np
def sum_test(c,x,y):
# Computes c*x^2 + y
if len(c) == len(x) == len(y):
return c*x*x + y
else:
print("Wrong shape")
return None
def sum_test_copy(c,x,y):
copy = x.copy()
return sum_test(c,copy,y)
x = np.random.random(1000000)
y = np.random.random(100000)
c = np.random.random(100000)
view = x[::10]
copy = view.copy()
%timeit res = sum_test(c,view,y)
%timeit res = sum_test(c,copy,y)
%timeit res = sum_test_copy(c,view,y)
```
%% Cell type:markdown id: tags:
### Example 1: Midpoint Computation
$$y_i = \dfrac{x_{i}+x_{i+1}}{2}, \qquad \forall i<N$$
%% Cell type:code id: tags:
``` python
def midpoints_list(x): # worst implementation
y = []
for i in range(len(x)-1):
y.append(0.5*(x[i] + x[i+1]))
return np.array(y)
def midpoints_loop(x): # loop forcing numpy array
N = len(x)
y = np.zeros(N-1)
for i in range(N-1):
y[i] = 0.5*(x[i] + x[i+1])
return y
def midpoints_array(x): # best: no loops at all
return 0.5*(x[:-1] + x[1:])
x = np.linspace(0, 10, 10**6)
%timeit midpoints_list(x)
%timeit midpoints_loop(x)
%timeit midpoints_array(x)
```
%% Cell type:markdown id: tags:
### Example 1: Midpoint Computation Explanation
$$y_i = \dfrac{x_{i}+x_{i+1}}{2}, \qquad \forall i<N$$
<br>
* Loop ```for i in range(N-1):``` to left-hand side ```y[0:N-1]```
* Translate indices on right-hand side to ranges for array
* ```x[i]``` to ```x[0:N-1]``` and ```x[i+1]``` to ```x[1:N]```
%% Cell type:markdown id: tags:
<center>
<img src="fig/numpy-indexing.svg" style="width: 50%">
</center>
%% Cell type:markdown id: tags:
### Example 2: In-Place Manipulations
* `x[1:] + x[:-1]` will create a temporary array
* we can avoid it, but does that pay off?
%% Cell type:code id: tags:
``` python
def temp_arrays(x):
return 0.5*(x[1:] + x[:-1])
def in_place(x):
    # note: y is a view into x, so these in-place operations modify x as well!
    y = x[1:]
    y += x[:-1]
    y *= 0.5
    return y
x = np.random.rand(2**15)
%timeit temp_arrays(x)
%timeit in_place(x)
```
%% Cell type:markdown id: tags:
* performance is problem- and size-dependent
* $\to$ may be faster, but not necessarily!
* keep mutability in mind!
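%% Cell type:markdown id: tags:
To make the mutability point concrete (a minimal sketch): in-place operations on a view write through to the original array, so a function like `in_place` above silently modifies its argument.
%% Cell type:code id: tags:
``` python
import numpy as np
def halve_sum_in_place(x):
    # same pattern as in_place above: y is a view into x
    y = x[1:]
    y += x[:-1]
    y *= 0.5
    return y
a = np.arange(5, dtype=float)
before = a.copy()
halve_sum_in_place(a)
print(np.array_equal(a, before))  # False: the caller's array has changed
```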
%% Cell type:markdown id: tags:
### Example 3: Center of Mass
* For positions $\vec{x}_i$ ($j$-th coordinate $x_{i,j}$), masses $m_i$:
$$ \vec{x}_\mathsf{com} = \frac{1}{M} \sum_{i=0}^N m_i \vec{x}_i, \qquad\text{where}\qquad M=\sum_i m_i$$
%% Cell type:code id: tags:
``` python
import numpy as np
def center_of_mass_loop(pos, mass):
com = np.zeros(3)
for j in range(3):
for i in range(pos.shape[0]):
com[j] += pos[i, j] * mass[i]
total_mass = 0.0
for i in range(pos.shape[0]):
total_mass += mass[i]
for j in range(3):
com[j] /= total_mass
return com
def center_of_mass_arrays(pos, mass):
com = np.zeros(3)
for j in range(3):
com[j] = np.sum(pos[:, j]*mass[:])
total_mass = np.sum(mass)
com /= total_mass
return com
def center_of_mass_dot(pos, mass):
return np.dot(mass, pos)/np.sum(mass)
# positions and masses of particles
N = 10**5
pos = np.random.rand(N, 3)
mass = np.random.rand(N)
%timeit center_of_mass_loop(pos, mass)
%timeit center_of_mass_arrays(pos, mass)
%timeit center_of_mass_dot(pos, mass)
```
%% Cell type:markdown id: tags:
## NumPy Summary
* Takeaway messages:
1. High level manipulations $\to$ done in Python
2. Intensive computation $\to$ handled by underlying libraries
3. In case no dedicated library exists $\to$ use NumPy arrays and their functions
<br>
* Tradeoff: high development productivity versus performance
* NumPy code *can be* as fast as C or Fortran code, but often it is a factor of 2-4 slower
* Outlook: Implement hotspots in C/Fortran + Python interface, or use just-in-time compilation (Numba, jax)
%% Cell type:markdown id: tags:
# SciPy
* SciPy is a mathematical toolbox built on NumPy, providing functions for
* FFT
* integration
* interpolation
* linear algebra
* sparse matrix operations
* optimization
* special functions (e.g. Bessel functions)
* ... and many more
* most of them delegate <span style="color:red">heavy work</span> to C/Fortran libraries
* [Documentation](https://docs.scipy.org/doc/scipy/reference/)
%% Cell type:markdown id: tags:
## SciPy Example 1: FFT & Power Spectrum
* calculate and plot the power spectrum of a signal
%% Cell type:code id: tags:
``` python
# power spectrum (original code taken from the SciPy tutorial)
import numpy as np
from scipy.fft import fft, fftfreq
import matplotlib.pyplot as plt
N = 600 # number of sample points
T = 1.0 / 800.0 # sample spacing
x = np.linspace(0.0, N*T, N)
y = np.sin(50.0 * 2.0*np.pi*x) + 0.5*np.sin(80.0 * 2.0*np.pi*x)
yf = fft(y)
xf = fftfreq(N, T)
plt.plot(xf[0:N//2], 2.0/N * np.abs(yf[0:N//2]))
plt.grid()
```
%% Cell type:markdown id: tags:
## SciPy Example 2: Integration
* Numerical integration of
* a given function object
* a given sample of function values
* ODEs (ordinary differential equations)
%% Cell type:code id: tags:
``` python
import scipy.integrate as integrate
result = integrate.quad(lambda x: x**2, 0, 2.5) # uses Fortran's QUADPACK
print(result[0], 2.5**3/3.)
x = np.linspace(0, 2.0, 10)
y = np.sin(x)
result = integrate.simpson(y, x=x) # Simpson's rule for given samples (named simps in older SciPy)
print(result, 1.-np.cos(2.0))
# simple ODE: exponential growth, y'(t) = alpha*exp(alpha*t)
def yprime(y, t, alpha):
return np.exp(alpha*t)*alpha
ts = np.linspace(0, 2.0, 50)
y0 = 1.0
y = integrate.odeint(yprime, y0, ts, args=(2.0,)) # uses Fortran's ODEPACK
print(y[-1], np.exp(2.0 * ts[-1])) # compare to analytical result
```
%% Cell type:markdown id: tags:
## SciPy Example 3: Sparse vs Dense Linear Systems
* dense: `scipy.linalg`, sparse: `scipy.sparse.linalg`
* sparse matrix operations can be much faster, depending on the density of matrix
%% Cell type:code id: tags:
``` python
from scipy import sparse
from scipy import linalg
import scipy.sparse.linalg as splinalg
import numpy as np
def compare_density(density, n=1000):
b = np.random.random(n)
A1 = sparse.random(n, n, density=density, format='csr')
A1 += sparse.eye(n,n, format='csr')
print("Density: ", density)
print("Sparse: ")
%timeit splinalg.spsolve(A1, b)
A2 = A1.todense()
print("Dense: ")
%timeit linalg.solve(A2, b)
# Density=0.001: sparse is faster, Density=0.01: dense is faster
compare_density(0.01)
compare_density(0.001)
```
%% Cell type:markdown id: tags:
## SciPy Example 4: Interpolation
* Interpolation of 1D and multi-D data (structured grid and unstructured points)
* Splines and other polynomials
%% Cell type:code id: tags:
``` python
import scipy.interpolate as interpolate
x = np.linspace(0, 10.0, 12)
y = np.sin(x)
y_int1 = interpolate.interp1d(x, y)
y_int2 = interpolate.PchipInterpolator(x, y)
import matplotlib.pyplot as plt
plt.figure(figsize=(20,6))
plt.subplot(1,2,1)
plt.plot(x, y)
x2 = np.linspace(0, 10.0, 50)
plt.plot(x2, y_int1(x2))
plt.subplot(1,2,2)
plt.plot(x, y)
im = plt.plot(x2, y_int2(x2))
```
%% Cell type:markdown id: tags:
## NumPy/SciPy Summary
* NumPy: efficient handling of arrays using the fundamental `ndarray` object and methods
* avoid explicit loops over elements, rather express loops using NumPy array operations
* various other implementations of `ndarray` exist, e.g. for GPUs (CuPy, jax), distributed parallelization (Dask), or IO (h5py)
* SciPy: more advanced mathematical and numerical routines; uses NumPy under the hood *plus* other C/F libraries
* Performance tips:
* Work on full arrays (slicing, NumPy routines...)
* identify hotspots and optimize them
* profile
* code in Cython or C/Fortran + Python interface -- or try just-in-time compilation using Numba or jax
%% Cell type:markdown id: tags:
## Further reading
* Harris, C.R., Millman, K.J., van der Walt, S.J. et al. *Array programming with NumPy.* **Nature** 585, 357–362 (2020). (https://doi.org/10.1038/s41586-020-2649-2)
* Bressert, E. (2012). SciPy and NumPy (1st edition.). O'Reilly Media, Inc. (https://ebooks.mpdl.mpg.de/ebooks/Record/EB001944176)
* There are numerous books on the topic available: https://ebooks.mpdl.mpg.de/ebooks/Search/Results?type=AllFields&lookfor=numpy
(MPG.eBooks work from any Max Planck IP address.)
%% Cell type:markdown id: tags:
# Numpy Input and Output
## Reading and writing NumPy arrays
Possibilities:
* NumPy's native IO facilities
* HDF5 using H5PY
%% Cell type:markdown id: tags:
## Native NumPy Input/Output
### Saving NumPy arrays to files
%% Cell type:code id: tags:
``` python
import os
import numpy as np
x = np.linspace(0.0,100.0,101)
# save to text file (for debugging and small files only)
np.savetxt("x.txt", x)
print("savetxt: ", os.path.getsize('x.txt'), "bytes")
# save to binary file, suffix ".npy"
np.save("x", x)
print("save: ", os.path.getsize('x.npy'), "bytes")
y = np.linspace(0.0,100.0,201)
# save several arrays to binary file, suffix ".npz", preserve variable names via kwargs
np.savez("xy", x=x, y=y)
print("savez: ", os.path.getsize('xy.npz'), "bytes")
np.savez_compressed("xy_compressed", x=x, y=y)
print("savez_compressed: ", os.path.getsize('xy_compressed.npz'), "bytes")
```
%% Cell type:markdown id: tags:
### Reading NumPy arrays from files
%% Cell type:code id: tags:
``` python
import numpy as np
#x = np.loadtxt("x.txt")
x = np.load("x.npy")
print("type of x:", type(x))
xy = np.load("xy.npz")
print("type of xy:", type(xy))
# individual NumPy arrays in xy are accessible via a dict-style interface
print("Keys of xy: ", tuple(xy.keys()))
print("x and xy['x'] are the same: ", np.allclose(x, xy["x"]))
print("y and xy['y'] are the same: ", np.allclose(y, xy["y"]))
```
%% Cell type:markdown id: tags:
## Native NumPy Input/Output
* Useful for
* Quickly saving results (e.g. from interactive sessions)
* Caching results from expensive calculations locally
* Data that is read only in the same way
* Note
* Binary portability (endianness) across systems is supported
* Problem
* Not easily accessible from other programs
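%% Cell type:markdown id: tags:
For large `.npy` files, `np.load` can memory-map the data instead of reading it into memory at once (a minimal sketch; `mmap_mode='r'` is a standard parameter of `np.load`):
%% Cell type:code id: tags:
``` python
import numpy as np
x = np.arange(10**6, dtype=np.float64)
np.save("big_x", x)
# the file is mapped, not read: only the accessed slices touch the disk
xm = np.load("big_x.npy", mmap_mode='r')
print(type(xm))            # numpy.memmap
print(xm[0], xm[-1])       # reads just these elements
```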
%% Cell type:markdown id: tags:
## HDF5 using H5PY
%% Cell type:markdown id: tags:
### HDF5 in a nutshell
* HDF5 is a hierarchical data format specification and library implementation
* HDF5 contains data plus metadata, i.e. it is a self-describing format
* standard and custom datatypes
* filters (e.g. checksums, compression, scale offset)
* parallel IO supported, platform independent
* claimed to be future proof
* recommended and widely used in HPC by simulation codes, analysis and visualization tools (e.g. VisIt, ParaView, IDL, Matlab)
(cf. https://support.hdfgroup.org/HDF5/doc/H5.intro.html)
%% Cell type:markdown id: tags:
### HDF5 schematic
* tree structure with groups, datasets, and attributes, some analogy to a file system with directories and files
![HDF5 schematic](fig/hdf5_structure4.jpg)
%% Cell type:markdown id: tags:
## H5PY: HDF5 bindings for Python
* easy to use Pythonic high-level interface
* dictionary syntax (dataset access by name)
* NumPy array syntax (indexing, slicing, etc.)
* metadata access: use `attrs` attribute
* designed from the beginning to integrate well with NumPy
* low-level Cython interface to the HDF5 C API is available
%% Cell type:markdown id: tags:
### Write a NumPy array to HDF5
%% Cell type:code id: tags:
``` python
import numpy as np
import h5py
M = np.random.rand(128,128)
with h5py.File("data.hdf5", 'w') as fp:
# an HDF5 file always has a root group "/", which we use implicitly here
dset = fp.create_dataset("random_matrix", data=M)
# add metadata (attributes) describing the dataset via the 'attrs' proxy object
dset.attrs["whatis"] = "This is a randomly generated 128x128 matrix."
# or even easier
fp["random_matrix2"] = M
fp["random_matrix2"].attrs["whatis"] = "This is another randomly generated 128x128 matrix."
```
%% Cell type:markdown id: tags:
$\to$ use `with` to ensure file is closed and written!
%% Cell type:markdown id: tags:
### Access a HDF5 dataset
%% Cell type:code id: tags:
``` python
import numpy as np
import h5py
with h5py.File("data.hdf5", 'r') as fp:
print("Available keys:", tuple(fp.keys()))
# retrieve the dataset
dset = fp["random_matrix"]
print(type(dset), dset.shape, dset.dtype)
# retrieve the metadata that comes with the dataset
for key,val in dset.attrs.items():
print("{} : {}".format(key, val))
# dset can be used similarly to a NumPy array
elem = dset[0,0] # indexing, slicing
dsum = np.sum(dset) # not dset.sum()!
# explicit conversion to NumPy array
M = dset[:]
print(type(M), M.shape, M.dtype)
msum = M.sum()
print("Sums are the same:", dsum == msum)
```
%% Cell type:markdown id: tags:
### Parallel Access (Preview)
%% Cell type:code id: tags:
``` python
from mpi4py import MPI
import h5py
rank = MPI.COMM_WORLD.rank # The process ID (integer 0-3 for 4-process run)
f = h5py.File('parallel_test.hdf5', 'w', driver='mpio', comm=MPI.COMM_WORLD)
dset = f.create_dataset('test', (4,), dtype='i')
dset[rank] = rank
f.close()
```
%% Cell type:markdown id: tags:
## Advanced usage of HDF5-IO on HPC systems
* checkpointing/resuming of long-running jobs
* write current state of simulation to HDF5 file
* resume simulation by reading back-in that state
* upcoming optional exercise: diffusion example
* large-scale parallel IO from multiple MPI processes
    * write few, large files from each process
    * limit the number of files per directory (max. ~1000)
* write to a single file using MPI-enabled `h5py` (MPCDF environment module `h5py-mpi`)
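%% Cell type:markdown id: tags:
A minimal checkpoint/resume sketch with `h5py` (the file name, dataset name, and attribute name are arbitrary choices for illustration):
%% Cell type:code id: tags:
``` python
import numpy as np
import h5py

def save_checkpoint(fname, step, state):
    with h5py.File(fname, 'w') as fp:
        fp["state"] = state
        fp["state"].attrs["step"] = step

def load_checkpoint(fname):
    with h5py.File(fname, 'r') as fp:
        return fp["state"].attrs["step"], fp["state"][:]

state = np.random.rand(64, 64)
save_checkpoint("checkpoint.h5", 100, state)
step, restored = load_checkpoint("checkpoint.h5")
print(step, np.allclose(state, restored))
```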
%% Cell type:markdown id: tags:
## Summary on HDF5 IO of NumPy data
* easy to use with `h5py` (alternative module would be `pytables`)
* hierarchical structure enables you to store complex data in a single file
* HDF5 files are future-proof and portable data files
* data can be shared & used by other programs
$\to$ our recommendation for NumPy-related I/O!
Further reading:
* Collette, A. (2013). Python and HDF5 (1st edition.). O'Reilly Media, Inc.
(https://ebooks.mpdl.mpg.de/ebooks/Record/EB001941719)
%% Cell type:markdown id: tags:
# Application: image processing
%% Cell type:markdown id: tags:
## Example 1: Smooth Images
![image filter](fig/image-filter-example.svg)
%% Cell type:markdown id: tags:
## Smoothing with image filters
* Goal: subtract noise
* Use filtering approach
* For each pixel, at $i, j$, compute:
$$\tilde{f}_{i,j} = \alpha f_{i,j} + \frac{(1 - \alpha)}{4} (f_{i-1,j} + f_{i+1,j} + f_{i,j-1} + f_{i,j+1})$$
* Mix pixel with average of surrounding pixels $\to$ *filter out high-frequency noise*
* Can also be rewritten using the discrete Laplace operator:
$$\tilde{f}_{i,j} = f_{i,j} + \frac{(1 - \alpha)}{4} \Delta f_{i,j}$$
* Strongest smoothing for $\alpha = 0$, no smoothing for $\alpha = 1$
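%% Cell type:markdown id: tags:
The same filter can also be expressed as a 2D convolution with a 3x3 kernel (a minimal sketch using `scipy.ndimage.convolve`; note the boundary handling differs from the slicing version below, which leaves the border untouched):
%% Cell type:code id: tags:
``` python
import numpy as np
from scipy import ndimage
alpha = 0.25
kernel = np.array([[0.0, (1 - alpha)/4, 0.0],
                   [(1 - alpha)/4, alpha, (1 - alpha)/4],
                   [0.0, (1 - alpha)/4, 0.0]])
f = np.random.rand(16, 16)
smoothed = ndimage.convolve(f, kernel, mode='nearest')
# interior points match the filter formula exactly
i, j = 8, 8
expected = alpha*f[i, j] + (1 - alpha)/4*(f[i-1, j] + f[i+1, j] + f[i, j-1] + f[i, j+1])
print(np.isclose(smoothed[i, j], expected))
```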
%% Cell type:markdown id: tags:
### Image: alpha Centauri
* Image from DSS (for copyright see http://archive.stsci.edu/dss/acknowledging.html)
%% Cell type:code id: tags:
``` python
import numpy as np
import matplotlib.pyplot as plt
# convert to float between 0 and 1
image = plt.imread("fig/dss_alpha_centauri.gif").astype(np.float64)/255.
plt.imshow(image, cmap='gray')
print("Shape: ", image.shape)
```
%% Cell type:markdown id: tags:
## Loops in python vs. numpy arrays
%% Cell type:code id: tags:
``` python
def get_filtered_loop(image, alpha=0.25):
filtered = np.empty_like(image)
N, M = image.shape
for i in range(1, N-1):
for j in range(1, M-1):
filtered[i, j] = 0.25*(1.0-alpha)*(image[i-1, j] + image[i+1, j]
+ image[i, j-1] + image[i, j+1]) \
+ alpha*image[i, j]
return filtered
%timeit get_filtered_loop(image)
```
%% Cell type:code id: tags:
``` python
def get_filtered_array(image, alpha=0.25):
filtered = np.empty_like(image)
N, M = image.shape
filtered[1:N-1, 1:M-1] = 0.25*(1.0-alpha)*(image[0:N-2, 1:M-1] + image[2:N, 1:M-1]
+ image[1:N-1, 0:M-2] + image[1:N-1, 2:M]) \
+ alpha*image[1:N-1, 1:M-1]
return filtered
%timeit get_filtered_array(image)
```
%% Cell type:markdown id: tags:
$\to$ roughly factor $\mathcal{O}(10 - 100)$ faster!
%% Cell type:markdown id: tags:
## Transformation step by step
* Transform for loops to indices on left hand side (same range)
* From:
```python
for i in range(1, N-1):
for j in range(1, M-1):
```
To:
```python
filtered[1:N-1, 1:M-1]
```
* Shift indices on right hand side: `i-1` $\to$ `0:N-2`, `i` $\to$ `1:N-1`, `i+1` $\to$ `2:N`
* E.g.: `image[i-1, j]` $\to$ `image[0:N-2, 1:M-1]`
* Similar for `j`
%% Cell type:code id: tags:
``` python
filtered_loop = get_filtered_loop(image)
filtered_array = get_filtered_array(image)
f, axs = plt.subplots(2, 3, figsize=(14, 8))
axs[0, 0].imshow(image, cmap='gray')
axs[0, 1].imshow(filtered_array, cmap='gray')
axs[0, 2].imshow(filtered_array - filtered_loop, cmap='gray')
axs[1, 0].imshow(image[600:700, 600:700], cmap='gray')
axs[1, 1].imshow(filtered_array[600:700, 600:700], cmap='gray')
axs[1, 2].imshow((image-filtered_array)[600:700, 600:700], cmap='gray')
print("Loop and array method yields the same: ", np.allclose(filtered_loop, filtered_array))
```
%% Cell type:markdown id: tags:
## Example 2: Get edges
* Use the following formula:
$$\tilde{f}_{i,j} = 8 f_{i,j} - (f_{i-1,j} + f_{i+1,j} + f_{i,j-1} + f_{i,j+1} + f_{i-1,j-1} + f_{i-1,j+1} + f_{i+1,j+1} + f_{i+1,j-1})$$
* Subtract mean of surrounding cells
* Also use diagonals
* Truncate results to a certain percentile
%% Cell type:code id: tags:
``` python
from scipy.misc import ascent  # note: removed in SciPy >= 1.12, use scipy.datasets.ascent there
im = plt.imshow(ascent(), cmap='gray')
```
%% Cell type:markdown id: tags:
### Our implementation
%% Cell type:code id: tags:
``` python
def get_edges(image, percentiles=(25, 99)):
result = np.empty_like(image)
N, M = image.shape
result[1:N-1, 1:M-1] = -1.0*(image[0:N-2, 1:M-1] + image[2:N, 1:M-1]
+ image[1:N-1, 0:M-2] + image[1:N-1, 2:M]
+ image[0:N-2, 0:M-2] + image[0:N-2, 2:M]
+ image[2:N, 0:M-2] + image[2:N, 2:M]) + 8.*image[1:N-1, 1:M-1]
# now truncate results
fmin, fmax = np.percentile(result, percentiles)
result[result < fmin] = fmin
result[result > fmax] = fmax
return result
f, axs = plt.subplots(1, 2, figsize=(15, 5))
axs[0].imshow(ascent(), cmap='gray')
axs[1].imshow(get_edges(ascent()), cmap='gray');None
```
%% Cell type:markdown id: tags:
### Predefined filter
%% Cell type:code id: tags:
``` python
from PIL import Image, ImageFilter
img = Image.fromarray(ascent().astype("int32"), mode="I").convert("L")
filtered_img = img.filter(ImageFilter.FIND_EDGES)
f, axs = plt.subplots(1, 2, figsize=(15, 5))
axs[0].imshow(ascent(), cmap='gray')
axs[1].imshow(np.asarray(filtered_img), cmap='gray');None
```
%% Cell type:markdown id: tags:
$\to$ results very similar!