From f51e44fdc24935fe30e1726ae437ee5d36e0b5ca Mon Sep 17 00:00:00 2001 From: Repo Updater <noreply@mpcdf.mpg.de> Date: Mon, 24 Jul 2023 18:12:08 +0200 Subject: [PATCH] 529e2818 further polishing of Python recap --- notebooks/01--Introduction.ipynb | 2 +- notebooks/03--Python_Refresher.ipynb | 272 ++++++++++--------- notebooks/05--NumPy_SciPy.ipynb | 173 +++--------- notebooks/09--Interfacing_with_C_and_F.ipynb | 6 +- 4 files changed, 175 insertions(+), 278 deletions(-) diff --git a/notebooks/01--Introduction.ipynb b/notebooks/01--Introduction.ipynb index 85ea3af..2b361ea 100644 --- a/notebooks/01--Introduction.ipynb +++ b/notebooks/01--Introduction.ipynb @@ -120,10 +120,10 @@ "source": [ "### *Python for HPC* complements advanced HPC courses\n", "* We cannot cover traditional HPC topics in depth, and would highly recommend the following courses\n", + " * Node-level performance engineering\n", " * Parallel programming with MPI\n", " * Parallel programming with OpenMP\n", " * Advanced parallel programming with OpenMP and MPI\n", - " * Node-level performance engineering\n", "* Watch out for these courses which are regularly offered by HLRS, LRZ, NHR/RRZE" ] }, diff --git a/notebooks/03--Python_Refresher.ipynb b/notebooks/03--Python_Refresher.ipynb index 4923dff..21f5600 100644 --- a/notebooks/03--Python_Refresher.ipynb +++ b/notebooks/03--Python_Refresher.ipynb @@ -27,7 +27,7 @@ "source": [ "### Python: History and Status\n", "* First version released in 1991 by G. 
van Rossum\n", - "* Implementations: **cPython**, PyPy, Pyston, ...\n", + "* [Implementations](https://wiki.python.org/moin/PythonImplementations): **CPython**, PyPy, Pyston, ...\n", "* Language versions: 2.7 (legacy, ✞2020), now 3.8 - 3.11 \n", " (to migrate legacy code, the packages `2to3`, `six`, `future` are helpful)" ] @@ -41,6 +41,7 @@ }, "source": [ "### Python Language Key Features\n", + "* high-level language, extensive standard library and ecosystem\n", "* no type declarations, types are tracked at runtime \n", " $\\rightarrow$ code is interpreted in most implementations (or just-in-time compiled) \n", " $\\rightarrow$ certain [compiler optimizations](https://en.wikipedia.org/wiki/Optimizing_compiler) are not possible $\\rightarrow$ performance overhead\n", @@ -84,14 +85,15 @@ }, "source": [ "#### Jupyter notebooks\n", - "* web-based interactive development environment\n", + "* web-based interactive development environment and document\n", + "* code, figures and documentation can be mixed in the same notebook\n", + " * plot via matplotlib, document using markdown, typeset formulas via LaTeX\n", "* code blocks are organized in cells\n", " * press \"Shift + Enter\" to execute the current cell\n", " * select \"Cell\" $\\to$ \"Run all\" from the menu to run the complete notebook\n", - "* code, figures and documentation can be mixed in the same notebook\n", - " * plot via matplotlib, document using markdown, typeset formulas via LaTeX\n", - "* locally, launch via `jupyter notebook` or `jupyter lab` from a terminal, then work in your browser\n", "* notebook export to Python source file works via \"File\" $\\to$ \"Download as\"\n", + "* [papermill](https://papermill.readthedocs.io/en/latest/) allows running notebooks in 'headless' mode without user interaction (e.g. in a batch job)\n", + "* locally, launch via `jupyter notebook` or `jupyter lab` from a terminal\n", "* https://jupyter.org" ] }, @@ -464,8 +466,8 @@ "* branching \n", " `if` ... `elif` ... 
`else`\n", "* loops\n", - " * `for`\n", - " * `while`\n", + " * `for` $-$ used to repeat a block of code a fixed number of times, used in combination with an iterable object (`range()`, list, etc.)\n", + " * `while` $-$ used to repeat a block of code as long as a condition is satisfied\n", "* loop modifier statements\n", " * `break`\n", " * `continue`\n", @@ -992,113 +994,6 @@ "sum(first_n_sq_gen_expr)" ] }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### Basic file IO" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "# naive way: write to a text file\n", - "file_name = \"/tmp/dummy.txt\"\n", - "fh = open(file_name, 'w')\n", - "fh.write(\"Hello\\n\")\n", - "fh.close()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "# better: write to a text file, close file handle implicitly (ContextManager)\n", - "with open(file_name, 'a') as fh:\n", - " fh.write(\"World\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "skip" - } - }, - "source": [ - "#### Background information on the `with` statement and context managers\n", - "\n", - "* https://docs.python.org/3/reference/compound_stmts.html#the-with-statement\n", - "* https://docs.python.org/3/reference/datamodel.html#with-statement-context-managers\n", - "* https://docs.python.org/3/library/contextlib.html#contextlib.contextmanager" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['Hello\\n', 'World\\n']\n" - ] - } - ], - "source": [ - "# read the complete file into a list in memory\n", - "with open(file_name, 'r') as 
fh:\n", - " lines = fh.readlines()\n", - "print(lines)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Hello\n", - "World\n" - ] - } - ], - "source": [ - "# when reading, the file handle `fh` is actually an iterator on the lines of the file\n", - "with open(file_name, 'r') as fh:\n", - " for line in fh:\n", - " print(line, end='')" - ] - }, { "cell_type": "markdown", "metadata": { @@ -1268,6 +1163,133 @@ " https://docs.python.org/3.11/library/index.html" ] }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "### PEP8, style guide for Python code\n", + "* PEP8 is a style guide to write clean, readable, and maintainable Python code\n", + " * indentation using 4 *spaces*\n", + " * 79 characters maximum line width\n", + " * UTF8 source file encoding\n", + " * comments, docstrings\n", + " * naming conventions\n", + " * ...\n", + "* https://www.python.org/dev/peps/pep-0008/\n", + "* convert existing code into a PEP8 compliant format using `autopep8` or `black`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "source": [ + "### Basic file IO" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "# naive way: write to a text file\n", + "file_name = \"/tmp/dummy.txt\"\n", + "fh = open(file_name, 'w')\n", + "fh.write(\"Hello\\n\")\n", + "fh.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "# better: write to a text file, close file handle implicitly (ContextManager)\n", + "with open(file_name, 'a') as fh:\n", + " fh.write(\"World\\n\")" + ] + }, + { + 
"cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "source": [ + "#### Background information on the `with` statement and context managers\n", + "\n", + "* https://docs.python.org/3/reference/compound_stmts.html#the-with-statement\n", + "* https://docs.python.org/3/reference/datamodel.html#with-statement-context-managers\n", + "* https://docs.python.org/3/library/contextlib.html#contextlib.contextmanager" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "slideshow": { + "slide_type": "subslide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Hello\\n', 'World\\n']\n" + ] + } + ], + "source": [ + "# read the complete file into a list in memory\n", + "with open(file_name, 'r') as fh:\n", + " lines = fh.readlines()\n", + "print(lines)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Hello\n", + "World\n" + ] + } + ], + "source": [ + "# when reading, the file handle `fh` is actually an iterator on the lines of the file\n", + "with open(file_name, 'r') as fh:\n", + " for line in fh:\n", + " print(line, end='')" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1377,26 +1399,6 @@ "```" ] }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "subslide" - } - }, - "source": [ - "### PEP8, style guide for Python code\n", - "* PEP8 is a style guide to write clean, readable, and maintainable Python code\n", - " * indentation using 4 *spaces*\n", - " * 79 characters maximum line width\n", - " * UTF8 source file encoding\n", - " * comments, docstrings\n", - " * naming conventions\n", - " * ...\n", - "* https://www.python.org/dev/peps/pep-0008/\n", - "* convert existing code into a PEP8 compliant format using `autopep8` or `black`" - ] - }, { "cell_type": "markdown", 
"metadata": { @@ -1434,7 +1436,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/notebooks/05--NumPy_SciPy.ipynb b/notebooks/05--NumPy_SciPy.ipynb index 3bd5115..f18bc54 100644 --- a/notebooks/05--NumPy_SciPy.ipynb +++ b/notebooks/05--NumPy_SciPy.ipynb @@ -358,7 +358,7 @@ "source": [ "### NumPy Arrays - Functions Performance\n", "\n", - "* prefer to use functions if they are there: <span style=\"color:green\">faster</span> and <span style=\"color:green\">cleaner</span>" + "* prefer to use *built-in* functions if they are there: <span style=\"color:green\">faster</span> and <span style=\"color:green\">cleaner</span>" ] }, { @@ -426,10 +426,31 @@ } }, "source": [ - "### NumPy Arrays - Basic Slicing\n", + "### NumPy Arrays - Indexing\n", "\n", - "* syntax identical to lists\n", - "* does <span style=\"color:red\">not guarantee contiguity</span> of data" + "* syntax identical to standard Python: `x[sel]`\n", + "* does <span style=\"color:red\">not guarantee contiguity</span> of data\n", + "* multidimensional indexing `x[sel1, sel2, sel3]` is shorthand for `x[(sel1, sel2, sel3)]`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### NumPy Arrays - Basic Indexing\n", + "\n", + "* single element indexing:\n", + " * `x[0]`\n", + " * `x[0,1]` (equivalent to `x[0][1]` but more efficient!)\n", + " \n", + "* slicing and striding\n", + " * selection object is a `slice` (`start:stop:step`) or tuple of `slice`, integers, `newaxis`, `Ellipsis`\n", + " \n", + "* basic indexing generally creates <span style=\"color:red\">views</span> into the original array" ] }, { @@ -437,7 +458,7 @@ "execution_count": null, "metadata": { "slideshow": { - "slide_type": "fragment" + "slide_type": "subslide" } }, "outputs": [], @@ -923,9 +944,9 @@ }, "outputs": [], "source": [ - "# power spectrum (code taken from the SciPy 
tutorial)\n", + "# power spectrum (original code taken from the SciPy tutorial)\n", "import numpy as np\n", - "from scipy.fftpack import fft\n", + "from scipy.fft import fft, fftfreq\n", "import matplotlib.pyplot as plt\n", "N = 600 # number of sample points\n", "T = 1.0 / 800.0 # sample spacing\n", @@ -933,9 +954,9 @@ "y = np.sin(50.0 * 2.0*np.pi*x) + 0.5*np.sin(80.0 * 2.0*np.pi*x)\n", "\n", "yf = fft(y)\n", - "xf = np.linspace(0.0, 1.0/(2.0*T), N//2)\n", + "xf = fftfreq(N, T)\n", "\n", - "plt.plot(xf, 2.0/N * np.abs(yf[0:N//2]))\n", + "plt.plot(xf[0:N//2], 2.0/N * np.abs(yf[0:N//2]))\n", "plt.grid()" ] }, @@ -991,91 +1012,7 @@ } }, "source": [ - "## SciPy Example 3: Sparse Matrices\n", - "\n", - "\n", - "* different sparse formats for different tasks:\n", - " * CSR: compressed sparse row, for row operations/slicing\n", - " * CSC: compressed sparse column, for column operations/slicing\n", - " * LIL: list-of-lists, efficient for building the matrix, slow for most other operations\n", - " * DIA: diagonal matrix, very useful for FD\n", - " * `speye()`: sparse identity\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "### SciPy Example 3: Sparse Matrices" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "from scipy import sparse\n", - "import numpy as np\n", - "\n", - "def fill_matrix(M):\n", - " for i in range(300):\n", - " rows = np.random.randint(0,N,300)\n", - " cols = np.random.randint(0,N,300)\n", - " val = np.random.random()\n", - " M[rows,cols] = val\n", - " return M\n", - "\n", - "N = 10000\n", - "Mlil = sparse.lil_matrix((N,N))\n", - "Mcsr = sparse.csr_matrix((N,N))\n", - "\n", - "%time Mcsr = fill_matrix(Mcsr)\n", - "%time Mlil = fill_matrix(Mlil)\n", - "%time Mlil = fill_matrix(Mlil).tocsr()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - 
"slide_type": "slide" - } - }, - "source": [ - "## SciPy - Linear Algebra\n", - "\n", - "\n", - "* handles dense and sparse matrices\n", - "* delegates computation to lower level libraries\n", - "* e.g. Anaconda Python comes with Intel MKL\n", - "* SIMD instructions (\"vectorization\")\n", - "* parallel processing (\"threading\")\n", - "\n", - "<br>\n", - "\n", - "* NumPy vs SciPy `linalg` module:\n", - " * NumPy: uses own implementation (slower) if Fortran Compiler unavailable\n", - " * SciPy: requires Fortran Compiler and has more functions" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "## SciPy Example 4: Sparse vs Dense Linear Systems\n", + "## SciPy Example 3: Sparse vs Dense Linear Systems\n", "\n", "* dense: `scipy.linalg`, sparse: `scipy.sparse.linalg`\n", "* sparse matrix operations can be much faster, depending on the density of matrix" @@ -1120,51 +1057,7 @@ } }, "source": [ - "### SciPy - Further Linear Algebra Functions\n", - "\n", - "* Matrix-matrix and matrix-vector products \n", - " * careful with preservation of sparsity!\n", - "* Solve linear systems\n", - "* Compute eigenvalues and singular values\n", - "* Compute pseudoinverse" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "slideshow": { - "slide_type": "fragment" - } - }, - "outputs": [], - "source": [ - "N = 100\n", - "A = np.random.rand(N, N)\n", - "x = np.random.rand(N)\n", - "u, s, vh = linalg.svd(A) # singular value decomposition\n", - "values, vectors = linalg.eig(A) # eigenvalue problem\n", - "det = linalg.det(A) # determinant\n", - "P = linalg.pinv(A) # pseudoinverse (accepts rectangular matrices)\n", - "\n", - "# Further sparse matrix operations\n", - "A = sparse.random(N, N, density=0.01, format='csr')\n", - "B = sparse.random(N, N, density=0.01, format='csr')\n", - "C = sparse.diags(np.arange(4), 1)\n", - "D = sparse.kron(B, A) # kronecker product of matrices\n", - "ev, 
evec = splinalg.eigs(A, 2) # 2 largest eigenvalues\n", - "u, s, vh = sparse.linalg.svds(A, 2) # 2 largest singular values" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "## SciPy Example 5: Interpolation\n", + "## SciPy Example 4: Interpolation\n", "* Interpolation of 1D and multi-D data (structured grid and unstructured points)\n", "* Splines and other polynomials" ] @@ -1716,7 +1609,7 @@ } }, "source": [ - "$\\to$ roughly factor 100-150 faster!" + "$\\to$ roughly a factor of $\\mathcal{O}(10 - 100)$ faster!" ] }, { diff --git a/notebooks/09--Interfacing_with_C_and_F.ipynb b/notebooks/09--Interfacing_with_C_and_F.ipynb index 83724b5..dc0a03b 100644 --- a/notebooks/09--Interfacing_with_C_and_F.ipynb +++ b/notebooks/09--Interfacing_with_C_and_F.ipynb @@ -76,8 +76,10 @@ " * automatically takes care of type casting and non-contiguous arrays\n", "* two ways of usage\n", " * direct calling of the `f2py` executable\n", - " * define an extension in `setup.py` using `numpy.distutils`\n", - "* see examples in './f2py' (cf. the [f2py user guide](https://sysbio.ioc.ee/projects/f2py2e/usersguide/index.html))" + " * via a build system\n", + " * define an extension in `setup.py` using `numpy.distutils` (deprecated, not available for Python >= 3.12)\n", + " * use external build systems: `cmake`, `meson`, `scikit-build`, see the [f2py user guide](https://numpy.org/doc/stable/f2py/index.html)\n", + "* see examples in './f2py' " ] }, { -- GitLab