diff --git a/.cache/cache.db b/.cache/cache.db deleted file mode 100644 index d8d3e492c9ea34b5ada4eae9053518cb5534e344..0000000000000000000000000000000000000000 Binary files a/.cache/cache.db and /dev/null differ diff --git a/data/cache.dat b/data/cache.dat new file mode 100644 index 0000000000000000000000000000000000000000..317f612fb8476a15fba7fa3f472d9387b20a01b2 Binary files /dev/null and b/data/cache.dat differ diff --git a/requirements.txt b/requirements.txt index e5ec021d00fd1639e5e42286306bf772657433ca..04660f07555e3cd7644c4ba67b2ad04042d9650f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,3 @@ scipy==1.4.1 pandas==0.24.2 scikit-learn==0.22.1 joblib==0.14.1 -diskcache==4.1.0 diff --git a/tcmi.ipynb b/tcmi.ipynb index ab6acac4e3ecd3e35c9b828f9ddb384f45979b67..cc16309926d6eca564523e784760c7117a6af83e 100644 --- a/tcmi.ipynb +++ b/tcmi.ipynb @@ -25,7 +25,7 @@ " Matthias Scheffler<sup>1</sup>,\n", " and Luca Ghiringhelli<sup> 1</sup> <br><br>\n", "<sup>1</sup> Fritz Haber Institute of the Max Planck Society, Faradayweg 4-6, D-14195 Berlin, Germany <br>\n", - "<span class=\"nomad--last-updated\" data-version=\"v1.0.0\">[Last updated: January 14, 2020]</span>\n", + "<span class=\"nomad--last-updated\" data-version=\"v1.0.1\">[Last updated: January 17, 2020]</span>\n", "</p>\n", " \n", "<div> \n", @@ -75,8 +75,8 @@ "execution_count": 1, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T17:27:29.160459Z", - "start_time": "2020-01-14T17:27:29.158391Z" + "end_time": "2020-01-17T09:14:38.542016Z", + "start_time": "2020-01-17T09:14:38.539754Z" }, "init_cell": true }, @@ -88,19 +88,22 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "heading_collapsed": true + }, "source": [ "## Import modules" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T17:28:05.660112Z", - "start_time": "2020-01-14T17:28:05.535093Z" + "end_time": 
"2020-01-17T09:14:39.810527Z", + "start_time": "2020-01-17T09:14:38.543402Z" }, + "hidden": true, "init_cell": true }, "outputs": [], @@ -112,7 +115,6 @@ "import warnings\n", "import functools\n", "import itertools\n", - "import diskcache\n", "\n", "import numpy as np\n", "import scipy as sp\n", @@ -124,6 +126,7 @@ "import tcmi\n", "from tcmi import utils\n", "from tcmi import entropy\n", + "from tcmi.cache import Cache\n", "from tcmi.subspace_search import get_subspaces\n", "from tcmi.estimators import DependenceEstimator\n", "\n", @@ -137,7 +140,7 @@ "# Main loop\n", "if __name__ == '__main__': \n", " # Provide cache\n", - " storage = diskcache.Cache('.cache')\n", + " storage = Cache('data')\n", " \n", " # Configure plot environment\n", " mpl.rc('font', family='sans', size=14)\n", @@ -159,11 +162,7 @@ " \n", " # General settings (please do not touch)\n", " kwargs = dict(n_jobs=-1, return_scores=True)\n", - " seed = 2019\n", - "\n", - "\n", - " directory = 'thumbnails'\n", - " os.makedirs(directory, exist_ok=True)" + " seed = 2019" ] }, { @@ -198,8 +197,8 @@ "execution_count": 5, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:49.116976Z", - "start_time": "2020-01-14T16:57:49.114571Z" + "end_time": "2020-01-17T09:14:10.654185Z", + "start_time": "2020-01-17T09:14:10.651576Z" }, "hidden": true }, @@ -244,8 +243,8 @@ "execution_count": 6, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:49.154114Z", - "start_time": "2020-01-14T16:57:49.118327Z" + "end_time": "2020-01-17T09:14:10.688047Z", + "start_time": "2020-01-17T09:14:10.655785Z" }, "hidden": true }, @@ -515,8 +514,8 @@ "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:49.581396Z", - "start_time": "2020-01-14T16:57:49.155732Z" + "end_time": "2020-01-17T09:14:11.115344Z", + "start_time": "2020-01-17T09:14:10.689464Z" }, "hidden": true, "hide_input": false @@ -702,8 +701,8 @@ "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": 
"2020-01-14T16:57:49.606945Z", - "start_time": "2020-01-14T16:57:49.582702Z" + "end_time": "2020-01-17T09:14:11.136801Z", + "start_time": "2020-01-17T09:14:11.116550Z" }, "hidden": true, "scrolled": true @@ -796,8 +795,8 @@ "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:50.149158Z", - "start_time": "2020-01-14T16:57:49.608329Z" + "end_time": "2020-01-17T09:14:11.659328Z", + "start_time": "2020-01-17T09:14:11.138689Z" }, "hidden": true }, @@ -932,6 +931,7 @@ { "cell_type": "markdown", "metadata": { + "heading_collapsed": true, "hidden": true }, "source": [ @@ -953,8 +953,8 @@ "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:52.376153Z", - "start_time": "2020-01-14T16:57:50.150470Z" + "end_time": "2020-01-17T09:14:14.090857Z", + "start_time": "2020-01-17T09:14:11.660643Z" }, "hidden": true }, @@ -964,11 +964,11 @@ "output_type": "stream", "text": [ "Subspace: {} -> [-inf +- 0.00]\n", - "x1 [Score: 0.29 +- 0.00]\n", - "x2 [Score: 0.29 +- 0.00]\n", - "x3 [Score: 0.29 +- 0.00]\n", + "x1 [Score: 0.22 +- 0.00]\n", + "x2 [Score: 0.22 +- 0.00]\n", + "x3 [Score: 0.22 +- 0.00]\n", "Bound: -inf -> -inf\n", - "[2020-01-14 17:57:52 - 0 subspaces remaining]\n", + "[2020-01-17 10:14:14 - 0 subspaces remaining]\n", "\n", "\n", "Balance search tree\n" @@ -1013,8 +1013,8 @@ "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:52.381592Z", - "start_time": "2020-01-14T16:57:52.378442Z" + "end_time": "2020-01-17T09:14:14.095658Z", + "start_time": "2020-01-17T09:14:14.092601Z" }, "hidden": true }, @@ -1047,8 +1047,8 @@ "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:52.399855Z", - "start_time": "2020-01-14T16:57:52.383463Z" + "end_time": "2020-01-17T09:14:14.117423Z", + "start_time": "2020-01-17T09:14:14.096814Z" }, "hidden": true }, @@ -1110,8 +1110,8 @@ "execution_count": 13, "metadata": { "ExecuteTime": { - "end_time": 
"2020-01-14T16:57:52.407133Z", - "start_time": "2020-01-14T16:57:52.401094Z" + "end_time": "2020-01-17T09:14:14.125156Z", + "start_time": "2020-01-17T09:14:14.118966Z" }, "hidden": true }, @@ -1183,8 +1183,8 @@ "execution_count": 14, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:52.422195Z", - "start_time": "2020-01-14T16:57:52.408387Z" + "end_time": "2020-01-17T09:14:14.139619Z", + "start_time": "2020-01-17T09:14:14.127330Z" }, "hidden": true }, @@ -1313,8 +1313,8 @@ "execution_count": 15, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:52.453354Z", - "start_time": "2020-01-14T16:57:52.423327Z" + "end_time": "2020-01-17T09:14:14.165781Z", + "start_time": "2020-01-17T09:14:14.141425Z" }, "hidden": true, "scrolled": true @@ -1541,8 +1541,8 @@ "execution_count": 16, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:52.521337Z", - "start_time": "2020-01-14T16:57:52.454571Z" + "end_time": "2020-01-17T09:14:14.233510Z", + "start_time": "2020-01-17T09:14:14.167295Z" }, "code_folding": [ 0, @@ -1764,8 +1764,8 @@ "execution_count": 17, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:53.012686Z", - "start_time": "2020-01-14T16:57:52.522645Z" + "end_time": "2020-01-17T09:14:14.715484Z", + "start_time": "2020-01-17T09:14:14.234956Z" }, "code_folding": [], "hidden": true @@ -1937,8 +1937,8 @@ "execution_count": 18, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:53.016279Z", - "start_time": "2020-01-14T16:57:53.013963Z" + "end_time": "2020-01-17T09:14:14.718967Z", + "start_time": "2020-01-17T09:14:14.716748Z" }, "hidden": true }, @@ -1962,8 +1962,8 @@ "execution_count": 19, "metadata": { "ExecuteTime": { - "end_time": "2020-01-14T16:57:53.042121Z", - "start_time": "2020-01-14T16:57:53.017442Z" + "end_time": "2020-01-17T09:14:14.745691Z", + "start_time": "2020-01-17T09:14:14.720322Z" }, "hidden": true }, @@ -2051,8 +2051,8 @@ "execution_count": 20, "metadata": { "ExecuteTime": { - "end_time": 
# -*- coding: utf-8 -*-
"""
@package tcmi.cache

@copyright Copyright (c) 2018+ Fritz Haber Institute of the Max Planck Society,
                               Benjamin Regler <regler@fhi-berlin.mpg.de>
@license See LICENSE file for details.

Licensed under the Apache License, Version 2.0 (the "License").
You may not use this file except in compliance with the License.
"""

import os
import gzip
import pickle
import tempfile


class Constant(tuple):
    """Pretty display of immutable constant.

    The constant is a one-element tuple holding its name; ``repr`` shows
    only that name.
    """

    def __new__(cls, name):
        return tuple.__new__(cls, (name,))

    def __repr__(self):
        return '%s' % self[0]


class Cache(object):
    """Simple dictionary-like cache backed by a single file on disk.

    All entries are held in memory. They are read once from
    ``<directory>/cache.dat`` (a gzip-compressed pickle) when the instance
    is created and are written back only when `save` is called explicitly.
    """

    # Sentinel used to tell "key is missing" apart from a stored ``None``.
    ENOVAL = Constant('ENOVAL')

    def __init__(self, directory=None):
        """Initialize cache instance.

        Parameters
        ----------
        directory : str, optional
            Directory holding the cache file. ``~`` and environment
            variables are expanded. If omitted, a fresh temporary
            directory is created.

        Raises
        ------
        RuntimeError
            If the cache directory does not exist and cannot be created.
            The arguments are the ``errno`` of the underlying ``OSError``
            and a descriptive message.
        """
        if directory is None:
            directory = tempfile.mkdtemp(prefix='diskcache-')
        directory = os.path.expanduser(directory)
        directory = os.path.expandvars(directory)

        self._directory = directory
        self._filename = os.path.join(directory, 'cache.dat')

        # ``exist_ok=True`` tolerates an already existing directory,
        # including one created concurrently by another process, so no
        # manual errno inspection is required.
        try:
            os.makedirs(directory, 0o755, exist_ok=True)
        except OSError as error:
            raise RuntimeError(
                error.errno,
                'Cache directory "%s" does not exist'
                ' and could not be created' % self._directory
            ) from error

        data = {}
        if os.path.exists(self._filename):
            # NOTE: unpickling can execute arbitrary code. Only point the
            # cache at directories whose contents are trusted.
            with gzip.open(self._filename, 'rb') as file:
                data = pickle.load(file)
        self._data = data

    @property
    def directory(self):
        """Cache directory."""
        return self._directory

    @property
    def filename(self):
        """Cache filename."""
        return self._filename

    def __getitem__(self, key):
        """Return corresponding value for `key` from cache.

        Raises
        ------
        KeyError
            If `key` is not present in the cache.
        """
        value = self.get(key, default=self.ENOVAL)
        if value is self.ENOVAL:
            raise KeyError(key)
        return value

    def __setitem__(self, key, value):
        """Set corresponding `value` for `key` in cache."""
        self.set(key, value)

    def get(self, key, default=None):
        """Retrieve value from cache. If `key` is missing, return `default`."""
        return self._data.get(key, default)

    def set(self, key, value):
        """Set `key` and `value` item in cache.

        The change is kept in memory only until `save` is called.
        Always returns ``True``.
        """
        self._data[key] = value
        return True

    def save(self):
        """Save cache to file as a gzip-compressed pickle."""
        with gzip.open(self._filename, 'wb') as file:
            pickle.dump(self._data, file, pickle.HIGHEST_PROTOCOL)