From 367eb40429270e7a1aeef749dbfb0831e8b354ea Mon Sep 17 00:00:00 2001
From: Jon Mease <jon.mease@gmail.com>
Date: Mon, 23 Jul 2018 16:44:39 -0400
Subject: [PATCH] Download Taxi data if not yet cached

---
 notebooks/nyc_taxi_selection.ipynb | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/notebooks/nyc_taxi_selection.ipynb b/notebooks/nyc_taxi_selection.ipynb
index 7433bbd..6bab48f 100644
--- a/notebooks/nyc_taxi_selection.ipynb
+++ b/notebooks/nyc_taxi_selection.ipynb
@@ -14,6 +14,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import os\n",
     "import pandas as pd\n",
     "import numpy as np\n",
     "import plotly.graph_objs as go\n",
@@ -26,7 +27,24 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df = pd.read_pickle('data/nyc_taxi.pkl')\n",
+    "pkl_path = 'data/nyc_taxi.pkl'\n",
+    "if os.path.exists(pkl_path):\n",
+    "    print('Loading saved dataset file... ', end='')\n",
+    "    df = pd.read_pickle(pkl_path)\n",
+    "    print('done')\n",
+    "else:\n",
+    "    print('Downloading and saving dataset (thanks to the datashader project for making this example dataset available!)... ', end='')\n",
+    "    df = pd.read_csv('http://s3.amazonaws.com/datashader-data/nyc_taxi.zip', compression='zip')\n",
+    "    df.to_pickle(pkl_path)\n",
+    "    print('done')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "df.head()"
    ]
   },
@@ -244,7 +262,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.6"
+   "version": "3.6.5"
   }
  },
  "nbformat": 4,
-- 
GitLab