Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
Plotly Ipywidget Notebooks
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Nahuel Ferreiro-Iachellini
Plotly Ipywidget Notebooks
Commits
367eb404
Commit
367eb404
authored
7 years ago
by
Jon Mease
Browse files
Options
Downloads
Patches
Plain Diff
Download Taxi data if not yet cached
parent
95bb8ead
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
notebooks/nyc_taxi_selection.ipynb
+20
-2
20 additions, 2 deletions
notebooks/nyc_taxi_selection.ipynb
with
20 additions
and
2 deletions
notebooks/nyc_taxi_selection.ipynb
+
20
−
2
View file @
367eb404
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"import os\n",
"import pandas as pd\n",
"import pandas as pd\n",
"import numpy as np\n",
"import numpy as np\n",
"import plotly.graph_objs as go\n",
"import plotly.graph_objs as go\n",
...
@@ -26,7 +27,24 @@
...
@@ -26,7 +27,24 @@
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"df = pd.read_pickle('data/nyc_taxi.pkl')\n",
"pkl_path = 'data/nyc_taxi.pkl'\n",
"if os.path.exists(pkl_path):\n",
" print('Loading saved dataset file... ', end='')\n",
" df = pd.read_pickle(pkl_path)\n",
" print('done')\n",
"else:\n",
" print('Downloading and saving dataset (thanks to the datashader project for making this example dataset available!)... ', end='')\n",
" df = pd.read_csv('http://s3.amazonaws.com/datashader-data/nyc_taxi.zip', compression='zip')\n",
" df.to_pickle(pkl_path)\n",
" print('done')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
"df.head()"
]
]
},
},
...
@@ -244,7 +262,7 @@
...
@@ -244,7 +262,7 @@
"name": "python",
"name": "python",
"nbconvert_exporter": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"pygments_lexer": "ipython3",
"version": "3.6.
6
"
"version": "3.6.
5
"
}
}
},
},
"nbformat": 4,
"nbformat": 4,
...
...
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
### Load NYC Taxi data
### Load NYC Taxi data
We're going to look at the distribution of trip distances for a portion of the NYC taxi data set for January 2015.
We're going to look at the distribution of trip distances for a portion of the NYC taxi data set for January 2015.
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
import
os
import
pandas
as
pd
import
pandas
as
pd
import
numpy
as
np
import
numpy
as
np
import
plotly.graph_objs
as
go
import
plotly.graph_objs
as
go
from
ipywidgets
import
VBox
,
HBox
from
ipywidgets
import
VBox
,
HBox
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
df
=
pd
.
read_pickle
(
'
data/nyc_taxi.pkl
'
)
pkl_path
=
'
data/nyc_taxi.pkl
'
if
os
.
path
.
exists
(
pkl_path
):
print
(
'
Loading saved dataset file...
'
,
end
=
''
)
df
=
pd
.
read_pickle
(
pkl_path
)
print
(
'
done
'
)
else
:
print
(
'
Downloading and saving dataset (thanks to the datashader project for making this example dataset available!)...
'
,
end
=
''
)
df
=
pd
.
read_csv
(
'
http://s3.amazonaws.com/datashader-data/nyc_taxi.zip
'
,
compression
=
'
zip
'
)
df
.
to_pickle
(
pkl_path
)
print
(
'
done
'
)
```
%% Cell type:code id: tags:
```
python
df
.
head
()
df
.
head
()
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
Extract only those trips that carried at least 4 passengers. Also discard outlying trips of more than 10 miles for visualization purposes.
Extract only those trips that carried at least 4 passengers. Also discard outlying trips of more than 10 miles for visualization purposes.
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
df_cleaned
=
df
.
loc
[
np
.
logical_and
(
df_cleaned
=
df
.
loc
[
np
.
logical_and
(
df
.
passenger_count
>=
4
,
df
.
passenger_count
>=
4
,
df
.
trip_distance
.
between
(
0
,
10
,
inclusive
=
False
)
df
.
trip_distance
.
between
(
0
,
10
,
inclusive
=
False
)
)]
)]
len
(
df_cleaned
)
len
(
df_cleaned
)
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
We're left with almost 1.2 million data points
We're left with almost 1.2 million data points
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
## Distribution of trip distance
## Distribution of trip distance
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
Initialize an empty figure with fixed x-axis range
Initialize an empty figure with fixed x-axis range
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
fig1
=
go
.
FigureWidget
(
layout
=
{
fig1
=
go
.
FigureWidget
(
layout
=
{
'
xaxis
'
:
{
'
range
'
:
[
-
0.1
,
10
]}
'
xaxis
'
:
{
'
range
'
:
[
-
0.1
,
10
]}
})
})
fig1
fig1
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
Add a histogram trace with predefined bins (This takes a couple of seconds)
Add a histogram trace with predefined bins (This takes a couple of seconds)
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
hist
=
fig1
.
add_histogram
(
x
=
df_cleaned
[
'
trip_distance
'
],
hist
=
fig1
.
add_histogram
(
x
=
df_cleaned
[
'
trip_distance
'
],
xbins
=
{
'
start
'
:
-
0.05
,
'
size
'
:
0.1
,
'
end
'
:
10
})
xbins
=
{
'
start
'
:
-
0.05
,
'
size
'
:
0.1
,
'
end
'
:
10
})
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
Update the axis titles (This happens immediately)
Update the axis titles (This happens immediately)
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
fig1
.
layout
.
xaxis
.
title
=
'
Trip distance (mi.)
'
fig1
.
layout
.
xaxis
.
title
=
'
Trip distance (mi.)
'
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
fig1
.
layout
.
yaxis
.
title
=
'
Frequency
'
fig1
.
layout
.
yaxis
.
title
=
'
Frequency
'
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
### Plot pickup locations
### Plot pickup locations
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
Create an empty figure with hidden axes
Create an empty figure with hidden axes
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
fig2
=
go
.
FigureWidget
(
fig2
=
go
.
FigureWidget
(
layout
=
{
'
width
'
:
400
,
'
height
'
:
400
,
'
hovermode
'
:
False
,
layout
=
{
'
width
'
:
400
,
'
height
'
:
400
,
'
hovermode
'
:
False
,
'
xaxis
'
:
{
'
tickvals
'
:
[]},
'
xaxis
'
:
{
'
tickvals
'
:
[]},
'
yaxis
'
:
{
'
tickvals
'
:
[]},
'
yaxis
'
:
{
'
tickvals
'
:
[]},
'
margin
'
:
{
'
b
'
:
0
,
'
t
'
:
0
,
'
l
'
:
0
,
'
r
'
:
0
}
'
margin
'
:
{
'
b
'
:
0
,
'
t
'
:
0
,
'
l
'
:
0
,
'
r
'
:
0
}
})
})
fig2
fig2
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
Add a
`scattergl`
trace of the
`x`
and
`y`
coordinates of all 1.2 million pickup locations
Add a
`scattergl`
trace of the
`x`
and
`y`
coordinates of all 1.2 million pickup locations
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
scatter
=
fig2
.
add_scattergl
(
scatter
=
fig2
.
add_scattergl
(
x
=
df_cleaned
[
'
pickup_x
'
],
x
=
df_cleaned
[
'
pickup_x
'
],
y
=
df_cleaned
[
'
pickup_y
'
],
y
=
df_cleaned
[
'
pickup_y
'
],
mode
=
'
markers
'
,
mode
=
'
markers
'
,
marker
=
{
'
size
'
:
4
,
'
opacity
'
:
0.1
})
marker
=
{
'
size
'
:
4
,
'
opacity
'
:
0.1
})
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
Constrain the aspect ratio so that the view isn't distorted on zoom
Constrain the aspect ratio so that the view isn't distorted on zoom
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
fig2
.
layout
.
yaxis
.
scaleanchor
=
'
x
'
fig2
.
layout
.
yaxis
.
scaleanchor
=
'
x
'
```
```
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
## Install Selection Callback
## Install Selection Callback
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
Install selection callback function to update the trip distance histogram based on only the selected trips
Install selection callback function to update the trip distance histogram based on only the selected trips
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
def
update_hist
(
trace
,
points
,
state
):
def
update_hist
(
trace
,
points
,
state
):
if
points
.
point_inds
:
if
points
.
point_inds
:
hist
.
x
=
df_cleaned
[
'
trip_distance
'
].
iloc
[
points
.
point_inds
]
hist
.
x
=
df_cleaned
[
'
trip_distance
'
].
iloc
[
points
.
point_inds
]
else
:
else
:
hist
.
x
=
df_cleaned
[
'
trip_distance
'
]
hist
.
x
=
df_cleaned
[
'
trip_distance
'
]
scatter
.
on_selection
(
update_hist
)
scatter
.
on_selection
(
update_hist
)
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
```
python
```
python
```
```
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment