In the previous two modules we explored ways to manipulate and structure data to prepare the data for modeling and visualization.
As seen below, several tools (i.e., libraries) need to be imported into your Jupyter notebook to model the data. Once these libraries have been imported, the next step is to import your csv files into the notebook. These files can be ones created separately in an Excel spreadsheet and then saved as comma-separated value (csv) files, datasets manipulated in a Jupyter notebook (as we did in part 2), or a combination of both. Keep in mind that your datasets can still be manipulated in your Jupyter notebook while we are modeling, since both pandas and numpy are imported into the notebook.
# import the following libraries for visualization purposes
import pandas as pd
import networkx
import numpy as np
import geopandas as gpd
import shapely as shp
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.graph_objects as go
import plotly.express as px
import plotly
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
# # Using plotly + cufflinks in offline mode
# import cufflinks
# cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)
# import matplotlib library
import matplotlib.pyplot as plt
# import bokeh library for network analysis
from bokeh.io import output_notebook, show, save
# the libraries below are needed for Network Visualization
from bokeh.io import output_notebook, show, save
from bokeh.models import Range1d, Circle, ColumnDataSource, MultiLine
from bokeh.plotting import figure
from bokeh.plotting import from_networkx
# these are needed for Geo Map visualization below
import plotly.figure_factory as ff
from urllib.request import urlopen
import json
# Download Plotly's sample county GeoJSON file and parse it into `counties`.
# The `with` statement closes the HTTP response once the JSON has been read.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)
# Load the three csv data files.
# Every column is read as a plain Python object (text) and pandas' default
# missing-value detection is switched off, so blank cells are not turned
# into NaN.
fullstackeddf = pd.read_csv(
    'python-fullmovements-stacked.csv',
    dtype=object,
    keep_default_na=False,
    na_values=[],
)
pathsdf = pd.read_csv(
    'python-paths.csv',
    dtype=object,
    keep_default_na=False,
    na_values=[],
)
totalsdf = pd.read_csv(
    'python-totals.csv',
    dtype=object,
    keep_default_na=False,
    na_values=[],
)
# Because of dtype=object the 'total' column is loaded as text; the optional
# cast below (left disabled) would convert it to integers.
#totalsdf['total'] = totalsdf['total'].astype(np.int64) #have to change the type for total to integer
If you find that you are not able to import a library into your notebook, you will most likely need to install it first. A Python library can be installed from a notebook cell by running !pip install followed by the name of the library (for example, !pip install cufflinks). Once the library has been installed, it still needs to be imported into the notebook, as shown below.
# cufflinks links plotly to pandas: without it, pandas objects (Series /
# DataFrame) have no iplot method.
# Import cufflinks and configure it for this notebook session.
import cufflinks as cf
# Switch cufflinks to offline plotting.
cf.go_offline()
# NOTE(review): this call passes offline=False right after go_offline();
# confirm which mode is intended and whether world_readable=True is wanted.
cf.set_config_file(offline=False, world_readable=True)
Requirement already satisfied: cufflinks in /opt/conda/lib/python3.8/site-packages (0.17.3) Requirement already satisfied: colorlover>=0.2.1 in /opt/conda/lib/python3.8/site-packages (from cufflinks) (0.3.0) Requirement already satisfied: ipython>=5.3.0 in /opt/conda/lib/python3.8/site-packages (from cufflinks) (7.20.0) Requirement already satisfied: pandas>=0.19.2 in /opt/conda/lib/python3.8/site-packages (from cufflinks) (1.2.2) Requirement already satisfied: six>=1.9.0 in /opt/conda/lib/python3.8/site-packages (from cufflinks) (1.15.0) Requirement already satisfied: plotly>=4.1.1 in /opt/conda/lib/python3.8/site-packages (from cufflinks) (5.9.0) Requirement already satisfied: setuptools>=34.4.1 in /opt/conda/lib/python3.8/site-packages (from cufflinks) (49.6.0.post20210108) Requirement already satisfied: ipywidgets>=7.0.0 in /opt/conda/lib/python3.8/site-packages (from cufflinks) (7.6.3) Requirement already satisfied: numpy>=1.9.2 in /opt/conda/lib/python3.8/site-packages (from cufflinks) (1.19.5) Requirement already satisfied: traitlets>=4.2 in /opt/conda/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (5.0.5) Requirement already satisfied: decorator in /opt/conda/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (4.4.2) Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /opt/conda/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (3.0.16) Requirement already satisfied: backcall in /opt/conda/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (0.2.0) Requirement already satisfied: pygments in /opt/conda/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (2.7.4) Requirement already satisfied: jedi>=0.16 in /opt/conda/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (0.18.0) Requirement already satisfied: pickleshare in /opt/conda/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (0.7.5) Requirement already satisfied: pexpect>4.3 in 
/opt/conda/lib/python3.8/site-packages (from ipython>=5.3.0->cufflinks) (4.8.0) Requirement already satisfied: ipykernel>=4.5.1 in /opt/conda/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (5.4.3) Requirement already satisfied: widgetsnbextension~=3.5.0 in /opt/conda/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (3.5.1) Requirement already satisfied: nbformat>=4.2.0 in /opt/conda/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (5.1.2) Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /opt/conda/lib/python3.8/site-packages (from ipywidgets>=7.0.0->cufflinks) (1.0.0) Requirement already satisfied: tornado>=4.2 in /opt/conda/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (6.1) Requirement already satisfied: jupyter-client in /opt/conda/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (6.1.11) Requirement already satisfied: parso<0.9.0,>=0.8.0 in /opt/conda/lib/python3.8/site-packages (from jedi>=0.16->ipython>=5.3.0->cufflinks) (0.8.1) Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /opt/conda/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (3.2.0) Requirement already satisfied: ipython-genutils in /opt/conda/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (0.2.0) Requirement already satisfied: jupyter-core in /opt/conda/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (4.7.1) Requirement already satisfied: pyrsistent>=0.14.0 in /opt/conda/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (0.17.3) Requirement already satisfied: attrs>=17.4.0 in /opt/conda/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (20.3.0) Requirement already satisfied: python-dateutil>=2.7.3 in /opt/conda/lib/python3.8/site-packages (from 
pandas>=0.19.2->cufflinks) (2.8.1) Requirement already satisfied: pytz>=2017.3 in /opt/conda/lib/python3.8/site-packages (from pandas>=0.19.2->cufflinks) (2021.1) Requirement already satisfied: ptyprocess>=0.5 in /opt/conda/lib/python3.8/site-packages (from pexpect>4.3->ipython>=5.3.0->cufflinks) (0.7.0) Requirement already satisfied: tenacity>=6.2.0 in /opt/conda/lib/python3.8/site-packages (from plotly>=4.1.1->cufflinks) (8.0.1) Requirement already satisfied: wcwidth in /opt/conda/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=5.3.0->cufflinks) (0.2.5) Requirement already satisfied: notebook>=4.4.1 in /opt/conda/lib/python3.8/site-packages (from widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (6.2.0) Requirement already satisfied: Send2Trash>=1.5.0 in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.5.0) Requirement already satisfied: pyzmq>=17 in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (20.0.0) Requirement already satisfied: nbconvert in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (6.0.7) Requirement already satisfied: prometheus-client in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.9.0) Requirement already satisfied: argon2-cffi in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (20.1.0) Requirement already satisfied: jinja2 in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.11.3) Requirement already satisfied: terminado>=0.8.3 in /opt/conda/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.9.2) Requirement 
already satisfied: cffi>=1.0.0 in /opt/conda/lib/python3.8/site-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.14.4) Requirement already satisfied: pycparser in /opt/conda/lib/python3.8/site-packages (from cffi>=1.0.0->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.20) Requirement already satisfied: MarkupSafe>=0.23 in /opt/conda/lib/python3.8/site-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.1.1) Requirement already satisfied: entrypoints>=0.2.2 in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.3) Requirement already satisfied: jupyterlab-pygments in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.1.2) Requirement already satisfied: pandocfilters>=1.4.1 in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.4.2) Requirement already satisfied: testpath in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.4.4) Requirement already satisfied: mistune<2,>=0.8.1 in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.8.4) Requirement already satisfied: defusedxml in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.6.0) Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /opt/conda/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.2) Requirement already satisfied: bleach in /opt/conda/lib/python3.8/site-packages (from 
nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (3.3.0) Requirement already satisfied: nest-asyncio in /opt/conda/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.4.3) Requirement already satisfied: async-generator in /opt/conda/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.10) Requirement already satisfied: webencodings in /opt/conda/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.1) Requirement already satisfied: packaging in /opt/conda/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (20.9) Requirement already satisfied: pyparsing>=2.0.2 in /opt/conda/lib/python3.8/site-packages (from packaging->bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.4.7)
Plotly offers a variety of tools to model geographical data in unique ways. For example, the first three maps use Plotly's scatter_geo feature to plot points. The fourth visualization also uses scatter_geo, but maps count data so that we can view the relative size of each location. Paths can be modeled and visualized as well: Plotly's scattergeo and .add_trace features can be used together for this, and the fifth, sixth, and seventh maps below draw paths with Plotly Express's line_mapbox feature.
# Map the locations of George Kuratomi, one animation frame per 'order' value.
# Rows are selected with a substring match on the 'name' column.
# (The filter used to be assigned twice in a row; once is enough.)
kuratomi = fullstackeddf[fullstackeddf['name'].str.contains('kuratomi')]
map1 = px.scatter_geo(
    kuratomi,
    lat="lat",
    lon="long",
    color="city",              # one color per city
    hover_name="name",
    animation_frame="order",   # step through the movements in sequence
    projection="orthographic",
)
# Remove the margins so the globe fills the 450px-high figure.
map1.update_layout(height=450, margin={"r":0,"t":0,"l":0,"b":0})
map1.show()
# Map only the rows whose 'state' value contains "california".
# Points are colored by person and the animation steps through the cities.
california = fullstackeddf.loc[fullstackeddf['state'].str.contains('california')]
map2 = px.scatter_geo(
    california,
    projection="orthographic",
    lon="long",
    lat="lat",
    animation_frame="city",
    color="name",
    hover_name="name",
)
# Zero margins, fixed 450px height.
map2.update_layout(margin=dict(r=0, t=0, l=0, b=0), height=450)
map2.show()
# Map every location for all movements, colored by city and animated by
# the 'order' column.
map3 = px.scatter_geo(
    fullstackeddf,
    lat='lat',
    lon='long',
    locationmode='USA-states',
    color='city',
    hover_name="name",
    animation_frame="order",
    opacity=.8,                # slightly transparent markers
    projection='orthographic',
)
map3.update_layout(margin=dict(r=0, t=0, l=0, b=0), height=450)
map3.show()
# Map the relative size of each location.
# 'counts' holds, for every row, the number of 'order' entries recorded for
# that row's city; it is used below as the marker size.
fullstackeddf['counts'] = fullstackeddf.groupby('city')['order'].transform('count')
map4 = px.scatter_geo(
    fullstackeddf,
    lat="lat",
    lon="long",
    size='counts',
    color="city",
    hover_name="state",
    animation_frame="order",
    projection="orthographic",
)
# Taller figure (700px) than the earlier maps, still without margins.
map4.update_layout(margin=dict(r=0, t=0, l=0, b=0), height=700)
map4.show()
# The csv files were read with dtype=object, so the coordinates are text.
# Convert both coordinate columns to numbers with pd.to_numeric.
fullstackeddf["lat"] = fullstackeddf["lat"].pipe(pd.to_numeric)
fullstackeddf["long"] = fullstackeddf["long"].pipe(pd.to_numeric)
## Plot the path of George Kuratomi as a line on a tile map.
kuratomipoints = fullstackeddf.loc[fullstackeddf['name'].str.contains('kuratomi')]
map5 = px.line_mapbox(
    kuratomipoints,
    lat="lat",
    lon="long",
    color="name",
    height=300,
    zoom=3,
)
# The layout update sets mapbox_zoom=4 after zoom=3 was passed above, and
# centers the map on latitude 41.
# NOTE(review): "stamen-terrain" tiles come from a third-party provider;
# confirm they still load with the installed Plotly version.
map5.update_layout(
    mapbox_style="stamen-terrain",
    mapbox_center_lat=41,
    mapbox_zoom=4,
    margin=dict(r=0, t=0, l=0, b=0),
)
map5.show()
# Plot the paths of George Kuratomi and Singer Terada.
# A single pattern with "|" selects names containing either
# "george kuratomi" or "terada".
kuratomiandterada = fullstackeddf[fullstackeddf['name'].str.contains('george kuratomi|terada')]
map6 = px.line_mapbox(
    kuratomiandterada,
    lat="lat",
    lon="long",
    color="name",              # one line per person
    height=600,
    zoom=3,
)
# The layout update sets mapbox_zoom=4 after zoom=3 was passed above, and
# centers the map on latitude 35.
map6.update_layout(
    mapbox_style="stamen-terrain",
    mapbox_center_lat=35,
    mapbox_zoom=4,
    margin=dict(r=0, t=0, l=0, b=0),
)
map6.show()
# Map the paths for all 25 individuals, one colored line per name.
map7 = px.line_mapbox(
    fullstackeddf,
    lat="lat",
    lon="long",
    color="name",
    height=600,
    zoom=3,
)
# Same layout as the two-person map: terrain tiles, zoom 4, latitude 35.
map7.update_layout(
    mapbox_style="stamen-terrain",
    mapbox_center_lat=35,
    mapbox_zoom=4,
    margin=dict(r=0, t=0, l=0, b=0),
)
map7.show()
# Create a choropleth map colored by the total recorded for each state.
# The state codes are upper-cased in place before being used as locations
# with the 'USA-states' location mode.
totalsdf['code'] = totalsdf['code'].str.upper()
fig9 = go.Figure()
fig9.add_trace(
    go.Choropleth(
        locationmode='USA-states',            # how `locations` is interpreted
        locations=totalsdf['code'],           # spatial coordinates
        z=totalsdf['total'].astype(float),    # data to be color-coded (loaded as text)
        colorscale='Reds',
        colorbar_title="Total",
    )
)
fig9.update_layout(
    geo_scope='usa',  # limit map scope to USA
    height=600,
    title_text='Total Incarceration by State',
)
fig9.show()
The following visualizations are graphs created using an assortment of Plotly features. They use the same datasets that were prepared for the geographical maps above. Although those datasets were created to model spatial data, this is a good example of how the same data can be explored in other ways with new tools.
# Create a strip graph: one point per row of pathsdf, placed by name and
# colored by year.
fig1 = px.strip(
    pathsdf,
    x='name',
    color='year',
    hover_name='year',
    height=500,
)
# A bare figure expression at the end of a notebook cell displays the figure.
fig1
# Create a histogram of rows per name, split by year, with a rug plot in
# the margin.
fig2 = px.histogram(
    pathsdf,
    x='name',
    color='year',
    marginal='rug',
    hover_name='dates',
    height=500,
)
fig2
# Create a simple bar graph of state against type, colored by total.
# The height can be adjusted, and a width added, e.g. width=500.
# NOTE(review): 'total' is loaded as text (dtype=object), so confirm the
# coloring by 'total' looks as intended.
fig3 = px.bar(
    totalsdf,
    x='state',
    y='type',
    color='total',
    hover_name='state',
    height=500,
)
fig3
# Create a sunburst graph with names in the inner ring and dates in the
# outer ring. Click on a name in the center to show the data specific to
# that person.
# NOTE(review): 'uid' is used as the sector value but is loaded as text;
# confirm this produces the intended sizes.
fig5 = px.sunburst(
    fullstackeddf,
    path=['name', 'dates'],
    values='uid',
    color='order',
    hover_name='name',
    height=600,
)
fig5
# Create a basic scatterplot: one marker per row, placed by city, sized by
# 'counts' (capped at 30) and colored by name.
fig6 = px.scatter(
    fullstackeddf,
    x='city',
    color='name',
    size='counts',
    size_max=30,
    height=500,
)
fig6
# Show the figure's underlying json elements instead of the rendered chart.
fig6.show(renderer="json")
# Draw a cleaner scatterplot showing the size for each city by order:
# white template, G10 qualitative palette, title, and explicit axis labels.
# NOTE(review): the labels mapping has a 'count' key, while the column used
# for marker size is 'counts'; confirm which name is intended.
fig7 = px.scatter(
    fullstackeddf,
    x='city',
    y='order',
    color='name',
    size='counts',
    size_max=30,
    width=900,
    height=500,
    title="JAWWII Incarceration",
    template='simple_white',
    color_discrete_sequence=px.colors.qualitative.G10,
    labels={
        'city': 'city',
        'count': 'count',
        'name': 'name',
        'order': 'order',
    },
)
# Horizontal, untitled legend anchored above the top-right of the plot.
fig7.update_layout(
    font_family="Rockwell",
    legend={
        'orientation': 'h',
        'title': '',
        'y': 1.1,
        'x': 1,
        'xanchor': 'right',
        'yanchor': 'bottom',
    },
)
fig7.show()
# Create a 3d graph from the variables plugged into x, y, and z.
# Both the color and the size of each marker are based on 'counts'.
# NOTE(review): the keys of color_discrete_map are column names
# ("city", "order", "dates") rather than values of the color column;
# confirm this mapping has the intended effect.
fig8 = px.scatter_3d(
    fullstackeddf,
    x="state",
    y="order",
    z="dates",
    size="counts",
    color="counts",
    hover_name="order",
    size_max=20,
    width=900,
    height=500,
    color_discrete_map={"city": "blue", "order": "green", "dates":"red"},
)
fig8.show()
click here to go to index page