# import important libraries used for visualization purposes
import pandas as pd
import networkx
import numpy as np
import geopandas as gpd
import shapely as shp
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
# import cufflinks
import plotly
# word cloud library
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
# # Using plotly + cufflinks in offline mode
# import cufflinks
# cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)
# matplotlib library
import matplotlib.pyplot as plt
# import bokeh library which is a famous one for network analysis
from bokeh.io import output_notebook, show, save
# these are needed for Network Visualization below
from bokeh.io import output_notebook, show, save
from bokeh.models import Range1d, Circle, ColumnDataSource, MultiLine
from bokeh.plotting import figure
from bokeh.plotting import from_networkx
# these are needed for Geo Map visualization below
import plotly.figure_factory as ff
from urllib.request import urlopen
import json
# Fetch the county-boundary GeoJSON from the plotly datasets repository; it is
# passed as the `geojson` argument of the choropleth map further down.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    # The response must still be open while it is parsed, so json.load
    # belongs inside the `with` body.
    counties = json.load(response)
# Re-import the cleaned CSV saved by the previous step (step 2).
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids the "mixed types" DtypeWarning that
# this load otherwise emits.
df = pd.read_csv("Datasets/LoS_Clean_Output_Mod2.csv", low_memory=False)
df.head(10)
C:\Users\rgnanase\Anaconda3\envs\igsc2021_jb\lib\site-packages\IPython\core\interactiveshell.py:3165: DtypeWarning: Columns (24,25) have mixed types.Specify dtype option on import or set low_memory=False.
DataID | DataItem | County | Owner_FirstName | Owner_LastName | Witness | Date | Freed_FirstName | Freed_LastName | Alias | ... | DatasetName | Notes | isWorking | isError | ChangeDate | CreateDate | DateFormatted | PriorStatusFormatted | Height_Inches | AgeFormatted | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AR7-46 | 1 | AA | Ann | Ailsworth | NaN | None | Keziah | Cromwell | NaN | ... | FF | NaN | 0 | 0 | 39:20.3 | 39:20.3 | NaN | Unknown | 63.00 | 20.0 |
1 | AR7-46 | 2 | AA | Ann | Ailsworth | Zachariah Duvall | 1811-06-24 | Resiah | Cromwell | NaN | ... | FF | NaN | 0 | 0 | 39:20.3 | 39:20.3 | 1811-06-24 | Unknown | 63.00 | 28.0 |
2 | AR7-46 | 3 | AA | Ann | Ailsworth | Jenifer Duvall | 1811-06-24 | Kesiah | Cromwell | NaN | ... | FF | Freed by will of Mrs. Ann Ailsworth. | 0 | 0 | 39:20.3 | 39:20.3 | 1811-06-24 | Slave | 63.00 | 28.0 |
3 | AR7-46 | 4 | AA | William | Alexander | NaN | 1815-03-28 | Handy | McCeomey | NaN | ... | FF | Freed by manumission, dated 27 March 1815. Rai... | 0 | 0 | 39:20.3 | 39:20.3 | 1815-03-28 | Unknown | 67.75 | 43.0 |
4 | AR7-46 | 5 | AA | Thomas | Allen | NaN | 1837-07-10 | Nancy | Ennis | NaN | ... | FF | Freed by petition to Anne Arundel County Court... | 0 | 0 | 39:20.3 | 39:20.3 | 1837-07-10 | Unknown | 57.50 | 37.0 |
5 | AR7-46 | 6 | AA | Thomas | Allen | NaN | 1837-08-03 | Jim | Sharpe | NaN | ... | FF | Freed by petition to Anne Arundel County Court... | 0 | 0 | 39:20.3 | 39:20.3 | 1837-08-03 | Unknown | 61.50 | 41.0 |
6 | AR7-46 | 7 | AA | James | Alleson | NaN | 1826-10-28 | Belly | NaN | NaN | ... | FF | Freed by manumission, dated 28 Oct 1826. Raise... | 0 | 0 | 39:20.3 | 39:20.3 | 1826-10-28 | Unknown | 61.50 | 26.0 |
7 | AR7-46 | 8 | AA | Mary | Alwell | NaN | 1844-11-08 | Howard | Davis | NaN | ... | FF | son of Nelly. Freed by manumission, dated 12 A... | 0 | 0 | 39:20.3 | 39:20.3 | 1844-11-08 | Unknown | 66.50 | 22.0 |
8 | AR7-46 | 9 | AA | Mary | Armiger | NaN | 1819-01-27 | Abigail | NaN | NaN | ... | FF | along with Richard G. Stetton. Freed by manumi... | 0 | 0 | 39:20.3 | 39:20.3 | 1819-01-27 | Unknown | 61.00 | 40.0 |
9 | AR7-46 | 10 | AA | Mary | Atcock | Jacob Franklin, Jr. | 1812-12-30 | Ned | NaN | NaN | ... | FF | NaN | 0 | 0 | 39:20.3 | 39:20.3 | 1812-12-30 | Unknown | 66.25 | 21.0 |
10 rows × 32 columns
# cufflinks links plotly to pandas: it adds the `iplot` method that the
# histogram cells below call on columns of `df` (a plain pandas Series has no
# `iplot` method of its own).
import cufflinks as cf
# Put cufflinks into offline plotting mode for this session.
cf.go_offline()
# NOTE(review): offline=False looks at odds with go_offline() above — confirm
# which mode is intended before relying on this setting.
cf.set_config_file(offline=False, world_readable=True)
Requirement already satisfied: cufflinks in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (0.17.3) Requirement already satisfied: ipywidgets>=7.0.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from cufflinks) (7.6.3) Requirement already satisfied: plotly>=4.1.1 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from cufflinks) (4.14.3) Requirement already satisfied: numpy>=1.9.2 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from cufflinks) (1.20.2) Requirement already satisfied: six>=1.9.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from cufflinks) (1.15.0) Requirement already satisfied: pandas>=0.19.2 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from cufflinks) (1.2.3) Requirement already satisfied: setuptools>=34.4.1 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from cufflinks) (52.0.0.post20210125) Requirement already satisfied: colorlover>=0.2.1 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from cufflinks) (0.3.0) Requirement already satisfied: ipython>=5.3.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from cufflinks) (7.22.0) Requirement already satisfied: pickleshare in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipython>=5.3.0->cufflinks) (0.7.5) Requirement already satisfied: traitlets>=4.2 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipython>=5.3.0->cufflinks) (5.0.5) Requirement already satisfied: decorator in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipython>=5.3.0->cufflinks) (4.4.2) Requirement already satisfied: pygments in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipython>=5.3.0->cufflinks) (2.8.1) Requirement already satisfied: jedi>=0.16 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipython>=5.3.0->cufflinks) (0.17.2) Requirement 
already satisfied: backcall in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipython>=5.3.0->cufflinks) (0.2.0) Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipython>=5.3.0->cufflinks) (3.0.17) Requirement already satisfied: colorama in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipython>=5.3.0->cufflinks) (0.4.4) Requirement already satisfied: ipykernel>=4.5.1 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipywidgets>=7.0.0->cufflinks) (5.3.4) Requirement already satisfied: widgetsnbextension~=3.5.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipywidgets>=7.0.0->cufflinks) (3.5.1) Requirement already satisfied: jupyterlab-widgets>=1.0.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipywidgets>=7.0.0->cufflinks) (1.0.0) Requirement already satisfied: nbformat>=4.2.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipywidgets>=7.0.0->cufflinks) (5.1.3) Requirement already satisfied: jupyter-client in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (6.1.12) Requirement already satisfied: tornado>=4.2 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from ipykernel>=4.5.1->ipywidgets>=7.0.0->cufflinks) (6.1) Requirement already satisfied: parso<0.8.0,>=0.7.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from jedi>=0.16->ipython>=5.3.0->cufflinks) (0.7.0) Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (3.2.0) Requirement already satisfied: ipython-genutils in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (0.2.0) Requirement 
already satisfied: jupyter-core in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (4.7.1) Requirement already satisfied: attrs>=17.4.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (20.3.0) Requirement already satisfied: pyrsistent>=0.14.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (0.17.3) Requirement already satisfied: python-dateutil>=2.7.3 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from pandas>=0.19.2->cufflinks) (2.8.1) Requirement already satisfied: pytz>=2017.3 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from pandas>=0.19.2->cufflinks) (2021.1) Requirement already satisfied: retrying>=1.3.3 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from plotly>=4.1.1->cufflinks) (1.3.3) Requirement already satisfied: wcwidth in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=5.3.0->cufflinks) (0.2.5) Requirement already satisfied: notebook>=4.4.1 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (6.3.0) Requirement already satisfied: jinja2 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.11.3) Requirement already satisfied: argon2-cffi in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (20.1.0) Requirement already satisfied: pyzmq>=17 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (20.0.0) 
Requirement already satisfied: prometheus-client in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.10.0) Requirement already satisfied: terminado>=0.8.3 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.9.4) Requirement already satisfied: nbconvert in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (6.0.7) Requirement already satisfied: Send2Trash>=1.5.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.5.0) Requirement already satisfied: pywin32>=1.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from jupyter-core->nbformat>=4.2.0->ipywidgets>=7.0.0->cufflinks) (228) Requirement already satisfied: pywinpty>=0.5 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from terminado>=0.8.3->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.7) Requirement already satisfied: cffi>=1.0.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.14.5) Requirement already satisfied: pycparser in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from cffi>=1.0.0->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.20) Requirement already satisfied: MarkupSafe>=0.23 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.1.1) Requirement already satisfied: defusedxml in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from 
nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.7.1) Requirement already satisfied: pandocfilters>=1.4.1 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.4.3) Requirement already satisfied: mistune<2,>=0.8.1 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.8.4) Requirement already satisfied: jupyterlab-pygments in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.1.2) Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.3) Requirement already satisfied: testpath in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.4.4) Requirement already satisfied: bleach in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (3.3.0) Requirement already satisfied: entrypoints>=0.2.2 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.3) Requirement already satisfied: nest-asyncio in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.5.1) Requirement already satisfied: async-generator in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from 
nbclient<0.6.0,>=0.5.0->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (1.10) Requirement already satisfied: webencodings in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (0.5.1) Requirement already satisfied: packaging in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (20.9) Requirement already satisfied: pyparsing>=2.0.2 in c:\users\rgnanase\anaconda3\envs\igsc2021_jb\lib\site-packages (from packaging->bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets>=7.0.0->cufflinks) (2.4.7)
# Simple histograms drawn with the cufflinks `iplot` method: one chart per
# (column, title) pair, in the order listed.
histogram_specs = (
    ('AgeFormatted', 'Histogram Chart of Enslaved Age vs Count'),
    ('Height_Inches', 'Histogram Chart of Enslaved Height vs Count'),
)
for column_name, chart_title in histogram_specs:
    df[column_name].iplot(kind='hist', title=chart_title)
# Pie chart showing how the CoF records are distributed over the Sex column
sex_pie_options = {
    'names': 'Sex',
    'color_discrete_sequence': px.colors.sequential.RdBu,
    'title': "Pie Chart of Distribution of CoF over Sex",
}
fig = px.pie(df, **sex_pie_options)
fig.show()
# Using Plotly Express (docs: https://plotly.com/python/plotly-express/), draw
# a scatter plot with County on the x-axis and Age on the y-axis.
# Rows missing either the county or the age are left out of the plot.
has_county_and_age = df["County"].notna() & df["AgeFormatted"].notna()
fig = px.scatter(
    df[has_county_and_age],
    x="County",
    y="AgeFormatted",
    color="County",
    hover_name="Freed_FirstName",
    # The y-axis is AgeFormatted, so the title names Age rather than Sex.
    title="County Vs Age",
)
fig.show()
# Derive a small frame holding the CoF issue date (parsed to datetime) and the
# sex of each record, then count the CoFs issued per sex within each period
# given by `freq`. The result feeds the grouped chart plotted afterwards.
freq = 'Y'
cof_dates = pd.to_datetime(df['DateFormatted'])
dfp = pd.DataFrame({'CoFDate': cof_dates, 'Sex': df['Sex']})
# Group on sex and on the date bucket, then count the rows in each group
by_sex_and_period = dfp.groupby(['Sex', pd.Grouper(key='CoFDate', freq=freq)])
dfp = by_sex_and_period['Sex'].agg(['count']).reset_index()
print(dfp)
# Order by date, then by count, and renumber the rows from 0
dfp = dfp.sort_values(by=['CoFDate', 'count']).reset_index(drop=True)
Sex CoFDate count 0 F 1795-12-31 1 1 F 1803-12-31 1 2 F 1806-12-31 2 3 F 1808-12-31 1 4 F 1810-12-31 1 .. .. ... ... 103 M 1863-12-31 123 104 M 1864-12-31 26 105 M 1865-12-31 1 106 M 1918-12-31 1 107 M 1931-12-31 1 [108 rows x 3 columns]
# Plot the chart from the data created above.
# px.area builds the complete figure itself, so no empty go.Figure() is
# created beforehand (it was overwritten immediately and never used).
fig = px.area(dfp, x='CoFDate', y='count', color='Sex', title="CoF Issued Date vs Sex vs Counts")
fig.show()
The visualization above shows a spike in the number of Certificates of Freedom issued around 1832, which is consistent with historical events believed to have taken place in Maryland around the same period.
# Take the slice of the dataset whose owner last name is 'Atwell' and turn it
# into a graph: each row becomes an edge from the owner's last name to the
# freed person's first name, carrying the row's DataItem as an edge attribute.
is_atwell = df["Owner_LastName"] == 'Atwell'
LoS_CoF_df = df[is_atwell]
LoS_CoF = networkx.from_pandas_edgelist(
    LoS_CoF_df,
    source='Owner_LastName',
    target='Freed_FirstName',
    edge_attr='DataItem',
)
# Static matplotlib drawing of that owner-to-freed-person network
plt.figure(figsize=(8, 8))
draw_options = {
    'with_labels': True,
    'node_color': 'skyblue',
    'width': .3,
    'font_size': 8,
}
networkx.draw(LoS_CoF, **draw_options)
# Title of the interactive network plot. The graph built above links
# Owner_LastName to Freed_FirstName, so the title names those two fields.
title = 'Legacy Of Slavery Certificates of Freedom - Freed First Name vs Owner Last Name'
# Establish which categories will appear when hovering over each node
HOVER_TOOLTIPS = [("Freed_FirstName", "@index")]
# Create a plot — set dimensions, toolbar, and title
plot = figure(
    tooltips=HOVER_TOOLTIPS,
    tools="pan,wheel_zoom,save,reset",
    active_scroll='wheel_zoom',
    x_range=Range1d(-10.1, 10.1),
    y_range=Range1d(-10.1, 10.1),
    title=title,
)
# Create a network graph object with spring layout
# https://networkx.github.io/documentation/networkx-1.9/reference/generated/networkx.drawing.layout.spring_layout.html
network_graph = from_networkx(LoS_CoF, networkx.spring_layout, scale=10, center=(0, 0))
# Set node size and color
network_graph.node_renderer.glyph = Circle(size=15, fill_color='skyblue')
# Set edge opacity and width
network_graph.edge_renderer.glyph = MultiLine(line_alpha=0.5, line_width=1)
# Add network graph to the plot
plot.renderers.append(network_graph)
show(plot)
#save(plot, filename=f"{title}.html")
# Transformation step: map the county abbreviations used in the CoF dataset to
# (a) the numeric 'fips' code that geomapping services use to locate an area on
# the map and (b) the full county name. For example, entries with County='AA'
# get fips code 24003 and the name 'Anne Arundel'. Abbreviations follow MSA's
# classification: https://msa.maryland.gov/msa/speccol/sc2600/sc2685/html/abbrev.html
COUNTY_LOOKUP = {
    'AA': ("24003", "Anne Arundel"),
    'AL': ("24001", "Allegany"),
    # FIPS 24005 is Baltimore County and 24510 is Baltimore City; each code is
    # paired with the name it actually identifies.
    'BA': ("24005", "Baltimore County"),
    'BC': ("24510", "Baltimore City"),
    'CA': ("24011", "Caroline"),
    'CE': ("24015", "Cecil"),
    'CH': ("24017", "Charles"),
    'CR': ("24013", "Carroll"),
    'CV': ("24009", "Calvert"),
    'DO': ("24019", "Dorchester"),
    'FR': ("24021", "Frederick"),
    'GA': ("24023", "Garrett"),
    'HA': ("24025", "Harford"),
    'HO': ("24027", "Howard"),
    'KE': ("24029", "Kent"),
    'MO': ("24031", "Montgomery"),
    'PG': ("24033", "Prince George's"),
    # The mixed-case spelling 'Qa' is mapped exactly like 'QA'.
    'Qa': ("24035", "Queen Anne's"),
    'QA': ("24035", "Queen Anne's"),
    'SM': ("24037", "St. Mary's"),
    'SO': ("24039", "Somerset"),
    'TA': ("24041", "Talbot"),
    'WA': ("24043", "Washington"),
    'WI': ("24045", "Wicomico"),
    'WO': ("24047", "Worcester"),
}

# dfg is the same object as df (no copy is made), so the County_Code and
# County_New columns written here are visible through df as well.
dfg = df
for county_abbrev, (fips_code, full_name) in COUNTY_LOOKUP.items():
    in_county = df["County"] == county_abbrev
    dfg.loc[in_county, "County_Code"] = fips_code
    dfg.loc[in_county, "County_New"] = full_name

# Consolidate the counts of CoFs issued in each county for the geo map
# visualizations below. `values` is indexed by fips code; its 'County' column
# holds the number of records per county.
values = dfg.groupby('County_Code').agg('count')
values['fips'] = values.index
# Per-county-name counts, kept under its existing name.
values1 = dfg.groupby('County_New').agg('count')
# Look the name up per fips code so it aligns on the index. Copying
# values1.index would pair names sorted alphabetically with rows sorted by
# code, which attaches names to the wrong counties.
values['county_name'] = dfg.groupby('County_Code')['County_New'].first()
# Interactive Plotly Mapbox choropleth: each county (matched through its fips
# code) is shaded by its CoF count. The zoom level and the latitude/longitude
# centre below make the map open on MD state and its counties.
map_title = '# of CoFs issued in MD State by Counties'
md_center = {"lat": 39.0458, "lon": -76.641273}
legend_labels = {'County':'# Counts of CoF','county_name':'County Name'}
fig = px.choropleth_mapbox(
    values,
    geojson=counties,
    locations='fips',
    color='County',
    title=map_title,
    color_continuous_scale="Edge",
    range_color=(10, 5000),
    mapbox_style="carto-positron",
    hover_name='county_name',
    zoom=6,
    center=md_center,
    opacity=0.5,
    hover_data=['county_name'],
    labels=legend_labels,
)
fig.update_layout(title=map_title)
fig.show()
# This is another geo map visualization, implemented with a different Plotly module called Figure Factory. It is not as interactive as the Plotly Mapbox map.
import shapely
import shapefile
import plotly
from plotly.figure_factory._county_choropleth import create_choropleth
import plotly.figure_factory as ff
# County-level choropleth of MD built with Figure Factory: the fips codes come
# from the index of `values`, the shading from its per-county 'County' counts.
county_fips = values.index.to_list()
cof_counts = values.County.to_list()
outline_style = {'color': 'rgb(255,255,255)', 'width': 0.5}
fig = ff.create_choropleth(
    fips=county_fips,
    scope=['Maryland'],
    values=cof_counts,
    title='MD State with Counties',
    round_legend_values=True,
    show_state_data=True,
    county_outline=outline_style,
    exponent_format=True,
    legend_title='# Counts of CoF',
)
# Clear the layout template before showing the figure
fig.layout.template = None
fig.show()
# Start with the Notes feature:
text = df['Notes']
# Join only the actual note strings. Series.to_string() would also feed the
# row index and a literal "NaN" for every missing note into the word cloud.
notes_corpus = " ".join(text.dropna().astype(str))
# Create and generate a word cloud image:
wordcloud = WordCloud().generate(notes_corpus)
# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
# Start with the Notes feature:
text = df['Notes']
# Extend the default stop words with place names that would otherwise
# dominate the cloud.
stopwords = set(STOPWORDS)
stopwords.update(["Anne", "Arundel", "Baltimore", "Arundel County", "Dorchester","County"])
# Join only the actual note strings. Series.to_string() would also feed the
# row index and a literal "NaN" for every missing note into the word cloud.
notes_corpus = " ".join(text.dropna().astype(str))
# Create and generate a word cloud image:
wordcloud = WordCloud(stopwords=stopwords).generate(notes_corpus)
# Display the generated image:
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()