Objective: conduct spatial analysis and create maps
Case Study: 2016 Voting in Wisconsin (adapted from https://datascience.quantecon.org/applications/maps.html)
# Install packages to colab environment
!sudo apt-get update && apt-get install -y libspatialindex-dev
!pip install rtree
!pip install geopandas
#import spatial analytics library
import pandas as pd
import geopandas as gpd # combines the capabilities of pandas and shapely for geospatial operations
from shapely.geometry import Point, Polygon, MultiPolygon # for manipulating text data into geospatial shapes
from shapely import wkt # stands for "well known text," allows for interchange across GIS programs
import rtree # supports geospatial join
# Show every column when a DataFrame is displayed instead of truncating wide frames.
pd.set_option('display.max_columns', None) # visualize all columns in dataframe
# Suppress every warning for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides deprecation notices from
# pandas/geopandas; consider narrowing the filter to specific categories.
import warnings
warnings.filterwarnings('ignore')
An organization called Natural Earth compiled the map data that we use here.
The file provides the outlines of countries. Versions of geopandas before 1.0 come bundled with this data; from 1.0 onward it must be downloaded from Natural Earth.
# Grab the low-resolution Natural Earth world file.
# geopandas < 1.0 bundles it as "naturalearth_lowres". GeoPandas 1.0 removed
# geopandas.datasets (get_path raises AttributeError), and the unpinned
# `pip install geopandas` above installs the newest release, so fall back to
# downloading the same 110m admin-0 countries layer from Natural Earth.
try:
    world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))
except AttributeError:
    world = gpd.read_file(
        "https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip"
    )
    # The raw layer uses upper-case column names and (in current releases)
    # GDP_MD instead of GDP_MD_EST; reduce it to the columns the bundled file
    # exposed, under the same lower-case names.
    # NOTE(review): column mapping assumes the Natural Earth v5 attribute table - confirm.
    world = world.rename(columns={"GDP_MD": "GDP_MD_EST"})
    world = world[["POP_EST", "CONTINENT", "NAME", "ISO_A3", "GDP_MD_EST", "geometry"]]
    world = world.rename(columns=str.lower)
# Index by the 3-letter country code so rows can be looked up with world.loc["USA"].
world = world.set_index("iso_a3")
world.head()
world is a GeoDataFrame with the following columns:
--pop_est: A population estimate for the country
--continent: The country’s continent
--name: The country’s name
--iso_a3: The country’s 3-letter abbreviation (this is the index after the set_index call above)
--gdp_md_est: An estimate of the country’s GDP
--geometry: A POLYGON or MULTIPOLYGON outlining each country
# Draw every country outline using geopandas' default styling.
world.plot()
# Look up the geometry stored for the row whose index label is "USA".
world["geometry"].loc["USA"]
# Load the Census Bureau's 2016 state boundary shapefile.
# Fetched over HTTPS rather than plain HTTP so the download is not exposed to
# tampering in transit and does not rely on the reader following a redirect.
state_df = gpd.read_file("https://www2.census.gov/geo/tiger/GENZ2016/shp/cb_2016_us_state_5m.zip")
state_df.head()
import matplotlib.pyplot as plt
fig, gax = plt.subplots(figsize=(10, 10))
# Outline a single state; replace 'Wisconsin' with e.g. 'Illinois' to draw another one.
state_df.query("NAME == 'Wisconsin'").plot(ax=gax, edgecolor="black", color="white")
plt.show()
# Load the Census Bureau's 2016 county boundary shapefile (HTTPS, not plain HTTP).
county_df = gpd.read_file("https://www2.census.gov/geo/tiger/GENZ2016/shp/cb_2016_us_county_5m.zip")
county_df.head()
# Keep only the counties whose state FIPS code is '55' (use '17' for Illinois).
# .copy() makes the subset an independent frame, so assigning to
# county_df["NAME"] later does not raise pandas' SettingWithCopyWarning.
county_df = county_df.query("STATEFP == '55'").copy()
fig, gax = plt.subplots(figsize=(10, 10))
# State outline first, then the county borders drawn on the same axes.
state_df.query("NAME == 'Wisconsin'").plot(ax=gax, edgecolor="black", color="white")
county_df.plot(ax=gax, edgecolor="black", color="white")
plt.show()
# Read the county-level results; thousands="," makes pandas parse values
# written like "1,234" as numbers.
results = pd.read_csv(
    "https://datascience.quantecon.org/assets/data/ruhl_cleaned_results.csv",
    thousands=",",
)
results.head()
# Normalise the join key on both sides: title-case, then trim surrounding whitespace.
results["county"] = results["county"].str.title().str.strip()
county_df["NAME"] = county_df["NAME"].str.title().str.strip()
# Inner join: only counties whose name appears in both tables are kept.
res_states = county_df.merge(
    results,
    how="inner",
    left_on="NAME",
    right_on="county",
)
res_states.head()
%%time
res_states["trump_share"] = res_states["trump"] / (res_states["total"])
res_states["rel_trump_share"] = res_states["trump"] / (res_states["trump"]+res_states["clinton"])
res_states.head()
fig, gax = plt.subplots(figsize=(10, 8))
# State outline as a white backdrop (swap 'Wisconsin' for e.g. 'Illinois' to change state).
state_df.query("NAME == 'Wisconsin'").plot(ax=gax, edgecolor="black", color="white")
# Colour each county by 'rel_trump_share' on the reversed red-blue colormap.
# NOTE(review): the colour range 0.01-0.95 is centred on 0.48, not 0.5, so the
# neutral midpoint of the diverging colormap does not sit exactly at an even split.
res_states.plot(
    column="rel_trump_share",
    cmap="RdBu_r",
    vmin=0.01,
    vmax=0.95,
    legend=True,
    edgecolor="black",
    ax=gax,
)
# Caption placed in figure coordinates so readers know what the colours encode.
gax.annotate("Republican vote share", xy=(0.76, 0.06), xycoords="figure fraction")
# Hide the longitude/latitude axes.
plt.axis("off")
plt.show()
# Number of counties where Trump received more votes than Clinton, then the reverse.
(res_states["trump"] > res_states["clinton"]).sum()
(res_states["clinton"] > res_states["trump"]).sum()
# Total votes for each candidate across the merged counties.
res_states["trump"].sum()
res_states["clinton"].sum()