This Jupyter notebook demonstrates spatial correlation analysis between population and COVID-19 confirmed cases in New York State.
We take the State of New York as our study area. This notebook uses geospatial libraries to show the spatial distribution of population data, COVID-19 confirmed cases, and daily increases during the past week in New York State, and to demonstrate the results of the spatial correlation analysis between population and the number of confirmed COVID-19 cases in New York State.
The first part is a demonstration that shows users how to prepare population data and COVID-19 data for New York State.
This notebook depends on numpy, pandas, geopandas, shapely, and other libraries available in CyberGISX-Jupyter. In order to set up an environment to store and manipulate the Population data, we need to import these libraries.
import pathlib
import os
import tarfile
import requests
import shutil
import zipfile
import pandas as pd
import pathlib
import os
import tarfile
import requests
import shutil
import zipfile
# Plotting the population data
import matplotlib.pyplot as plt
import datetime
%matplotlib inline
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
import plotly.figure_factory as ff
import plotly.express as px
import json
import plotly.graph_objects as go
import cufflinks as cf
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")
This piece of data is formatted as a shapefile. link: https://www.arcgis.com/home/item.html?id=3b69769aa9b646a483af81d05e7702d2
U.S. Counties represents the counties of the United States in the 50 states, the District of Columbia, and Puerto Rico.
Originally extracted from this layer package: http://www.arcgis.com/home/item.html?id=a00d6b6149b34ed3b833e10fb72ef47b
%%time
file = pathlib.Path("USA_Counties_as_Shape.zip")
if file.exists ():
print ("Population data exist")
else:
print ("Population data not exist, Downloading the Population data...")
!wget https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/22153815/USA_Counties_as_Shape.zip
Show the records for New York State in this shapefile.
%%time
pop = gpd.read_file("zip://USA_Counties_as_Shape.zip")
pop = pop[pop.STATE_NAME=='New York']
pop
The data are provided as a CSV file.
%%time
confirmed_cases = pd.read_csv(
"https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv"
)
confirmed_cases = confirmed_cases[confirmed_cases['Province_State'] == 'New York']
confirmed_cases
#confirmed_cases.head(5)
Show the time series data
# Column labels of the confirmed-cases table.
columns = confirmed_cases.columns
# Take the labels from position 11 up to, but not including, the last column.
# NOTE(review): presumably the first 11 columns are metadata and the rest are
# dates; the final column is excluded here -- confirm that this is intended.
dates = columns[11:-1]
dates
# Copy the county name into an 'Admin2' column so that `pop` has the same
# key column name that confirmed_cases uses.
pop["Admin2"]=pop["NAME"]
pop.shape
pop.describe()
# Drop the rows whose Admin2 value is 'Unassigned', then preview up to the
# first 64 remaining rows.
is_assigned = confirmed_cases['Admin2'] != 'Unassigned'
confirmed_cases = confirmed_cases[is_assigned]
confirmed_cases.head(64)
This part is a demonstration that shows spatial correlation analytics between population and COVID-19 confirmed cases in New York State.
from urllib.request import urlopen
import json

# Load the U.S. county boundary GeoJSON from the Plotly sample datasets.
# An alternative source that was tried previously:
#   https://raw.githubusercontent.com/cybergis/COVID_19/master/counties_update_new.geojson
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as geojson_stream:
    counties = json.loads(geojson_stream.read())
Show the spatial distribution of the COVID-19 Confirmed Cases in New York State using Mapbox Choropleth maps with Plotly. It will take 5 seconds to show the interactive map.
%%time
fig = go.Figure(
go.Choroplethmapbox(
geojson=counties, locations=confirmed_cases.FIPS,
z=np.log1p(confirmed_cases['3/29/2020']),
# z=confirmed_cases['3/29/20'],
colorscale="reds", marker_opacity=0.5, marker_line_width=0,
ids = confirmed_cases['Admin2'],
name = 'Confirmed Cases',
colorbar_thickness = 10,
hoverinfo = 'text',
text = confirmed_cases['Admin2'] + ', ' + confirmed_cases['Province_State'] + '\n' + confirmed_cases['3/29/2020'].astype('str'),
# showlegend = True,
showscale = True,
colorbar = dict(
title = "# confirmed cases",
titleside = 'top',
tickmode = 'array',
tickvals = np.arange(11),
ticktext = np.round(np.exp(np.arange(0,11)) - 1),
ticks = 'inside',
outlinewidth = 0
)
))
fig.update_layout(mapbox_style="carto-positron",
mapbox_zoom=5, #mapbox_center = {"lat": 37.0902, "lon": -95.7129},)
mapbox_center={"lat": 42.7, "lon": -76},
)
fig.update_layout(margin={"r":10,"t":10,"l":10,"b":10})
fig.show()
The density map is shown for the COVID-19 Confirmed Cases in New York State using Mapbox Density maps with Plotly. It will take about 200 milliseconds to show the interactive map.
%%time
fig = go.Figure(
go.Densitymapbox(
name = 'Density of Confirmed Cases',
opacity = 0.7,
z = np.log1p(confirmed_cases['3/29/2020']),
lat = confirmed_cases['Lat'],
lon = confirmed_cases['Long_'],
colorscale = 'reds',
radius = 30,
text = confirmed_cases['Admin2'] + ', ' + confirmed_cases['Province_State'] + '\n' + confirmed_cases['3/29/2020'].astype('str'),
hoverinfo = 'text',
colorbar = dict(
title = "# confirmed cases",
titleside = 'top',
tickmode = 'array',
tickvals = np.arange(11),
ticktext = np.round(np.exp(np.arange(0,11)) - 1),
ticks = 'inside',
outlinewidth = 0
)
)
)
fig.update_layout(mapbox_style="carto-positron",
mapbox_zoom=5, #mapbox_center = {"lat": 37.0902, "lon": -95.7129},)
mapbox_center={"lat": 42.7, "lon": -76})
fig.update_layout(margin={"r":0.1,"t":0.1,"l":0.1,"b":0.1})
fig.show()
The trend in the number of COVID-19 confirmed cases in all counties in New York.
# Reshape the table to one row per date and one column per county (Admin2)
# for the trend plots.
#
# The date columns are selected by label *before* transposing. Once 'Admin2'
# becomes the index, every later column moves up one position, so skipping 11
# rows of the transposed table would silently lose the first date.
# NOTE(review): assumes 'Admin2' sits among the first 11 columns, i.e. that
# the date columns start at position 11 of confirmed_cases -- confirm.
date_columns = confirmed_cases.columns[11:]
nyc_count = confirmed_cases.set_index('Admin2')[date_columns].T
Draw rectangles on the trace to zoom, and hover to see the data.
# Interactive line chart of the 30 most recent rows (dates) of nyc_count,
# one trace per county column.
recent_counts = nyc_count.tail(30)
recent_counts.iplot(
    asFigure=True,
    xTitle="Date",
    yTitle="Confirmed Cases",
    title="Trend in number of confirmed cases in New York",
)
Convert the y-axis to a logarithm scale.
# Same 30-row trend chart, drawn with a logarithmic y-axis.
log_trend_options = dict(
    asFigure=True,
    xTitle="Date",
    yTitle="Confirmed Cases",
    title="Trend in number of confirmed cases in New York (Log Scale)",
    logy=True,
)
nyc_count.tail(30).iplot(**log_trend_options)
Number of confirmed cases during the Past Week in New York State. It will take 30 seconds to load the dynamic maps.
%%time
fig = go.Figure()
dates_ = dates[-7:]
for date in dates_:
fig.add_trace(
dict(
type="choroplethmapbox",
visible = False,
geojson=counties, locations=confirmed_cases.FIPS,
z=np.log1p(confirmed_cases[date]),
colorscale="reds", marker_opacity=0.5, marker_line_width=0,
ids = confirmed_cases['Admin2'],
name = 'Confirmed Cases',
colorbar_thickness = 10,
hoverinfo = 'text',
text = confirmed_cases['Admin2'] + ', ' + confirmed_cases['Province_State'] + '\n' + confirmed_cases[date].astype('str'),
showscale = True,
zmin = 0,
zmax = 11,
colorbar = dict(
# title = "# confirmed cases",
titleside = 'top',
tickmode = 'array',
tickvals = np.arange(11),
ticktext = np.round(np.exp(np.arange(0,11)) - 1),
ticks = 'inside',
outlinewidth = 0,
tickfont = {'color':'#a9a9a9'},
x = 1
)
)
)
steps = []
for i in range(len(fig.data)):
step = dict(
method='restyle',
args=["visible", [False] * len(fig.data)],
label = dates_[i],
)
step["args"][1][i] = True # Toggle i'th trace to "visible"
steps.append(step)
sliders = [dict(
active=0,
currentvalue={"prefix": "Date: "},
pad={"t": 0, 'l' : 50, 'r':50},
lenmode = 'fraction',
len = 0.8,
transition = {'easing': 'sin'},
font = {'color':'#a9a9a9'},
steps=steps,
)]
fig.update_layout(
sliders=sliders
)
fig.data[0].visible = True
fig.update_layout(
mapbox_style="carto-positron",
mapbox_zoom=5, #mapbox_center = {"lat": 37.0902, "lon": -95.7129},)
mapbox_center={"lat": 42.7, "lon": -76},
margin={"r":10,"t":50,"l":15,"b":10},
title={
'text': "Confirmed Cases during the Past Week in the State of New York",
'xref': "container"
},
)
fig.show()
Drag on the slider to change date view. A preview to this visualization:
Daily increases during the Past Week in New York State. It will take 30 seconds to load the dynamic maps.
%%time
import warnings
warnings.filterwarnings("ignore")
fig = go.Figure()
dates_ = dates[-8:]
for i in range(1,8):
date = dates_[i]
yesterday = dates_[i-1]
fig.add_trace(
dict(
type="choroplethmapbox",
visible = False,
geojson=counties, locations=confirmed_cases.FIPS,
z=np.log1p(confirmed_cases[date] - confirmed_cases[yesterday]),
colorscale="reds", marker_opacity=0.5, marker_line_width=0,
ids = confirmed_cases['Admin2'],
name = 'Confirmed Cases',
colorbar_thickness = 10,
hoverinfo = 'text',
text = confirmed_cases['Admin2'] + ', ' + confirmed_cases['Province_State'] + ' - Daily Increase: ' + (confirmed_cases[date] - confirmed_cases[yesterday]).astype('str'),
showscale = True,
zmin = 0,
zmax = 8,
colorbar = dict(
# title = "# confirmed cases",
titleside = 'top',
tickmode = 'array',
tickvals = np.arange(0,9),
ticktext = np.round(np.exp(np.arange(0,9)) - 1),
ticks = 'inside',
outlinewidth = 0,
tickfont = {'color':'#a9a9a9'},
x = 1
)
)
)
steps = []
for i in range(len(fig.data)):
step = dict(
method='restyle',
args=["visible", [False] * len(fig.data)],
label = dates_[i+1],
)
step["args"][1][i] = True # Toggle i'th trace to "visible"
steps.append(step)
sliders = [dict(
active=0,
currentvalue={"prefix": "Date: "},
pad={"t": 0, 'l' : 50, 'r':50},
lenmode = 'fraction',
len = 0.8,
transition = {'easing': 'sin'},
font = {'color':'#a9a9a9'},
steps=steps,
)]
fig.update_layout(
sliders=sliders
)
fig.data[0].visible = True
fig.update_layout(
mapbox_style="carto-positron",
mapbox_zoom=5, #mapbox_center = {"lat": 37.0902, "lon": -95.7129},)
mapbox_center={"lat": 42.7, "lon": -76},
margin={"r":10,"t":50,"l":15,"b":10},
title={
'text': "Daily Increases during the Past Week in the State of New York",
'xref': "container"
},
)
fig.show()
# Shared seaborn theme and default figure size for the population plots.
sns.set(style='darkgrid', palette="deep", font_scale=1.1, rc={"figure.figsize": [10, 8]})

# Histogram of the POP2012 column as raw counts (no KDE curve), saved to disk.
histogram_axes = sns.distplot(pop['POP2012'], norm_hist=False, kde=False)
histogram_axes.set(xlabel='POP2012', ylabel='Count');
plt.savefig('POP2012_distplot.png')

# Joint plots of POP2012 against POP2010 and against POP12_SQMI.
for compared_field in ('POP2010', 'POP12_SQMI'):
    sns.jointplot(x=pop['POP2012'], y=pop[compared_field]);
%%time
merged_population = pop.merge(confirmed_cases, on=["Admin2"], how='outer')
merged_population.head()
Exploratory data analysis for population data and COVID-19 Confirmed Cases
%%time
fig, ax = plt.subplots(1,2, figsize=(18,18))
merged_population.plot(column='POP2012', scheme='Quantiles', k=5, cmap='YlGnBu', legend=True, ax=ax[0]);
merged_population.plot(column='3/29/2020', scheme='Quantiles', k=5, cmap='YlGnBu', legend=True, ax=ax[1]);
plt.tight_layout()
ax[0].set_title("Population Count")
ax[1].set_title("COVID-19 Confirmed Cases on 3/29/2020")
plt.savefig('comparison.png', bbox_inches="tight")
plt.show()
Compute the correlation matrix between the population dataset and the COVID-19 confirmed cases dataset and plot the heatmap
%%time
columns = ['POP2012','POP12_SQMI','MALES','FEMALES','WHITE','BLACK','AMERI_ES','ASIAN','HAWN_PI','HISPANIC','OTHER','3/23/2020','3/24/2020','3/25/2020', '3/26/2020',
'3/27/2020', '3/28/2020','3/29/2020','3/30/2020']
#
correlation = merged_population[columns].corr()
fig, ax = plt.subplots(figsize=(12,10))
sns.heatmap(correlation, xticklabels=columns,yticklabels=columns, ax=ax)
plt.show()