import pandas as pd
import numpy as np
## GLOBAL WK3
train_file = "https://raw.githubusercontent.com/danielcaraway/COVID19/master/WK3_0407/train.csv"
test_file = "https://raw.githubusercontent.com/danielcaraway/COVID19/master/WK3_0407/test.csv"
sub_file = "https://raw.githubusercontent.com/danielcaraway/COVID19/master/WK3_0407/submission.csv"
train = pd.read_csv(train_file)
test = pd.read_csv(test_file)
sub = pd.read_csv(sub_file)
## Using Country for Province_State when entry doesn't have Province_State
def use_country(state, country):
if pd.isna(state):
return country
else:
return state
train['Province_State'] = train.apply(lambda x: use_country(x['Province_State'], x['Country_Region']), axis=1)
test['Province_State'] = test.apply(lambda x: use_country(x['Province_State'], x['Country_Region']), axis=1)
## Converting Date column to a datetime type
train['Date'] = pd.to_datetime(train['Date'])
test['Date'] = pd.to_datetime(test['Date'])
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
ca = train[train['Province_State'] == 'California']
df = ca.copy()
graph_df = df[['Date', 'ConfirmedCases', 'Fatalities']]
data = pd.melt(graph_df, id_vars=['Date'], value_vars=['ConfirmedCases','Fatalities'])
data.head()
ax = sns.lineplot(x="Date", y="value",
hue="variable", style="variable", data=data)
train = train[train['ConfirmedCases'] > 0]
def quick_graph(column, location):
df = train[train[column] == location]
graph_df = df[['Date', 'ConfirmedCases', 'Fatalities']]
data = pd.melt(graph_df, id_vars=['Date'], value_vars=['ConfirmedCases','Fatalities'])
data.head()
ax = sns.lineplot(x="Date", y="value",
hue="variable", style="variable", data=data)
plt.title(location)
quick_graph('Country_Region','Italy')
quick_graph('Country_Region','US')
quick_graph('Country_Region','Spain')
quick_graph('Province_State','New York')