import pandas as pd
from google.colab import files
## PREPPING DATA FOR OG D3 RACES
# Training CSV is read straight from GitHub; it must provide the
# 'Country_Region', 'Date' and 'ConfirmedCases' columns used below.
train_file = "https://raw.githubusercontent.com/danielcaraway/COVID19/master/WK4_0411/train.csv"
train = pd.read_csv(train_file)

# Sum ConfirmedCases over every row that shares a country and a date, then
# flatten the result into a long table with columns name / date / value.
train_group = (
    train.groupby(['Country_Region', 'Date'])['ConfirmedCases']
    .sum()
    .reset_index()
)
train_group.columns = ['name', 'date', 'value']

## KEEPING DATA FROM 2020-03-01 ONWARD (no upper bound, so not only March)
# This is a plain string comparison: read_csv is not asked to parse dates.
# NOTE(review): assumes 'Date' is zero-padded 'YYYY-MM-DD' text so that string
# order matches chronological order -- confirm against the CSV.
train_group = train_group[train_group['date'] >= '2020-03-01']

## RESHAPING LONG -> WIDE: one row per name, one column per date
# A single pivot replaces the former per-country pivot + DataFrame.append loop:
# DataFrame.append no longer exists in pandas 2.x, and iterating over a set
# made the row order differ from run to run. Each (name, date) pair is already
# unique after the groupby above, so pivot_table's default mean aggregation
# returns the summed value unchanged.
reshaped_df = train_group.pivot_table(values='value', index='name', columns='date')
reshaped_df.reset_index(inplace=True)
## GETTING COUNTRY CODE FOR CONTINENT
# Only the COUNTRY and CODE columns of this table are used.
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
# Forward lookup: COUNTRY value -> CODE value (a later duplicate COUNTRY wins).
mydict = {country: code for country, code in zip(df['COUNTRY'], df['CODE'])}
# Reverse lookup built from the forward one: CODE value -> COUNTRY value.
mydict_inverted = {code: country for country, code in mydict.items()}
def get_country_code(country):
    """Look `country` up in the module-level GDP-table mappings.

    ``mydict`` (COUNTRY -> CODE) is tried first, then ``mydict_inverted``
    (CODE -> COUNTRY).

    Args:
        country: Key to look up.

    Returns:
        The value stored under `country` in the first mapping that contains
        it, or the string 'no code' when neither mapping does.
    """
    # NOTE(review): a hit in mydict_inverted returns the COUNTRY value, not a
    # CODE -- kept as-is for compatibility; confirm this is intended.
    for lookup in (mydict, mydict_inverted):
        try:
            return lookup[country]
        except (KeyError, TypeError):
            # KeyError: key absent. TypeError: unhashable key. Anything else
            # (e.g. a missing mapping) is a real bug and is allowed to surface
            # instead of being silently reported as 'no code'.
            continue
    return 'no code'
# Tag every row with the result of get_country_code for its 'name'.
# Mapping over the single column that is needed avoids building a full row
# object per record, which DataFrame.apply(axis=1) does.
reshaped_df['CODE'] = reshaped_df['name'].map(get_country_code)
## GETTING CONTINENTS
# read_csv's default NA handling converts the literal text "NA" to NaN.
# Restricting NA parsing to empty fields keeps such codes as strings.
# NOTE(review): presumably Continent_Code uses "NA" for North America --
# confirm against the CSV.
continents = pd.read_csv(
    "https://raw.githubusercontent.com/danielcaraway/COVID19/master/continent_df.csv",
    keep_default_na=False,
    na_values=[''],
)
# Keep only the two columns needed for the merge on 'CODE'. The explicit copy
# means the relabelling below never touches `continents` itself.
continents_sm = continents[['Continent_Code', 'Three_Letter_Country_Code']].copy()
continents_sm.columns = ['category', 'CODE']
## MERGING
# `train_group` is rebound here to a copy of the wide, per-country table.
train_group = reshaped_df.copy()
# Left join keeps every row of train_group and adds 'category' where the
# CODE matches a row of continents_sm.
merged = train_group.merge(continents_sm, how='left', on='CODE')
# De-duplicated variant with the join key removed.
merged_d = merged.drop_duplicates().drop(columns='CODE')
# Optional export of the de-duplicated table (disabled):
# merged_d.to_csv('covid_flourish.csv', index=False)
# files.download("covid_flourish.csv")
## WITH ADDED POPULATION COLUMN
population = pd.read_csv("https://raw.githubusercontent.com/danielcaraway/COVID19/master/kaggle_population_data.csv")
# Split the table by its 'Type' column into country-level and province-level rows.
is_country_row = population['Type'] == 'Country/Region'
is_province_row = population['Type'] == 'Province/State'
population_countries = population.loc[is_country_row]
population_province = population.loc[is_province_row]
# Name -> Population dictionaries, one per row type.
country_lookup = dict(zip(population_countries['Name'], population_countries['Population']))
province_lookup = dict(zip(population_province['Name'], population_province['Population']))
def get_population(country):
    """Return the population stored for `country` in ``country_lookup``.

    Args:
        country: Key to look up in the module-level ``country_lookup`` dict.

    Returns:
        The stored population, or the string 'na' when `country` is not a key.
    """
    try:
        return country_lookup[country]
    except (KeyError, TypeError):
        # KeyError: unknown name. TypeError: unhashable key. Other errors
        # (e.g. a missing lookup table) are real bugs and should surface.
        return 'na'
# Add a 'population' column to `merged` in place; names that get_population
# cannot resolve receive the string 'na'.
merged['population'] = merged['name'].map(get_population)

# NOTE(review): this continues from `merged`, not the de-duplicated `merged_d`,
# so any rows duplicated by the left merge carry through to the export --
# confirm this is intended.
df = merged.copy()
# Turn the 'na' placeholders into NaN so the division below yields NaN for
# those rows instead of raising.
df['population'] = pd.to_numeric(df['population'], errors='coerce')

# Divide every date column by the row's population. The date columns are
# selected by excluding the known non-date columns by name rather than by
# position, so the selection does not depend on column order.
non_date_columns = ['name', 'CODE', 'category', 'population']
date_columns = [col for col in df.columns if col not in non_date_columns]
df_pop = df[date_columns].div(df['population'], axis=0)
# 'name' is re-attached as the last column of the exported table.
df_pop['name'] = df['name']
df = df_pop.copy()

# Write the table locally, then hand it to the Colab browser download helper.
df.to_csv('covid_wk4.csv', index=False)
files.download('covid_wk4.csv')