dailylog 4-11-20

1 minute read

JUST MARCH

SINCE JAN

import pandas as pd
from google.colab import files

## BUILDING PER-COUNTRY DAILY CASE TOTALS FOR THE D3 RACE CHART
train_file = "https://raw.githubusercontent.com/danielcaraway/COVID19/master/WK4_0411/train.csv"
train = pd.read_csv(train_file)

# Sum ConfirmedCases over every row sharing a (Country_Region, Date) pair,
# then flatten the grouped result back into ordinary columns.
train_group = (
    train.groupby(['Country_Region', 'Date'])['ConfirmedCases']
    .sum()
    .reset_index()
)
# Column names expected by the rest of the script.
train_group.columns = ['name', 'date', 'value']

## KEEPING DATA FROM MARCH 1 ONWARD
# NOTE: this is a lower bound only, so any later months present in the data
# are kept as well. The comparison is a plain string comparison, which is
# chronological only if dates are 'YYYY-MM-DD' strings -- TODO confirm.
train_group = train_group[train_group['date'] >= '2020-03-01']

## RESHAPING LONG -> WIDE (ONE ROW PER COUNTRY, ONE COLUMN PER DATE)
# A single pivot_table call replaces the per-country pivot + DataFrame.append
# loop: DataFrame.append was removed in pandas 2.0, the loop re-copied the
# accumulated frame on every iteration, and iterating a set gave a row order
# that could differ between runs. Rows now come out sorted by country name.
reshaped_df = train_group.pivot_table(values='value', index='name', columns='date')
# Drop the 'date' label pivot_table leaves on the column axis so the frame
# has plain column headers.
reshaped_df.columns.name = None

reshaped_df.reset_index(inplace=True)

## LOOKUP TABLES BETWEEN COUNTRY NAMES AND COUNTRY CODES
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')

# COUNTRY -> CODE, built from the two columns of the table above.
mydict = dict(zip(df['COUNTRY'], df['CODE']))
# CODE -> COUNTRY, the same mapping turned around.
mydict_inverted = {code: country for country, code in mydict.items()}

def get_country_code(country):
    """Look up *country* in the name/code tables.

    The value is tried first as a key of ``mydict`` (COUNTRY -> CODE) and
    then as a key of ``mydict_inverted`` (CODE -> COUNTRY); the first hit
    is returned.

    NOTE(review): when *country* is already a code, the second table maps it
    back to the country *name*, not to a code -- confirm that is intended.

    Args:
        country: Value to look up (normally a country name string).

    Returns:
        The mapped value, or the string ``'no code'`` when neither table
        contains *country*.
    """
    for lookup in (mydict, mydict_inverted):
        try:
            return lookup[country]
        except (KeyError, TypeError):
            # KeyError: not in this table. TypeError: unhashable input.
            # Only these are treated as "not found"; anything else (e.g.
            # KeyboardInterrupt) is no longer silently swallowed.
            continue
    return 'no code'


# Tag every row with the result of get_country_code for its 'name' value.
reshaped_df['CODE'] = reshaped_df['name'].map(get_country_code)

## GETTING CONTINENTS
# By default read_csv parses the literal string 'NA' as a missing value,
# which would blank out a continent code of 'NA' (presumably North America --
# verify against the file). Only treat empty fields as missing instead.
continents = pd.read_csv(
    "https://raw.githubusercontent.com/danielcaraway/COVID19/master/continent_df.csv",
    keep_default_na=False,
    na_values=[''],
)
# Keep just the two columns needed for the merge and give them the names the
# rest of the script uses: 'category' (continent) and 'CODE' (join key).
continents_sm = continents[['Continent_Code', 'Three_Letter_Country_Code']]
continents_sm.columns = ['category','CODE']

## JOINING THE CONTINENT CATEGORY ONTO THE WIDE TABLE
train_group = reshaped_df.copy()
# Left join on CODE: every row of the wide table is kept even when no
# continent row matches its code.
merged = train_group.merge(continents_sm, how='left', on='CODE')
# Variant with exact duplicate rows removed and the join key dropped.
merged_d = merged.drop_duplicates().drop(columns='CODE')
# merged_d.to_csv('covid_flourish.csv', index=False)
# files.download("covid_flourish.csv")

## POPULATION LOOKUP TABLES
population = pd.read_csv(
    "https://raw.githubusercontent.com/danielcaraway/COVID19/master/kaggle_population_data.csv")

# Split the table by its 'Type' column.
population_countries = population.loc[population['Type'].eq('Country/Region')]
population_province = population.loc[population['Type'].eq('Province/State')]

# Name -> Population mappings, one per subset.
country_lookup = dict(zip(population_countries.Name, population_countries.Population))
province_lookup = dict(zip(population_province.Name, population_province.Population))

def get_population(country):
    """Return the population recorded for *country* in ``country_lookup``.

    Args:
        country: Key to look up (normally a country name string).

    Returns:
        The stored population, or the string ``'na'`` when *country* is not
        a key of ``country_lookup``.
    """
    try:
        return country_lookup[country]
    except (KeyError, TypeError):
        # KeyError: unknown country. TypeError: unhashable input. Only these
        # mean "no population available"; other exceptions now propagate
        # instead of being swallowed by a bare except.
        return 'na'
# Attach the get_population result for each row's 'name' value.
merged['population'] = merged['name'].map(get_population)
# Bare expression: displays the frame when run as the last line of a notebook cell.
merged


## CONVERTING CASE COUNTS TO PER-CAPITA VALUES AND EXPORTING
df = merged.copy()
# Populations that could not be looked up are stored as the string 'na';
# coerce them to NaN so the division below yields NaN rather than raising.
df['population'] = pd.to_numeric(df['population'], errors='coerce')

# Select the per-date columns by dropping the metadata columns by name. The
# previous positional slice (iloc[:, 1:-3]) silently picked the wrong columns
# if any column was added or removed upstream; this raises a KeyError instead.
date_values = df.drop(columns=['name', 'CODE', 'category', 'population'])
# Divide each row's counts by that row's population.
df_pop = date_values.div(df['population'], axis=0)
df_pop['name'] = df['name']

# The left merge with the continent table repeats a country when its code
# appears on more than one continent row; once 'category' is gone those rows
# are identical, so drop the repeats before exporting.
df_pop = df_pop.drop_duplicates()

df = df_pop.copy()
df.to_csv('covid_wk4.csv', index=False)
files.download('covid_wk4.csv')