COVID19 -- Checking the WHO Numbers

In [88]:
import os
import pandas as pd
entries = os.listdir('test_csvs/')
li = []
for entry in entries:
    if '.csv' in entry:
        num = entry.split('sitrep-')[1].split('-')[0]
        date = entry.split('-')[0]
        f = pd.read_csv('csvs/'+entry, index_col=None, header=0)
        if f.shape[1] == 7:
            if 'Total' in f.columns[0]:
                f.columns = ['country', 'total_confirmed', 'total_new', 'total_deaths', 'total_new_deaths', 'transmission_class', 'days_since_report']
                f['date'] = date
                li.append(f)
In [87]:
frame = pd.concat(li, axis=0, ignore_index=True)
frame
Out[87]:
country total_confirmed total_new total_deaths total_new_deaths transmission_class days_since_report date
0 Western Pacific Region NaN NaN NaN NaN NaN NaN 20200322
1 China 81498.0 82.0 3267.0 6.0 Local transmission 0.0 20200322
2 Republic of Korea 8897.0 98.0 104.0 2.0 Local transmission 0.0 20200322
3 Malaysia 1183.0 153.0 3.0 0.0 Local transmission 0.0 20200322
4 Australia 1081.0 208.0 7.0 0.0 Local transmission 0.0 20200322
5 Japan 1046.0 50.0 36.0 1.0 Local transmission 0.0 20200322
6 Singapore 432.0 47.0 2.0 2.0 Local transmission 0.0 20200322
7 Philippines 307.0 77.0 19.0 1.0 Local transmission 0.0 20200322
8 Viet Nam 94.0 3.0 0.0 0.0 Local transmission 0.0 20200322
9 Brunei Darussalam 83.0 5.0 0.0 0.0 Local transmission 0.0 20200322
10 New Zealand 66.0 13.0 0.0 0.0 Local transmission 0.0 20200322
11 Cambodia 53.0 2.0 0.0 0.0 Local transmission 0.0 20200322
12 Mongolia 10.0 4.0 0.0 0.0 Imported cases only 0.0 20200322
13 Fiji 2.0 1.0 0.0 0.0 Local transmission 0.0 20200322
14 Papua New Guinea 1.0 0.0 0.0 0.0 Imported cases only 1.0 20200322
15 Territories** NaN NaN NaN NaN NaN NaN 20200322
16 French Polynesia \n15 \n4 \n0 \n0 \nImported c... NaN NaN NaN NaN NaN NaN 20200322
17 Guam \n15 \n1 \n0 \n0 \nLocal transmission \n0 NaN NaN NaN NaN NaN NaN 20200322
18 New Caledonia \n4 \n2 \n0 \n0 \nImported cases... NaN NaN NaN NaN NaN NaN 20200322
19 European Region ^ NaN NaN NaN NaN NaN NaN 20200322
20 Italy 53578.0 6557.0 4827.0 795.0 Local transmission 0.0 20200322
21 Spain 24926.0 4946.0 1326.0 324.0 Local transmission 0.0 20200322
22 Germany 21463.0 3140.0 67.0 22.0 Local transmission 0.0 20200322
23 France 14296.0 1821.0 562.0 112.0 Local transmission 0.0 20200322
24 Switzerland 6077.0 1237.0 56.0 13.0 Local transmission 0.0 20200322
25 The United Kingdom 5018.0 1035.0 233.0 56.0 Local transmission 0.0 20200322
26 Netherlands 3631.0 637.0 136.0 30.0 Local transmission 0.0 20200322
27 Austria 3024.0 375.0 8.0 2.0 Local transmission 0.0 20200322
28 Belgium 2815.0 558.0 67.0 30.0 Local transmission 0.0 20200322
29 Norway 1926.0 184.0 7.0 0.0 Local transmission 0.0 20200322
... ... ... ... ... ... ... ... ...
110 Guam \n14 \n2 \n0 \n0 \nLocal transmission \n0 NaN NaN NaN NaN NaN NaN 20200321
111 French Polynesia \n11 \n0 \n0 \n0 \nImported c... NaN NaN NaN NaN NaN NaN 20200321
112 New Caledonia \n2 \n0 \n0 \n0 \nImported cases... NaN NaN NaN NaN NaN NaN 20200321
113 European Region ^ NaN NaN NaN NaN NaN NaN 20200321
114 Italy 47021.0 5986.0 4032.0 625.0 Local transmission 0.0 20200321
115 Spain 19980.0 2833.0 1002.0 235.0 Local transmission 0.0 20200321
116 Germany 18323.0 7324.0 45.0 25.0 Local transmission 0.0 20200321
117 France 12475.0 1598.0 450.0 78.0 Local transmission 0.0 20200321
118 Switzerland 4840.0 977.0 43.0 10.0 Local transmission 0.0 20200321
119 The United Kingdom 3983.0 706.0 177.0 33.0 Local transmission 0.0 20200321
120 Netherlands 2994.0 534.0 106.0 30.0 Local transmission 0.0 20200321
121 Austria 2649.0 806.0 6.0 1.0 Local transmission 0.0 20200321
122 Belgium 2257.0 462.0 37.0 23.0 Local transmission 0.0 20200321
123 Norway 1742.0 190.0 7.0 1.0 Local transmission 0.0 20200321
124 Sweden 1623.0 200.0 16.0 13.0 Local transmission 0.0 20200321
125 Denmark 1255.0 123.0 9.0 3.0 Local transmission 0.0 20200321
126 Portugal 1020.0 235.0 6.0 3.0 Local transmission 0.0 20200321
127 Czechia 904.0 210.0 0.0 0.0 Local transmission 0.0 20200321
128 Israel 712.0 183.0 1.0 1.0 Local transmission 0.0 20200321
129 Ireland 683.0 126.0 3.0 0.0 Local transmission 0.0 20200321
130 Turkey 670.0 479.0 9.0 7.0 Local transmission 0.0 20200321
131 Greece 495.0 77.0 8.0 3.0 Local transmission 0.0 20200321
132 Luxembourg 484.0 139.0 5.0 1.0 Local transmission 0.0 20200321
133 Finland 450.0 81.0 0.0 0.0 Local transmission 0.0 20200321
134 Poland 425.0 100.0 5.0 0.0 Local transmission 0.0 20200321
135 Iceland 409.0 79.0 1.0 1.0 Local transmission 0.0 20200321
136 Slovenia 341.0 22.0 1.0 0.0 Local transmission 0.0 20200321
137 Romania 308.0 48.0 0.0 0.0 Local transmission 0.0 20200321
138 Estonia 283.0 16.0 0.0 0.0 Local transmission 0.0 20200321
139 Russian Federation 253.0 54.0 0.0 0.0 Imported cases only 0.0 20200321

140 rows × 8 columns

In [95]:
grouped = pd.DataFrame(frame.groupby(['country', 'date','total_deaths']).sum())
In [96]:
grouped
Out[96]:
total_confirmed total_new total_new_deaths days_since_report
country date total_deaths
Australia 20200320 6.0 709.0 199.0 0.0 0.0
20200321 7.0 873.0 164.0 1.0 0.0
20200322 7.0 1081.0 208.0 0.0 0.0
Austria 20200320 5.0 1843.0 197.0 1.0 0.0
20200321 6.0 2649.0 806.0 1.0 0.0
20200322 8.0 3024.0 375.0 2.0 0.0
Belgium 20200320 14.0 1795.0 309.0 0.0 0.0
20200321 37.0 2257.0 462.0 23.0 0.0
20200322 67.0 2815.0 558.0 30.0 0.0
Brunei Darussalam 20200320 0.0 73.0 17.0 0.0 0.0
20200321 0.0 78.0 5.0 0.0 0.0
20200322 0.0 83.0 5.0 0.0 0.0
Cambodia 20200320 0.0 47.0 12.0 0.0 0.0
20200321 0.0 51.0 4.0 0.0 0.0
20200322 0.0 53.0 2.0 0.0 0.0
China 20200320 3253.0 81300.0 126.0 11.0 0.0
20200321 3261.0 81416.0 116.0 8.0 0.0
20200322 3267.0 81498.0 82.0 6.0 0.0
Croatia 20200322 1.0 206.0 80.0 0.0 0.0
Czechia 20200320 0.0 694.0 172.0 0.0 0.0
20200321 0.0 904.0 210.0 0.0 0.0
20200322 0.0 995.0 91.0 0.0 0.0
Denmark 20200320 6.0 1132.0 88.0 2.0 0.0
20200321 9.0 1255.0 123.0 3.0 0.0
20200322 13.0 1326.0 71.0 4.0 0.0
Estonia 20200320 0.0 267.0 9.0 0.0 0.0
20200321 0.0 283.0 16.0 0.0 0.0
20200322 0.0 306.0 23.0 0.0 0.0
Fiji 20200320 0.0 1.0 1.0 0.0 0.0
20200321 0.0 1.0 0.0 0.0 1.0
... ... ... ... ... ... ...
Romania 20200322 0.0 367.0 59.0 0.0 0.0
Russian Federation 20200320 0.0 199.0 52.0 0.0 0.0
20200321 0.0 253.0 54.0 0.0 0.0
20200322 0.0 306.0 53.0 0.0 0.0
San Marino 20200320 14.0 126.0 17.0 0.0 0.0
Serbia†† 20200320 0.0 123.0 41.0 0.0 0.0
Singapore 20200320 0.0 345.0 32.0 0.0 0.0
20200321 0.0 385.0 40.0 0.0 0.0
20200322 2.0 432.0 47.0 2.0 0.0
Slovenia 20200320 1.0 319.0 33.0 0.0 0.0
20200321 1.0 341.0 22.0 0.0 0.0
20200322 1.0 383.0 42.0 0.0 0.0
Spain 20200320 767.0 17147.0 3431.0 169.0 0.0
20200321 1002.0 19980.0 2833.0 235.0 0.0
20200322 1326.0 24926.0 4946.0 324.0 0.0
Sweden 20200320 3.0 1423.0 144.0 0.0 0.0
20200321 16.0 1623.0 200.0 13.0 0.0
20200322 20.0 1746.0 123.0 4.0 0.0
Switzerland 20200320 33.0 3863.0 853.0 12.0 0.0
20200321 43.0 4840.0 977.0 10.0 0.0
20200322 56.0 6077.0 1237.0 13.0 0.0
The United Kingdom 20200320 144.0 3277.0 647.0 41.0 0.0
20200321 177.0 3983.0 706.0 33.0 0.0
20200322 233.0 5018.0 1035.0 56.0 0.0
Turkey 20200320 2.0 191.0 0.0 0.0 1.0
20200321 9.0 670.0 479.0 7.0 0.0
20200322 21.0 947.0 277.0 12.0 0.0
Viet Nam 20200320 0.0 85.0 19.0 0.0 0.0
20200321 0.0 91.0 6.0 0.0 0.0
20200322 0.0 94.0 3.0 0.0 0.0

122 rows × 4 columns

In [98]:
import os
import pandas as pd
entries = os.listdir('csvs/')
li = []
for entry in entries:
    if '.csv' in entry:
        num = entry.split('sitrep-')[1].split('-')[0]
        date = entry.split('-')[0]
        f = pd.read_csv('csvs/'+entry, index_col=None, header=0)
        if f.shape[1] == 7:
            if 'Total' in f.columns[0]:
                f.columns = ['country', 'total_confirmed', 'total_new', 'total_deaths', 'total_new_deaths', 'transmission_class', 'days_since_report']
                f['date'] = date
                li.append(f)
In [100]:
frame = pd.concat(li, axis=0, ignore_index=True)
grouped = pd.DataFrame(frame.groupby(['country', 'date','total_deaths']).sum())
Out[100]:
total_confirmed total_new total_new_deaths transmission_class days_since_report
country date total_deaths
Albania 20200309 0.0 2.0 2.0 0.0 Imported cases only 0
20200311 0.0 10.0 8.0 0.0 Local transmission 0
20200312 0.0 10.0 0.0 0.0 Local transmission 1
20200313 0.0 23.0 13.0 0.0 Imported cases only 0
20200314 1.0 33.0 10.0 1.0 Local transmission 0
20200315 1.0 38.0 5.0 0.0 Local transmission 0
20200316 1.0 42.0 4.0 0.0 Local transmission 0
Andorra 20200303 0.0 1.0 1.0 0.0 Imported cases only 0
20200304 0.0 1.0 0.0 0.0 Imported cases only 1
20200305 0.0 1.0 0.0 0.0 Imported cases only 2
20200306 0.0 1.0 0.0 0.0 Imported cases only 3
20200307 0.0 1.0 0.0 0.0 Imported cases only 4
Armenia 20200302 0.0 1.0 1.0 0.0 Imported cases only 0
20200303 0.0 1.0 0.0 0.0 Imported cases only 1
20200304 0.0 1.0 0.0 0.0 Imported cases only 2
20200305 0.0 1.0 0.0 0.0 Imported cases only 3
20200306 0.0 1.0 0.0 0.0 Imported cases only 4
20200307 0.0 1.0 0.0 0.0 Imported cases only 5
Australia 20200302 1.0 27.0 2.0 1.0 Local transmission 0
20200303 1.0 33.0 6.0 0.0 Local transmission 0
20200304 1.0 43.0 10.0 0.0 Local transmission 0
20200305 2.0 57.0 14.0 2.0 Local transmission 0
20200306 2.0 57.0 0.0 0.0 Local transmission 1
20200307 2.0 62.0 5.0 0.0 Local transmission 0
20200308 3.0 74.0 12.0 1.0 Local transmission 0
20200309 3.0 77.0 3.0 0.0 Local transmission 0
20200310 3.0 92.0 15.0 0.0 Local transmission 0
20200311 3.0 112.0 20.0 0.0 Local transmission 0
20200312 3.0 122.0 10.0 0.0 Local transmission 0
20200313 3.0 140.0 18.0 0.0 Local transmission 0
... ... ... ... ... ... ... ...
Turkey 20200319 2.0 191.0 51.0 1.0 Local transmission 0
20200320 2.0 191.0 0.0 0.0 Local transmission 1
20200321 9.0 670.0 479.0 7.0 Local transmission 0
20200322 21.0 947.0 277.0 12.0 Local transmission 0
Ukraine 20200304 0.0 1.0 1.0 0.0 Imported cases only 0
Viet Nam 20200302 0.0 16.0 0.0 0.0 Local transmission 18
20200303 0.0 16.0 0.0 0.0 Local transmission 19
20200304 0.0 16.0 0.0 0.0 Local transmission 20
20200305 0.0 16.0 0.0 0.0 Local transmission 21
20200306 0.0 16.0 0.0 0.0 Local transmission 22
20200307 0.0 17.0 1.0 0.0 Local transmission 0
20200308 0.0 21.0 4.0 0.0 Local transmission 0
20200309 0.0 30.0 9.0 0.0 Local transmission 0
20200310 0.0 31.0 1.0 0.0 Local transmission 0
20200311 0.0 35.0 4.0 0.0 Local transmission 0
20200312 0.0 39.0 4.0 0.0 Local transmission 0
20200313 0.0 39.0 0.0 0.0 Local transmission 1
20200314 0.0 48.0 9.0 0.0 Local transmission 0
20200315 0.0 53.0 5.0 0.0 Local transmission 0
20200316 0.0 57.0 4.0 0.0 Local transmission 0
20200317 0.0 61.0 4.0 0.0 Local transmission 0
20200318 0.0 61.0 4.0 0.0 Local transmission 0
20200319 0.0 66.0 5.0 0.0 Local transmission 0
20200320 0.0 85.0 19.0 0.0 Local transmission 0
20200321 0.0 91.0 6.0 0.0 Local transmission 0
20200322 0.0 94.0 3.0 0.0 Local transmission 0
the United \nKingdom^^ 20200305 0.0 89.0 38.0 0.0 Local transmission 0
the United \nKingdom¶ 20200304 0.0 51.0 12.0 0.0 Local transmission 0
the United Kingdom 20200302 0.0 36.0 13.0 0.0 Local transmission 0
20200303 0.0 39.0 3.0 0.0 Local transmission 0

919 rows × 5 columns

TESTING ITALY

In [102]:
grouped.reset_index(inplace=True)
italy = grouped[grouped['country'] == 'Italy']
In [103]:
italy
Out[103]:
country date total_deaths total_confirmed total_new total_new_deaths transmission_class days_since_report
418 Italy 20200302 35 1689.0 561.0 6.0 Local transmission 0
419 Italy 20200303 52 2036.0 347.0 17.0 Local transmission 0
420 Italy 20200304 80 2502.0 466.0 28.0 Local transmission 0
421 Italy 20200305 107 3089.0 587.0 27.0 Local transmission 0
422 Italy 20200306 148 3858.0 769.0 41.0 Local transmission 0
423 Italy 20200307 197 4636.0 778.0 49.0 Local transmission 0
424 Italy 20200308 234 5883.0 1247.0 37.0 Local transmission 0
425 Italy 20200309 366 7375.0 1492.0 132.0 Local transmission 0
426 Italy 20200310 463 9172.0 1797.0 97.0 Local transmission 0
427 Italy 20200311 631 10149.0 977.0 168.0 Local transmission 0
428 Italy 20200312 827 12462.0 2313.0 196.0 Local transmission 0
429 Italy 20200313 1016 15113.0 2651.0 189.0 Local transmission 0
430 Italy 20200314 1268 17660.0 2547.0 252.0 Local transmission 0
431 Italy 20200315 1441 21157.0 3497.0 173.0 Local transmission 0
432 Italy 20200316 1809 24747.0 3590.0 368.0 Local transmission 0
433 Italy 20200317 2503 27980.0 3233.0 349.0 Local transmission 0
434 Italy 20200318 2503 31506.0 3526.0 345.0 Local transmission 0
435 Italy 20200319 2978 35713.0 4207.0 473.0 Local transmission 0
436 Italy 20200320 3407 41035.0 5322.0 429.0 Local transmission 0
437 Italy 20200321 4032 47021.0 5986.0 625.0 Local transmission 0
438 Italy 20200322 4827 53578.0 6557.0 795.0 Local transmission 0
In [118]:
df = italy.copy()
df['corrections_death'] = df['total_deaths'] - df['total_deaths'].shift(+1)
df['corrections_new'] = df['total_confirmed'].shift(+1) + df['total_new']
In [122]:
df['conf_death'] = df['corrections_death'] == df['total_new_deaths']
df['conf_new'] = df['corrections_new'] == df['total_confirmed']
In [123]:
df
Out[123]:
country date total_deaths total_confirmed total_new total_new_deaths transmission_class days_since_report corrections_death corrections_new conf_death conf_new
418 Italy 20200302 35 1689.0 561.0 6.0 Local transmission 0 NaN NaN False False
419 Italy 20200303 52 2036.0 347.0 17.0 Local transmission 0 17 2036.0 True True
420 Italy 20200304 80 2502.0 466.0 28.0 Local transmission 0 28 2502.0 True True
421 Italy 20200305 107 3089.0 587.0 27.0 Local transmission 0 27 3089.0 True True
422 Italy 20200306 148 3858.0 769.0 41.0 Local transmission 0 41 3858.0 True True
423 Italy 20200307 197 4636.0 778.0 49.0 Local transmission 0 49 4636.0 True True
424 Italy 20200308 234 5883.0 1247.0 37.0 Local transmission 0 37 5883.0 True True
425 Italy 20200309 366 7375.0 1492.0 132.0 Local transmission 0 132 7375.0 True True
426 Italy 20200310 463 9172.0 1797.0 97.0 Local transmission 0 97 9172.0 True True
427 Italy 20200311 631 10149.0 977.0 168.0 Local transmission 0 168 10149.0 True True
428 Italy 20200312 827 12462.0 2313.0 196.0 Local transmission 0 196 12462.0 True True
429 Italy 20200313 1016 15113.0 2651.0 189.0 Local transmission 0 189 15113.0 True True
430 Italy 20200314 1268 17660.0 2547.0 252.0 Local transmission 0 252 17660.0 True True
431 Italy 20200315 1441 21157.0 3497.0 173.0 Local transmission 0 173 21157.0 True True
432 Italy 20200316 1809 24747.0 3590.0 368.0 Local transmission 0 368 24747.0 True True
433 Italy 20200317 2503 27980.0 3233.0 349.0 Local transmission 0 694 27980.0 False True
434 Italy 20200318 2503 31506.0 3526.0 345.0 Local transmission 0 0 31506.0 False True
435 Italy 20200319 2978 35713.0 4207.0 473.0 Local transmission 0 475 35713.0 False True
436 Italy 20200320 3407 41035.0 5322.0 429.0 Local transmission 0 429 41035.0 True True
437 Italy 20200321 4032 47021.0 5986.0 625.0 Local transmission 0 625 47021.0 True True
438 Italy 20200322 4827 53578.0 6557.0 795.0 Local transmission 0 795 53578.0 True True
In [127]:
countries = set(grouped['country'])
len(countries)
Out[127]:
73
In [128]:
corrected_dfs = []
for country in countries:
    df = grouped[grouped['country'] == country]
    df['corrections_death'] = df['total_deaths'] - df['total_deaths'].shift(+1)
    df['corrections_new'] = df['total_confirmed'].shift(+1) + df['total_new']
    df['conf_death'] = df['corrections_death'] == df['total_new_deaths']
    df['conf_new'] = df['corrections_new'] == df['total_confirmed']
    corrected_dfs.append(df)
/Users/danielcaraway/.local/lib/python3.7/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
/Users/danielcaraway/.local/lib/python3.7/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
/Users/danielcaraway/.local/lib/python3.7/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
/Users/danielcaraway/.local/lib/python3.7/site-packages/ipykernel_launcher.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
In [129]:
# frame = pd.concat(li, axis=0, ignore_index=True)
updated_df = pd.concat(corrected_dfs, axis=0, ignore_index=True)
# grouped = pd.DataFrame(frame.groupby(['country', 'date','total_deaths']).sum())
In [130]:
updated_df
Out[130]:
country date total_deaths total_confirmed total_new total_new_deaths transmission_class days_since_report corrections_death corrections_new conf_death conf_new
0 Slovakia 20200308 0 3.0 2.0 0.0 Local transmission 0 NaN NaN False False
1 Slovakia 20200309 0 5.0 2.0 0.0 Local transmission 0 0 5.0 True True
2 Slovakia 20200310 0 7.0 2.0 0.0 Local transmission 0 0 7.0 True True
3 Slovakia 20200313 0 21.0 11.0 0.0 Local transmission 0 0 18.0 True False
4 Slovakia 20200314 0 30.0 9.0 0.0 Local transmission 0 0 30.0 True True
5 Slovakia 20200315 0 44.0 14.0 0.0 Local transmission 0 0 44.0 True True
6 Slovakia 20200316 0 61.0 17.0 0.0 Local transmission 0 0 61.0 True True
7 Slovakia 20200317 0 72.0 11.0 0.0 Local transmission 0 0 72.0 True True
8 Slovakia 20200318 0 97.0 25.0 0.0 Local transmission 0 0 97.0 True True
9 Slovakia 20200319 0 105.0 8.0 0.0 Local transmission 0 0 105.0 True True
10 Lithuania 20200302 0 1.0 0.0 0.0 Imported cases only 3 NaN NaN False False
11 Lithuania 20200303 0 1.0 0.0 0.0 Imported cases only 4 0 1.0 True True
12 Lithuania 20200304 0 1.0 0.0 0.0 Imported cases only 5 0 1.0 True True
13 Lithuania 20200305 0 1.0 0.0 0.0 Imported cases only 6 0 1.0 True True
14 Lithuania 20200306 0 1.0 0.0 0.0 Imported cases only 7 0 1.0 True True
15 Croatia 20200302 0 7.0 0.0 0.0 Local transmission 1 NaN NaN False False
16 Croatia 20200303 0 8.0 2.0 0.0 Local transmission 0 0 9.0 True False
17 Croatia 20200304 0 9.0 1.0 0.0 Local transmission 0 0 9.0 True True
18 Croatia 20200305 0 9.0 0.0 0.0 Local transmission 1 0 9.0 True True
19 Croatia 20200306 0 10.0 1.0 0.0 Local transmission 0 0 10.0 True True
20 Croatia 20200307 0 11.0 1.0 0.0 Local transmission 0 0 11.0 True True
21 Croatia 20200308 0 11.0 0.0 0.0 Local transmission 1 0 11.0 True True
22 Croatia 20200309 0 11.0 0.0 0.0 Local transmission 2 0 11.0 True True
23 Croatia 20200310 0 12.0 1.0 0.0 Local transmission 0 0 12.0 True True
24 Croatia 20200311 0 16.0 4.0 0.0 Local transmission 0 0 16.0 True True
25 Croatia 20200312 0 16.0 0.0 0.0 Local transmission 1 0 16.0 True True
26 Croatia 20200313 0 16.0 0.0 0.0 Local transmission 2 0 16.0 True True
27 Croatia 20200314 0 27.0 11.0 0.0 Local transmission 0 0 27.0 True True
28 Croatia 20200315 0 37.0 10.0 0.0 Local transmission 0 0 37.0 True True
29 Croatia 20200316 0 49.0 12.0 0.0 Local transmission 0 0 49.0 True True
... ... ... ... ... ... ... ... ... ... ... ... ...
889 Austria 20200314 1 504.0 143.0 0.0 Local transmission 0 0 504.0 True True
890 Austria 20200315 1 800.0 296.0 0.0 Local transmission 0 0 800.0 True True
891 Austria 20200316 1 959.0 159.0 0.0 Local transmission 0 0 959.0 True True
892 Austria 20200317 3 1132.0 173.0 2.0 Local transmission 0 2 1132.0 True True
893 Austria 20200318 3 1332.0 373.0 2.0 Local transmission 0 0 1505.0 False False
894 Austria 20200319 4 1646.0 314.0 1.0 Local transmission 0 1 1646.0 True True
895 Austria 20200320 5 1843.0 197.0 1.0 Local transmission 0 1 1843.0 True True
896 Austria 20200321 6 2649.0 806.0 1.0 Local transmission 0 1 2649.0 True True
897 Austria 20200322 8 3024.0 375.0 2.0 Local transmission 0 2 3024.0 True True
898 Republic of Korea 20200302 22 4212.0 476.0 4.0 Local transmission 0 NaN NaN False False
899 Republic of Korea 20200303 28 4812.0 600.0 6.0 Local transmission 0 6 4812.0 True True
900 Republic of Korea 20200304 32 5328.0 516.0 4.0 Local transmission 0 4 5328.0 True True
901 Republic of Korea 20200305 35 5766.0 438.0 3.0 Local transmission 0 3 5766.0 True True
902 Republic of Korea 20200306 42 6284.0 518.0 7.0 Local transmission 0 7 6284.0 True True
903 Republic of Korea 20200307 44 6767.0 483.0 2.0 Local transmission 0 2 6767.0 True True
904 Republic of Korea 20200308 50 7134.0 367.0 6.0 Local transmission 0 6 7134.0 True True
905 Republic of Korea 20200309 51 7382.0 248.0 1.0 Local transmission 0 1 7382.0 True True
906 Republic of Korea 20200310 54 7513.0 131.0 3.0 Local transmission 0 3 7513.0 True True
907 Republic of Korea 20200311 60 7755.0 242.0 6.0 Local transmission 0 6 7755.0 True True
908 Republic of Korea 20200312 66 7869.0 114.0 6.0 Local transmission 0 6 7869.0 True True
909 Republic of Korea 20200313 66 7979.0 110.0 0.0 Local transmission 0 0 7979.0 True True
910 Republic of Korea 20200314 72 8086.0 107.0 6.0 Local transmission 0 6 8086.0 True True
911 Republic of Korea 20200315 75 8162.0 76.0 3.0 Local transmission 0 3 8162.0 True True
912 Republic of Korea 20200316 75 8236.0 74.0 0.0 Local transmission 0 0 8236.0 True True
913 Republic of Korea 20200317 81 8320.0 84.0 6.0 Local transmission 0 6 8320.0 True True
914 Republic of Korea 20200318 81 8320.0 84.0 6.0 Local transmission 0 0 8404.0 False False
915 Republic of Korea 20200319 84 8413.0 93.0 3.0 Local transmission 0 3 8413.0 True True
916 Republic of Korea 20200320 94 8652.0 239.0 10.0 Local transmission 0 10 8652.0 True True
917 Republic of Korea 20200321 102 8799.0 147.0 8.0 Local transmission 0 8 8799.0 True True
918 Republic of Korea 20200322 104 8897.0 98.0 2.0 Local transmission 0 2 8897.0 True True

919 rows × 12 columns

In [131]:
updated_df.to_csv('covid19_whositreps_merged_and_checked.csv', index=False)

Fixing the NaN issue

In [135]:
import numpy as np
false_death = updated_df[(updated_df['conf_death'] == False) & (np.isnan(updated_df['corrections_new']) == False )]
In [136]:
false_death
Out[136]:
country date total_deaths total_confirmed total_new total_new_deaths transmission_class days_since_report corrections_death corrections_new conf_death conf_new
32 Croatia 20200322 1 206.0 80.0 0.0 Local transmission 0 1 145.0 False False
105 Netherlands 20200319 58 2051.0 0.0 0.0 Local transmission 1 15 1705.0 False False
173 Italy 20200317 2503 27980.0 3233.0 349.0 Local transmission 0 694 27980.0 False True
174 Italy 20200318 2503 31506.0 3526.0 345.0 Local transmission 0 0 31506.0 False True
175 Italy 20200319 2978 35713.0 4207.0 473.0 Local transmission 0 475 35713.0 False True
191 The United Kingdom 20200318 55 1954.0 407.0 5.0 Local transmission 0 0 1954.0 False True
192 The United Kingdom 20200319 103 2630.0 672.0 0.0 Local transmission 0 48 2626.0 False False
221 Australia 20200305 2 57.0 14.0 2.0 Local transmission 0 1 57.0 False True
232 Australia 20200316 5 298.0 0.0 0.0 Local transmission 1 2 249.0 False False
347 Philippines 20200316 12 140.0 0.0 0.0 Local transmission 1 6 111.0 False False
438 France 20200319 244 9043.0 0.0 0.0 Local transmission 1 69 7652.0 False False
478 Denmark 20200318 4 977.0 79.0 3.0 Local transmission 0 0 1039.0 False False
501 China 20200318 3231 81116.0 39.0 13.0 Local transmission 0 0 81155.0 False False
549 Germany 20200320 20 10999.0 2801.0 8.0 Local transmission 0 7 10999.0 False True
592 Japan 20200318 28 829.0 15.0 4.0 Local transmission 0 0 844.0 False False
601 Bulgaria 20200315 2 43.0 36.0 1.0 Local transmission 0 2 42.0 False False
664 Switzerland 20200318 14 2650.0 450.0 5.0 Local transmission 0 0 2650.0 False True
665 Switzerland 20200319 21 3010.0 353.0 2.0 Local transmission 0 7 3003.0 False False
787 Greece 20200319 5 418.0 0.0 0.0 Local transmission 1 1 387.0 False False
893 Austria 20200318 3 1332.0 373.0 2.0 Local transmission 0 0 1505.0 False False
914 Republic of Korea 20200318 81 8320.0 84.0 6.0 Local transmission 0 0 8404.0 False False
In [137]:
false_new = updated_df[(updated_df['conf_new'] == False) & (np.isnan(updated_df['corrections_new']) == False )]
In [139]:
false_new['diff'] = false_new['corrections_new'] - false_new['total_confirmed']
/Users/danielcaraway/.local/lib/python3.7/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
In [140]:
false_new
Out[140]:
country date total_deaths total_confirmed total_new total_new_deaths transmission_class days_since_report corrections_death corrections_new conf_death conf_new diff
3 Slovakia 20200313 0 21.0 11.0 0.0 Local transmission 0 0 18.0 True False -3.0
16 Croatia 20200303 0 8.0 2.0 0.0 Local transmission 0 0 9.0 True False 1.0
32 Croatia 20200322 1 206.0 80.0 0.0 Local transmission 0 1 145.0 False False -61.0
49 Czechia 20200318 0 434.0 136.0 0.0 Local transmission 0 0 519.0 True False 85.0
50 Czechia 20200319 0 522.0 30.0 0.0 Local transmission 0 0 464.0 True False -58.0
105 Netherlands 20200319 58 2051.0 0.0 0.0 Local transmission 1 15 1705.0 False False -346.0
111 Norway 20200304 0 33.0 7.0 0.0 Local transmission 0 0 32.0 True False -1.0
154 Serbia†† 20200318 0 85.0 23.0 0.0 Local transmission 0 0 93.0 True False 8.0
156 Serbia†† 20200320 0 123.0 41.0 0.0 Local transmission 0 0 137.0 True False 14.0
192 The United Kingdom 20200319 103 2630.0 672.0 0.0 Local transmission 0 48 2626.0 False False -4.0
232 Australia 20200316 5 298.0 0.0 0.0 Local transmission 1 2 249.0 False False -49.0
234 Australia 20200318 5 414.0 78.0 0.0 Local transmission 0 0 453.0 True False 39.0
253 Luxembourg 20200319 2 210.0 63.0 1.0 Local transmission 0 1 203.0 True False -7.0
284 Israel 20200318 0 304.0 0.0 0.0 Local transmission 1 0 250.0 True False -54.0
285 Israel 20200319 0 427.0 0.0 0.0 Local transmission 2 0 304.0 True False -123.0
316 Slovenia 20200319 1 286.0 0.0 0.0 Local transmission 1 0 275.0 True False -11.0
347 Philippines 20200316 12 140.0 0.0 0.0 Local transmission 1 6 111.0 False False -29.0
349 Philippines 20200318 12 187.0 45.0 0.0 Local transmission 0 0 232.0 True False 45.0
363 Iceland 20200311 0 61.0 0.0 0.0 Local transmission 1 0 55.0 True False -6.0
369 Iceland 20200317 0 199.0 19.0 0.0 Local transmission 0 0 157.0 True False -42.0
370 Iceland 20200318 0 225.0 45.0 0.0 Local transmission 0 0 244.0 True False 19.0
413 Hungary 20200308 0 7.0 3.0 0.0 Local transmission 0 0 8.0 True False 1.0
425 France 20200306 6 420.0 138.0 2.0 Local transmission 0 2 350.0 True False -70.0
438 France 20200319 244 9043.0 0.0 0.0 Local transmission 1 69 7652.0 False False -1391.0
445 Portugal 20200306 0 9.0 0.0 0.0 Local transmission 1 0 7.0 True False -2.0
478 Denmark 20200318 4 977.0 79.0 3.0 Local transmission 0 0 1039.0 False False 62.0
501 China 20200318 3231 81116.0 39.0 13.0 Local transmission 0 0 81155.0 False False 39.0
571 New Zealand 20200318 0 11.0 5.0 0.0 Local transmission 0 0 16.0 True False 5.0
582 Japan 20200308 6 455.0 48.0 0.0 Local transmission 0 0 456.0 True False 1.0
592 Japan 20200318 28 829.0 15.0 4.0 Local transmission 0 0 844.0 False False 15.0
601 Bulgaria 20200315 2 43.0 36.0 1.0 Local transmission 0 2 42.0 False False -1.0
643 Viet Nam 20200318 0 61.0 4.0 0.0 Local transmission 0 0 65.0 True False 4.0
652 Switzerland 20200306 1 86.0 30.0 1.0 Local transmission 0 1 67.0 True False -19.0
665 Switzerland 20200319 21 3010.0 353.0 2.0 Local transmission 0 7 3003.0 False False -7.0
705 Cambodia 20200316 0 12.0 0.0 0.0 Local transmission 1 0 7.0 True False -5.0
707 Cambodia 20200318 0 24.0 12.0 0.0 Local transmission 0 0 36.0 True False 12.0
745 Poland 20200319 5 287.0 0.0 0.0 Local transmission 1 0 246.0 True False -41.0
787 Greece 20200319 5 418.0 0.0 0.0 Local transmission 1 1 387.0 False False -31.0
884 Austria 20200309 0 112.0 10.0 0.0 Local transmission 0 0 114.0 True False 2.0
893 Austria 20200318 3 1332.0 373.0 2.0 Local transmission 0 0 1505.0 False False 173.0
914 Republic of Korea 20200318 81 8320.0 84.0 6.0 Local transmission 0 0 8404.0 False False 84.0
In [ ]: