In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

train = pd.read_csv('../input/covid19-local-us-ca-forecasting-week-1/ca_train.csv')
test = pd.read_csv('../input/covid19-local-us-ca-forecasting-week-1/ca_test.csv')
sub = pd.read_csv('../input/covid19-local-us-ca-forecasting-week-1/ca_submission.csv')
/kaggle/input/covid19-local-us-ca-forecasting-week-1/ca_train.csv
/kaggle/input/covid19-local-us-ca-forecasting-week-1/ca_test.csv
/kaggle/input/covid19-local-us-ca-forecasting-week-1/ca_submission.csv
In [2]:
from sklearn.linear_model import LinearRegression
train=train[train.ConfirmedCases>0]

model_cc= LinearRegression()
x1=np.array(train.Id).reshape(-1,1)
y1=np.log(train.ConfirmedCases)
model_cc.fit(x1,y1)

model_fc= LinearRegression()
x2=np.array(train.ConfirmedCases).reshape(-1,1)
y2=train.Fatalities
model_fc.fit(x2,y2)

test["Id"]=50+test.ForecastId
test.head()

test["LogConf"]=model_cc.predict(np.array(test.Id).reshape(-1,1))
test["ConfirmedCases"]=np.exp(test.LogConf)//1
test["Fatalities"]=model_fc.predict(np.array(test.ConfirmedCases).reshape(-1,1))//1

for id in train.Id:
    test.ConfirmedCases[test.Id==id]=train.ConfirmedCases[train.Id==id].sum()
    test.Fatalities[test.Id==id]=train.Fatalities[train.Id==id].sum()
test
/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:22: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
/opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:23: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
Out[2]:
ForecastId Province/State Country/Region Lat Long Date Id LogConf ConfirmedCases Fatalities
0 1 California US 36.1162 -119.6816 2020-03-12 51 5.424030 221.0 4.0
1 2 California US 36.1162 -119.6816 2020-03-13 52 5.627296 282.0 4.0
2 3 California US 36.1162 -119.6816 2020-03-14 53 5.830561 340.0 5.0
3 4 California US 36.1162 -119.6816 2020-03-15 54 6.033827 426.0 6.0
4 5 California US 36.1162 -119.6816 2020-03-16 55 6.237093 557.0 7.0
5 6 California US 36.1162 -119.6816 2020-03-17 56 6.440358 698.0 12.0
6 7 California US 36.1162 -119.6816 2020-03-18 57 6.643624 751.0 13.0
7 8 California US 36.1162 -119.6816 2020-03-19 58 6.846889 952.0 18.0
8 9 California US 36.1162 -119.6816 2020-03-20 59 7.050155 1177.0 23.0
9 10 California US 36.1162 -119.6816 2020-03-21 60 7.253420 1364.0 24.0
10 11 California US 36.1162 -119.6816 2020-03-22 61 7.456686 1642.0 30.0
11 12 California US 36.1162 -119.6816 2020-03-23 62 7.659951 2108.0 39.0
12 13 California US 36.1162 -119.6816 2020-03-24 63 7.863217 2538.0 50.0
13 14 California US 36.1162 -119.6816 2020-03-25 64 8.066482 3185.0 61.0
14 15 California US 36.1162 -119.6816 2020-03-26 65 8.269748 3903.0 75.0
15 16 California US 36.1162 -119.6816 2020-03-27 66 8.473013 4783.0 92.0
16 17 California US 36.1162 -119.6816 2020-03-28 67 8.676279 5862.0 113.0
17 18 California US 36.1162 -119.6816 2020-03-29 68 8.879544 7183.0 139.0
18 19 California US 36.1162 -119.6816 2020-03-30 69 9.082810 8802.0 171.0
19 20 California US 36.1162 -119.6816 2020-03-31 70 9.286075 10786.0 210.0
20 21 California US 36.1162 -119.6816 2020-04-01 71 9.489341 13218.0 258.0
21 22 California US 36.1162 -119.6816 2020-04-02 72 9.692606 16197.0 317.0
22 23 California US 36.1162 -119.6816 2020-04-03 73 9.895872 19848.0 389.0
23 24 California US 36.1162 -119.6816 2020-04-04 74 10.099137 24322.0 477.0
24 25 California US 36.1162 -119.6816 2020-04-05 75 10.302403 29804.0 584.0
25 26 California US 36.1162 -119.6816 2020-04-06 76 10.505668 36521.0 717.0
26 27 California US 36.1162 -119.6816 2020-04-07 77 10.708934 44753.0 879.0
27 28 California US 36.1162 -119.6816 2020-04-08 78 10.912199 54841.0 1077.0
28 29 California US 36.1162 -119.6816 2020-04-09 79 11.115465 67202.0 1320.0
29 30 California US 36.1162 -119.6816 2020-04-10 80 11.318730 82349.0 1619.0
30 31 California US 36.1162 -119.6816 2020-04-11 81 11.521996 100911.0 1984.0
31 32 California US 36.1162 -119.6816 2020-04-12 82 11.725261 123656.0 2431.0
32 33 California US 36.1162 -119.6816 2020-04-13 83 11.928527 151528.0 2980.0
33 34 California US 36.1162 -119.6816 2020-04-14 84 12.131792 185682.0 3652.0
34 35 California US 36.1162 -119.6816 2020-04-15 85 12.335058 227534.0 4476.0
35 36 California US 36.1162 -119.6816 2020-04-16 86 12.538323 278820.0 5485.0
36 37 California US 36.1162 -119.6816 2020-04-17 87 12.741589 341666.0 6722.0
37 38 California US 36.1162 -119.6816 2020-04-18 88 12.944854 418676.0 8237.0
38 39 California US 36.1162 -119.6816 2020-04-19 89 13.148120 513045.0 10095.0
39 40 California US 36.1162 -119.6816 2020-04-20 90 13.351385 628684.0 12370.0
40 41 California US 36.1162 -119.6816 2020-04-21 91 13.554651 770389.0 15159.0
41 42 California US 36.1162 -119.6816 2020-04-22 92 13.757917 944033.0 18576.0
42 43 California US 36.1162 -119.6816 2020-04-23 93 13.961182 1156816.0 22764.0
In [3]:
sub.ConfirmedCases=test.ConfirmedCases
sub.Fatalities=test.Fatalities
sub.to_csv("submission.csv", index=False)
In [ ]: