In [0]:
import pandas as pd
from fbprophet import Prophet 
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 

df = pd.read_csv('https://raw.githubusercontent.com/danielcaraway/data/master/Zip_Zhvi_SingleFamilyResidence.csv', encoding='latin')

df
Out[0]:
RegionID RegionName City State Metro CountyName SizeRank 1996-04 1996-05 1996-06 1996-07 1996-08 1996-09 1996-10 1996-11 1996-12 1997-01 1997-02 1997-03 1997-04 1997-05 1997-06 1997-07 1997-08 1997-09 1997-10 1997-11 1997-12 1998-01 1998-02 1998-03 1998-04 1998-05 1998-06 1998-07 1998-08 1998-09 1998-10 1998-11 1998-12 ... 2016-09 2016-10 2016-11 2016-12 2017-01 2017-02 2017-03 2017-04 2017-05 2017-06 2017-07 2017-08 2017-09 2017-10 2017-11 2017-12 2018-01 2018-02 2018-03 2018-04 2018-05 2018-06 2018-07 2018-08 2018-09 2018-10 2018-11 2018-12 2019-01 2019-02 2019-03 2019-04 2019-05 2019-06 2019-07 2019-08 2019-09 2019-10 2019-11 2019-12
0 61639 10025 New York NY New York-Newark-Jersey City New York County 1 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1486790.0 1490909.0 1493113.0 1500163.0 1497306.0 1496223.0 1498588.0 1504899.0 1511536.0 1509462.0 1518963.0 1518670.0 1526823.0 1512395.0 1503332 1483368 1468482 1461572 1454050 1454111 1451781 1449835 1450723 1440314 1433973 1429073 1428637 1425987 1407384 1391270 1380332 1386270 1394397 1404225 1406599 1399918 1380178 1358401 1350481 1345845
1 84654 60657 Chicago IL Chicago-Naperville-Elgin Cook County 2 355664.0 354736.0 355404.0 355863.0 357309.0 359906.0 362935.0 366493.0 369236.0 371964.0 374364.0 376322.0 377455.0 380637.0 381917.0 384322.0 383195.0 381889.0 378162.0 374642.0 373445.0 370798.0 369891.0 367735.0 369673.0 371771.0 378334.0 386284.0 395034.0 403790.0 412949.0 421560.0 428867.0 ... 926166.0 926066.0 929987.0 931155.0 933542.0 935872.0 945765.0 950893.0 950923.0 945971.0 940106.0 937789.0 937761.0 935120.0 931248 932154 941705 953981 959260 960954 959673 959082 954912 950660 948543 948906 951721 954743 957441 961651 966123 967557 965155 960225 956709 953095 950684 948136 946838 945928
2 61637 10023 New York NY New York-Newark-Jersey City New York County 3 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1627167.0 1630578.0 1630922.0 1629389.0 1620963.0 1608791.0 1600559.0 1600247.0 1598869.0 1595751.0 1595421.0 1597321.0 1600114.0 1584614.0 1566777 1543592 1531018 1537936 1551681 1568820 1573613 1578249 1582848 1583553 1580068 1573052 1563902 1550264 1531141 1521399 1520816 1526676 1525174 1516721 1504180 1492086 1480934 1468075 1454870 1438313
3 91982 77494 Katy TX Houston-The Woodlands-Sugar Land Harris County 4 197907.0 196854.0 195911.0 197300.0 197320.0 198443.0 197736.0 198462.0 199158.0 200824.0 201754.0 202558.0 205017.0 207485.0 208972.0 209013.0 211437.0 215407.0 217938.0 217189.0 214825.0 211272.0 209083.0 206815.0 206114.0 205047.0 204507.0 204444.0 203423.0 203923.0 204949.0 207069.0 207697.0 ... 329476.0 328831.0 328912.0 329142.0 329312.0 330345.0 331281.0 331534.0 330699.0 330184.0 330013.0 329488.0 329015.0 328902.0 329123 330287 331699 332214 332264 332737 333805 334293 333806 333609 333839 334129 334753 334497 335272 335363 335789 335035 334542 334176 334363 334127 334458 334460 334679 334309
4 84616 60614 Chicago IL Chicago-Naperville-Elgin Cook County 5 537402.0 536919.0 539044.0 540137.0 542594.0 546190.0 550015.0 554436.0 557214.0 560140.0 562389.0 565003.0 566844.0 574208.0 578552.0 583864.0 582314.0 581470.0 578402.0 575431.0 574291.0 575269.0 578602.0 583276.0 588869.0 596347.0 604371.0 615222.0 625944.0 640232.0 651765.0 662952.0 671169.0 ... 1141840.0 1140892.0 1144867.0 1147812.0 1153703.0 1159757.0 1174693.0 1185995.0 1191061.0 1187382.0 1179120.0 1174446.0 1174467.0 1173676.0 1172203 1176623 1186741 1197817 1201566 1204015 1204371 1201598 1195307 1189358 1186838 1185950 1187917 1190385 1191138 1193324 1198898 1203423 1204840 1199747 1194591 1188702 1183500 1179024 1175407 1174008
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
30429 66169 20052 Washington DC Washington-Arlington-Alexandria District of Columbia 30430 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 1076561.0 1071322.0 1077667.0 1096350.0 1121943.0 1142288.0 1144938.0 1149801.0 1147083.0 1155332.0 1155738.0 1164120.0 1172086.0 1176373.0 1183704 1184773 1197905 1218637 1247840 1268481 1282430 1283908 1283874 1285549 1292009 1301660 1305474 1306492 1307271 1310802 1321676 1327971 1331519 1331204 1331762 1333679 1344030 1357142 1376132 1383939
30430 67023 21759 Keymar MD Washington-Arlington-Alexandria Frederick County 30431 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 284661.0 287839.0 290238.0 287839.0 280391.0 275308.0 271331.0 267019.0 258368.0 248274.0 244407.0 240682.0 241395.0 240922.0 242355 242097 244675 249965 254996 256310 253785 249363 244538 239711 233729 224640 218518 216469 216721 216091 214518 214397 214373 215898 215491 216851 214608 212417 206543 201415
30431 67346 22649 Middletown VA Winchester Frederick County 30432 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 191312.0 190999.0 191940.0 193127.0 193769.0 194017.0 194651.0 196098.0 196689.0 196379.0 195647.0 195180.0 195536.0 195927.0 196912 197815 197896 199539 201742 203837 205321 206864 209032 209598 210442 211698 213021 213932 216175 219277 222071 223628 225230 226814 227609 228817 230588 233229 233064 232049
30432 60206 6230 Pomfret Center CT Worcester Windham County 30433 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 241058.0 238085.0 239948.0 243049.0 246911.0 249786.0 251893.0 254691.0 257886.0 262155.0 263728.0 264618.0 264671.0 264063.0 263250 263526 264764 266940 267460 267935 265830 264350 262716 263231 264923 267515 269397 269100 267918 266035 265994 265368 266017 266893 268598 267617 264139 260057 258609 259024
30433 76819 43738 Fultonham OH Zanesville Muskingum County 30434 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 57517.0 58763.0 58735.0 57653.0 56505.0 56189.0 56694.0 57333.0 57504.0 57391.0 57092.0 57272.0 58129.0 58419.0 58162 57121 56585 55955 55493 55199 55537 55795 56285 56644 56644 56271 55730 55400 55802 56455 57575 57353 56948 56612 57089 56956 56759 56776 57995 59235

30434 rows × 292 columns

In [0]:
states = df.groupby('State')[]
  File "<ipython-input-95-783deee001ea>", line 1
    states = df.groupby('State')[]
                                 ^
SyntaxError: invalid syntax
In [0]:
to_drop = "1996-04	1996-05	1996-06	1996-07	1996-08	1996-09	1996-10	1996-11	1996-12".split()
df_97 = df.drop(to_drop, axis=1)
df_97_nona = df_97.dropna(subset=['1997-01'])
len(df_97_nona)
In [0]:
df = df_97_nona.copy()
columns = df.columns[:7].values
region_reference = pd.DataFrame(data=df, columns=columns)
to_drop = ['RegionID',
 'City',
 'State',
 'Metro',
 'CountyName',
 'SizeRank']
just_numbers = df.drop(to_drop, axis=1)
df_t = just_numbers.set_index('RegionName').T
df_t.reset_index(inplace=True)
df_t['year'] = df_t.apply(lambda x: x['index'].split('-')[0], axis=1)
by_year = pd.DataFrame(df_t.groupby('year').mean())
by_year_t = by_year.reset_index()
by_year_t['year'] = by_year_t['year'].astype('datetime64[ns]') 
by_year_t.set_index('year', inplace=True)
by_year_t2 = by_year_t.T
by_year_t2.reset_index(inplace=True)
In [0]:
# by_year_t2.to_csv('only_year.csv')
by_year_t2.set_index('RegionName',inplace = True)
In [0]:
by_year_t2.T.to_csv('only_year_t.csv')
In [0]:
from google.colab import files
# df.to_csv('filename.csv') 
files.download('only_year.csv')
In [0]:
files.download('only_year_t.csv')
In [0]:
df = by_year_t2.copy()
df_sm = df[:5]
zip_table = []
def get_prophet_predictions(row):
  mini = pd.DataFrame(row)
  mini['ds'] = row.index
  mini['y'] = row.values
  df = mini.iloc[1:]

  m = Prophet()
  m.fit(df)
  invest_price = df.tail(1)
  future = m.make_future_dataframe(periods=60, freq='M')
  fcst = m.predict(future)
  sell_price = fcst.tail(1)

  roi = sell_price['trend'].values[0] - invest_price['y'].values[0]
  return pd.Series((fcst, roi))
In [0]:
df_sm[['forecast','roi']] = df_sm.apply(lambda x: get_prophet_predictions(x), axis=1)
# df[['forecast','roi']] = df.apply(lambda x: get_prophet_predictions(x), axis=1)
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations.Using 16.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations.Using 16.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations.Using 16.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations.Using 16.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations.Using 16.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations.Using 16.
/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py:3509: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [0]:
# df.to_csv('prophet_df.csv')
In [0]:
zips_per_state = []
for state in set(df['State']):
  state_df = df[df['State'] == state]
  zips_per_state.append({ 'state': state, 'zips': len(state_df) })
  print(state, len(state_df))
In [0]:
pd.DataFrame(zips_per_state).to_csv('zips_per_state.csv')
In [0]:
def prep_data_for_prophet(df):
  columns = df.columns[:7].values
  to_drop = ['RegionID',
  'City',
  'State',
  'Metro',
  'CountyName',
  'SizeRank']
  just_numbers = df.drop(to_drop, axis=1)
  df_t = just_numbers.set_index('RegionName').T
  df_t.reset_index(inplace=True)
  df_t['year'] = df_t.apply(lambda x: x['index'].split('-')[0], axis=1)
  by_year = pd.DataFrame(df_t.groupby('year').mean())
  by_year_t = by_year.reset_index()
  by_year_t['year'] = by_year_t['year'].astype('datetime64[ns]') 
  by_year_t.set_index('year', inplace=True)
  by_year_t2 = by_year_t.T
  by_year_t2.reset_index(inplace=True)
  return by_year_t2
In [0]:
CA = prep_data_for_prophet(df[df['State'] == 'CA'])
In [0]:
def get_prophet_predictions(row):
  mini = pd.DataFrame(row)
  mini['ds'] = row.index
  mini['y'] = row.values
  df = mini.iloc[1:]

  m = Prophet()
  m.fit(df)
  invest_price = df.tail(1)
  future = m.make_future_dataframe(periods=60, freq='M')
  fcst = m.predict(future)
  sell_price = fcst.tail(1)

  roi = sell_price['trend'].values[0] - invest_price['y'].values[0]
  return pd.Series((fcst, roi))
In [0]:
CA[['forecast','roi']] = CA.apply(lambda x: get_prophet_predictions(x), axis=1)
In [0]:
CA
In [0]:
CA.to_csv('CA_nightfile.csv')
In [0]:
mine = CA[CA['RegionName'] == 90039]
In [0]:
mine['forecast'].values
In [0]:
oy = pd.read_csv('only_year.csv')
In [0]:
oy.drop
In [0]:
 
In [0]:
oy.set_index('RegionName')
In [0]:
oy.drop(oy.columns[0], axis=1,inplace=True)
In [0]:
# oy.set_index('RegionName', inplace=True)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-98-9f4df5ac0823> in <module>()
----> 1 oy.set_index('RegionName', inplace=True)

/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py in set_index(self, keys, drop, append, inplace, verify_integrity)
   4409 
   4410         if missing:
-> 4411             raise KeyError("None of {} are in the columns".format(missing))
   4412 
   4413         if inplace:

KeyError: "None of ['RegionName'] are in the columns"
In [0]:
oy
Out[0]:
1997-01-01 00:00:00 1998-01-01 00:00:00 1999-01-01 00:00:00 2000-01-01 00:00:00 2001-01-01 00:00:00 2002-01-01 00:00:00 2003-01-01 00:00:00 2004-01-01 00:00:00 2005-01-01 00:00:00 2006-01-01 00:00:00 2007-01-01 00:00:00 2008-01-01 00:00:00 2009-01-01 00:00:00 2010-01-01 00:00:00 2011-01-01 00:00:00 2012-01-01 00:00:00 2013-01-01 00:00:00 2014-01-01 00:00:00 2015-01-01 00:00:00 2016-01-01 00:00:00 2017-01-01 00:00:00 2018-01-01 00:00:00 2019-01-01 00:00:00
RegionName
60657 378192.833333 389723.833333 453431.500000 521798.916667 586183.833333 628344.916667 6.691198e+05 7.073292e+05 7.562488e+05 8.070008e+05 8.102088e+05 8.231563e+05 7.720613e+05 746086.833333 709742.416667 710565.666667 791568.166667 8.466048e+05 8.935743e+05 9.266148e+05 9.397620e+05 9.536783e+05 9.566285e+05
77494 209368.250000 206195.250000 212468.750000 223155.583333 226383.916667 226765.000000 2.311428e+05 2.401042e+05 2.466654e+05 2.498909e+05 2.580278e+05 2.607339e+05 2.610133e+05 267443.583333 260171.666667 264811.166667 283496.833333 3.155041e+05 3.351682e+05 3.324876e+05 3.300152e+05 3.334704e+05 3.347144e+05
60614 573575.666667 616168.166667 706178.750000 804170.666667 905689.416667 983081.333333 1.042087e+06 1.086231e+06 1.164892e+06 1.234957e+06 1.246696e+06 1.179971e+06 1.070229e+06 984387.583333 909385.583333 868096.583333 959294.333333 1.032766e+06 1.095943e+06 1.142199e+06 1.175260e+06 1.194322e+06 1.190550e+06
77449 97162.916667 99462.916667 102394.750000 105518.250000 106366.250000 109551.000000 1.137417e+05 1.198598e+05 1.240328e+05 1.242390e+05 1.289552e+05 1.259122e+05 1.204772e+05 120319.166667 112696.750000 111168.416667 123927.750000 1.402042e+05 1.548712e+05 1.644619e+05 1.702064e+05 1.769127e+05 1.842622e+05
77084 96043.833333 97957.250000 101216.833333 104458.750000 105968.083333 109260.000000 1.127021e+05 1.193099e+05 1.231274e+05 1.239627e+05 1.281134e+05 1.262742e+05 1.232700e+05 122815.083333 115972.916667 114768.250000 125350.833333 1.396642e+05 1.535522e+05 1.617357e+05 1.673551e+05 1.741188e+05 1.823445e+05
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
89155 198409.750000 204513.416667 210780.333333 222410.000000 236548.416667 252741.333333 2.777982e+05 3.583125e+05 4.382557e+05 4.553523e+05 4.311644e+05 3.301152e+05 2.433039e+05 215128.416667 197624.416667 196687.083333 236701.583333 2.699738e+05 2.855092e+05 3.033609e+05 3.235261e+05 3.619018e+05 3.763496e+05
55144 61082.666667 63322.000000 77110.666667 89243.416667 100079.416667 110211.416667 1.227185e+05 1.338859e+05 1.508932e+05 1.521214e+05 1.468690e+05 1.314151e+05 1.196653e+05 119882.916667 124983.583333 125637.750000 127920.416667 1.381543e+05 1.499450e+05 1.667080e+05 1.849695e+05 1.923043e+05 2.051572e+05
4033 349591.500000 370049.750000 389496.166667 432842.583333 485595.666667 545738.416667 6.114274e+05 6.874357e+05 7.601644e+05 7.690475e+05 7.544142e+05 7.382319e+05 6.589907e+05 656969.916667 634582.500000 650266.250000 646866.916667 6.730225e+05 6.831582e+05 7.370223e+05 7.609642e+05 8.134617e+05 8.266645e+05
86343 82564.083333 91416.000000 99147.250000 101334.166667 102407.666667 97665.500000 9.470975e+04 1.076934e+05 1.302911e+05 1.608006e+05 1.523491e+05 1.335698e+05 1.146630e+05 108520.166667 100557.666667 107519.166667 110559.583333 1.266222e+05 1.351281e+05 1.449066e+05 1.557000e+05 1.635542e+05 1.622792e+05
2872 142758.333333 139654.833333 154084.250000 172203.750000 195521.666667 215893.000000 2.302336e+05 2.672282e+05 3.014248e+05 3.094183e+05 3.000596e+05 2.815373e+05 2.417012e+05 233953.833333 243555.000000 223910.666667 217442.833333 2.351984e+05 2.213556e+05 2.226684e+05 2.418694e+05 2.532719e+05 2.556733e+05

13916 rows × 23 columns

In [0]: