import pandas as pd
from fbprophet import Prophet
import warnings
# Suppress RuntimeWarning messages for the rest of the run.
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Load the Zip_Zhvi_SingleFamilyResidence CSV, decoding it as latin.
df = pd.read_csv(
    'https://raw.githubusercontent.com/danielcaraway/data/master/Zip_Zhvi_SingleFamilyResidence.csv',
    encoding='latin',
)

# Rows grouped by state.
# NOTE: this line used to end in an empty subscript (`groupby('State')[]`),
# which is a syntax error; the plain groupby object is kept instead.
states = df.groupby('State')

# Drop the 1996 monthly columns, then keep only the rows that have a value
# for January 1997.
to_drop = "1996-04 1996-05 1996-06 1996-07 1996-08 1996-09 1996-10 1996-11 1996-12".split()
df_97 = df.drop(to_drop, axis=1)
df_97_nona = df_97.dropna(subset=['1997-01'])

# Continue with an independent copy of the filtered table.
df = df_97_nona.copy()

# Side table made of the first seven columns only
# (presumably the region identifier columns -- verify against the CSV header).
columns = df.columns[:7].values
region_reference = pd.DataFrame(data=df, columns=columns)
# Identifier columns that are not part of the price series.
to_drop = ['RegionID', 'City', 'State', 'Metro', 'CountyName', 'SizeRank']
just_numbers = df.drop(to_drop, axis=1)

# Transpose: one row per monthly column label, one column per RegionName.
df_t = just_numbers.set_index('RegionName').T
df_t.reset_index(inplace=True)

# Labels look like '1997-01'; the text before the dash is the year.
df_t['year'] = df_t['index'].str.split('-').str[0]

# Average the months of each year. The string 'index' column is removed first:
# pandas >= 2.0 raises a TypeError when mean() meets a non-numeric column
# instead of silently skipping it.
by_year = df_t.drop('index', axis=1).groupby('year').mean()
by_year_t = by_year.reset_index()
by_year_t['year'] = by_year_t['year'].astype('datetime64[ns]')
by_year_t.set_index('year', inplace=True)

# Back to one row per RegionName, one column per year.
by_year_t2 = by_year_t.T
by_year_t2.reset_index(inplace=True)

# 'only_year.csv' is downloaded below and read back later in this file, so it
# has to be written here. It is saved while RegionName is still a regular
# column, which means the first column of the file is the row number.
by_year_t2.to_csv('only_year.csv')
by_year_t2.set_index('RegionName', inplace=True)
by_year_t2.T.to_csv('only_year_t.csv')

# Offer both files as browser downloads when running inside Google Colab;
# elsewhere the module is not installed and the downloads are skipped.
try:
    from google.colab import files
except ImportError:
    files = None
if files is not None:
    files.download('only_year.csv')
    files.download('only_year_t.csv')
# From this point `df` is the yearly table indexed by RegionName.
df = by_year_t2.copy()

# First five rows as an independent frame. New columns are assigned to it
# further down, and assigning to a bare slice of `df` would trigger pandas'
# SettingWithCopyWarning.
df_sm = df[:5].copy()

zip_table = []
def get_prophet_predictions(row):
    """Fit a Prophet model to one row and forecast 60 monthly periods ahead.

    The row's index labels become Prophet's ``ds`` column and the row's values
    become ``y``. The first entry of the row is discarded before fitting.

    Returns a two-element ``pd.Series``: the full forecast frame returned by
    ``Prophet.predict``, and the last forecast ``trend`` value minus the last
    observed ``y`` value.
    """
    # Same frame the model is trained on: the original column plus ds / y,
    # without the leading entry.
    history = pd.DataFrame(row).assign(ds=row.index, y=row.values).iloc[1:]

    model = Prophet()
    model.fit(history)

    horizon = model.make_future_dataframe(periods=60, freq='M')
    forecast = model.predict(horizon)

    last_observed = history['y'].iloc[-1]
    projected = forecast['trend'].iloc[-1]
    return pd.Series((forecast, projected - last_observed))
# Forecast each sample row; the helper returns a (forecast, roi) pair per row.
df_sm[['forecast', 'roi']] = df_sm.apply(get_prophet_predictions, axis=1)

# Count rows per state and save the tally.
# `df` was rebound to the yearly table earlier and no longer has a 'State'
# column, so the count uses the filtered table that still carries it.
state_sizes = df_97_nona.groupby('State').size()
zips_per_state = []
for state, zip_count in state_sizes.items():
    zips_per_state.append({'state': state, 'zips': zip_count})
    print(state, zip_count)
pd.DataFrame(zips_per_state).to_csv('zips_per_state.csv')
def prep_data_for_prophet(df):
    """Collapse monthly columns into yearly means, one row per RegionName.

    ``df`` must contain the columns ``RegionID``, ``RegionName``, ``City``,
    ``State``, ``Metro``, ``CountyName`` and ``SizeRank``. Every remaining
    column is treated as a monthly series whose label starts with the year
    followed by a dash (for example ``'1997-01'``).

    Returns a new DataFrame with a ``RegionName`` column followed by one
    column per year. Each year column is labelled with a datetime for that
    year and holds the mean of that year's monthly values. ``df`` itself is
    not modified.
    """
    id_columns = ['RegionID', 'City', 'State', 'Metro', 'CountyName', 'SizeRank']
    monthly = df.drop(id_columns, axis=1).set_index('RegionName').T

    # Group the month rows by the year prefix of their label. Grouping on the
    # index keeps the label text out of the frame, so mean() only sees the
    # value columns; pandas >= 2.0 raises a TypeError on a non-numeric column
    # instead of silently dropping it.
    by_year = monthly.groupby(lambda label: label.split('-')[0]).mean()
    by_year.index = pd.to_datetime(by_year.index, format='%Y').rename('year')

    # One row per RegionName again, with RegionName as a regular column.
    return by_year.T.reset_index()
# `df` holds the yearly table at this point, which has no 'State' column, so
# the California rows are selected from the filtered monthly table instead.
CA = prep_data_for_prophet(df_97_nona[df_97_nona['State'] == 'CA'])
def get_prophet_predictions(row):
    """Forecast one row with Prophet and report the projected change.

    This redefines the function of the same name declared earlier in the file;
    the logic is the same.

    The row's index supplies the ``ds`` values and the row's values supply
    ``y``. The leading entry of the row is skipped, a default ``Prophet``
    model is fitted, and 60 further monthly periods are predicted.

    Returns ``pd.Series((forecast, roi))`` where ``forecast`` is the frame
    from ``Prophet.predict`` and ``roi`` is the final ``trend`` value minus
    the final observed ``y``.
    """
    frame = pd.DataFrame(row)
    frame['ds'] = row.index
    frame['y'] = row.values
    # Skip the leading entry of the row before fitting.
    training = frame.iloc[1:]

    forecaster = Prophet()
    forecaster.fit(training)
    forecast = forecaster.predict(
        forecaster.make_future_dataframe(periods=60, freq='M')
    )

    final_trend = forecast.tail(1)['trend'].values[0]
    final_actual = training.tail(1)['y'].values[0]
    return pd.Series((forecast, final_trend - final_actual))
# Forecast every California row; adds the 'forecast' and 'roi' columns.
CA[['forecast', 'roi']] = CA.apply(get_prophet_predictions, axis=1)

# NOTE(review): each 'forecast' cell holds a whole DataFrame, so the CSV will
# contain its text rendering rather than structured data -- confirm that this
# is what the file is meant to hold.
CA.to_csv('CA_nightfile.csv')

# Rows for a single RegionName of interest.
mine = CA[CA['RegionName'] == 90039]

# Reload the yearly table and discard the first column of the file
# (presumably the row number saved by to_csv), leaving RegionName as a
# regular column.
oy = pd.read_csv('only_year.csv')
oy.drop(oy.columns[0], axis=1, inplace=True)