import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Load the housing dataset; the path is relative to the current working directory.
df = pd.read_csv("Refactored_Py_DS_ML_Bootcamp-master/11-Linear-Regression/USA_Housing.csv")

# Quick inspection: first rows, dtypes / non-null counts, summary statistics.
df.head()
df.info()
df.describe()

# Pairwise scatter plots; pairplot is figure-level and creates its own figure.
sns.pairplot(df)

# distplot and heatmap are axes-level and draw on the *current* axes, so each
# one gets a fresh figure; otherwise they pile onto whatever was drawn last
# when this file is executed top to bottom.
plt.figure()
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; switch
# to sns.histplot(df['Price'], kde=True) once the minimum supported seaborn
# version is confirmed to provide it.
sns.distplot(df['Price'])

# Compute the correlation matrix once and reuse it. Restricting to numeric and
# boolean columns keeps this working on pandas >= 2.0, where DataFrame.corr()
# no longer drops non-numeric columns silently and raises instead.
corr = df.select_dtypes(include=["number", "bool"]).corr()
corr

plt.figure()
sns.heatmap(corr)

# Same heatmap with each cell annotated by its correlation value.
plt.figure()
sns.heatmap(corr, annot=True)
from sklearn.model_selection import train_test_split
# Bare references: no effect on program state; in an interactive session they
# just echo the imported function and the available column labels.
train_test_split
df.columns

# Predictor columns fed to the model; 'Price' is the regression target.
feature_columns = [
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
]
X = df[feature_columns]
y = df['Price']

# Hold out 40% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=101,
)
from sklearn.linear_model import LinearRegression
# Fit an ordinary least-squares linear model on the training split.
lm = LinearRegression()
lm.fit(X_train, y_train)

# Inspect the fitted parameters: one coefficient per feature, plus the intercept.
lm.coef_
lm.intercept_

# Tabulate the coefficients against their feature names in a single
# 'Coeff' column, indexed by the columns of X.
cdf = pd.DataFrame({'Coeff': lm.coef_}, index=X.columns)
cdf