In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
In [2]:
app_train = pd.read_csv('application_train.csv')
app_test = pd.read_csv('application_test.csv')

Which factors are highly correlated with not repaying loans?

In [3]:
high_corr_for_graphs = ['CNT_FAM_MEMBERS',
 'CNT_CHILDREN',
 'AMT_REQ_CREDIT_BUREAU_YEAR',
 'OWN_CAR_AGE',
 'DAYS_REGISTRATION',
 'DAYS_ID_PUBLISH',
 'DAYS_LAST_PHONE_CHANGE',
 'DAYS_BIRTH']
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:829: RuntimeWarning: invalid value encountered in greater_equal
  keep = (tmp_a >= first_edge)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:830: RuntimeWarning: invalid value encountered in less_equal
  keep &= (tmp_a <= last_edge)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:829: RuntimeWarning: invalid value encountered in greater_equal
  keep = (tmp_a >= first_edge)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:830: RuntimeWarning: invalid value encountered in less_equal
  keep &= (tmp_a <= last_edge)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:829: RuntimeWarning: invalid value encountered in greater_equal
  keep = (tmp_a >= first_edge)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:830: RuntimeWarning: invalid value encountered in less_equal
  keep &= (tmp_a <= last_edge)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:829: RuntimeWarning: invalid value encountered in greater_equal
  keep = (tmp_a >= first_edge)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:830: RuntimeWarning: invalid value encountered in less_equal
  keep &= (tmp_a <= last_edge)
In [4]:
col = high_corr_for_graphs[0]
df = app_train.copy()
df[col] = abs(df[col])
plt.style.use('fivethirtyeight')
plt.hist(df[col] / 365, edgecolor = 'k', bins = 25)
plt.title(col); plt.xlabel('x'); plt.ylabel('Count');
plt.show()
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:829: RuntimeWarning: invalid value encountered in greater_equal
  keep = (tmp_a >= first_edge)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:830: RuntimeWarning: invalid value encountered in less_equal
  keep &= (tmp_a <= last_edge)
In [5]:
col = high_corr_for_graphs[1]
df = app_train.copy()
df[col] = abs(df[col])
plt.style.use('fivethirtyeight')
plt.hist(df[col] / 365, edgecolor = 'k', bins = 25)
plt.title(col); plt.xlabel('x'); plt.ylabel('Count');
plt.show()
In [6]:
col = high_corr_for_graphs[2]
df = app_train.copy()
df[col] = abs(df[col])
plt.style.use('fivethirtyeight')
plt.hist(df[col] / 365, edgecolor = 'k', bins = 25)
plt.title(col); plt.xlabel('x'); plt.ylabel('Count');
plt.show()
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:829: RuntimeWarning: invalid value encountered in greater_equal
  keep = (tmp_a >= first_edge)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:830: RuntimeWarning: invalid value encountered in less_equal
  keep &= (tmp_a <= last_edge)
In [7]:
col = high_corr_for_graphs[3]
df = app_train.copy()
df[col] = abs(df[col])
plt.style.use('fivethirtyeight')
plt.hist(df[col] / 365, edgecolor = 'k', bins = 25)
plt.title(col); plt.xlabel('x'); plt.ylabel('Count');
plt.show()
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:829: RuntimeWarning: invalid value encountered in greater_equal
  keep = (tmp_a >= first_edge)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:830: RuntimeWarning: invalid value encountered in less_equal
  keep &= (tmp_a <= last_edge)
In [8]:
col = high_corr_for_graphs[4]
df = app_train.copy()
df[col] = abs(df[col])
plt.style.use('fivethirtyeight')
plt.hist(df[col] / 365, edgecolor = 'k', bins = 25)
plt.title(col); plt.xlabel('x'); plt.ylabel('Count');
plt.show()
In [9]:
col = high_corr_for_graphs[5]
df = app_train.copy()
df[col] = abs(df[col])
plt.style.use('fivethirtyeight')
plt.hist(df[col] / 365, edgecolor = 'k', bins = 25)
plt.title(col); plt.xlabel('x'); plt.ylabel('Count');
plt.show()
In [10]:
col = high_corr_for_graphs[6]
df = app_train.copy()
df[col] = abs(df[col])
plt.style.use('fivethirtyeight')
plt.hist(df[col] / 365, edgecolor = 'k', bins = 25)
plt.title(col); plt.xlabel('x'); plt.ylabel('Count');
plt.show()
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:829: RuntimeWarning: invalid value encountered in greater_equal
  keep = (tmp_a >= first_edge)
/Users/danielcaraway/anaconda3/lib/python3.7/site-packages/numpy/lib/histograms.py:830: RuntimeWarning: invalid value encountered in less_equal
  keep &= (tmp_a <= last_edge)
In [11]:
col = high_corr_for_graphs[7]
df = app_train.copy()
df[col] = abs(df[col])
plt.style.use('fivethirtyeight')
plt.hist(df[col] / 365, edgecolor = 'k', bins = 25)
plt.title(col); plt.xlabel('x'); plt.ylabel('Count');
plt.show()

Fin.

In [ ]: