In [2]:
!pip install ipython-autotime
%load_ext autotime
Collecting ipython-autotime
  Downloading https://files.pythonhosted.org/packages/e6/f9/0626bbdb322e3a078d968e87e3b01341e7890544de891d0cb613641220e6/ipython-autotime-0.1.tar.bz2
Building wheels for collected packages: ipython-autotime
  Building wheel for ipython-autotime (setup.py) ... done
  Created wheel for ipython-autotime: filename=ipython_autotime-0.1-cp36-none-any.whl size=1832 sha256=2cd722a7939760d38ce542eb94c9b276a7ff418aacb1f95b367bd5a855d1c36d
  Stored in directory: /root/.cache/pip/wheels/d2/df/81/2db1e54bc91002cec40334629bc39cfa86dff540b304ebcd6e
Successfully built ipython-autotime
Installing collected packages: ipython-autotime
Successfully installed ipython-autotime-0.1
In [7]:
import matplotlib
matplotlib.use("Agg")
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from keras.models import Sequential
from keras.layers.core import Dense
from keras.optimizers import SGD
from imutils import paths
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import argparse
import random
import pickle
import cv2
import os
time: 7.5 ms
In [4]:
## mount your Google Drive folder
from google.colab import drive
drive.mount('/content/drive')
Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
time: 18.6 s
In [5]:
## change into the directory where your data is 
os.chdir("drive/My Drive/data")
time: 8.05 ms
In [35]:
app_train = pd.read_csv('home-credit-default-risk/application_train.csv')
app_train
Out[35]:
SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY AMT_GOODS_PRICE NAME_TYPE_SUITE NAME_INCOME_TYPE NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE REGION_POPULATION_RELATIVE DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH OWN_CAR_AGE FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE FLAG_EMAIL OCCUPATION_TYPE CNT_FAM_MEMBERS REGION_RATING_CLIENT REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START HOUR_APPR_PROCESS_START REG_REGION_NOT_LIVE_REGION REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY ... LIVINGAPARTMENTS_MEDI LIVINGAREA_MEDI NONLIVINGAPARTMENTS_MEDI NONLIVINGAREA_MEDI FONDKAPREMONT_MODE HOUSETYPE_MODE TOTALAREA_MODE WALLSMATERIAL_MODE EMERGENCYSTATE_MODE OBS_30_CNT_SOCIAL_CIRCLE DEF_30_CNT_SOCIAL_CIRCLE OBS_60_CNT_SOCIAL_CIRCLE DEF_60_CNT_SOCIAL_CIRCLE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2 FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6 FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10 FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14 FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR
0 100002 1 Cash loans M N Y 0 202500.0 406597.5 24700.5 351000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.018801 -9461 -637 -3648.0 -2120 NaN 1 1 0 1 1 0 Laborers 1.0 2 2 WEDNESDAY 10 0 0 0 0 0 0 ... 0.0205 0.0193 0.0000 0.0000 reg oper account block of flats 0.0149 Stone, brick No 2.0 2.0 2.0 2.0 -1134.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 1.0
1 100003 0 Cash loans F N N 0 270000.0 1293502.5 35698.5 1129500.0 Family State servant Higher education Married House / apartment 0.003541 -16765 -1188 -1186.0 -291 NaN 1 1 0 1 1 0 Core staff 2.0 1 1 MONDAY 11 0 0 0 0 0 0 ... 0.0787 0.0558 0.0039 0.0100 reg oper account block of flats 0.0714 Block No 1.0 0.0 1.0 0.0 -828.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
2 100004 0 Revolving loans M Y Y 0 67500.0 135000.0 6750.0 135000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.010032 -19046 -225 -4260.0 -2531 26.0 1 1 1 1 1 0 Laborers 1.0 2 2 MONDAY 9 0 0 0 0 0 0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -815.0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
3 100006 0 Cash loans F N Y 0 135000.0 312682.5 29686.5 297000.0 Unaccompanied Working Secondary / secondary special Civil marriage House / apartment 0.008019 -19005 -3039 -9833.0 -2437 NaN 1 1 0 1 0 0 Laborers 2.0 2 2 WEDNESDAY 17 0 0 0 0 0 0 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 2.0 0.0 2.0 0.0 -617.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN
4 100007 0 Cash loans M N Y 0 121500.0 513000.0 21865.5 513000.0 Unaccompanied Working Secondary / secondary special Single / not married House / apartment 0.028663 -19932 -3038 -4311.0 -3458 NaN 1 1 0 1 0 0 Core staff 1.0 2 2 THURSDAY 11 0 0 0 0 1 1 ... NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.0 0.0 0.0 0.0 -1106.0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
307506 456251 0 Cash loans M N N 0 157500.0 254700.0 27558.0 225000.0 Unaccompanied Working Secondary / secondary special Separated With parents 0.032561 -9327 -236 -8456.0 -1982 NaN 1 1 0 1 0 0 Sales staff 1.0 1 1 THURSDAY 15 0 0 0 0 0 0 ... 0.1509 0.2001 0.0757 0.1118 reg oper account block of flats 0.2898 Stone, brick No 0.0 0.0 0.0 0.0 -273.0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN
307507 456252 0 Cash loans F N Y 0 72000.0 269550.0 12001.5 225000.0 Unaccompanied Pensioner Secondary / secondary special Widow House / apartment 0.025164 -20775 365243 -4388.0 -4090 NaN 1 0 0 1 1 0 NaN 1.0 2 2 MONDAY 8 0 0 0 0 0 0 ... 0.0205 0.0261 0.0000 0.0000 reg oper account block of flats 0.0214 Stone, brick No 0.0 0.0 0.0 0.0 0.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 NaN NaN NaN NaN NaN NaN
307508 456253 0 Cash loans F N Y 0 153000.0 677664.0 29979.0 585000.0 Unaccompanied Working Higher education Separated House / apartment 0.005002 -14966 -7921 -6737.0 -5150 NaN 1 1 0 1 0 1 Managers 1.0 3 3 THURSDAY 9 0 0 0 0 1 1 ... 0.0855 0.9445 0.0000 0.0000 reg oper account block of flats 0.7970 Panel No 6.0 0.0 6.0 0.0 -1909.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1.0 0.0 0.0 1.0 0.0 1.0
307509 456254 1 Cash loans F N Y 0 171000.0 370107.0 20205.0 319500.0 Unaccompanied Commercial associate Secondary / secondary special Married House / apartment 0.005313 -11961 -4786 -2562.0 -931 NaN 1 1 0 1 0 0 Laborers 2.0 2 2 WEDNESDAY 9 0 0 0 1 1 0 ... NaN 0.0062 NaN NaN NaN block of flats 0.0086 Stone, brick No 0.0 0.0 0.0 0.0 -322.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 0.0 0.0 0.0
307510 456255 0 Cash loans F N N 0 157500.0 675000.0 49117.5 675000.0 Unaccompanied Commercial associate Higher education Married House / apartment 0.046220 -16856 -1262 -5128.0 -410 NaN 1 1 1 1 1 0 Laborers 2.0 1 1 THURSDAY 20 0 0 0 0 1 1 ... NaN 0.0805 NaN 0.0000 NaN block of flats 0.0718 Panel No 0.0 0.0 0.0 0.0 -787.0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.0 0.0 0.0 2.0 0.0 1.0

307511 rows × 122 columns

time: 4.38 s
In [36]:
app_test= pd.read_csv('home-credit-default-risk/application_test.csv')
time: 636 ms
In [37]:
app_train['TARGET']
Out[37]:
0         1
1         0
2         0
3         0
4         0
         ..
307506    0
307507    0
307508    0
307509    1
307510    0
Name: TARGET, Length: 307511, dtype: int64
time: 5.47 ms
In [38]:
print(app_train.shape)
print(app_test.shape)
(307511, 122)
(48744, 121)
time: 4.56 ms
In [39]:
app_train = pd.get_dummies(app_train)
app_test = pd.get_dummies(app_test)
time: 897 ms
In [40]:
print(app_train.shape)
print(app_test.shape)
(307511, 246)
(48744, 242)
time: 2.12 ms
In [41]:
train_labels = app_train['TARGET']
app_train, app_test = app_train.align(app_test, join = 'inner', axis = 1)
app_train['TARGET'] = train_labels
print('Training Features shape: ', app_train.shape)
print('Testing Features shape: ', app_test.shape)
Training Features shape:  (307511, 243)
Testing Features shape:  (48744, 242)
time: 413 ms
In [42]:
app_train_labels = app_train['TARGET']
app_train_nolabels = app_train.drop('TARGET', axis=1)
time: 263 ms
In [43]:
# data = np.array(data, dtype="float") / 255.0
# labels = np.array(labels)

from keras.utils import to_categorical
train_labels = to_categorical(app_train_labels)
train_labels.shape
Out[43]:
(307511, 2)
time: 15.5 ms
In [44]:
(trainX, testX, trainY, testY) = train_test_split(app_train_nolabels,
	train_labels, test_size=0.25, random_state=42)
time: 440 ms
In [48]:
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
testY = lb.transform(testY)

model = Sequential()
model.add(Dense(1024, input_shape=(242,), activation="sigmoid"))
model.add(Dense(512, activation="sigmoid"))
model.add(Dense(len(lb.classes_), activation="softmax"))
time: 98.5 ms
In [49]:
INIT_LR = 0.01
EPOCHS = 75

print("[INFO] training network...")
opt = SGD(lr=INIT_LR)
model.compile(loss="categorical_crossentropy", optimizer=opt,
	metrics=["accuracy"])
[INFO] training network...
time: 45.3 ms
In [0]:
H = model.fit(trainX, trainY, validation_data=(testX, testY),
	epochs=EPOCHS, batch_size=32)
Train on 230633 samples, validate on 76878 samples
Epoch 1/75
230633/230633 [==============================] - 93s 404us/step - loss: nan - acc: 0.9192 - val_loss: nan - val_acc: 0.9195
Epoch 2/75
230633/230633 [==============================] - 89s 387us/step - loss: nan - acc: 0.9192 - val_loss: nan - val_acc: 0.9195
Epoch 3/75
111552/230633 [=============>................] - ETA: 44s - loss: nan - acc: 0.9189
In [0]: