%tensorflow_version 2.x
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow import keras
import os
import tempfile
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
mpl.rcParams['figure.figsize'] = (12, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
raw_df = pd.read_csv('https://storage.googleapis.com/download.tensorflow.org/data/creditcard.csv')
raw_df.head()
from google.colab import drive
drive.mount('/content/drive')
os.chdir("drive/My Drive/data")
raw_df_B = pd.read_csv('home-credit-default-risk/application_train.csv')
### A
neg, pos = np.bincount(raw_df['Class'])
total = neg + pos
print('Examples:\n Total: {}\n Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, 100 * pos / total))
### B
repaid, defaulted = np.bincount(raw_df_B['TARGET'])
total = repaid + defaulted
print('Examples:\n Total: {}\n Defaulted: {} ({:.2f}% of total)\n'.format(
    total, defaulted, 100 * defaulted / total))
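# Both datasets are heavily skewed toward the negative class. A quick sanity
# check from the TensorFlow imbalanced-data tutorial: the output-layer bias
# that makes an untrained sigmoid model predict the base rate is log(pos/neg).
# This is a sketch only; `make_model` below accepts such a value via `output_bias`.
print('Initial output bias A:', np.log(pos / neg))
print('Initial output bias B:', np.log(defaulted / repaid))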
### A
# Drop the `Time` column, which isn't useful here.
# The `Amount` column covers a huge range; convert it to log-space.
cleaned_df = raw_df.copy()
cleaned_df.pop('Time')
eps = 0.001  # 0 => 0.1¢
cleaned_df['Log Amount'] = np.log(cleaned_df.pop('Amount') + eps)
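# Quick check on the transform (a small addition, not in the original cells):
# thanks to `eps`, a zero amount maps to log(0.001) ≈ -6.9 instead of -inf.
print(cleaned_df['Log Amount'].describe())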
### B
# Temporary cleaning for dataset B: keep the target plus every continuous
# column, where "continuous" means more than 100 unique values, and require
# fewer than 100,000 missing entries.
continuous_cols = ['TARGET']
for col in raw_df_B.columns:
    column = raw_df_B[col]
    if len(column.unique()) > 100:
        if column.isna().sum() < 100000:
            print(column.isna().sum())  # number of values the median fill below will replace
            continuous_cols.append(col)
cont_df = raw_df_B[continuous_cols].copy()
for col in cont_df.columns:
    median = cont_df[col].median()
    cont_df[col] = cont_df[col].fillna(median)  # impute missing values with the column median
cont_df = cont_df.drop('SK_ID_CURR', axis=1)  # the row ID is not a feature
cleaned_df_B = cont_df.copy()
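# An alternative way to shortlist numeric candidates is pandas' dtype-based
# selection. This is only a sketch of the same idea, not a drop-in
# replacement: it keys on dtype rather than unique-value counts, so it also
# picks up low-cardinality integer codes that the loop above skips.
numeric_candidates = raw_df_B.select_dtypes(include='number').columns.drop('SK_ID_CURR')
print(len(numeric_candidates), 'numeric candidate columns')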
print(cleaned_df.shape)
print(cleaned_df_B.shape)
cleaned_df_B
def get_split_data(df, target_label):
    # Use a utility from sklearn to split and shuffle the dataset:
    # roughly 64% train, 16% validation, 20% test.
    train_df, test_df = train_test_split(df, test_size=0.2)
    train_df, val_df = train_test_split(train_df, test_size=0.2)

    # Form np arrays of labels and features.
    train_labels = np.array(train_df.pop(target_label))
    bool_train_labels = train_labels != 0
    val_labels = np.array(val_df.pop(target_label))
    test_labels = np.array(test_df.pop(target_label))

    train_features = np.array(train_df)
    val_features = np.array(val_df)
    test_features = np.array(test_df)

    return (train_df, train_labels, bool_train_labels,
            val_labels, test_labels,
            train_features, val_features, test_features)

train_df, train_labels, bool_train_labels, val_labels, test_labels, train_features, val_features, test_features = get_split_data(cleaned_df, 'Class')
train_df_B, train_labels_B, bool_train_labels_B, val_labels_B, test_labels_B, train_features_B, val_features_B, test_features_B = get_split_data(cleaned_df_B, 'TARGET')
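# With classes this imbalanced, a plain random split can leave the small
# validation set with very few positives. A minimal sketch of the same split
# using sklearn's `stratify` option, which preserves the class ratio in every
# partition (the function name here is ours, not from the cells above):
def get_stratified_split(df, target_label):
    train_df, test_df = train_test_split(df, test_size=0.2, stratify=df[target_label])
    train_df, val_df = train_test_split(train_df, test_size=0.2, stratify=train_df[target_label])
    return train_df, val_df, test_df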
def scale_and_plot(train_df, train_labels, val_labels, test_labels,
                   train_features, val_features, test_features,
                   bool_train_labels, col1, col2):
    # Normalize the input features with sklearn's StandardScaler:
    # mean 0, standard deviation 1. Fit on the training set only.
    scaler = StandardScaler()
    train_features = scaler.fit_transform(train_features)
    val_features = scaler.transform(val_features)
    test_features = scaler.transform(test_features)

    # Clip to [-5, 5] to limit the influence of extreme outliers.
    train_features = np.clip(train_features, -5, 5)
    val_features = np.clip(val_features, -5, 5)
    test_features = np.clip(test_features, -5, 5)

    print('Training labels shape:', train_labels.shape)
    print('Validation labels shape:', val_labels.shape)
    print('Test labels shape:', test_labels.shape)
    print('Training features shape:', train_features.shape)
    print('Validation features shape:', val_features.shape)
    print('Test features shape:', test_features.shape)

    # Compare the joint distribution of two features across the classes.
    pos_df = pd.DataFrame(train_features[bool_train_labels], columns=train_df.columns)
    neg_df = pd.DataFrame(train_features[~bool_train_labels], columns=train_df.columns)

    sns.jointplot(x=pos_df[col1], y=pos_df[col2],
                  kind='hex', xlim=(-5, 5), ylim=(-5, 5))
    plt.suptitle("Positive distribution")
    sns.jointplot(x=neg_df[col1], y=neg_df[col2],
                  kind='hex', xlim=(-5, 5), ylim=(-5, 5))
    _ = plt.suptitle("Negative distribution")

    # Return the scaled arrays so later cells train and predict on
    # normalized data rather than the raw features.
    return train_features, val_features, test_features

train_features, val_features, test_features = scale_and_plot(
    train_df, train_labels, val_labels, test_labels,
    train_features, val_features, test_features,
    bool_train_labels, 'V5', 'V6')
train_features_B, val_features_B, test_features_B = scale_and_plot(
    train_df_B, train_labels_B, val_labels_B, test_labels_B,
    train_features_B, val_features_B, test_features_B,
    bool_train_labels_B, 'AMT_INCOME_TOTAL', 'DAYS_BIRTH')
METRICS = [
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'),
    keras.metrics.BinaryAccuracy(name='accuracy'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
]
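# Area under the precision-recall curve is often more informative than ROC AUC
# on data this imbalanced; Keras' AUC metric supports it via curve='PR'.
# This line is an addition of ours; the early-stopping callback below still
# monitors 'val_auc'.
METRICS.append(keras.metrics.AUC(name='prc', curve='PR'))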
def make_model(metrics=METRICS, output_bias=None, features=train_features):
    if output_bias is not None:
        output_bias = tf.keras.initializers.Constant(output_bias)
    model = keras.Sequential([
        keras.layers.Dense(
            16, activation='tanh',
            input_shape=(features.shape[-1],)),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(1, activation='sigmoid',
                           bias_initializer=output_bias),
    ])
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=1e-3),
        loss=keras.losses.BinaryCrossentropy(),
        metrics=metrics)
    return model
EPOCHS = 100
BATCH_SIZE = 2048

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_auc',
    verbose=1,
    patience=10,
    mode='max',
    restore_best_weights=True)
model = make_model()
model.summary()
model.predict(train_features[:10])

model = make_model(features=train_features_B)
model.summary()
model.predict(train_features_B[:100])
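# A minimal sketch of how the pieces above fit together for dataset A,
# following the TensorFlow imbalanced-data tutorial: seed the output bias with
# the log-odds of the positive class, and weight the loss so both classes
# contribute equally. The weighting scheme and variable names are assumptions,
# not fixed by the cells above (`total` was reused for dataset B, so it is
# recomputed here).
initial_bias = np.log([pos / neg])
total_A = neg + pos
weight_for_0 = (1 / neg) * (total_A / 2.0)
weight_for_1 = (1 / pos) * (total_A / 2.0)
class_weight = {0: weight_for_0, 1: weight_for_1}

model = make_model(output_bias=initial_bias)
history = model.fit(
    train_features, train_labels,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[early_stopping],
    validation_data=(val_features, val_labels),
    class_weight=class_weight)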