Credit Card Fraud Detection Predictive Model¶

Library Imports¶

In [13]:
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
In [14]:
import warnings

warnings.filterwarnings("ignore")
In [15]:
pip install catboost
Requirement already satisfied: catboost in /usr/local/lib/python3.10/dist-packages (1.2.2)
Requirement already satisfied: graphviz in /usr/local/lib/python3.10/dist-packages (from catboost) (0.20.1)
Requirement already satisfied: matplotlib in /usr/local/lib/python3.10/dist-packages (from catboost) (3.7.1)
Requirement already satisfied: numpy>=1.16.0 in /usr/local/lib/python3.10/dist-packages (from catboost) (1.23.5)
Requirement already satisfied: pandas>=0.24 in /usr/local/lib/python3.10/dist-packages (from catboost) (1.5.3)
Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from catboost) (1.11.4)
Requirement already satisfied: plotly in /usr/local/lib/python3.10/dist-packages (from catboost) (5.15.0)
Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from catboost) (1.16.0)
Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24->catboost) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.24->catboost) (2023.3.post1)
Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (1.2.0)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (4.47.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (1.4.5)
Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (23.2)
Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (9.4.0)
Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib->catboost) (3.1.1)
Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from plotly->catboost) (8.2.3)
In [16]:
!pip install shap==0.39.0
Requirement already satisfied: shap==0.39.0 in /usr/local/lib/python3.10/dist-packages (0.39.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from shap==0.39.0) (1.23.5)
Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from shap==0.39.0) (1.11.4)
Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from shap==0.39.0) (1.2.2)
Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from shap==0.39.0) (1.5.3)
Requirement already satisfied: tqdm>4.25.0 in /usr/local/lib/python3.10/dist-packages (from shap==0.39.0) (4.66.1)
Requirement already satisfied: slicer==0.0.7 in /usr/local/lib/python3.10/dist-packages (from shap==0.39.0) (0.0.7)
Requirement already satisfied: numba in /usr/local/lib/python3.10/dist-packages (from shap==0.39.0) (0.58.1)
Requirement already satisfied: cloudpickle in /usr/local/lib/python3.10/dist-packages (from shap==0.39.0) (2.2.1)
Requirement already satisfied: llvmlite<0.42,>=0.41.0dev0 in /usr/local/lib/python3.10/dist-packages (from numba->shap==0.39.0) (0.41.1)
Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->shap==0.39.0) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->shap==0.39.0) (2023.3.post1)
Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->shap==0.39.0) (1.3.2)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->shap==0.39.0) (3.2.0)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->shap==0.39.0) (1.16.0)
In [17]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import gc
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, confusion_matrix, make_scorer
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier

pd.set_option('display.max_columns', 100)
In [18]:
RFC_METRIC = 'gini'  #metric used for RandomForrestClassifier
NUM_ESTIMATORS = 100 #number of estimators used for RandomForrestClassifier
NO_JOBS = 1 #number of parallel jobs used for RandomForrestClassifier

GBC_METRIC = 'squared_error'  #metric used for RandomForrestClassifier
RANDOM_STATE = 42
EARLY_STOP = 20

#TRAIN/VALIDATION/TEST SPLIT
#VALIDATION
VALID_SIZE = 0.20 # simple validation using train_test_split
TEST_SIZE = 0.20 # test size using_train_test_split

#CROSS-VALIDATION
NUMBER_KFOLDS = 5 #number of KFolds for cross-validation
NUMBER_ITER = 5
NUMBER_REPEATS = 1
In [82]:
# CSV Import

data_df = pd.read_csv("/content/drive/MyDrive/Insurance_Claims/insurance_claims.csv")

EDA¶

In [20]:
print("Credit Card Fraud Detection data -  rows:",data_df.shape[0]," columns:", data_df.shape[1])
Credit Card Fraud Detection data -  rows: 1000  columns: 40
In [21]:
total = data_df.isnull().sum().sort_values(ascending = False)
total
Out[21]:
_c39                           1000
age                               0
incident_state                    0
incident_city                     0
incident_location                 0
incident_hour_of_the_day          0
number_of_vehicles_involved       0
property_damage                   0
bodily_injuries                   0
witnesses                         0
police_report_available           0
total_claim_amount                0
injury_claim                      0
property_claim                    0
vehicle_claim                     0
auto_make                         0
auto_model                        0
auto_year                         0
fraud_reported                    0
authorities_contacted             0
months_as_customer                0
collision_type                    0
insured_zip                       0
policy_number                     0
policy_bind_date                  0
policy_state                      0
policy_csl                        0
policy_deductable                 0
policy_annual_premium             0
umbrella_limit                    0
insured_sex                       0
incident_type                     0
insured_education_level           0
insured_occupation                0
insured_hobbies                   0
insured_relationship              0
capital-gains                     0
capital-loss                      0
incident_date                     0
incident_severity                 0
dtype: int64
In [22]:
data_df["auto_make"].value_counts()
Out[22]:
Saab          80
Dodge         80
Suburu        80
Nissan        78
Chevrolet     76
Ford          72
BMW           72
Toyota        70
Audi          69
Accura        68
Volkswagen    68
Jeep          67
Mercedes      65
Honda         55
Name: auto_make, dtype: int64
In [23]:
#Drop _c39 due to Nan's and policy_number as it doesn't provide additional info
data_df = data_df.drop(columns=['_c39', 'policy_number'])
In [24]:
# Checking imbalance of dataset

temp = data_df["fraud_reported"].value_counts()
df = pd.DataFrame({'fraud_reported': temp.index,'values': temp.values})

df
Out[24]:
fraud_reported values
0 N 753
1 Y 247

The dataset contains 75% non fraudulant claims and 25% fraudulant claims

In [25]:
# Conversion of target 'fraud_reported' feature to int

data_df['fraud_reported'] = data_df['fraud_reported'].str.replace('N','0')
data_df['fraud_reported'] = data_df['fraud_reported'].str.replace('Y','1')

data_df['fraud_reported'] = data_df['fraud_reported'].astype(int)
In [26]:
# Convert policy_bind_date and incident_date to datetime

data_df['policy_bind_date'] = pd.to_datetime(data_df['policy_bind_date'])
data_df['incident_date'] = pd.to_datetime(data_df['incident_date'])
In [27]:
#  Plot cardinality of categorical columns

cat_cols = data_df.select_dtypes('object').columns

data_df[cat_cols].nunique().plot.bar(zorder=2)
plt.ylabel('Count')
plt.title('Cardinality of Categorical Data')
plt.grid(zorder=0)
In [28]:
#Drop datetime data and incident_location as one hot encoding will create too many new columns
data_df = data_df.drop(columns=['policy_bind_date', 'incident_location','incident_date'])
In [29]:
# One-hot encode the data using pandas get_dummies
cleaned_df = pd.get_dummies(data_df)

Machine Learning¶

Basic Comparision¶

In [30]:
# Comparision of several models by test accuracy and speed

import time
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

features = cleaned_df.drop(['fraud_reported'], axis=1)
labels = cleaned_df['fraud_reported']

trained_models = [] #  keep track of all details for models we train

def train_model(model, features, labels):
  X = features
  y = labels.values

  X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

  oversample = RandomOverSampler(sampling_strategy=1)
  X_train, y_train = oversample.fit_resample(X_train, y_train)

  pipe = Pipeline([('scaler', StandardScaler()), ('clf', model["clf"])])
  start_time = time.time()
  pipe.fit(X_train, y_train)
  train_time = time.time() - start_time

  train_accuracy =  pipe.score(X_train, y_train)
  test_accuracy = pipe.score(X_test, y_test)
  model_details = {"name": model["name"], "train_accuracy":train_accuracy, "test_accuracy":test_accuracy, "train_time": train_time, "model": pipe}
  return model_details

models = [
          {"name": "Random Forest", "clf": RandomForestClassifier()},
          {"name": "Gradient Boosting", "clf": GradientBoostingClassifier()},
          {"name": "MLP Classifier", "clf": MLPClassifier(solver='adam', alpha=1e-1, hidden_layer_sizes=(10,10,5,2), max_iter=500, random_state=RANDOM_STATE)},
          {"name": "CatBoost", "clf":CatBoostClassifier(eval_metric='AUC', random_seed = RANDOM_STATE, od_type='Iter', od_wait=EARLY_STOP)},
          ]

for model in models:
  model_details = train_model(model, features, labels)
  trained_models.append(model_details)


# visualize accuracy and run time
#setup_plot()
model_df = pd.DataFrame(trained_models)
model_df.sort_values("test_accuracy", inplace=True)
ax = model_df[["train_accuracy","test_accuracy", "name"]].plot(kind="line", x="name", figsize=(19,5), title="Classifier Performance Sorted by Test Accuracy")
ax.legend(["Train Accuracy", "Test Accuracy"])
for p in ax.patches:
    ax.annotate( str( round(p.get_height(),3) ), (p.get_x() * 1.005, p.get_height() * 1.005))

ax.title.set_size(20)
plt.box(False)

model_df.sort_values("train_time", inplace=True)
ax= model_df[["train_time","name"]].plot(kind="line", x="name", figsize=(19,5), grid=True, title="Classifier Training Time (seconds)")
ax.title.set_size(20)
ax.legend(["Train Time"])
plt.box(False)
Learning rate set to 0.010895
0:	total: 59.7ms	remaining: 59.7s
1:	total: 86.3ms	remaining: 43.1s
2:	total: 113ms	remaining: 37.5s
3:	total: 138ms	remaining: 34.4s
4:	total: 157ms	remaining: 31.2s
5:	total: 183ms	remaining: 30.3s
6:	total: 223ms	remaining: 31.6s
7:	total: 241ms	remaining: 29.9s
8:	total: 268ms	remaining: 29.5s
9:	total: 291ms	remaining: 28.8s
10:	total: 323ms	remaining: 29.1s
11:	total: 362ms	remaining: 29.8s
12:	total: 410ms	remaining: 31.1s
13:	total: 449ms	remaining: 31.6s
14:	total: 505ms	remaining: 33.2s
15:	total: 549ms	remaining: 33.8s
16:	total: 597ms	remaining: 34.5s
17:	total: 643ms	remaining: 35.1s
18:	total: 690ms	remaining: 35.6s
19:	total: 746ms	remaining: 36.5s
20:	total: 795ms	remaining: 37s
21:	total: 826ms	remaining: 36.7s
22:	total: 890ms	remaining: 37.8s
23:	total: 935ms	remaining: 38s
24:	total: 993ms	remaining: 38.7s
25:	total: 1.05s	remaining: 39.3s
26:	total: 1.1s	remaining: 39.9s
27:	total: 1.13s	remaining: 39.1s
28:	total: 1.16s	remaining: 39s
29:	total: 1.23s	remaining: 39.6s
30:	total: 1.26s	remaining: 39.4s
31:	total: 1.33s	remaining: 40.3s
32:	total: 1.39s	remaining: 40.7s
33:	total: 1.45s	remaining: 41.2s
34:	total: 1.5s	remaining: 41.5s
35:	total: 1.56s	remaining: 41.7s
36:	total: 1.61s	remaining: 41.8s
37:	total: 1.64s	remaining: 41.4s
38:	total: 1.67s	remaining: 41.1s
39:	total: 1.74s	remaining: 41.7s
40:	total: 1.81s	remaining: 42.3s
41:	total: 1.84s	remaining: 42s
42:	total: 1.9s	remaining: 42.2s
43:	total: 1.95s	remaining: 42.5s
44:	total: 1.99s	remaining: 42.3s
45:	total: 2.03s	remaining: 42.1s
46:	total: 2.07s	remaining: 42.1s
47:	total: 2.12s	remaining: 42s
48:	total: 2.15s	remaining: 41.8s
49:	total: 2.21s	remaining: 42s
50:	total: 2.25s	remaining: 41.8s
51:	total: 2.29s	remaining: 41.8s
52:	total: 2.35s	remaining: 41.9s
53:	total: 2.4s	remaining: 42.1s
54:	total: 2.45s	remaining: 42.1s
55:	total: 2.49s	remaining: 42s
56:	total: 2.53s	remaining: 41.9s
57:	total: 2.58s	remaining: 41.8s
58:	total: 2.63s	remaining: 41.9s
59:	total: 2.66s	remaining: 41.7s
60:	total: 2.69s	remaining: 41.4s
61:	total: 2.71s	remaining: 41s
62:	total: 2.73s	remaining: 40.6s
63:	total: 2.75s	remaining: 40.3s
64:	total: 2.78s	remaining: 40.1s
65:	total: 2.81s	remaining: 39.7s
66:	total: 2.82s	remaining: 39.2s
67:	total: 2.85s	remaining: 39.1s
68:	total: 2.86s	remaining: 38.6s
69:	total: 2.88s	remaining: 38.3s
70:	total: 2.9s	remaining: 38s
71:	total: 2.91s	remaining: 37.5s
72:	total: 2.93s	remaining: 37.2s
73:	total: 2.94s	remaining: 36.8s
74:	total: 2.98s	remaining: 36.8s
75:	total: 3.01s	remaining: 36.6s
76:	total: 3.03s	remaining: 36.3s
77:	total: 3.04s	remaining: 36s
78:	total: 3.08s	remaining: 35.9s
79:	total: 3.1s	remaining: 35.7s
80:	total: 3.13s	remaining: 35.5s
81:	total: 3.15s	remaining: 35.3s
82:	total: 3.17s	remaining: 35s
83:	total: 3.2s	remaining: 34.9s
84:	total: 3.21s	remaining: 34.6s
85:	total: 3.23s	remaining: 34.4s
86:	total: 3.24s	remaining: 34s
87:	total: 3.26s	remaining: 33.8s
88:	total: 3.27s	remaining: 33.5s
89:	total: 3.3s	remaining: 33.4s
90:	total: 3.32s	remaining: 33.2s
91:	total: 3.34s	remaining: 32.9s
92:	total: 3.37s	remaining: 32.9s
93:	total: 3.39s	remaining: 32.7s
94:	total: 3.4s	remaining: 32.4s
95:	total: 3.43s	remaining: 32.3s
96:	total: 3.45s	remaining: 32.1s
97:	total: 3.46s	remaining: 31.9s
98:	total: 3.48s	remaining: 31.7s
99:	total: 3.5s	remaining: 31.5s
100:	total: 3.51s	remaining: 31.3s
101:	total: 3.54s	remaining: 31.2s
102:	total: 3.57s	remaining: 31.1s
103:	total: 3.61s	remaining: 31.1s
104:	total: 3.64s	remaining: 31.1s
105:	total: 3.65s	remaining: 30.8s
106:	total: 3.68s	remaining: 30.7s
107:	total: 3.71s	remaining: 30.6s
108:	total: 3.77s	remaining: 30.8s
109:	total: 3.8s	remaining: 30.7s
110:	total: 3.81s	remaining: 30.5s
111:	total: 3.83s	remaining: 30.3s
112:	total: 3.84s	remaining: 30.1s
113:	total: 3.87s	remaining: 30.1s
114:	total: 3.89s	remaining: 29.9s
115:	total: 3.91s	remaining: 29.8s
116:	total: 3.94s	remaining: 29.7s
117:	total: 3.96s	remaining: 29.6s
118:	total: 3.98s	remaining: 29.4s
119:	total: 3.99s	remaining: 29.3s
120:	total: 4.01s	remaining: 29.1s
121:	total: 4.02s	remaining: 29s
122:	total: 4.04s	remaining: 28.8s
123:	total: 4.06s	remaining: 28.7s
124:	total: 4.08s	remaining: 28.6s
125:	total: 4.1s	remaining: 28.5s
126:	total: 4.13s	remaining: 28.4s
127:	total: 4.15s	remaining: 28.3s
128:	total: 4.18s	remaining: 28.2s
129:	total: 4.21s	remaining: 28.2s
130:	total: 4.25s	remaining: 28.2s
131:	total: 4.27s	remaining: 28.1s
132:	total: 4.31s	remaining: 28.1s
133:	total: 4.34s	remaining: 28.1s
134:	total: 4.36s	remaining: 27.9s
135:	total: 4.38s	remaining: 27.8s
136:	total: 4.39s	remaining: 27.7s
137:	total: 4.41s	remaining: 27.5s
138:	total: 4.43s	remaining: 27.4s
139:	total: 4.45s	remaining: 27.4s
140:	total: 4.47s	remaining: 27.3s
141:	total: 4.5s	remaining: 27.2s
142:	total: 4.53s	remaining: 27.1s
143:	total: 4.54s	remaining: 27s
144:	total: 4.57s	remaining: 27s
145:	total: 4.6s	remaining: 26.9s
146:	total: 4.62s	remaining: 26.8s
147:	total: 4.64s	remaining: 26.7s
148:	total: 4.67s	remaining: 26.7s
149:	total: 4.69s	remaining: 26.6s
150:	total: 4.72s	remaining: 26.6s
151:	total: 4.76s	remaining: 26.6s
152:	total: 4.79s	remaining: 26.5s
153:	total: 4.81s	remaining: 26.4s
154:	total: 4.84s	remaining: 26.4s
155:	total: 4.87s	remaining: 26.4s
156:	total: 4.9s	remaining: 26.3s
157:	total: 4.92s	remaining: 26.2s
158:	total: 4.96s	remaining: 26.2s
159:	total: 4.97s	remaining: 26.1s
160:	total: 4.99s	remaining: 26s
161:	total: 5.02s	remaining: 25.9s
162:	total: 5.03s	remaining: 25.8s
163:	total: 5.06s	remaining: 25.8s
164:	total: 5.09s	remaining: 25.8s
165:	total: 5.12s	remaining: 25.7s
166:	total: 5.15s	remaining: 25.7s
167:	total: 5.18s	remaining: 25.7s
168:	total: 5.2s	remaining: 25.6s
169:	total: 5.23s	remaining: 25.5s
170:	total: 5.25s	remaining: 25.5s
171:	total: 5.29s	remaining: 25.5s
172:	total: 5.31s	remaining: 25.4s
173:	total: 5.33s	remaining: 25.3s
174:	total: 5.34s	remaining: 25.2s
175:	total: 5.35s	remaining: 25.1s
176:	total: 5.38s	remaining: 25s
177:	total: 5.39s	remaining: 24.9s
178:	total: 5.4s	remaining: 24.8s
179:	total: 5.42s	remaining: 24.7s
180:	total: 5.44s	remaining: 24.6s
181:	total: 5.45s	remaining: 24.5s
182:	total: 5.46s	remaining: 24.4s
183:	total: 5.48s	remaining: 24.3s
184:	total: 5.49s	remaining: 24.2s
185:	total: 5.5s	remaining: 24.1s
186:	total: 5.52s	remaining: 24s
187:	total: 5.53s	remaining: 23.9s
188:	total: 5.55s	remaining: 23.8s
189:	total: 5.56s	remaining: 23.7s
190:	total: 5.58s	remaining: 23.6s
191:	total: 5.6s	remaining: 23.6s
192:	total: 5.61s	remaining: 23.5s
193:	total: 5.63s	remaining: 23.4s
194:	total: 5.64s	remaining: 23.3s
195:	total: 5.67s	remaining: 23.2s
196:	total: 5.67s	remaining: 23.1s
197:	total: 5.7s	remaining: 23.1s
198:	total: 5.71s	remaining: 23s
199:	total: 5.75s	remaining: 23s
200:	total: 5.77s	remaining: 23s
201:	total: 5.79s	remaining: 22.9s
202:	total: 5.81s	remaining: 22.8s
203:	total: 5.83s	remaining: 22.7s
204:	total: 5.84s	remaining: 22.6s
205:	total: 5.84s	remaining: 22.5s
206:	total: 5.85s	remaining: 22.4s
207:	total: 5.86s	remaining: 22.3s
208:	total: 5.88s	remaining: 22.3s
209:	total: 5.9s	remaining: 22.2s
210:	total: 5.91s	remaining: 22.1s
211:	total: 5.95s	remaining: 22.1s
212:	total: 5.97s	remaining: 22s
213:	total: 6.01s	remaining: 22.1s
214:	total: 6.03s	remaining: 22s
215:	total: 6.05s	remaining: 22s
216:	total: 6.08s	remaining: 21.9s
217:	total: 6.1s	remaining: 21.9s
218:	total: 6.11s	remaining: 21.8s
219:	total: 6.13s	remaining: 21.7s
220:	total: 6.15s	remaining: 21.7s
221:	total: 6.17s	remaining: 21.6s
222:	total: 6.19s	remaining: 21.6s
223:	total: 6.22s	remaining: 21.6s
224:	total: 6.24s	remaining: 21.5s
225:	total: 6.25s	remaining: 21.4s
226:	total: 6.27s	remaining: 21.4s
227:	total: 6.29s	remaining: 21.3s
228:	total: 6.32s	remaining: 21.3s
229:	total: 6.34s	remaining: 21.2s
230:	total: 6.36s	remaining: 21.2s
231:	total: 6.39s	remaining: 21.2s
232:	total: 6.41s	remaining: 21.1s
233:	total: 6.43s	remaining: 21s
234:	total: 6.46s	remaining: 21s
235:	total: 6.48s	remaining: 21s
236:	total: 6.5s	remaining: 20.9s
237:	total: 6.53s	remaining: 20.9s
238:	total: 6.55s	remaining: 20.9s
239:	total: 6.57s	remaining: 20.8s
240:	total: 6.58s	remaining: 20.7s
241:	total: 6.59s	remaining: 20.7s
242:	total: 6.62s	remaining: 20.6s
243:	total: 6.63s	remaining: 20.5s
244:	total: 6.64s	remaining: 20.5s
245:	total: 6.67s	remaining: 20.5s
246:	total: 6.69s	remaining: 20.4s
247:	total: 6.71s	remaining: 20.3s
248:	total: 6.71s	remaining: 20.3s
249:	total: 6.72s	remaining: 20.2s
250:	total: 6.75s	remaining: 20.1s
251:	total: 6.75s	remaining: 20s
252:	total: 6.77s	remaining: 20s
253:	total: 6.79s	remaining: 19.9s
254:	total: 6.82s	remaining: 19.9s
255:	total: 6.83s	remaining: 19.8s
256:	total: 6.85s	remaining: 19.8s
257:	total: 6.86s	remaining: 19.7s
258:	total: 6.88s	remaining: 19.7s
259:	total: 6.89s	remaining: 19.6s
260:	total: 6.9s	remaining: 19.5s
261:	total: 6.92s	remaining: 19.5s
262:	total: 6.94s	remaining: 19.4s
263:	total: 6.96s	remaining: 19.4s
264:	total: 6.97s	remaining: 19.3s
265:	total: 7s	remaining: 19.3s
266:	total: 7.02s	remaining: 19.3s
267:	total: 7.03s	remaining: 19.2s
268:	total: 7.06s	remaining: 19.2s
269:	total: 7.09s	remaining: 19.2s
270:	total: 7.11s	remaining: 19.1s
271:	total: 7.13s	remaining: 19.1s
272:	total: 7.14s	remaining: 19s
273:	total: 7.16s	remaining: 19s
274:	total: 7.18s	remaining: 18.9s
275:	total: 7.18s	remaining: 18.8s
276:	total: 7.21s	remaining: 18.8s
277:	total: 7.23s	remaining: 18.8s
278:	total: 7.24s	remaining: 18.7s
279:	total: 7.26s	remaining: 18.7s
280:	total: 7.29s	remaining: 18.7s
281:	total: 7.3s	remaining: 18.6s
282:	total: 7.33s	remaining: 18.6s
283:	total: 7.34s	remaining: 18.5s
284:	total: 7.36s	remaining: 18.5s
285:	total: 7.39s	remaining: 18.5s
286:	total: 7.43s	remaining: 18.5s
287:	total: 7.44s	remaining: 18.4s
288:	total: 7.47s	remaining: 18.4s
289:	total: 7.49s	remaining: 18.3s
290:	total: 7.52s	remaining: 18.3s
291:	total: 7.55s	remaining: 18.3s
292:	total: 7.59s	remaining: 18.3s
293:	total: 7.61s	remaining: 18.3s
294:	total: 7.64s	remaining: 18.3s
295:	total: 7.66s	remaining: 18.2s
296:	total: 7.68s	remaining: 18.2s
297:	total: 7.7s	remaining: 18.1s
298:	total: 7.71s	remaining: 18.1s
299:	total: 7.74s	remaining: 18s
300:	total: 7.76s	remaining: 18s
301:	total: 7.79s	remaining: 18s
302:	total: 7.81s	remaining: 18s
303:	total: 7.83s	remaining: 17.9s
304:	total: 7.85s	remaining: 17.9s
305:	total: 7.87s	remaining: 17.9s
306:	total: 7.9s	remaining: 17.8s
307:	total: 7.92s	remaining: 17.8s
308:	total: 7.94s	remaining: 17.8s
309:	total: 7.96s	remaining: 17.7s
310:	total: 7.98s	remaining: 17.7s
311:	total: 8s	remaining: 17.6s
312:	total: 8.01s	remaining: 17.6s
313:	total: 8.02s	remaining: 17.5s
314:	total: 8.03s	remaining: 17.5s
315:	total: 8.05s	remaining: 17.4s
316:	total: 8.07s	remaining: 17.4s
317:	total: 8.09s	remaining: 17.4s
318:	total: 8.1s	remaining: 17.3s
319:	total: 8.13s	remaining: 17.3s
320:	total: 8.15s	remaining: 17.2s
321:	total: 8.18s	remaining: 17.2s
322:	total: 8.21s	remaining: 17.2s
323:	total: 8.22s	remaining: 17.1s
324:	total: 8.24s	remaining: 17.1s
325:	total: 8.25s	remaining: 17.1s
326:	total: 8.27s	remaining: 17s
327:	total: 8.29s	remaining: 17s
328:	total: 8.31s	remaining: 17s
329:	total: 8.34s	remaining: 16.9s
330:	total: 8.36s	remaining: 16.9s
331:	total: 8.41s	remaining: 16.9s
332:	total: 8.43s	remaining: 16.9s
333:	total: 8.45s	remaining: 16.8s
334:	total: 8.46s	remaining: 16.8s
335:	total: 8.47s	remaining: 16.7s
336:	total: 8.48s	remaining: 16.7s
337:	total: 8.5s	remaining: 16.6s
338:	total: 8.51s	remaining: 16.6s
339:	total: 8.52s	remaining: 16.5s
340:	total: 8.54s	remaining: 16.5s
341:	total: 8.55s	remaining: 16.5s
342:	total: 8.57s	remaining: 16.4s
343:	total: 8.59s	remaining: 16.4s
344:	total: 8.6s	remaining: 16.3s
345:	total: 8.62s	remaining: 16.3s
346:	total: 8.64s	remaining: 16.3s
347:	total: 8.65s	remaining: 16.2s
348:	total: 8.66s	remaining: 16.2s
349:	total: 8.67s	remaining: 16.1s
350:	total: 8.68s	remaining: 16s
351:	total: 8.69s	remaining: 16s
352:	total: 8.7s	remaining: 15.9s
353:	total: 8.7s	remaining: 15.9s
354:	total: 8.71s	remaining: 15.8s
355:	total: 8.73s	remaining: 15.8s
356:	total: 8.74s	remaining: 15.7s
357:	total: 8.75s	remaining: 15.7s
358:	total: 8.76s	remaining: 15.6s
359:	total: 8.78s	remaining: 15.6s
360:	total: 8.8s	remaining: 15.6s
361:	total: 8.81s	remaining: 15.5s
362:	total: 8.83s	remaining: 15.5s
363:	total: 8.84s	remaining: 15.5s
364:	total: 8.85s	remaining: 15.4s
365:	total: 8.86s	remaining: 15.3s
366:	total: 8.89s	remaining: 15.3s
367:	total: 8.9s	remaining: 15.3s
368:	total: 8.9s	remaining: 15.2s
369:	total: 8.93s	remaining: 15.2s
370:	total: 8.94s	remaining: 15.2s
371:	total: 8.95s	remaining: 15.1s
372:	total: 8.96s	remaining: 15.1s
373:	total: 8.97s	remaining: 15s
374:	total: 8.99s	remaining: 15s
375:	total: 9s	remaining: 14.9s
376:	total: 9.01s	remaining: 14.9s
377:	total: 9.02s	remaining: 14.8s
378:	total: 9.05s	remaining: 14.8s
379:	total: 9.07s	remaining: 14.8s
380:	total: 9.08s	remaining: 14.7s
381:	total: 9.09s	remaining: 14.7s
382:	total: 9.1s	remaining: 14.7s
383:	total: 9.11s	remaining: 14.6s
384:	total: 9.12s	remaining: 14.6s
385:	total: 9.13s	remaining: 14.5s
386:	total: 9.14s	remaining: 14.5s
387:	total: 9.15s	remaining: 14.4s
388:	total: 9.17s	remaining: 14.4s
389:	total: 9.18s	remaining: 14.4s
390:	total: 9.19s	remaining: 14.3s
391:	total: 9.2s	remaining: 14.3s
392:	total: 9.21s	remaining: 14.2s
393:	total: 9.22s	remaining: 14.2s
394:	total: 9.24s	remaining: 14.1s
395:	total: 9.25s	remaining: 14.1s
396:	total: 9.26s	remaining: 14.1s
397:	total: 9.27s	remaining: 14s
398:	total: 9.28s	remaining: 14s
399:	total: 9.29s	remaining: 13.9s
400:	total: 9.3s	remaining: 13.9s
401:	total: 9.31s	remaining: 13.9s
402:	total: 9.32s	remaining: 13.8s
403:	total: 9.33s	remaining: 13.8s
404:	total: 9.34s	remaining: 13.7s
405:	total: 9.35s	remaining: 13.7s
406:	total: 9.37s	remaining: 13.6s
407:	total: 9.38s	remaining: 13.6s
408:	total: 9.39s	remaining: 13.6s
409:	total: 9.4s	remaining: 13.5s
410:	total: 9.41s	remaining: 13.5s
411:	total: 9.43s	remaining: 13.5s
412:	total: 9.44s	remaining: 13.4s
413:	total: 9.45s	remaining: 13.4s
414:	total: 9.46s	remaining: 13.3s
415:	total: 9.47s	remaining: 13.3s
416:	total: 9.48s	remaining: 13.3s
417:	total: 9.49s	remaining: 13.2s
418:	total: 9.5s	remaining: 13.2s
419:	total: 9.51s	remaining: 13.1s
420:	total: 9.52s	remaining: 13.1s
421:	total: 9.53s	remaining: 13.1s
422:	total: 9.54s	remaining: 13s
423:	total: 9.55s	remaining: 13s
424:	total: 9.57s	remaining: 12.9s
425:	total: 9.59s	remaining: 12.9s
426:	total: 9.61s	remaining: 12.9s
427:	total: 9.63s	remaining: 12.9s
428:	total: 9.67s	remaining: 12.9s
429:	total: 9.69s	remaining: 12.8s
430:	total: 9.7s	remaining: 12.8s
431:	total: 9.71s	remaining: 12.8s
432:	total: 9.73s	remaining: 12.7s
433:	total: 9.74s	remaining: 12.7s
434:	total: 9.75s	remaining: 12.7s
435:	total: 9.76s	remaining: 12.6s
436:	total: 9.77s	remaining: 12.6s
437:	total: 9.77s	remaining: 12.5s
438:	total: 9.78s	remaining: 12.5s
439:	total: 9.78s	remaining: 12.4s
440:	total: 9.78s	remaining: 12.4s
441:	total: 9.79s	remaining: 12.4s
442:	total: 9.79s	remaining: 12.3s
443:	total: 9.8s	remaining: 12.3s
444:	total: 9.8s	remaining: 12.2s
445:	total: 9.81s	remaining: 12.2s
446:	total: 9.81s	remaining: 12.1s
447:	total: 9.81s	remaining: 12.1s
448:	total: 9.82s	remaining: 12.1s
449:	total: 9.83s	remaining: 12s
450:	total: 9.84s	remaining: 12s
451:	total: 9.84s	remaining: 11.9s
452:	total: 9.85s	remaining: 11.9s
453:	total: 9.85s	remaining: 11.8s
454:	total: 9.86s	remaining: 11.8s
455:	total: 9.87s	remaining: 11.8s
456:	total: 9.89s	remaining: 11.8s
457:	total: 9.9s	remaining: 11.7s
458:	total: 9.91s	remaining: 11.7s
459:	total: 9.93s	remaining: 11.7s
460:	total: 9.94s	remaining: 11.6s
461:	total: 9.95s	remaining: 11.6s
462:	total: 9.96s	remaining: 11.6s
463:	total: 9.98s	remaining: 11.5s
464:	total: 9.99s	remaining: 11.5s
465:	total: 10s	remaining: 11.5s
466:	total: 10s	remaining: 11.4s
467:	total: 10s	remaining: 11.4s
468:	total: 10s	remaining: 11.4s
469:	total: 10.1s	remaining: 11.3s
470:	total: 10.1s	remaining: 11.3s
471:	total: 10.1s	remaining: 11.3s
472:	total: 10.1s	remaining: 11.2s
473:	total: 10.1s	remaining: 11.2s
474:	total: 10.1s	remaining: 11.2s
475:	total: 10.1s	remaining: 11.1s
476:	total: 10.1s	remaining: 11.1s
477:	total: 10.1s	remaining: 11s
478:	total: 10.1s	remaining: 11s
479:	total: 10.1s	remaining: 11s
480:	total: 10.1s	remaining: 10.9s
481:	total: 10.1s	remaining: 10.9s
482:	total: 10.1s	remaining: 10.9s
483:	total: 10.1s	remaining: 10.8s
484:	total: 10.2s	remaining: 10.8s
485:	total: 10.2s	remaining: 10.7s
486:	total: 10.2s	remaining: 10.7s
487:	total: 10.2s	remaining: 10.7s
488:	total: 10.2s	remaining: 10.6s
489:	total: 10.2s	remaining: 10.6s
490:	total: 10.2s	remaining: 10.6s
491:	total: 10.2s	remaining: 10.5s
492:	total: 10.2s	remaining: 10.5s
493:	total: 10.2s	remaining: 10.4s
494:	total: 10.2s	remaining: 10.4s
495:	total: 10.2s	remaining: 10.4s
496:	total: 10.2s	remaining: 10.3s
497:	total: 10.2s	remaining: 10.3s
498:	total: 10.2s	remaining: 10.3s
499:	total: 10.2s	remaining: 10.2s
500:	total: 10.2s	remaining: 10.2s
501:	total: 10.2s	remaining: 10.1s
502:	total: 10.2s	remaining: 10.1s
503:	total: 10.2s	remaining: 10.1s
504:	total: 10.2s	remaining: 10s
505:	total: 10.2s	remaining: 10s
506:	total: 10.3s	remaining: 9.97s
507:	total: 10.3s	remaining: 9.93s
508:	total: 10.3s	remaining: 9.9s
509:	total: 10.3s	remaining: 9.87s
510:	total: 10.3s	remaining: 9.83s
511:	total: 10.3s	remaining: 9.8s
512:	total: 10.3s	remaining: 9.76s
513:	total: 10.3s	remaining: 9.73s
514:	total: 10.3s	remaining: 9.69s
515:	total: 10.3s	remaining: 9.66s
516:	total: 10.3s	remaining: 9.62s
517:	total: 10.3s	remaining: 9.59s
518:	total: 10.3s	remaining: 9.55s
519:	total: 10.3s	remaining: 9.52s
520:	total: 10.3s	remaining: 9.49s
521:	total: 10.3s	remaining: 9.45s
522:	total: 10.3s	remaining: 9.42s
523:	total: 10.3s	remaining: 9.39s
524:	total: 10.3s	remaining: 9.35s
525:	total: 10.3s	remaining: 9.32s
526:	total: 10.4s	remaining: 9.3s
527:	total: 10.4s	remaining: 9.26s
528:	total: 10.4s	remaining: 9.23s
529:	total: 10.4s	remaining: 9.2s
530:	total: 10.4s	remaining: 9.16s
531:	total: 10.4s	remaining: 9.13s
532:	total: 10.4s	remaining: 9.1s
533:	total: 10.4s	remaining: 9.06s
534:	total: 10.4s	remaining: 9.03s
535:	total: 10.4s	remaining: 9s
536:	total: 10.4s	remaining: 8.97s
537:	total: 10.4s	remaining: 8.94s
538:	total: 10.4s	remaining: 8.9s
539:	total: 10.4s	remaining: 8.87s
540:	total: 10.4s	remaining: 8.84s
541:	total: 10.4s	remaining: 8.81s
542:	total: 10.4s	remaining: 8.78s
543:	total: 10.4s	remaining: 8.74s
544:	total: 10.4s	remaining: 8.71s
545:	total: 10.4s	remaining: 8.68s
546:	total: 10.4s	remaining: 8.65s
547:	total: 10.4s	remaining: 8.62s
548:	total: 10.5s	remaining: 8.59s
549:	total: 10.5s	remaining: 8.56s
550:	total: 10.5s	remaining: 8.53s
551:	total: 10.5s	remaining: 8.49s
552:	total: 10.5s	remaining: 8.46s
553:	total: 10.5s	remaining: 8.43s
554:	total: 10.5s	remaining: 8.4s
555:	total: 10.5s	remaining: 8.37s
556:	total: 10.5s	remaining: 8.34s
557:	total: 10.5s	remaining: 8.31s
558:	total: 10.5s	remaining: 8.28s
559:	total: 10.5s	remaining: 8.25s
560:	total: 10.5s	remaining: 8.22s
561:	total: 10.5s	remaining: 8.19s
562:	total: 10.5s	remaining: 8.16s
563:	total: 10.5s	remaining: 8.13s
564:	total: 10.5s	remaining: 8.1s
565:	total: 10.5s	remaining: 8.07s
566:	total: 10.5s	remaining: 8.04s
567:	total: 10.5s	remaining: 8.02s
568:	total: 10.5s	remaining: 7.99s
569:	total: 10.5s	remaining: 7.96s
570:	total: 10.6s	remaining: 7.93s
571:	total: 10.6s	remaining: 7.91s
572:	total: 10.6s	remaining: 7.88s
573:	total: 10.6s	remaining: 7.86s
574:	total: 10.6s	remaining: 7.83s
575:	total: 10.6s	remaining: 7.8s
576:	total: 10.6s	remaining: 7.77s
577:	total: 10.6s	remaining: 7.74s
578:	total: 10.6s	remaining: 7.71s
579:	total: 10.6s	remaining: 7.68s
580:	total: 10.6s	remaining: 7.66s
581:	total: 10.6s	remaining: 7.63s
582:	total: 10.6s	remaining: 7.6s
583:	total: 10.6s	remaining: 7.57s
584:	total: 10.6s	remaining: 7.54s
585:	total: 10.6s	remaining: 7.51s
586:	total: 10.6s	remaining: 7.49s
587:	total: 10.6s	remaining: 7.46s
588:	total: 10.7s	remaining: 7.43s
589:	total: 10.7s	remaining: 7.41s
590:	total: 10.7s	remaining: 7.38s
591:	total: 10.7s	remaining: 7.35s
592:	total: 10.7s	remaining: 7.32s
593:	total: 10.7s	remaining: 7.29s
594:	total: 10.7s	remaining: 7.27s
595:	total: 10.7s	remaining: 7.24s
596:	total: 10.7s	remaining: 7.21s
597:	total: 10.7s	remaining: 7.19s
598:	total: 10.7s	remaining: 7.16s
599:	total: 10.7s	remaining: 7.13s
600:	total: 10.7s	remaining: 7.11s
601:	total: 10.7s	remaining: 7.08s
602:	total: 10.7s	remaining: 7.05s
603:	total: 10.7s	remaining: 7.03s
604:	total: 10.7s	remaining: 7s
605:	total: 10.7s	remaining: 6.97s
606:	total: 10.7s	remaining: 6.95s
607:	total: 10.7s	remaining: 6.92s
608:	total: 10.7s	remaining: 6.9s
609:	total: 10.8s	remaining: 6.88s
610:	total: 10.8s	remaining: 6.85s
611:	total: 10.8s	remaining: 6.83s
612:	total: 10.8s	remaining: 6.8s
613:	total: 10.8s	remaining: 6.78s
614:	total: 10.8s	remaining: 6.75s
615:	total: 10.8s	remaining: 6.73s
616:	total: 10.8s	remaining: 6.7s
617:	total: 10.8s	remaining: 6.7s
618:	total: 10.9s	remaining: 6.68s
619:	total: 10.9s	remaining: 6.66s
620:	total: 10.9s	remaining: 6.64s
621:	total: 10.9s	remaining: 6.61s
622:	total: 10.9s	remaining: 6.59s
623:	total: 10.9s	remaining: 6.57s
624:	total: 10.9s	remaining: 6.54s
625:	total: 10.9s	remaining: 6.53s
626:	total: 10.9s	remaining: 6.5s
627:	total: 11s	remaining: 6.49s
628:	total: 11s	remaining: 6.47s
629:	total: 11s	remaining: 6.45s
630:	total: 11s	remaining: 6.43s
631:	total: 11s	remaining: 6.41s
632:	total: 11s	remaining: 6.38s
633:	total: 11s	remaining: 6.36s
634:	total: 11s	remaining: 6.34s
635:	total: 11s	remaining: 6.31s
636:	total: 11s	remaining: 6.29s
637:	total: 11s	remaining: 6.26s
638:	total: 11s	remaining: 6.24s
639:	total: 11.1s	remaining: 6.22s
640:	total: 11.1s	remaining: 6.19s
641:	total: 11.1s	remaining: 6.17s
642:	total: 11.1s	remaining: 6.14s
643:	total: 11.1s	remaining: 6.12s
644:	total: 11.1s	remaining: 6.1s
645:	total: 11.1s	remaining: 6.08s
646:	total: 11.1s	remaining: 6.05s
647:	total: 11.1s	remaining: 6.03s
648:	total: 11.1s	remaining: 6.01s
649:	total: 11.1s	remaining: 5.99s
650:	total: 11.1s	remaining: 5.97s
651:	total: 11.1s	remaining: 5.95s
652:	total: 11.2s	remaining: 5.93s
653:	total: 11.2s	remaining: 5.9s
654:	total: 11.2s	remaining: 5.88s
655:	total: 11.2s	remaining: 5.86s
656:	total: 11.2s	remaining: 5.83s
657:	total: 11.2s	remaining: 5.81s
658:	total: 11.2s	remaining: 5.79s
659:	total: 11.2s	remaining: 5.76s
660:	total: 11.2s	remaining: 5.74s
661:	total: 11.2s	remaining: 5.72s
662:	total: 11.2s	remaining: 5.7s
663:	total: 11.2s	remaining: 5.67s
664:	total: 11.2s	remaining: 5.65s
665:	total: 11.2s	remaining: 5.63s
666:	total: 11.2s	remaining: 5.61s
667:	total: 11.2s	remaining: 5.58s
668:	total: 11.2s	remaining: 5.56s
669:	total: 11.2s	remaining: 5.54s
670:	total: 11.3s	remaining: 5.52s
671:	total: 11.3s	remaining: 5.49s
672:	total: 11.3s	remaining: 5.47s
673:	total: 11.3s	remaining: 5.45s
674:	total: 11.3s	remaining: 5.43s
675:	total: 11.3s	remaining: 5.41s
676:	total: 11.3s	remaining: 5.39s
677:	total: 11.3s	remaining: 5.37s
678:	total: 11.3s	remaining: 5.34s
679:	total: 11.3s	remaining: 5.32s
680:	total: 11.3s	remaining: 5.3s
681:	total: 11.3s	remaining: 5.28s
682:	total: 11.3s	remaining: 5.26s
683:	total: 11.4s	remaining: 5.24s
684:	total: 11.4s	remaining: 5.23s
685:	total: 11.4s	remaining: 5.21s
686:	total: 11.4s	remaining: 5.19s
687:	total: 11.4s	remaining: 5.17s
688:	total: 11.4s	remaining: 5.15s
689:	total: 11.4s	remaining: 5.13s
690:	total: 11.4s	remaining: 5.12s
691:	total: 11.4s	remaining: 5.1s
692:	total: 11.5s	remaining: 5.08s
693:	total: 11.5s	remaining: 5.06s
694:	total: 11.5s	remaining: 5.04s
695:	total: 11.5s	remaining: 5.03s
696:	total: 11.5s	remaining: 5.01s
697:	total: 11.5s	remaining: 4.99s
698:	total: 11.5s	remaining: 4.97s
699:	total: 11.5s	remaining: 4.95s
700:	total: 11.6s	remaining: 4.93s
701:	total: 11.6s	remaining: 4.91s
702:	total: 11.6s	remaining: 4.89s
703:	total: 11.6s	remaining: 4.88s
704:	total: 11.6s	remaining: 4.86s
705:	total: 11.6s	remaining: 4.84s
706:	total: 11.6s	remaining: 4.82s
707:	total: 11.6s	remaining: 4.8s
708:	total: 11.7s	remaining: 4.78s
709:	total: 11.7s	remaining: 4.76s
710:	total: 11.7s	remaining: 4.75s
711:	total: 11.7s	remaining: 4.73s
712:	total: 11.7s	remaining: 4.71s
713:	total: 11.7s	remaining: 4.69s
714:	total: 11.7s	remaining: 4.67s
715:	total: 11.7s	remaining: 4.65s
716:	total: 11.7s	remaining: 4.63s
717:	total: 11.8s	remaining: 4.62s
718:	total: 11.8s	remaining: 4.6s
719:	total: 11.8s	remaining: 4.58s
720:	total: 11.8s	remaining: 4.56s
721:	total: 11.8s	remaining: 4.54s
722:	total: 11.8s	remaining: 4.52s
723:	total: 11.8s	remaining: 4.5s
724:	total: 11.8s	remaining: 4.49s
725:	total: 11.8s	remaining: 4.46s
726:	total: 11.8s	remaining: 4.44s
727:	total: 11.8s	remaining: 4.42s
728:	total: 11.8s	remaining: 4.4s
729:	total: 11.9s	remaining: 4.38s
730:	total: 11.9s	remaining: 4.37s
731:	total: 11.9s	remaining: 4.34s
732:	total: 11.9s	remaining: 4.33s
733:	total: 11.9s	remaining: 4.31s
734:	total: 11.9s	remaining: 4.29s
735:	total: 11.9s	remaining: 4.27s
736:	total: 11.9s	remaining: 4.25s
737:	total: 11.9s	remaining: 4.22s
738:	total: 11.9s	remaining: 4.21s
739:	total: 11.9s	remaining: 4.19s
740:	total: 11.9s	remaining: 4.17s
741:	total: 12s	remaining: 4.16s
742:	total: 12s	remaining: 4.14s
743:	total: 12s	remaining: 4.12s
744:	total: 12s	remaining: 4.1s
745:	total: 12s	remaining: 4.09s
746:	total: 12s	remaining: 4.07s
747:	total: 12s	remaining: 4.05s
748:	total: 12s	remaining: 4.04s
749:	total: 12.1s	remaining: 4.02s
750:	total: 12.1s	remaining: 4s
751:	total: 12.1s	remaining: 3.98s
752:	total: 12.1s	remaining: 3.97s
753:	total: 12.1s	remaining: 3.96s
754:	total: 12.2s	remaining: 3.94s
755:	total: 12.2s	remaining: 3.93s
756:	total: 12.2s	remaining: 3.91s
757:	total: 12.2s	remaining: 3.89s
758:	total: 12.2s	remaining: 3.88s
759:	total: 12.2s	remaining: 3.87s
760:	total: 12.2s	remaining: 3.85s
761:	total: 12.3s	remaining: 3.83s
762:	total: 12.3s	remaining: 3.81s
763:	total: 12.3s	remaining: 3.79s
764:	total: 12.3s	remaining: 3.78s
765:	total: 12.3s	remaining: 3.77s
766:	total: 12.4s	remaining: 3.75s
767:	total: 12.4s	remaining: 3.74s
768:	total: 12.4s	remaining: 3.73s
769:	total: 12.4s	remaining: 3.71s
770:	total: 12.4s	remaining: 3.69s
771:	total: 12.5s	remaining: 3.68s
772:	total: 12.5s	remaining: 3.66s
773:	total: 12.5s	remaining: 3.64s
774:	total: 12.5s	remaining: 3.62s
775:	total: 12.5s	remaining: 3.61s
776:	total: 12.5s	remaining: 3.59s
777:	total: 12.5s	remaining: 3.57s
778:	total: 12.5s	remaining: 3.56s
779:	total: 12.6s	remaining: 3.55s
780:	total: 12.6s	remaining: 3.53s
781:	total: 12.6s	remaining: 3.52s
782:	total: 12.6s	remaining: 3.5s
783:	total: 12.7s	remaining: 3.49s
784:	total: 12.7s	remaining: 3.48s
785:	total: 12.7s	remaining: 3.46s
786:	total: 12.7s	remaining: 3.45s
787:	total: 12.8s	remaining: 3.43s
788:	total: 12.8s	remaining: 3.42s
789:	total: 12.8s	remaining: 3.4s
790:	total: 12.8s	remaining: 3.38s
791:	total: 12.8s	remaining: 3.37s
792:	total: 12.8s	remaining: 3.35s
793:	total: 12.8s	remaining: 3.33s
794:	total: 12.9s	remaining: 3.31s
795:	total: 12.9s	remaining: 3.3s
796:	total: 12.9s	remaining: 3.28s
797:	total: 12.9s	remaining: 3.26s
798:	total: 12.9s	remaining: 3.25s
799:	total: 12.9s	remaining: 3.23s
800:	total: 12.9s	remaining: 3.21s
801:	total: 13s	remaining: 3.2s
802:	total: 13s	remaining: 3.18s
803:	total: 13s	remaining: 3.16s
804:	total: 13s	remaining: 3.15s
805:	total: 13s	remaining: 3.13s
806:	total: 13s	remaining: 3.11s
807:	total: 13s	remaining: 3.1s
808:	total: 13.1s	remaining: 3.08s
809:	total: 13.1s	remaining: 3.07s
810:	total: 13.1s	remaining: 3.05s
811:	total: 13.1s	remaining: 3.04s
812:	total: 13.1s	remaining: 3.02s
813:	total: 13.2s	remaining: 3.01s
814:	total: 13.2s	remaining: 2.99s
815:	total: 13.2s	remaining: 2.98s
816:	total: 13.2s	remaining: 2.96s
817:	total: 13.2s	remaining: 2.95s
818:	total: 13.3s	remaining: 2.93s
819:	total: 13.3s	remaining: 2.92s
820:	total: 13.3s	remaining: 2.9s
821:	total: 13.3s	remaining: 2.89s
822:	total: 13.4s	remaining: 2.88s
823:	total: 13.4s	remaining: 2.86s
824:	total: 13.4s	remaining: 2.84s
825:	total: 13.4s	remaining: 2.83s
826:	total: 13.4s	remaining: 2.81s
827:	total: 13.4s	remaining: 2.79s
828:	total: 13.5s	remaining: 2.77s
829:	total: 13.5s	remaining: 2.76s
830:	total: 13.5s	remaining: 2.74s
831:	total: 13.5s	remaining: 2.73s
832:	total: 13.5s	remaining: 2.71s
833:	total: 13.5s	remaining: 2.7s
834:	total: 13.6s	remaining: 2.68s
835:	total: 13.6s	remaining: 2.67s
836:	total: 13.6s	remaining: 2.65s
837:	total: 13.6s	remaining: 2.63s
838:	total: 13.6s	remaining: 2.62s
839:	total: 13.7s	remaining: 2.6s
840:	total: 13.7s	remaining: 2.58s
841:	total: 13.7s	remaining: 2.57s
842:	total: 13.7s	remaining: 2.55s
843:	total: 13.7s	remaining: 2.53s
844:	total: 13.7s	remaining: 2.52s
845:	total: 13.7s	remaining: 2.5s
846:	total: 13.8s	remaining: 2.48s
847:	total: 13.8s	remaining: 2.47s
848:	total: 13.8s	remaining: 2.46s
849:	total: 13.8s	remaining: 2.44s
850:	total: 13.8s	remaining: 2.42s
851:	total: 13.9s	remaining: 2.41s
852:	total: 13.9s	remaining: 2.39s
853:	total: 13.9s	remaining: 2.37s
854:	total: 13.9s	remaining: 2.36s
855:	total: 13.9s	remaining: 2.34s
856:	total: 13.9s	remaining: 2.33s
857:	total: 14s	remaining: 2.31s
858:	total: 14s	remaining: 2.3s
859:	total: 14s	remaining: 2.28s
860:	total: 14s	remaining: 2.26s
861:	total: 14s	remaining: 2.25s
862:	total: 14.1s	remaining: 2.23s
863:	total: 14.1s	remaining: 2.21s
864:	total: 14.1s	remaining: 2.2s
865:	total: 14.1s	remaining: 2.18s
866:	total: 14.1s	remaining: 2.16s
867:	total: 14.1s	remaining: 2.15s
868:	total: 14.1s	remaining: 2.13s
869:	total: 14.1s	remaining: 2.11s
870:	total: 14.1s	remaining: 2.09s
871:	total: 14.2s	remaining: 2.08s
872:	total: 14.2s	remaining: 2.06s
873:	total: 14.2s	remaining: 2.04s
874:	total: 14.2s	remaining: 2.03s
875:	total: 14.2s	remaining: 2.01s
876:	total: 14.2s	remaining: 1.99s
877:	total: 14.2s	remaining: 1.98s
878:	total: 14.2s	remaining: 1.96s
879:	total: 14.3s	remaining: 1.94s
880:	total: 14.3s	remaining: 1.93s
881:	total: 14.3s	remaining: 1.91s
882:	total: 14.3s	remaining: 1.9s
883:	total: 14.3s	remaining: 1.88s
884:	total: 14.3s	remaining: 1.86s
885:	total: 14.4s	remaining: 1.85s
886:	total: 14.4s	remaining: 1.83s
887:	total: 14.4s	remaining: 1.82s
888:	total: 14.4s	remaining: 1.8s
889:	total: 14.4s	remaining: 1.78s
890:	total: 14.5s	remaining: 1.77s
891:	total: 14.5s	remaining: 1.75s
892:	total: 14.5s	remaining: 1.74s
893:	total: 14.5s	remaining: 1.72s
894:	total: 14.5s	remaining: 1.7s
895:	total: 14.5s	remaining: 1.69s
896:	total: 14.5s	remaining: 1.67s
897:	total: 14.6s	remaining: 1.65s
898:	total: 14.6s	remaining: 1.64s
899:	total: 14.6s	remaining: 1.62s
900:	total: 14.6s	remaining: 1.6s
901:	total: 14.6s	remaining: 1.59s
902:	total: 14.6s	remaining: 1.57s
903:	total: 14.7s	remaining: 1.56s
904:	total: 14.7s	remaining: 1.54s
905:	total: 14.7s	remaining: 1.53s
906:	total: 14.7s	remaining: 1.51s
907:	total: 14.7s	remaining: 1.49s
908:	total: 14.7s	remaining: 1.48s
909:	total: 14.8s	remaining: 1.46s
910:	total: 14.8s	remaining: 1.45s
911:	total: 14.8s	remaining: 1.43s
912:	total: 14.8s	remaining: 1.41s
913:	total: 14.9s	remaining: 1.4s
914:	total: 14.9s	remaining: 1.38s
915:	total: 14.9s	remaining: 1.37s
916:	total: 14.9s	remaining: 1.35s
917:	total: 14.9s	remaining: 1.33s
918:	total: 15s	remaining: 1.32s
919:	total: 15s	remaining: 1.3s
920:	total: 15s	remaining: 1.29s
921:	total: 15s	remaining: 1.27s
922:	total: 15s	remaining: 1.25s
923:	total: 15.1s	remaining: 1.24s
924:	total: 15.1s	remaining: 1.22s
925:	total: 15.1s	remaining: 1.21s
926:	total: 15.1s	remaining: 1.19s
927:	total: 15.1s	remaining: 1.17s
928:	total: 15.1s	remaining: 1.16s
929:	total: 15.2s	remaining: 1.14s
930:	total: 15.2s	remaining: 1.12s
931:	total: 15.2s	remaining: 1.11s
932:	total: 15.2s	remaining: 1.09s
933:	total: 15.2s	remaining: 1.08s
934:	total: 15.3s	remaining: 1.06s
935:	total: 15.3s	remaining: 1.04s
936:	total: 15.3s	remaining: 1.03s
937:	total: 15.3s	remaining: 1.01s
938:	total: 15.3s	remaining: 995ms
939:	total: 15.3s	remaining: 978ms
940:	total: 15.3s	remaining: 962ms
941:	total: 15.4s	remaining: 946ms
942:	total: 15.4s	remaining: 929ms
943:	total: 15.4s	remaining: 913ms
944:	total: 15.4s	remaining: 897ms
945:	total: 15.4s	remaining: 881ms
946:	total: 15.4s	remaining: 864ms
947:	total: 15.5s	remaining: 849ms
948:	total: 15.5s	remaining: 832ms
949:	total: 15.5s	remaining: 816ms
950:	total: 15.5s	remaining: 800ms
951:	total: 15.5s	remaining: 783ms
952:	total: 15.6s	remaining: 767ms
953:	total: 15.6s	remaining: 751ms
954:	total: 15.6s	remaining: 734ms
955:	total: 15.6s	remaining: 718ms
956:	total: 15.6s	remaining: 702ms
957:	total: 15.6s	remaining: 686ms
958:	total: 15.7s	remaining: 669ms
959:	total: 15.7s	remaining: 653ms
960:	total: 15.7s	remaining: 637ms
961:	total: 15.7s	remaining: 620ms
962:	total: 15.7s	remaining: 604ms
963:	total: 15.7s	remaining: 588ms
964:	total: 15.7s	remaining: 571ms
965:	total: 15.8s	remaining: 555ms
966:	total: 15.8s	remaining: 539ms
967:	total: 15.8s	remaining: 522ms
968:	total: 15.8s	remaining: 506ms
969:	total: 15.8s	remaining: 490ms
970:	total: 15.9s	remaining: 473ms
971:	total: 15.9s	remaining: 457ms
972:	total: 15.9s	remaining: 441ms
973:	total: 15.9s	remaining: 425ms
974:	total: 15.9s	remaining: 408ms
975:	total: 15.9s	remaining: 392ms
976:	total: 16s	remaining: 376ms
977:	total: 16s	remaining: 360ms
978:	total: 16s	remaining: 344ms
979:	total: 16.1s	remaining: 328ms
980:	total: 16.1s	remaining: 312ms
981:	total: 16.1s	remaining: 295ms
982:	total: 16.1s	remaining: 279ms
983:	total: 16.2s	remaining: 263ms
984:	total: 16.2s	remaining: 247ms
985:	total: 16.2s	remaining: 230ms
986:	total: 16.2s	remaining: 214ms
987:	total: 16.2s	remaining: 197ms
988:	total: 16.3s	remaining: 181ms
989:	total: 16.3s	remaining: 164ms
990:	total: 16.3s	remaining: 148ms
991:	total: 16.3s	remaining: 132ms
992:	total: 16.3s	remaining: 115ms
993:	total: 16.3s	remaining: 98.6ms
994:	total: 16.4s	remaining: 82.2ms
995:	total: 16.4s	remaining: 65.8ms
996:	total: 16.4s	remaining: 49.3ms
997:	total: 16.4s	remaining: 32.9ms
998:	total: 16.4s	remaining: 16.4ms
999:	total: 16.4s	remaining: 0us

Random Forest, Gradient Boosting and Cat Boost look promising, but Cat Boost is very slow

In the inital comparision over sampling was used as the dataset is somewhat imbalanced

In [31]:
# Train test split

train_df, test_df = train_test_split(cleaned_df, test_size=TEST_SIZE, random_state=RANDOM_STATE, shuffle=True )
train_df, valid_df = train_test_split(train_df, test_size=VALID_SIZE, random_state=RANDOM_STATE, shuffle=True )
In [32]:
# Target and Features

target = 'fraud_reported'
predictors = cleaned_df.drop(columns=['fraud_reported']).columns

Random Forests¶

In [33]:
# Creation of model
clf = RandomForestClassifier(random_state=RANDOM_STATE,
                             criterion=RFC_METRIC,
                             verbose=False)
In [34]:
#Fit to data
clf.fit(train_df[predictors], train_df[target].values)
Out[34]:
RandomForestClassifier(random_state=42, verbose=False)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(random_state=42, verbose=False)
In [35]:
#Make predictions
preds = clf.predict(valid_df[predictors])
In [36]:
tmp = pd.DataFrame({'Feature': predictors, 'Feature Importance': clf.feature_importances_})
tmp = tmp.sort_values(by='Feature Importance',ascending=False)

ten_most_important = tmp[0:10]
ten_most_important_df = pd.DataFrame(ten_most_important)

plt.figure(figsize = (7,4))
plt.title('Features Importance',fontsize=14)
s = sns.barplot(x='Feature',y='Feature Importance',data=ten_most_important_df)
s.set_xticklabels(s.get_xticklabels(),rotation=90)
plt.show()
In [37]:
#ROC score
roc_auc_score(valid_df[target].values, preds)
Out[37]:
0.5963949843260188

Hyperparameter Tuning¶

In [42]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV, RepeatedStratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from scipy.stats import randint

# Separate features and labels
X = cleaned_df.drop(['fraud_reported'], axis=1)
y = cleaned_df['fraud_reported'].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Create the model
#rfc = RandomForestClassifier(random_state=RANDOM_STATE)

# Define the parameter space that will be searched over
param_distributions = {'clf__n_estimators': randint(1, 1000),
                       'clf__max_features': ['auto', 'sqrt'],
                       'clf__min_samples_leaf': randint(1, 5),
                       'clf__min_samples_split': randint(2, 10),
                       'clf__max_depth': randint(1, 100)}

rfc = Pipeline([('scaler', StandardScaler()), ('clf', RandomForestClassifier())])

# Create a searchCV object
search = RandomizedSearchCV(rfc,
                             n_iter=NUMBER_ITER,
                             scoring="roc_auc",
                             cv=RepeatedStratifiedKFold(n_splits=NUMBER_KFOLDS, n_repeats=NUMBER_REPEATS),
                             param_distributions=param_distributions,
                             random_state=0)

y_pred = search.fit(X_train, y_train).predict(X_test)

# Print the results
best_score = search.best_score_
print('Best ROC Score:', best_score)

best_params = search.best_params_
print('\nHyperparameter Tuning:', best_params)
Best ROC Score: 0.8440181977006616

Hyperparameter Tuning: {'clf__max_depth': 68, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__min_samples_split': 5, 'clf__n_estimators': 278}

For Random Forests, hyperparameter Tuning was run both with and without over sampling.

Higher AUC scores were obtained without the use of over sampling, but to aid the slight class imbalance RepeatedStratifiedKFold was used in the cross validation step.

In [43]:
#Generate the confusion matrix
cf_matrix = confusion_matrix(y_test, y_pred)

ax = sns.heatmap(cf_matrix, annot=True, cmap='Blues', fmt='.0f')

ax.set_title('\nRandom Forest ROC Confusion Matrix\n\n');
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ');

## Ticket labels - List must be in alphabetical order
ax.xaxis.set_ticklabels(['False','True'])
ax.yaxis.set_ticklabels(['False','True'])

## Display the visualization of the Confusion Matrix.
plt.show()

Gradient Boosting¶

In [65]:
clf = GradientBoostingClassifier(
                             random_state=RANDOM_STATE,
                             criterion=GBC_METRIC,
                             verbose=False)
In [66]:
clf.fit(train_df[predictors], train_df[target].values)
Out[66]:
GradientBoostingClassifier(criterion='squared_error', random_state=42,
                           verbose=False)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
GradientBoostingClassifier(criterion='squared_error', random_state=42,
                           verbose=False)
In [67]:
preds = clf.predict(valid_df[predictors])
In [68]:
tmp = pd.DataFrame({'Feature': predictors, 'Feature Importance': clf.feature_importances_})
tmp = tmp.sort_values(by='Feature Importance',ascending=False)

ten_most_important = tmp[0:10]
ten_most_important_df = pd.DataFrame(ten_most_important)


plt.figure(figsize = (7,4))
plt.title('Features Importance',fontsize=14)
s = sns.barplot(x='Feature',y='Feature Importance',data=ten_most_important_df)
s.set_xticklabels(s.get_xticklabels(),rotation=90)
plt.show()
In [69]:
roc_auc_score(valid_df[target].values, preds)
Out[69]:
0.7621473354231976
In [70]:
cm = pd.crosstab(valid_df[target].values, preds, rownames=['Actual'], colnames=['Predicted'])
fig, (ax1) = plt.subplots(ncols=1, figsize=(5,5))
sns.heatmap(cm,
            xticklabels=['Not Fraud', 'Fraud'],
            yticklabels=['Not Fraud', 'Fraud'],
            annot=True,ax=ax1,
            linewidths=.2,linecolor="Darkblue", cmap="Blues")
plt.title('Confusion Matrix', fontsize=14)
plt.show()

Hyperparameter Tuning¶

In [71]:
# Separate features and labels
X = cleaned_df.drop(['fraud_reported'], axis=1)
y = cleaned_df['fraud_reported'].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Define the parameter space that will be searched over
param_distributions = {'clf__n_estimators': randint(1, 1000),
                       'clf__max_features': ['auto', 'sqrt'],
                       'clf__min_samples_leaf': randint(1, 5),
                       'clf__min_samples_split': randint(2, 10),
                       'clf__max_depth': randint(1, 100)}

#Create a pipeline
gbc = Pipeline([('scaler', StandardScaler()), ('clf', GradientBoostingClassifier())])

# Create a searchCV object
search = RandomizedSearchCV(gbc,
                             n_iter=NUMBER_ITER,
                             scoring="roc_auc",
                             cv=RepeatedStratifiedKFold(n_splits=NUMBER_KFOLDS, n_repeats=NUMBER_REPEATS),
                             param_distributions=param_distributions,
                             random_state=0)

y_pred = search.fit(X_train, y_train).predict(X_test)

# Print the results
best_score = search.best_score_
print('Best ROC Score:', best_score)

best_params = search.best_params_
print('\nHyperparameter Tuning:', best_params)
Best ROC Score: 0.8754595905911694

Hyperparameter Tuning: {'clf__max_depth': 13, 'clf__max_features': 'auto', 'clf__min_samples_leaf': 2, 'clf__min_samples_split': 8, 'clf__n_estimators': 552}

For Gradient Boosting, hyperparameter Tuning was run both with and without over sampling.

Higher AUC scores were obtained without the use of over sampling, but to aid the slight class imbalance RepeatedStratifiedKFold was used in the cross validation step.

In [72]:
#Generate the confusion matrix
cf_matrix = confusion_matrix(y_test, y_pred)

ax = sns.heatmap(cf_matrix, annot=True, cmap='Blues', fmt='.0f')

ax.set_title('\nGradient Boosting ROC Confusion Matrix\n\n');
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ');

## Ticket labels - List must be in alphabetical order
ax.xaxis.set_ticklabels(['False','True'])
ax.yaxis.set_ticklabels(['False','True'])

## Display the visualization of the Confusion Matrix.
plt.show()

Custom Loss Function and SHAP analysis¶

After hyperparameter tuning Random Forests returned a Best ROC Score: 0.844

After hyperparameter tuning Gradient Boosting returned a Best ROC Score: 0.847

Therefore Gradient Boosting was selected for the custom scorer hyperparameter tuning

In [79]:
from sklearn.metrics import make_scorer

time1 = time.time()

# Separate features and labels
data = cleaned_df.drop(['fraud_reported'], axis=1)
labels = cleaned_df['fraud_reported']

X_train, X_test, y_train, y_test = train_test_split(data, labels, random_state=42)

#undersample = RandomUnderSampler(sampling_strategy=1)
#X_train, y_train = undersample.fit_resample(X_train, y_train)

def my_custom_loss_func(X_train, y_train):
    CM = confusion_matrix(y_test, y_pred)
    CM = CM.astype('float') / CM.sum(axis=1)[:, np.newaxis]
    FN = CM[1][0]
    FP = CM[0][1]
    credit_score = (((-10*FN) + (-1*FP))/11)
    return credit_score

my_scorer = make_scorer(my_custom_loss_func, greater_is_better=False)

gbc = Pipeline([('scaler', StandardScaler()), ('clf', GradientBoostingClassifier())])

param_grid = {'clf__n_estimators': randint(1, 1000),
                         'clf__max_features': ['auto', 'sqrt'],
                         'clf__min_samples_leaf': randint(1, 5),
                         'clf__min_samples_split': randint(2, 10),
                         'clf__max_depth': randint(1, 100)}

search = RandomizedSearchCV(gbc, n_iter=NUMBER_ITER, scoring=my_scorer, cv = RepeatedStratifiedKFold(n_splits=NUMBER_KFOLDS, n_repeats=NUMBER_REPEATS),
                              param_distributions=param_grid, random_state=0)

y_pred = search.fit(X_train, y_train).predict(X_test)

Best_Score = search.best_score_
print('Best Custom Score:', Best_Score)
Best_Params = search.best_params_
print('\nHyperparameter Tuning:', Best_Params)

#Generate the confusion matrix
cf_matrix = confusion_matrix(y_test, y_pred)

ax = sns.heatmap(cf_matrix, annot=True, cmap='Blues', fmt='.0f')

ax.set_title('\nGradient Boosting Credit Rating Confusion Matrix\n\n');
ax.set_xlabel('\nPredicted Values')
ax.set_ylabel('Actual Values ');

## Ticket labels - List must be in alphabetical order
ax.xaxis.set_ticklabels(['False','True'])
ax.yaxis.set_ticklabels(['False','True'])

## Display the visualization of the Confusion Matrix.
plt.show()

time2 = np.round(time.time() - time1,0)
print("\ntime taken: ", time2)
Best Custom Score: 0.620022095187253

Hyperparameter Tuning: {'clf__max_depth': 45, 'clf__max_features': 'sqrt', 'clf__min_samples_leaf': 2, 'clf__min_samples_split': 2, 'clf__n_estimators': 836}
time taken:  79.0

Why do we need a custom loss function?

Both false positive and false negative results are generated from the machine learning calculations.

In the case of false positives this results in extra work for the business double checking claims.

False negatives (I.e Customers that are deliberately attempting fraud) are what we must reduce at the expense of higher false positives.

Hence with a custom loss function we can penalise false negatives, I have chosen to make each false negative ten times the cost of a false positive.

In [76]:
# Use of SHAP to show global features and their respective devation from the mean

import shap

gbc = GradientBoostingClassifier(random_state=RANDOM_STATE, max_depth=45, max_features='sqrt', min_samples_leaf=2, min_samples_split=2, n_estimators=836)

X = np.array(cleaned_df.drop(['fraud_reported'], axis=1))
y = cleaned_df['fraud_reported'].values

feature_list = list(cleaned_df.columns)

gbc.fit(X, y);

explainer = shap.Explainer(gbc)
shap_values = explainer(X)

shap.summary_plot(shap_values, X, feature_list)