In the previous sections, we arrived at a well-performing model based on the decision tree algorithm. We will now save that model to disk so that it can be reused later, in the deployment phase of the project.
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
import os
import pickle
# Directory holding the CSV splits. The trailing slash matters: file names
# are appended to it by plain string concatenation.
input_path = "/home/fabio/Documents/data_science_project_1/data/"

# Feature tables for the train and test splits; the files have no header row.
X_train, X_test = (
    pd.read_csv(input_path + csv_name, header=None)
    for csv_name in ("X_train.csv", "X_test.csv")
)
def load_y(path, file_name):
    """Load a label vector from a header-less CSV file as a 1-D array.

    The labels may be stored either as a single row or as a single column;
    both layouts are flattened to shape ``(n_labels,)``.

    Args:
        path: Prefix of the file location. ``file_name`` is appended by plain
            string concatenation, so a directory must end with a separator.
        file_name: Name of the CSV file to read.

    Returns:
        A 1-D ``numpy.ndarray`` with the labels in file order.

    Raises:
        ValueError: If the file holds more than one row *and* more than one
            column, i.e. it is not a vector.
    """
    labels = np.array(pd.read_csv(path + file_name, header=None))
    # A genuine 2-D table cannot be interpreted as a label vector; fail loudly
    # instead of silently flattening it.
    if min(labels.shape) > 1:
        raise ValueError(
            f"expected a single row or column of labels, got shape {labels.shape}"
        )
    # ravel() avoids np.reshape's deprecated `newshape` keyword and works for
    # both the (1, n) and the (n, 1) layout.
    return labels.ravel()
# Label vectors matching the train and test feature tables.
y_train, y_test = (
    load_y(input_path, labels_file)
    for labels_file in ("y_train.csv", "y_test.csv")
)

# Decision tree with a fixed seed and a depth limit of 6, fitted on the
# training split.
clf = DecisionTreeClassifier(
    max_depth=6,
    random_state=123,
)
clf.fit(X_train, y_train)
DecisionTreeClassifier(max_depth=6, random_state=123)
Let's create a new folder and store the model there:
# The results folder is created next to the data folder, i.e. inside the
# parent directory of `input_path`.
parent_path = os.path.abspath(input_path+os.pardir)
results_dir = os.path.join(parent_path, 'results')
# exist_ok=True replaces the separate existence check, which could race with
# another process creating the folder in between.
os.makedirs(results_dir, exist_ok=True)

# save the model
filename = os.path.join(results_dir, 'tuned_model.pickle')
# The context manager guarantees the file is flushed and closed even if
# pickling fails.
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

# load and check the model
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
# Score the reloaded model on the test split to confirm the round trip worked.
check = loaded_model.score(X_test, y_test)
print(check)
0.8672
The model is ready for deployment.