import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
import os
import pickle


# Directory holding the CSV splits. The trailing slash matters: every path
# below is built by plain string concatenation.
input_path = "/home/fabio/Documents/data_science_project_1/data/"

# Feature tables for the train and test splits. header=None makes pandas
# treat the first row as data rather than as column names.
X_train, X_test = (
    pd.read_csv(f"{input_path}{csv_name}", header=None)
    for csv_name in ("X_train.csv", "X_test.csv")
)

def load_y(path, file_name):
    """Load a label vector from a headerless CSV file as a 1-D array.

    The labels may be stored either as a single row or as a single column;
    both layouts are flattened to shape ``(n_labels,)``.

    Args:
        path: Directory prefix. It is concatenated directly with
            ``file_name``, so it must already end with a path separator.
        file_name: Name of the CSV file to read.

    Returns:
        A one-dimensional ``numpy.ndarray`` holding the labels.

    Raises:
        ValueError: If the file has more than one row *and* more than one
            column, so it cannot be read as a single vector.
    """
    values = pd.read_csv(path + file_name, header=None).to_numpy()

    # A DataFrame always yields a 2-D array; a vector has one axis of length 1.
    if 1 not in values.shape:
        raise ValueError(
            "expected a single row or a single column of labels, "
            "got shape {}".format(values.shape)
        )

    # reshape(-1) flattens without relying on the ``newshape=`` keyword of
    # np.reshape, which newer NumPy releases deprecate.
    return values.reshape(-1)

# Label vectors matching X_train / X_test.
y_train = load_y(input_path, "y_train.csv")
y_test = load_y(input_path, "y_test.csv")


# Depth-limited decision tree; random_state is pinned so that repeated runs
# use the same seed.
clf = DecisionTreeClassifier(random_state=123, max_depth=6)
clf.fit(X_train, y_train)
# (A stray `DecisionTreeClassifier(max_depth=6, random_state=123)` expression
# that followed the fit was removed: it built a second, unused estimator and
# looks like pasted notebook output.)


# Results are written to a "results" directory that sits next to the data
# directory (input_path + ".." normalised to an absolute path).
parent_path = os.path.abspath(input_path + os.pardir)
results_dir = parent_path + '/results'

# exist_ok=True replaces the separate exists() check, which could race with
# another process creating the directory in between.
os.makedirs(results_dir, exist_ok=True)

# save the model
filename = results_dir + '/tuned_model.pickle'
# The context manager guarantees the file is flushed and closed before it is
# read back below, instead of relying on garbage collection of the handle.
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

# load and check the model
# NOTE(review): pickle.load can execute arbitrary code; only load pickle files
# that this script itself produced.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
check = loaded_model.score(X_test, y_test)
print(check)

# A bare `0.8672` literal followed the print in the original file; it appears
# to be the output of an earlier run (presumably the test score) - TODO confirm.