-
Notifications
You must be signed in to change notification settings - Fork 0
/
RicePaper_KNRegression.py
112 lines (72 loc) · 3.21 KB
/
RicePaper_KNRegression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# -*- coding: utf-8 -*-
# Import-time trace message, printed once when this module is first loaded.
# (Spanish: "Passes through the RF class".)
# NOTE(review): the text says "RF" although this module only defines the
# k-nearest-neighbours wrapper -- looks copied from a sibling module; confirm.
print("Pasa por la clase RF")
import numpy as np
# Performance measurement helpers
import time
import os
import psutil
# Handle on the current interpreter process, looked up by its own PID.
# It is not referenced again in the visible part of this file.
process = psutil.Process(os.getpid())
# Modelling
from sklearn import metrics
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.dummy import DummyRegressor
# NOTE(review): `sklearn.externals.joblib` was deprecated in scikit-learn 0.21
# and removed in 0.23 -- confirm the pinned scikit-learn version, or switch to
# the standalone `joblib` package.
from sklearn.externals import joblib
from sklearn.preprocessing import StandardScaler
class KNRegressorYield:
    """Tune, persist and evaluate a k-nearest-neighbours regressor.

    The workflow is: call :meth:`hyper_tunin` on the training data (this fits
    the feature/target scalers, a dummy baseline and a grid search, and
    writes the search plus the baseline to disk), then call :meth:`evaluate`
    on held-out data using the same instance.

    Attributes set by :meth:`hyper_tunin` (``None`` until then):
        dummyModel: ``DummyRegressor`` fitted on the *unscaled* training data.
        sc_X: ``StandardScaler`` fitted on the training features.
        sc_y: ``StandardScaler`` fitted on the training target.

    Note:
        Only the grid search and the dummy baseline are persisted; the two
        scalers live on the instance only, so :meth:`evaluate` must be called
        on the same object that ran :meth:`hyper_tunin`.
    """

    # Model identifier; also used as the file name of the persisted model.
    NAME = "KNeighborsRegressor"

    def __init__(self, models_folder):
        """Remember where models are persisted.

        Args:
            models_folder: Folder name, relative to the current working
                directory, in which the tuned model is stored.
        """
        self.models_folder = models_folder
        self.dummyModel = None
        self.sc_X = None
        self.sc_y = None

    def _model_path(self):
        """Return the relative path of the persisted model file."""
        return "./%s/%s" % (self.models_folder, self.NAME)

    def hyper_tunin(self, X_train, y_train, num_jobs):
        """Grid-search the k-NN hyper-parameters and persist the result.

        Args:
            X_train: Training features, accepted by ``StandardScaler``.
            y_train: Training target; must expose ``.values`` (e.g. a pandas
                Series) because it is reshaped to a column before scaling.
            num_jobs: Passed to ``GridSearchCV`` as ``n_jobs``.

        Returns:
            tuple: ``(grid, time_elapsed)`` where ``grid`` is the fitted
            ``GridSearchCV`` and ``time_elapsed`` is the wall-clock duration
            of the fit in seconds.
        """
        print("KN")

        # Baseline model, fitted on the raw (unscaled) data so that it can be
        # compared against un-scaled targets in ``evaluate``.
        self.dummyModel = DummyRegressor()
        self.dummyModel.fit(X_train, y_train)

        # Keep the fitted scalers on the instance: ``evaluate`` needs them to
        # transform the test set and to map predictions back to target units.
        self.sc_X = StandardScaler()
        self.sc_y = StandardScaler()
        X_scaled = self.sc_X.fit_transform(X_train)
        y_scaled = self.sc_y.fit_transform(
            y_train.values.reshape(-1, 1)
        ).ravel()

        param_grid = {
            'n_neighbors': [2, 5, 10],
            'weights': ['uniform', 'distance'],
            'p': [1, 2],
        }
        grid = GridSearchCV(
            estimator=KNeighborsRegressor(),
            param_grid=param_grid,
            cv=10,
            n_jobs=num_jobs,
            return_train_score=True,
        )

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # monotonic wall-clock replacement.
        time_start = time.perf_counter()
        grid = grid.fit(X_scaled, y_scaled)
        time_elapsed = time.perf_counter() - time_start

        # Persist the tuned search together with the baseline model.
        joblib.dump([grid, self.dummyModel], self._model_path())
        return grid, time_elapsed

    def evaluate(self, model, X_test, y_test):
        """Score ``model`` on held-out data.

        Must be called after :meth:`hyper_tunin` on the same instance, since
        it relies on the scalers and the dummy baseline fitted there.

        Args:
            model: Fitted estimator exposing ``predict`` and ``score``;
                expected to be the search returned by :meth:`hyper_tunin`.
            X_test: Test features (unscaled).
            y_test: Test target (unscaled); must expose ``.values``.

        Returns:
            list: ``[RMSE, RRSE, R2, MAE, time_elapsed]`` where RMSE and MAE
            are in original target units, RRSE is the model RMSE divided by
            the dummy baseline's RMSE, R2 is ``model.score`` on the scaled
            data, and ``time_elapsed`` is the wall-clock duration in seconds.
        """
        time_start = time.perf_counter()

        X_transformed = self.sc_X.transform(X_test)
        # Use transform(), not fit_transform(): re-fitting the target scaler
        # on the test set would leak test statistics and make the
        # inverse_transform below use the wrong mean/std.
        y_transformed = self.sc_y.transform(
            y_test.values.reshape(-1, 1)
        ).ravel()

        # Predict in scaled space, then map back to original target units.
        y_scaled = model.predict(X_transformed)
        y_predict = self.sc_y.inverse_transform(
            y_scaled.reshape(-1, 1)
        ).ravel()
        # The baseline was fitted on unscaled data, so it gets unscaled input.
        y_dummie = self.dummyModel.predict(X_test)

        R2 = model.score(X_transformed, y_transformed)
        RMSE = np.sqrt(metrics.mean_squared_error(y_test, y_predict))
        MAE = metrics.mean_absolute_error(y_test, y_predict)
        RMSE_DUMMIE = np.sqrt(metrics.mean_squared_error(y_test, y_dummie))
        RRSE = RMSE / RMSE_DUMMIE

        time_elapsed = time.perf_counter() - time_start
        return [RMSE, RRSE, R2, MAE, time_elapsed]

    def loadTuninPersistence(self):
        """Load the objects persisted by :meth:`hyper_tunin`.

        Returns:
            The deserialised ``[grid, dummyModel]`` list written by
            :meth:`hyper_tunin`.
        """
        # SECURITY: joblib.load unpickles arbitrary objects -- only load
        # model files that come from a trusted source.
        return joblib.load(self._model_path())