-
Notifications
You must be signed in to change notification settings - Fork 11
/
computeScores.py
124 lines (106 loc) · 4.01 KB
/
computeScores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 30 19:14:19 2017
@author: corbi
"""
import sys
import numpy as np
from time import time
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from loadData import load_toy_dataset, load_school_dataset, load_sarcos_dataset
from RandomMTLRegressor import randompred
from mult_ind_SVM import mult_ind_SVM
from AlternatingStructureOptimization import AlternatingStructureOptimization
from ConvexAlternatingStructureOptimization import ConvexAlternatingStructureOptimization
from ClusteredRegression import ClusteredLinearRegression
def compute_scores(X, y, model, n_splits=5, test_size=0.30, gridsearch=False, verbose=False):
    """
    Compute the nrMSE score for a given model and a given dataset (X, y).

    The data is split ``n_splits`` times with a ``ShuffleSplit`` seeded with
    ``random_state=42``. For every split the model is fitted on the training
    part and ``1 - score(X_test, y_test)`` is recorded as the nrMSE of that
    split (presumably ``score`` is an R^2-like quantity -- confirm against the
    model classes).

    Parameters
    ----------
    X, y : array-like
        Samples and targets; both are indexed with the integer index arrays
        produced by ``ShuffleSplit``.
    model : estimator
        Object exposing ``fit(X, y)`` and ``score(X, y)``. When ``gridsearch``
        is enabled it must also expose a ``params`` attribute, used as the
        ``param_grid`` of ``GridSearchCV``.
    n_splits : int
        Number of random shuffle splits.
    test_size : float
        Passed to ``ShuffleSplit`` as ``test_size``.
    gridsearch : bool
        If true, tune the model with a 3-fold ``GridSearchCV`` on each
        training split and evaluate the best estimator found.
    verbose : bool
        If true, print the split number and the time taken by each split.

    Returns
    -------
    float or list
        The single nrMSE value when ``n_splits == 1``; otherwise
        ``[mean, variance]`` of the nrMSE values over all splits.
    """
    t0 = time()
    nrMSE = []

    # Shuffle split (fixed seed so that runs are reproducible)
    ss = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=42)

    for i, (train_index, test_index) in enumerate(ss.split(X), start=1):
        t1 = time()
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        if verbose:
            print("Random shuffle split %d"%i)

        if gridsearch:
            grid = GridSearchCV(model, cv=3, param_grid=model.params,verbose=1)
            grid.fit(X_train,y_train)
            print (grid.best_estimator_)
            # GridSearchCV fits clones of `model`, so `model` itself is still
            # unfitted here: evaluate the refitted best estimator instead.
            fitted = grid.best_estimator_
        else:
            model.fit(X_train,y_train)
            fitted = model

        nrMSE.append(1. - fitted.score(X_test,y_test))

        if verbose:
            print ("....run in %fs" % (time() - t1) )

    print("Total run in %fs" % (time() - t0))

    if n_splits==1:
        return nrMSE[0]
    return [np.mean(nrMSE),np.var(nrMSE)]
if __name__=='__main__':
    # Command line: <dataset> <algo> <n_splits> <test_size>
    #   dataset : toy | school | sarcos
    #   algo    : random | svm | aso | caso | cmtl | cmtl_e
    # Every invalid input exits with a message on stderr and a non-zero
    # status (sys.exit(str)) instead of a traceback or a silent status 0.
    known_datasets = ("toy", "school", "sarcos")
    known_algos = ("random", "svm", "aso", "caso", "cmtl", "cmtl_e")

    # Get choices
    if len(sys.argv) < 5:
        sys.exit("Usage: %s <%s> <%s> <n_splits> <test_size>"
                 % (sys.argv[0], "|".join(known_datasets), "|".join(known_algos)))
    dataset = sys.argv[1]
    algo = sys.argv[2]
    try:
        splits = int(sys.argv[3])
        test_size = float(sys.argv[4])
    except ValueError:
        sys.exit("n_splits must be an integer and test_size a float.")

    if splits < 1:
        sys.exit("Number of splits must be at least 1.")
    if not 0 < test_size < 1:
        sys.exit("Test size must be strictly between 0 and 1.")

    # Reject bad choices before loading any data.
    if dataset not in known_datasets:
        sys.exit("Unknown dataset.")
    if algo not in known_algos:
        sys.exit("Unknown algorithm.")

    # Generate dataset, together with the hyper-parameters used for it:
    #   C -> passed to mult_ind_SVM, r -> first argument of
    #   ClusteredLinearRegression, h -> passed to ASO / cASO.
    E = None  # only the toy loader returns this third value
    if dataset=="toy":
        X, y, E = load_toy_dataset()
        C = 1e2
        r = 3
        h = 3
    elif dataset=="school":
        X, y = load_school_dataset()
        C = 1e1
        r = 7
        h = 3
    else:  # "sarcos"
        X, y = load_sarcos_dataset()
        C = 1e4
        r = 6
        h = 3

    # Number of distinct values in the last column of X
    # (presumably the task identifier column -- confirm against loadData).
    m=len(np.unique(X[:,-1]))

    # Initialize chosen algorithm
    if algo=="random":
        modele = randompred()
    elif algo=="svm":
        modele = mult_ind_SVM(m=m, C=C)
    elif algo=="aso":
        lbda = np.ones((1,m))*0.225
        modele = AlternatingStructureOptimization(lbda=lbda,m=m, d=X.shape[1]-1, h=h)
    elif algo=="caso":
        alpha = 0.225
        beta = 0.15
        modele = ConvexAlternatingStructureOptimization(alpha=alpha, beta=beta,m=m, d=X.shape[1]-1, h=h)
    else:  # "cmtl" or "cmtl_e": same epsilons, cmtl_e additionally passes E
        epsilon = 0.5
        epsilon_m = 0.2*epsilon
        epsilon_b = 3.5*epsilon
        epsilon_w = 4.5*epsilon
        if algo=="cmtl":
            modele = ClusteredLinearRegression(r, m, epsilon_m, epsilon_w, epsilon_b, mu=2.5)
        else:
            if E is None:
                # Previously this raised NameError on `E` for school/sarcos.
                sys.exit("Algorithm cmtl_e requires the toy dataset.")
            r=E.shape[1]
            modele = ClusteredLinearRegression(r, m, epsilon_m, epsilon_w, epsilon_b, E,mu=2.5)

    # Compute score
    nrMSE = compute_scores(X,y, modele, n_splits=splits, test_size=test_size)
    if splits==1:
        # A single split yields one value, so there is no spread to report.
        print("nrMSE score: %f, +/- %f " % (nrMSE,0))
    else:
        # compute_scores returns [mean, variance] for several splits.
        print("nrMSE score: %f, +/- %f " % (nrMSE[0],nrMSE[1]))