-
Notifications
You must be signed in to change notification settings - Fork 7
/
exponentialEntropy.py
40 lines (32 loc) · 1.29 KB
/
exponentialEntropy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import numpy as np
import pandas as pd
def information_gain(x, y):
    """Score each feature (column) of ``x`` by information gain against labels ``y``.

    Impurity is the "exponential entropy" H(p) = sum_i p_i * exp(1 - p_i)
    rather than Shannon entropy. A feature is treated as "set" for a sample
    where its value is non-zero; the gain is the drop in impurity when the
    samples are partitioned into set / not-set groups.

    Parameters
    ----------
    x : ndarray of shape (n_samples, n_features)
        Feature matrix.
    y : ndarray of shape (n_samples,)
        Non-negative integer class labels (required by ``np.bincount``).

    Returns
    -------
    tuple
        ``(scores, [])`` where ``scores`` holds one gain per feature; the
        trailing empty list is kept for backward compatibility with callers.
    """

    def _entropy(values):
        # Exponential entropy over the classes actually observed in `values`.
        counts = np.bincount(values)
        probs = counts[np.nonzero(counts)] / float(len(values))
        return np.sum(probs * np.exp(1 - probs))

    def _ig(feature):
        # np.nonzero returns a 1-tuple of index arrays; take the array itself.
        set_idx = np.nonzero(feature)[0]
        # Set gives O(1) membership instead of O(n) scans of the index array.
        set_lookup = set(set_idx)
        not_set_idx = [i for i in feature_range if i not in set_lookup]
        # Guard empty partitions: an empty slice would divide by zero inside
        # _entropy; its weight is 0, so its contribution is defined as 0.
        entropy_set = _entropy(y[set_idx]) if len(set_idx) else 0.0
        entropy_not_set = _entropy(y[not_set_idx]) if not_set_idx else 0.0
        # BUG FIX: the original weighted the "set" branch by
        # len(np.nonzero(feature)) — the length of the tuple, always 1 —
        # instead of the number of samples in the branch.
        w_set = len(set_idx) / float(feature_size)
        w_not_set = len(not_set_idx) / float(feature_size)
        return entropy_before - (w_set * entropy_set + w_not_set * entropy_not_set)

    feature_size = x.shape[0]
    feature_range = range(0, feature_size)
    entropy_before = _entropy(y)
    # One score per column of x.
    information_gain_scores = [_ig(feature) for feature in x.T]
    return information_gain_scores, []
def _main():
    """Load the Australian credit dataset and print exponential-entropy gains."""
    # NOTE(review): assumes "australian.csv" sits in the working directory and
    # that column 14 is an integer class label — confirm against the data file.
    aus = pd.read_csv("australian.csv")
    # Columns 0-13 are features; column 14 is the label.
    features = aus.iloc[:, 0:14]
    labels = aus.iloc[:, 14:15]
    print(information_gain(features.values, labels.values[:, 0]))


if __name__ == "__main__":
    # Guarded so importing this module no longer triggers file I/O.
    _main()