-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
149 lines (111 loc) · 3.36 KB
/
main.py
File metadata and controls
149 lines (111 loc) · 3.36 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import json
import traceback
from rpy2.rinterface_lib.embedded import RRuntimeError
from scipy import io
from sklearn.manifold import MDS
from tsne import tsne
from embedder import ClassNeRV
from sklearn.decomposition import PCA
import dataset
import rpy2.robjects.packages as rpackages
from rpy2.robjects.vectors import StrVector
from sklearn.cluster import KMeans
from rpy2.robjects.vectors import IntVector
import pandas as pd
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
# Names of the external clustering-comparison criteria that metricsCalcul
# passes, one at a time, to clusterCrit's extCriteria.
crit = [
    "Czekanowski_Dice", "Folkes_Mallows", "Hubert", "Jaccard", "Kulczynski",
    "McNemar", "Phi", "Precision", "Rand", "Recall", "Rogers_Tanimoto",
    "Russel_Rao", "Sokal_Sneath1", "Sokal_Sneath2",
]

utils = rpackages.importr('utils')
utils.chooseCRANmirror(ind=1)

# Install only the R packages that are not already present, so that importing
# this module does not hit CRAN (and rebuild clusterCrit) on every run.
packnames = ['clusterCrit']
missing_packages = [pkg for pkg in packnames if not rpackages.isinstalled(pkg)]
if missing_packages:
    utils.install_packages(StrVector(missing_packages))

# Load packages
clusterCrit = importr('clusterCrit')
pandas2ri.activate()
def classNeRV():
    """Embed the ``globe.mat`` samples with ClassNeRV and label the embedding.

    The samples are first partitioned with a 2-cluster KMeans; those labels
    supervise the ClassNeRV embedding, and the same KMeans model is then asked
    to label the embedded points.

    Returns:
        The label array returned by ``KMeans.predict`` for the embedding.
    """
    model = ClassNeRV(perplex=32, scale_out=None, tradeoff_intra=1, tradeoff_inter=0)
    # dataSet() returns a (data, labels) pair; only the samples are used here.
    # Passing the whole tuple to KMeans, as before, cannot work.
    data, _ = dataSet()
    km = KMeans(n_clusters=2)
    km.fit(data)
    pos = model.fit_transform(data, km.labels_)
    # NOTE(review): km was fitted on the original feature space; predict() on
    # the embedding presumably only works when both have the same number of
    # columns -- confirm against the ClassNeRV output shape.
    return km.predict(pos)
def mds(ds):
    """Project a data set onto two dimensions with scikit-learn's MDS.

    Args:
        ds: Zero-argument callable returning the samples to embed.

    Returns:
        The result of ``MDS(n_components=2).fit_transform`` on those samples.
    """
    # A KMeans model used to be fitted here only to pass its labels as ``y``;
    # MDS.fit_transform ignores ``y``, so that clustering was wasted work.
    return MDS(n_components=2).fit_transform(ds())
def tSNE(ds):
    """Project a data set with the local ``tsne`` implementation.

    Args:
        ds: Zero-argument callable returning the samples to embed.

    Returns:
        Whatever ``tsne(data, 2, 50, 20.0)`` returns for those samples.
    """
    # NOTE(review): the positional arguments are presumably output dimensions,
    # initial (PCA) dimensions and perplexity -- confirm against the tsne module.
    # The KMeans model that was fitted after this call was never used, so it
    # has been removed.
    return tsne(ds(), 2, 50, 20.0)
def pca(ds):
    """Project a data set onto its first two principal components.

    Args:
        ds: Zero-argument callable returning the samples to embed.

    Returns:
        The result of ``PCA(n_components=2).fit_transform`` on those samples.
    """
    # A KMeans model used to be fitted here only to pass its labels as ``y``;
    # PCA.fit_transform ignores ``y``, so that clustering was wasted work.
    return PCA(n_components=2).fit_transform(ds())
def dataSet():
    """Load ``globe.mat`` and return its ``data`` and ``labels`` entries.

    Returns:
        A ``(data, labels)`` tuple; an entry is ``None`` when the MAT file
        does not contain the corresponding variable.
    """
    mat_contents = io.loadmat('globe.mat')
    return mat_contents.get('data'), mat_contents.get('labels')
def kmeans(data, labels):
    """Fit a 3-cluster KMeans model on ``data``.

    Args:
        data: Samples to cluster.
        labels: Forwarded to ``KMeans.fit`` as its ``y`` argument.

    Returns:
        The fitted ``KMeans`` estimator. The function used to return nothing,
        which discarded the model it had just fitted.
    """
    km = KMeans(n_clusters=3)
    km.fit(data, labels)
    return km
# all criteria
def metricsCalcul(originDataSet, dataSet):
    """Evaluate every criterion in ``crit`` with clusterCrit's ``extCriteria``.

    Args:
        originDataSet: First argument handed to ``extCriteria``.
        dataSet: Second argument handed to ``extCriteria``.

    Returns:
        Dict mapping each criterion name to its value rounded to two decimals.
        A criterion whose R call raises ``RRuntimeError`` is left out after
        its traceback and ``"skip"`` have been printed.
    """
    ext_criteria = clusterCrit.extCriteria
    scores = {}
    for criterion in crit:
        try:
            outcome = ext_criteria(originDataSet, dataSet, criterion)
            # The value of interest is the first element of the first entry.
            scores[criterion] = round(outcome[0][0], 2)
        except RRuntimeError:
            print(traceback.format_exc())
            print("skip")
    return scores
def calculate(method, ds):
    """Score one projection method on several data sets.

    For every data-set factory, the original samples and the output of
    ``method`` are each turned into a 3-cluster KMeans partition, and the two
    partitions are compared with ``metricsCalcul``.

    Args:
        method: Callable that takes a data-set factory and returns either a
            multi-dimensional embedding or an already computed 1-D label array.
        ds: Iterable of zero-argument callables, each returning the samples.

    Returns:
        Dict mapping each factory's ``__name__`` to the dict returned by
        ``metricsCalcul``.
    """
    json_data = {}
    for d in ds:
        dsOriginal = d()
        projected = method(d)

        p1 = KMeans(n_clusters=3).fit(dsOriginal).predict(dsOriginal).flatten()

        if getattr(projected, "ndim", 1) > 1:
            # The criteria compare two partitions of the same points, so the
            # embedding must be clustered first. Flattening its raw float
            # coordinates, as before, produced a vector of the wrong length
            # that did not represent a partition at all.
            p2 = KMeans(n_clusters=3).fit(projected).predict(projected)
        else:
            # A 1-D result is taken to already be a label vector.
            p2 = projected
        p2 = p2.flatten()

        json_data[d.__name__] = metricsCalcul(IntVector(p1), IntVector(p2))
    return json_data
if __name__ == "__main__":
    # Projection methods to evaluate, keyed by the name used in the reports.
    method = {"mds": mds, "tSNE": tSNE, "pca": pca}
    ds = [dataset.dataset1, dataset.dataset2, dataset.dataset3]

    json_data = {}
    for name, projector in method.items():
        res = calculate(projector, ds)
        print(res)
        json_data[name] = res

    # Use a context manager so the file is flushed and closed before pandas
    # reads it back; the previous bare open() never closed the handle.
    with open("mds.json", "w") as json_file:
        json.dump(json_data, json_file)

    # Flatten the per-method, per-data-set results into one row per
    # "<method> <data set>" pair.
    newDict = {}
    pdObj = pd.read_json("mds.json")
    for i in pdObj:
        s = pdObj[i].to_dict()
        for j in s:
            newDict[i + ' ' + j] = s[j]
    print(newDict)

    pdDs = pd.DataFrame.from_dict(newDict, orient="index")
    with open("res.csv", "w") as f:
        f.write(pdDs.to_csv())