
Commit 12a6b9c

merge
1 parent 20fd4ec commit 12a6b9c

6 files changed (+200 -35 lines)

README.md (+116 -1)
@@ -29,9 +29,124 @@ Once the path is under `capstone`, please run the command in following format.
 python3 main.py
 ```

-After the program complete, the results of program will display on terminal and result plots will be saved in `results`.
+After the program completes, the results will be displayed in the terminal and the plots will be saved in `./results`.
+
+## Description of Dataset
+
+This dataset is downloaded from [link](http://multilayer.it.uu.se/datasets.html). In this graph, the multiple layers represent relationships between 61 employees of a university department in five different aspects: (i) coworking, (ii) having lunch together, (iii) Facebook friendship, (iv) offline friendship (having fun together), and (v) co-authorship.
+
+#### Dataset Name: AUCS
+
+#### Type: Multilayer Graph
+
+#### Layers
+
+1. Facebook, UNDIRECTED
+2. Lunch, UNDIRECTED
+3. Coauthor, UNDIRECTED
+4. Leisure, UNDIRECTED
+5. Work, UNDIRECTED
+
+#### Actor Attributes
+
+1. ResearchGroup, STRING
+2. Role, STRING
+
+## Results
+
+```console
+--------------------Load multilayers graph--------------------
+
+Graph: lunch
+Number of nodes: 55
+Number of edges: 176
+
+Graph: facebook
+Number of nodes: 55
+Number of edges: 116
+
+Graph: leisure
+Number of nodes: 55
+Number of edges: 88
+
+Graph: work
+Number of nodes: 55
+Number of edges: 155
+
+Graph: coauthor
+Number of nodes: 55
+Number of edges: 21
+--------------------Perform alpha selection--------------------
+
+Alpha = 0.2
+Density = 0.06324630230880231
+NMI = 0.28437039334841613
+
+Alpha = 0.3
+Density = 0.05426587301587302
+NMI = 0.22851191671984766
+
+Alpha = 0.4
+Density = 0.074259768009768
+NMI = 0.2724979205931001
+
+Alpha = 0.5
+Density = 0.0838045634920635
+NMI = 0.24702647831111396
+
+Alpha = 0.6
+Density = 0.05803571428571429
+NMI = 0.24631830263834753
+
+Alpha = 0.7
+Density = 0.057311958874458876
+NMI = 0.26013012383823736
+
+Alpha = 0.8
+Density = 0.059573412698412695
+NMI = 0.27394942614468387
+
+Alpha = 0.9
+Density = 0.06098935786435787
+NMI = 0.2531115624498492
+
+Alpha = 1.0
+Density = 0.08007756132756133
+NMI = 0.2515334583668016
+--------------------Multilayer Result--------------------
+NMI: 0.28437039334841613
+Purity: 0.34545454545454546
+
+--------------------Single layer Result--------------------
+
+Layer: lunch
+NMI: 0.4078232316115382
+Purity: 0.4909090909090909
+
+Layer: facebook
+NMI: 0.23710067652109873
+Purity: 0.2909090909090909
+
+Layer: leisure
+NMI: 0.36515019997995346
+Purity: 0.45454545454545453
+
+Layer: work
+NMI: 0.3601914640378153
+Purity: 0.4727272727272727
+
+Layer: coauthor
+NMI: 0.30730821666442587
+Purity: 0.4
+```
 
 ## Project Member:
 
 1. Wen-Han Hu (whu24)
 2. Yang-Kai Chou (ychou3)
+
+## Reference
+
+1. Dong, Xiaowen, et al. "Clustering on multi-layer graphs via subspace analysis on Grassmann manifolds." IEEE Transactions on Signal Processing 62.4 (2013): 905-918.
+2. Kim, Jungeun, and Jae-Gil Lee. "Community detection in multi-layer graphs: A survey." ACM SIGMOD Record 44.3 (2015): 37-48.
+3. Zhang, Pan. "Evaluating accuracy of community detection using the relative normalized mutual information." Journal of Statistical Mechanics: Theory and Experiment 2015.11 (2015): P11006.
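
For orientation, the sketch below loads the layered AUCS data described in the README above into one `networkx` graph per layer, mirroring the parsing that `main.py` performs further down in this commit. It is a minimal sketch: the edge-list path and the `node1,node2,layer` / `node,group` column layout are assumptions inferred from that parsing code, not documented properties of the dataset.

```python
# Hedged sketch: load the AUCS layers into one networkx graph per layer.
# './data/aucs_edgelist.txt' is a hypothetical path; './data/aucs_nodelist.txt'
# and the column layout are taken from the parsing shown in main.py below.
from collections import defaultdict

import networkx as nx


def load_layers(edge_path='./data/aucs_edgelist.txt',
                node_path='./data/aucs_nodelist.txt'):
    # Actors whose research group is known; 'NA' rows are skipped, as in main.py.
    labeled = set()
    with open(node_path) as f:
        for line in f:
            parts = line.strip().split(',')
            if len(parts) < 2:
                continue
            node, group = parts[0], parts[1]
            if group != 'NA':
                labeled.add(node)

    # One undirected graph per layer name found in the edge list.
    graphs = defaultdict(nx.Graph)
    with open(edge_path) as f:
        for line in f:
            parts = line.strip().split(',')
            if len(parts) < 3:
                continue
            u, v, layer = parts[0], parts[1], parts[2]
            if u in labeled and v in labeled:
                graphs[layer].add_edge(u, v)
    return dict(graphs)


if __name__ == "__main__":
    for name, g in load_layers().items():
        print(name, g.number_of_nodes(), g.number_of_edges())
```
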

algo.py (+11 -2)
@@ -50,5 +50,14 @@ def SCML(G, k, alpha):
     kmeans = KMeans(init='k-means++', n_clusters=k,
                     n_init=30, random_state=1).fit(U)
     labels = kmeans.predict(U)
-    sse = kmeans.inertia_
-    return labels, U, sse
+    return labels
+
+
+def onelayer(g, k):
+    lap = nx.normalized_laplacian_matrix(g)
+    U = getU(lap, k).real.todense()
+    U = preprocessing.normalize(U, axis=1, norm='l1')
+    kmeans = KMeans(init='k-means++', n_clusters=k,
+                    n_init=30, random_state=1).fit(U)
+    labels = kmeans.predict(U)
+    return labels
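
The new `onelayer` helper relies on a `getU` spectral-embedding function that this diff does not show. As a point of reference, here is a self-contained sketch of the same single-layer pipeline with the embedding step written out (eigenvectors of the k smallest eigenvalues of the normalized Laplacian); the dense eigensolver and the toy graph are illustrative assumptions, not the repository's `getU`.

```python
# Self-contained sketch of single-layer spectral clustering, standing in for
# onelayer() + the repo's unseen getU() helper. Illustrative only.
import networkx as nx
import numpy as np
from sklearn import preprocessing
from sklearn.cluster import KMeans


def single_layer_clusters(g, k):
    # Normalized Laplacian of one layer, as a dense array (fine for 55 nodes).
    lap = nx.normalized_laplacian_matrix(g).toarray()
    # Eigenvectors of the k smallest eigenvalues form the spectral embedding.
    _, eigvecs = np.linalg.eigh(lap)
    U = preprocessing.normalize(eigvecs[:, :k], axis=1, norm='l1')
    return KMeans(init='k-means++', n_clusters=k,
                  n_init=30, random_state=1).fit_predict(U)


if __name__ == "__main__":
    toy = nx.karate_club_graph()  # toy stand-in for a single AUCS layer
    print(single_layer_clusters(toy, k=4))
```
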

main.py (+72 -32)
@@ -1,9 +1,19 @@
 import networkx as nx
 import numpy as np
 from algo import SCML
+from algo import onelayer
 import matplotlib.pyplot as plt
 from collections import Counter
-from sklearn.metrics import *
+from sklearn.metrics import v_measure_score as nmi
+from sklearn.metrics import silhouette_score
+from sklearn import metrics
+
+
+def purity_score(y_true, y_pred):
+    # compute contingency matrix (also called confusion matrix)
+    contingency_matrix = metrics.cluster.contingency_matrix(y_true, y_pred)
+    # return purity
+    return np.sum(np.amax(contingency_matrix, axis=0)) / np.sum(contingency_matrix)
 
 
 def init_graph():
@@ -12,10 +22,28 @@ def init_graph():
     with open(path) as f:
         for line in f:
             line = line.strip().split(',')
-            g.add_node(line[0])
+            if line[1] == 'NA':
+                continue
+            else:
+                g.add_node(line[0])
     return g
 
 
+def get_truth():
+    truth = []
+    na_list = []
+    path = './data/aucs_nodelist.txt'
+    with open(path) as f:
+        for line in f:
+            line = line.strip().split(',')
+            t = line[1]
+            if t == 'NA':
+                na_list.append(line[0])
+            else:
+                truth.append(int(t[-1])-1)
+    return truth, na_list
+
+
 def plot_elbow(x, y, name):
     plt.figure(figsize=(8, 8))
     plt.plot(x, y, '-o')
@@ -41,7 +69,7 @@ def get_partition(labels, nodes):
 
 def get_score(graph_list, partitions):
     density = np.zeros((len(graph_list), len(partitions)))
-    conductance = np.zeros((len(graph_list), len(partitions)))
+    # conductance = np.zeros((len(graph_list), len(partitions)))
     for i, g in enumerate(graph_list):
         for k in range(len(partitions)):
             g_sub = g.subgraph(partitions[k])
@@ -71,64 +99,76 @@ def main():
         'work': work,
         'coauthor': coauthor,
     }
+    truth, na = get_truth()
 
     # Load data into graph
     print("--------------------------------------------------Load multilayers graph--------------------------------------------------")
     with open(path) as f:
         for line in f:
             line = line.strip().split(',')
             name = line[2]
-            table[name].add_edge(line[0], line[1])
+            if line[0] in na or line[1] in na:
+                continue
+            else:
+                table[name].add_edge(line[0], line[1])
     for name, graph in table.items():
         print("\nGraph: {}".format(name))
         print("\tNumber of nodes: {}".format(nx.number_of_nodes(graph)))
         print("\tNumber of edges: {}".format(nx.number_of_edges(graph)))
 
-    graph_list = [lunch, facebook, leisure, work, coauthor]
+    graph_list = [lunch, work, coauthor, leisure]
     node_list = list(lunch.nodes)
 
-    # Tunning k
-    print("--------------------------------------------------Perform k clusters selection--------------------------------------------------")
-    sse_list = []
-    range_k = np.arange(2, 15)
-    for k in range_k:
-        labels, matrix, sse = SCML(graph_list, k, 0.5)
-        score = silhouette_score(matrix, labels, random_state=42)
-        print("Number of clusters k = {}".format(k),
-              ",Silhouette Score = {}".format(round(score, 5)))
-        sse_list.append(sse)
+    # # Tunning k
+    # print("--------------------------------------------------Perform k clusters selection--------------------------------------------------")
+    # sse_list = []
+    # range_k = np.arange(2, 15)
+    # for k in range_k:
+    #     labels, sse = SCML(graph_list, k, 0.5)
+    #     score = silhouette_score(matrix, labels, random_state=42)
+    #     print("Number of clusters k = {}".format(k),
+    #           ",Silhouette Score = {}".format(round(score, 5)))
+    #     sse_list.append(sse)
 
-    # Plot elbow method for k
-    plot_elbow(range_k, sse_list, "Selection of k")
+    # # Plot elbow method for k
+    # plot_elbow(range_k, sse_list, "Selection of k")
 
     # Tunning alpha
     print("--------------------------------------------------Perform alpha selection--------------------------------------------------")
     range_a = np.arange(0.2, 1.1, 0.1)
     den = []
+    nmi_list = []
     for alpha in range_a:
-        labels, matrix, sse = SCML(graph_list, 8, alpha)
+        labels = SCML(graph_list, 8, alpha)
         partitions = get_partition(labels, node_list)
         density = get_score(graph_list, partitions)
         den.append(density)
-        print("Alpha = {}".format(round(alpha, 1)),
-              ", Density = {}".format(density))
+        print("\nAlpha = {}".format(round(alpha, 1)))
+        print("\tDensity = {}".format(density))
+        nmi_value = nmi(truth, labels)
+        print("\tNMI = {}".format(nmi_value))
+        nmi_list.append(nmi_value)
 
     # Plot elbow method for alpha
-    plot_elbow(range_a, den, "Selection of alpha")
+    plot_elbow(range_a, den, "Selection of alpha (Density)")
+    plot_elbow(range_a, nmi_list, "Selection of alpha (NMI)")
 
     # Select the best model
-    labels, matrix, sse = SCML(graph_list, 8, 0.5)
+    print("--------------------------------------------------Multilayer Result--------------------------------------------------")
+    labels = SCML(graph_list, 8, 0.2)
     partitions = get_partition(labels, node_list)
-    get_score(graph_list, partitions)
-
-    # Evaluation of clustring
-    # print("--------------------------------------------------Clustering evaluation--------------------------------------------------")
-    # db_index = round(davies_bouldin_score(matrix, labels), 5)
-    # ch_index = round(calinski_harabasz_score(matrix, labels), 5)
-    # s_coef = round(silhouette_score(matrix, labels, metric='euclidean'), 5)
-    # print("Silhouette Score: {}".format(s_coef))
-    # print("Davies-Bouldin Score: {}".format(db_index))
-    # print("Calinski-Harabaz Score: {}".format(ch_index))
+
+    print("NMI: {}".format(nmi(truth, labels)))
+    purity = purity_score(truth, labels)
+    print("Purity: {}".format(purity))
+    print("--------------------------------------------------Single layer Result--------------------------------------------------")
+    for name, g in table.items():
+        print("\nLayer: {}".format(name))
+        labels = onelayer(g, 8)
+        # print(labels)
+        print("\tNMI: {}".format(nmi(truth, labels)))
+        purity = purity_score(truth, labels)
+        print("\tPurity: {}".format(purity))
 
 
 if __name__ == "__main__":
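
A note on the metric used above: `main.py` reports "NMI" through scikit-learn's `v_measure_score`, and the V-measure equals normalized mutual information with arithmetic averaging, so the NMI figures in the README are V-measure values. A quick check with made-up labelings (not project data):

```python
# Verify that v_measure_score matches NMI with arithmetic averaging
# (the default average_method in recent scikit-learn releases).
from sklearn.metrics import normalized_mutual_info_score, v_measure_score

truth = [0, 0, 1, 1, 2, 2]   # illustrative ground-truth labels
pred = [0, 0, 1, 2, 2, 2]    # illustrative cluster assignments

print(v_measure_score(truth, pred))
print(normalized_mutual_info_score(truth, pred, average_method='arithmetic'))
# Both lines print the same value.
```
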

requirements.txt (+1)
@@ -2,3 +2,4 @@ networkx
 numpy
 scipy
 sklearn
+matplotlib

results/Selection of alpha.png (-40.6 KB, binary file not shown)

results/Selection of k.png (-32.2 KB, binary file not shown)
