demo_dgi.py
# coding=utf-8
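"""
Deep Graph Infomax (DGI) demo on the Cora citation graph.

A GCN encoder is trained unsupervised by maximizing agreement between node
representations and a graph-level summary vector, following the reference
implementation at https://github.com/PetarV-/DGI; the learned embeddings are
then evaluated with a linear classifier on node classification.
"""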
import os

# Set GPU visibility before TensorFlow is imported; setting it afterwards has no effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import tf_geometric as tfg
import tensorflow as tf
from tensorflow import keras
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score

# Load the Cora citation graph and its standard train/valid/test split
graph, (train_index, valid_index, test_index) = tfg.datasets.CoraDataset().load_data()
num_classes = graph.y.max() + 1

embedding_size = 512
drop_rate = 0.0
dropout = keras.layers.Dropout(drop_rate)  # no-op with drop_rate = 0.0; unused in this demo


class GCNNetwork(keras.Model):
    """
    1-layer GCN encoder
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.gcn = tfg.layers.GCN(
            embedding_size,
            activation=tf.keras.layers.PReLU(alpha_initializer=tf.keras.initializers.Constant(value=0.25))
        )

    def call(self, inputs, training=None, mask=None, cache=None):
        x, edge_index = inputs
        h = self.gcn([x, edge_index], cache=cache)
        return h
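
# Illustrative shape check (not part of the original demo): Cora has 2708 nodes
# with 1433-dimensional bag-of-words features, so the encoder maps
#   x: [2708, 1433], edge_index: [2, num_edges]  ->  h: [2708, 512]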


class Bilinear(keras.Model):
    """
    Bilinear discriminator for the DGI loss
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.dense = None
        self.bias = tf.Variable(0.0)

    def build(self, input_shapes):
        self.dense = tf.keras.layers.Dense(input_shapes[1][-1], use_bias=False)

    def call(self, inputs, training=None, mask=None, cache=None):
        a, b = inputs
        h = tf.reduce_sum(self.dense(a) * b, axis=-1) + self.bias
        return h
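
# The discriminator scores each (node, summary) pair as a bilinear form,
# computed row-wise as the dot product of (W a) with b plus a scalar bias,
# matching the bilinear discriminator described in the DGI paper.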


# GCN-based encoder
model = GCNNetwork()
# Bilinear discriminator for the DGI loss
bilinear_model = Bilinear()


def encode(graph, permutation=False, training=False):
    # Permute node features to create a corrupted graph for negative sampling
    if permutation:
        perm_index = np.random.permutation(graph.x.shape[0])
        x = tf.gather(graph.x, perm_index)
    else:
        x = graph.x
    h = model([x, graph.edge_index], cache=graph.cache, training=training)
    return h
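
# Note: this corruption function shuffles the rows of the feature matrix while
# keeping edge_index fixed, so negative samples pair the real graph structure
# with mismatched node features, as in the DGI paper.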


# Fast evaluation with a linear probe (scikit-learn logistic regression)
def evaluate_with_sklearn():
    embedded = encode(graph)
    embeddings = embedded.numpy()
    train_X, train_Y = embeddings[train_index], graph.y[train_index]
    test_X, test_Y = embeddings[test_index], graph.y[test_index]
    cls = LogisticRegression(C=10000, max_iter=500)
    cls.fit(train_X, train_Y)
    pred_Y = cls.predict(test_X)
    micro_f1 = f1_score(test_Y, pred_Y, average="micro")
    return micro_f1
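
# C=10000 makes the L2 penalty negligible, so this is close to an
# unregularized linear classifier on the frozen embeddings.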


# Following https://github.com/PetarV-/DGI, a fully-connected softmax layer is
# trained for the final evaluation, which is slower
def evaluate_with_tf_keras():
    clf = tf.keras.Sequential([
        tf.keras.layers.Dense(num_classes, activation="softmax")
    ])
    clf.compile(loss=tf.keras.losses.categorical_crossentropy,
                optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2))
    embedded = encode(graph)
    embeddings = embedded.numpy()
    train_X, train_Y = embeddings[train_index], graph.y[train_index]
    test_X, test_Y = embeddings[test_index], graph.y[test_index]
    clf.fit(train_X, tf.one_hot(train_Y, depth=num_classes).numpy(), epochs=100, verbose=False)
    pred_Y = tf.argmax(clf(test_X), axis=-1)
    micro_f1 = f1_score(test_Y, pred_Y, average="micro")
    return micro_f1
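

# DGI training: a binary cross-entropy objective trains the discriminator to
# assign high scores to (node, summary) pairs from the real graph and low
# scores to pairs built from the corrupted graph; gradients flow back into
# the encoder.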
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

for step in range(301):
    with tf.GradientTape() as tape:
        # positive node representations (real graph)
        pos_h = encode(graph, permutation=False, training=True)
        # negative node representations (corrupted graph)
        neg_h = encode(graph, permutation=True, training=True)

        # graph-level summary: sigmoid of the mean over positive node representations
        pos_graph_h = tf.nn.sigmoid(tf.reduce_mean(pos_h, axis=0, keepdims=True))

        pos_logits = bilinear_model([pos_h, pos_graph_h], training=True)
        neg_logits = bilinear_model([neg_h, pos_graph_h], training=True)

        pos_losses = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=pos_logits,
            labels=tf.ones_like(pos_logits)
        )
        neg_losses = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=neg_logits,
            labels=tf.zeros_like(neg_logits)
        )

        # DGI loss
        loss = tf.reduce_mean(pos_losses + neg_losses)

    variables = tape.watched_variables()
    grads = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(grads, variables))

    if step % 50 == 0:
        micro_f1 = evaluate_with_sklearn()
        print("step = {}\tloss = {}\tmicro_f1 = {}".format(step, loss, micro_f1))
print("start final evaluation with tf.keras ......")
micro_f1 = evaluate_with_tf_keras()
print("final micro_f1 = {}".format(micro_f1))