-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdata.py
146 lines (112 loc) · 4.65 KB
/
data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import numpy as np
import networkx as nx
import torch
import torch_geometric
# Default torch device for this module: the first CUDA GPU when one is
# available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'
def standardize_matrix(matrix):
    """Z-score every column of ``matrix``.

    Each column has its mean subtracted and is divided by its (population)
    standard deviation plus a small constant, so constant columns map to
    zeros instead of triggering a division by zero.

    Args:
        matrix: array-like with samples along axis 0.

    Returns:
        Array of the same shape holding the standardized values.
    """
    eps = 1e-7  # keeps the denominator non-zero for constant columns
    column_mean = np.mean(matrix, axis=0)
    column_std = np.std(matrix, axis=0)
    return (matrix - column_mean) / (column_std + eps)
# get the feature matrix from a network
def get_feature(G):
    """Build the standardized structural feature matrix of a graph.

    One row per node (in ``G.nodes()`` order) and five columns:

    0. degree
    1. degree minus the mean degree of the node's neighbors
    2. ``(mean_neighbor_degree - degree)**2 / mean_neighbor_degree``
    3. clustering coefficient (``nx.clustering``)
    4. core number (``nx.core_number``)

    The columns are then z-scored with ``standardize_matrix``.

    Args:
        G: a networkx graph.

    Returns:
        ``torch.Tensor`` of shape ``(num_nodes, 5)``.
    """
    # Look degrees up once instead of calling G.degree() per node/neighbor.
    degree_of = dict(G.degree())
    degrees = np.array(list(degree_of.values())).astype('float')[:, None]

    degree_diffs = []
    degree_chis = []
    for node in G.nodes():
        node_degree = degree_of[node]
        neighbors_degrees = [degree_of[neighbor] for neighbor in G.neighbors(node)]
        mean_neighbors_degree = np.mean(neighbors_degrees) if neighbors_degrees else 0
        if mean_neighbors_degree:
            dc = (mean_neighbors_degree - node_degree) ** 2 / mean_neighbors_degree
        else:
            # Isolated node: there is no neighbor degree to compare against,
            # so define the term as 0 rather than dividing by zero.
            dc = 0.0
        degree_chis.append(dc)
        degree_diffs.append(node_degree - mean_neighbors_degree)

    degree_chis = np.array(degree_chis)[:, None]
    degree_diffs = np.array(degree_diffs)[:, None]
    clustering_coefficients = np.array(list(nx.clustering(G).values()))[:, None]
    core_numbers = np.array(list(nx.core_number(G).values())).astype('float')[:, None]

    node_features = np.concatenate(
        (degrees, degree_diffs, degree_chis, clustering_coefficients, core_numbers),
        axis=1,
    )
    normed_feas = standardize_matrix(node_features)
    return torch.from_numpy(normed_feas)
# get the partition function on a fixed grid of time points
def get_zt(padj):
    """Evaluate the Laplacian partition function on a fixed time grid.

    Forms ``L = D - padj`` where ``D`` is the diagonal matrix of the row sums
    of ``padj``, takes the eigenvalues ``lambda_i`` of ``L`` and returns
    ``Z(t) = (1 / n) * sum_i exp(-lambda_i * t)`` for the eleven time points
    ``t = 0.01 * 2**k``, ``k = 0..10``. Diagonal entries of ``padj`` are used
    as-is (they are not zeroed).

    Helper tensors are created on ``padj.device`` so the call works wherever
    the caller placed ``padj``.

    Args:
        padj: square ``(n, n)`` tensor; presumably a (weighted) adjacency
            matrix.

    Returns:
        1-D tensor with one value per time point, on ``padj``'s device. It is
        complex-valued because ``torch.linalg.eig`` always returns complex
        eigenvalues.
    """
    ts = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24]
    n = padj.shape[0]
    ts = torch.tensor(ts, device=padj.device).unsqueeze(1)  # (T, 1)
    # Row sums broadcast against the identity give the diagonal degree matrix.
    D = torch.sum(padj, dim=1).unsqueeze(1) * torch.eye(n, device=padj.device)
    L = D - padj
    evl, _ = torch.linalg.eig(L)  # eigenvectors are not needed
    # (1, n) eigenvalues times (T, 1) time points broadcast to (T, n).
    zts = torch.exp(-evl.unsqueeze(0) * ts)
    return torch.sum(zts, dim=1) / n
# get the partition function at caller-supplied time points
def get_zt_ts(padj, ts):
    """Evaluate the Laplacian partition function at the given time points.

    Forms ``L = D - padj`` where ``D`` is the diagonal matrix of the row sums
    of ``padj``, takes the eigenvalues ``lambda_i`` of ``L`` and returns
    ``Z(t) = (1 / n) * sum_i exp(-lambda_i * t)`` for every ``t`` in ``ts``.

    Helper tensors are created on ``padj.device`` so the call works wherever
    the caller placed ``padj``.

    Args:
        padj: square ``(n, n)`` tensor; presumably a (weighted) adjacency
            matrix.
        ts: 1-D sequence of time points.

    Returns:
        1-D tensor with one value per entry of ``ts``, on ``padj``'s device.
        It is complex-valued because ``torch.linalg.eig`` always returns
        complex eigenvalues.
    """
    n = padj.shape[0]
    ts = torch.tensor(ts, device=padj.device).unsqueeze(1)  # (T, 1)
    # Row sums broadcast against the identity give the diagonal degree matrix.
    D = torch.sum(padj, dim=1).unsqueeze(1) * torch.eye(n, device=padj.device)
    L = D - padj
    evl, _ = torch.linalg.eig(L)  # eigenvectors are not needed
    # (1, n) eigenvalues times (T, 1) time points broadcast to (T, n).
    zts = torch.exp(-evl.unsqueeze(0) * ts)
    return torch.sum(zts, dim=1) / n
def read_mtx(file_path):
    """Read a Matrix Market style coordinate file into a networkx graph.

    Lines containing ``%`` and blank lines are ignored. The first remaining
    line is treated as the size header and skipped; every following line
    contributes one edge taken from its first two integer fields. Indices in
    the file are 1-based and are shifted to 0-based node ids. Any further
    fields on a line (e.g. weights) are ignored; every listed entry is
    stored as 1.

    The number of nodes is ``largest index seen`` (after shifting) plus one,
    not the size declared in the header.

    Args:
        file_path: path of the file to read.

    Returns:
        The graph built by ``nx.from_numpy_array`` from the dense 0/1
        adjacency matrix.

    Raises:
        ValueError: if the file has no size line or lists no edges.
    """
    with open(file_path, 'r') as file:
        # Drop comment/banner lines and blank lines up front.
        content = [line for line in file if '%' not in line and line.strip()]
    if not content:
        raise ValueError(f"no size line found in {file_path!r}")

    rows, cols = [], []
    # content[0] is the size header; entries start on the next line.
    for line in content[1:]:
        row, col = map(int, line.split()[:2])
        # .mtx indices start at 1 while Python indexing starts at 0.
        rows.append(row - 1)
        cols.append(col - 1)
    if not rows:
        raise ValueError(f"no edges found in {file_path!r}")

    nn = max(max(rows), max(cols)) + 1
    adj = np.zeros([nn, nn])
    adj[rows, cols] = 1
    return nx.from_numpy_array(adj)
def read_edges_file(file_path):
    """Read an edge-list file into an undirected networkx graph.

    Each usable line holds two integer node ids separated either by a comma
    or by whitespace (spaces or tabs); extra fields are ignored. Lines
    containing ``%``, blank lines and lines with fewer than two fields are
    skipped. Node ids are assumed to be 0-based; the graph has
    ``max id + 1`` nodes.

    Args:
        file_path: path of the edge-list file.

    Returns:
        The graph built by ``nx.from_numpy_array`` from the symmetric dense
        0/1 adjacency matrix.

    Raises:
        ValueError: if no edge could be read, or a field is not an integer.
    """
    edges = []
    with open(file_path, 'r') as file:
        for line in file:
            if '%' in line:
                continue
            # Commas take precedence; otherwise split on any whitespace so
            # tab-separated files are handled as well as space-separated ones.
            fields = line.strip().split(',') if ',' in line else line.split()
            if len(fields) < 2:
                continue
            edges.append((int(fields[0]), int(fields[1])))
    if not edges:
        raise ValueError(f"no edges found in {file_path!r}")

    # Node ids are assumed to start at 0.
    max_node = max(max(i, j) for i, j in edges)
    adjacency_matrix = np.zeros((max_node + 1, max_node + 1), dtype=int)
    for i, j in edges:
        adjacency_matrix[i, j] = 1
        adjacency_matrix[j, i] = 1  # mirror the entry: the graph is undirected
    return nx.from_numpy_array(adjacency_matrix)