# testDPMixture.R
# Clean the workspace and load the helper functions and required packages
rm(list = ls())
setwd("~/GitHub/GraphicalModels-BayesStat")
source("categorical.R")   # helper functions used below (graph, data-generation and DP mixture routines)
library(igraph)           # erdos.renyi.game, as_adjacency_matrix
library(mcclust)          # vi.dist
# Repeatedly sample an Erdos-Renyi graph G(q, p) until a decomposable one is obtained
generateDecomposableGraph = function(q, p){
  while(TRUE){
    graph = erdos.renyi.game(q, p, type = "gnp", directed = FALSE)
    graph = as_adjacency_matrix(graph, sparse = FALSE)
    if(isDecomposable(graph)){
      return(graph)
    }
  }
}
# Convert a posterior similarity matrix into hard cluster labels: after rounding,
# each unit is labelled by the smallest index among the units it co-clusters with,
# and labels are then relevelled to 1, 2, ...
similarityMatrixtoClustering = function(similarityMatrix){
  similarityMatrix = round(similarityMatrix)
  n = nrow(similarityMatrix)   # number of units
  indices = c()
  for(i in n:1){
    indices[similarityMatrix[i, ] == 1] = i
  }
  indices = as.factor(indices)
  levels(indices) = 1:length(levels(indices))
  return(indices)
}
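# Illustrative sketch: on a toy 3x3 similarity matrix the conversion groups the
# first two units and leaves the third on its own (expected labels: 1 1 2).
# toySim = matrix(c(1.0, 0.9, 0.1,
#                   0.9, 1.0, 0.2,
#                   0.1, 0.2, 1.0), nrow = 3)
# similarityMatrixtoClustering(toySim)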
# Generate two different decomposable graphs
q = 10 # Number of variables
n_1 = 100 # Sample size for group 1
n_2 = 100 # Sample size for group 2
p = 2 / q # Edge-inclusion probability
graph1 = generateDecomposableGraph(q,p)
graph2 = generateDecomposableGraph(q,p)
# Plot the two true graphs, each in a new graphics window
x11()
plotGraph(graph1)
x11()
plotGraph(graph2)
# Generate the data
X_1 = generateCategoricalDataFromGraph(adjacencyMatrix = graph1, n.obs = n_1, n.variables = q)[[2]]
X_2 = generateCategoricalDataFromGraph(adjacencyMatrix = graph2, n.obs = n_2, n.variables = q)[[2]]
Xi = c(rep(1, n_1), rep(2, n_2)) # True cluster indicators
X = rbind(X_1, X_2) # Dataset
n = n_1 + n_2 # Total sample size
# MCMC settings
n.iter = 2500   # number of iterations
burnin = 500    # burn-in iterations to discard
# Prior hyperparameters
a_pi = 1
b_pi = 2*(q-2)/3
a_alpha = 1
b_alpha = 3
# Run the MCMC (the alternative DPMixture sampler is left commented out)
# mcmc = DPMixture(data = X, n.iter, burnin, a_alpha, b_alpha, a_pi, b_pi)
mcmc = DPMixture_Efficient(data = X, n.iter, burnin, a_alpha, b_alpha, a_pi, b_pi)
# Cluster estimates
similarityMatrix = mcmc$similarityMatrix
clustering = similarityMatrixtoClustering(similarityMatrix)
# Variation of Information (VI) between true and estimated clustering
vi.dist(Xi,clustering)
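# Optional diagnostic (illustrative, beyond the VI distance): cross-tabulate the
# true and estimated labels to see how the two simulated groups are recovered.
# table(Xi, clustering)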