@@ -44,109 +44,155 @@ colSums(ECP.EDN.MG94[26:33, ] + ECP.EDN.MG94[34:41, ])/colSums(ECP.EDN.MG94[42:4
44
44
45
45
```
46
46
47
- \newpage
48
- 04212017 update
49
-
50
- Now plot posterior log likelihood ratio: $ln(\frac{Pr(S_i = 1 | x)}{Pr(S_i = 0 | x)}$.
51
-
52
- The derivatives are $\frac{{\partial \ln L}}{{\partial \ln {p_ {tract}}}}$ for
53
- the first order and $\frac{{\partial^2 \ln L}}{{\partial \ln {p_ {tract}}}^2}$ for
54
- second order.
55
-
56
- The variance is calculated by:
57
- $Var(ln(p_ {tract}))=\frac{1}{I(ln(p_ {tract}))} \approx - \frac{1}{\frac{{\partial^2 \ln L}}{{\partial \ln {p_ {tract}}}^2}}$
58
-
59
- 95% confidence interval for $ln(p_ {tract})$ is $ln(p_ {tract}) \pm 1.96 * \sqrt {Var(\ln ({p_ {tract}}))}$
60
-
61
- By transforming to $3.0 / p_ {tract}$ to get the average tract length in nucleotide.
62
-
63
- ``` {r}
64
- # plot one paralog
65
- paralog = "EDN_ECP"
66
- lnL.ratio <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_log_posterior_ratio.txt", sep = "")))
67
- Viterbi.path <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_Viterbi_path.txt", sep = "")))
68
- lnL.surface <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_lnL_surface.txt", sep = "")))
69
- IGC.sw.lnL <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_sw_lnL.txt", sep = "")))
70
- Force.sw.lnL <- as.vector(read.table(paste("./summary/Force_", paralog, "_MG94_nonclock_sw_lnL.txt", sep = "")))
71
-
72
- plot(lnL.ratio[, 1], xlab = "codon number", ylab = "log values",
73
- type = "l", col = "black", lty = 1,
74
- main = paste(paralog, " HMM result"),
75
- ylim = c(min(-0.5, min(lnL.ratio)), max(lnL.ratio)))
76
- lines(1:dim(Viterbi.path)[1], Viterbi.path[, 1], type = "S", lty = 2, col = "red")
77
- #lines(1:dim(IGC.sw.lnL)[1], IGC.sw.lnL[, 2] - Force.sw.lnL[, 2], type = "l", lty = 3, col = "red")
78
- legend(1, max(lnL.ratio), legend = c("log posterior ratio", "Viterbi path"),
79
- lty = c(1, 2), col = c( "black", "red"))
80
-
81
- plot(-lnL.surface[, 1], xlab = "tract length in nucleotide", ylab= "lnL", type = "l", col = "black", lty = 1,
82
- main = paste(paralog, " lnL surface"))
83
-
84
- summary.mat <- read.table("./HMM_tract_MG94_nonclock_summary.txt")
85
-
86
- # Now calculate standard deviation of lnP
87
- lnP <- log(3.0 / summary.mat[, 3])
88
- sd.lnP <- 1.0 / sqrt(-summary.mat[, 7])
89
- low.cf <- exp(lnP - 1.96 * sd.lnP)
90
- up.cf <- exp(lnP + 1.96 * sd.lnP)
91
- up.cf[up.cf > 1] <- 1.0
92
- summary.mat <- cbind(summary.mat, 3.0 / up.cf, 3.0 / low.cf)
93
- rownames(summary.mat) <- c("EDN_ECP")
94
- colnames(summary.mat) <- c("lnL", "max lnL", "tract length",
95
- "Pr(S_0)", "Pr(S_1)",
96
- "df", "d^2f", "c.i. tract length", "c.i tract length")
97
- summary.mat
98
- ```
47
+ <!-- \newpage -->
48
+ <!-- 04212017 update -->
49
+
50
+ <!-- Now plot posterior log likelihood ratio: $ln(\frac{Pr(S_i = 1 | x)}{Pr(S_i = 0 | x)}$. -->
51
+
52
+ <!-- The derivatives are $\frac{{\partial \ln L}}{{\partial \ln {p_{tract}}}}$ for -->
53
+ <!-- the first order and $\frac{{\partial^2 \ln L}}{{\partial \ln {p_{tract}}}^2}$ for -->
54
+ <!-- second order. -->
55
+
56
+ <!-- The variance is calculated by: -->
57
+ <!-- $Var(ln(p_{tract}))=\frac{1}{I(ln(p_{tract}))} \approx - \frac{1}{\frac{{\partial^2 \ln L}}{{\partial \ln {p_{tract}}}^2}}$ -->
58
+
59
+ <!-- 95% confidence interval for $ln(p_{tract})$ is $ln(p_{tract}) \pm 1.96 * \sqrt {Var(\ln ({p_{tract}}))}$ -->
60
+
61
+ <!-- By transforming to $3.0 / p_{tract}$ to get the average tract length in nucleotide. -->
62
+
63
+ <!-- ```{r} -->
64
+ <!-- # plot one paralog -->
65
+ <!-- paralog = "EDN_ECP" -->
66
+ <!-- lnL.ratio <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_log_posterior_ratio.txt", sep = ""))) -->
67
+ <!-- Viterbi.path <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_Viterbi_path.txt", sep = ""))) -->
68
+ <!-- lnL.surface <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_lnL_surface.txt", sep = ""))) -->
69
+ <!-- IGC.sw.lnL <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_sw_lnL.txt", sep = ""))) -->
70
+ <!-- Force.sw.lnL <- as.vector(read.table(paste("./summary/Force_", paralog, "_MG94_nonclock_sw_lnL.txt", sep = ""))) -->
71
+
72
+ <!-- plot(lnL.ratio[, 1], xlab = "codon number", ylab = "log values", -->
73
+ <!-- type = "l", col = "black", lty = 1, -->
74
+ <!-- main = paste(paralog, " HMM result"), -->
75
+ <!-- ylim = c(min(-0.5, min(lnL.ratio)), max(lnL.ratio))) -->
76
+ <!-- lines(1:dim(Viterbi.path)[1], Viterbi.path[, 1], type = "S", lty = 2, col = "red") -->
77
+ <!-- #lines(1:dim(IGC.sw.lnL)[1], IGC.sw.lnL[, 2] - Force.sw.lnL[, 2], type = "l", lty = 3, col = "red") -->
78
+ <!-- legend(1, max(lnL.ratio), legend = c("log posterior ratio", "Viterbi path"), -->
79
+ <!-- lty = c(1, 2), col = c( "black", "red")) -->
80
+
81
+ <!-- plot(-lnL.surface[, 1], xlab = "tract length in nucleotide", ylab= "lnL", type = "l", col = "black", lty = 1, -->
82
+ <!-- main = paste(paralog, " lnL surface")) -->
83
+
84
+ <!-- summary.mat <- read.table("./HMM_tract_MG94_nonclock_summary.txt") -->
85
+
86
+ <!-- # Now calculate standard deviation of lnP -->
87
+ <!-- lnP <- log(3.0 / summary.mat[, 3]) -->
88
+ <!-- sd.lnP <- 1.0 / sqrt(-summary.mat[, 7]) -->
89
+ <!-- low.cf <- exp(lnP - 1.96 * sd.lnP) -->
90
+ <!-- up.cf <- exp(lnP + 1.96 * sd.lnP) -->
91
+ <!-- up.cf[up.cf > 1] <- 1.0 -->
92
+ <!-- summary.mat <- cbind(summary.mat, 3.0 / up.cf, 3.0 / low.cf) -->
93
+ <!-- rownames(summary.mat) <- c("EDN_ECP") -->
94
+ <!-- colnames(summary.mat) <- c("lnL", "max lnL", "tract length", -->
95
+ <!-- "Pr(S_0)", "Pr(S_1)", -->
96
+ <!-- "df", "d^2f", "c.i. tract length", "c.i tract length") -->
97
+ <!-- summary.mat -->
98
+ <!-- ``` -->
99
+
100
+ <!-- \newpage -->
101
+ <!-- 09132017 update -->
102
+
103
+ <!-- Now plot lnL surface of MG94+IS-IGC+HMM model. -->
104
+
105
+ <!-- The derivatives are $\frac{{\partial \ln L}}{{\partial \ln {p_{tract}}}}$ for -->
106
+ <!-- the first order and $\frac{{\partial^2 \ln L}}{{\partial \ln {p_{tract}}}^2}$ for -->
107
+ <!-- second order. -->
108
+
109
+ <!-- The variance is calculated by: -->
110
+ <!-- $Var(ln(p_{tract}))=\frac{1}{I(ln(p_{tract}))} \approx - \frac{1}{\frac{{\partial^2 \ln L}}{{\partial \ln {p_{tract}}}^2}}$ -->
111
+
112
+ <!-- 95% confidence interval for $ln(p_{tract})$ is $ln(p_{tract}) \pm 1.96 * \sqrt {Var(\ln ({p_{tract}}))}$ -->
113
+
114
+ <!-- By transforming to $3.0 / p_{tract}$ to get the average tract length in nucleotide. -->
115
+
116
+
117
+ <!-- ```{r} -->
118
+ <!-- # plot one paralog -->
119
+ <!-- paralog = "EDN_ECP" -->
120
+ <!-- lnL.ratio <- as.vector(read.table(paste("./summary/", paralog, "_Ind_MG94_HMM_Posterior_lnL.txt", sep = ""))) -->
121
+ <!-- Viterbi.path <- as.vector(read.table(paste("./summary/", paralog, "_Ind_MG94_HMM_Viterbi_array.txt", sep = ""))) -->
122
+ <!-- lnL.surface <- as.vector(read.table(paste("./plot/HMM_", paralog, "_lnL_1D_surface.txt", sep = ""))) -->
123
+
124
+ <!-- plot(1:dim(lnL.ratio)[1], lnL.ratio[, 2] - lnL.ratio[, 1], xlab = "codon number", ylab = "log values", -->
125
+ <!-- type = "l", col = "black", lty = 1, -->
126
+ <!-- main = paste(paralog, " HMM result"), -->
127
+ <!-- ylim = c(min(-0.5, min(lnL.ratio[, 2] - lnL.ratio[, 1])), max(lnL.ratio[, 2] - lnL.ratio[, 1]))) -->
128
+ <!-- lines(1:dim(Viterbi.path)[1], Viterbi.path[, 1], type = "S", lty = 2, col = "red") -->
129
+ <!-- legend(1, max(lnL.ratio[, 2] - lnL.ratio[, 1]), legend = c("log posterior ratio", "Viterbi path"), -->
130
+ <!-- lty = c(1, 2), col = c( "black", "red")) -->
131
+
132
+ <!-- plot.new() -->
133
+ <!-- plot(-lnL.surface[, 1],lnL.surface[, 2], xlab = "log tract length in codon", ylab= "lnL", type = "l", col = "black", lty = 1, -->
134
+ <!-- main = paste(paralog, " lnL surface")) -->
135
+ <!-- abline(v = log(6.7/3.), col = "red") -->
136
+
137
+ <!-- summary.mat <- read.table("./Summary/EDN_ECP_Ind_MG94_HMM_1D_summary.txt") -->
138
+ <!-- hessian <- read.table("./Summary/EDN_ECP_Ind_MG94_HMM_Hessian.txt") -->
139
+
140
+ <!-- # Now calculate standard deviation of lnP -->
141
+ <!-- lnP <- log(summary.mat[10,1]) -->
142
+ <!-- sd.lnP <- 1.0 / sqrt(-hessian[2, 1]) -->
143
+ <!-- low.cf <- exp(lnP - 1.96 * sd.lnP) -->
144
+ <!-- up.cf <- exp(lnP + 1.96 * sd.lnP) -->
145
+ <!-- up.cf[up.cf > 1] <- 1.0 -->
146
+ <!-- show.mat <- matrix(c(summary.mat[1,1], max(-lnL.surface[, 2]), 3.0/summary.mat[10, 1], -->
147
+ <!-- hessian[1,1], hessian[2,1], 3.0 / up.cf, 3.0 / low.cf), 1, 7) -->
148
+ <!-- rownames(show.mat) <- c("EDN_ECP") -->
149
+ <!-- colnames(show.mat) <- c("lnL", "max lnL", "tract length", -->
150
+ <!-- "df", "d^2f", "c.i. tract length", "c.i tract length") -->
151
+ <!-- show.mat -->
152
+ <!-- ``` -->
99
153
100
154
\newpage
101
- 09132017 update
102
-
103
- Now plot lnL surface of MG94+IS-IGC+HMM model.
104
-
105
- The derivatives are $\frac{{\partial \ln L}}{{\partial \ln {p_ {tract}}}}$ for
106
- the first order and $\frac{{\partial^2 \ln L}}{{\partial \ln {p_ {tract}}}^2}$ for
107
- second order.
108
-
109
- The variance is calculated by:
110
- $Var(ln(p_ {tract}))=\frac{1}{I(ln(p_ {tract}))} \approx - \frac{1}{\frac{{\partial^2 \ln L}}{{\partial \ln {p_ {tract}}}^2}}$
111
-
112
- 95% confidence interval for $ln(p_ {tract})$ is $ln(p_ {tract}) \pm 1.96 * \sqrt {Var(\ln ({p_ {tract}}))}$
113
-
114
- By transforming to $3.0 / p_ {tract}$ to get the average tract length in nucleotide.
115
-
155
+ 12212017 update
116
156
157
+ HKY+PS-IGC results
117
158
``` {r}
118
- # plot one paralog
119
- paralog = "EDN_ECP"
120
- lnL.ratio <- as.vector(read.table(paste("./summary/", paralog, "_Ind_MG94_HMM_Posterior_lnL.txt", sep = "")))
121
- Viterbi.path <- as.vector(read.table(paste("./summary/", paralog, "_Ind_MG94_HMM_Viterbi_array.txt", sep = "")))
122
- lnL.surface <- as.vector(read.table(paste("./plot/HMM_", paralog, "_lnL_1D_surface.txt", sep = "")))
123
-
124
- plot(1:dim(lnL.ratio)[1], lnL.ratio[, 2] - lnL.ratio[, 1], xlab = "codon number", ylab = "log values",
125
- type = "l", col = "black", lty = 1,
126
- main = paste(paralog, " HMM result"),
127
- ylim = c(min(-0.5, min(lnL.ratio[, 2] - lnL.ratio[, 1])), max(lnL.ratio[, 2] - lnL.ratio[, 1])))
128
- lines(1:dim(Viterbi.path)[1], Viterbi.path[, 1], type = "S", lty = 2, col = "red")
129
- legend(1, max(lnL.ratio[, 2] - lnL.ratio[, 1]), legend = c("log posterior ratio", "Viterbi path"),
130
- lty = c(1, 2), col = c( "black", "red"))
131
-
132
- plot.new()
133
- plot(-lnL.surface[, 1],lnL.surface[, 2], xlab = "log tract length in codon", ylab= "lnL", type = "l", col = "black", lty = 1,
134
- main = paste(paralog, " lnL surface"))
135
- abline(v = log(6.7/3.), col = "red")
136
-
137
- summary.mat <- read.table("./Summary/EDN_ECP_Ind_MG94_HMM_1D_summary.txt")
138
- hessian <- read.table("./Summary/EDN_ECP_Ind_MG94_HMM_Hessian.txt")
139
-
140
- # Now calculate standard deviation of lnP
141
- lnP <- log(summary.mat[10,1])
142
- sd.lnP <- 1.0 / sqrt(-hessian[2, 1])
143
- low.cf <- exp(lnP - 1.96 * sd.lnP)
144
- up.cf <- exp(lnP + 1.96 * sd.lnP)
145
- up.cf[up.cf > 1] <- 1.0
146
- show.mat <- matrix(c(summary.mat[1,1], max(-lnL.surface[, 2]), 3.0/summary.mat[10, 1],
147
- hessian[1,1], hessian[2,1], 3.0 / up.cf, 3.0 / low.cf), 1, 7)
148
- rownames(show.mat) <- c("EDN_ECP")
149
- colnames(show.mat) <- c("lnL", "max lnL", "tract length",
150
- "df", "d^2f", "c.i. tract length", "c.i tract length")
151
- show.mat
159
+ all <- readLines("./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_1_rv_SCOK_nonclock_summary.txt", n = -1)
160
+ col.names <- "Guess_1"
161
+ row.names <- strsplit(all[length(all)], ' ')[[1]][-1]
162
+ EDN.ECP.guess.1.result <- as.matrix(read.table("./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_1_rv_SCOK_nonclock_summary.txt", row.names = row.names, col.names = col.names))
163
+
164
+ all <- readLines("./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_2_rv_SCOK_nonclock_summary.txt", n = -1)
165
+ col.names <- "Guess_2"
166
+ row.names <- strsplit(all[length(all)], ' ')[[1]][-1]
167
+ EDN.ECP.guess.2.result <- as.matrix(read.table("./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_2_rv_SCOK_nonclock_summary.txt", row.names = row.names, col.names = col.names))
168
+
169
+ gradient.file <- "./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_1_nonclock_gradient.txt"
170
+ gradient.list <- read.table(gradient.file)
171
+ cat("Verify gradient ~ 0: Gradient = ", colSums(gradient.list), ". Gradient/Objective = ", colSums(gradient.list)/EDN.ECP.guess.1.result["ll",1], "\n")
172
+ Godambe.matrix.guess.1 <- read.table("./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_1_nonclock_godambe.txt")
173
+
174
+ gradient.file <- "./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_2_nonclock_gradient.txt"
175
+ gradient.list <- read.table(gradient.file)
176
+ cat("Verify gradient ~ 0: Gradient = ", colSums(gradient.list), ". Gradient/Objective = ", colSums(gradient.list)/EDN.ECP.guess.2.result["ll",1], "\n")
177
+ Godambe.matrix.guess.2 <- read.table("./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_2_nonclock_godambe.txt")
178
+
179
+ Results <- cbind(EDN.ECP.guess.1.result, EDN.ECP.guess.2.result)
180
+ Results
181
+
182
+ Godambe.inverse <- cbind(c(solve(Godambe.matrix.guess.1)/dim(gradient.list)[1]),
183
+ c(solve(Godambe.matrix.guess.2)/dim(gradient.list)[1]))
184
+ Godambe.inverse
185
+
186
+ # Guess 2 has higher lnL
187
+ which.max(Results["ll",])
188
+ # effective Tau
189
+ Results["init_rate", ] * Results["tract_length", ] * 3 / (1.0 + colSums(Results[c("r2", "r3"), ]))
190
+ Results["init_rate", ] * Results["tract_length", ] * 3 /exp(1.96*sqrt(Godambe.inverse[1, ])) / (1.0 + colSums(Results[c("r2", "r3"), ]))
191
+ Results["init_rate", ] * Results["tract_length", ] * 3 *exp(1.96*sqrt(Godambe.inverse[1, ])) / (1.0 + colSums(Results[c("r2", "r3"), ]))
192
+
193
+ # Tract length
194
+ Results["tract_length", ]
195
+ exp(log(Results["tract_length", ]-1.0)-1.96*sqrt(Godambe.inverse[4,]))+1.0
196
+ exp(log(Results["tract_length", ]-1.0)+1.96*sqrt(Godambe.inverse[4,]))+1.0
152
197
```
198
+
0 commit comments