Skip to content

Commit 80acc2a

Browse files
author
xji3
committed
update rmd
1 parent 753f68c commit 80acc2a

File tree

6 files changed

+147
-101
lines changed

6 files changed

+147
-101
lines changed

.DS_Store

0 Bytes
Binary file not shown.

EDN_ECP_summary.Rmd

Lines changed: 147 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -44,109 +44,155 @@ colSums(ECP.EDN.MG94[26:33, ] + ECP.EDN.MG94[34:41, ])/colSums(ECP.EDN.MG94[42:4
4444
4545
```
4646

47-
\newpage
48-
04212017 update
49-
50-
Now plot posterior log likelihood ratio: $ln(\frac{Pr(S_i = 1 | x)}{Pr(S_i = 0 | x)}$.
51-
52-
The derivatives are $\frac{{\partial \ln L}}{{\partial \ln {p_{tract}}}}$ for
53-
the first order and $\frac{{\partial^2 \ln L}}{{\partial \ln {p_{tract}}}^2}$ for
54-
second order.
55-
56-
The variance is calculated by:
57-
$Var(ln(p_{tract}))=\frac{1}{I(ln(p_{tract}))} \approx - \frac{1}{\frac{{\partial^2 \ln L}}{{\partial \ln {p_{tract}}}^2}}$
58-
59-
95% confidence interval for $ln(p_{tract})$ is $ln(p_{tract}) \pm 1.96 * \sqrt {Var(\ln ({p_{tract}}))}$
60-
61-
By transforming to $3.0 / p_{tract}$ to get the average tract length in nucleotide.
62-
63-
```{r}
64-
# plot one paralog
65-
paralog = "EDN_ECP"
66-
lnL.ratio <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_log_posterior_ratio.txt", sep = "")))
67-
Viterbi.path <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_Viterbi_path.txt", sep = "")))
68-
lnL.surface <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_lnL_surface.txt", sep = "")))
69-
IGC.sw.lnL <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_sw_lnL.txt", sep = "")))
70-
Force.sw.lnL <- as.vector(read.table(paste("./summary/Force_", paralog, "_MG94_nonclock_sw_lnL.txt", sep = "")))
71-
72-
plot(lnL.ratio[, 1], xlab = "codon number", ylab = "log values",
73-
type = "l", col = "black", lty = 1,
74-
main = paste(paralog, " HMM result"),
75-
ylim = c(min(-0.5, min(lnL.ratio)), max(lnL.ratio)))
76-
lines(1:dim(Viterbi.path)[1], Viterbi.path[, 1], type = "S", lty = 2, col = "red")
77-
#lines(1:dim(IGC.sw.lnL)[1], IGC.sw.lnL[, 2] - Force.sw.lnL[, 2], type = "l", lty = 3, col = "red")
78-
legend(1, max(lnL.ratio), legend = c("log posterior ratio", "Viterbi path"),
79-
lty = c(1, 2), col = c( "black", "red"))
80-
81-
plot(-lnL.surface[, 1], xlab = "tract length in nucleotide", ylab= "lnL", type = "l", col = "black", lty = 1,
82-
main = paste(paralog, " lnL surface"))
83-
84-
summary.mat <- read.table("./HMM_tract_MG94_nonclock_summary.txt")
85-
86-
# Now calculate standard deviation of lnP
87-
lnP <- log(3.0 / summary.mat[, 3])
88-
sd.lnP <- 1.0 / sqrt(-summary.mat[, 7])
89-
low.cf <- exp(lnP - 1.96 * sd.lnP)
90-
up.cf <- exp(lnP + 1.96 * sd.lnP)
91-
up.cf[up.cf > 1] <- 1.0
92-
summary.mat <- cbind(summary.mat, 3.0 / up.cf, 3.0 / low.cf)
93-
rownames(summary.mat) <- c("EDN_ECP")
94-
colnames(summary.mat) <- c("lnL", "max lnL", "tract length",
95-
"Pr(S_0)", "Pr(S_1)",
96-
"df", "d^2f", "c.i. tract length", "c.i tract length")
97-
summary.mat
98-
```
47+
<!-- \newpage -->
48+
<!-- 04212017 update -->
49+
50+
<!-- Now plot posterior log likelihood ratio: $ln(\frac{Pr(S_i = 1 | x)}{Pr(S_i = 0 | x)}$. -->
51+
52+
<!-- The derivatives are $\frac{{\partial \ln L}}{{\partial \ln {p_{tract}}}}$ for -->
53+
<!-- the first order and $\frac{{\partial^2 \ln L}}{{\partial \ln {p_{tract}}}^2}$ for -->
54+
<!-- second order. -->
55+
56+
<!-- The variance is calculated by: -->
57+
<!-- $Var(ln(p_{tract}))=\frac{1}{I(ln(p_{tract}))} \approx - \frac{1}{\frac{{\partial^2 \ln L}}{{\partial \ln {p_{tract}}}^2}}$ -->
58+
59+
<!-- 95% confidence interval for $ln(p_{tract})$ is $ln(p_{tract}) \pm 1.96 * \sqrt {Var(\ln ({p_{tract}}))}$ -->
60+
61+
<!-- By transforming to $3.0 / p_{tract}$ to get the average tract length in nucleotide. -->
62+
63+
<!-- ```{r} -->
64+
<!-- # plot one paralog -->
65+
<!-- paralog = "EDN_ECP" -->
66+
<!-- lnL.ratio <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_log_posterior_ratio.txt", sep = ""))) -->
67+
<!-- Viterbi.path <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_Viterbi_path.txt", sep = ""))) -->
68+
<!-- lnL.surface <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_lnL_surface.txt", sep = ""))) -->
69+
<!-- IGC.sw.lnL <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_sw_lnL.txt", sep = ""))) -->
70+
<!-- Force.sw.lnL <- as.vector(read.table(paste("./summary/Force_", paralog, "_MG94_nonclock_sw_lnL.txt", sep = ""))) -->
71+
72+
<!-- plot(lnL.ratio[, 1], xlab = "codon number", ylab = "log values", -->
73+
<!-- type = "l", col = "black", lty = 1, -->
74+
<!-- main = paste(paralog, " HMM result"), -->
75+
<!-- ylim = c(min(-0.5, min(lnL.ratio)), max(lnL.ratio))) -->
76+
<!-- lines(1:dim(Viterbi.path)[1], Viterbi.path[, 1], type = "S", lty = 2, col = "red") -->
77+
<!-- #lines(1:dim(IGC.sw.lnL)[1], IGC.sw.lnL[, 2] - Force.sw.lnL[, 2], type = "l", lty = 3, col = "red") -->
78+
<!-- legend(1, max(lnL.ratio), legend = c("log posterior ratio", "Viterbi path"), -->
79+
<!-- lty = c(1, 2), col = c( "black", "red")) -->
80+
81+
<!-- plot(-lnL.surface[, 1], xlab = "tract length in nucleotide", ylab= "lnL", type = "l", col = "black", lty = 1, -->
82+
<!-- main = paste(paralog, " lnL surface")) -->
83+
84+
<!-- summary.mat <- read.table("./HMM_tract_MG94_nonclock_summary.txt") -->
85+
86+
<!-- # Now calculate standard deviation of lnP -->
87+
<!-- lnP <- log(3.0 / summary.mat[, 3]) -->
88+
<!-- sd.lnP <- 1.0 / sqrt(-summary.mat[, 7]) -->
89+
<!-- low.cf <- exp(lnP - 1.96 * sd.lnP) -->
90+
<!-- up.cf <- exp(lnP + 1.96 * sd.lnP) -->
91+
<!-- up.cf[up.cf > 1] <- 1.0 -->
92+
<!-- summary.mat <- cbind(summary.mat, 3.0 / up.cf, 3.0 / low.cf) -->
93+
<!-- rownames(summary.mat) <- c("EDN_ECP") -->
94+
<!-- colnames(summary.mat) <- c("lnL", "max lnL", "tract length", -->
95+
<!-- "Pr(S_0)", "Pr(S_1)", -->
96+
<!-- "df", "d^2f", "c.i. tract length", "c.i tract length") -->
97+
<!-- summary.mat -->
98+
<!-- ``` -->
99+
100+
<!-- \newpage -->
101+
<!-- 09132017 update -->
102+
103+
<!-- Now plot lnL surface of MG94+IS-IGC+HMM model. -->
104+
105+
<!-- The derivatives are $\frac{{\partial \ln L}}{{\partial \ln {p_{tract}}}}$ for -->
106+
<!-- the first order and $\frac{{\partial^2 \ln L}}{{\partial \ln {p_{tract}}}^2}$ for -->
107+
<!-- second order. -->
108+
109+
<!-- The variance is calculated by: -->
110+
<!-- $Var(ln(p_{tract}))=\frac{1}{I(ln(p_{tract}))} \approx - \frac{1}{\frac{{\partial^2 \ln L}}{{\partial \ln {p_{tract}}}^2}}$ -->
111+
112+
<!-- 95% confidence interval for $ln(p_{tract})$ is $ln(p_{tract}) \pm 1.96 * \sqrt {Var(\ln ({p_{tract}}))}$ -->
113+
114+
<!-- By transforming to $3.0 / p_{tract}$ to get the average tract length in nucleotide. -->
115+
116+
117+
<!-- ```{r} -->
118+
<!-- # plot one paralog -->
119+
<!-- paralog = "EDN_ECP" -->
120+
<!-- lnL.ratio <- as.vector(read.table(paste("./summary/", paralog, "_Ind_MG94_HMM_Posterior_lnL.txt", sep = ""))) -->
121+
<!-- Viterbi.path <- as.vector(read.table(paste("./summary/", paralog, "_Ind_MG94_HMM_Viterbi_array.txt", sep = ""))) -->
122+
<!-- lnL.surface <- as.vector(read.table(paste("./plot/HMM_", paralog, "_lnL_1D_surface.txt", sep = ""))) -->
123+
124+
<!-- plot(1:dim(lnL.ratio)[1], lnL.ratio[, 2] - lnL.ratio[, 1], xlab = "codon number", ylab = "log values", -->
125+
<!-- type = "l", col = "black", lty = 1, -->
126+
<!-- main = paste(paralog, " HMM result"), -->
127+
<!-- ylim = c(min(-0.5, min(lnL.ratio[, 2] - lnL.ratio[, 1])), max(lnL.ratio[, 2] - lnL.ratio[, 1]))) -->
128+
<!-- lines(1:dim(Viterbi.path)[1], Viterbi.path[, 1], type = "S", lty = 2, col = "red") -->
129+
<!-- legend(1, max(lnL.ratio[, 2] - lnL.ratio[, 1]), legend = c("log posterior ratio", "Viterbi path"), -->
130+
<!-- lty = c(1, 2), col = c( "black", "red")) -->
131+
132+
<!-- plot.new() -->
133+
<!-- plot(-lnL.surface[, 1],lnL.surface[, 2], xlab = "log tract length in codon", ylab= "lnL", type = "l", col = "black", lty = 1, -->
134+
<!-- main = paste(paralog, " lnL surface")) -->
135+
<!-- abline(v = log(6.7/3.), col = "red") -->
136+
137+
<!-- summary.mat <- read.table("./Summary/EDN_ECP_Ind_MG94_HMM_1D_summary.txt") -->
138+
<!-- hessian <- read.table("./Summary/EDN_ECP_Ind_MG94_HMM_Hessian.txt") -->
139+
140+
<!-- # Now calculate standard deviation of lnP -->
141+
<!-- lnP <- log(summary.mat[10,1]) -->
142+
<!-- sd.lnP <- 1.0 / sqrt(-hessian[2, 1]) -->
143+
<!-- low.cf <- exp(lnP - 1.96 * sd.lnP) -->
144+
<!-- up.cf <- exp(lnP + 1.96 * sd.lnP) -->
145+
<!-- up.cf[up.cf > 1] <- 1.0 -->
146+
<!-- show.mat <- matrix(c(summary.mat[1,1], max(-lnL.surface[, 2]), 3.0/summary.mat[10, 1], -->
147+
<!-- hessian[1,1], hessian[2,1], 3.0 / up.cf, 3.0 / low.cf), 1, 7) -->
148+
<!-- rownames(show.mat) <- c("EDN_ECP") -->
149+
<!-- colnames(show.mat) <- c("lnL", "max lnL", "tract length", -->
150+
<!-- "df", "d^2f", "c.i. tract length", "c.i tract length") -->
151+
<!-- show.mat -->
152+
<!-- ``` -->
99153

100154
\newpage
101-
09132017 update
102-
103-
Now plot lnL surface of MG94+IS-IGC+HMM model.
104-
105-
The derivatives are $\frac{{\partial \ln L}}{{\partial \ln {p_{tract}}}}$ for
106-
the first order and $\frac{{\partial^2 \ln L}}{{\partial \ln {p_{tract}}}^2}$ for
107-
second order.
108-
109-
The variance is calculated by:
110-
$Var(ln(p_{tract}))=\frac{1}{I(ln(p_{tract}))} \approx - \frac{1}{\frac{{\partial^2 \ln L}}{{\partial \ln {p_{tract}}}^2}}$
111-
112-
95% confidence interval for $ln(p_{tract})$ is $ln(p_{tract}) \pm 1.96 * \sqrt {Var(\ln ({p_{tract}}))}$
113-
114-
By transforming to $3.0 / p_{tract}$ to get the average tract length in nucleotide.
115-
155+
12212017 update
116156

157+
HKY+PS-IGC results
117158
```{r}
118-
# plot one paralog
119-
paralog = "EDN_ECP"
120-
lnL.ratio <- as.vector(read.table(paste("./summary/", paralog, "_Ind_MG94_HMM_Posterior_lnL.txt", sep = "")))
121-
Viterbi.path <- as.vector(read.table(paste("./summary/", paralog, "_Ind_MG94_HMM_Viterbi_array.txt", sep = "")))
122-
lnL.surface <- as.vector(read.table(paste("./plot/HMM_", paralog, "_lnL_1D_surface.txt", sep = "")))
123-
124-
plot(1:dim(lnL.ratio)[1], lnL.ratio[, 2] - lnL.ratio[, 1], xlab = "codon number", ylab = "log values",
125-
type = "l", col = "black", lty = 1,
126-
main = paste(paralog, " HMM result"),
127-
ylim = c(min(-0.5, min(lnL.ratio[, 2] - lnL.ratio[, 1])), max(lnL.ratio[, 2] - lnL.ratio[, 1])))
128-
lines(1:dim(Viterbi.path)[1], Viterbi.path[, 1], type = "S", lty = 2, col = "red")
129-
legend(1, max(lnL.ratio[, 2] - lnL.ratio[, 1]), legend = c("log posterior ratio", "Viterbi path"),
130-
lty = c(1, 2), col = c( "black", "red"))
131-
132-
plot.new()
133-
plot(-lnL.surface[, 1],lnL.surface[, 2], xlab = "log tract length in codon", ylab= "lnL", type = "l", col = "black", lty = 1,
134-
main = paste(paralog, " lnL surface"))
135-
abline(v = log(6.7/3.), col = "red")
136-
137-
summary.mat <- read.table("./Summary/EDN_ECP_Ind_MG94_HMM_1D_summary.txt")
138-
hessian <- read.table("./Summary/EDN_ECP_Ind_MG94_HMM_Hessian.txt")
139-
140-
# Now calculate standard deviation of lnP
141-
lnP <- log(summary.mat[10,1])
142-
sd.lnP <- 1.0 / sqrt(-hessian[2, 1])
143-
low.cf <- exp(lnP - 1.96 * sd.lnP)
144-
up.cf <- exp(lnP + 1.96 * sd.lnP)
145-
up.cf[up.cf > 1] <- 1.0
146-
show.mat <- matrix(c(summary.mat[1,1], max(-lnL.surface[, 2]), 3.0/summary.mat[10, 1],
147-
hessian[1,1], hessian[2,1], 3.0 / up.cf, 3.0 / low.cf), 1, 7)
148-
rownames(show.mat) <- c("EDN_ECP")
149-
colnames(show.mat) <- c("lnL", "max lnL", "tract length",
150-
"df", "d^2f", "c.i. tract length", "c.i tract length")
151-
show.mat
159+
all <- readLines("./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_1_rv_SCOK_nonclock_summary.txt", n = -1)
160+
col.names <- "Guess_1"
161+
row.names <- strsplit(all[length(all)], ' ')[[1]][-1]
162+
EDN.ECP.guess.1.result <- as.matrix(read.table("./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_1_rv_SCOK_nonclock_summary.txt", row.names = row.names, col.names = col.names))
163+
164+
all <- readLines("./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_2_rv_SCOK_nonclock_summary.txt", n = -1)
165+
col.names <- "Guess_2"
166+
row.names <- strsplit(all[length(all)], ' ')[[1]][-1]
167+
EDN.ECP.guess.2.result <- as.matrix(read.table("./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_2_rv_SCOK_nonclock_summary.txt", row.names = row.names, col.names = col.names))
168+
169+
gradient.file <- "./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_1_nonclock_gradient.txt"
170+
gradient.list <- read.table(gradient.file)
171+
cat("Verify gradient ~ 0: Gradient = ", colSums(gradient.list), ". Gradient/Objective = ", colSums(gradient.list)/EDN.ECP.guess.1.result["ll",1], "\n")
172+
Godambe.matrix.guess.1 <- read.table("./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_1_nonclock_godambe.txt")
173+
174+
gradient.file <- "./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_2_nonclock_gradient.txt"
175+
gradient.list <- read.table(gradient.file)
176+
cat("Verify gradient ~ 0: Gradient = ", colSums(gradient.list), ". Gradient/Objective = ", colSums(gradient.list)/EDN.ECP.guess.2.result["ll",1], "\n")
177+
Godambe.matrix.guess.2 <- read.table("./Summary/PSJS_HKY_EDN_ECP_One_rate_Guess_2_nonclock_godambe.txt")
178+
179+
Results <- cbind(EDN.ECP.guess.1.result, EDN.ECP.guess.2.result)
180+
Results
181+
182+
Godambe.inverse <- cbind(c(solve(Godambe.matrix.guess.1)/dim(gradient.list)[1]),
183+
c(solve(Godambe.matrix.guess.2)/dim(gradient.list)[1]))
184+
Godambe.inverse
185+
186+
# Guess 2 has higher lnL
187+
which.max(Results["ll",])
188+
# effective Tau
189+
Results["init_rate", ] * Results["tract_length", ] * 3 / (1.0 + colSums(Results[c("r2", "r3"), ]))
190+
Results["init_rate", ] * Results["tract_length", ] * 3 /exp(1.96*sqrt(Godambe.inverse[1, ])) / (1.0 + colSums(Results[c("r2", "r3"), ]))
191+
Results["init_rate", ] * Results["tract_length", ] * 3 *exp(1.96*sqrt(Godambe.inverse[1, ])) / (1.0 + colSums(Results[c("r2", "r3"), ]))
192+
193+
# Tract length
194+
Results["tract_length", ]
195+
exp(log(Results["tract_length", ]-1.0)-1.96*sqrt(Godambe.inverse[4,]))+1.0
196+
exp(log(Results["tract_length", ]-1.0)+1.96*sqrt(Godambe.inverse[4,]))+1.0
152197
```
198+

EDN_ECP_summary.pdf

-94.8 KB
Binary file not shown.

SimulationStudy/.DS_Store

0 Bytes
Binary file not shown.

SimulationStudy/plot/.DS_Store

0 Bytes
Binary file not shown.

SimulationStudy/summary/.DS_Store

2 KB
Binary file not shown.

0 commit comments

Comments
 (0)