Skip to content

Commit 02f7c75

Browse files
author
xji3
committed
add in new HMM plots
1 parent 4422b38 commit 02f7c75

11 files changed

+1432
-176
lines changed

.DS_Store

2 KB
Binary file not shown.

.Rhistory

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,3 +237,34 @@ colnames(summary.mat) <- c("lnL", "max lnL", "tract length",
237237
"Pr(S_0)", "Pr(S_1)",
238238
"df", "d^2f", "c.i. tract length", "c.i tract length")
239239
summary.mat
240+
source('~/.active-rstudio-document', echo=TRUE)
241+
# plot one paralog
242+
paralog = "EDN_ECP"
243+
lnL.ratio <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_log_posterior_ratio.txt", sep = "")))
244+
Viterbi.path <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_Viterbi_path.txt", sep = "")))
245+
lnL.surface <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_HMM_lnL_surface.txt", sep = "")))
246+
IGC.sw.lnL <- as.vector(read.table(paste("./summary/", paralog, "_MG94_nonclock_sw_lnL.txt", sep = "")))
247+
Force.sw.lnL <- as.vector(read.table(paste("./summary/Force_", paralog, "_MG94_nonclock_sw_lnL.txt", sep = "")))
248+
plot(lnL.ratio[, 1], xlab = "codon number", ylab = "log values",
249+
type = "l", col = "black", lty = 1,
250+
main = paste(paralog, " HMM result"),
251+
ylim = c(min(-0.5, min(lnL.ratio)), max(lnL.ratio)))
252+
lines(1:dim(Viterbi.path)[1], Viterbi.path[, 1], type = "S", lty = 2, col = "red")
253+
#lines(1:dim(IGC.sw.lnL)[1], IGC.sw.lnL[, 2] - Force.sw.lnL[, 2], type = "l", lty = 3, col = "red")
254+
legend(1, max(lnL.ratio), legend = c("log posterior ratio", "Viterbi path"),
255+
lty = c(1, 2), col = c( "black", "red"))
256+
plot(-lnL.surface[, 1], xlab = "tract length in nucleotide", ylab= "lnL", type = "l", col = "black", lty = 1,
257+
main = paste(paralog, " lnL surface"))
258+
summary.mat <- read.table("./HMM_tract_MG94_nonclock_summary.txt")
259+
# Now calculate standard deviation of lnP
260+
lnP <- log(3.0 / summary.mat[, 3])
261+
sd.lnP <- 1.0 / sqrt(-summary.mat[, 7])
262+
low.cf <- exp(lnP - 1.96 * sd.lnP)
263+
up.cf <- exp(lnP + 1.96 * sd.lnP)
264+
up.cf[up.cf > 1] <- 1.0
265+
summary.mat <- cbind(summary.mat, 3.0 / up.cf, 3.0 / low.cf)
266+
rownames(summary.mat) <- c("EDN_ECP")
267+
colnames(summary.mat) <- c("lnL", "max lnL", "tract length",
268+
"Pr(S_0)", "Pr(S_1)",
269+
"df", "d^2f", "c.i. tract length", "c.i tract length")
270+
summary.mat

Run_IS_HMM.py

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -68,23 +68,29 @@ def main(args):
6868
seq_index_file = './' + '_'.join(paralog) + '_seq_index.txt'
6969
state_list = ['No IGC event (Si = 0)','At least one IGC event (Si > 0)']
7070

71-
x_3 = np.array([-0.69727878,
72-
-0.53710801,
73-
-0.72400474,
74-
0.72385788,
75-
-0.18983735,
76-
-2.5199719,
77-
-2.01452935,
78-
-1.0337633,
79-
-3.29369029,
80-
-1.75318807,
81-
-3.25869777,
82-
-2.27341043,
83-
-4.20160402,
84-
-4.110472,
85-
-0.5])
71+
save_name = './save/EDN_ECP_Ind_MG94_IGC_save.txt'
72+
summary_file = './summary/EDN_ECP_Ind_MG94_HMM_summary.txt'
73+
summary_file_1D = './summary/EDN_ECP_Ind_MG94_HMM_1D_summary.txt'
74+
Ind_MG94_IGC = IndCodonGeneconv( newicktree, alignment_file, paralog, Model = 'MG94', Force = Force, clock = None, save_path = './save/',
75+
save_name = save_name)
76+
Ind_MG94_IGC.get_mle(True, True, 0, 'BFGS')
77+
x = np.concatenate((Ind_MG94_IGC.x, [0.0]))
78+
79+
IGC_sitewise_lnL_file = summary_path + '_'.join(paralog) + '_Ind_MG94_nonclock_sw_lnL.txt'
80+
NOIGC_sitewise_lnL_file = summary_path + 'NOIGC_' + '_'.join(paralog) + '_Ind_MG94_nonclock_sw_lnL.txt'
81+
82+
Ind_MG94_IGC.get_sitewise_loglikelihood_summary(IGC_sitewise_lnL_file, False)
83+
Ind_MG94_IGC.get_sitewise_loglikelihood_summary(NOIGC_sitewise_lnL_file, True)
84+
85+
86+
test = HMMJSGeneconv(save_file, newicktree, alignment_file, paralog, summary_path, x, save_path, IGC_sitewise_lnL_file, NOIGC_sitewise_lnL_file, state_list, seq_index_file)
87+
88+
89+
log_p_list = np.log(3.0/np.array(range(3, 1001)))
90+
plot_file = './plot/HMM_' + '_'.join(paralog) + '_lnL_1D_surface.txt'
91+
test.plot_tract_p(log_p_list, plot_file)
92+
93+
test.get_mle(display = True, two_step = True, One_Dimension = True)
94+
test.get_summary(summary_file_1D)
95+
8696

87-
test = HMMJSGeneconv(save_file, newicktree, alignment_file, paralog, summary_path, x_3, save_path, IGC_save_name, Force_save_name,
88-
IGC_sitewise_lnL_file, Force_sitewise_lnL_file,
89-
state_list, seq_index_file)
90-
test.get_mle(display = True, two_step = False, OneDimension = True)
Binary file not shown.
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
1.694970388693524455e+03
2+
2.909982296947770108e-01
3+
2.434371217926649678e-01
4+
2.068434784667338988e-01
5+
2.587211700458240116e-01
6+
2.070543014300672890e+00
7+
8.467952031245288280e-01
8+
5.280029360273003070e-01
9+
2.446307008266619676e-02
10+
4.633131449367799276e-02
11+
1.964016947431149462e-01
12+
3.266001629134150375e-01
13+
3.265333578314633806e-02
14+
1.562930819382047642e-01
15+
3.460162330033541428e-02
16+
9.009211334423779249e-02
17+
1.432976146309197611e-02
18+
1.603597178254540304e-02
19+
# ll pi_a pi_c pi_g pi_t kappa omega tau eta tract_p (N0,N1) (N0,Tamarin) (N1,N2) (N1,Macaque) (N2,N3) (N2,Orangutan) (N3,Chimpanzee) (N3,Gorilla)

0 commit comments

Comments
 (0)