forked from esherm/intSiteCaller
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathstats.Rmd
120 lines (103 loc) · 3.79 KB
/
stats.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
<style type="text/css">
/* CSS chunk 1: table and page styling for the report */

/* Header cells: grey fill with a solid rule underneath. */
th { background-color: #E0E0E0; border-bottom: 1px solid black; padding: 4px; }

/* Body cells: dotted rule between rows. */
td { border-bottom: 1px dotted black; padding: 5px; }

/* Tables are centred, boxed, and use collapsed borders. */
table { border-collapse: collapse; margin: auto; border: 1px solid black; }

/* Vertical rules between columns; the outermost cells leave the edge to the
   table's own border. */
table td { border-left: 1px solid #000; border-right: 1px solid #000; }
table td:first-child { border-left: none; }
table td:last-child { border-right: none; }

table, th, td {
  font-family: Consolas, 'Helvetica Neue', Helvetica, Calibri;
}

body {
  /* Disabled declaration, kept for reference. It was previously written as
     "##margin: auto;", which is not CSS comment syntax and was only dropped
     by the parser's error recovery. */
  /* margin: auto; */
  margin-left: 30px;
  width: 90%;
  font-family: Consolas, 'Helvetica Neue', Helvetica, Calibri;
  font-size: 14px;
  line-height: 18px;
  color: #333333;
  background-color: #ffffff;
}
</style>
```{r setup,echo=FALSE}
## Report-wide knitr defaults; individual chunks may override them.
library(knitr)

opts_chunk$set(
    ## Figures: `fig.path` is not defined in this file and must already exist
    ## in the environment the report is knitted in.
    fig.path = paste0(fig.path, "/"),
    fig.align = "left",
    ## other devices tried before: c("svg", "png", "pdf", "postscript")
    dev = "svg",
    dpi = 100,

    ## Output: hide the code, pass results through verbatim, and use no
    ## prefix on printed output.
    echo = FALSE,
    results = "asis",
    comment = "",

    ## Diagnostics: warnings and messages are hidden; error = TRUE is set so
    ## a failing chunk is reported in the document.
    warning = FALSE,
    message = FALSE,
    error = TRUE,

    cache = FALSE
)

## kable() emits HTML tables unless told otherwise.
options(knitr.table.format = "html")
```
# `r basename(args$dataDir)` attrition table
`r format(Sys.Date(), "%b %d %Y")`
### Things to look for
- The plots are in log scale, so the attrition table should look smooth.
- Normally there are 4 replicates for each GTSP; check the variation between replicates.
### GTSP summary information
```{r gtspTable, results="asis"}
## Per-GTSP summary table.
## Rows of `stats` are grouped by gtsp. Reps is the number of rows in the
## group and every other new column is the group-wide sum of one counter.
## mutate() keeps the original rows, so `info` is still available to select();
## distinct() then collapses the repeated rows.
## A plain GTSP-patient table used to be printed here instead:
##   kable(gtspInfo, caption="GTSP-patient", row.names=FALSE)
summary <- stats %>%
    group_by(gtsp) %>%
    mutate(
        Reps       = n(),
        allReads   = sum(barcoded),
        trimedR    = sum(vTrimed),
        PEMapped   = sum(numProperlyPairedAlignments),
        single     = sum(numAllSingleReads),
        multi      = sum(multihitReads),
        uSites     = sum(numUniqueSites),
        uSitesAbun = sum(numAllSingleSonicLengths),
        mClusters  = sum(multihitClusters),
        mAbun      = sum(multihitSonicLengths)
    ) %>%
    select(
        gtsp, info, Reps,
        allReads, trimedR, PEMapped,
        single, multi,
        uSites, uSitesAbun,
        mClusters, mAbun
    ) %>%
    distinct()

kable(summary, row.names = FALSE)
```
<P style="page-break-before: always">
```{r plotList, results='hide'}
## Print attrition bar charts for a list of per-sample data frames, a few
## samples per page, writing an <hr> and an HTML page break after each page.
##
## Arguments:
##   mdfList       list of data frames, one per sample. The plot uses their
##                 columns variable, value, Replicate and gtspinfo.
##   plotsPerPage  how many consecutive samples are drawn together (one facet
##                 each) before the page break; must be a single number >= 1.
##
## Also reads the global `stats.mdf` (factor levels of `variable`) and calls
## comma() and theme_default(), neither of which is defined in this function.
## NOTE(review): comma() presumably comes from the scales package -- confirm.
##
## Returns, invisibly, a list holding one NULL per page printed (an empty
## list when mdfList is empty).
plotAttritionList <- function(mdfList, plotsPerPage=2) {
    stopifnot(is.numeric(plotsPerPage),
              length(plotsPerPage) == 1,
              !is.na(plotsPerPage),
              plotsPerPage >= 1)

    ## Nothing to draw; returning here also means stats.mdf is never touched.
    if (length(mdfList) == 0) {
        return(invisible(list()))
    }

    ## Cut the indices 1..n into consecutive runs of plotsPerPage.
    ## The run length used to be hard-coded to 2, ignoring the argument.
    sampleIdx <- seq_along(mdfList)
    pages <- split(sampleIdx, (sampleIdx - 1) %/% plotsPerPage)

    ## Loop invariants: y-axis ticks at powers of ten, and dashed separators
    ## halfway between neighbouring levels of `variable`.
    breaks <- 10^(1:10)
    separators <- 1:(nlevels(stats.mdf$variable) - 1) + 0.5

    invisible(lapply(seq_along(pages), function(i) {
        pageData <- plyr::rbind.fill(mdfList[ pages[[i]] ])
        p <- ggplot(pageData, aes(variable, value, fill=Replicate)) +
            geom_bar(position=position_dodge(width = 0.8), stat="identity") +
            scale_y_log10(breaks = breaks, labels = comma(breaks)) +
            geom_vline(xintercept = separators, linetype=4) +
            theme_default() +
            facet_wrap( ~ gtspinfo, ncol=1, scales = "free_y")
        print(p)
        cat("<hr>", "\n")
        cat("<P style=\"page-break-before: always\">", "\n")
    }))
}
```
### Control or other samples
```{r controlstats, fig.width=8, fig.height=10, results='asis'}
## Controls and other samples: entries whose name does not contain "GTSP"
## (real samples are named like GTSP0001).
hasGtspName <- grepl("GTSP", names(stats.mdf.listBygtsp))
mdfList <- subset(stats.mdf.listBygtsp, !hasGtspName)
plotAttritionList(mdfList)
```
### GTSP samples
```{r gtspstats, fig.width=8, fig.height=10, results='asis'}
## Real samples: entries whose name contains "GTSP" (e.g. GTSP0001),
## plotted separately from the controls.
hasGtspName <- grepl("GTSP", names(stats.mdf.listBygtsp))
mdfList <- subset(stats.mdf.listBygtsp, hasGtspName)
plotAttritionList(mdfList)
```