Skip to content

Commit

Permalink
Merge pull request #3 from Nesvilab/dev
Browse files Browse the repository at this point in the history
functions for glyco QC and combined site report
  • Loading branch information
hsiaoyi0504 authored Apr 30, 2024
2 parents e5d49ff + a2a6be4 commit 76f0a02
Show file tree
Hide file tree
Showing 8 changed files with 100 additions and 9 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: FragPipeAnalystR
Type: Package
Title: FragPipe downstream analysis in R
Version: 0.1.0
Version: 0.1.2
Author: Who wrote it
Maintainer: Yi Hsiao <[email protected]>
Description: More about what it does (maybe more than one line)
Expand Down Expand Up @@ -36,7 +36,8 @@ Imports:
stringr,
SummarizedExperiment,
tibble,
tidyr
tidyr,
vsn
Suggests:
clusterProfiler,
devtools,
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export(HeatmapAnnotation)
export(MD_normalization)
export(PSM_barplot)
export(PTM_normalization)
export(VSN_normalization)
export(add_rejections)
export(assay)
export(average_samples)
Expand All @@ -31,6 +32,7 @@ export(plot_GSEA)
export(plot_correlation_heatmap)
export(plot_cvs)
export(plot_feature)
export(plot_glycan_distribution)
export(plot_missval_heatmap)
export(plot_or)
export(plot_pca)
Expand Down Expand Up @@ -128,3 +130,5 @@ importFrom(tibble,rownames_to_column)
importFrom(tidyr,gather)
importFrom(tidyr,spread)
importFrom(tidyr,unite)
importFrom(vsn,predict)
importFrom(vsn,vsnMatrix)
2 changes: 2 additions & 0 deletions R/FragPipeAnalystR-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,5 +95,7 @@
#' @importFrom tidyr gather
#' @importFrom tidyr spread
#' @importFrom tidyr unite
#' @importFrom vsn predict
#' @importFrom vsn vsnMatrix
## usethis namespace: end
NULL
58 changes: 58 additions & 0 deletions R/glyco_QC.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Classify an N-glycan composition string into a glycan category.
#
# Category rules are taken from Table II of
# https://www.sciencedirect.com/science/article/pii/S1535947620351070
#
# @param glycan_string A single composition string in which every
#   monosaccharide name is immediately followed by its count, e.g.
#   "HexNAc4Hex5Fuc1NeuAc2". Residues may appear in any order; residues that
#   are absent are counted as 0.
# @return One of "oligomannose", "fuco-sialylated", "fucosylated",
#   "sialylated" or "neutral".
N_glycan_property <- function(glycan_string) {
  stopifnot(length(glycan_string) == 1L)

  # "HexNAc" must be processed (and stripped) before "Hex", otherwise the
  # "Hex" pattern would also match the first three letters of "HexNAc".
  monosaccharides <- c("HexNAc", "Hex", "Fuc", "NeuAc", "NeuGc")
  remaining <- as.character(glycan_string)
  glycan_composition <- list()

  for (residue in monosaccharides) {
    pattern <- paste0(residue, "\\d+")
    # Extract only the matched "<residue><count>" token so that the result
    # does not depend on what precedes or follows it in the string.
    hit <- regmatches(remaining, regexpr(pattern, remaining))
    glycan_composition[[residue]] <- if (length(hit) == 0L) {
      0
    } else {
      as.numeric(sub(residue, "", hit, fixed = TRUE))
    }
    remaining <- gsub(pattern, "", remaining)
  }

  n_hex <- glycan_composition[["Hex"]]
  n_hexnac <- glycan_composition[["HexNAc"]]
  n_fuc <- glycan_composition[["Fuc"]]
  has_sialic_acid <- glycan_composition[["NeuAc"]] > 0 ||
    glycan_composition[["NeuGc"]] > 0

  # The oligomannose rule takes precedence over all other categories.
  if (n_hex >= 5 && n_hexnac <= 2 && n_fuc <= 1) {
    return("oligomannose")
  }

  if (n_fuc > 0) {
    if (has_sialic_acid) "fuco-sialylated" else "fucosylated"
  } else {
    if (has_sialic_acid) "sialylated" else "neutral"
  }
}

#' Bar plot of the number of glycoforms per glycan category
#'
#' Extracts the glycan composition from every row name of `se`, classifies it
#' with `N_glycan_property()` and plots how many glycoforms fall into each
#' category.
#'
#' @param se An object whose `rownames()` embed the glycan composition after
#'   an underscore (starting with "Hex") and optionally followed by " _" and
#'   further text. Presumably a glycan-level SummarizedExperiment; only the
#'   row names are used here.
#' @return A ggplot object.
#' @export
plot_glycan_distribution <- function(se) {
  # Keep only the composition: drop everything up to the "_" preceding "Hex"
  # and everything from " _" onwards.
  glycans <- gsub(" _.*", "", gsub(".*_Hex", "Hex", rownames(se)))

  # vapply() always yields a character vector, even when there are no rows;
  # sapply() would return an empty list that table() cannot tabulate.
  categories <- vapply(glycans, N_glycan_property, character(1))

  df <- as.data.frame(table(categories))
  colnames(df)[1] <- "Category"
  df$Category <- factor(
    df$Category,
    levels = c(
      "sialylated",
      "fuco-sialylated",
      "fucosylated",
      "neutral",
      "oligomannose"
    )
  )

  p <- ggplot(df, aes(x = Category, y = Freq, fill = Category)) +
    geom_bar(stat = "identity") +
    scale_fill_manual(
      values = c(
        "sialylated" = "#BC8867",
        "fuco-sialylated" = "#5C7099",
        "fucosylated" = "#699870",
        "neutral" = "#A45F61",
        "oligomannose" = "#8077A1"
      )
    ) +
    theme_bw() +
    theme(
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.line = element_line(colour = "black")
    )

  p
}

20 changes: 14 additions & 6 deletions R/io.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ make.unique.2 <- function(x, sep = ".") {
}

# internal function to read quantification table
readQuantTable <- function(quant_table_path, type = "TMT", level=NULL, log2transform = F) {
readQuantTable <- function(quant_table_path, type = "TMT", level=NULL, log2transform = F, exp_type=NULL) {
temp_data <- read.table(quant_table_path,
header = TRUE,
fill = TRUE, # to fill any missing data
Expand All @@ -53,7 +53,9 @@ readQuantTable <- function(quant_table_path, type = "TMT", level=NULL, log2trans
# validate(fragpipe_input_test(temp_data))
# remove contam
temp_data <- temp_data[!grepl("contam", temp_data$Protein),]
temp_data$Index <- paste0(temp_data$`Protein ID`, "_", temp_data$`Peptide Sequence`)
if (is.null(exp_type)) {
temp_data$Index <- paste0(temp_data$`Protein ID`, "_", temp_data$`Peptide Sequence`)
}
} else {
# handle - (dash) in experiment column
colnames(temp_data) <- gsub("-", ".", colnames(temp_data))
Expand Down Expand Up @@ -170,12 +172,12 @@ make_se_from_files <- function(quant_table_path, exp_anno_path, type = "TMT", le
llog2transform <- F
}

if (!level %in% c("gene", "protein", "peptide")) {
if (!level %in% c("gene", "protein", "peptide", "glycan")) {
cat(paste0("The specified level: ", level, " is not a valid level. Available levels are gene, protein, and peptide.\n"))
return(NULL)
}

quant_table <- readQuantTable(quant_table_path, type = type, level=level)
quant_table <- readQuantTable(quant_table_path, type = type, level=level, exp_type=exp_type)
exp_design <- readExpDesign(exp_anno_path, type = type, lfq_type = lfq_type)
if (type == "LFQ") {
if (level != "peptide") {
Expand Down Expand Up @@ -215,12 +217,12 @@ make_se_from_files <- function(quant_table_path, exp_anno_path, type = "TMT", le
lfq_columns <- setdiff(lfq_columns, grep("Total Intensity", colnames(data_unique)))
lfq_columns <- setdiff(lfq_columns, grep("Unique Intensity", colnames(data_unique)))
} else if (lfq_type == "MaxLFQ") {
lfq_columns<-grep("MaxLFQ", colnames(data_unique))
lfq_columns <- grep("MaxLFQ", colnames(data_unique))
if (length(lfq_columns) == 0) {
stop(safeError("No MaxLFQ column available. Please make sure your files have MaxLFQ intensity columns."))
}
} else if (lfq_type == "Spectral Count") {
lfq_columns<-grep("Spectral", colnames(data_unique))
lfq_columns <- grep("Spectral", colnames(data_unique))
lfq_columns <- setdiff(lfq_columns, grep("Total Spectral Count", colnames(data_unique)))
lfq_columns <- setdiff(lfq_columns, grep("Unique Spectral Count", colnames(data_unique)))
}
Expand Down Expand Up @@ -286,6 +288,12 @@ make_se_from_files <- function(quant_table_path, exp_anno_path, type = "TMT", le
temp_exp_design <- temp_exp_design[temp_exp_design$label %in% overlapped_samples, ]
cols <- colnames(data_unique)
selected_cols <- which(!(cols %in% interest_cols))
} else {
interest_cols <- c("Index", "Gene", "ProteinID", "Peptide", "SequenceWindow", "Start", "End", "MaxPepProb", "ReferenceIntensity", "name", "ID")
data_unique <- data_unique[, colnames(data_unique) %in% c(interest_cols, overlapped_samples)]
temp_exp_design <- temp_exp_design[temp_exp_design$label %in% overlapped_samples, ]
cols <- colnames(data_unique)
selected_cols <- which(!(cols %in% interest_cols))
}
data_unique[selected_cols] <- apply(data_unique[selected_cols], 2, as.numeric)

Expand Down
11 changes: 11 additions & 0 deletions R/normalization.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ MD_normalization <- function(se) {
return(se)
}

#' Variance-stabilizing normalization (VSN)
#'
#' Fits a VSN model to the assay of `se` and replaces the assay with the
#' transformed values. The transformation is applied only to LFQ and DIA
#' experiments; for any other experiment type `se` is returned unchanged.
#'
#' @param se A SummarizedExperiment whose `metadata()` carries the experiment
#'   type in `exp`.
#' @return `se`, with its assay replaced by the VSN-transformed values when
#'   the experiment type is "LFQ" or "DIA".
#' @export
VSN_normalization <- function(se) {
  assertthat::assert_that(inherits(se, "SummarizedExperiment"))

  # The experiment type ("LFQ", "DIA", ...) is stored in metadata(se)$exp;
  # metadata(se)$level holds the quantification level (gene/protein/peptide/
  # glycan) and can never equal "LFQ" or "DIA". isTRUE() also covers a
  # missing `exp` entry.
  if (isTRUE(metadata(se)$exp %in% c("LFQ", "DIA"))) {
    # Back-transform with 2^ before fitting.
    # NOTE(review): assumes the assay holds log2 intensities - confirm.
    raw_intensity <- 2^assay(se)
    vsn_fit <- vsn::vsnMatrix(raw_intensity)
    assay(se) <- vsn::predict(vsn_fit, raw_intensity)
  }

  se
}

#' @export
PTM_normalization <- function(ptm_se, se, print_progress=F) {
pprot <- gsub("_.*", "", rowData(ptm_se)$Index)
Expand Down
6 changes: 5 additions & 1 deletion R/pca.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#' @export
plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"),
label = FALSE, n = 500, point_size = 8, label_size = 3, plot = TRUE, ID_col = "ID", exp = "LFQ", scale=F, interactive = F) {
label = FALSE, n = 500, point_size = 8, label_size = 3, plot = TRUE, ID_col = "label", exp = NULL, scale=F, interactive = F) {
if (is.null(exp)) {
exp <- metadata(dep)$exp
}

if (is.integer(x)) x <- as.numeric(x)
if (is.integer(y)) y <- as.numeric(y)
if (is.integer(n)) n <- as.numeric(n)
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ renv::install("bioc::SummarizedExperiment")
renv::install("bioc::cmapR")
renv::install("bioc::ConsensusClusterPlus")
renv::install("Nesvilab/FragPipeAnalystR")

# optional
renv::install("nicolerg/ssGSEA2")
```

## Example
Expand Down

0 comments on commit 76f0a02

Please sign in to comment.