Skip to content

Commit cdd5443

Browse files
authored
feat: add TIN score calculation (#18)
1 parent 32c7969 commit cdd5443

10 files changed

Lines changed: 113 additions & 40 deletions

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ Outputs:
6666
- `analysis`: to only run the postprocessing part of the workflow (quantification of IPA usage and intron retention) - requires `input_bam`
6767
- `tectool`: to only run the IPA usage quantification, using TECtool - requires `input_bam`
6868
- `intron`: to only run the intron retention quantification subworkflow - requires `input_bam`
69+
- `tin_score`: to only run the TIN score calculation subworkflow - requires `input_bam`
6970

7071
In the case of `full` and `preprocessing` modes, the `input_fastq` is required, using a wildcard character, e.g.: `--input_fastq='test_data\*{1,2}.fastq'`
7172

conf/envs/slurm.config

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,17 @@ process {
2828
memory = { 8.GB * task.attempt }
2929
}
3030
withLabel: TECtool {
31-
clusterOptions = "--qos=6hours"
31+
clusterOptions = { task.attempt > 1 ? "--qos=1day" : "--qos=6hours" }
3232
cpus = 8
33-
time = { 1.h * task.attempt }
33+
time = { 6.h * task.attempt }
3434
memory = { 64.GB * task.attempt }
3535
}
36+
withLabel: calculate_tin {
37+
clusterOptions = "--qos=6hours"
38+
cpus = 8
39+
time = { 3.h * task.attempt }
40+
memory = 32.GB
41+
}
3642
withLabel: salmon {
3743
clusterOptions = "--qos=6hours"
3844
cpus = 8
@@ -47,7 +53,9 @@ process {
4753
// Specific requirements
4854

4955
withName: SAMTOOLS_GET_LOW_DUP_READS {
56+
clusterOptions = "--qos=6hours"
57+
cpus = 8
5058
memory = { 40.GB * task.attempt }
51-
time = 30.m
59+
time = { 2.h * task.attempt }
5260
}
5361
}

conf/envs/slurm_med.config

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ process {
33
// standard settings if not specified otherwise
44

55
executor = 'slurm'
6-
clusterOptions = "--partition=dynamic-8cores-16g"
7-
errorStrategy = { task.exitStatus in 137..255 ? 'retry' : 'terminate' }
6+
clusterOptions = "--partition=dynamic-8cores-16g,dynamic-16cores-32g,dynamic-16cores-64g,dynamic-16cores-128g"
7+
errorStrategy = { task.exitStatus in 137..140 ? 'retry' : 'terminate' }
88
maxRetries = 2
9-
time = { 20.m * task.attempt }
9+
time = { 30.m * task.attempt }
1010
memory = { 4.GB * task.attempt }
1111
cpus = 4
1212
queueSize = 10

install/environment.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,7 @@ dependencies:
1616
- bedops
1717
- gffread
1818
- gtfparse
19-
- pip:
20-
- git+https://github.com/balajtimate/TECtool@master
19+
- zgtf
20+
- tin-score-calculation
21+
# - pip:
22+
# - git+https://github.com/balajtimate/TECtool@master

main.nf

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ include { SALMON_TRANSCRIPTOME } from './modules/salmon.nf'
3030
include { SALMON_INDEX } from './modules/salmon.nf'
3131
include { SALMON_QUANTIFY } from './modules/salmon.nf'
3232
include { INTRON_RETENTION } from './modules/intron_retention.nf'
33+
include { TIN_GTF2BED } from './modules/tin_score.nf'
34+
include { CALCULATE_TIN_SCORES } from './modules/tin_score.nf'
3335

3436
genome_index_ch = Channel.fromPath(params.genome_index, checkIfExists: true).collect()
3537
annotation_gtf_ch = Channel.fromPath(params.annotation_gtf, checkIfExists: true).collect()
@@ -55,6 +57,22 @@ workflow preprocessing {
5557
bam_low_dupl_tupl
5658
}
5759

60+
// Subworkflow for TIN score calculation
61+
workflow calculate_tin_scores {
    // Takes a channel of BAM inputs and emits the TSV output of CALCULATE_TIN_SCORES.
    take:
        input_bam

    main:
        // Convert the annotation GTF into the BED12 file used as the TIN reference
        TIN_GTF2BED(annotation_gtf_ch)

        // Run the TIN calculation on each BAM against that BED12 file
        CALCULATE_TIN_SCORES(input_bam, TIN_GTF2BED.out.transcripts_bed12)

    emit:
        tin_scores_tsv = CALCULATE_TIN_SCORES.out.tin_scores_tsv
}
75+
5876
// Subworkflow for TECtool analysis and downstream steps
5977
workflow tectool_analysis {
6078
take:
@@ -113,6 +131,7 @@ workflow {
113131
preprocessing(input_fastq_ch)
114132
tectool_analysis(preprocessing.out.bam_low_dupl_tupl)
115133
intron_retention(preprocessing.out.bam_low_dupl_tupl)
134+
calculate_tin_scores(preprocessing.out.bam_low_dupl_tupl)
116135
}
117136
}
118137
if (params.run_mode == 'preprocessing') {
@@ -134,6 +153,12 @@ workflow {
134153
tectool_analysis(input_bam_ch)
135154
}
136155
}
156+
if (params.run_mode == 'tin_score') {
157+
input_bam_ch = Channel.fromPath(params.input_bam, checkIfExists: true).map { bam_path -> tuple(bam_path.baseName, bam_path) }
158+
input_bam_ch.each {
159+
calculate_tin_scores(input_bam_ch)
160+
}
161+
}
137162
if (params.run_mode == 'intron') {
138163
input_bam_ch = Channel.fromPath(params.input_bam, checkIfExists: true).map { bam_path -> tuple(bam_path.baseName, bam_path) }
139164
input_bam_ch.each {

modules/salmon.nf

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,20 +33,22 @@ process SALMON_INDEX {
3333
tag { library }
3434

3535
// publishDir "${params.out_dir}/${library}_results", mode: 'copy', pattern: '*'
36-
36+
publishDir "${params.log_dir}/${library}_logs", mode: 'copy', pattern: '*.log'
37+
3738
input:
3839
tuple val(library), file(fasta)
3940

4041
output:
41-
tuple val(library), path('*'), emit: salmon_index
42+
tuple val(library), path('*_transcripts_index'), emit: salmon_index
43+
path '*.log', emit: log
4244

4345
script:
4446
"""
4547
salmon index \
4648
--transcripts ${fasta} \
4749
--index ${library}_transcripts_index \
4850
--keepDuplicates \
49-
--threads ${params.threads_pe}
51+
--threads ${params.threads_pe} &> ${library}_salmon_index.log
5052
"""
5153
}
5254

@@ -57,14 +59,16 @@ process SALMON_QUANTIFY {
5759
tag { library }
5860

5961
publishDir "${params.out_dir}/${library}_results", mode: 'copy', pattern: '*_quant.tsv'
60-
62+
publishDir "${params.log_dir}/${library}_logs", mode: 'copy', pattern: '*.log'
63+
6164
input:
6265
tuple val(library), path(index)
6366
tuple val(library_1), path(fastq_1)
6467
tuple val(library_2), path(fastq_2)
6568

6669
output:
6770
tuple val(library), path('*_quant.tsv'), emit: salmon_counts
71+
path '*.log', emit: log
6872

6973
script:
7074
"""
@@ -75,7 +79,7 @@ process SALMON_QUANTIFY {
7579
--validateMappings \
7680
--seqBias \
7781
--output ${library}_transcript_quant \
78-
--threads ${params.threads_pe}
82+
--threads ${params.threads_pe} &> ${library}_salmon_quant.log
7983
mv ${library}_transcript_quant/quant.sf ${library}_quant.tsv
8084
"""
8185
}

modules/samtools.nf

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,6 @@
22

33
nextflow.enable.dsl=2
44

5-
// process SAMTOOLS_INDEX {
6-
//
7-
// label "samtools"
8-
//
9-
// tag { library }
10-
//
11-
// // publishDir "${params.out_dir}/${library}_results", mode: 'copy', pattern: "*.bai"
12-
//
13-
// input:
14-
// tuple val(library), path(bam)
15-
//
16-
// output:
17-
// tuple val(library), path('*.bai'), emit: index
18-
//
19-
// script:
20-
// """
21-
// samtools index -@ ${params.threads_se} -M ${bam}
22-
// """
23-
// }
24-
255
process SAMTOOLS_GET_UNIQUE_MAPPERS {
266

277
label "samtools"
@@ -81,8 +61,8 @@ process SAMTOOLS_BAM2FASTQ {
8161

8262
tag { library }
8363

84-
// publishDir "${params.out_dir}/${library}_results", mode: 'copy', pattern: "*_1.fastq"
85-
// publishDir "${params.out_dir}/${library}_results", mode: 'copy', pattern: "*_2.fastq"
64+
publishDir "${params.out_dir}/${library}_results", mode: 'copy', pattern: "*_1.fastq"
65+
publishDir "${params.out_dir}/${library}_results", mode: 'copy', pattern: "*_2.fastq"
8666
publishDir "${params.log_dir}/${library}_logs", mode: 'copy', pattern: '*.log'
8767

8868
input:
@@ -91,6 +71,7 @@ process SAMTOOLS_BAM2FASTQ {
9171
output:
9272
tuple val("${library}_1"), path("${library}_1.fastq"), emit: fastq1_tuple
9373
tuple val("${library}_2"), path("${library}_2.fastq"), emit: fastq2_tuple
74+
path '*.log', emit: log
9475

9576
script:
9677
"""

modules/tectool.nf

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ process TECTOOL {
99
tag { library_split }
1010

1111
publishDir "${params.out_dir}/${library}_results", mode: 'copy', pattern: "*/*.tsv"
12+
publishDir "${params.out_dir}/${library}_results", mode: 'copy', pattern: "*.bai"
1213
publishDir "${params.out_dir}/${library}_results", mode: 'copy', pattern: "*_enriched_annotation.gtf"
1314
publishDir "${params.log_dir}/${library}_logs", mode: 'copy', pattern: '*.log'
1415

@@ -22,6 +23,7 @@ process TECTOOL {
2223
output:
2324
tuple val(library_split), path('*.gtf'), emit: enriched_gtf
2425
path '*/*.tsv', emit: tsv
26+
path '*.log', emit: log
2527

2628

2729
script:
@@ -33,7 +35,8 @@ process TECTOOL {
3335
--bam ${bam_split} \
3436
--genome ${genome_fa} \
3537
--num_cores ${params.threads_se} \
36-
--output_dir ${library_split}_tectool &> ${library_split}_tectool.log
38+
--output_dir ${library_split}_tectool \
39+
&> ${library_split}_tectool.log
3740
mv ${library_split}_tectool/enriched_annotation.gtf ${library_split}_enriched_annotation.gtf
3841
"""
3942
}
@@ -42,9 +45,10 @@ process TECTOOL_MERGE {
4245

4346
label 'bedtools'
4447

45-
tag { library }
48+
tag { "${library}: ${library_1}, ${library_2}" }
4649

4750
publishDir "${params.out_dir}/${library}_results", mode: 'copy', pattern: "*_merged.gtf"
51+
publishDir "${params.log_dir}/${library}_logs", mode: 'copy', pattern: '*.log'
4852

4953
input:
5054
tuple val(library), file(bam)
@@ -53,13 +57,15 @@ process TECTOOL_MERGE {
5357

5458
output:
5559
tuple val(library), path('*_merged.gtf'), emit: merged_gtf
60+
path '*.log', emit: log
5661

5762
script:
5863
"""
5964
echo -e "${gtf_files_1}\n${gtf_files_2}" > ${library}_tectool_annotation_files.tsv
6065
tectool_add_novel_transcripts_to_gtf_file \
6166
--list_of_gtf_files ${library}_tectool_annotation_files.tsv \
62-
--out-dir ${library}_tectool_merged_annotations
67+
--out-dir ${library}_tectool_merged_annotations \
68+
&> ${library}_tectool_merged.log
6369
mv ${library}_tectool_merged_annotations/merged_annotation.gtf ${library}_tectool_annotation_merged.gtf
6470
"""
6571
}

modules/tin_score.nf

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/usr/bin/env nextflow
2+
3+
nextflow.enable.dsl=2
4+
5+
process TIN_GTF2BED {
    // Converts the annotation GTF into a BED12 file for the TIN score calculation.
    //
    // Input:  annotation_gtf    - annotation file in GTF format
    // Output: transcripts_bed12 - the generated BED12 file
    //         log               - stderr of gtf2bed12

    label 'gtf2bed'

    publishDir "${params.out_dir}", mode: 'copy', pattern: "*.bed"
    publishDir "${params.log_dir}", mode: 'copy', pattern: "*.log"

    input:
    path annotation_gtf

    output:
    path ('*.bed'), emit: transcripts_bed12
    // publishDir only copies declared outputs, so the log must be listed here
    // for the '*.log' publishDir rule above to take effect.
    path '*.log', emit: log

    // Steps of the script below:
    //  1. rename the 'transcript_type' attribute to 'transcript_biotype'
    //     (presumably the attribute name gtf2bed12 expects - TODO confirm)
    //  2. sort by column 1, then column 4 ascending, then column 5 descending
    //  3. convert the sorted GTF to BED12, capturing stderr in the log file
    script:
    """
    sed 's/transcript_type/transcript_biotype/g' ${annotation_gtf} > ${annotation_gtf}.biotype
    sort -k1,1 -k4,4n -k5,5nr ${annotation_gtf}.biotype > ${annotation_gtf}.sorted
    gtf2bed12 --gtf ${annotation_gtf}.sorted --bed12 full_transcripts_protein_coding.bed 2> ${annotation_gtf}_gtf2bed.log
    """
}
25+
26+
process CALCULATE_TIN_SCORES {
    // Indexes a BAM file and runs calculate-tin.py on it against a BED12 reference.
    //
    // Input:  (library, bam)    - sample name and its alignment file
    //         transcripts_bed12 - BED12 file passed to calculate-tin.py via -r
    // Output: tin_scores_tsv    - stdout of calculate-tin.py (<library>_TIN_score.tsv)
    //         log               - stderr of calculate-tin.py

    label 'calculate_tin'

    tag { library }

    publishDir "${params.out_dir}/${library}_results", mode: 'copy', pattern: "*.tsv"
    publishDir "${params.log_dir}/${library}_logs", mode: 'copy', pattern: "*.log"

    input:
    tuple val(library), path(bam)
    path transcripts_bed12

    output:
    path ('*.tsv'), emit: tin_scores_tsv
    // publishDir only copies declared outputs, so the log must be listed here
    // for the '*.log' publishDir rule above to take effect.
    path '*.log', emit: log

    // The BAM is indexed inside the task before calculate-tin.py runs.
    script:
    """
    samtools index -@ ${params.threads_se} -M ${bam}
    calculate-tin.py -i ${bam} -r ${transcripts_bed12} --names ${library} -p ${params.threads_se} > ${library}_TIN_score.tsv 2> ${library}_tin_scores.log
    """
}

nextflow.config

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ profiles {
4747
}
4848
conda {
4949
conda.enabled = true
50-
conda.channels = 'bioconda,conda-forge,defaults'
51-
process.conda = 'install/environment.yml'
50+
process.conda = "${HOME}/miniconda3/envs/ipa-immune"
5251
}
5352
}

0 commit comments

Comments
 (0)