Skip to content

Commit 05c9220

Browse files
authored
Merge pull request #1 from bayraktar1/integrate_FLAIR
Integrate flair
2 parents a251d95 + b6d8a72 commit 05c9220

File tree

2 files changed

+198
-10
lines changed

2 files changed

+198
-10
lines changed

config/config.yaml

+10
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,15 @@ mincounts: "5"
2020

2121
mindatasets: "2"
2222

23+
# FLAIR parameters
24+
25+
flair_correct_window: 10
26+
27+
flair_collapse_quality: 1
28+
29+
flair_abundance_quality: 1
30+
31+
32+
2333
# threads
2434
threads: 10

workflow/Snakefile

+188-10
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@ READS = [filepath for filepath in Path(READSDIR).glob('**/*')]
1515

1616
rule all:
1717
input:
18-
# clean_sam = expand( OUTDIR / "TALON" / "cleaned_alignments" / "{sample}" / "{sample}_clean.sam", sample=[ ".".join(read.name.split('.')[:-1]) for read in READS]),
19-
# database = OUTDIR / "TALON" / "talon.db",
20-
# OUTDIR / "TALON" / "config.csv",
21-
# database_ann = OUTDIR / "TALON" / "ann_talon.db"
22-
# filtered_transcripts = OUTDIR / "TALON" / "filtered_transcripts.csv"
23-
abundance=OUTDIR / "TALON" / 'filtered_talon_abundance_filtered.tsv',
24-
GTF = OUTDIR / "TALON" / 'filtered_talon.gtf'
25-
# labeled_sam = expand(OUTDIR / "TALON" / "labeled" / "{sample}_labeled.sam",sample=[read.name.split('.')[0] for read in READS]),
26-
# config = OUTDIR / "TALON" / "config.csv",
27-
# database = OUTDIR / "TALON" / "talon.db"
18+
# abundance=OUTDIR / "TALON" / 'filtered_talon_abundance_filtered.tsv',
19+
# GTF = OUTDIR / "TALON" / 'filtered_talon.gtf',
20+
# bam = expand(OUTDIR / "alignments" / "BAM" / "{sample}_sorted.bam", sample=[ ".".join(read.name.split('.')[:-1]) for read in READS]),
21+
# bed12 = expand(OUTDIR / "FLAIR" / "BED12" / "{sample}.bed", sample=[ ".".join(read.name.split('.')[:-1]) for read in READS])
22+
# bed = expand(OUTDIR / "FLAIR" / "BED12" / "{sample}.bed", sample=[ ".".join(read.name.split('.')[:-1]) for read in READS])
23+
# bed_corrected = expand(OUTDIR / "FLAIR" / "corrected" / "{sample}" / "{sample}_all_corrected.bed", sample=[ ".".join(read.name.split('.')[:-1]) for read in READS])
24+
# bed_concatenated = OUTDIR / "FLAIR" / "concatenated_all_corrected.bed"
25+
# gtf = OUTDIR / "FLAIR" / "COLLAPSE" / "flair.collapse.isoforms.gtf"
26+
# config = OUTDIR / "FLAIR" / "manifest.tsv"
27+
abundance = OUTDIR / "FLAIR" / "quantify" / "flair_counts_matrix.tsv"
2828

2929

3030
rule minimap2_align:
@@ -52,6 +52,24 @@ rule minimap2_align:
5252
{input.fq} > {output.sam_files}
5353
'''
5454

55+
rule sam_to_bam:
    '''
    Sorts the minimap2 SAM alignment of one sample into a BAM file and
    indexes it.

    `samtools sort` accepts SAM input directly, so no separate
    `samtools view` conversion is needed. The index is declared as an
    output so Snakemake tracks (and cleans up) both files together.
    '''
    input:
        sam = rules.minimap2_align.output
    output:
        bam = OUTDIR / "alignments" / "BAM" / "{sample}_sorted.bam",
        bai = OUTDIR / "alignments" / "BAM" / "{sample}_sorted.bam.bai"
    threads: 10
    singularity:
        "docker://quay.io/biocontainers/samtools:1.14--hb421002_0"
    shell:
        '''
        samtools sort -@ {threads} -O bam -o {output.bam} {input.sam}
        samtools index -@ {threads} {output.bam} {output.bai}
        '''
5573

5674
# TALON
5775
rule get_SJs_from_gtf:
@@ -260,3 +278,163 @@ rule talon_create_GTF:
260278
--o {params.outdir}
261279
'''
262280

281+
# FLAIR
282+
# rule flair_bam_to_bed12:
283+
# input:
284+
# bam = rules.sam_to_bam.output.bam
285+
# params:
286+
# outdir = lambda wildcards: OUTDIR / "FLAIR" / "BED12" / wildcards.sample
287+
# output:
288+
# bed12 = OUTDIR / "FLAIR" / "BED12" / "{sample}.bed"
289+
# threads: 1
290+
# conda:
291+
# "envs/flair_conda_env.yaml"
292+
# shell:
293+
# '''
294+
# scripts/bam2Bed12.py --input_bam {input.bam} > {output.bed12}
295+
# '''
296+
297+
298+
rule flair_align:
    '''
    Aligns the reads of one sample against the reference genome with
    `flair.py align` and smooths gaps in the alignment.

    FLAIR is given an output prefix (params.outdir) rather than a file
    name; the declared output path is that prefix plus ".bed".
    '''
    input:
        genome = genome_fasta,
        fq = READSDIR / "{sample}.fastq"
    params:
        outdir = lambda wildcards: OUTDIR / "FLAIR" / "BED12" / wildcards.sample
    output:
        bed = OUTDIR / "FLAIR" / "BED12" / "{sample}.bed"
    threads: 10
    singularity:
        'docker://quay.io/biocontainers/flair:1.5--hdfd78af_4'
    shell:
        # Every continuation backslash is preceded by a space so that the
        # arguments stay separated regardless of how the next line is indented.
        '''
        flair.py align \
            --genome {input.genome} \
            --reads {input.fq} \
            --threads {threads} \
            --nvrna \
            --version1.3 \
            --output {params.outdir}
        '''
323+
324+
rule flair_correct:
    '''
    Runs `flair.py correct` on the aligned BED of one sample, using the
    reference genome and the existing annotation GTF to correct
    misaligned splice sites.

    The splice-site window comes from config["flair_correct_window"].
    params.outdir is an output prefix; the declared output path is that
    prefix plus "_all_corrected.bed".
    '''
    output:
        bed_corrected = OUTDIR / "FLAIR" / "corrected" / "{sample}" / "{sample}_all_corrected.bed"
    input:
        bed = rules.flair_align.output.bed,
        genome = genome_fasta,
        annotation = existing_annotation
    params:
        window = config["flair_correct_window"],
        outdir = lambda wildcards: OUTDIR / "FLAIR" / "corrected" / wildcards.sample / wildcards.sample
    singularity:
        "docker://quay.io/biocontainers/flair:1.5--hdfd78af_4"
    threads: 10
    shell:
        '''
        flair.py correct \
            --genome {input.genome} \
            --query {input.bed} \
            --gtf {input.annotation} \
            --nvrna \
            --threads {threads} \
            --window {params.window} \
            --output {params.outdir}
        '''
352+
353+
rule flair_concatenate:
    '''
    Combines the corrected BED12 files of all samples into one file.

    The sample list is derived from READS by dropping the last
    dot-separated suffix of each file name.
    '''
    input:
        bed_corrected = expand(OUTDIR / "FLAIR" / "corrected" / "{sample}" / "{sample}_all_corrected.bed", sample=[".".join(read.name.split('.')[:-1]) for read in READS])
    output:
        bed_concatenated = OUTDIR / "FLAIR" / "concatenated_all_corrected.bed"
    # A single `cat` cannot make use of more than one thread.
    threads: 1
    singularity:
        'docker://quay.io/biocontainers/flair:1.5--hdfd78af_4'
    shell:
        # Truncate with `>` instead of appending with `>>`, so a leftover
        # file can never end up with duplicated records.
        '''
        cat {input.bed_corrected} > {output.bed_concatenated}
        '''
368+
369+
rule flair_collapse:
    '''
    Runs `flair.py collapse` on the concatenated corrected BED to define
    high-confidence isoforms.

    The minimum quality comes from config["flair_collapse_quality"].
    params.outdir is an output prefix; the declared FASTA path is that
    prefix plus ".isoforms.fa".
    '''
    output:
        fa = OUTDIR / "FLAIR" / "COLLAPSE" / "flair.collapse.isoforms.fa"
    input:
        genome = genome_fasta,
        annotation = existing_annotation,
        bed_concatenated = rules.flair_concatenate.output.bed_concatenated
    params:
        # NOTE(review): the raw reads are passed as a param, so Snakemake
        # does not track them as a dependency of this rule - confirm that
        # this is intended.
        reads = READS,
        quality = config["flair_collapse_quality"],
        temp_dir = OUTDIR / "FLAIR" / "COLLAPSE" / "collapse_logs",
        outdir = OUTDIR / "FLAIR" / "COLLAPSE" / "flair.collapse"
    singularity:
        "docker://quay.io/biocontainers/flair:1.5--hdfd78af_4"
    threads: 10
    shell:
        '''
        flair.py collapse \
            --genome {input.genome} \
            --gtf {input.annotation} \
            --reads {params.reads} \
            --query {input.bed_concatenated} \
            --temp_dir {params.temp_dir} \
            --generate_map \
            --threads {threads} \
            --quality {params.quality} \
            --output {params.outdir}
        '''
400+
401+
rule flair_config:
    '''
    Creates the tab-separated read manifest that flair_quantify passes
    to `flair.py quantify`.

    One line is written per read file: the dataset name, the literal
    placeholders "condition" and "batch", and the path to the reads.
    '''
    input:
        reads = READS
    params:
        datasetnames = [".".join(read.name.split('.')[:-1]) for read in READS]
    output:
        config = OUTDIR / "FLAIR" / "manifest.tsv"
    threads: 1
    run:
        # Open the manifest once, in write mode: the file is rebuilt from
        # scratch on every run instead of being appended to, and it is not
        # reopened for each sample. The handle is deliberately not called
        # `config`, to avoid shadowing the global Snakemake config.
        with open(output.config, 'w') as manifest:
            for read, name in zip(input.reads, params.datasetnames):
                manifest.write("%s\tcondition\tbatch\t%s\n" % (name, read))
416+
417+
rule flair_quantify:
    '''
    Quantify FLAIR isoform usage across samples using minimap2.

    Takes the read manifest written by flair_config and the collapsed
    isoform FASTA from flair_collapse, and writes the counts matrix.
    The minimum quality comes from config["flair_abundance_quality"].
    '''
    input:
        manifest = rules.flair_config.output.config,
        coll_fasta = rules.flair_collapse.output.fa
    params:
        quality = config["flair_abundance_quality"]
    output:
        abundance = OUTDIR / "FLAIR" / "quantify" / "flair_counts_matrix.tsv"
    threads: 10
    singularity:
        'docker://quay.io/biocontainers/flair:1.5--hdfd78af_4'
    shell:
        # Every option line except the last must end in a continuation
        # backslash; otherwise `--output ...` is executed as a separate
        # shell command and FLAIR never receives the output path.
        '''
        flair.py quantify \
            --reads_manifest {input.manifest} \
            --isoforms {input.coll_fasta} \
            --threads {threads} \
            --tpm \
            --quality {params.quality} \
            --output {output.abundance}
        '''

0 commit comments

Comments
 (0)