@@ -15,16 +15,16 @@ READS = [filepath for filepath in Path(READSDIR).glob('**/*')]
15
15
16
16
rule all:
    '''
    Default target of the workflow.

    Requests the FLAIR counts matrix produced by flair_quantify, which in
    turn requires the manifest (flair_config) and the collapsed isoforms
    (flair_collapse and the rules it depends on).

    The commented-out entries are intermediate FLAIR targets and the TALON
    targets; uncomment one of them to build only up to that step.
    '''
    input:
        # abundance = OUTDIR / "TALON" / 'filtered_talon_abundance_filtered.tsv',
        # GTF = OUTDIR / "TALON" / 'filtered_talon.gtf',
        # bam = expand(OUTDIR / "alignments" / "BAM" / "{sample}_sorted.bam", sample=[".".join(read.name.split('.')[:-1]) for read in READS]),
        # bed12 = expand(OUTDIR / "FLAIR" / "BED12" / "{sample}.bed", sample=[".".join(read.name.split('.')[:-1]) for read in READS]),
        # bed = expand(OUTDIR / "FLAIR" / "BED12" / "{sample}.bed", sample=[".".join(read.name.split('.')[:-1]) for read in READS]),
        # bed_corrected = expand(OUTDIR / "FLAIR" / "corrected" / "{sample}" / "{sample}_all_corrected.bed", sample=[".".join(read.name.split('.')[:-1]) for read in READS]),
        # bed_concatenated = OUTDIR / "FLAIR" / "concatenated_all_corrected.bed",
        # gtf = OUTDIR / "FLAIR" / "COLLAPSE" / "flair.collapse.isoforms.gtf",
        # config = OUTDIR / "FLAIR" / "manifest.tsv",
        # This path must be spelled exactly like the output of flair_quantify
        # (no stray spaces in the path components), otherwise Snakemake finds
        # no rule that can produce the requested file.
        abundance = OUTDIR / "FLAIR" / "quantify" / "flair_counts_matrix.tsv"
28
28
29
29
30
30
rule minimap2_align :
@@ -52,6 +52,24 @@ rule minimap2_align:
52
52
{input.fq} > {output.sam_files}
53
53
'''
54
54
55
rule sam_to_bam:
    '''
    Turns the SAM alignment of one sample (output of minimap2_align) into
    a sorted BAM file and indexes it.

    The index written by `samtools index` is not declared as an output of
    this rule.
    '''
    input:
        sam = rules.minimap2_align.output
    output:
        bam = OUTDIR / "alignments" / "BAM" / "{sample}_sorted.bam"
    params:
        # Per-sample prefix; not referenced by the shell command below.
        outdir = lambda wildcards: OUTDIR / "alignments" / "BAM" / wildcards.sample
    threads: 10
    singularity:
        "docker://quay.io/biocontainers/samtools:1.14--hb421002_0"
    shell:
        '''
        samtools view -Sb {input.sam} | samtools sort -@ {threads} -o {output.bam}
        samtools index {output.bam}
        '''
55
73
56
74
# TALON
57
75
rule get_SJs_from_gtf :
@@ -260,3 +278,163 @@ rule talon_create_GTF:
260
278
--o {params.outdir}
261
279
'''
262
280
281
+ #FLAIR
282
+ # rule flair_bam_to_bed12:
283
+ # input:
284
+ # bam = rules.sam_to_bam.output.bam
285
+ # params:
286
+ # outdir = lambda wildcards: OUTDIR / "FLAIR" / "BED12" / wildcards.sample
287
+ # output:
288
+ # bed12 = OUTDIR / "FLAIR" / "BED12" / "{sample}.bed"
289
+ # threads: 1
290
+ # conda:
291
+ # "envs/flair_conda_env.yaml"
292
+ # shell:
293
+ # '''
294
+ # scripts/bam2Bed12.py --input_bam {input.bam} > {output.bed12}
295
+ # '''
296
+
297
+
298
rule flair_align:
    '''
    Runs `flair.py align` for one sample: the FASTQ reads are aligned
    against the reference genome and the result is written as
    <BED12 dir>/<sample>.bed.
    '''
    input:
        fq = READSDIR / "{sample}.fastq",
        genome = genome_fasta
    output:
        bed = OUTDIR / "FLAIR" / "BED12" / "{sample}.bed"
    params:
        # Output prefix handed to flair; the declared output is this
        # prefix plus the ".bed" extension.
        outdir = lambda wildcards: OUTDIR / "FLAIR" / "BED12" / wildcards.sample
    threads: 10
    singularity:
        'docker://quay.io/biocontainers/flair:1.5--hdfd78af_4'
    shell:
        '''
        flair.py align \
        --genome {input.genome}\
        --reads {input.fq}\
        --threads {threads}\
        --nvrna \
        --version1.3 \
        --output {params.outdir}
        '''
323
+
324
rule flair_correct:
    '''
    Runs `flair.py correct` on the aligned BED file of one sample, using
    the reference genome and the existing annotation (GTF), and produces
    <sample>_all_corrected.bed in a per-sample directory.
    '''
    input:
        bed = rules.flair_align.output.bed,
        annotation = existing_annotation,
        genome = genome_fasta
    output:
        bed_corrected = OUTDIR / "FLAIR" / "corrected" / "{sample}" / "{sample}_all_corrected.bed"
    params:
        # Splice-site correction window, taken from the workflow config.
        window = config["flair_correct_window"],
        # Output prefix <corrected dir>/<sample>/<sample>; the declared
        # output is this prefix plus "_all_corrected.bed".
        outdir = lambda wildcards: OUTDIR / "FLAIR" / "corrected" / wildcards.sample / wildcards.sample
    threads: 10
    singularity:
        'docker://quay.io/biocontainers/flair:1.5--hdfd78af_4'
    shell:
        '''
        flair.py correct \
        --genome {input.genome} \
        --query {input.bed} \
        --gtf {input.annotation} \
        --nvrna \
        --threads {threads} \
        --window {params.window} \
        --output {params.outdir}
        '''
352
+
353
rule flair_concatenate:
    '''
    Combines the corrected BED12 files of all samples into one file.

    Sample names are the read file names without their last extension.
    '''
    input:
        bed_corrected = expand(OUTDIR / "FLAIR" / "corrected" / "{sample}" / "{sample}_all_corrected.bed", sample=[".".join(read.name.split('.')[:-1]) for read in READS])
    output:
        bed_concatenated = OUTDIR / "FLAIR" / "concatenated_all_corrected.bed"
    # `cat` is single-threaded; do not reserve more cores than it can use.
    threads: 1
    singularity:
        'docker://quay.io/biocontainers/flair:1.5--hdfd78af_4'
    shell:
        # Truncate (>) rather than append (>>): the output must contain each
        # input exactly once, even if a stale copy of the file is present.
        '''
        cat {input.bed_corrected} > {output.bed_concatenated}
        '''
368
+
369
rule flair_collapse:
    '''
    Runs `flair.py collapse` on the concatenated corrected reads to define
    isoforms, using the raw reads, the reference genome and the existing
    annotation. Declares the collapsed isoform FASTA as output.
    '''
    input:
        bed_concatenated = rules.flair_concatenate.output.bed_concatenated,
        genome = genome_fasta,
        annotation = existing_annotation,
        # The raw reads are real dependencies of this step, so they are
        # declared as input (as flair_config does) instead of params; this
        # lets Snakemake track them.
        reads = READS
    params:
        temp_dir = OUTDIR / "FLAIR" / "COLLAPSE" / "collapse_logs",
        # Output prefix; the declared output is this prefix plus
        # ".isoforms.fa".
        outdir = OUTDIR / "FLAIR" / "COLLAPSE" / "flair.collapse",
        # Quality threshold taken from the workflow config.
        quality = config["flair_collapse_quality"]
    output:
        fa = OUTDIR / "FLAIR" / "COLLAPSE" / "flair.collapse.isoforms.fa"
    threads: 10
    singularity:
        'docker://quay.io/biocontainers/flair:1.5--hdfd78af_4'
    shell:
        '''
        flair.py collapse \
        --genome {input.genome} \
        --gtf {input.annotation} \
        --reads {input.reads} \
        --query {input.bed_concatenated} \
        --temp_dir {params.temp_dir} \
        --generate_map \
        --threads {threads} \
        --quality {params.quality} \
        --output {params.outdir}
        '''
400
+
401
rule flair_config:
    '''
    Creates the read manifest used by flair_quantify.

    One tab-separated line is written per read file:
    <sample name>, "condition", "batch", <path to the read file>.
    The condition and batch columns are fixed placeholder strings.
    '''
    input:
        reads = READS
    params:
        # Sample name = read file name without its last extension.
        datasetnames = [".".join(read.name.split('.')[:-1]) for read in READS]
    output:
        config = OUTDIR / "FLAIR" / "manifest.tsv"
    threads: 1
    run:
        # Open the manifest once, in write mode, so the file always holds
        # exactly one line per read file. The handle is deliberately not
        # named `config`, to avoid shadowing the workflow's config dict.
        # NOTE(review): the FLAIR documentation appears to require that the
        # first three manifest fields contain no underscores - confirm that
        # the sample names derived from the file names comply.
        with open(output.config, 'w') as manifest:
            for read, name in zip(input.reads, params.datasetnames):
                # Fields are separated by bare tabs; no padding spaces,
                # which would end up inside the field values.
                manifest.write(f"{name}\tcondition\tbatch\t{read}\n")
416
+
417
rule flair_quantify:
    '''
    Runs `flair.py quantify` with the read manifest and the collapsed
    isoform FASTA to produce the counts matrix across samples.
    '''
    input:
        manifest = rules.flair_config.output.config,
        coll_fasta = rules.flair_collapse.output.fa
    params:
        # Quality threshold taken from the workflow config.
        quality = config["flair_abundance_quality"]
    output:
        abundance = OUTDIR / "FLAIR" / "quantify" / "flair_counts_matrix.tsv"
    threads: 10
    singularity:
        'docker://quay.io/biocontainers/flair:1.5--hdfd78af_4'
    shell:
        # Every option line except the last must end with a line
        # continuation; without it the shell runs the remaining options
        # (here `--output ...`) as a separate command and flair never
        # receives the output path.
        '''
        flair.py quantify \
        --reads_manifest {input.manifest} \
        --isoforms {input.coll_fasta} \
        --threads {threads} \
        --tpm \
        --quality {params.quality} \
        --output {output.abundance}
        '''
0 commit comments