Skip to content

Commit

Permalink
Merge pull request #35 from MoTrPAC/fix/rnaseq_qc_script_vial_label_parsing
Browse files Browse the repository at this point in the history

#34 Fixed the sample_name parsing for the undetermined file
archanaraja authored Apr 3, 2020
2 parents 98ad27e + aeff8b1 commit 6953106
Showing 2 changed files with 22 additions and 1 deletion.
5 changes: 4 additions & 1 deletion collect_qc_metrics/rnaseq_qc.py
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@
import argparse
import pandas as pd
import os
import re
from functools import reduce
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
@@ -62,7 +63,9 @@

#get mean raw read count: average of the entries labelled 1 and 2 of the
#FastQC total_sequences column (presumably the raw R1/R2 rows - confirm against the MultiQC table)
reads_raw=(df_raw["FastQC_mqc-generalstats-fastqc-total_sequences"][1]+df_raw["FastQC_mqc-generalstats-fastqc-total_sequences"][2])/2
# Previous parse (the line this change replaces): keeps only the text before the
# first "_", so a value beginning "Undetermined_L001" is cut down to "Undetermined".
# Its result is overwritten by the regex-based parse below.
sample_name=df_raw["Sample"][0].split("_")[0]

# Extract the sample name by splitting the first value in the Sample column at the
# read-pair marker "_R1" or "_R2" and keeping everything before it.
# The character class is [12] (not [1,2]) so a literal comma is never treated as a
# read number; the raw string keeps the pattern free of escape-sequence surprises.
sample_name=re.split(r'_R[12]',(df_raw["Sample"][0]))[0]

#get read counts after trimming (reads): average of the entries labelled 3 and 4
#of the same column (presumably the trimmed R1/R2 rows - confirm against the MultiQC table)
reads_trim=(df_raw["FastQC_mqc-generalstats-fastqc-total_sequences"][3]+df_raw["FastQC_mqc-generalstats-fastqc-total_sequences"][4])/2
18 changes: 18 additions & 0 deletions examples/input_json/tasks/collect_qc_metrics/collect_qc_input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"rnaseqQC_report.rnaseqQC.phix_report": "gs://rna-seq_araja/PASS1A/Stanford/batch5_20200318/rnaseq_pipeline/13830c5a-e4cd-4dc6-bf0b-bd5258fe7386/call-bowtie2_phix/shard-80/Undetermined_L001_phix_report.txt",
"rnaseqQC_report.disk_space": "10",
"rnaseqQC_report.rnaseqQC.script": "gs://rna-seq_araja/test/rnaseq_qc.py",
"rnaseqQC_report.rnaseqQC.rRNA_report": "gs://rna-seq_araja/PASS1A/Stanford/batch5_20200318/rnaseq_pipeline/13830c5a-e4cd-4dc6-bf0b-bd5258fe7386/call-bowtie2_rrna/shard-80/Undetermined_L001_rn_rRNA_report.txt",
"rnaseqQC_report.num_threads": "1",
"rnaseqQC_report.rnaseqQC.SID": "Undetermined_L001",
"rnaseqQC_report.memory": "10",
"rnaseqQC_report.rnaseqQC.trim_summary": "gs://rna-seq_araja/PASS1A/Stanford/batch5_20200318/rnaseq_pipeline/13830c5a-e4cd-4dc6-bf0b-bd5258fe7386/call-cutadapt/shard-80/fastq_trim/Undetermined_L001_summary.txt",
"rnaseqQC_report.rnaseqQC.mapped_report": "gs://rna-seq_araja/PASS1A/Stanford/batch5_20200318/rnaseq_pipeline/13830c5a-e4cd-4dc6-bf0b-bd5258fe7386/call-sm/shard-80/Undetermined_L001_mapped_report.txt",
"rnaseqQC_report.rnaseqQC.globin_report": "gs://rna-seq_araja/PASS1A/Stanford/batch5_20200318/rnaseq_pipeline/13830c5a-e4cd-4dc6-bf0b-bd5258fe7386/call-bowtie2_globin/shard-80/Undetermined_L001_rn_globin_report.txt",
"rnaseqQC_report.num_preempt": "0",
"rnaseqQC_report.rnaseqQC.multiQCReports": ["gs://rna-seq_araja/PASS1A/Stanford/batch5_20200318/rnaseq_pipeline/13830c5a-e4cd-4dc6-bf0b-bd5258fe7386/call-mqc/shard-80/multiqc_prealign_report.tar.gz","gs://rna-seq_araja/PASS1A/Stanford/batch5_20200318/rnaseq_pipeline/13830c5a-e4cd-4dc6-bf0b-bd5258fe7386/call-mqc_pa/shard-80/multiqc_postalign_report.tar.gz"],
"rnaseqQC_report.rnaseqQC.star_log": "gs://rna-seq_araja/PASS1A/Stanford/batch5_20200318/rnaseq_pipeline/13830c5a-e4cd-4dc6-bf0b-bd5258fe7386/call-star_align/shard-80/star_out/Undetermined_L001.Log.final.out",
"rnaseqQC_report.rnaseqQC.docker": "gcr.io/motrpac-portal/motrpac_rnaseq:v0.1_04_20_19",
"rnaseqQC_report.rnaseqQC.umi_report": "gs://rna-seq_araja/PASS1A/Stanford/batch5_20200318/rnaseq_pipeline/13830c5a-e4cd-4dc6-bf0b-bd5258fe7386/call-udup/shard-80/Undetermined_L001_umi_report.txt"
}

0 comments on commit 6953106

Please sign in to comment.