Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions assets/merged_library_deseq2_clustering_header.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#id: 'mlib_deseq2_clustering'
#section_name: 'MERGED LIB: DESeq2 sample similarity'
#description: "Matrix is generated from clustering with Euclidean distances between
# <a href='https://bioconductor.org/packages/release/bioc/html/DESeq2.html' target='_blank'>DESeq2</a>
# rlog values for each sample
# in the <a href='https://github.com/nf-core/atacseq/blob/master/bin/deseq2_qc.r'><code>deseq2_qc.r</code></a> script."
#plot_type: 'heatmap'
#anchor: 'mlib_deseq2_clustering'
#pconfig:
# title: 'DESeq2: Heatmap of the sample-to-sample distances'
# xlab: True
# reverseColors: True
11 changes: 11 additions & 0 deletions assets/merged_library_deseq2_pca_header.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#id: 'mlib_deseq2_pca'
#section_name: 'MERGED LIB: DESeq2 PCA plot'
#description: "PCA plot of the samples in the experiment.
# These values are calculated using <a href='https://bioconductor.org/packages/release/bioc/html/DESeq2.html'>DESeq2</a>
# in the <a href='https://github.com/nf-core/atacseq/blob/master/bin/deseq2_qc.r'><code>deseq2_qc.r</code></a> script."
#plot_type: 'scatter'
#anchor: 'mlib_deseq2_pca'
#pconfig:
# title: 'DESeq2: Principal component plot'
# xlab: PC1
# ylab: PC2
12 changes: 12 additions & 0 deletions assets/merged_replicate_deseq2_clustering_header.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#id: 'mrep_deseq2_clustering'
#section_name: 'MERGED REP: DESeq2 sample similarity'
#description: "Matrix is generated from clustering with Euclidean distances between
# <a href='https://bioconductor.org/packages/release/bioc/html/DESeq2.html' target='_blank'>DESeq2</a>
# rlog values for each sample
# in the <a href='https://github.com/nf-core/atacseq/blob/master/bin/deseq2_qc.r'><code>deseq2_qc.r</code></a> script."
#plot_type: 'heatmap'
#anchor: 'mrep_deseq2_clustering'
#pconfig:
# title: 'DESeq2: Heatmap of the sample-to-sample distances'
# xlab: True
# reverseColors: True
193 changes: 185 additions & 8 deletions assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,193 @@ module_order:
- "*.final.out"
- custom_content

# Other MultiQC config stuff here
# Custom-content sections built from pipeline-generated TSV files.
# Each key is a section id and must have a matching entry under `sp`
# that tells MultiQC which file feeds it. Sections sharing a `parent_id`
# are grouped together under the same parent heading.
custom_data:
  mapping:
    parent_id: mapping
    parent_name: "Mapping"
    file_format: "tsv"
    section_name: "Mapping"
    description: "The mapping metrics for each experiment"
    plot_type: "bargraph"
  dedup_reads:
    parent_id: dedup
    parent_name: "Deduplication"
    file_format: "tsv"
    section_name: "Reads"
    description: "The number of reads before and after PCR deduplication for each experiment"
    plot_type: "bargraph"
    pconfig:
      ylab: "Count"
      #stacking: False
      cpswitch: False
      tt_percentages: False
  dedup_ratio:
    parent_id: dedup
    parent_name: "Deduplication"
    file_format: "tsv"
    section_name: "Ratio"
    description: "The PCR deduplication ratio for each experiment"
    plot_type: "bargraph"
    pconfig:
      ylab: "Ratio"
      #stacking: False
      cpswitch: False
      tt_percentages: False
  dedup_mean_umis:
    parent_id: dedup
    parent_name: "Deduplication"
    file_format: "tsv"
    section_name: "Mean UMIs"
    description: "Mean number of unique UMIs per position for each experiment"
    plot_type: "bargraph"
    pconfig:
      ylab: "Mean number"
      #stacking: False
      cpswitch: False
      tt_percentages: False
  crosslinks_counts:
    parent_id: crosslinks
    parent_name: "Crosslinks"
    file_format: "tsv"
    section_name: "Counts"
    description: "The number of crosslinks or crosslink sites for each experiment"
    plot_type: "bargraph"
    pconfig:
      ylab: "Count"
      #stacking: False
      cpswitch: False
      tt_percentages: False
  crosslinks_ratio:
    parent_id: crosslinks
    parent_name: "Crosslinks"
    file_format: "tsv"
    section_name: "Ratios"
    description: "The ratio of number of cDNA mapping to crosslink positions for each experiment"
    #plot_type: 'bargraph'
    pconfig:
      # The plotted value is a ratio, so label the axis accordingly.
      ylab: "Ratio"
      #stacking: False
      cpswitch: False
      tt_percentages: False
      tt_decimals: 2
  peaks_counts:
    parent_id: peaks
    parent_name: "Peaks"
    file_format: "tsv"
    section_name: "Counts"
    description: "The total number of peaks called by each peak caller"
    plot_type: "bargraph"
    pconfig:
      ylab: "Number of peaks"
      #stacking: False
      cpswitch: False
      tt_percentages: False
  xlinks_in_peaks:
    parent_id: peaks
    parent_name: "Peaks"
    file_format: "tsv"
    section_name: "Crosslinks positions in peaks"
    description: "The total percentage of crosslinks within peaks for each peak caller"
    #plot_type: 'bargraph'
    pconfig:
      ylab: "Percentage of crosslinks"
      #stacking: False
      cpswitch: False
      tt_percentages: False
      tt_decimals: 2
      tt_suffix: "%"
  xlinksites_in_peaks:
    parent_id: peaks
    parent_name: "Peaks"
    file_format: "tsv"
    # Distinct from xlinks_in_peaks so the two sections can be told apart.
    section_name: "Crosslink sites in peaks"
    description: "The total percentage of crosslink sites within peaks for each peak caller"
    #plot_type: 'bargraph'
    pconfig:
      ylab: "Percentage of crosslink sites"
      #stacking: False
      cpswitch: False
      tt_percentages: False
      tt_decimals: 2
      tt_suffix: "%"
  peaks_xlinksite_coverage:
    parent_id: peaks
    parent_name: "Peaks"
    file_format: "tsv"
    section_name: "Peak-crosslink coverage"
    description: "The total percentage of nucleotides within peaks covered by a crosslink site"
    plot_type: "bargraph"
    pconfig:
      ylab: "Percentage of nucleotides within peaks"
      #stacking: False
      cpswitch: False
      tt_percentages: False
      tt_decimals: 2
      tt_suffix: "%"
  summary_type:
    parent_id: Summary
    parent_name: "Summary"
    file_format: "tsv"
    section_name: "Percentage of cDNA premap"
    description: "The total percentage of cDNA summary mapped"
    #plot_type: 'bargraph'
    pconfig:
      ylab: "Type"
      #stacking: False
      cpswitch: False
      tt_percentages: False
  summary_subtype:
    parent_id: Summary
    parent_name: "Summary"
    file_format: "tsv"
    section_name: "Percentage of cDNA premap subtypes"
    description: "The total percentage of cDNA subtypes mapped"
    #plot_type: 'bargraph'
    pconfig:
      ylab: "Type"
      #stacking: False
      cpswitch: False
      tt_percentages: False

# Search patterns that locate the TSV file for each custom_data section.
# Every key here must be identical to a section id under `custom_data`;
# a key with no matching section is never used to populate a plot.
sp:
  mapping:
    fn: "mapping.tsv"
  dedup_reads:
    fn: "dedup_reads.tsv"
  dedup_ratio:
    fn: "dedup_ratio.tsv"
  dedup_mean_umis:
    fn: "dedup_mean_umis.tsv"
  crosslinks_counts:
    fn: "xlinks_counts.tsv"
  crosslinks_ratio:
    fn: "xlinks_ratio.tsv"
  # Keyed as peaks_counts to match the custom_data section id.
  peaks_counts:
    fn: "total_peaks.tsv"
  xlinks_in_peaks:
    fn: "xlinks_in_peaks.tsv"
  xlinksites_in_peaks:
    fn: "xlinksites_in_peaks.tsv"
  peaks_xlinksite_coverage:
    fn: "peaks_xlinksite_coverage.tsv"
  summary_type:
    fn: "summary_type_metrics.tsv"
  summary_subtype:
    fn: "summary_subtype_metrics.tsv"

# Ordering of the custom-content sections listed below
# (NOTE(review): confirm these ids match the sections actually generated).
custom_content:
order:
- clipqc
- software-versions-by-process
- software-versions-unique

# Customise the module search patterns to speed up execution time
sp:
samtools/stats:
fn: "*.stats"
samtools/flagstat:
fn: "*.flagstat"
samtools/idxstats:
fn: "*.idxstats*"
# sp:
# samtools/stats:
# fn: "*.stats"
# samtools/flagstat:
# fn: "*.flagstat"
# samtools/idxstats:
# fn: "*.idxstats*"
# clipqc:
# fn: "*.txt"
18 changes: 10 additions & 8 deletions conf/modules.config
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@SimranChhabria could you please explain these changes to the modules.config? not understanding, cheers!

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@CharlotteAnne , it is the name change in the process to look for the correct one

Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ process {
========================================================================================
*/


if(params.run_genome_prep) {
process {
withName: '.*PREPARE_GENOME:GUNZIP_.*' {
Expand Down Expand Up @@ -71,7 +72,6 @@ if(params.run_genome_prep) {
path: { "${params.outdir}/00_genome" },
mode: "${params.publish_dir_mode}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.save_reference
]
}

Expand Down Expand Up @@ -185,6 +185,7 @@ if(params.run_genome_prep) {
}
}


/*
========================================================================================
PRE-PROCESSING
Expand Down Expand Up @@ -756,6 +757,7 @@ if(params.run_crosslinking) {
if(params.run_peakcalling && params.consensus_peak){
process {
withName: 'NFCORE_CLIPSEQ:CLIPSEQ:.*CONSENSUS_PEAK_TABLE:CONSENSUS_MAP' {
ext.prefix = { "${meta.id}_consensus_sorted" }
publishDir = [
path: { "${params.outdir}/05_peakcalling/consensus_peak_tables" },
mode: "${params.publish_dir_mode}",
Expand Down Expand Up @@ -1004,13 +1006,13 @@ if(params.run_reporting) {
]
}

// withName: 'CLIPSEQ:CLIPSEQ_CLIPQC' {
// publishDir = [
// path: { "${params.outdir}/06_reports/clipqc" },
// mode: "${params.publish_dir_mode}",
// saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
// ]
// }
withName: 'NFCORE_CLIPSEQ:CLIPSEQ:CLIPQC' {
publishDir = [
path: { "${params.outdir}/06_reports/clipqc" },
mode: "${params.publish_dir_mode}",
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'NFCORE_CLIPSEQ:CLIPSEQ:MULTIQC' {
ext.args = params.multiqc_title ? "-v --title \"$params.multiqc_title\"" : '-v'
Expand Down
38 changes: 32 additions & 6 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@ params {
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '8.GB'
max_time = '6.h'
max_cpus = 8
max_memory = '60.GB'
max_time = '24.h'

// Input data
input = 'https://raw.githubusercontent.com/nf-core/clipseq/refs/heads/feat-2-0/tests/test_new_samplesheet_FASTQ.csv'
source = "fastq"
// Inputs for testing dataset with yeast genome
//input = 'https://raw.githubusercontent.com/nf-core/clipseq/refs/heads/feat-2-0/tests/test_new_samplesheet_FASTQ.csv'

//input = '../tests/test_new_samplesheet_FASTQ.csv'
//source = "fastq"

// Genome references
fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/v_2_0/genome/yeast_MitoV.fa.gz"
Expand All @@ -44,6 +46,20 @@ params {
seg_resolved_gtf_genic = "https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/v_2_0/genome/yeast_MitoV_filtered_seg_genicOthertrue.resolved.gtf"
regions_resolved_gtf_genic = "https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/v_2_0/genome/yeast_MitoV_filtered_regions_genicOthertrue.resolved.gtf"


// Input data for full human testing
input = "./tests/test_new_samplesheet_FASTQ_human.csv"
fasta = "/data1/morrisq/chhabrs1/variant_calling/genome/GATK_GRCh38/Homo_sapiens.GRCh38.dna_sm.primary_assembly.fa.gz"
gtf = "/data1/morrisq/chhabrs1/variant_calling/genome/GATK_GRCh38/Homo_sapiens.GRCh38.109.gtf.gz"
ncrna_fasta = "https://raw.githubusercontent.com/nf-core/test-datasets/clipseq/v_2_0/genome/homosapiens_smallRNA.fa.gz"
source = "fastq"

// Genome references from s3 bucket
//fasta = 's3://nf-core-awsmegatests/clipseq/input_data/reference/GRCh38.primary_assembly.genome.fa.gz'
//gtf = 's3://nf-core-awsmegatests/clipseq/input_data/reference/gencode.v37.primary_assembly.annotation.gtf.gz'



// Logic
debug = true
save_reference = true
Expand All @@ -54,6 +70,16 @@ params {
save_align_intermed = true
skip_transcriptome = true

// Inputs for deseq2_qc
skip_deseq2_qc = false


// Pipeline params
umitools_bc_pattern = 'NNNNNNNNN'

// Don't call consensus
//consensus_peak = false



}
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
"installed_by": ["modules"]
},
"deseq2/differential": {
"branch": "master",
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
"installed_by": ["modules"]
},
"fastqc": {
"branch": "master",
"git_sha": "81880787133db07d9b4c1febd152c090eb8325dc",
Expand Down
4 changes: 4 additions & 0 deletions modules/local/clipqc/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ process CLIPQC {
path("icount/*")
path("paraclu/*")
path("clippy/*")
path("pureclip/*")
path("summary_type/*")
path("summary_subtype/*")
path("summary_gene/*")

output:
path "*.tsv" , emit: tsv
Expand Down
Loading
Loading