diff --git a/AGENTS.md b/AGENTS.md index 8fd4f6f..5a85b3e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -68,7 +68,6 @@ src/ featurecounts/ mod.rs — Re-exports output output.rs — featureCounts-format output & biotype counting - genebody.rs — Gene body coverage profiling, Qualimap-compatible output preseq.rs — preseq lc_extrap library complexity extrapolation rseqc/ mod.rs — Re-exports all RSeQC modules + common helpers @@ -262,7 +261,7 @@ forwarded to `count_reads()` as the `skip_dup_check: bool` parameter). `infer_experiment:`, `read_duplication:`, `read_distribution:`, `junction_annotation:`, `junction_saturation:`, `inner_distance:`). Each has an `enabled: bool` toggle and tool-specific parameter overrides. CLI flags take precedence over config values. -- The YAML config also has sections for `tin:`, `preseq:`, `genebody_coverage:`, +- The YAML config also has sections for `tin:`, `preseq:`, `flagstat:`, `idxstats:`, and `samtools_stats:`. Each has an `enabled: bool` toggle. Preseq has additional parameters: `max_extrap`, `step_size`, `n_bootstraps`, `confidence_level`, `seed`, `max_terms`, `defects`. TIN has `sample_size` and @@ -270,10 +269,6 @@ forwarded to `count_reads()` as the `skip_dup_check: bool` parameter). - The `featurecounts.rs` module produces featureCounts-compatible output files (counts TSV, summary, biotype counts, and MultiQC files). These are generated in the same pass as the dupRadar analysis — no separate featureCounts run is needed. -- The genebody module (`genebody.rs`) produces Qualimap-compatible output files - (`coverage_profile_along_genes_(total).txt` and `rnaseq_qc_results.txt`). These - files are written to a `qualimap/` subdirectory and are parseable by MultiQC as - Qualimap rnaseq reports. Gene body coverage only runs in GTF mode. - The GTF parser (`gtf.rs`) accepts extra attribute names to extract (e.g., `gene_biotype`, `gene_type`) and stores them in `Gene.attributes`. The biotype attribute name is configurable via `--biotype-attribute` CLI flag or `featurecounts.biotype_attribute` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 84b349e..37a7f06 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -74,7 +74,6 @@ src/ featurecounts/ mod.rs Re-exports output output.rs featureCounts-format output and biotype counting - genebody.rs Gene body coverage profiling, Qualimap-compatible output preseq.rs preseq lc_extrap library complexity extrapolation rseqc/ mod.rs Re-exports all RSeQC modules + common helpers diff --git a/docs/src/content/docs/usage/configuration.md b/docs/src/content/docs/usage/configuration.md index 7b2d9a7..26a1e16 100644 --- a/docs/src/content/docs/usage/configuration.md +++ b/docs/src/content/docs/usage/configuration.md @@ -83,10 +83,6 @@ tin: sample_size: 100 min_coverage: 10 -# Gene body coverage / Qualimap -genebody_coverage: - enabled: true - # Library complexity (preseq lc_extrap) preseq: enabled: true @@ -314,17 +310,6 @@ read origin classification (exonic/intronic/intergenic), strand-specificity estimation, and splice junction motif counting. Produces Qualimap-compatible output files parseable by MultiQC. -### genebody_coverage - -```yaml -genebody_coverage: - enabled: true # Legacy toggle for the older gene body coverage module -``` - -Requires annotation (`--gtf` only). This is a legacy config key for the -original gene body coverage implementation. The `qualimap:` section above -controls the current Qualimap-compatible gene body coverage analysis. - ### preseq ```yaml diff --git a/src/config.rs b/src/config.rs index 15934fb..f75a70e 100644 --- a/src/config.rs +++ b/src/config.rs @@ -101,10 +101,6 @@ pub struct Config { #[serde(default)] pub tin: TinConfig, - /// Gene body coverage + Qualimap-compatible output configuration. - #[serde(default)] - pub genebody_coverage: GenebodyCoverageConfig, - /// samtools stats-compatible output configuration (SN section). #[serde(default)] pub samtools_stats: SamtoolsStatsConfig, @@ -516,23 +512,6 @@ impl Default for TinConfig { } } -/// Configuration for gene body coverage and Qualimap-compatible output. -/// -/// When enabled, produces a coverage profile along gene bodies and a -/// Qualimap-compatible `rnaseq_qc_results.txt` file parseable by MultiQC. -#[derive(Debug, Deserialize)] -#[serde(default)] -pub struct GenebodyCoverageConfig { - /// Whether to produce gene body coverage output. Defaults to true. - pub enabled: bool, -} - -impl Default for GenebodyCoverageConfig { - fn default() -> Self { - Self { enabled: true } - } -} - /// Configuration for Qualimap RNA-Seq QC. /// /// When enabled, produces Qualimap-compatible output files including diff --git a/src/main.rs b/src/main.rs index 9d591e9..bdaf931 100644 --- a/src/main.rs +++ b/src/main.rs @@ -645,13 +645,6 @@ fn process_single_bam( || rseqc_config.inner_distance_enabled || rseqc_config.preseq_enabled; - // === Build gene body coverage position map (if enabled) === - let genebody_position_map = if params.config.genebody_coverage.enabled { - genes.map(rna::genebody::TranscriptPositionMap::from_genes) - } else { - None - }; - // === Build Qualimap exon index (if enabled, GTF-only) === let qualimap_index = if params.config.qualimap.enabled { genes.map(rna::qualimap::QualimapIndex::from_genes) @@ -686,7 +679,6 @@ fn process_single_bam( } else { None }, - genebody_position_map.as_ref(), qualimap_index.as_ref(), )?; info!( diff --git a/src/rna/dupradar/counting.rs b/src/rna/dupradar/counting.rs index 792c122..55cc153 100644 --- a/src/rna/dupradar/counting.rs +++ b/src/rna/dupradar/counting.rs @@ -10,7 +10,6 @@ //! This implements a simplified featureCounts-compatible counting strategy. use crate::gtf::Gene; -use crate::rna::genebody::GenebodyCoverageAccum; use crate::rna::qualimap::QualimapAccum; use crate::rna::rseqc::accumulators::{RseqcAccumulators, RseqcAnnotations, RseqcConfig}; use anyhow::{Context, Result}; @@ -342,9 +341,6 @@ pub struct CountResult { // --- RSeQC results (collected during the same BAM pass) --- /// Accumulated RSeQC tool results, if RSeQC tools were enabled pub rseqc: Option, - /// Gene body coverage results (Qualimap-compatible). - #[allow(dead_code)] - pub genebody: Option, /// Qualimap RNA-Seq QC results (if enabled). #[allow(dead_code)] pub qualimap: Option, @@ -572,8 +568,6 @@ struct ChromResult { fc_no_features: u64, fc_multimapping: u64, fc_unmapped: u64, - /// Gene body coverage accumulator (if enabled). - genebody: Option, /// Qualimap RNA-Seq QC accumulator (if enabled). qualimap: Option, } @@ -601,7 +595,6 @@ impl ChromResult { fc_no_features: 0, fc_multimapping: 0, fc_unmapped: 0, - genebody: None, qualimap: None, } } @@ -634,14 +627,6 @@ impl ChromResult { self.fc_no_features += other.fc_no_features; self.fc_multimapping += other.fc_multimapping; self.fc_unmapped += other.fc_unmapped; - // Merge gene body coverage - if let Some(other_gb) = other.genebody { - if let Some(ref mut self_gb) = self.genebody { - self_gb.merge(&other_gb); - } else { - self.genebody = Some(other_gb); - } - } // Merge Qualimap accumulator if let Some(other_qm) = other.qualimap { if let Some(ref mut self_qm) = self.qualimap { @@ -699,13 +684,9 @@ fn process_chromosome_batch( rseqc_config: Option<&RseqcConfig>, rseqc_annotations: Option<&RseqcAnnotations>, htslib_threads: usize, - genebody_position_map: Option<&crate::rna::genebody::TranscriptPositionMap>, qualimap_index: Option<&crate::rna::qualimap::QualimapIndex>, ) -> Result<(ChromResult, Option)> { let mut result = ChromResult::new(num_genes); - if genebody_position_map.is_some() { - result.genebody = Some(crate::rna::genebody::GenebodyCoverageAccum::new()); - } if qualimap_index.is_some() { result.qualimap = Some(crate::rna::qualimap::QualimapAccum::new(stranded)); } @@ -726,10 +707,8 @@ fn process_chromosome_batch( }) .collect(); let tid_to_rseqc_chrom = &tid_to_gtf_chrom; - let tid_to_rseqc_chrom_upper: Vec = tid_to_gtf_chrom - .iter() - .map(|s| s.to_uppercase()) - .collect(); + let tid_to_rseqc_chrom_upper: Vec = + tid_to_gtf_chrom.iter().map(|s| s.to_uppercase()).collect(); // Open an indexed reader for this thread (supports BAM with .bai/.csi and CRAM with .crai) let mut bam = bam::IndexedReader::from_path(bam_path) @@ -887,14 +866,8 @@ fn process_chromosome_batch( if gene_hits.is_empty() { result.stat_no_features += 1; - if let Some(ref mut gb) = result.genebody { - gb.record_no_feature(&aligned_blocks_buf, 1); - } } else if gene_hits.len() > 1 { result.stat_ambiguous += 1; - if let Some(ref mut gb) = result.genebody { - gb.record_ambiguous(1); - } } else if assign_fragment_to_gene( &gene_hits, &mut result.gene_counts, @@ -902,11 +875,6 @@ fn process_chromosome_batch( is_multi, ) { result.stat_assigned += 1; - if let (Some(ref mut gb), Some(pos_map)) = - (&mut result.genebody, genebody_position_map) - { - gb.record_coverage(gene_hits[0] as usize, &aligned_blocks_buf, pos_map, 1); - } } continue; } @@ -971,14 +939,8 @@ fn process_chromosome_batch( if combined_genes.is_empty() { result.stat_no_features += 1; - if let Some(ref mut gb) = result.genebody { - gb.record_no_feature(&aligned_blocks_buf, 2); - } } else if combined_genes.len() > 1 { result.stat_ambiguous += 1; - if let Some(ref mut gb) = result.genebody { - gb.record_ambiguous(2); - } } else if assign_fragment_to_gene( &combined_genes, &mut result.gene_counts, @@ -986,16 +948,6 @@ fn process_chromosome_batch( frag_is_multi, ) { result.stat_assigned += 1; - if let (Some(ref mut gb), Some(pos_map)) = - (&mut result.genebody, genebody_position_map) - { - gb.record_coverage( - combined_genes[0] as usize, - &aligned_blocks_buf, - pos_map, - 2, - ); - } } } else { // Take ownership of gene_hits instead of cloning (it's cleared at loop start) @@ -1016,25 +968,6 @@ fn process_chromosome_batch( Ok((result, rseqc_accums)) } -/// Count reads from an alignment file (SAM/BAM/CRAM) and assign them to genes. -/// -/// Performs four simultaneous counting modes matching dupRadar's approach: -/// - With/without multimappers -/// - With/without PCR duplicates -/// -/// When `threads > 1`, processing is parallelized by chromosome: the alignment -/// index is used to divide chromosomes among threads, each opening its own reader -/// and processing independently. Per-chromosome results are merged, and any -/// unmatched paired-end mates (from cross-chromosome pairs) are reconciled in a -/// final pass. -/// -/// For paired-end data, this function buffers mates by a composite key matching -/// featureCounts' `SAM_pairer_get_read_full_name()` (read name, R1/R2 refIDs, -/// R1/R2 positions, HI tag). Gene assignment uses featureCounts' scoring strategy: -/// genes overlapped by both mates score higher than genes overlapped by only one. -/// -/// # Arguments -/// * `bam_path` - Path to the duplicate-marked alignment file (BAM/CRAM must have /// Partition chromosome indices across workers using greedy bin-packing /// (largest-first scheduling). Assigns each chromosome to the worker with the /// smallest current total length, producing a more balanced distribution than @@ -1082,7 +1015,6 @@ pub fn count_reads( skip_dup_check: bool, rseqc_config: Option<&RseqcConfig>, rseqc_annotations: Option<&RseqcAnnotations>, - genebody_position_map: Option<&crate::rna::genebody::TranscriptPositionMap>, qualimap_index: Option<&crate::rna::qualimap::QualimapIndex>, ) -> Result { // Build gene ID interner for allocation-free lookups in the hot loop @@ -1160,7 +1092,10 @@ pub fn count_reads( .iter() .map(|b| b.iter().map(|&tid| tid_to_len[tid as usize]).sum()) .collect(); - debug!("Chromosome load distribution across {} workers: {:?}", num_workers, worker_loads); + debug!( + "Chromosome load distribution across {} workers: {:?}", + num_workers, worker_loads + ); } // Configure rayon thread pool @@ -1204,7 +1139,6 @@ pub fn count_reads( rseqc_config, rseqc_annotations, htslib_threads, - genebody_position_map, qualimap_index, ) }) @@ -1231,9 +1165,6 @@ pub fn count_reads( // This avoids the need for an index file. let global_read_counter = AtomicU64::new(0); let mut result = ChromResult::new(interner.len()); - if genebody_position_map.is_some() { - result.genebody = Some(crate::rna::genebody::GenebodyCoverageAccum::new()); - } let mut rseqc_accums: Option = rseqc_config.map(|cfg| RseqcAccumulators::new(cfg, rseqc_annotations)); let mut qualimap_accum: Option = @@ -1392,14 +1323,8 @@ pub fn count_reads( if gene_hits.is_empty() { result.stat_no_features += 1; - if let Some(ref mut gb) = result.genebody { - gb.record_no_feature(&aligned_blocks_buf, 1); - } } else if gene_hits.len() > 1 { result.stat_ambiguous += 1; - if let Some(ref mut gb) = result.genebody { - gb.record_ambiguous(1); - } } else if assign_fragment_to_gene( &gene_hits, &mut result.gene_counts, @@ -1407,11 +1332,6 @@ pub fn count_reads( is_multi, ) { result.stat_assigned += 1; - if let (Some(ref mut gb), Some(pos_map)) = - (&mut result.genebody, genebody_position_map) - { - gb.record_coverage(gene_hits[0] as usize, &aligned_blocks_buf, pos_map, 1); - } } continue; } @@ -1486,14 +1406,8 @@ pub fn count_reads( if combined_genes.is_empty() { result.stat_no_features += 1; - if let Some(ref mut gb) = result.genebody { - gb.record_no_feature(&aligned_blocks_buf, 2); - } } else if combined_genes.len() > 1 { result.stat_ambiguous += 1; - if let Some(ref mut gb) = result.genebody { - gb.record_ambiguous(2); - } } else if assign_fragment_to_gene( &combined_genes, &mut result.gene_counts, @@ -1501,16 +1415,6 @@ pub fn count_reads( frag_is_multi, ) { result.stat_assigned += 1; - if let (Some(ref mut gb), Some(pos_map)) = - (&mut result.genebody, genebody_position_map) - { - gb.record_coverage( - combined_genes[0] as usize, - &aligned_blocks_buf, - pos_map, - 2, - ); - } } } else { // Take ownership of gene_hits instead of cloning (it's cleared at loop start) @@ -1705,13 +1609,10 @@ pub fn count_reads( fc_multimapping: merged.fc_multimapping, fc_unmapped: merged.fc_unmapped, rseqc: merged_rseqc, - genebody: merged - .genebody - .map(|a: GenebodyCoverageAccum| a.into_result()), qualimap: match (merged.qualimap, qualimap_index) { (Some(mut a), Some(idx)) => { a.flush_unpaired(idx); - Some(a.into_result(idx)) + Some(a.into_result()) } _ => None, }, @@ -2002,7 +1903,9 @@ mod tests { #[test] fn test_partition_chromosomes_balanced() { // Simulate human-like chromosome sizes (large variation) - let lengths = vec![250, 243, 198, 191, 182, 171, 159, 146, 138, 133, 135, 130, 57]; + let lengths = vec![ + 250, 243, 198, 191, 182, 171, 159, 146, 138, 133, 135, 130, 57, + ]; let batches = partition_chromosomes(&lengths, 4); assert_eq!(batches.len(), 4); diff --git a/src/rna/featurecounts/output.rs b/src/rna/featurecounts/output.rs index eda1ec3..04cdaef 100644 --- a/src/rna/featurecounts/output.rs +++ b/src/rna/featurecounts/output.rs @@ -330,7 +330,6 @@ mod tests { fc_multimapping: 0, fc_unmapped: 0, rseqc: None, - genebody: None, qualimap: None, }; diff --git a/src/rna/genebody.rs b/src/rna/genebody.rs deleted file mode 100644 index 19f0378..0000000 --- a/src/rna/genebody.rs +++ /dev/null @@ -1,410 +0,0 @@ -//! Gene body coverage profiling and Qualimap-compatible output. -//! -//! Computes coverage profiles along transcript bodies, mapping read positions -//! to 100 percentile bins (5'→3'). Produces Qualimap rnaseq-compatible output -//! for MultiQC parsing. - -use indexmap::IndexMap; - -use crate::gtf::Gene; - -/// Number of percentile bins for coverage profiling (0–99). -const NUM_BINS: usize = 100; - -// ============================================================ -// Gene exon offset map — precomputed for fast position mapping -// ============================================================ - -/// Per-gene exon structure for mapping genomic positions to transcript percentiles. -#[derive(Debug, Clone)] -pub struct GeneExonMap { - /// Sorted exon intervals in 0-based half-open coordinates `(start, end)`. - pub exons: Vec<(u64, u64)>, - /// Cumulative start offsets for each exon (transcript-relative). - /// `cumulative_starts[i]` = sum of lengths of exons 0..i. - pub cumulative_starts: Vec, - /// Total exonic length (sum of all exon lengths). - pub total_exonic_length: u64, - /// Strand ('+' or '-'). - pub strand: char, -} - -/// Maps gene indices to their exon position data for fast percentile bin lookup. -#[derive(Debug)] -pub struct TranscriptPositionMap { - /// Per-gene exon maps, indexed by gene position in the IndexMap. - genes: Vec>, -} - -impl TranscriptPositionMap { - /// Build the position map from GTF gene annotations. - /// - /// Uses the longest transcript per gene for exon structure. - /// Genes with no exons or zero exonic length are set to `None`. - pub fn from_genes(genes: &IndexMap) -> Self { - let mut maps = Vec::with_capacity(genes.len()); - - for gene in genes.values() { - // Use the longest transcript (by total exonic length) - let best_tx = gene.transcripts.iter().max_by_key(|tx| { - tx.exons - .iter() - .map(|(s, e)| e.saturating_sub(*s)) - .sum::() - }); - - let exon_map = best_tx.and_then(|tx| { - // Convert GTF 1-based inclusive to 0-based half-open - let mut exons: Vec<(u64, u64)> = tx - .exons - .iter() - .map(|(s, e)| (s.saturating_sub(1), *e)) - .collect(); - - // Sort by start position - exons.sort_unstable_by_key(|(s, _)| *s); - - let total_exonic_length: u64 = - exons.iter().map(|(s, e)| e.saturating_sub(*s)).sum(); - - if total_exonic_length == 0 || exons.is_empty() { - return None; - } - - let mut cumulative_starts = Vec::with_capacity(exons.len()); - let mut cum = 0u64; - for (s, e) in &exons { - cumulative_starts.push(cum); - cum += e - s; - } - - Some(GeneExonMap { - exons, - cumulative_starts, - total_exonic_length, - strand: gene.strand, - }) - }); - - maps.push(exon_map); - } - - Self { genes: maps } - } - - /// Get the exon map for a gene by index. - pub fn get(&self, gene_idx: usize) -> Option<&GeneExonMap> { - self.genes.get(gene_idx).and_then(|m| m.as_ref()) - } - -} - -// ============================================================ -// Coverage accumulator — accumulated during BAM pass -// ============================================================ - -/// Accumulator for gene body coverage profiling. -/// -/// Tracks coverage bins and genomic origin counts during BAM processing. -#[derive(Debug, Clone)] -pub struct GenebodyCoverageAccum { - /// Coverage profile bins (100 percentile bins, 5'→3'). - pub coverage_bins: [f64; NUM_BINS], - /// Number of reads/fragments assigned to genes. - pub aligned_to_genes: u64, - /// Number of ambiguous reads (overlapping multiple genes). - pub ambiguous: u64, - /// Number of reads with no gene feature. - pub no_feature: u64, - /// Exonic base count (reads overlapping exons). - pub exonic: u64, - /// Intronic base count (reads in introns of assigned genes). - pub intronic: u64, - /// Intergenic base count (reads not overlapping any gene). - pub intergenic: u64, - /// Reads overlapping multiple exons of the same gene. - pub overlapping_exon: u64, -} - -impl Default for GenebodyCoverageAccum { - fn default() -> Self { - Self { - coverage_bins: [0.0; NUM_BINS], - aligned_to_genes: 0, - ambiguous: 0, - no_feature: 0, - exonic: 0, - intronic: 0, - intergenic: 0, - overlapping_exon: 0, - } - } -} - -impl GenebodyCoverageAccum { - /// Create a new empty accumulator. - pub fn new() -> Self { - Self::default() - } - - /// Record a read's gene body coverage by mapping aligned blocks to percentile bins. - /// - /// # Arguments - /// * `gene_idx` - Index of the assigned gene - /// * `aligned_blocks` - Read's aligned genomic blocks (0-based half-open) - /// * `position_map` - Precomputed exon position map - /// * `read_count` - Number of reads in this fragment (1 for single-end, 2 for paired-end). - /// Qualimap counts reads, not fragments, so aligned_to_genes increments by read_count. - pub fn record_coverage( - &mut self, - gene_idx: usize, - aligned_blocks: &[(u64, u64)], - position_map: &TranscriptPositionMap, - read_count: u64, - ) { - let gene_map = match position_map.get(gene_idx) { - Some(m) => m, - None => return, - }; - - self.aligned_to_genes += read_count; - - // Track exon overlap count for overlapping_exon metric - let mut exons_hit = 0u32; - - // Map each aligned block to transcript-relative positions - for &(block_start, block_end) in aligned_blocks { - let mut exonic_bases: u64 = 0; - - for (i, &(exon_start, exon_end)) in gene_map.exons.iter().enumerate() { - // Find overlap between aligned block and this exon - let overlap_start = block_start.max(exon_start); - let overlap_end = block_end.min(exon_end); - - if overlap_start >= overlap_end { - continue; - } - - exonic_bases += overlap_end - overlap_start; - exons_hit += 1; - - // Map overlapping bases to transcript-relative positions - let tx_start = gene_map.cumulative_starts[i] + (overlap_start - exon_start); - let tx_end = gene_map.cumulative_starts[i] + (overlap_end - exon_start); - - // Convert to percentile bins using range-based approach - // instead of iterating per-base (O(bins_spanned) vs O(overlap_len)) - let total_len = gene_map.total_exonic_length as f64; - let (range_start, range_end) = if gene_map.strand == '-' { - // Reverse strand: flip to 5'→3' - let rs = gene_map.total_exonic_length - tx_end; - let re = gene_map.total_exonic_length - tx_start; - (rs, re) - } else { - (tx_start, tx_end) - }; - - // Compute start/end bins (clamped to valid range) - let start_bin = ((range_start as f64 / total_len) * NUM_BINS as f64) as usize; - let start_bin = start_bin.min(NUM_BINS - 1); - let end_bin = (((range_end - 1) as f64 / total_len) * NUM_BINS as f64) as usize; - let end_bin = end_bin.min(NUM_BINS - 1); - - if start_bin == end_bin { - // All bases fall in the same bin - self.coverage_bins[start_bin] += (range_end - range_start) as f64; - } else { - // Bases span multiple bins -- compute exact counts per bin - let bin_width = total_len / NUM_BINS as f64; - // First bin: bases from range_start to the end of start_bin - let first_bin_end = ((start_bin + 1) as f64 * bin_width).ceil() as u64; - self.coverage_bins[start_bin] += - (first_bin_end.min(range_end) - range_start) as f64; - // Middle bins: each gets a full bin_width of bases - for bin in (start_bin + 1)..end_bin { - let bin_start = (bin as f64 * bin_width).ceil() as u64; - let bin_end = ((bin + 1) as f64 * bin_width).ceil() as u64; - self.coverage_bins[bin] += - (bin_end.min(range_end) - bin_start.max(range_start)) as f64; - } - // Last bin: bases from the start of end_bin to range_end - let last_bin_start = (end_bin as f64 * bin_width).ceil() as u64; - self.coverage_bins[end_bin] += - (range_end - last_bin_start.max(range_start)) as f64; - } - } - - // Count exonic bases from actual overlap, remainder is intronic. - // Cap exonic_bases at block_len in case overlapping exons - // (from merged transcript models) double-count the same bases. - let block_len = block_end - block_start; - let exonic_capped = exonic_bases.min(block_len); - self.exonic += exonic_capped; - self.intronic += block_len - exonic_capped; - } - - if exons_hit > 1 { - self.overlapping_exon += 1; - } - } - - /// Record a read that was not assigned to any gene (intergenic). - /// - /// `read_count` is 1 for single-end, 2 for paired-end (Qualimap counts reads not fragments). - pub fn record_no_feature(&mut self, aligned_blocks: &[(u64, u64)], read_count: u64) { - self.no_feature += read_count; - for &(start, end) in aligned_blocks { - self.intergenic += end - start; - } - } - - /// Record an ambiguous read (overlapping multiple genes). - /// - /// `read_count` is 1 for single-end, 2 for paired-end (Qualimap counts reads not fragments). - pub fn record_ambiguous(&mut self, read_count: u64) { - self.ambiguous += read_count; - } - - /// Merge another accumulator into this one. - pub fn merge(&mut self, other: &GenebodyCoverageAccum) { - for i in 0..NUM_BINS { - self.coverage_bins[i] += other.coverage_bins[i]; - } - self.aligned_to_genes += other.aligned_to_genes; - self.ambiguous += other.ambiguous; - self.no_feature += other.no_feature; - self.exonic += other.exonic; - self.intronic += other.intronic; - self.intergenic += other.intergenic; - self.overlapping_exon += other.overlapping_exon; - } -} - -// ============================================================ -// Results and output -// ============================================================ - -/// Gene body coverage results. -#[derive(Debug)] -#[allow(dead_code)] -pub struct GenebodyCoverageResult { - /// Coverage profile bins (100 percentile bins). - pub coverage_bins: [f64; NUM_BINS], - /// Reads aligned to genes. - pub aligned_to_genes: u64, - /// Ambiguous reads. - pub ambiguous: u64, - /// Reads with no feature. - pub no_feature: u64, - /// Exonic bases. - pub exonic: u64, - /// Intronic bases. - pub intronic: u64, - /// Intergenic bases. - pub intergenic: u64, - /// Reads overlapping multiple exons. - pub overlapping_exon: u64, - /// 5' bias (mean of first 20 bins / overall mean). - pub bias_5prime: f64, - /// 3' bias (mean of last 20 bins / overall mean). - pub bias_3prime: f64, - /// 5'-3' bias (median of first 20 bins / median of last 20 bins). - pub bias_5_to_3: f64, -} - -impl GenebodyCoverageAccum { - /// Convert accumulator to final results with bias calculations. - pub fn into_result(self) -> GenebodyCoverageResult { - let overall_mean: f64 = self.coverage_bins.iter().sum::() / NUM_BINS as f64; - - // 5' bias: mean of bins 0-19 / overall mean - let first20_mean: f64 = self.coverage_bins[..20].iter().sum::() / 20.0; - let bias_5prime = if overall_mean > 0.0 { - first20_mean / overall_mean - } else { - 0.0 - }; - - // 3' bias: mean of bins 80-99 / overall mean - let last20_mean: f64 = self.coverage_bins[80..].iter().sum::() / 20.0; - let bias_3prime = if overall_mean > 0.0 { - last20_mean / overall_mean - } else { - 0.0 - }; - - // 5'-3' bias: median of first 20 bins / median of last 20 bins - let median_first20 = crate::io::median(&self.coverage_bins[..20]); - let median_last20 = crate::io::median(&self.coverage_bins[80..]); - let bias_5_to_3 = if median_last20 > 0.0 { - median_first20 / median_last20 - } else { - 0.0 - }; - - GenebodyCoverageResult { - coverage_bins: self.coverage_bins, - aligned_to_genes: self.aligned_to_genes, - ambiguous: self.ambiguous, - no_feature: self.no_feature, - exonic: self.exonic, - intronic: self.intronic, - intergenic: self.intergenic, - overlapping_exon: self.overlapping_exon, - bias_5prime, - bias_3prime, - bias_5_to_3, - } - } -} - -// ============================================================ -// Unit tests -// ============================================================ - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_median() { - assert!((crate::io::median(&[1.0, 2.0, 3.0]) - 2.0).abs() < 1e-10); - assert!((crate::io::median(&[1.0, 2.0, 3.0, 4.0]) - 2.5).abs() < 1e-10); - assert!((crate::io::median(&[]) - 0.0).abs() < 1e-10); - } - - #[test] - fn test_gene_body_coverage_merge() { - let mut a = GenebodyCoverageAccum::new(); - a.coverage_bins[0] = 10.0; - a.coverage_bins[50] = 20.0; - a.aligned_to_genes = 5; - a.exonic = 100; - - let mut b = GenebodyCoverageAccum::new(); - b.coverage_bins[0] = 5.0; - b.coverage_bins[50] = 15.0; - b.aligned_to_genes = 3; - b.intronic = 50; - - a.merge(&b); - assert!((a.coverage_bins[0] - 15.0).abs() < 1e-10); - assert!((a.coverage_bins[50] - 35.0).abs() < 1e-10); - assert_eq!(a.aligned_to_genes, 8); - assert_eq!(a.exonic, 100); - assert_eq!(a.intronic, 50); - } - - #[test] - fn test_bias_calculation() { - let mut accum = GenebodyCoverageAccum::new(); - // Uniform coverage - for bin in &mut accum.coverage_bins { - *bin = 100.0; - } - let result = accum.into_result(); - assert!((result.bias_5prime - 1.0).abs() < 1e-10); - assert!((result.bias_3prime - 1.0).abs() < 1e-10); - assert!((result.bias_5_to_3 - 1.0).abs() < 1e-10); - } -} diff --git a/src/rna/mod.rs b/src/rna/mod.rs index 394e609..207412f 100644 --- a/src/rna/mod.rs +++ b/src/rna/mod.rs @@ -6,7 +6,6 @@ pub mod bam_flags; pub mod dupradar; pub mod featurecounts; -pub mod genebody; pub mod preseq; pub mod qualimap; pub mod rseqc; diff --git a/src/rna/qualimap/accumulator.rs b/src/rna/qualimap/accumulator.rs index f0891d5..4aa4a08 100644 --- a/src/rna/qualimap/accumulator.rs +++ b/src/rna/qualimap/accumulator.rs @@ -13,7 +13,7 @@ use coitrees::IntervalTree; use rust_htslib::bam; use rust_htslib::bam::record::Cigar; -use super::coverage::{MergedGeneCoverage, TranscriptCoverage}; +use super::coverage::TranscriptCoverage; use super::index::QualimapIndex; // ============================================================ @@ -108,8 +108,6 @@ pub struct QualimapCounters { pub overlapping_exon: u64, /// Total reads counted (numReads: for PE, 2 per fragment). pub read_count: u64, - /// Total fragments counted (PE: 1 per pair, SE: 1 per read). - pub fragment_count: u64, /// Left mates in proper pairs (flag 0x2 + 0x40). pub left_proper_in_pair: u64, /// Right mates in proper pairs (flag 0x2 + 0x80). @@ -140,7 +138,6 @@ impl QualimapCounters { self.intergenic_reads += other.intergenic_reads; self.overlapping_exon += other.overlapping_exon; self.read_count += other.read_count; - self.fragment_count += other.fragment_count; self.left_proper_in_pair += other.left_proper_in_pair; self.right_proper_in_pair += other.right_proper_in_pair; self.both_proper_in_pair += other.both_proper_in_pair; @@ -190,8 +187,6 @@ pub struct QualimapAccum { pub counters: QualimapCounters, /// Per-transcript per-base coverage. pub coverage: TranscriptCoverage, - /// Per-gene merged-model coverage. - pub merged_gene_coverage: MergedGeneCoverage, /// Junction splice motif counts. pub junction_motifs: JunctionMotifCounts, /// Mate buffer for PE reconciliation. @@ -209,7 +204,6 @@ impl QualimapAccum { Self { counters: QualimapCounters::default(), coverage: TranscriptCoverage::new(), - merged_gene_coverage: MergedGeneCoverage::new(), junction_motifs: JunctionMotifCounts::default(), mate_buffer: HashMap::new(), stranded, @@ -400,9 +394,6 @@ impl QualimapAccum { .or_insert(0) += 1; } - // Fragment assignment - self.counters.fragment_count += 1; - // Check for overlapping_exon using cached hits from both mates. // At most one increment per fragment: check r2 only if r1 didn't trigger. if !check_has_overlap_not_enclosed(&r1.cached_hits) { @@ -462,8 +453,6 @@ impl QualimapAccum { .or_insert(0) += 1; } - self.counters.fragment_count += 1; - // Check for overlapping_exon using cached results. self.check_overlapping_exon_cached(&data.cached_hits); @@ -516,10 +505,6 @@ impl QualimapAccum { let transcripts = index.gene_transcripts(gidx); self.coverage .add_coverage(single_block, transcripts, tx_start); - if let Some(model) = index.merged_gene_models.get(gidx as usize) { - self.merged_gene_coverage - .add_coverage(gidx, single_block, model); - } } } } @@ -597,7 +582,6 @@ impl QualimapAccum { pub fn merge(&mut self, other: QualimapAccum) { self.counters.merge(&other.counters); self.coverage.merge(other.coverage); - self.merged_gene_coverage.merge(other.merged_gene_coverage); self.junction_motifs.merge(&other.junction_motifs); // Merge mate buffers (cross-chromosome mates). @@ -607,8 +591,6 @@ impl QualimapAccum { if let Some(existing) = self.mate_buffer.remove(&key) { // We don't have the chrom handy for intronic classification, // so treat no-feature conservatively as intergenic. - self.counters.fragment_count += 1; - let common_genes: HashSet = existing .enclosing_genes .intersection(&info.enclosing_genes) @@ -670,7 +652,7 @@ impl QualimapAccum { /// /// This should be called after all reads have been processed and /// `flush_unpaired()` has been called. - pub fn into_result(self, index: &QualimapIndex) -> super::QualimapResult { + pub fn into_result(self) -> super::QualimapResult { // Convert junction motif byte arrays to readable strings let mut junction_motifs_str = HashMap::new(); for ((donor, acceptor), count) in &self.junction_motifs.counts { @@ -681,16 +663,6 @@ impl QualimapAccum { *junction_motifs_str.entry(motif).or_insert(0u64) += count; } - // Build string-keyed coverage map from the raw coverage data, then - // take ownership of the raw coverage without cloning. - let mut transcript_coverage = HashMap::new(); - for (flat_idx, depth) in self.coverage.iter() { - if let Some(tx_info) = index.transcripts.get(flat_idx as usize) { - transcript_coverage.insert(tx_info.transcript_id.clone(), depth.to_vec()); - } - } - let transcript_coverage_raw = self.coverage; - super::QualimapResult { primary_alignments: self.counters.primary_alignments, secondary_alignments: self.counters.secondary_alignments, @@ -703,15 +675,12 @@ impl QualimapAccum { intergenic_reads: self.counters.intergenic_reads, overlapping_exon_reads: self.counters.overlapping_exon, read_count: self.counters.read_count, - fragment_count: self.counters.fragment_count, left_proper_in_pair: self.counters.left_proper_in_pair, right_proper_in_pair: self.counters.right_proper_in_pair, both_proper_in_pair: self.counters.both_proper_in_pair, reads_at_junctions: self.counters.reads_at_junctions, junction_motifs: junction_motifs_str, - transcript_coverage, - transcript_coverage_raw, - merged_gene_coverage: self.merged_gene_coverage, + transcript_coverage_raw: self.coverage, ssp_fwd: self.counters.ssp_fwd, ssp_rev: self.counters.ssp_rev, } diff --git a/src/rna/qualimap/coverage.rs b/src/rna/qualimap/coverage.rs index 9c1ae02..4cbee8f 100644 --- a/src/rna/qualimap/coverage.rs +++ b/src/rna/qualimap/coverage.rs @@ -147,11 +147,6 @@ impl TranscriptCoverage { self.coverage.len() } - /// Iterate over all transcripts with coverage data. - pub fn iter(&self) -> impl Iterator { - self.coverage.iter().map(|(&idx, v)| (idx, v.as_slice())) - } - /// Compute mean coverage for a transcript. #[allow(dead_code)] pub fn mean_coverage(&self, tx_flat_idx: u32) -> f64 { @@ -183,6 +178,7 @@ pub struct MergedGeneCoverage { coverage: HashMap>, } +#[allow(dead_code)] // tested API; not yet consumed in output pipeline impl MergedGeneCoverage { /// Create a new empty merged gene coverage tracker. pub fn new() -> Self { @@ -204,7 +200,7 @@ impl MergedGeneCoverage { m_blocks: &[(i32, i32)], model: &MergedGeneModel, ) { - let total_len = model.exonic_length as usize; + let total_len: usize = model.exons.iter().map(|(s, e)| (e - s) as usize).sum(); if total_len == 0 { return; } @@ -380,10 +376,8 @@ mod tests { #[test] fn test_merged_gene_single_exon() { let model = MergedGeneModel { - gene_idx: 0, strand: '+', exons: vec![(100, 200)], - exonic_length: 100, }; let mut cov = MergedGeneCoverage::new(); // M-block: 120..150, effective_end = 149 (Picard off-by-one) @@ -402,10 +396,8 @@ mod tests { // Gene with two merged exons: (100,200) and (300,400) // Total exonic length = 200, offset: first exon 0..100, second 100..200 let model = MergedGeneModel { - gene_idx: 0, strand: '+', exons: vec![(100, 200), (300, 400)], - exonic_length: 200, }; let mut cov = MergedGeneCoverage::new(); // M-block in second exon: 310..330 (genomic), effective_end = 329 @@ -427,12 +419,10 @@ mod tests { // tx2: exons (150,250) and (350,400) // Merged: (100,250) and (300,400) → total 250 let model = MergedGeneModel::from_transcripts( - 0, '+', &[(100, 200), (300, 400), (150, 250), (350, 400)], ); assert_eq!(model.exons, vec![(100, 250), (300, 400)]); - assert_eq!(model.exonic_length, 250); let mut cov = MergedGeneCoverage::new(); // M-block spanning the merged first exon: 180..220, effective_end = 219 @@ -450,10 +440,8 @@ mod tests { #[test] fn test_merged_gene_merge() { let model = MergedGeneModel { - gene_idx: 0, strand: '+', exons: vec![(100, 200)], - exonic_length: 100, }; let mut cov1 = MergedGeneCoverage::new(); diff --git a/src/rna/qualimap/index.rs b/src/rna/qualimap/index.rs index 21c1a47..76c3259 100644 --- a/src/rna/qualimap/index.rs +++ b/src/rna/qualimap/index.rs @@ -22,21 +22,15 @@ use crate::gtf::Gene; /// Qualimap assigns reads by checking that all M-blocks are *enclosed* /// (fully contained) within exons of the same gene. #[derive(Debug, Clone, Copy)] +#[allow(dead_code)] // fields are stored in COITree; read in tests and available for future queries pub struct ExonMeta { /// Index into the gene list (position in the IndexMap). - #[allow(dead_code)] pub gene_idx: u32, - /// Index into the transcript list within the gene. - #[allow(dead_code)] - pub transcript_idx: u16, /// Exon start in 0-based half-open coordinates. - #[allow(dead_code)] pub exon_start: i32, /// Exon end in 0-based half-open coordinates. - #[allow(dead_code)] pub exon_end: i32, /// Strand ('+', '-', or '.'). - #[allow(dead_code)] pub strand: u8, } @@ -44,7 +38,6 @@ impl Default for ExonMeta { fn default() -> Self { Self { gene_idx: 0, - transcript_idx: 0, exon_start: 0, exon_end: 0, strand: b'.', @@ -152,16 +145,10 @@ pub struct TranscriptInfo { /// merged model, which gives the correct bias and coverage profile values. #[derive(Debug, Clone)] pub struct MergedGeneModel { - /// Gene index (position in the IndexMap). - #[allow(dead_code)] - pub gene_idx: u32, /// Strand ('+', '-', or '.'). - #[allow(dead_code)] pub strand: char, /// Merged non-overlapping exon intervals in 0-based half-open coordinates, sorted by start. pub exons: Vec<(i32, i32)>, - /// Total merged exonic length (sum of merged exon lengths). - pub exonic_length: u32, } impl MergedGeneModel { @@ -170,15 +157,9 @@ impl MergedGeneModel { /// Takes all exon intervals (0-based half-open) from all transcripts, /// sorts them, and merges overlapping/adjacent intervals into a single /// non-redundant set. - pub fn from_transcripts(gene_idx: u32, strand: char, all_exons: &[(i32, i32)]) -> Self { + pub fn from_transcripts(strand: char, all_exons: &[(i32, i32)]) -> Self { let exons = merge_intervals(all_exons); - let exonic_length: u32 = exons.iter().map(|(s, e)| (e - s) as u32).sum(); - Self { - gene_idx, - strand, - exons, - exonic_length, - } + Self { strand, exons } } } @@ -239,12 +220,6 @@ pub struct QualimapIndex { /// Lookup: gene_idx -> range of transcript indices in `transcripts` vec. /// `gene_transcript_ranges[gene_idx] = (start, end)` half-open into `transcripts`. pub gene_transcript_ranges: Vec<(u32, u32)>, - /// Merged gene models: one per gene, indexed by gene_idx. - /// Each merges all exons from all transcripts into non-redundant intervals. - pub merged_gene_models: Vec, - /// Total number of genes. - #[allow(dead_code)] - pub num_genes: u32, } impl QualimapIndex { @@ -298,7 +273,6 @@ impl QualimapIndex { end, ExonMeta { gene_idx, - transcript_idx: tx_idx, exon_start: start, exon_end: end, strand: tx.strand as u8, @@ -357,7 +331,6 @@ impl QualimapIndex { // Build merged gene model from all exons across all transcripts merged_gene_models.push(MergedGeneModel::from_transcripts( - gene_idx, gene.strand, &all_gene_exons, )); @@ -415,11 +388,9 @@ impl QualimapIndex { .map(|(chrom, intervals)| (chrom, COITree::new(&intervals))) .collect(); - let num_genes = genes.len() as u32; - debug!( "Built Qualimap index: {} genes, {} transcripts, {} chromosomes with exons", - num_genes, + genes.len(), transcripts.len(), exon_trees.len(), ); @@ -430,8 +401,6 @@ impl QualimapIndex { intron_trees, transcripts, gene_transcript_ranges, - merged_gene_models, - num_genes, } } @@ -539,7 +508,7 @@ mod tests { let index = QualimapIndex::from_genes(&genes); - assert_eq!(index.num_genes, 1); + assert_eq!(index.gene_transcript_ranges.len(), 1); assert_eq!(index.transcripts.len(), 1); assert!(index.exon_tree("chr1").is_some()); assert!(index.intron_tree("chr1").is_some()); @@ -564,14 +533,14 @@ mod tests { // Check that exon tree has entries let tree = index.exon_tree("chr1").unwrap(); let mut hits = Vec::new(); - tree.query(150, 160, |iv| hits.push(iv.metadata.clone())); + tree.query(150, 160, |iv| hits.push(iv.metadata)); assert_eq!(hits.len(), 1); assert_eq!(hits[0].gene_idx, 0); // Check intron tree: intron from 200..300 let itree = index.intron_tree("chr1").unwrap(); let mut ihits = Vec::new(); - itree.query(250, 260, |iv| ihits.push(iv.metadata.clone())); + itree.query(250, 260, |iv| ihits.push(iv.metadata)); assert_eq!(ihits.len(), 1); assert_eq!(ihits[0].gene_idx, 0); } diff --git a/src/rna/qualimap/mod.rs b/src/rna/qualimap/mod.rs index e02421b..e4e118c 100644 --- a/src/rna/qualimap/mod.rs +++ b/src/rna/qualimap/mod.rs @@ -52,9 +52,6 @@ pub struct QualimapResult { // --- Fragment counters (PE) --- /// Total reads counted (PE: each mate counts as 1 read). pub read_count: u64, - /// Total fragments counted (PE: 1 per paired fragment). - #[allow(dead_code)] // populated but not yet consumed in output - pub fragment_count: u64, /// Left-of-pair reads (first-of-pair in paired mode). pub left_proper_in_pair: u64, /// Right-of-pair reads (second-of-pair in paired mode). @@ -69,24 +66,12 @@ pub struct QualimapResult { pub junction_motifs: HashMap, // --- Per-transcript coverage --- - /// Per-transcript coverage arrays (transcript_key -> per-base coverage). - #[allow(dead_code)] // populated but not yet consumed in output - pub transcript_coverage: HashMap>, - /// Raw per-transcript coverage keyed by flat index (for bias computation). /// Retains the internal indexing used during accumulation so that bias /// computation can look up transcript metadata (strand, exonic length) /// efficiently via the `QualimapIndex`. pub transcript_coverage_raw: coverage::TranscriptCoverage, - // --- Merged gene coverage --- - /// Per-gene coverage using merged exon models (gene_idx -> per-base coverage). - /// Qualimap's Picard Gene.Transcript merges all exons from all isoforms into - /// a single non-redundant exon set per gene. This produces more accurate - /// bias and profile values than per-transcript tracking. - #[allow(dead_code)] // populated but not yet consumed in output - pub merged_gene_coverage: coverage::MergedGeneCoverage, - /// Forward strand estimation count for SSP. pub ssp_fwd: u64, /// Reverse strand estimation count for SSP. diff --git a/src/rna/qualimap/output.rs b/src/rna/qualimap/output.rs index 8b5bf7e..faaca2e 100644 --- a/src/rna/qualimap/output.rs +++ b/src/rna/qualimap/output.rs @@ -790,9 +790,9 @@ mod tests { #[test] fn test_median() { - assert_eq!(median(&mut vec![1.0, 3.0, 2.0]), 2.0); - assert_eq!(median(&mut vec![1.0, 2.0, 3.0, 4.0]), 2.5); - assert!(median(&mut vec![]).is_nan()); + assert_eq!(median(&mut [1.0, 3.0, 2.0]), 2.0); + assert_eq!(median(&mut [1.0, 2.0, 3.0, 4.0]), 2.5); + assert!(median(&mut []).is_nan()); } /// Helper: build a single-transcript slice for compute_coverage_profile. diff --git a/src/rna/rseqc/accumulators.rs b/src/rna/rseqc/accumulators.rs index 501aeb2..f14b4ea 100644 --- a/src/rna/rseqc/accumulators.rs +++ b/src/rna/rseqc/accumulators.rs @@ -137,8 +137,6 @@ pub struct BamStatAccum { pub proper_pairs: u64, /// Among proper-paired unique reads: mates on different chromosomes. pub proper_pair_diff_chrom: u64, - /// MAPQ distribution for primary, non-QC-fail, non-dup, mapped reads. - pub mapq_distribution: BTreeMap, // --- samtools flagstat additional fields --- /// Secondary alignments (0x100) — counted independently of QC/dup. @@ -211,8 +209,6 @@ pub struct BamStatAccum { pub outward_pairs: u64, /// Other orientation pairs (FF, RR). pub other_orientation: u64, - /// Primary reads that are paired (for "raw total sequences" = paired means /2 fragments). - pub primary_paired: u64, /// Total primary reads (non-secondary, non-supplementary). pub primary_count: u64, /// Primary mapped reads count (non-secondary, non-supplementary, !unmapped). @@ -342,9 +338,6 @@ impl BamStatAccum { self.max_len = seq_len; } - if is_paired { - self.primary_paired += 1; - } if is_dup { self.primary_duplicates += 1; self.bases_duplicated += seq_len; @@ -485,9 +478,6 @@ impl BamStatAccum { return; } - // Mapped primary non-QC-fail non-dup: record MAPQ distribution - *self.mapq_distribution.entry(mapq).or_insert(0) += 1; - // 5. MAPQ classification if mapq < mapq_cut { self.non_unique += 1; @@ -547,9 +537,6 @@ impl BamStatAccum { self.non_splice += other.non_splice; self.proper_pairs += other.proper_pairs; self.proper_pair_diff_chrom += other.proper_pair_diff_chrom; - for (mapq, count) in other.mapq_distribution { - *self.mapq_distribution.entry(mapq).or_insert(0) += count; - } // samtools flagstat fields self.secondary += other.secondary; @@ -599,7 +586,6 @@ impl BamStatAccum { self.inward_pairs += other.inward_pairs; self.outward_pairs += other.outward_pairs; self.other_orientation += other.other_orientation; - self.primary_paired += other.primary_paired; self.primary_count += other.primary_count; self.primary_mapped += other.primary_mapped; self.primary_duplicates += other.primary_duplicates; @@ -1633,7 +1619,6 @@ impl BamStatAccum { non_splice: self.non_splice, proper_pairs: self.proper_pairs, proper_pair_diff_chrom: self.proper_pair_diff_chrom, - mapq_distribution: self.mapq_distribution, // samtools flagstat fields secondary: self.secondary, supplementary: self.supplementary, @@ -1670,7 +1655,6 @@ impl BamStatAccum { inward_pairs: self.inward_pairs, outward_pairs: self.outward_pairs, other_orientation: self.other_orientation, - primary_paired: self.primary_paired, primary_count: self.primary_count, primary_mapped: self.primary_mapped, primary_duplicates: self.primary_duplicates, diff --git a/src/rna/rseqc/bam_stat.rs b/src/rna/rseqc/bam_stat.rs index 8856c23..7d7d463 100644 --- a/src/rna/rseqc/bam_stat.rs +++ b/src/rna/rseqc/bam_stat.rs @@ -6,7 +6,7 @@ use anyhow::{Context, Result}; use log::info; -use std::collections::{BTreeMap, HashMap}; +use std::collections::HashMap; use std::io::Write; use std::path::Path; @@ -53,9 +53,6 @@ pub struct BamStatResult { pub proper_pairs: u64, /// Among proper-paired unique reads: number where mates map to different chromosomes. pub proper_pair_diff_chrom: u64, - /// MAPQ distribution for all primary, non-QC-fail, non-dup, mapped reads. - #[allow(dead_code)] // Kept for API completeness; not yet used by write_bam_stat - pub mapq_distribution: BTreeMap, // --- samtools flagstat fields --- /// Secondary alignments (0x100). @@ -128,9 +125,6 @@ pub struct BamStatResult { pub outward_pairs: u64, /// Other orientation pairs (FF, RR). pub other_orientation: u64, - /// Primary reads that are paired. - #[allow(dead_code)] - pub primary_paired: u64, /// Total primary reads (non-secondary, non-supplementary). pub primary_count: u64, /// Primary mapped reads count. diff --git a/src/rna/rseqc/idxstats.rs b/src/rna/rseqc/idxstats.rs index 2baeabc..eac7587 100644 --- a/src/rna/rseqc/idxstats.rs +++ b/src/rna/rseqc/idxstats.rs @@ -73,7 +73,7 @@ mod tests { let header_refs: Vec<(String, u64)> = (0..header.target_count()) .map(|tid| { let name = String::from_utf8_lossy(header.tid2name(tid)).to_string(); - let len = header.target_len(tid).unwrap_or(0) as u64; + let len = header.target_len(tid).unwrap_or(0); (name, len) }) .collect(); diff --git a/src/rna/rseqc/tin.rs b/src/rna/rseqc/tin.rs index fe5f151..a1ed2ea 100644 --- a/src/rna/rseqc/tin.rs +++ b/src/rna/rseqc/tin.rs @@ -34,10 +34,10 @@ pub struct TranscriptSampling { /// Transcript end position (0-based, exclusive). pub tx_end: u64, /// Sorted exon blocks as (start, end) in 0-based half-open coords. - #[allow(dead_code)] + #[allow(dead_code)] // stored for API completeness; values used during construction pub exon_regions: Vec<(u64, u64)>, /// Total exonic bases across all exon blocks. - #[allow(dead_code)] + #[allow(dead_code)] // stored for API completeness; values used during construction pub exon_length: u64, /// Genomic positions sampled within exonic regions (sorted). pub sampled_positions: Vec,