From 48cb02ad65046017c3b6dd136fa3e9b5cdd082f0 Mon Sep 17 00:00:00 2001
From: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com>
Date: Thu, 12 Feb 2026 18:24:03 +0000
Subject: [PATCH 1/6] Add release process with cross-platform binaries and
 Docker container

- GitHub Actions release workflow triggered by version tags (v*)
- Builds binaries for Linux (x86_64, aarch64) and macOS (x86_64, aarch64)
- Multi-arch Docker image published to GHCR (ghcr.io/ewels/duprust)
- CI workflow with test, fmt, and clippy checks on PRs
- Multi-stage Dockerfile optimized for rust-htslib static builds

Generated by Claude Code
---
 .dockerignore                 |   9 ++
 .github/workflows/ci.yml      |  76 ++++++++++++++
 .github/workflows/release.yml | 190 ++++++++++++++++++++++++++++++++++
 Dockerfile                    |  30 ++++++
 4 files changed, 305 insertions(+)
 create mode 100644 .dockerignore
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 .github/workflows/release.yml
 create mode 100644 Dockerfile

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..0382d24
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,9 @@
+target/
+.git/
+.github/
+benchmark/
+tests/
+*.md
+LICENSE
+.gitignore
+.dockerignore
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..104305c
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,76 @@
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+permissions:
+  contents: read
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  test:
+    name: Test (${{ matrix.os }})
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Install Linux build deps
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y cmake zlib1g-dev libbz2-dev liblzma-dev \
+            libcurl4-openssl-dev libssl-dev pkg-config clang
+
+      - name: Install macOS build deps
+        if: runner.os == 'macOS'
+        run: brew install bzip2 xz
+
+      - uses: Swatinem/rust-cache@v2
+
+      - name: Build
+        run: cargo build --release
+
+      - name: Test
+        run: cargo test --release
+
+  fmt:
+    name: Formatting
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+        with:
+          components: rustfmt
+      - run: cargo fmt --check
+
+  clippy:
+    name: Clippy
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          components: clippy
+
+      - name: Install Linux build deps
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y cmake zlib1g-dev libbz2-dev liblzma-dev \
+            libcurl4-openssl-dev libssl-dev pkg-config clang
+
+      - uses: Swatinem/rust-cache@v2
+
+      - run: cargo clippy -- -D warnings
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..e26d307
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,190 @@
+name: Release
+
+on:
+  push:
+    tags:
+      - "v[0-9]+.*"
+
+permissions:
+  contents: write
+  packages: write
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  # ------------------------------------------------------------------
+  # 1. Create a draft GitHub release
+  # ------------------------------------------------------------------
+  create-release:
+    name: Create release
+    runs-on: ubuntu-latest
+    outputs:
+      tag: ${{ steps.tag.outputs.tag }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Get tag
+        id: tag
+        run: echo "tag=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"
+
+      - name: Create draft release
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: gh release create "${{ steps.tag.outputs.tag }}" --draft --verify-tag --title "${{ steps.tag.outputs.tag }}"
+
+  # ------------------------------------------------------------------
+  # 2. Build binaries for each target
+  # ------------------------------------------------------------------
+  build-binaries:
+    name: Build ${{ matrix.name }}
+    needs: create-release
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # Linux x86_64
+          - name: linux-x86_64
+            os: ubuntu-latest
+            target: x86_64-unknown-linux-gnu
+            use-cross: false
+          # Linux aarch64
+          - name: linux-aarch64
+            os: ubuntu-latest
+            target: aarch64-unknown-linux-gnu
+            use-cross: true
+          # macOS x86_64
+          - name: macos-x86_64
+            os: macos-13
+            target: x86_64-apple-darwin
+            use-cross: false
+          # macOS aarch64 (Apple Silicon)
+          - name: macos-aarch64
+            os: macos-latest
+            target: aarch64-apple-darwin
+            use-cross: false
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+        with:
+          targets: ${{ matrix.target }}
+
+      # Linux build dependencies (native)
+      - name: Install Linux build deps
+        if: runner.os == 'Linux' && !matrix.use-cross
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y cmake zlib1g-dev libbz2-dev liblzma-dev \
+            libcurl4-openssl-dev libssl-dev pkg-config clang
+
+      # macOS build dependencies
+      - name: Install macOS build deps
+        if: runner.os == 'macOS'
+        run: brew install bzip2 xz
+
+      # Cross (for Linux aarch64)
+      - name: Install cross
+        if: matrix.use-cross
+        uses: taiki-e/install-action@v2
+        with:
+          tool: cross
+
+      - name: Build
+        run: |
+          if [ "${{ matrix.use-cross }}" = "true" ]; then
+            cross build --release --target ${{ matrix.target }}
+          else
+            cargo build --release --target ${{ matrix.target }}
+          fi
+
+      - name: Package
+        id: package
+        run: |
+          BIN="duprust"
+          TAG="${{ needs.create-release.outputs.tag }}"
+          ARCHIVE="${BIN}-${TAG}-${{ matrix.name }}"
+
+          mkdir -p "staging/${ARCHIVE}"
+          cp "target/${{ matrix.target }}/release/${BIN}" "staging/${ARCHIVE}/"
+          cp README.md LICENSE "staging/${ARCHIVE}/" 2>/dev/null || true
+
+          cd staging
+          tar czf "../${ARCHIVE}.tar.gz" "${ARCHIVE}"
+          cd ..
+
+          # SHA256 checksum
+          shasum -a 256 "${ARCHIVE}.tar.gz" > "${ARCHIVE}.tar.gz.sha256"
+
+          echo "archive=${ARCHIVE}.tar.gz" >> "$GITHUB_OUTPUT"
+          echo "checksum=${ARCHIVE}.tar.gz.sha256" >> "$GITHUB_OUTPUT"
+
+      - name: Upload to release
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          gh release upload "${{ needs.create-release.outputs.tag }}" \
+            "${{ steps.package.outputs.archive }}" \
+            "${{ steps.package.outputs.checksum }}"
+
+  # ------------------------------------------------------------------
+  # 3. Build & push Docker image to GHCR
+  # ------------------------------------------------------------------
+  docker:
+    name: Docker image
+    needs: create-release
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/${{ github.repository }}
+          tags: |
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=semver,pattern={{major}}
+            type=raw,value=latest
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+  # ------------------------------------------------------------------
+  # 4. Publish the release (undraft)
+  # ------------------------------------------------------------------
+  publish-release:
+    name: Publish release
+    needs: [create-release, build-binaries, docker]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Publish release
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: gh release edit "${{ needs.create-release.outputs.tag }}" --draft=false
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..65c0a78
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,30 @@
+# ---- Build stage ----
+FROM rust:1-bookworm AS builder
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    cmake \
+    zlib1g-dev \
+    libbz2-dev \
+    liblzma-dev \
+    libcurl4-openssl-dev \
+    libssl-dev \
+    pkg-config \
+    clang \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /build
+COPY Cargo.toml Cargo.lock ./
+COPY src/ src/
+
+RUN cargo build --release && strip target/release/duprust
+
+# ---- Runtime stage ----
+FROM debian:bookworm-slim
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder /build/target/release/duprust /usr/local/bin/duprust
+
+ENTRYPOINT ["duprust"]

From 4d987ecf234098b0e32bdfaae564c2e484586177 Mon Sep 17 00:00:00 2001
From: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com>
Date: Thu, 12 Feb 2026 19:33:36 +0000
Subject: [PATCH 2/6] Fix CI: add libfontconfig1-dev dependency and run cargo
 fmt

- Add libfontconfig1-dev to the apt-get install lists in ci.yml, release.yml,
  and the Dockerfile (required by plotters via yeslogic-fontconfig-sys)
- Run cargo fmt on all source files to pass formatting check

Generated by Claude Code
---
 .github/workflows/ci.yml      |   4 +-
 .github/workflows/release.yml |   2 +-
 Dockerfile                    |   1 +
 src/counting.rs               |  35 ++++++----
 src/gtf.rs                    |   3 +-
 src/main.rs                   |   5 +-
 src/plots.rs                  | 127 ++++++++++++++++++++++++++--------
 tests/integration_test.rs     |  28 ++------
 8 files changed, 130 insertions(+), 75 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 104305c..cab327d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -30,7 +30,7 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install -y cmake zlib1g-dev libbz2-dev liblzma-dev \
-            libcurl4-openssl-dev libssl-dev pkg-config clang
+            libcurl4-openssl-dev libssl-dev libfontconfig1-dev pkg-config clang
 
       - name: Install macOS build deps
         if: runner.os == 'macOS'
@@ -69,7 +69,7 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install -y cmake zlib1g-dev libbz2-dev liblzma-dev \
-            libcurl4-openssl-dev libssl-dev pkg-config clang
+            libcurl4-openssl-dev libssl-dev libfontconfig1-dev pkg-config clang
 
       - uses: Swatinem/rust-cache@v2
 
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index e26d307..916b180 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -79,7 +79,7 @@ jobs:
         run: |
           sudo apt-get update
           sudo apt-get install -y cmake zlib1g-dev libbz2-dev liblzma-dev \
-            libcurl4-openssl-dev libssl-dev pkg-config clang
+            libcurl4-openssl-dev libssl-dev libfontconfig1-dev pkg-config clang
 
       # macOS build dependencies
       - name: Install macOS build deps
diff --git a/Dockerfile b/Dockerfile
index 65c0a78..7b780c9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,6 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     liblzma-dev \
     libcurl4-openssl-dev \
     libssl-dev \
+    libfontconfig1-dev \
     pkg-config \
     clang \
     && rm -rf /var/lib/apt/lists/*
diff --git a/src/counting.rs b/src/counting.rs
index 7110acd..8b0b440 100644
--- a/src/counting.rs
+++ b/src/counting.rs
@@ -137,9 +137,7 @@ impl ChromIndex {
 
         // Binary search for the first interval that could overlap
         // An interval overlaps [start, end) if interval.start < end AND interval.end > start
-        let search_start = self
-            .intervals
-            .partition_point(|iv| iv.end <= start);
+        let search_start = self.intervals.partition_point(|iv| iv.end <= start);
 
         for iv in &self.intervals[search_start..] {
             if iv.start >= end {
@@ -326,9 +324,7 @@ pub fn count_reads(
     // Get chromosome names from header
     let header = bam.header().clone();
     let tid_to_name: Vec<String> = (0..header.target_count())
-        .map(|tid| {
-            String::from_utf8_lossy(header.tid2name(tid)).to_string()
-        })
+        .map(|tid| String::from_utf8_lossy(header.tid2name(tid)).to_string())
         .collect();
 
     // Track statistics
@@ -427,8 +423,7 @@ pub fn count_reads(
         let gene_hits = if let Some(chrom_idx) = index.get(chrom) {
             // Extract aligned blocks from CIGAR (M/=/X operations only).
             // This avoids false overlaps with genes in introns of spliced reads.
-            let aligned_blocks =
-                cigar_to_aligned_blocks(record.pos() as u64, &record.cigar());
+            let aligned_blocks = cigar_to_aligned_blocks(record.pos() as u64, &record.cigar());
 
             let mut overlaps = Vec::new();
             for (block_start, block_end) in &aligned_blocks {
@@ -438,9 +433,7 @@ pub fn count_reads(
             // Filter by strand and deduplicate gene IDs
             let mut genes_hit: Vec<String> = overlaps
                 .iter()
-                .filter(|iv| {
-                    strand_matches(is_reverse, is_read1, paired, iv.strand, stranded)
-                })
+                .filter(|iv| strand_matches(is_reverse, is_read1, paired, iv.strand, stranded))
                 .map(|iv| iv.gene_id.clone())
                 .collect();
             genes_hit.sort_unstable();
@@ -482,7 +475,11 @@ pub fn count_reads(
             // For the fragment, use read1's dup/multi status (featureCounts
             // considers a fragment as duplicate if read1 is flagged as duplicate)
             let frag_is_dup = if is_read1 { is_dup } else { mate_info.is_dup };
-            let frag_is_multi = if is_read1 { is_multi } else { mate_info.is_multi };
+            let frag_is_multi = if is_read1 {
+                is_multi
+            } else {
+                mate_info.is_multi
+            };
 
             // Update N totals (once per fragment)
             n_multi_dup += 1;
@@ -499,7 +496,12 @@ pub fn count_reads(
             combined_genes.dedup();
 
             // Assign to gene if unambiguous (exactly one gene from combined overlaps)
-            assign_fragment_to_gene(&combined_genes, &mut gene_counts, frag_is_dup, frag_is_multi);
+            assign_fragment_to_gene(
+                &combined_genes,
+                &mut gene_counts,
+                frag_is_dup,
+                frag_is_multi,
+            );
         } else {
             // First mate seen - buffer it and wait for the other mate
             mate_buffer.insert(
@@ -527,7 +529,12 @@ pub fn count_reads(
             n_unique_nodup += 1;
         }
 
-        assign_fragment_to_gene(&mate_info.gene_hits, &mut gene_counts, mate_info.is_dup, mate_info.is_multi);
+        assign_fragment_to_gene(
+            &mate_info.gene_hits,
+            &mut gene_counts,
+            mate_info.is_dup,
+            mate_info.is_multi,
+        );
     }
 
     info!(
diff --git a/src/gtf.rs b/src/gtf.rs
index df57775..2c84ad1 100644
--- a/src/gtf.rs
+++ b/src/gtf.rs
@@ -192,7 +192,8 @@ mod tests {
 
     #[test]
     fn test_get_attribute() {
-        let attrs = r#"gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; gene_name "DDX11L1";"#;
+        let attrs =
+            r#"gene_id "ENSG00000223972"; transcript_id "ENST00000456328"; gene_name "DDX11L1";"#;
         assert_eq!(
             get_attribute(attrs, "gene_id"),
             Some("ENSG00000223972".to_string())
diff --git a/src/main.rs b/src/main.rs
index 6238bec..e1ca28a 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -187,10 +187,7 @@ fn main() -> Result<()> {
         stats.f_regions_duplication * 100.0
     );
 
-    info!(
-        "Total runtime: {:.2}s",
-        start.elapsed().as_secs_f64()
-    );
+    info!("Total runtime: {:.2}s", start.elapsed().as_secs_f64());
 
     Ok(())
 }
diff --git a/src/plots.rs b/src/plots.rs
index 9b29cbf..d1473ef 100644
--- a/src/plots.rs
+++ b/src/plots.rs
@@ -129,9 +129,7 @@ fn estimate_density(x: &[f64], y: &[f64], nbins: usize) -> Vec<f64> {
                     let nx = bx as i32 + dx;
                     let ny = by as i32 + dy;
                     if nx >= 0 && nx <= nbins as i32 && ny >= 0 && ny <= nbins as i32 {
-                        let w = (-(dx * dx) as f64 / sigma2_x
-                            - (dy * dy) as f64 / sigma2_y)
-                            .exp();
+                        let w = (-(dx * dx) as f64 / sigma2_x - (dy * dy) as f64 / sigma2_y).exp();
                         smoothed[nx as usize][ny as usize] += c * w;
                     }
                 }
@@ -212,8 +210,11 @@ fn draw_dotted_vline<DB: DrawingBackend>(
     let mut y = y_top;
     while y < y_bot {
         let ye = (y + dash).min(y_bot);
-        root.draw(&PathElement::new(vec![(x, y), (x, ye)], color.stroke_width(sw)))
-            .ok();
+        root.draw(&PathElement::new(
+            vec![(x, y), (x, ye)],
+            color.stroke_width(sw),
+        ))
+        .ok();
         y = ye + gap;
     }
 }
@@ -296,8 +297,7 @@ where
 
     // ── points (sorted by density → dense on top) ──────────────────────
     // R uses pch=20 cex=0.25 → tiny filled dots
-    let mut order: Vec<(usize, f64)> =
-        densities.iter().enumerate().map(|(i, d)| (i, *d)).collect();
+    let mut order: Vec<(usize, f64)> = densities.iter().enumerate().map(|(i, d)| (i, *d)).collect();
     order.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
 
     for &(i, _) in &order {
@@ -395,7 +395,11 @@ where
         let ly = pa_y.0 + (pxs * 8.0) as i32;
         root.draw(&Rectangle::new(
             [(lx, ly), (lx + lw, ly + lh)],
-            ShapeStyle { color: WHITE.to_rgba(), filled: true, stroke_width: 0 },
+            ShapeStyle {
+                color: WHITE.to_rgba(),
+                filled: true,
+                stroke_width: 0,
+            },
         ))?;
         root.draw(&Rectangle::new(
             [(lx, ly), (lx + lw, ly + lh)],
@@ -426,7 +430,11 @@ where
         let samp_x = lx + (pxs * 4.0) as i32;
         root.draw(&Rectangle::new(
             [(lx, ly), (lx + lw, ly + lh)],
-            ShapeStyle { color: WHITE.to_rgba(), filled: true, stroke_width: 0 },
+            ShapeStyle {
+                color: WHITE.to_rgba(),
+                filled: true,
+                stroke_width: 0,
+            },
         ))?;
         root.draw(&Rectangle::new(
             [(lx, ly), (lx + lw, ly + lh)],
@@ -434,8 +442,16 @@ where
         ))?;
         let mut cy = ly + (pxs * 5.0) as i32;
         // "1 read/bp" with red dashed sample line
-        draw_dotted_vline(&root, samp_x + (pxs * 7.0) as i32, cy, cy + line_h - 2, &RED, sw,
-                          (pxs * 3.0) as i32, (pxs * 2.0) as i32);
+        draw_dotted_vline(
+            &root,
+            samp_x + (pxs * 7.0) as i32,
+            cy,
+            cy + line_h - 2,
+            &RED,
+            sw,
+            (pxs * 3.0) as i32,
+            (pxs * 2.0) as i32,
+        );
         root.draw(&Text::new(
             "1 read/bp",
             (txt_x, cy),
@@ -443,8 +459,16 @@ where
         ))?;
         cy += line_h;
         // "0.5 RPKM" with green dashed sample line
-        draw_dotted_vline(&root, samp_x + (pxs * 7.0) as i32, cy, cy + line_h - 2, &GREEN, sw,
-                          (pxs * 3.0) as i32, (pxs * 2.0) as i32);
+        draw_dotted_vline(
+            &root,
+            samp_x + (pxs * 7.0) as i32,
+            cy,
+            cy + line_h - 2,
+            &GREEN,
+            sw,
+            (pxs * 3.0) as i32,
+            (pxs * 2.0) as i32,
+        );
         root.draw(&Text::new(
             "0.5 RPKM",
             (txt_x, cy),
@@ -552,7 +576,12 @@ where
         } else {
             format!("{:.1}", mean_rpk)
         };
-        labels.push(format!("{} - {} % / {}", (pl * 100.0) as u32, (ph * 100.0) as u32, rpk_s));
+        labels.push(format!(
+            "{} - {} % / {}",
+            (pl * 100.0) as u32,
+            (ph * 100.0) as u32,
+            rpk_s
+        ));
         bins.push(vals);
     }
 
@@ -579,7 +608,11 @@ where
             }
         })
         .x_labels(n_bins)
-        .x_label_style(("sans-serif", ps(8.0)).into_font().transform(FontTransform::Rotate270))
+        .x_label_style(
+            ("sans-serif", ps(8.0))
+                .into_font()
+                .transform(FontTransform::Rotate270),
+        )
         .y_label_formatter(&|v| format!("{:.1}", v))
         .axis_desc_style(("sans-serif", ps(13.0)))
         .label_style(("sans-serif", ps(11.0)))
@@ -598,8 +631,17 @@ where
         let med = quantile(&sv, 0.5);
         let q3 = quantile(&sv, 0.75);
         let iqr = q3 - q1;
-        let wl = sv.iter().find(|&&v| v >= q1 - 1.5 * iqr).copied().unwrap_or(q1);
-        let wh = sv.iter().rev().find(|&&v| v <= q3 + 1.5 * iqr).copied().unwrap_or(q3);
+        let wl = sv
+            .iter()
+            .find(|&&v| v >= q1 - 1.5 * iqr)
+            .copied()
+            .unwrap_or(q1);
+        let wh = sv
+            .iter()
+            .rev()
+            .find(|&&v| v <= q3 + 1.5 * iqr)
+            .copied()
+            .unwrap_or(q3);
 
         let bl = idx as f64 + 0.2;
         let br = idx as f64 + 0.8;
@@ -609,7 +651,11 @@ where
         // box fill
         chart.draw_series(std::iter::once(Rectangle::new(
             [(bl, q1), (br, q3)],
-            ShapeStyle { color: gray_fill, filled: true, stroke_width: ps(1.0) },
+            ShapeStyle {
+                color: gray_fill,
+                filled: true,
+                stroke_width: ps(1.0),
+            },
         )))?;
         // box border
         chart.draw_series(std::iter::once(Rectangle::new(
@@ -623,7 +669,10 @@ where
         ))?;
         // whiskers + caps
         for &(from, to, cap_y) in &[(q1, wl, wl), (q3, wh, wh)] {
-            chart.draw_series(LineSeries::new(vec![(cx, from), (cx, to)], BLACK.stroke_width(ps(1.0))))?;
+            chart.draw_series(LineSeries::new(
+                vec![(cx, from), (cx, to)],
+                BLACK.stroke_width(ps(1.0)),
+            ))?;
             chart.draw_series(LineSeries::new(
                 vec![(bl + cap, cap_y), (br - cap, cap_y)],
                 BLACK.stroke_width(ps(1.0)),
@@ -717,10 +766,18 @@ where
         .y_desc("Frequency")
         .x_label_formatter(&|v| {
             let r = (*v * 10.0).round() / 10.0;
-            if (r - r.round()).abs() < 0.01 { format_rpk_tick(r) } else { String::new() }
+            if (r - r.round()).abs() < 0.01 {
+                format_rpk_tick(r)
+            } else {
+                String::new()
+            }
         })
         .y_label_formatter(&|v| {
-            if *v == v.floor() && *v >= 0.0 { format!("{}", *v as i32) } else { String::new() }
+            if *v == v.floor() && *v >= 0.0 {
+                format!("{}", *v as i32)
+            } else {
+                String::new()
+            }
         })
         .axis_desc_style(("sans-serif", ps(14.0)))
         .label_style(("sans-serif", ps(12.0)))
@@ -728,12 +785,18 @@ where
 
     let gray = RGBAColor(190, 190, 190, 1.0);
     for (i, &c) in hist.iter().enumerate() {
-        if c == 0 { continue; }
+        if c == 0 {
+            continue;
+        }
         let x0 = x_min + i as f64 * bw;
         let x1 = x0 + bw;
         chart.draw_series(std::iter::once(Rectangle::new(
             [(x0, 0.0), (x1, c as f64)],
-            ShapeStyle { color: gray, filled: true, stroke_width: 0 },
+            ShapeStyle {
+                color: gray,
+                filled: true,
+                stroke_width: 0,
+            },
         )))?;
         chart.draw_series(std::iter::once(Rectangle::new(
             [(x0, 0.0), (x1, c as f64)],
@@ -795,7 +858,10 @@ pub fn write_mqc_intercept(
     writeln!(f, "#     dupRadar_intercept:")?;
     writeln!(f, "#         title: 'dupRadar int'")?;
     writeln!(f, "#         namespace: 'dupRadar'")?;
-    writeln!(f, "#         description: 'dupRadar duplication rate at low read counts'")?;
+    writeln!(
+        f,
+        "#         description: 'dupRadar duplication rate at low read counts'"
+    )?;
     writeln!(f, "#         max: 100")?;
     writeln!(f, "#         min: 0")?;
     writeln!(f, "#         format: '{{:.2f}}'")?;
@@ -805,11 +871,7 @@ pub fn write_mqc_intercept(
 }
 
 /// Write a MultiQC-compatible line-graph curve file.
-pub fn write_mqc_curve(
-    fit: &FitResult,
-    dm: &DupMatrix,
-    path: &std::path::Path,
-) -> Result<()> {
+pub fn write_mqc_curve(fit: &FitResult, dm: &DupMatrix, path: &std::path::Path) -> Result<()> {
     use std::io::Write;
     let mut f = std::fs::File::create(path)?;
     writeln!(f, "# id: 'dupradar'")?;
@@ -824,7 +886,12 @@ pub fn write_mqc_curve(
     writeln!(f, "#     ymax: 100")?;
     writeln!(f, "#     xlog: True")?;
 
-    let rpks: Vec<f64> = dm.rows.iter().filter(|r| r.rpk > 0.0).map(|r| r.rpk).collect();
+    let rpks: Vec<f64> = dm
+        .rows
+        .iter()
+        .filter(|r| r.rpk > 0.0)
+        .map(|r| r.rpk)
+        .collect();
     if rpks.is_empty() {
         return Ok(());
     }
diff --git a/tests/integration_test.rs b/tests/integration_test.rs
index 3223db5..f4e9255 100644
--- a/tests/integration_test.rs
+++ b/tests/integration_test.rs
@@ -248,19 +248,11 @@ fn test_all_output_files_generated() {
 
     for file in &expected_files {
         let path = format!("{}/{}", outdir, file);
-        assert!(
-            Path::new(&path).exists(),
-            "Missing output file: {}",
-            file
-        );
+        assert!(Path::new(&path).exists(), "Missing output file: {}", file);
 
         // Check file is non-empty
         let metadata = fs::metadata(&path).unwrap();
-        assert!(
-            metadata.len() > 0,
-            "Output file is empty: {}",
-            file
-        );
+        assert!(metadata.len() > 0, "Output file is empty: {}", file);
     }
 
     // Cleanup
@@ -328,10 +320,7 @@ fn test_mqc_intercept_format() {
     // Check MultiQC intercept file format
     let content = fs::read_to_string(format!("{}/test_dup_intercept_mqc.txt", outdir)).unwrap();
     // Skip YAML comment lines (starting with #)
-    let data_lines: Vec<&str> = content
-        .lines()
-        .filter(|l| !l.starts_with('#'))
-        .collect();
+    let data_lines: Vec<&str> = content.lines().filter(|l| !l.starts_with('#')).collect();
     assert!(data_lines.len() >= 2, "MultiQC intercept file too short");
 
     // First data line should be a header with "Sample" and "dupRadar_intercept"
@@ -369,10 +358,7 @@ fn test_mqc_curve_format() {
     let content =
         fs::read_to_string(format!("{}/test_duprateExpDensCurve_mqc.txt", outdir)).unwrap();
     // Skip YAML comment lines (starting with #)
-    let data_lines: Vec<&str> = content
-        .lines()
-        .filter(|l| !l.starts_with('#'))
-        .collect();
+    let data_lines: Vec<&str> = content.lines().filter(|l| !l.starts_with('#')).collect();
     // Header + at least some data points (101 evenly spaced + header = 102)
     assert!(
         data_lines.len() >= 3,
@@ -382,11 +368,7 @@ fn test_mqc_curve_format() {
 
     // Should have a header line with 2 columns
     let header_parts: Vec<&str> = data_lines[0].split('\t').collect();
-    assert_eq!(
-        header_parts.len(),
-        2,
-        "Curve header should have 2 columns"
-    );
+    assert_eq!(header_parts.len(), 2, "Curve header should have 2 columns");
 
     // Data lines should have numeric values
     for line in &data_lines[1..] {

From 76337cd740059c6b82d956b38d58697323ca853c Mon Sep 17 00:00:00 2001
From: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com>
Date: Thu, 12 Feb 2026 19:51:01 +0000
Subject: [PATCH 3/6] Fix clippy lints for Rust 1.93

- Use is_multiple_of instead of a manual modulo check
- Iterate with enumerate() instead of indexing by range (needless_range_loop)
- Allow too_many_arguments on the private draw_dotted_vline helper
- Use RangeInclusive::contains instead of a manual bounds check
- Use std::mem::take instead of drain(..).collect()

Generated by Claude Code
---
 src/counting.rs |  2 +-
 src/plots.rs    | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/counting.rs b/src/counting.rs
index 8b0b440..697d745 100644
--- a/src/counting.rs
+++ b/src/counting.rs
@@ -350,7 +350,7 @@ pub fn count_reads(
         result.context("Error reading BAM record")?;
         total_reads += 1;
 
-        if total_reads % 5_000_000 == 0 {
+        if total_reads.is_multiple_of(5_000_000) {
             debug!("Processed {} reads...", total_reads);
         }
 
diff --git a/src/plots.rs b/src/plots.rs
index d1473ef..6ad009e 100644
--- a/src/plots.rs
+++ b/src/plots.rs
@@ -118,12 +118,12 @@ fn estimate_density(x: &[f64], y: &[f64], nbins: usize) -> Vec<f64> {
 
     // Anisotropic Gaussian smoothing with adaptive bandwidth
     let mut smoothed = vec![vec![0.0f64; nbins + 1]; nbins + 1];
-    for bx in 0..=nbins {
-        for by in 0..=nbins {
-            if grid[bx][by] == 0 {
+    for (bx, row) in grid.iter().enumerate() {
+        for (by, &cell) in row.iter().enumerate() {
+            if cell == 0 {
                 continue;
             }
-            let c = grid[bx][by] as f64;
+            let c = cell as f64;
             for dx in -radius_x..=radius_x {
                 for dy in -radius_y..=radius_y {
                     let nx = bx as i32 + dx;
@@ -197,6 +197,7 @@ fn quantile(sorted: &[f64], p: f64) -> f64 {
 ///
 /// plotters has no native dashed-line support, so we draw small segments
 /// separated by gaps.
+#[allow(clippy::too_many_arguments)]
 fn draw_dotted_vline<DB: DrawingBackend>(
     root: &DrawingArea<DB, plotters::coord::Shift>,
     x: i32,
@@ -285,7 +286,7 @@ where
         .y_labels(21) // step=5 on 0-100, then filter to multiples of 25
         .y_label_formatter(&|v| {
             let iv = v.round() as i32;
-            if iv >= 0 && iv <= 100 && iv % 25 == 0 && (*v - iv as f64).abs() < 0.1 {
+            if (0..=100).contains(&iv) && iv % 25 == 0 && (*v - iv as f64).abs() < 0.1 {
                 format!("{}", iv)
             } else {
                 String::new()
@@ -336,7 +337,7 @@ where
         if seg_idx >= limit {
             if seg_on && seg_pts.len() >= 2 {
                 chart.draw_series(LineSeries::new(
-                    seg_pts.drain(..).collect::<Vec<_>>(),
+                    std::mem::take(&mut seg_pts),
                     BLACK.stroke_width(curve_sw),
                 ))?;
             }

From b9f2d5e7dae9d3af7a2a916b3f9e95d7a123d5ef Mon Sep 17 00:00:00 2001
From: Phil Ewels <phil.ewels@seqera.io>
Date: Thu, 12 Feb 2026 23:26:09 +0100
Subject: [PATCH 4/6] Run cargo fmt

---
 src/counting.rs | 66 +++++++++++++++++++++++++++++--------------------
 src/plots.rs    | 21 +++++-----------
 2 files changed, 45 insertions(+), 42 deletions(-)

diff --git a/src/counting.rs b/src/counting.rs
index 1ed93e4..ae5c489 100644
--- a/src/counting.rs
+++ b/src/counting.rs
@@ -554,39 +554,46 @@ pub fn count_reads(
             //
             // If only one mate has gene hits (the other overlaps nothing),
             // we use that mate's gene set directly.
-            let combined_genes: Vec<String> = if mate_info.gene_hits.is_empty() && gene_hits.is_empty() {
-                Vec::new()
-            } else if mate_info.gene_hits.is_empty() {
-                gene_hits
-            } else if gene_hits.is_empty() {
-                mate_info.gene_hits
-            } else {
-                // Both mates have gene hits - use INTERSECTION
-                let set_a: std::collections::HashSet<&String> = mate_info.gene_hits.iter().collect();
-                let intersection: Vec<String> = gene_hits
-                    .iter()
-                    .filter(|g| set_a.contains(g))
-                    .cloned()
-                    .collect();
-                if intersection.is_empty() {
-                    // Mates disagree on gene assignment - treat as ambiguous
-                    // by returning the union (which will have len > 1)
-                    let mut union = mate_info.gene_hits;
-                    union.extend(gene_hits);
-                    union.sort_unstable();
-                    union.dedup();
-                    union
+            let combined_genes: Vec<String> =
+                if mate_info.gene_hits.is_empty() && gene_hits.is_empty() {
+                    Vec::new()
+                } else if mate_info.gene_hits.is_empty() {
+                    gene_hits
+                } else if gene_hits.is_empty() {
+                    mate_info.gene_hits
                 } else {
-                    intersection
-                }
-            };
+                    // Both mates have gene hits - use INTERSECTION
+                    let set_a: std::collections::HashSet<&String> =
+                        mate_info.gene_hits.iter().collect();
+                    let intersection: Vec<String> = gene_hits
+                        .iter()
+                        .filter(|g| set_a.contains(g))
+                        .cloned()
+                        .collect();
+                    if intersection.is_empty() {
+                        // Mates disagree on gene assignment - treat as ambiguous
+                        // by returning the union (which will have len > 1)
+                        let mut union = mate_info.gene_hits;
+                        union.extend(gene_hits);
+                        union.sort_unstable();
+                        union.dedup();
+                        union
+                    } else {
+                        intersection
+                    }
+                };
 
             // Assign to gene if unambiguous (exactly one gene from combined overlaps)
             if combined_genes.is_empty() {
                 stat_no_features += 1;
             } else if combined_genes.len() > 1 {
                 stat_ambiguous += 1;
-            } else if assign_fragment_to_gene(&combined_genes, &mut gene_counts, frag_is_dup, frag_is_multi) {
+            } else if assign_fragment_to_gene(
+                &combined_genes,
+                &mut gene_counts,
+                frag_is_dup,
+                frag_is_multi,
+            ) {
                 stat_assigned += 1;
             }
         } else {
@@ -620,7 +627,12 @@ pub fn count_reads(
             stat_no_features += 1;
         } else if mate_info.gene_hits.len() > 1 {
             stat_ambiguous += 1;
-        } else if assign_fragment_to_gene(&mate_info.gene_hits, &mut gene_counts, mate_info.is_dup, mate_info.is_multi) {
+        } else if assign_fragment_to_gene(
+            &mate_info.gene_hits,
+            &mut gene_counts,
+            mate_info.is_dup,
+            mate_info.is_multi,
+        ) {
             stat_assigned += 1;
         }
     }
diff --git a/src/plots.rs b/src/plots.rs
index 4386066..5d36104 100644
--- a/src/plots.rs
+++ b/src/plots.rs
@@ -121,7 +121,7 @@ fn estimate_density(x: &[f64], y: &[f64], nbins: usize) -> Vec<f64> {
         let iy = fy.floor() as i32;
         let sx = fx - ix as f64; // fractional x
         let sy = fy - iy as f64; // fractional y
-        // Distribute weight to 4 corners
+                                 // Distribute weight to 4 corners
         for (dx, wx) in [(0i32, 1.0 - sx), (1, sx)] {
             for (dy, wy) in [(0i32, 1.0 - sy), (1, sy)] {
                 let gx = ix + dx;
@@ -145,14 +145,9 @@ fn estimate_density(x: &[f64], y: &[f64], nbins: usize) -> Vec<f64> {
                 for dy in -radius_y..=radius_y {
                     let nx = bx as i32 + dx;
                     let ny = by as i32 + dy;
-                    if nx >= 0
-                        && (nx as usize) < grid_size
-                        && ny >= 0
-                        && (ny as usize) < grid_size
+                    if nx >= 0 && (nx as usize) < grid_size && ny >= 0 && (ny as usize) < grid_size
                     {
-                        let w = (-(dx * dx) as f64 / sigma2_x
-                            - (dy * dy) as f64 / sigma2_y)
-                            .exp();
+                        let w = (-(dx * dx) as f64 / sigma2_x - (dy * dy) as f64 / sigma2_y).exp();
                         smoothed[nx as usize][ny as usize] += c * w;
                     }
                 }
@@ -333,15 +328,11 @@ where
 
     // ── points (data order, matching R's plot() behavior) ─────────────
     // R draws points in data order with pch=20 cex=0.25 → tiny filled dots.
-     // R's pch=20 with cex=0.25 draws tiny filled circles.
+    // R's pch=20 with cex=0.25 draws tiny filled circles.
     // Circle radius 1 at our scale gives the closest match.
     for i in 0..xd.len() {
         let c = density_color(densities[i]);
-        chart.draw_series(std::iter::once(Circle::new(
-            (xd[i], yd[i]),
-            1,
-            c.filled(),
-        )))?;
+        chart.draw_series(std::iter::once(Circle::new((xd[i], yd[i]), 1, c.filled())))?;
     }
 
     // ── fit curve: R uses col='black', lwd=2, lty=3 (dotted) ──────────
@@ -973,7 +964,7 @@ mod tests {
         assert_eq!((c0.0, c0.1, c0.2), (0, 255, 255)); // cyan
         let c1 = density_color(1.0);
         assert_eq!((c1.0, c1.1, c1.2), (255, 0, 0)); // red
-        // Mid-point should be green
+                                                     // Mid-point should be green
         let c_mid = density_color(0.5);
         assert_eq!((c_mid.0, c_mid.1, c_mid.2), (0, 255, 0)); // green
     }

From 8a2b0fe8d596be2f879e5678a806bc11eaeb4b3d Mon Sep 17 00:00:00 2001
From: Phil Ewels <phil.ewels@seqera.io>
Date: Thu, 12 Feb 2026 23:31:08 +0100
Subject: [PATCH 5/6] Add AGENTS.md so that the auto-formatting and checks are
 done pre-push

---
 AGENTS.md | 170 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 170 insertions(+)
 create mode 100644 AGENTS.md

diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..37c38d2
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,170 @@
+# AGENTS.md — dupRust
+
+> Fast Rust reimplementation of [dupRadar](https://github.com/ssayols/dupRadar) for assessing
+> PCR duplicate rates in RNA-Seq datasets. Binary crate (`duprust`), Rust edition 2021.
+
+## Build / Lint / Test Commands
+
+```bash
+# Build
+cargo build              # debug build
+cargo build --release    # optimized release build (LTO, strip, opt-level 3)
+
+# Format (enforced in CI — default rustfmt, no config file)
+cargo fmt                # auto-format
+cargo fmt --check        # check only (CI uses this)
+
+# Lint (enforced in CI — default clippy, warnings are errors)
+cargo clippy -- -D warnings
+
+# Test — all unit + integration tests
+cargo test               # debug mode
+cargo test --release     # release mode (CI uses this)
+
+# Run a single test by name (substring match)
+cargo test test_dup_rate_calculation
+cargo test test_dup_matrix_exact_match -- --nocapture   # with stdout
+
+# Run only unit tests (skip integration tests)
+cargo test --bins
+
+# Run only integration tests
+cargo test --test integration_test
+
+# Run a specific integration test
+cargo test --test integration_test test_intercept_slope_match
+```
+
+## Project Structure
+
+```
+src/
+  main.rs        — Entry point, orchestrates the 8-step pipeline
+  cli.rs         — CLI argument parsing (clap derive)
+  config.rs      — YAML configuration loading (serde)
+  gtf.rs         — GTF annotation file parser
+  counting.rs    — BAM read counting engine (largest module)
+  dupmatrix.rs   — Duplication matrix construction & TSV output
+  fitting.rs     — Logistic regression via IRLS
+  plots.rs       — Plot generation: density scatter, boxplot, histogram
+tests/
+  integration_test.rs  — 8 integration tests vs R dupRadar reference output
+  data/                — Test BAM/GTF input files
+  expected/            — R-generated reference outputs
+  create_test_data.R   — R script to regenerate test data + references
+```
+
+Flat module structure — all modules declared in `main.rs`, no `lib.rs`.
+Inter-module access uses `crate::` paths (e.g., `use crate::gtf::Gene;`).
+
+## Code Style
+
+### Formatting
+
+- **Default `rustfmt`** — no `rustfmt.toml` exists. Do not create one.
+- 4-space indentation, ~100 char line width.
+- Trailing commas on all multi-line constructs.
+- Chained method calls break to new line with indent.
+
+### Imports
+
+Three groups (crate-internal, third-party, std), though blank-line separation
+between groups is not strictly enforced. Each `use` is a single statement:
+
+```rust
+use crate::gtf::Gene;
+use anyhow::{Context, Result};
+use indexmap::IndexMap;
+use log::{debug, info};
+use std::collections::HashMap;
+```
+
+Localized `use` inside function bodies is acceptable for narrow imports
+(e.g., `use std::io::Write;`).
+
+### Naming
+
+| Kind              | Convention             | Examples                                       |
+|-------------------|------------------------|-------------------------------------------------|
+| Types / Structs   | `CamelCase`            | `GeneCounts`, `DupMatrix`, `FitResult`          |
+| Functions/Methods | `snake_case`           | `count_reads`, `build_index`, `format_float`    |
+| Constants         | `SCREAMING_SNAKE_CASE` | `BAM_FDUP`, `DENSITY_COLORS`, `SCALE`           |
+| Modules           | `snake_case`           | `dupmatrix`, `counting`, `fitting`              |
+| Variables/Fields  | `snake_case`           | `gene_counts`, `dup_rate_multi`, `is_dup`       |
+| Type aliases      | `CamelCase`            | `MateBufferKey`                                 |
+
+### Error Handling
+
+- **`anyhow::Result<T>`** for all fallible functions. No custom error types.
+- Propagate with `?` operator.
+- Add context with `.context("msg")` or `.with_context(|| format!(...))`.
+- Use `anyhow::bail!()` for early error returns.
+- Use `anyhow::ensure!()` for precondition checks.
+- **`unwrap()` / `expect()`** belong in test code. In production code, `unwrap()` is
+  acceptable only when a prior guard makes it provably safe (add a comment).
+
+### Documentation
+
+- **Every source file** starts with `//!` module doc comment (2-4 lines).
+- **All public items** (structs, fields, functions, methods) get `///` doc comments.
+- Complex functions include `# Arguments` and `# Returns` sections.
+- Inline `//` comments explain complex logic, domain-specific behavior, and
+  references to R equivalents.
+- Long files use section dividers: `// ===` for major sections, `// ---` for sub-sections.
+
+### Types and Derives
+
+- `#[derive(Debug)]` on all structs.
+- Add `Clone`, `Default`, `Deserialize` as needed — keep derives minimal.
+- Public structs expose `pub` fields. Private helper structs keep fields private.
+- Numeric conventions: `u64` for counts/positions, `f64` for metrics, `u8` for flags/strandedness.
+- `IndexMap` when insertion order matters (gene ordering); `HashMap` for unordered lookups.
+
+### Clippy
+
+- Default clippy settings with `-D warnings` (deny all warnings).
+- Targeted `#[allow(...)]` annotations are acceptable with justification:
+  - `#[allow(dead_code)]` for fields kept for API completeness.
+  - `#[allow(clippy::too_many_arguments)]` when refactoring would reduce clarity.
+
+### Tests
+
+- **Unit tests** co-located in each source file inside `#[cfg(test)] mod tests { use super::*; ... }`.
+- **Integration tests** in `tests/integration_test.rs` — run the binary as a subprocess
+  and compare output against R reference files in `tests/expected/`.
+- Test naming: `test_<description_in_snake_case>`.
+- Use `assert_eq!` with descriptive messages for exact comparisons.
+- Use `assert!((val - expected).abs() < tolerance)` for float comparisons.
+- No dev-dependencies — tests use only std + crate dependencies.
+
+## CI Pipeline
+
+GitHub Actions (`.github/workflows/ci.yml`) runs on push to `main` and all PRs:
+
+1. **Test** — `cargo test --release` on Ubuntu and macOS
+2. **Format** — `cargo fmt --check`
+3. **Clippy** — `cargo clippy -- -D warnings`
+
+All three must pass. Uses `dtolnay/rust-toolchain@stable` and `Swatinem/rust-cache@v2`.
+
+## Key Dependencies
+
+| Crate          | Purpose                          |
+|----------------|----------------------------------|
+| `clap` v4      | CLI argument parsing (derive)    |
+| `rust-htslib`  | BAM file I/O (statically linked) |
+| `plotters`     | Chart generation (PNG + SVG)     |
+| `serde`        | YAML config deserialization      |
+| `anyhow`       | Error handling                   |
+| `log`          | Logging facade                   |
+| `env_logger`   | Log output backend               |
+| `indexmap`     | Insertion-order-preserving maps  |
+
+## Notes for Agents
+
+- The codebase is a pure binary crate with no library target.
+- Release builds use aggressive optimization (`lto = true`, `codegen-units = 1`, `strip = true`).
+- Test data is generated by `tests/create_test_data.R` — do not modify `tests/expected/` by hand.
+- Float output formatting must match R's behavior (15 significant digits, "NA" for NaN, trailing-zero trimming).
+- The pipeline processes BAM files which can be very large — performance matters.
+- System dependencies needed for building: cmake, zlib, bz2, lzma, curl, ssl, clang (for `rust-htslib`).

From c409602d6e15072f675fcd69426e0c8ee0387764 Mon Sep 17 00:00:00 2001
From: Phil Ewels <phil.ewels@seqera.io>
Date: Thu, 12 Feb 2026 23:33:09 +0100
Subject: [PATCH 6/6] Fix clippy errors

---
 src/counting.rs | 1 +
 src/plots.rs    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/src/counting.rs b/src/counting.rs
index ae5c489..c640a05 100644
--- a/src/counting.rs
+++ b/src/counting.rs
@@ -23,6 +23,7 @@ const BAM_FUNMAP: u16 = 0x4;
 /// Flag indicating the read failed quality checks (0x200).
 const BAM_FQCFAIL: u16 = 0x200;
 /// Flag indicating a secondary alignment (0x100).
+#[allow(dead_code)]
 const BAM_FSECONDARY: u16 = 0x100;
 /// Flag indicating a supplementary alignment (0x800).
 const BAM_FSUPPLEMENTARY: u16 = 0x800;
diff --git a/src/plots.rs b/src/plots.rs
index 5d36104..aa79948 100644
--- a/src/plots.rs
+++ b/src/plots.rs
@@ -135,6 +135,7 @@ fn estimate_density(x: &[f64], y: &[f64], nbins: usize) -> Vec<f64> {
 
     // Anisotropic Gaussian smoothing (matching bkde2D with tau=3.4)
     let mut smoothed = vec![vec![0.0f64; grid_size]; grid_size];
+    #[allow(clippy::needless_range_loop)] // bx/by used as integer coordinates for offset arithmetic
     for bx in 0..grid_size {
         for by in 0..grid_size {
             if grid[bx][by] == 0.0 {