From d2eb78f37c48b938696b979405439269fedcb635 Mon Sep 17 00:00:00 2001
From: KeKs0r <marc.hoeffl@laserfocus.io>
Date: Thu, 2 Apr 2026 00:13:01 +0200
Subject: [PATCH 1/5] replace backfill chunking with smart planner

---
 packages/cli/src/plugin.test.ts               |   2 +-
 packages/plugin-backfill/package.json         |   5 +
 .../plugin-backfill/src/chunking/analyze.ts   | 954 +++++++++++++++++-
 .../src/chunking/introspect.ts                |  67 +-
 .../smart-chunking.integration.test.ts        | 420 ++++++++
 packages/plugin-backfill/src/chunking/sql.ts  |  76 +-
 .../plugin-backfill/src/chunking/types.ts     |  51 +
 packages/plugin-backfill/src/index.ts         |  10 -
 packages/plugin-backfill/src/planner.ts       |  20 +-
 packages/plugin-backfill/src/plugin.test.ts   |  11 +
 packages/plugin-backfill/src/sdk.ts           |  29 +
 packages/plugin-backfill/src/types.ts         |  20 +-
 12 files changed, 1573 insertions(+), 92 deletions(-)
 create mode 100644 packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts
 create mode 100644 packages/plugin-backfill/src/sdk.ts
diff --git a/packages/cli/src/plugin.test.ts b/packages/cli/src/plugin.test.ts
index ddda0ed..8ff0a2b 100644
--- a/packages/cli/src/plugin.test.ts
+++ b/packages/cli/src/plugin.test.ts
@@ -40,7 +40,7 @@ async function waitForParts(
   database: string,
   table: string,
   expectedPartitions: number,
-  timeoutMs = 15_000,
+  timeoutMs = 60_000,
 ): Promise<void> {
   const start = Date.now()
   while (Date.now() - start < timeoutMs) {
diff --git a/packages/plugin-backfill/package.json b/packages/plugin-backfill/package.json
index c4cbe63..9a035ca 100644
--- a/packages/plugin-backfill/package.json
+++ b/packages/plugin-backfill/package.json
@@ -27,6 +27,11 @@
       "source": "./src/index.ts",
       "types": "./dist/index.d.ts",
       "default": "./dist/index.js"
+    },
+    "./sdk": {
+      "source": "./src/sdk.ts",
+      "types": "./dist/sdk.d.ts",
+      "default": "./dist/sdk.js"
     }
   },
   "files": [
diff --git a/packages/plugin-backfill/src/chunking/analyze.ts b/packages/plugin-backfill/src/chunking/analyze.ts
index 7e051e1..278d9df 100644
--- a/packages/plugin-backfill/src/chunking/analyze.ts
+++ b/packages/plugin-backfill/src/chunking/analyze.ts
@@ -1,8 +1,44 @@
 import { hashId, randomPlanId } from '../state.js'
 
-import { buildChunkBoundaries } from './build.js'
-import { introspectTable, querySortKeyRanges } from './introspect.js'
-import type { ChunkBoundary, PartitionInfo, PlannedChunk, SortKeyInfo } from './types.js'
+import { introspectTable } from './introspect.js'
+import type {
+  ChunkBoundary,
+  EstimateConfidence,
+  EstimateReason,
+  PartitionDiagnostics,
+  PartitionInfo,
+  PlannedChunk,
+  SliceLineageStep,
+  SliceRange,
+  SortKeyInfo,
+} from './types.js'
+
+const MAX_SPLIT_DEPTH_MULTIPLIER = 3
+const TARGET_BYTES_FUZZ_FACTOR = 1.15
+const STOP_SPLIT_FUZZ_FACTOR = 1.5
+const STRING_PREFIX_START_DEPTH = 1
+const STRING_PREFIX_MAX_DEPTH = 4
+const BINARY_SEARCH_STEPS = 24
+
+interface PartitionSlice {
+  partitionId: string
+  ranges: SliceRange[]
+  estimatedRows: number
+  estimatedBytes: number
+  isHotKey: boolean
+  hotDimensionIndex?: number
+  hotKeyValue?: string
+  estimateConfidence: EstimateConfidence
+  estimateReason: EstimateReason
+  lineage: SliceLineageStep[]
+}
+
+interface QueryContext {
+  database: string
+  table: string
+  sortKeys: SortKeyInfo[]
+  query: <T>(sql: string) => Promise<T[]>
+}
 
 export interface AnalyzeAndChunkInput {
   database: string
@@ -18,11 +54,13 @@ export interface AnalyzeAndChunkResult {
   planId: string
   partitions: PartitionInfo[]
   sortKey?: SortKeyInfo
+  sortKeys: SortKeyInfo[]
   chunks: PlannedChunk[]
+  partitionDiagnostics: PartitionDiagnostics[]
 }
 
 export async function analyzeAndChunk(input: AnalyzeAndChunkInput): Promise<AnalyzeAndChunkResult> {
-  const { partitions, sortKey, boundaries } = await analyzeTable({
+  const { partitions, sortKey, sortKeys, boundaries, partitionDiagnostics } = await analyzeTable({
     database: input.database,
     table: input.table,
     from: input.from,
@@ -36,11 +74,12 @@ export async function analyzeAndChunk(input: AnalyzeAndChunkInput): Promise<Anal
   const chunks = buildPlannedChunks({
     planId,
     partitions,
+    sortKeys,
     boundaries,
     requireIdempotencyToken: input.requireIdempotencyToken,
   })
 
-  return { planId, partitions, sortKey, chunks }
+  return { planId, partitions, sortKey, sortKeys, chunks, partitionDiagnostics }
 }
 
 export interface AnalyzeTableInput {
@@ -55,11 +94,13 @@ export interface AnalyzeTableInput {
 export interface AnalyzeTableResult {
   partitions: PartitionInfo[]
   sortKey?: SortKeyInfo
+  sortKeys: SortKeyInfo[]
   boundaries: ChunkBoundary[]
+  partitionDiagnostics: PartitionDiagnostics[]
 }
 
 export async function analyzeTable(input: AnalyzeTableInput): Promise<AnalyzeTableResult> {
-  const { partitions, sortKey } = await introspectTable({
+  const { partitions, sortKey, sortKeys } = await introspectTable({
     database: input.database,
     table: input.table,
     from: input.from,
@@ -67,34 +108,64 @@ export async function analyzeTable(input: AnalyzeTableInput): Promise<AnalyzeTab
     query: input.query,
   })
 
-  const oversizedPartitionIds = partitions
-    .filter(p => p.bytesOnDisk > input.maxChunkBytes)
-    .map(p => p.partitionId)
-
-  let sortKeyRanges: Map<string, { min: string; max: string }> | undefined
-  if (sortKey && oversizedPartitionIds.length > 0) {
-    sortKeyRanges = await querySortKeyRanges({
-      database: input.database,
-      table: input.table,
-      sortKeyColumn: sortKey.column,
-      partitionIds: oversizedPartitionIds,
-      query: input.query,
-    })
+  const context: QueryContext = {
+    database: input.database,
+    table: input.table,
+    sortKeys,
+    query: input.query,
   }
 
-  const boundaries = buildChunkBoundaries({
-    partitions,
-    maxChunkBytes: input.maxChunkBytes,
-    sortKey,
-    sortKeyRanges,
-  })
+  const boundaries: ChunkBoundary[] = []
+  const partitionDiagnostics: PartitionDiagnostics[] = []
+
+  for (const partition of partitions) {
+    const slices = await planPartition(context, partition, input.maxChunkBytes)
+    const merged = mergeAdjacentSlices(slices, input.maxChunkBytes)
 
-  return { partitions, sortKey, boundaries }
+    for (const slice of merged) {
+      const primaryRange = getSliceRange(slice, 0)
+      boundaries.push({
+        partitionId: slice.partitionId,
+        ranges: slice.ranges,
+        sortKeyFrom: primaryRange.from,
+        sortKeyTo: primaryRange.to,
+        estimatedBytes: slice.estimatedBytes,
+        estimatedRows: slice.estimatedRows,
+        isHotKey: slice.isHotKey,
+        hotDimensionIndex: slice.hotDimensionIndex,
+        hotKeyValue: slice.hotKeyValue,
+        estimateConfidence: slice.estimateConfidence,
+        estimateReason: slice.estimateReason,
+        lineage: slice.lineage,
+      })
+    }
+
+    const estimatedRowSum = merged.reduce((sum, slice) => sum + slice.estimatedRows, 0)
+    const estimateToExactRatio = partition.rows > 0 ? estimatedRowSum / partition.rows : 1
+    partitionDiagnostics.push({
+      partitionId: partition.partitionId,
+      estimatedRowSum,
+      exactPartitionRows: partition.rows,
+      estimateToExactRatio,
+      suspiciousEstimate: estimateToExactRatio < 0.7 || estimateToExactRatio > 1.3,
+      lowConfidenceChunkCount: merged.filter((slice) => slice.estimateConfidence === 'low').length,
+      usedDistributionFallback: merged.some((slice) =>
+        slice.estimateReason === 'string-prefix-distribution' ||
+        slice.estimateReason === 'temporal-distribution' ||
+        slice.estimateReason === 'equal-width-distribution'
+      ),
+      usedLowConfidenceChunkRefinement: false,
+      usedExactCountFallback: false,
+    })
+  }
+
+  return { partitions, sortKey, sortKeys, boundaries, partitionDiagnostics }
 }
 
 export function buildPlannedChunks(input: {
   planId: string
   partitions: PartitionInfo[]
+  sortKeys: SortKeyInfo[]
   boundaries: ChunkBoundary[]
   requireIdempotencyToken: boolean
 }): PlannedChunk[] {
@@ -109,21 +180,846 @@ export function buildPlannedChunks(input: {
     const chunkId = hashId(`chunk:${idSeed}`).slice(0, 16)
     const token = input.requireIdempotencyToken ? hashId(`token:${idSeed}`) : ''
 
-    const partition = input.partitions.find(p => p.partitionId === boundary.partitionId)
-    const from = boundary.sortKeyFrom ?? partition?.minTime ?? ''
-    const to = boundary.sortKeyTo ?? partition?.maxTime ?? ''
+    const partition = input.partitions.find((candidate) => candidate.partitionId === boundary.partitionId)
+    const { from, to } = deriveChunkWindow(boundary.ranges ?? [], input.sortKeys, partition)
 
     chunks.push({
       id: chunkId,
       partitionId: boundary.partitionId,
+      ranges: boundary.ranges,
       sortKeyFrom: boundary.sortKeyFrom,
       sortKeyTo: boundary.sortKeyTo,
       estimatedBytes: boundary.estimatedBytes,
+      estimatedRows: boundary.estimatedRows,
       idempotencyToken: token,
       from,
       to,
+      isHotKey: boundary.isHotKey,
+      hotDimensionIndex: boundary.hotDimensionIndex,
+      hotKeyValue: boundary.hotKeyValue,
+      estimateConfidence: boundary.estimateConfidence,
+      estimateReason: boundary.estimateReason,
+      lineage: boundary.lineage,
     })
   }
 
   return chunks
 }
+
+async function planPartition(
+  context: QueryContext,
+  partition: PartitionInfo,
+  maxChunkBytes: number,
+): Promise<PartitionSlice[]> {
+  if (partition.bytesOnDisk <= maxChunkBytes || context.sortKeys.length === 0) {
+    return [buildRootSlice(partition)]
+  }
+
+  const rootSlice = buildRootSlice(partition)
+  return splitSliceRecursively(context, partition, rootSlice, maxChunkBytes, 0)
+}
+
+async function splitSliceRecursively(
+  context: QueryContext,
+  partition: PartitionInfo,
+  slice: PartitionSlice,
+  maxChunkBytes: number,
+  depth: number,
+): Promise<PartitionSlice[]> {
+  if (slice.estimatedBytes <= maxChunkBytes * STOP_SPLIT_FUZZ_FACTOR) {
+    return [slice]
+  }
+
+  if (depth >= context.sortKeys.length * MAX_SPLIT_DEPTH_MULTIPLIER) {
+    return [slice]
+  }
+
+  const children = await splitOversizedSlice(context, partition, slice, maxChunkBytes, depth)
+  if (children.length <= 1) {
+    return [slice]
+  }
+
+  const finalChildren: PartitionSlice[] = []
+  for (const child of children) {
+    finalChildren.push(...await splitSliceRecursively(context, partition, child, maxChunkBytes, depth + 1))
+  }
+  return finalChildren
+}
+
+async function splitOversizedSlice(
+  context: QueryContext,
+  partition: PartitionInfo,
+  slice: PartitionSlice,
+  maxChunkBytes: number,
+  depth: number,
+): Promise<PartitionSlice[]> {
+  for (const dimensionIndex of getCandidateDimensions(context.sortKeys, slice)) {
+    const preparedSlice = await hydrateSliceRange(context, slice, dimensionIndex)
+    if (!preparedSlice) continue
+
+    const sortKey = context.sortKeys[dimensionIndex]
+    if (!sortKey) continue
+
+    const rootLike = depth === 0
+    const hotIdentity = findHotIdentity(preparedSlice, context.sortKeys)
+
+    if (sortKey.category === 'string') {
+      const stringSlices = await splitSliceWithStringPrefixes(
+        context,
+        partition,
+        preparedSlice,
+        dimensionIndex,
+        maxChunkBytes,
+        STRING_PREFIX_START_DEPTH,
+      )
+      if (isEffectiveSplit(preparedSlice, stringSlices)) {
+        return applyHotIdentity(stringSlices, hotIdentity)
+      }
+    }
+
+    if (sortKey.category === 'datetime' && (!rootLike || hotIdentity !== undefined)) {
+      const temporalSlices = await splitSliceWithTemporalBuckets(
+        context,
+        partition,
+        markHotSlice(preparedSlice, hotIdentity),
+        dimensionIndex,
+        maxChunkBytes,
+      )
+      if (isEffectiveSplit(preparedSlice, temporalSlices)) {
+        return applyHotIdentity(temporalSlices, hotIdentity)
+      }
+    }
+
+    const quantileSlices = await splitWithRanges(
+      context,
+      partition,
+      preparedSlice,
+      dimensionIndex,
+      maxChunkBytes,
+    )
+    if (isEffectiveSplit(preparedSlice, quantileSlices)) {
+      return applyHotIdentity(quantileSlices, hotIdentity)
+    }
+  }
+
+  return [slice]
+}
+
+async function splitWithRanges(
+  context: QueryContext,
+  partition: PartitionInfo,
+  slice: PartitionSlice,
+  dimensionIndex: number,
+  maxChunkBytes: number,
+): Promise<PartitionSlice[]> {
+  const sortKey = context.sortKeys[dimensionIndex]
+  const range = getSliceRange(slice, dimensionIndex)
+  if (!sortKey || range.from === undefined || range.to === undefined) return [slice]
+  if (sortKey.category === 'string' && isExactSliceRange(range)) return [slice]
+
+  const subCount = Math.ceil(slice.estimatedBytes / maxChunkBytes)
+  if (subCount <= 1) return [slice]
+
+  const boundaries = await buildQuantileBoundaries(context, slice, dimensionIndex, subCount)
+  if (boundaries) {
+    return splitSliceWithBoundaries(
+      context,
+      partition,
+      slice,
+      dimensionIndex,
+      boundaries,
+      'quantile-range-split',
+      'split slice into quantile-aligned ranges',
+      'quantile-estimate',
+      'high',
+    )
+  }
+
+  const equalWidthBoundaries = buildEvenlySpacedBoundaries(range.from, range.to, subCount, sortKey)
+  return splitSliceWithBoundaries(
+    context,
+    partition,
+    slice,
+    dimensionIndex,
+    equalWidthBoundaries,
+    'equal-width-split',
+    'fallback to equal-width ranges',
+    'equal-width-distribution',
+    'low',
+  )
+}
+
+async function splitSliceWithBoundaries(
+  context: QueryContext,
+  partition: PartitionInfo,
+  slice: PartitionSlice,
+  dimensionIndex: number,
+  boundaries: string[],
+  strategyId: string,
+  reason: string,
+  estimateReason: EstimateReason,
+  estimateConfidence: EstimateConfidence,
+): Promise<PartitionSlice[]> {
+  const slices: PartitionSlice[] = []
+
+  for (let index = 0; index < boundaries.length - 1; index++) {
+    const ranges = replaceSliceRange(slice, dimensionIndex, boundaries[index], boundaries[index + 1])
+    const estimatedRows = await countRows(context, partition.partitionId, ranges)
+    slices.push(buildSliceFromRows(partition, {
+      ranges,
+      estimatedRows,
+      isHotKey: false,
+      hotDimensionIndex: undefined,
+      hotKeyValue: undefined,
+      estimateConfidence,
+      estimateReason,
+      lineage: slice.lineage.concat([{ strategyId, dimensionIndex, reason }]),
+    }))
+  }
+
+  return slices
+}
+
+/**
+ * Splits `slice` along a string sort key by grouping its rows on
+ * `substring(column, 1, depth)` (one GROUP BY query per call). A bucket that is still
+ * above the fuzzed byte target is re-split with a longer prefix until
+ * STRING_PREFIX_MAX_DEPTH is reached or the bucket is a single complete value.
+ * Returns [] when the dimension is not a string key or its range is not hydrated.
+ */
+async function splitSliceWithStringPrefixes(
+  context: QueryContext,
+  partition: PartitionInfo,
+  slice: PartitionSlice,
+  dimensionIndex: number,
+  maxChunkBytes: number,
+  depth: number,
+): Promise<PartitionSlice[]> {
+  const sortKey = context.sortKeys[dimensionIndex]
+  const range = getSliceRange(slice, dimensionIndex)
+  if (!sortKey || sortKey.category !== 'string' || range.from === undefined || range.to === undefined) {
+    return []
+  }
+
+  const rows = await context.query<{ prefix: string; cnt: string }>(`
+SELECT
+  substring(${sortKey.column}, 1, ${depth}) AS prefix,
+  count() AS cnt
+FROM ${context.database}.${context.table}
+WHERE ${buildWhereClause(partition.partitionId, replaceSliceRange(slice, dimensionIndex, range.from, range.to), context.sortKeys)}
+GROUP BY prefix
+ORDER BY prefix`)
+
+  const slices: PartitionSlice[] = []
+  for (const [index, row] of rows.entries()) {
+    const rowCount = Number(row.cnt)
+    if (rowCount <= 0) continue
+
+    // A prefix shorter than `depth` is the complete column value rather than a prefix.
+    const isExactValue = Buffer.from(row.prefix, 'latin1').length < depth
+    const bucketFrom = maxBinaryString(range.from, row.prefix)
+
+    // If `nextPrefixValue` yields no successor, the bucket must still be planned or its
+    // rows would be missing from every chunk: close it at the next bucket's prefix
+    // (buckets arrive in prefix order) or, for the last bucket, at the slice's own end.
+    const bucketUpper = (isExactValue ? `${row.prefix}\0` : nextPrefixValue(row.prefix))
+      || rows[index + 1]?.prefix
+    const bucketTo = bucketUpper ? minBinaryString(range.to, bucketUpper) : range.to
+
+    const bucketSlice = buildSliceFromRows(partition, {
+      ranges: replaceSliceRange(slice, dimensionIndex, bucketFrom, bucketTo),
+      estimatedRows: rowCount,
+      isHotKey: false,
+      hotDimensionIndex: undefined,
+      hotKeyValue: undefined,
+      estimateConfidence: 'high',
+      estimateReason: 'string-prefix-distribution',
+      lineage: slice.lineage.concat([{
+        strategyId: 'string-prefix-split',
+        dimensionIndex,
+        reason: 'split slice using string prefix distribution',
+      }]),
+    })
+
+    const fitsTarget = bucketSlice.estimatedBytes <= maxChunkBytes * TARGET_BYTES_FUZZ_FACTOR
+    if (fitsTarget || isExactValue || depth >= STRING_PREFIX_MAX_DEPTH) {
+      // Small enough, or not refinable (single value / maximum prefix depth reached).
+      slices.push(bucketSlice)
+      continue
+    }
+
+    slices.push(...await splitSliceWithStringPrefixes(
+      context, partition, bucketSlice, dimensionIndex, maxChunkBytes, depth + 1,
+    ))
+  }
+
+  return slices
+}
+
+async function splitSliceWithTemporalBuckets(
+  context: QueryContext,
+  partition: PartitionInfo,
+  slice: PartitionSlice,
+  dimensionIndex: number,
+  maxChunkBytes: number,
+): Promise<PartitionSlice[]> {
+  const dayBuckets = await probeTemporalBuckets(context, partition.partitionId, slice.ranges, dimensionIndex, 'day')
+  if (dayBuckets.length === 0) return [slice]
+
+  const daySlices = buildTemporalSlices(partition, slice, dimensionIndex, dayBuckets, maxChunkBytes)
+  if (daySlices.every((candidate) => candidate.estimatedBytes <= maxChunkBytes * TARGET_BYTES_FUZZ_FACTOR)) {
+    return daySlices
+  }
+
+  const hourBuckets = await probeTemporalBuckets(context, partition.partitionId, slice.ranges, dimensionIndex, 'hour')
+  if (hourBuckets.length === 0) return daySlices
+  return buildTemporalSlices(partition, slice, dimensionIndex, hourBuckets, maxChunkBytes)
+}
+
+async function probeTemporalBuckets(
+  context: QueryContext,
+  partitionId: string,
+  ranges: SliceRange[],
+  dimensionIndex: number,
+  grain: 'day' | 'hour',
+): Promise<Array<{ start: string; rowCount: number }>> {
+  const sortKey = context.sortKeys[dimensionIndex]
+  if (!sortKey || sortKey.category !== 'datetime') return []
+
+  const bucketExpression = grain === 'day'
+    ? `toStartOfDay(${sortKey.column})`
+    : `toStartOfHour(${sortKey.column})`
+
+  const rows = await context.query<{ bucket: string; cnt: string }>(`
+SELECT
+  formatDateTime(${bucketExpression}, '%Y-%m-%dT%H:%i:%sZ') AS bucket,
+  count() AS cnt
+FROM ${context.database}.${context.table}
+WHERE ${buildWhereClause(partitionId, ranges, context.sortKeys)}
+GROUP BY bucket
+ORDER BY bucket`)
+
+  return rows.map((row) => ({
+    start: row.bucket,
+    rowCount: Number(row.cnt),
+  }))
+}
+
+/**
+ * Packs ordered temporal buckets (bucket start plus row count) into slices of about
+ * `maxChunkBytes`, closing a slice when adding the next bucket would pass the fuzzed
+ * row target. Slices stay inside the parent's range on this dimension: widening the
+ * first slice back to its bucket start, or the last one out to the partition end,
+ * would make it overlap sibling slices produced by an earlier split on the same key.
+ */
+function buildTemporalSlices(
+  partition: PartitionInfo,
+  parentSlice: PartitionSlice,
+  dimensionIndex: number,
+  buckets: Array<{ start: string; rowCount: number }>,
+  maxChunkBytes: number,
+): PartitionSlice[] {
+  const targetChunkRows = getTargetChunkRows(partition, maxChunkBytes)
+  const parentRange = getSliceRange(parentSlice, dimensionIndex)
+  const slices: PartitionSlice[] = []
+  let currentStart: string | undefined
+  let currentRows = 0
+  // Appends one slice covering [from, to); hot-key marking is inherited from the parent.
+  const pushSlice = (from: string, to: string, estimatedRows: number): void => {
+    slices.push(buildSliceFromRows(partition, {
+      ranges: replaceSliceRange(parentSlice, dimensionIndex, from, to),
+      estimatedRows,
+      isHotKey: parentSlice.isHotKey,
+      hotDimensionIndex: parentSlice.hotDimensionIndex,
+      hotKeyValue: parentSlice.hotKeyValue,
+      estimateConfidence: 'low',
+      estimateReason: 'temporal-distribution',
+      lineage: parentSlice.lineage.concat([{
+        strategyId: 'temporal-bucket-split',
+        dimensionIndex,
+        reason: 'split slice using temporal distribution buckets',
+      }]),
+    }))
+  }
+
+  for (let index = 0; index < buckets.length; index++) {
+    const bucket = buckets[index]
+    if (!bucket) continue
+
+    // The first slice opens at the parent's lower bound when the parent has one.
+    if (currentStart === undefined) currentStart = parentRange.from ?? bucket.start
+
+    const wouldExceed = currentRows > 0 && currentRows + bucket.rowCount > targetChunkRows * TARGET_BYTES_FUZZ_FACTOR
+    if (wouldExceed) {
+      pushSlice(currentStart, bucket.start, currentRows)
+      currentStart = bucket.start
+      currentRows = 0
+    }
+    currentRows += bucket.rowCount
+
+    // The last slice closes at the parent's upper bound, else at the partition end.
+    if (index === buckets.length - 1) {
+      pushSlice(currentStart, parentRange.to ?? getPartitionEndExclusive(partition), currentRows)
+    }
+  }
+
+  return slices
+}
+
+/**
+ * Locates `subCount - 1` cumulative-row cut points on one dimension, sequentially.
+ * Returns undefined when the range is not hydrated or the cut points collapse onto too
+ * few distinct values; repeated cut points are dropped so no zero-width range is planned.
+ */
+async function buildQuantileBoundaries(
+  context: QueryContext,
+  slice: PartitionSlice,
+  dimensionIndex: number,
+  subCount: number,
+): Promise<string[] | undefined> {
+  const range = getSliceRange(slice, dimensionIndex)
+  if (range.from === undefined || range.to === undefined) return undefined
+  const boundaries = [range.from]
+  for (let step = 1; step < subCount; step++) {
+    const targetCumRows = Math.round((slice.estimatedRows * step) / subCount)
+    boundaries.push(await findQuantileBoundaryOnDimension(context, slice, dimensionIndex, targetCumRows))
+  }
+  const distinct = Array.from(new Set(boundaries))
+  if (distinct.length <= Math.max(2, Math.ceil(subCount / 3))) return undefined
+  return Array.from(new Set(distinct.concat([range.to])))
+}
+
+async function findQuantileBoundaryOnDimension(
+  context: QueryContext,
+  slice: PartitionSlice,
+  dimensionIndex: number,
+  targetCumRows: number,
+): Promise<string> {
+  const sortKey = context.sortKeys[dimensionIndex]
+  const range = getSliceRange(slice, dimensionIndex)
+  if (!sortKey || range.from === undefined || range.to === undefined) {
+    throw new Error(`Missing range for quantile split on dimension ${dimensionIndex}`)
+  }
+
+  if (sortKey.category === 'string') {
+    let low = strToBigInt(range.from, 8)
+    let high = strToBigInt(range.to, 8)
+
+    for (let step = 0; step < BINARY_SEARCH_STEPS; step++) {
+      const midpoint = (low + high) / 2n
+      if (midpoint === low || midpoint === high) break
+
+      const mid = bigIntToStr(midpoint, 8)
+      const rows = await countRows(context, slice.partitionId, replaceSliceRange(slice, dimensionIndex, range.from, mid))
+      if (rows < targetCumRows) low = midpoint
+      else high = midpoint
+    }
+
+    return bigIntToStr((low + high) / 2n, 8)
+  }
+
+  if (sortKey.category === 'datetime') {
+    let low = parsePlannerDateTime(range.from)
+    let high = parsePlannerDateTime(range.to)
+
+    for (let step = 0; step < BINARY_SEARCH_STEPS; step++) {
+      const midpoint = Math.floor((low + high) / 2)
+      if (midpoint === low || midpoint === high) break
+
+      const mid = new Date(midpoint).toISOString()
+      const rows = await countRows(context, slice.partitionId, replaceSliceRange(slice, dimensionIndex, range.from, mid))
+      if (rows < targetCumRows) low = midpoint
+      else high = midpoint
+    }
+
+    return new Date(Math.floor((low + high) / 2)).toISOString()
+  }
+
+  let low = Number(range.from)
+  let high = Number(range.to)
+  for (let step = 0; step < BINARY_SEARCH_STEPS; step++) {
+    const midpoint = Math.floor((low + high) / 2)
+    if (midpoint === low || midpoint === high) break
+
+    const rows = await countRows(context, slice.partitionId, replaceSliceRange(slice, dimensionIndex, range.from, String(midpoint)))
+    if (rows < targetCumRows) low = midpoint
+    else high = midpoint
+  }
+
+  return String(Math.floor((low + high) / 2))
+}
+
+async function hydrateSliceRange(
+  context: QueryContext,
+  slice: PartitionSlice,
+  dimensionIndex: number,
+): Promise<PartitionSlice | undefined> {
+  const currentRange = getSliceRange(slice, dimensionIndex)
+  if (currentRange.from !== undefined && currentRange.to !== undefined) return slice
+
+  const sortKey = context.sortKeys[dimensionIndex]
+  if (!sortKey) return undefined
+
+  const rows = await context.query<{ minVal: string; maxVal: string }>(`
+SELECT
+  toString(min(${sortKey.column})) AS minVal,
+  toString(max(${sortKey.column})) AS maxVal
+FROM ${context.database}.${context.table}
+WHERE ${buildWhereClause(slice.partitionId, slice.ranges, context.sortKeys)}`)
+
+  const observed = rows[0]
+  if (!observed) return undefined
+
+  return {
+    ...slice,
+    ranges: replaceSliceRange(slice, dimensionIndex, observed.minVal, toExclusiveUpperBound(observed.maxVal, sortKey)),
+  }
+}
+
+function buildRootSlice(partition: PartitionInfo): PartitionSlice {
+  return {
+    partitionId: partition.partitionId,
+    ranges: [],
+    estimatedRows: partition.rows,
+    estimatedBytes: partition.bytesOnDisk,
+    isHotKey: false,
+    estimateConfidence: 'high',
+    estimateReason: 'partition-metadata',
+    lineage: [],
+  }
+}
+
+function buildSliceFromRows(
+  partition: PartitionInfo,
+  input: {
+    ranges: SliceRange[]
+    estimatedRows: number
+    isHotKey: boolean
+    hotDimensionIndex?: number
+    hotKeyValue?: string
+    estimateConfidence: EstimateConfidence
+    estimateReason: EstimateReason
+    lineage: SliceLineageStep[]
+  },
+): PartitionSlice {
+  return {
+    partitionId: partition.partitionId,
+    ranges: input.ranges,
+    estimatedRows: input.estimatedRows,
+    estimatedBytes: partition.rows > 0
+      ? Math.round((input.estimatedRows / partition.rows) * partition.bytesOnDisk)
+      : 0,
+    isHotKey: input.isHotKey,
+    hotDimensionIndex: input.hotDimensionIndex,
+    hotKeyValue: input.hotKeyValue,
+    estimateConfidence: input.estimateConfidence,
+    estimateReason: input.estimateReason,
+    lineage: input.lineage,
+  }
+}
+
+function getTargetChunkRows(partition: PartitionInfo, maxChunkBytes: number): number {
+  if (partition.bytesOnDisk <= 0) return partition.rows
+  return (maxChunkBytes * partition.rows) / partition.bytesOnDisk
+}
+
+/**
+ * Greedily coalesces consecutive slices whose ranges differ in at most one dimension
+ * and touch there, as long as the combined estimate stays within 110% of
+ * `maxChunkBytes`. Hot-key slices are never merged. A merged slice is reported with
+ * the low-confidence estimate of its inputs when there is one, so diagnostics derived
+ * from the merged list do not understate fallback estimates.
+ */
+function mergeAdjacentSlices(slices: PartitionSlice[], maxChunkBytes: number): PartitionSlice[] {
+  if (slices.length <= 1) return slices
+
+  const merged: PartitionSlice[] = []
+  for (const slice of slices) {
+    const current = merged[merged.length - 1]
+    const canMerge = current !== undefined && !current.isHotKey && !slice.isHotKey &&
+      haveSameTrailingRanges(current.ranges, slice.ranges) &&
+      current.estimatedBytes + slice.estimatedBytes <= maxChunkBytes * 1.1
+    if (!current || !canMerge) {
+      merged.push(slice)
+      continue
+    }
+
+    // Take confidence and reason from the low-confidence input, if either is one.
+    const weaker = slice.estimateConfidence === 'low' ? slice : current
+    merged[merged.length - 1] = {
+      ...current,
+      ranges: mergeRanges(current.ranges, slice.ranges),
+      estimatedRows: current.estimatedRows + slice.estimatedRows,
+      estimatedBytes: current.estimatedBytes + slice.estimatedBytes,
+      estimateConfidence: weaker.estimateConfidence,
+      estimateReason: weaker.estimateReason,
+    }
+  }
+
+  return merged
+}
+
+function mergeRanges(left: SliceRange[], right: SliceRange[]): SliceRange[] {
+  return left.map((leftRange) => {
+    const rightRange = right.find((candidate) => candidate.dimensionIndex === leftRange.dimensionIndex)
+    return rightRange === undefined
+      ? leftRange
+      : {
+        dimensionIndex: leftRange.dimensionIndex,
+        from: leftRange.from,
+        to: rightRange.to,
+      }
+  })
+}
+
+function haveSameTrailingRanges(left: SliceRange[], right: SliceRange[]): boolean {
+  if (left.length !== right.length) return false
+
+  let differingDimensions = 0
+  for (const leftRange of left) {
+    const rightRange = right.find((candidate) => candidate.dimensionIndex === leftRange.dimensionIndex)
+    if (!rightRange) return false
+
+    const same = leftRange.from === rightRange.from && leftRange.to === rightRange.to
+    if (!same) {
+      differingDimensions += 1
+      if (leftRange.to !== rightRange.from) return false
+    }
+  }
+
+  return differingDimensions <= 1
+}
+
+function getCandidateDimensions(sortKeys: SortKeyInfo[], slice: PartitionSlice): number[] {
+  return sortKeys
+    .map((sortKey, index) => ({
+      index,
+      priority: getDimensionPriority(sortKey.category, slice.isHotKey, slice.hotDimensionIndex, index),
+    }))
+    .sort((left, right) => left.priority - right.priority)
+    .map((candidate) => candidate.index)
+}
+
+function getDimensionPriority(
+  category: SortKeyInfo['category'],
+  isHotKey: boolean,
+  hotDimensionIndex: number | undefined,
+  dimensionIndex: number,
+): number {
+  if (isHotKey && hotDimensionIndex === dimensionIndex) return 100
+  if (category === 'string') return 0
+  if (category === 'datetime') return 1
+  return 2
+}
+
+function getSliceRange(slice: Pick<PartitionSlice, 'ranges'>, dimensionIndex: number): SliceRange {
+  return slice.ranges.find((range) => range.dimensionIndex === dimensionIndex)
+    ?? { dimensionIndex, from: undefined, to: undefined }
+}
+
+function replaceSliceRange(
+  slice: Pick<PartitionSlice, 'ranges'>,
+  dimensionIndex: number,
+  from: string | undefined,
+  to: string | undefined,
+): SliceRange[] {
+  return slice.ranges
+    .filter((range) => range.dimensionIndex !== dimensionIndex)
+    .concat([{ dimensionIndex, from, to }])
+    .sort((left, right) => left.dimensionIndex - right.dimensionIndex)
+}
+
+function isExactSliceRange(range: Pick<SliceRange, 'from' | 'to'>): boolean {
+  if (range.from === undefined || range.to === undefined) return false
+  return range.to === `${range.from}\0`
+}
+
+function findHotIdentity(
+  slice: PartitionSlice,
+  sortKeys: SortKeyInfo[],
+): { dimensionIndex: number; value: string } | undefined {
+  for (const range of slice.ranges) {
+    const sortKey = sortKeys[range.dimensionIndex]
+    if (sortKey?.category !== 'string') continue
+    if (isExactSliceRange(range) && range.from !== undefined) {
+      return { dimensionIndex: range.dimensionIndex, value: range.from }
+    }
+  }
+}
+
+function applyHotIdentity(
+  slices: PartitionSlice[],
+  hotIdentity: { dimensionIndex: number; value: string } | undefined,
+): PartitionSlice[] {
+  if (!hotIdentity) return slices
+  return slices.map((slice) => markHotSlice(slice, hotIdentity))
+}
+
+function markHotSlice(
+  slice: PartitionSlice,
+  hotIdentity: { dimensionIndex: number; value: string } | undefined,
+): PartitionSlice {
+  if (!hotIdentity) return slice
+  return {
+    ...slice,
+    isHotKey: true,
+    hotDimensionIndex: hotIdentity.dimensionIndex,
+    hotKeyValue: hotIdentity.value,
+  }
+}
+
+function isEffectiveSplit(parent: PartitionSlice, children: PartitionSlice[]): boolean {
+  if (children.length <= 1) return false
+  return children.some((child) =>
+    child.estimatedRows !== parent.estimatedRows ||
+    JSON.stringify(child.ranges) !== JSON.stringify(parent.ranges)
+  )
+}
+
+function toExclusiveUpperBound(value: string, sortKey: SortKeyInfo): string {
+  if (sortKey.category === 'string') return `${value}\0`
+  if (sortKey.category === 'datetime') return new Date(parsePlannerDateTime(value) + 1000).toISOString()
+  return String(Number(value) + 1)
+}
+
+function getPartitionEndExclusive(partition: PartitionInfo): string {
+  return new Date(parsePlannerDateTime(partition.maxTime) + 1000).toISOString()
+}
+
+// Picks the chunk's time window: the first range on a datetime sort key wins, and any side it
+// leaves open falls back to the partition's min/max time (or '' when there is no partition).
+function deriveChunkWindow(
+  ranges: SliceRange[],
+  sortKeys: SortKeyInfo[],
+  partition: PartitionInfo | undefined,
+): { from: string; to: string } {
+  const fallbackFrom = partition?.minTime ?? ''
+  const fallbackTo = partition?.maxTime ?? ''
+
+  // Only the first datetime-typed dimension counts; later datetime ranges are ignored.
+  const temporal = ranges.find((candidate) => sortKeys[candidate.dimensionIndex]?.category === 'datetime')
+  return {
+    from: temporal?.from ?? fallbackFrom,
+    to: temporal?.to ?? fallbackTo,
+  }
+}
+
+async function countRows(context: QueryContext, partitionId: string, ranges: SliceRange[]): Promise<number> {
+  const [firstRow] = await context.query<{ cnt: string }>(`
+SELECT count() AS cnt
+FROM ${context.database}.${context.table}
+WHERE ${buildWhereClause(partitionId, ranges, context.sortKeys)}`)
+  return Number(firstRow?.cnt ?? 0) // the count arrives as a string; an empty result reads as 0
+}
+
+// Builds the WHERE body for one slice: the partition filter followed by a half-open [from, to)
+// predicate per range. Ranges that point at an unknown sort-key index are skipped.
+function buildWhereClause(partitionId: string, ranges: SliceRange[], sortKeys: SortKeyInfo[]): string {
+  const rangePredicates = ranges.flatMap(({ dimensionIndex, from, to }) => {
+    const key = sortKeys[dimensionIndex]
+    if (!key) return []
+    const lower = from === undefined ? [] : [`${key.column} >= ${formatBound(from, key)}`]
+    const upper = to === undefined ? [] : [`${key.column} < ${formatBound(to, key)}`]
+    return [...lower, ...upper]
+  })
+  return [`_partition_id = ${quoteSqlString(partitionId)}`, ...rangePredicates].join('\n  AND ')
+}
+
+function quoteSqlString(value: string): string { // single-quoted SQL literal with \ and ' backslash-escaped
+  return `'${value.split('\\').join('\\\\').split('\'').join('\\\'')}'`
+}
+
+// Renders one range bound as a SQL expression for the sort key's category.
+function formatBound(value: string, sortKey: SortKeyInfo): string {
+  if (sortKey.category === 'datetime') return `parseDateTimeBestEffort(${quoteSqlString(value)})`
+  // Strings travel hex-encoded, so arbitrary latin1 bytes (quotes, NUL, ...) need no escaping.
+  if (sortKey.category === 'string') {
+    return `unhex('${Buffer.from(value, 'latin1').toString('hex')}')`
+  }
+  return quoteSqlString(value) // quoted like formatBound in chunking/sql.ts, never spliced raw into the statement
+}
+
+function buildEvenlySpacedBoundaries(
+  from: string,
+  to: string,
+  subCount: number,
+  sortKey: SortKeyInfo,
+): string[] {
+  // Yields subCount + 1 boundaries running from `from` to `to`: the edges of subCount equal-width ranges.
+  const steps = Array.from({ length: subCount + 1 }, (_, index) => index)
+  // Exact integer interpolation (truncating division): 64-bit values never pass through float64.
+  const spread = (start: bigint, end: bigint, index: number): bigint =>
+    start + ((end - start) * BigInt(index)) / BigInt(subCount)
+
+  if (sortKey.category === 'datetime') {
+    const start = parsePlannerDateTime(from)
+    const end = parsePlannerDateTime(to)
+    return steps.map((index) => new Date(start + Math.floor(((end - start) * index) / subCount)).toISOString())
+  }
+
+  if (sortKey.category === 'numeric') {
+    const isInteger = (text: string): boolean => /^-?\d+$/.test(text)
+    // Integer keys (e.g. UInt64 above 2^53) would be rounded by Number(); fractional keys keep the float path.
+    if (isInteger(from) && isInteger(to) && Number.isInteger(subCount) && subCount > 0) {
+      return steps.map((index) => String(spread(BigInt(from), BigInt(to), index)))
+    }
+    return steps.map((index) => String(Number(from) + Math.floor(((Number(to) - Number(from)) * index) / subCount)))
+  }
+  return steps.map((index) => bigIntToStr(spread(strToBigInt(from, 8), strToBigInt(to, 8), index), 8))
+}
+
+function parsePlannerDateTime(value: string): number { // epoch milliseconds; input without a zone is read as UTC
+  const normalized = value.includes('T') ? value : value.replace(' ', 'T')
+  return Date.parse(/(?:Z|T.*[+-]\d{2}:?\d{2})$/.test(normalized) ? normalized : `${normalized}Z`) // keep an explicit offset: appending Z to it parses as NaN
+}
+
+// Packs the first `padTo` latin1 bytes of `value` into a big-endian bigint; missing bytes count as 0.
+function strToBigInt(value: string, padTo: number): bigint {
+  const bytes = Buffer.from(value, 'latin1')
+  let packed = 0n
+  for (let offset = 0; offset < padTo; offset += 1) {
+    packed = packed * 256n + BigInt(bytes[offset] ?? 0)
+  }
+  return packed
+}
+
+// Writes `value` as `length` big-endian bytes (higher bits are dropped) and decodes them as a latin1 string.
+function bigIntToStr(value: bigint, length: number): string {
+  const bytes = Buffer.alloc(length)
+  // Fill from the least-significant byte backwards, shifting the remainder down one byte per step.
+  for (let offset = length - 1, rest = value; offset >= 0; offset -= 1, rest >>= 8n) {
+    bytes[offset] = Number(rest & 0xffn)
+  }
+  return bytes.toString('latin1')
+}
+
+function compareBinaryStrings(left: string, right: string): number { // -1, 0 or 1 by latin1 byte order
+  return Buffer.compare(Buffer.from(left, 'latin1'), Buffer.from(right, 'latin1'))
+}
+
+function minBinaryString(left: string, right: string): string { // byte-wise smaller of the two; ties return left
+  return compareBinaryStrings(left, right) > 0 ? right : left
+}
+
+function maxBinaryString(left: string, right: string): string { // byte-wise larger of the two; ties return left
+  return compareBinaryStrings(left, right) < 0 ? right : left
+}
+
+// Computes the byte-wise successor of a latin1 prefix: trailing 0xff bytes are dropped and the
+// last remaining byte is incremented by one.
+// Yields undefined when there is nothing to increment (empty prefix, or every byte is 0xff).
+function nextPrefixValue(prefix: string): string | undefined {
+  const bytes = Buffer.from(prefix, 'latin1')
+
+  // Walk back past trailing 0xff bytes, which cannot be incremented.
+  let last = bytes.length - 1
+  while (last >= 0 && bytes[last] === 0xff) last -= 1
+  if (last < 0) return undefined
+
+  // Keep everything up to and including that byte, then add one to it.
+  const bumped = Buffer.from(bytes.subarray(0, last + 1))
+  bumped[last] = (bumped[last] ?? 0) + 1
+  return bumped.toString('latin1')
+}
diff --git a/packages/plugin-backfill/src/chunking/introspect.ts b/packages/plugin-backfill/src/chunking/introspect.ts
index e383f6f..a9f8e0e 100644
--- a/packages/plugin-backfill/src/chunking/introspect.ts
+++ b/packages/plugin-backfill/src/chunking/introspect.ts
@@ -35,6 +35,7 @@ export async function queryPartitionInfo(input: {
     partition_id: string
     total_rows: string
     total_bytes: string
+    total_uncompressed_bytes?: string
     min_time: string
     max_time: string
   }>(
@@ -42,6 +43,7 @@ export async function queryPartitionInfo(input: {
   partition_id,
   toString(sum(rows)) AS total_rows,
   toString(sum(bytes_on_disk)) AS total_bytes,
+  toString(sum(data_uncompressed_bytes)) AS total_uncompressed_bytes,
   toString(min(min_time)) AS min_time,
   toString(max(max_time)) AS max_time
 FROM system.parts
@@ -57,6 +59,7 @@ SETTINGS select_sequential_consistency = 1`
     partitionId: row.partition_id,
     rows: Number(row.total_rows),
     bytesOnDisk: Number(row.total_bytes),
+    bytesUncompressed: Number(row.total_uncompressed_bytes ?? row.total_bytes),
     minTime: new Date(row.min_time).toISOString(),
     maxTime: new Date(row.max_time).toISOString(),
   }))
@@ -68,39 +71,57 @@ SETTINGS select_sequential_consistency = 1`
   })
 }
 
-export async function querySortKeyInfo(input: {
+function extractSortKeyColumns(sortingKey: string): string[] {
+  let depth = 0
+  let marked = ''
+  // Only commas outside parentheses separate key parts, so `cityHash64(a, b), ts` yields two parts, not three.
+  for (const char of sortingKey) {
+    depth += char === '(' ? 1 : char === ')' ? -1 : 0
+    marked += char === ',' && depth === 0 ? '\0' : char
+  }
+  // A single-argument wrapper such as toDate(col) resolves to col; any other expression is kept verbatim.
+  return marked.split('\0').map((part) => part.trim()).filter(Boolean).map((part) => part.match(/^\w+\((\w+)\)$/)?.[1] ?? part)
+}
+
+export async function querySortKeys(input: {
   database: string
   table: string
   query: <T>(sql: string) => Promise<T[]>
-}): Promise<SortKeyInfo | undefined> {
+}): Promise<SortKeyInfo[]> {
   const tableRows = await input.query<{ sorting_key: string }>(
     `SELECT sorting_key FROM system.tables WHERE database = '${input.database}' AND name = '${input.table}'`
   )
 
   const sortingKey = tableRows[0]?.sorting_key
-  if (!sortingKey) return undefined
+  if (!sortingKey) return []
 
-  // Parse first column from sorting key (may have expressions like "toDate(event_time)")
-  const firstColumn = sortingKey.split(',')[0]?.trim()
-  if (!firstColumn) return undefined
+  const columnNames = extractSortKeyColumns(sortingKey)
+  if (columnNames.length === 0) return []
 
-  // If it's a function call like toDate(col), extract the column name
-  const match = firstColumn.match(/^\w+\((\w+)\)$/)
-  const columnName = match ? match[1] : firstColumn
-  if (!columnName) return undefined
-
-  const columnRows = await input.query<{ type: string }>(
-    `SELECT type FROM system.columns WHERE database = '${input.database}' AND table = '${input.table}' AND name = '${columnName}'`
+  const inList = columnNames.map((name) => `'${name}'`).join(', ')
+  const columnRows = await input.query<{ name?: string; type: string }>(
+    `SELECT name, type FROM system.columns WHERE database = '${input.database}' AND table = '${input.table}' AND name IN (${inList})`
+  )
+  const typeByName = new Map(
+    columnRows.map((row, index) => [row.name ?? columnNames[index] ?? columnNames[0], row.type])
   )
 
-  const type = columnRows[0]?.type
-  if (!type) return undefined
+  return columnNames.map((column) => {
+    const type = typeByName.get(column) ?? 'String'
+    return {
+      column,
+      type,
+      category: classifySortKeyType(type),
+    }
+  })
+}
 
-  return {
-    column: columnName,
-    type,
-    category: classifySortKeyType(type),
-  }
+export async function querySortKeyInfo(input: {
+  database: string
+  table: string
+  query: <T>(sql: string) => Promise<T[]>
+}): Promise<SortKeyInfo | undefined> {
+  return (await querySortKeys(input))[0]
 }
 
 export async function querySortKeyRanges(input: {
@@ -134,13 +155,13 @@ export async function introspectTable(input: {
   from?: string
   to?: string
   query: <T>(sql: string) => Promise<T[]>
-}): Promise<{ partitions: PartitionInfo[]; sortKey?: SortKeyInfo }> {
+}): Promise<{ partitions: PartitionInfo[]; sortKey?: SortKeyInfo; sortKeys: SortKeyInfo[] }> {
   const partitions = await queryPartitionInfo(input)
-  const sortKey = await querySortKeyInfo({
+  const sortKeys = await querySortKeys({
     database: input.database,
     table: input.table,
     query: input.query,
   })
 
-  return { partitions, sortKey }
+  return { partitions, sortKey: sortKeys[0], sortKeys }
 }
diff --git a/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts b/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts
new file mode 100644
index 0000000..7176994
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts
@@ -0,0 +1,420 @@
+import { describe, expect, test } from 'bun:test'
+
+import { analyzeAndChunk } from './analyze.js'
+import { buildChunkSql } from './sql.js'
+import type { SortKeyInfo } from './types.js'
+
+const MiB = 1024 ** 2
+
+type RowValue = string | number
+
+interface FixtureRow {
+  _partition_id: string
+  event_time: string
+  [key: string]: RowValue
+}
+
+function isoAt(day: number, hour: number, minute = 0): string { // ISO-8601 string for a UTC instant in January 2026
+  return new Date(Date.UTC(2026, 0, day, hour, minute)).toISOString()
+}
+
+function createFixtureQuery(input: { // builds an in-memory query function that answers by inspecting the SQL text
+  database: string
+  table: string
+  rows: FixtureRow[]
+  sortKeys: Array<{ column: string; type: string }>
+  bytesPerRow?: number
+  uncompressedBytesPerRow?: number
+}) {
+  const bytesPerRow = input.bytesPerRow ?? 1024 // default: 1 KiB on disk per row
+  const uncompressedBytesPerRow = input.uncompressedBytesPerRow ?? bytesPerRow * 2 // default: twice the on-disk size
+
+  return async function query<T>(sql: string): Promise<T[]> {
+    if (sql.includes(`SELECT 1 FROM ${input.database}.${input.table} LIMIT 1`)) { // SELECT 1 ... LIMIT 1 probe: answer with one row
+      return [{ ok: 1 }] as T[]
+    }
+
+    if (sql.includes('FROM system.parts')) { // partition metadata, derived from the fixture rows
+      const partitions = summarizePartitions(input.rows, bytesPerRow, uncompressedBytesPerRow)
+      return partitions as T[]
+    }
+
+    if (sql.includes('FROM system.tables')) { // sorting key: the configured columns joined by ", "
+      return [{ sorting_key: input.sortKeys.map((key) => key.column).join(', ') }] as T[]
+    }
+
+    if (sql.includes('FROM system.columns')) { // column types, in sort-key order
+      return input.sortKeys.map((key) => ({ name: key.column, type: key.type })) as T[]
+    }
+
+    const filteredRows = filterRows(sql, input.rows) // every branch below only sees rows passing the WHERE clause
+
+    if (sql.includes('substring(')) { // prefix histogram: rows counted per leading `depth` bytes of the column
+      const match = sql.match(/substring\((\w+), 1, (\d+)\) AS prefix/)
+      const column = match?.[1]
+      const depth = Number(match?.[2] ?? 0)
+      if (!column || depth <= 0) return [] as T[]
+
+      const grouped = new Map<string, number>()
+      for (const row of filteredRows) {
+        const value = String(row[column] ?? '')
+        const prefix = Buffer.from(value, 'latin1').subarray(0, depth).toString('latin1')
+        grouped.set(prefix, (grouped.get(prefix) ?? 0) + 1)
+      }
+
+      return Array.from(grouped.entries())
+        .sort(([left], [right]) => compareLatin1(left, right))
+        .map(([prefix, cnt]) => ({ prefix, cnt: String(cnt) })) as T[]
+    }
+
+    if (sql.includes('formatDateTime(toStartOfDay(') || sql.includes('formatDateTime(toStartOfHour(')) { // temporal histogram per day or hour
+      const grain = sql.includes('toStartOfDay(') ? 'day' : 'hour'
+      const columnMatch = sql.match(/toStartOf(?:Day|Hour)\((\w+)\)/)
+      const column = columnMatch?.[1]
+      if (!column) return [] as T[]
+
+      const grouped = new Map<string, number>()
+      for (const row of filteredRows) {
+        const bucket = grain === 'day'
+          ? toStartOfDay(String(row[column]))
+          : toStartOfHour(String(row[column]))
+        grouped.set(bucket, (grouped.get(bucket) ?? 0) + 1)
+      }
+
+      return Array.from(grouped.entries())
+        .sort(([left], [right]) => left.localeCompare(right))
+        .map(([bucket, cnt]) => ({ bucket, cnt: String(cnt) })) as T[]
+    }
+
+    if (sql.includes('toString(min(') && sql.includes('toString(max(')) { // min/max probe over a single column
+      const match = sql.match(/toString\(min\((\w+)\)\) AS minVal,\s+toString\(max\(\1\)\) AS maxVal/s)
+      const column = match?.[1]
+      if (!column || filteredRows.length === 0) return [] as T[]
+
+      const values = filteredRows.map((row) => row[column]).filter((value) => value !== undefined)
+      if (values.length === 0) return [] as T[]
+
+      return [{
+        minVal: formatValueForMinMax(values.reduce((current, candidate) => compareValues(candidate, current) < 0 ? candidate : current)),
+        maxVal: formatValueForMinMax(values.reduce((current, candidate) => compareValues(candidate, current) > 0 ? candidate : current)),
+      }] as T[]
+    }
+
+    if (sql.includes('SELECT count() AS cnt')) { // exact row count of the filtered rows
+      return [{ cnt: String(filteredRows.length) }] as T[]
+    }
+
+    return [] as T[] // SQL the fixture does not recognise yields no rows
+  }
+}
+
+// Builds the rows the fixture returns for system.parts: one summary per partition id, sorted by id.
+function summarizePartitions(rows: FixtureRow[], bytesPerRow: number, uncompressedBytesPerRow: number) {
+  const grouped = new Map<string, FixtureRow[]>()
+  for (const row of rows) grouped.set(row._partition_id, [...(grouped.get(row._partition_id) ?? []), row])
+
+  return [...grouped]
+    .sort(([left], [right]) => left.localeCompare(right))
+    .map(([partitionId, members]) => {
+      const times = members.map((member) => member.event_time)
+      return {
+        partition_id: partitionId,
+        total_rows: String(members.length),
+        total_bytes: String(members.length * bytesPerRow),
+        total_uncompressed_bytes: String(members.length * uncompressedBytesPerRow),
+        min_time: String(times.reduce((min, time) => (time < min ? time : min), times[0] ?? '')),
+        max_time: String(times.reduce((max, time) => (time > max ? time : max), times[0] ?? '')),
+      }
+    })
+}
+
+// Keeps the rows satisfying every AND-ed clause of the query's WHERE (text up to GROUP BY / ORDER BY / SETTINGS).
+function filterRows(sql: string, rows: FixtureRow[]): FixtureRow[] {
+  const whereBody = /WHERE\s+([\s\S]*?)(?:GROUP BY|ORDER BY|SETTINGS|$)/i.exec(sql)?.[1]
+  if (!whereBody) return rows // no WHERE clause (or an empty one): nothing to filter
+  const clauses: string[] = []
+  for (const piece of whereBody.split(/\s+AND\s+/)) {
+    const clause = piece.replace(/\s+/g, ' ').trim()
+    if (clause) clauses.push(clause)
+  }
+  return rows.filter((row) => clauses.every((clause) => evaluateClause(clause, row)))
+}
+
+function evaluateClause(clause: string, row: FixtureRow): boolean { // true when `row` satisfies one whitespace-normalized WHERE clause
+  let match = clause.match(/^_partition_id = '([^']+)'$/) // partition filter
+  if (match) return row._partition_id === match[1]
+
+  match = clause.match(/^(\w+) >= parseDateTimeBestEffort\('([^']+)'\)$/) // datetime lower bound (inclusive)
+  if (match) return Date.parse(String(row[match[1]])) >= Date.parse(match[2])
+
+  match = clause.match(/^(\w+) < parseDateTimeBestEffort\('([^']+)'\)$/) // datetime upper bound (exclusive)
+  if (match) return Date.parse(String(row[match[1]])) < Date.parse(match[2])
+
+  match = clause.match(/^(\w+) >= unhex\('([0-9a-f]+)'\)$/i) // hex-encoded string bound, compared as latin1 bytes
+  if (match) return compareLatin1(String(row[match[1]] ?? ''), Buffer.from(match[2], 'hex').toString('latin1')) >= 0
+
+  match = clause.match(/^(\w+) < unhex\('([0-9a-f]+)'\)$/i)
+  if (match) return compareLatin1(String(row[match[1]] ?? ''), Buffer.from(match[2], 'hex').toString('latin1')) < 0
+
+  match = clause.match(/^(\w+) >= '([^']+)'$/) // quoted literal bound
+  if (match) return comparePrimitive(row[match[1]], match[2]) >= 0
+
+  match = clause.match(/^(\w+) < '([^']+)'$/)
+  if (match) return comparePrimitive(row[match[1]], match[2]) < 0
+
+  match = clause.match(/^(\w+) >= (-?\d+(?:\.\d+)?)$/) // bare numeric bound
+  if (match) return Number(row[match[1]]) >= Number(match[2])
+
+  match = clause.match(/^(\w+) < (-?\d+(?:\.\d+)?)$/)
+  if (match) return Number(row[match[1]]) < Number(match[2])
+
+  throw new Error(`Unsupported test clause: ${clause}`) // fail loudly on clause shapes the fixture does not model
+}
+
+// Comparator for quoted bounds: numeric row values compare by difference, anything else by locale order.
+function comparePrimitive(left: RowValue | undefined, right: string): number {
+  return typeof left === 'number' ? left - Number(right) : String(left ?? '').localeCompare(right)
+}
+
+// Orders two fixture values: numerically when both are numbers, otherwise by the latin1 bytes of their string forms.
+function compareValues(left: RowValue, right: RowValue): number {
+  return typeof left === 'number' && typeof right === 'number' ? left - right : compareLatin1(String(left), String(right))
+}
+
+function formatValueForMinMax(value: RowValue): string {
+  return String(value) // numbers and strings take the same path: both are rendered with String()
+}
+
+function compareLatin1(left: string, right: string): number { // -1, 0 or 1 by latin1 byte order
+  return Buffer.compare(Buffer.from(left, 'latin1'), Buffer.from(right, 'latin1'))
+}
+
+function toStartOfDay(value: string): string { // midnight UTC of the timestamp's day, as ISO-8601
+  const at = new Date(value)
+  return new Date(Date.UTC(at.getUTCFullYear(), at.getUTCMonth(), at.getUTCDate())).toISOString()
+}
+
+// Truncates a timestamp to the top of its UTC hour and returns it as an ISO-8601 string.
+function toStartOfHour(value: string): string {
+  const at = new Date(value)
+  const hourStart = Date.UTC(
+    at.getUTCFullYear(),
+    at.getUTCMonth(),
+    at.getUTCDate(),
+    at.getUTCHours(),
+  )
+  return new Date(hourStart).toISOString()
+}
+
+// Plans chunks for an in-memory `app.events` fixture: queries are answered from `rows` by the
+// fixture query function, and the planner is always asked to require idempotency tokens.
+async function planFixture(input: {
+  rows: FixtureRow[]
+  sortKeys: Array<{ column: string; type: string }>
+  maxChunkBytes: number
+}) {
+  const { rows, sortKeys, maxChunkBytes } = input
+  const location = { database: 'app', table: 'events' }
+
+  // The same database/table pair feeds both the fake query layer and the planner.
+  const query = createFixtureQuery({ ...location, rows, sortKeys })
+
+  return analyzeAndChunk({
+    ...location,
+    maxChunkBytes,
+    requireIdempotencyToken: true,
+    query,
+  })
+}
+
+function strategyIds(chunk: { lineage?: Array<{ strategyId: string }> }): string[] {
+  return (chunk.lineage ?? []).map(({ strategyId }) => strategyId) // a chunk without lineage yields []
+}
+
+// Renders the backfill SQL for one planned chunk targeting `app.events` under the fixed plan id `fixture-plan`.
+function buildSqlForChunk(chunk: Awaited<ReturnType<typeof planFixture>>['chunks'][number], sortKeys: SortKeyInfo[]) {
+  const [sortKey] = sortKeys // the first key is also passed on its own
+  const request = {
+    planId: 'fixture-plan', chunk, target: 'app.events',
+    sortKey, sortKeys,
+  }
+  return buildChunkSql(request)
+}
+
+// Unwraps a chunk a test expects to exist; throws, naming `label`, when the value is undefined.
+function requireChunk<T>(value: T | undefined, label: string): T {
+  if (value !== undefined) return value
+
+  throw new Error(`Missing expected chunk: ${label}`)
+}
+
+describe('smart chunking integration', () => {
+  test('keeps small partitions as a single metadata chunk', async () => {
+    const rows = Array.from({ length: 12 }, (_, index) => ({
+      _partition_id: 'p_small',
+      event_time: isoAt(1, index),
+      id: index,
+    }))
+
+    const result = await planFixture({
+      rows,
+      sortKeys: [{ column: 'id', type: 'UInt64' }],
+      maxChunkBytes: 64 * MiB, // far above 12 rows at the fixture default of 1 KiB on disk per row
+    })
+
+    expect(result.chunks).toHaveLength(1)
+    expect(result.chunks[0]?.estimateReason).toBe('partition-metadata')
+    expect(strategyIds(result.chunks[0] ?? {})).toHaveLength(0)
+  })
+
+  test('uses quantile range splitting for wide numeric distributions', async () => {
+    const rows = Array.from({ length: 120 }, (_, index) => ({
+      _partition_id: 'p_quantile',
+      event_time: isoAt(2, index % 24),
+      id: index,
+    }))
+
+    const result = await planFixture({
+      rows,
+      sortKeys: [{ column: 'id', type: 'UInt64' }],
+      maxChunkBytes: 30 * 1024, // 120 rows at the default 1 KiB on disk per row exceed this cap
+    })
+
+    expect(result.chunks.length).toBeGreaterThanOrEqual(3)
+    expect(result.chunks.every((chunk) => strategyIds(chunk).includes('quantile-range-split'))).toBe(true)
+    const estimatedRows = result.chunks.map((chunk) => chunk.estimatedRows ?? 0)
+    expect(Math.max(...estimatedRows) - Math.min(...estimatedRows)).toBeLessThanOrEqual(4)
+  })
+
+  test('falls back to equal-width splitting when quantile boundaries collapse', async () => {
+    const rows = Array.from({ length: 80 }, (_, index) => ({
+      _partition_id: 'p_equal',
+      event_time: isoAt(3, index % 24),
+      id: 100 + (index % 2), // only two distinct ids: 100 and 101
+    }))
+
+    const result = await planFixture({
+      rows,
+      sortKeys: [{ column: 'id', type: 'UInt64' }],
+      maxChunkBytes: 20 * 1024,
+    })
+
+    expect(result.chunks.length).toBeGreaterThan(1)
+    expect(result.chunks.some((chunk) => strategyIds(chunk).includes('equal-width-split'))).toBe(true)
+  })
+
+  test('uses string-prefix splitting for string-distributed partitions', async () => {
+    const rows: FixtureRow[] = []
+    for (const prefix of ['apple', 'apricot', 'banana', 'berry', 'citrus']) {
+      for (let index = 0; index < 24; index++) {
+        rows.push({
+          _partition_id: 'p_string',
+          event_time: isoAt(4, index % 24),
+          slug: `${prefix}-${index.toString().padStart(2, '0')}`,
+        })
+      }
+    }
+
+    const result = await planFixture({
+      rows,
+      sortKeys: [{ column: 'slug', type: 'String' }],
+      maxChunkBytes: 24 * 1024,
+    })
+
+    expect(result.chunks.length).toBeGreaterThan(2)
+    expect(result.chunks.some((chunk) => strategyIds(chunk).includes('string-prefix-split'))).toBe(true)
+
+    const sql = buildSqlForChunk(requireChunk(result.chunks[0], 'string-prefix first chunk'), result.sortKeys)
+    expect(sql).toContain("unhex('") // string bounds must reach the SQL hex-encoded
+  })
+
+  test('combines string-prefix and temporal splitting for hot-key time windows', async () => {
+    const rows: FixtureRow[] = []
+
+    for (let day = 1; day <= 3; day++) {
+      for (let hour = 0; hour < 24; hour++) {
+        rows.push({
+          _partition_id: 'p_combo_temporal',
+          event_time: isoAt(10 + day, hour),
+          user_id: 'hot', // 72 rows (3 days x 24 hours) share this one key
+          score: 1000 + day * 24 + hour,
+        })
+      }
+    }
+
+    for (let index = 0; index < 18; index++) {
+      rows.push({
+        _partition_id: 'p_combo_temporal',
+        event_time: isoAt(10, index),
+        user_id: `cold-${index}`,
+        score: index,
+      })
+    }
+
+    const result = await planFixture({
+      rows,
+      sortKeys: [
+        { column: 'user_id', type: 'String' },
+        { column: 'event_time', type: 'DateTime' },
+      ],
+      maxChunkBytes: 18 * 1024,
+    })
+
+    const hotChunks = result.chunks.filter((chunk) =>
+      strategyIds(chunk).includes('temporal-bucket-split') &&
+      (chunk.ranges?.some((range) => range.dimensionIndex === 0) ?? false) &&
+      (chunk.ranges?.some((range) => range.dimensionIndex === 1) ?? false)
+    )
+
+    expect(hotChunks.length).toBeGreaterThan(0)
+    expect(hotChunks.every((chunk) => chunk.isHotKey || (chunk.hotKeyValue !== undefined))).toBe(true)
+
+    const sql = buildSqlForChunk(requireChunk(hotChunks[0], 'temporal combo chunk'), result.sortKeys)
+    expect(sql).toContain('user_id >=')
+    expect(sql).toContain('event_time >=')
+    expect(sql).toContain('parseDateTimeBestEffort')
+  })
+
+  test('combines string-prefix and quantile splitting on secondary numeric dimensions', async () => {
+    const rows: FixtureRow[] = []
+
+    for (let index = 0; index < 96; index++) {
+      rows.push({
+        _partition_id: 'p_combo_numeric',
+        event_time: isoAt(20, index % 24),
+        account: 'vip', // 96 rows share this key; each "free-*" account below has a single row
+        seq: index,
+      })
+    }
+
+    for (let index = 0; index < 24; index++) {
+      rows.push({
+        _partition_id: 'p_combo_numeric',
+        event_time: isoAt(20, index % 24),
+        account: `free-${index}`,
+        seq: index,
+      })
+    }
+
+    const result = await planFixture({
+      rows,
+      sortKeys: [
+        { column: 'account', type: 'String' },
+        { column: 'seq', type: 'UInt64' },
+      ],
+      maxChunkBytes: 24 * 1024,
+    })
+
+    const comboChunks = result.chunks.filter((chunk) =>
+      strategyIds(chunk).includes('quantile-range-split') &&
+      (chunk.ranges?.some((range) => range.dimensionIndex === 0) ?? false) &&
+      (chunk.ranges?.some((range) => range.dimensionIndex === 1) ?? false)
+    )
+
+    expect(comboChunks.length).toBeGreaterThan(0)
+
+    const sql = buildSqlForChunk(requireChunk(comboChunks[0], 'numeric combo chunk'), result.sortKeys)
+    expect(sql).toContain('account >=')
+    expect(sql).toContain("seq >= '") // numeric bounds are emitted as quoted literals
+  })
+})
diff --git a/packages/plugin-backfill/src/chunking/sql.ts b/packages/plugin-backfill/src/chunking/sql.ts
index e6b7458..0475815 100644
--- a/packages/plugin-backfill/src/chunking/sql.ts
+++ b/packages/plugin-backfill/src/chunking/sql.ts
@@ -7,17 +7,45 @@ function buildSettingsClause(token: string): string {
   return `SETTINGS async_insert=0`
 }
 
-function buildSortKeyCondition(
-  sortKeyColumn: string,
-  category: SortKeyInfo['category'],
-  from: string,
-  to: string,
-): string {
-  if (category === 'datetime') {
-    return `  AND ${sortKeyColumn} >= parseDateTimeBestEffort('${from}')\n  AND ${sortKeyColumn} < parseDateTimeBestEffort('${to}')`
+function quoteSqlString(value: string): string { // single-quoted SQL literal with \ and ' backslash-escaped
+  return `'${value.split('\\').join('\\\\').split('\'').join('\\\'')}'`
+}
+
+function formatBound(value: string, sortKey: SortKeyInfo): string {
+  if (sortKey.category === 'datetime') {
+    return `parseDateTimeBestEffort(${quoteSqlString(value)})`
+  }
+  if (sortKey.category === 'string') {
+    return `unhex('${Buffer.from(value, 'latin1').toString('hex')}')`
   }
-  // numeric and string use direct comparison
-  return `  AND ${sortKeyColumn} >= '${from}'\n  AND ${sortKeyColumn} < '${to}'`
+  return quoteSqlString(value)
+}
+
+function buildChunkConditions(chunk: PlannedChunk, sortKeys: SortKeyInfo[]): string[] {
+  if (chunk.ranges?.length) {
+    return chunk.ranges.flatMap((range) => {
+      const sortKey = sortKeys[range.dimensionIndex]
+      if (!sortKey) return []
+
+      const conditions: string[] = []
+      if (range.from !== undefined) {
+        conditions.push(`${sortKey.column} >= ${formatBound(range.from, sortKey)}`)
+      }
+      if (range.to !== undefined) {
+        conditions.push(`${sortKey.column} < ${formatBound(range.to, sortKey)}`)
+      }
+      return conditions
+    })
+  }
+
+  if (chunk.sortKeyFrom !== undefined && chunk.sortKeyTo !== undefined && sortKeys[0]) {
+    return [
+      `${sortKeys[0].column} >= ${formatBound(chunk.sortKeyFrom, sortKeys[0])}`,
+      `${sortKeys[0].column} < ${formatBound(chunk.sortKeyTo, sortKeys[0])}`,
+    ]
+  }
+
+  return []
 }
 
 export function buildChunkSql(input: {
@@ -25,24 +53,21 @@ export function buildChunkSql(input: {
   chunk: PlannedChunk
   target: string
   sortKey?: SortKeyInfo
+  sortKeys?: SortKeyInfo[]
   mvAsQuery?: string
   targetColumns?: string[]
 }): string {
   const header = `/* chkit backfill plan=${input.planId} chunk=${input.chunk.id} token=${input.chunk.idempotencyToken} */`
   const settings = buildSettingsClause(input.chunk.idempotencyToken)
   const { chunk } = input
+  const sortKeys = input.sortKeys ?? (input.sortKey ? [input.sortKey] : [])
+  const chunkConditions = buildChunkConditions(chunk, sortKeys)
 
   if (input.mvAsQuery) {
     // MV replay: inject partition + sort key filters into the MV's AS query
     let filtered = injectPartitionFilter(input.mvAsQuery, chunk.partitionId)
-    if (chunk.sortKeyFrom !== undefined && chunk.sortKeyTo !== undefined && input.sortKey) {
-      filtered = injectSortKeyFilter(
-        filtered,
-        input.sortKey.column,
-        input.sortKey.category,
-        chunk.sortKeyFrom,
-        chunk.sortKeyTo,
-      )
+    for (const condition of chunkConditions) {
+      filtered = injectWhereCondition(filtered, condition)
     }
     if (input.targetColumns?.length) {
       filtered = rewriteSelectColumns(filtered, input.targetColumns)
@@ -59,13 +84,8 @@ export function buildChunkSql(input: {
     `WHERE _partition_id = '${chunk.partitionId}'`,
   ]
 
-  if (chunk.sortKeyFrom !== undefined && chunk.sortKeyTo !== undefined && input.sortKey) {
-    lines.push(buildSortKeyCondition(
-      input.sortKey.column,
-      input.sortKey.category,
-      chunk.sortKeyFrom,
-      chunk.sortKeyTo,
-    ))
+  for (const condition of chunkConditions) {
+    lines.push(`  AND ${condition}`)
   }
 
   lines.push(settings)
@@ -88,9 +108,11 @@ export function injectSortKeyFilter(
 ): string {
   let condition: string
   if (category === 'datetime') {
-    condition = `${sortKeyColumn} >= parseDateTimeBestEffort('${from}')\n  AND ${sortKeyColumn} < parseDateTimeBestEffort('${to}')`
+    condition = `${sortKeyColumn} >= parseDateTimeBestEffort(${quoteSqlString(from)})\n  AND ${sortKeyColumn} < parseDateTimeBestEffort(${quoteSqlString(to)})`
+  } else if (category === 'string') {
+    condition = `${sortKeyColumn} >= unhex('${Buffer.from(from, 'latin1').toString('hex')}')\n  AND ${sortKeyColumn} < unhex('${Buffer.from(to, 'latin1').toString('hex')}')`
   } else {
-    condition = `${sortKeyColumn} >= '${from}'\n  AND ${sortKeyColumn} < '${to}'`
+    condition = `${sortKeyColumn} >= ${quoteSqlString(from)}\n  AND ${sortKeyColumn} < ${quoteSqlString(to)}`
   }
   return injectWhereCondition(query, condition)
 }
diff --git a/packages/plugin-backfill/src/chunking/types.ts b/packages/plugin-backfill/src/chunking/types.ts
index 0c76952..220da04 100644
--- a/packages/plugin-backfill/src/chunking/types.ts
+++ b/packages/plugin-backfill/src/chunking/types.ts
@@ -2,6 +2,7 @@ export interface PartitionInfo {
   partitionId: string
   rows: number
   bytesOnDisk: number
+  bytesUncompressed?: number
   minTime: string
   maxTime: string
 }
@@ -12,20 +13,70 @@ export interface SortKeyInfo {
   category: 'numeric' | 'datetime' | 'string'
 }
 
+export interface SliceRange {
+  dimensionIndex: number
+  from?: string
+  to?: string
+}
+
+export interface SliceLineageStep {
+  strategyId: string
+  dimensionIndex?: number
+  reason: string
+}
+
+export type EstimateConfidence = 'high' | 'low' | 'exact'
+
+export type EstimateReason =
+  | 'partition-metadata'
+  | 'quantile-estimate'
+  | 'string-prefix-distribution'
+  | 'temporal-distribution'
+  | 'equal-width-distribution'
+  | 'exact-count'
+
 export interface ChunkBoundary {
   partitionId: string
+  ranges?: SliceRange[]
   sortKeyFrom?: string
   sortKeyTo?: string
   estimatedBytes: number
+  estimatedRows?: number
+  isHotKey?: boolean
+  hotDimensionIndex?: number
+  hotKeyValue?: string
+  estimateConfidence?: EstimateConfidence
+  estimateReason?: EstimateReason
+  lineage?: SliceLineageStep[]
 }
 
 export interface PlannedChunk {
   id: string
   partitionId: string
+  ranges?: SliceRange[]
   sortKeyFrom?: string
   sortKeyTo?: string
   estimatedBytes: number
+  estimatedRows?: number
   idempotencyToken: string
   from: string
   to: string
+  isHotKey?: boolean
+  hotDimensionIndex?: number
+  hotKeyValue?: string
+  estimateConfidence?: EstimateConfidence
+  estimateReason?: EstimateReason
+  lineage?: SliceLineageStep[]
+}
+
+export interface PartitionDiagnostics {
+  partitionId: string
+  estimatedRowSum: number
+  exactPartitionRows: number
+  estimateToExactRatio: number
+  suspiciousEstimate: boolean
+  lowConfidenceChunkCount: number
+  usedDistributionFallback: boolean
+  usedLowConfidenceChunkRefinement: boolean
+  usedExactCountFallback: boolean
 }
diff --git a/packages/plugin-backfill/src/index.ts b/packages/plugin-backfill/src/index.ts
index 3420da7..c2c8446 100644
--- a/packages/plugin-backfill/src/index.ts
+++ b/packages/plugin-backfill/src/index.ts
@@ -1,16 +1,6 @@
 import './table-config.js'
 
 export { backfill, createBackfillPlugin } from './plugin.js'
-export { executeBackfill, syncProgress } from './async-backfill.js'
-export { analyzeAndChunk } from './chunking/analyze.js'
-export type {
-  BackfillOptions,
-  BackfillChunkState,
-  BackfillProgress,
-  BackfillResult,
-} from './async-backfill.js'
 export type { BackfillPlugin, BackfillPluginOptions, BackfillPluginRegistration } from './types.js'
 export type { PluginConfig } from './options.js'
 export type { BackfillTableConfig } from './table-config.js'
-export type { AnalyzeAndChunkInput, AnalyzeAndChunkResult } from './chunking/analyze.js'
-export type { PlannedChunk, PartitionInfo, SortKeyInfo } from './chunking/types.js'
diff --git a/packages/plugin-backfill/src/planner.ts b/packages/plugin-backfill/src/planner.ts
index 6f24e02..9f586c6 100644
--- a/packages/plugin-backfill/src/planner.ts
+++ b/packages/plugin-backfill/src/planner.ts
@@ -36,7 +36,14 @@ export async function buildBackfillPlan(input: {
   const env = computeEnvironmentFingerprint(input.clickhouse)
 
   // 1. Analyze table and build planned chunks
-  const { planId, partitions, sortKey, chunks: plannedChunks } = await analyzeAndChunk({
+  const {
+    planId,
+    partitions,
+    sortKey,
+    sortKeys,
+    chunks: plannedChunks,
+    partitionDiagnostics,
+  } = await analyzeAndChunk({
     database,
     table,
     from: opts.from,
@@ -88,6 +95,7 @@ export async function buildBackfillPlan(input: {
       chunk: planned,
       target: opts.target,
       sortKey,
+      sortKeys,
       mvAsQuery,
       targetColumns,
     })
@@ -102,8 +110,16 @@ export async function buildBackfillPlan(input: {
       sqlTemplate,
       partitionId: planned.partitionId,
       estimatedBytes: planned.estimatedBytes,
+      ...(planned.estimatedRows !== undefined ? { estimatedRows: planned.estimatedRows } : {}),
+      ...(planned.ranges ? { ranges: planned.ranges } : {}),
       ...(planned.sortKeyFrom !== undefined ? { sortKeyFrom: planned.sortKeyFrom } : {}),
       ...(planned.sortKeyTo !== undefined ? { sortKeyTo: planned.sortKeyTo } : {}),
+      ...(planned.isHotKey !== undefined ? { isHotKey: planned.isHotKey } : {}),
+      ...(planned.hotDimensionIndex !== undefined ? { hotDimensionIndex: planned.hotDimensionIndex } : {}),
+      ...(planned.hotKeyValue !== undefined ? { hotKeyValue: planned.hotKeyValue } : {}),
+      ...(planned.estimateConfidence !== undefined ? { estimateConfidence: planned.estimateConfidence } : {}),
+      ...(planned.estimateReason !== undefined ? { estimateReason: planned.estimateReason } : {}),
+      ...(planned.lineage ? { lineage: planned.lineage } : {}),
     }
   })
 
@@ -121,6 +137,8 @@ export async function buildBackfillPlan(input: {
     chunks,
     partitions,
     sortKey,
+    sortKeys,
+    partitionDiagnostics,
     options: {
       maxChunkBytes: opts.maxChunkBytes,
       maxParallelChunks: opts.maxParallelChunks,
diff --git a/packages/plugin-backfill/src/plugin.test.ts b/packages/plugin-backfill/src/plugin.test.ts
index 275fe78..01a85b5 100644
--- a/packages/plugin-backfill/src/plugin.test.ts
+++ b/packages/plugin-backfill/src/plugin.test.ts
@@ -1,5 +1,7 @@
 import { describe, expect, test } from 'bun:test'
 
+import * as sdk from './sdk.js'
+import * as root from './index.js'
 import { backfill, createBackfillPlugin } from './plugin.js'
 
 describe('@chkit/plugin-backfill plugin surface', () => {
@@ -21,4 +23,13 @@ describe('@chkit/plugin-backfill plugin surface', () => {
     expect(registration.enabled).toBe(true)
     expect(registration.options?.maxParallelChunks).toBe(4)
   })
+
+  test('keeps internals off the package root and exposes them via sdk', () => {
+    expect(root).not.toHaveProperty('analyzeAndChunk')
+    expect(root).not.toHaveProperty('executeBackfill')
+
+    expect(sdk).toHaveProperty('analyzeAndChunk')
+    expect(sdk).toHaveProperty('executeBackfill')
+    expect(sdk).toHaveProperty('buildChunkSql')
+  })
 })
diff --git a/packages/plugin-backfill/src/sdk.ts b/packages/plugin-backfill/src/sdk.ts
new file mode 100644
index 0000000..0570001
--- /dev/null
+++ b/packages/plugin-backfill/src/sdk.ts
@@ -0,0 +1,29 @@
+export { executeBackfill, syncProgress } from './async-backfill.js'
+export { analyzeAndChunk, analyzeTable, buildPlannedChunks } from './chunking/analyze.js'
+export { buildChunkSql, injectSortKeyFilter, rewriteSelectColumns } from './chunking/sql.js'
+
+export type {
+  BackfillOptions,
+  BackfillChunkState,
+  BackfillProgress,
+  BackfillResult,
+} from './async-backfill.js'
+
+export type {
+  AnalyzeAndChunkInput,
+  AnalyzeAndChunkResult,
+  AnalyzeTableInput,
+  AnalyzeTableResult,
+} from './chunking/analyze.js'
+
+export type {
+  ChunkBoundary,
+  EstimateConfidence,
+  EstimateReason,
+  PartitionDiagnostics,
+  PartitionInfo,
+  PlannedChunk,
+  SliceLineageStep,
+  SliceRange,
+  SortKeyInfo,
+} from './chunking/types.js'
diff --git a/packages/plugin-backfill/src/types.ts b/packages/plugin-backfill/src/types.ts
index f3b50da..cf812ea 100644
--- a/packages/plugin-backfill/src/types.ts
+++ b/packages/plugin-backfill/src/types.ts
@@ -1,7 +1,15 @@
 import type { ChxInlinePluginRegistration, ResolvedChxConfig } from '@chkit/core'
 
 import type { BackfillProgress } from './async-backfill.js'
-import type { PartitionInfo, SortKeyInfo } from './chunking/types.js'
+import type {
+  PartitionDiagnostics,
+  PartitionInfo,
+  SliceLineageStep,
+  SliceRange,
+  SortKeyInfo,
+  EstimateConfidence,
+  EstimateReason,
+} from './chunking/types.js'
 import type { PluginConfig } from './options.js'
 
 /** @deprecated Use {@link PluginConfig} instead. */
@@ -29,8 +37,16 @@ export interface BackfillChunk {
   lastError?: string
   partitionId: string
   estimatedBytes: number
+  estimatedRows?: number
+  ranges?: SliceRange[]
   sortKeyFrom?: string
   sortKeyTo?: string
+  isHotKey?: boolean
+  hotDimensionIndex?: number
+  hotKeyValue?: string
+  estimateConfidence?: EstimateConfidence
+  estimateReason?: EstimateReason
+  lineage?: SliceLineageStep[]
 }
 
 export interface BackfillPlanState {
@@ -45,6 +61,8 @@ export interface BackfillPlanState {
   chunks: BackfillChunk[]
   partitions?: PartitionInfo[]
   sortKey?: SortKeyInfo
+  sortKeys?: SortKeyInfo[]
+  partitionDiagnostics?: PartitionDiagnostics[]
   options: {
     chunkHours?: number
     maxChunkBytes?: number

From 6d224b7e731842106665913485c166ed138f1f1b Mon Sep 17 00:00:00 2001
From: KeKs0r <marc.hoeffl@laserfocus.io>
Date: Thu, 2 Apr 2026 00:23:30 +0200
Subject: [PATCH 2/5] fix smart chunking review issues

---
 packages/plugin-backfill/README.md            |  10 ++
 .../plugin-backfill/src/chunking/analyze.ts   |  38 ++++--
 .../src/chunking/introspect.test.ts           |  44 +++++-
 .../src/chunking/introspect.ts                | 127 +++++++++++++++---
 .../smart-chunking.integration.test.ts        |  13 ++
 packages/plugin-backfill/src/plugin.test.ts   |  10 ++
 6 files changed, 216 insertions(+), 26 deletions(-)

diff --git a/packages/plugin-backfill/README.md b/packages/plugin-backfill/README.md
index 6feaa72..4b3f42f 100644
--- a/packages/plugin-backfill/README.md
+++ b/packages/plugin-backfill/README.md
@@ -35,6 +35,16 @@ export default defineConfig({
 
 See the [chkit documentation](https://chkit.obsessiondb.com).
 
+## SDK Internals
+
+The package root is limited to the plugin registration API.
+
+Chunk-planning and async execution internals are exposed from the SDK subpath:
+
+```ts
+import { analyzeAndChunk, executeBackfill } from '@chkit/plugin-backfill/sdk'
+```
+
 ## License
 
 [MIT](../../LICENSE)
diff --git a/packages/plugin-backfill/src/chunking/analyze.ts b/packages/plugin-backfill/src/chunking/analyze.ts
index 278d9df..a79e030 100644
--- a/packages/plugin-backfill/src/chunking/analyze.ts
+++ b/packages/plugin-backfill/src/chunking/analyze.ts
@@ -363,8 +363,18 @@ async function splitSliceWithBoundaries(
   const slices: PartitionSlice[] = []
 
   for (let index = 0; index < boundaries.length - 1; index++) {
-    const ranges = replaceSliceRange(slice, dimensionIndex, boundaries[index], boundaries[index + 1])
+    const from = boundaries[index]
+    const to = boundaries[index + 1]
+    if (from === undefined || to === undefined || from === to) {
+      continue
+    }
+
+    const ranges = replaceSliceRange(slice, dimensionIndex, from, to)
     const estimatedRows = await countRows(context, partition.partitionId, ranges)
+    if (estimatedRows <= 0) {
+      continue
+    }
+
     slices.push(buildSliceFromRows(partition, {
       ranges,
       estimatedRows,
@@ -516,6 +526,8 @@ function buildTemporalSlices(
   const slices: PartitionSlice[] = []
   let currentStart: string | undefined
   let currentRows = 0
+  const parentRange = getSliceRange(parentSlice, dimensionIndex)
+  const sliceEnd = parentRange.to ?? getPartitionEndExclusive(partition)
 
   for (let index = 0; index < buckets.length; index++) {
     const bucket = buckets[index]
@@ -547,7 +559,7 @@ function buildTemporalSlices(
 
     if (index === buckets.length - 1 && currentStart !== undefined) {
       slices.push(buildSliceFromRows(partition, {
-        ranges: replaceSliceRange(parentSlice, dimensionIndex, currentStart, getPartitionEndExclusive(partition)),
+        ranges: replaceSliceRange(parentSlice, dimensionIndex, currentStart, sliceEnd),
         estimatedRows: currentRows,
         isHotKey: parentSlice.isHotKey,
         hotDimensionIndex: parentSlice.hotDimensionIndex,
@@ -950,24 +962,34 @@ function buildEvenlySpacedBoundaries(
   if (sortKey.category === 'datetime') {
     const start = parsePlannerDateTime(from)
     const end = parsePlannerDateTime(to)
-    return Array.from({ length: subCount + 1 }, (_, index) =>
+    return uniqueBoundaries(Array.from({ length: subCount + 1 }, (_, index) =>
       new Date(start + Math.floor(((end - start) * index) / subCount)).toISOString()
-    )
+    ))
   }
 
   if (sortKey.category === 'numeric') {
     const start = Number(from)
     const end = Number(to)
-    return Array.from({ length: subCount + 1 }, (_, index) =>
+    return uniqueBoundaries(Array.from({ length: subCount + 1 }, (_, index) =>
       String(start + Math.floor(((end - start) * index) / subCount))
-    )
+    ))
   }
 
   const start = strToBigInt(from, 8)
   const end = strToBigInt(to, 8)
-  return Array.from({ length: subCount + 1 }, (_, index) =>
+  return uniqueBoundaries(Array.from({ length: subCount + 1 }, (_, index) =>
     bigIntToStr(start + ((end - start) * BigInt(index)) / BigInt(subCount), 8)
-  )
+  ))
+}
+
+function uniqueBoundaries(boundaries: string[]): string[] {
+  const unique: string[] = []
+  for (const boundary of boundaries) {
+    if (unique[unique.length - 1] !== boundary) {
+      unique.push(boundary)
+    }
+  }
+  return unique
 }
 
 function parsePlannerDateTime(value: string): number {
diff --git a/packages/plugin-backfill/src/chunking/introspect.test.ts b/packages/plugin-backfill/src/chunking/introspect.test.ts
index 431872c..b40822b 100644
--- a/packages/plugin-backfill/src/chunking/introspect.test.ts
+++ b/packages/plugin-backfill/src/chunking/introspect.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, test } from 'bun:test'
 
-import { introspectTable, queryPartitionInfo, querySortKeyInfo, querySortKeyRanges } from './introspect.js'
+import { introspectTable, queryPartitionInfo, querySortKeyInfo, querySortKeyRanges, querySortKeys } from './introspect.js'
 
 describe('queryPartitionInfo', () => {
   test('maps system.parts rows to PartitionInfo array', async () => {
@@ -132,7 +132,7 @@ describe('querySortKeyInfo', () => {
   test('returns first column from multi-column sorting key', async () => {
     const query = async <T>(sql: string) => {
       if (sql.includes('system.tables')) return [{ sorting_key: 'event_time, id' }] as T[]
-      if (sql.includes('system.columns')) return [{ type: 'DateTime' }] as T[]
+      if (sql.includes('system.columns')) return [{ name: 'event_time', type: 'DateTime' }, { name: 'id', type: 'UInt64' }] as T[]
       return [] as T[]
     }
 
@@ -140,6 +140,46 @@ describe('querySortKeyInfo', () => {
 
     expect(result?.column).toBe('event_time')
   })
+
+  test('extracts a single referenced column from function expressions with commas', async () => {
+    const query = async <T>(sql: string) => {
+      if (sql.includes('system.tables')) {
+        return [{ sorting_key: 'toStartOfInterval(ts, INTERVAL 5 MINUTE), user_id' }] as T[]
+      }
+      if (sql.includes('system.columns')) {
+        return [
+          { name: 'ts', type: 'DateTime' },
+          { name: 'user_id', type: 'String' },
+        ] as T[]
+      }
+      return [] as T[]
+    }
+
+    const result = await querySortKeys({ database: 'default', table: 'events', query })
+
+    expect(result.map((key) => key.column)).toEqual(['ts', 'user_id'])
+    expect(result.map((key) => key.category)).toEqual(['datetime', 'string'])
+  })
+
+  test('skips ambiguous tuple expressions that do not map to one physical column', async () => {
+    const query = async <T>(sql: string) => {
+      if (sql.includes('system.tables')) {
+        return [{ sorting_key: 'tuple(user_id, session_id), event_time' }] as T[]
+      }
+      if (sql.includes('system.columns')) {
+        return [
+          { name: 'user_id', type: 'String' },
+          { name: 'session_id', type: 'String' },
+          { name: 'event_time', type: 'DateTime' },
+        ] as T[]
+      }
+      return [] as T[]
+    }
+
+    const result = await querySortKeys({ database: 'default', table: 'events', query })
+
+    expect(result.map((key) => key.column)).toEqual(['event_time'])
+  })
 })
 
 describe('querySortKeyRanges', () => {
diff --git a/packages/plugin-backfill/src/chunking/introspect.ts b/packages/plugin-backfill/src/chunking/introspect.ts
index a9f8e0e..16f0c79 100644
--- a/packages/plugin-backfill/src/chunking/introspect.ts
+++ b/packages/plugin-backfill/src/chunking/introspect.ts
@@ -72,15 +72,92 @@ SETTINGS select_sequential_consistency = 1`
 }
 
 function extractSortKeyColumns(sortingKey: string): string[] {
-  return sortingKey
-    .split(',')
+  return splitTopLevelCsv(sortingKey)
     .map((part) => part.trim())
-    .map((part) => {
-      if (!part) return undefined
-      const match = part.match(/^\w+\((\w+)\)$/)
-      return match ? match[1] : part
-    })
-    .filter((part): part is string => Boolean(part && part.length > 0))
+    .filter((part): part is string => part.length > 0)
+}
+
+function splitTopLevelCsv(input: string): string[] {
+  const parts: string[] = []
+  let current = ''
+  let depth = 0
+  let quote: "'" | '"' | undefined
+
+  for (let index = 0; index < input.length; index++) {
+    const char = input[index]
+    if (char === undefined) continue
+
+    if (quote) {
+      current += char
+      if (char === quote && input[index - 1] !== '\\') {
+        quote = undefined
+      }
+      continue
+    }
+
+    if (char === '\'' || char === '"') {
+      quote = char
+      current += char
+      continue
+    }
+
+    if (char === '(') {
+      depth += 1
+      current += char
+      continue
+    }
+
+    if (char === ')') {
+      depth = Math.max(0, depth - 1)
+      current += char
+      continue
+    }
+
+    if (char === ',' && depth === 0) {
+      parts.push(current.trim())
+      current = ''
+      continue
+    }
+
+    current += char
+  }
+
+  if (current.trim().length > 0) {
+    parts.push(current.trim())
+  }
+
+  return parts
+}
+
+function resolveSortKeyColumn(expression: string, knownColumns: Set<string>): string | undefined {
+  const trimmed = expression.trim()
+  if (knownColumns.has(trimmed)) {
+    return trimmed
+  }
+
+  const identifiers = Array.from(trimmed.matchAll(/\b[A-Za-z_][A-Za-z0-9_]*\b/g))
+    .map((match) => match[0])
+    .filter((identifier): identifier is string => Boolean(identifier))
+
+  const matches = Array.from(new Set(identifiers.filter((identifier) => knownColumns.has(identifier))))
+  if (matches.length === 1) {
+    return matches[0]
+  }
+
+  return undefined
+}
+
+function resolveSortKeyColumnWithoutSchema(expression: string): string | undefined {
+  const trimmed = expression.trim()
+  if (/^[A-Za-z_][A-Za-z0-9_]*$/.test(trimmed)) {
+    return trimmed
+  }
+
+  const identifiers = Array.from(trimmed.matchAll(/\b[A-Za-z_][A-Za-z0-9_]*\b/g))
+    .map((match) => match[0])
+    .filter((identifier): identifier is string => Boolean(identifier))
+
+  return identifiers.length > 0 ? identifiers[identifiers.length - 1] : undefined
 }
 
 export async function querySortKeys(input: {
@@ -95,24 +172,42 @@ export async function querySortKeys(input: {
   const sortingKey = tableRows[0]?.sorting_key
   if (!sortingKey) return []
 
-  const columnNames = extractSortKeyColumns(sortingKey)
-  if (columnNames.length === 0) return []
+  const expressions = extractSortKeyColumns(sortingKey)
+  if (expressions.length === 0) return []
 
-  const inList = columnNames.map((name) => `'${name}'`).join(', ')
   const columnRows = await input.query<{ name?: string; type: string }>(
-    `SELECT name, type FROM system.columns WHERE database = '${input.database}' AND table = '${input.table}' AND name IN (${inList})`
+    `SELECT name, type FROM system.columns WHERE database = '${input.database}' AND table = '${input.table}'`
   )
   const typeByName = new Map(
-    columnRows.map((row, index) => [row.name ?? columnNames[index] ?? columnNames[0], row.type])
+    columnRows
+      .filter((row): row is { name: string; type: string } => Boolean(row.name))
+      .map((row) => [row.name, row.type])
   )
+  const knownColumns = new Set(typeByName.keys())
+
+  if (knownColumns.size === 0) {
+    return expressions.flatMap((expression, index) => {
+      const column = resolveSortKeyColumnWithoutSchema(expression)
+      const type = columnRows[index]?.type ?? columnRows[0]?.type
+      if (!column || !type) return []
 
-  return columnNames.map((column) => {
+      return [{
+        column,
+        type,
+        category: classifySortKeyType(type),
+      }]
+    })
+  }
+
+  return expressions.flatMap((expression) => {
+    const column = resolveSortKeyColumn(expression, knownColumns)
+    if (!column) return []
     const type = typeByName.get(column) ?? 'String'
-    return {
+    return [{
       column,
       type,
       category: classifySortKeyType(type),
-    }
+    }]
   })
 }
 
diff --git a/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts b/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts
index 7176994..e362f50 100644
--- a/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts
+++ b/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts
@@ -301,6 +301,10 @@ describe('smart chunking integration', () => {
 
     expect(result.chunks.length).toBeGreaterThan(1)
     expect(result.chunks.some((chunk) => strategyIds(chunk).includes('equal-width-split'))).toBe(true)
+    expect(result.chunks.every((chunk) => (chunk.estimatedRows ?? 0) > 0)).toBe(true)
+    expect(result.chunks.every((chunk) =>
+      chunk.ranges?.every((range) => range.from !== range.to) ?? true
+    )).toBe(true)
   })
 
   test('uses string-prefix splitting for string-distributed partitions', async () => {
@@ -373,6 +377,15 @@ describe('smart chunking integration', () => {
     expect(sql).toContain('user_id >=')
     expect(sql).toContain('event_time >=')
     expect(sql).toContain('parseDateTimeBestEffort')
+
+    const temporalRanges = hotChunks
+      .map((chunk) => chunk.ranges?.find((range) => range.dimensionIndex === 1))
+      .filter((range): range is NonNullable<typeof range> => Boolean(range))
+      .sort((left, right) => String(left.from).localeCompare(String(right.from)))
+
+    for (let index = 1; index < temporalRanges.length; index++) {
+      expect(temporalRanges[index - 1]?.to).toBe(temporalRanges[index]?.from)
+    }
   })
 
   test('combines string-prefix and quantile splitting on secondary numeric dimensions', async () => {
diff --git a/packages/plugin-backfill/src/plugin.test.ts b/packages/plugin-backfill/src/plugin.test.ts
index 01a85b5..3e25054 100644
--- a/packages/plugin-backfill/src/plugin.test.ts
+++ b/packages/plugin-backfill/src/plugin.test.ts
@@ -32,4 +32,14 @@ describe('@chkit/plugin-backfill plugin surface', () => {
     expect(sdk).toHaveProperty('executeBackfill')
     expect(sdk).toHaveProperty('buildChunkSql')
   })
+
+  test('package exports resolve root and sdk subpath separately', async () => {
+    const packageRoot = await import('@chkit/plugin-backfill')
+    const packageSdk = await import('@chkit/plugin-backfill/sdk')
+
+    expect(packageRoot).toHaveProperty('backfill')
+    expect(packageRoot).not.toHaveProperty('analyzeAndChunk')
+    expect(packageSdk).toHaveProperty('analyzeAndChunk')
+    expect(packageSdk).toHaveProperty('executeBackfill')
+  })
 })

From ab1239e94f276e8ea24bf8d112ad883eb3971d62 Mon Sep 17 00:00:00 2001
From: KeKs0r <marc.hoeffl@laserfocus.io>
Date: Thu, 2 Apr 2026 14:53:26 +0200
Subject: [PATCH 3/5] restructure chunking into planner, strategies, and services

---
 .../plugin-backfill/src/async-backfill.ts     |    4 +-
 .../plugin-backfill/src/chunking/analyze.ts   | 1048 +----------------
 .../src/chunking/boundary-codec.ts            |  109 ++
 .../src/chunking/build.test.ts                |  135 ---
 .../plugin-backfill/src/chunking/build.ts     |   60 -
 .../src/chunking/introspect.test.ts           |  274 -----
 .../src/chunking/introspect.ts                |  262 -----
 .../src/chunking/partition-slices.ts          |  153 +++
 .../plugin-backfill/src/chunking/planner.ts   |  358 ++++++
 .../chunking/services/distribution-source.ts  |   66 ++
 .../src/chunking/services/metadata-source.ts  |  163 +++
 .../src/chunking/services/row-probe.ts        |  102 ++
 .../smart-chunking.integration.test.ts        |  106 +-
 .../src/chunking/splitter.test.ts             |   64 -
 .../plugin-backfill/src/chunking/splitter.ts  |   86 --
 packages/plugin-backfill/src/chunking/sql.ts  |  385 +++---
 .../chunking/strategies/equal-width-split.ts  |   67 ++
 .../strategies/metadata-single-chunk.ts       |    6 +
 .../strategies/quantile-range-split.ts        |  208 ++++
 .../src/chunking/strategies/refinement.ts     |  128 ++
 .../strategies/string-prefix-split.ts         |  144 +++
 .../strategies/temporal-bucket-split.ts       |  117 ++
 .../src/chunking/strategy-policy.test.ts      |   13 +
 .../src/chunking/strategy-policy.ts           |    8 +
 .../plugin-backfill/src/chunking/types.ts     |  189 ++-
 .../src/chunking/utils/binary-string.ts       |   55 +
 .../plugin-backfill/src/chunking/utils/ids.ts |   17 +
 .../src/chunking/utils/ranges.ts              |   31 +
 .../src/partition-planner.test.ts             |  185 ---
 packages/plugin-backfill/src/payload.ts       |   10 +-
 packages/plugin-backfill/src/planner.test.ts  |  591 +++-------
 packages/plugin-backfill/src/planner.ts       |  117 +-
 packages/plugin-backfill/src/plugin.test.ts   |   29 +-
 packages/plugin-backfill/src/plugin.ts        |   30 +-
 packages/plugin-backfill/src/queries.ts       |    6 +-
 packages/plugin-backfill/src/sdk.ts           |   18 +-
 packages/plugin-backfill/src/state.ts         |   27 +-
 packages/plugin-backfill/src/types.ts         |   59 +-
 38 files changed, 2496 insertions(+), 2934 deletions(-)
 create mode 100644 packages/plugin-backfill/src/chunking/boundary-codec.ts
 delete mode 100644 packages/plugin-backfill/src/chunking/build.test.ts
 delete mode 100644 packages/plugin-backfill/src/chunking/build.ts
 delete mode 100644 packages/plugin-backfill/src/chunking/introspect.test.ts
 delete mode 100644 packages/plugin-backfill/src/chunking/introspect.ts
 create mode 100644 packages/plugin-backfill/src/chunking/partition-slices.ts
 create mode 100644 packages/plugin-backfill/src/chunking/planner.ts
 create mode 100644 packages/plugin-backfill/src/chunking/services/distribution-source.ts
 create mode 100644 packages/plugin-backfill/src/chunking/services/metadata-source.ts
 create mode 100644 packages/plugin-backfill/src/chunking/services/row-probe.ts
 delete mode 100644 packages/plugin-backfill/src/chunking/splitter.test.ts
 delete mode 100644 packages/plugin-backfill/src/chunking/splitter.ts
 create mode 100644 packages/plugin-backfill/src/chunking/strategies/equal-width-split.ts
 create mode 100644 packages/plugin-backfill/src/chunking/strategies/metadata-single-chunk.ts
 create mode 100644 packages/plugin-backfill/src/chunking/strategies/quantile-range-split.ts
 create mode 100644 packages/plugin-backfill/src/chunking/strategies/refinement.ts
 create mode 100644 packages/plugin-backfill/src/chunking/strategies/string-prefix-split.ts
 create mode 100644 packages/plugin-backfill/src/chunking/strategies/temporal-bucket-split.ts
 create mode 100644 packages/plugin-backfill/src/chunking/strategy-policy.test.ts
 create mode 100644 packages/plugin-backfill/src/chunking/strategy-policy.ts
 create mode 100644 packages/plugin-backfill/src/chunking/utils/binary-string.ts
 create mode 100644 packages/plugin-backfill/src/chunking/utils/ids.ts
 create mode 100644 packages/plugin-backfill/src/chunking/utils/ranges.ts
 delete mode 100644 packages/plugin-backfill/src/partition-planner.test.ts

diff --git a/packages/plugin-backfill/src/async-backfill.ts b/packages/plugin-backfill/src/async-backfill.ts
index f393499..e5acd1e 100644
--- a/packages/plugin-backfill/src/async-backfill.ts
+++ b/packages/plugin-backfill/src/async-backfill.ts
@@ -7,9 +7,9 @@ export interface BackfillOptions {
   /** Plan ID used as a namespace in deterministic query IDs */
   planId: string
   /** The chunks to process (from buildChunks) */
-  chunks: Array<{ id: string; from: string; to: string; [key: string]: unknown }>
+  chunks: Array<{ id: string; from?: string; to?: string; [key: string]: unknown }>
   /** Build the SQL for a given chunk. Called once per chunk at submit time. */
-  buildQuery: (chunk: { id: string; from: string; to: string }) => string
+  buildQuery: (chunk: { id: string; from?: string; to?: string }) => string
   /** Max concurrent queries running on the server. Default: 3 */
   concurrency?: number
   /** Polling interval in ms. Default: 5000 */
diff --git a/packages/plugin-backfill/src/chunking/analyze.ts b/packages/plugin-backfill/src/chunking/analyze.ts
index a79e030..3903b72 100644
--- a/packages/plugin-backfill/src/chunking/analyze.ts
+++ b/packages/plugin-backfill/src/chunking/analyze.ts
@@ -1,1047 +1,15 @@
-import { hashId, randomPlanId } from '../state.js'
+import { generateChunkPlan } from './planner.js'
+import type { ChunkPlan, GenerateChunkPlanInput } from './types.js'
 
-import { introspectTable } from './introspect.js'
-import type {
-  ChunkBoundary,
-  EstimateConfidence,
-  EstimateReason,
-  PartitionDiagnostics,
-  PartitionInfo,
-  PlannedChunk,
-  SliceLineageStep,
-  SliceRange,
-  SortKeyInfo,
-} from './types.js'
-
-const MAX_SPLIT_DEPTH_MULTIPLIER = 3
-const TARGET_BYTES_FUZZ_FACTOR = 1.15
-const STOP_SPLIT_FUZZ_FACTOR = 1.5
-const STRING_PREFIX_START_DEPTH = 1
-const STRING_PREFIX_MAX_DEPTH = 4
-const BINARY_SEARCH_STEPS = 24
-
-interface PartitionSlice {
-  partitionId: string
-  ranges: SliceRange[]
-  estimatedRows: number
-  estimatedBytes: number
-  isHotKey: boolean
-  hotDimensionIndex?: number
-  hotKeyValue?: string
-  estimateConfidence: EstimateConfidence
-  estimateReason: EstimateReason
-  lineage: SliceLineageStep[]
-}
-
-interface QueryContext {
-  database: string
-  table: string
-  sortKeys: SortKeyInfo[]
-  query: <T>(sql: string) => Promise<T[]>
-}
-
-export interface AnalyzeAndChunkInput {
-  database: string
-  table: string
-  from?: string
-  to?: string
-  maxChunkBytes: number
-  requireIdempotencyToken: boolean
-  query: <T>(sql: string) => Promise<T[]>
-}
-
-export interface AnalyzeAndChunkResult {
-  planId: string
-  partitions: PartitionInfo[]
-  sortKey?: SortKeyInfo
-  sortKeys: SortKeyInfo[]
-  chunks: PlannedChunk[]
-  partitionDiagnostics: PartitionDiagnostics[]
-}
+export type AnalyzeAndChunkInput = GenerateChunkPlanInput
+export type AnalyzeAndChunkResult = ChunkPlan
+export type AnalyzeTableInput = GenerateChunkPlanInput
+export type AnalyzeTableResult = ChunkPlan
 
 export async function analyzeAndChunk(input: AnalyzeAndChunkInput): Promise<AnalyzeAndChunkResult> {
-  const { partitions, sortKey, sortKeys, boundaries, partitionDiagnostics } = await analyzeTable({
-    database: input.database,
-    table: input.table,
-    from: input.from,
-    to: input.to,
-    maxChunkBytes: input.maxChunkBytes,
-    query: input.query,
-  })
-
-  const planId = randomPlanId()
-
-  const chunks = buildPlannedChunks({
-    planId,
-    partitions,
-    sortKeys,
-    boundaries,
-    requireIdempotencyToken: input.requireIdempotencyToken,
-  })
-
-  return { planId, partitions, sortKey, sortKeys, chunks, partitionDiagnostics }
-}
-
-export interface AnalyzeTableInput {
-  database: string
-  table: string
-  from?: string
-  to?: string
-  maxChunkBytes: number
-  query: <T>(sql: string) => Promise<T[]>
-}
-
-export interface AnalyzeTableResult {
-  partitions: PartitionInfo[]
-  sortKey?: SortKeyInfo
-  sortKeys: SortKeyInfo[]
-  boundaries: ChunkBoundary[]
-  partitionDiagnostics: PartitionDiagnostics[]
+  return generateChunkPlan(input)
 }
 
 export async function analyzeTable(input: AnalyzeTableInput): Promise<AnalyzeTableResult> {
-  const { partitions, sortKey, sortKeys } = await introspectTable({
-    database: input.database,
-    table: input.table,
-    from: input.from,
-    to: input.to,
-    query: input.query,
-  })
-
-  const context: QueryContext = {
-    database: input.database,
-    table: input.table,
-    sortKeys,
-    query: input.query,
-  }
-
-  const boundaries: ChunkBoundary[] = []
-  const partitionDiagnostics: PartitionDiagnostics[] = []
-
-  for (const partition of partitions) {
-    const slices = await planPartition(context, partition, input.maxChunkBytes)
-    const merged = mergeAdjacentSlices(slices, input.maxChunkBytes)
-
-    for (const slice of merged) {
-      const primaryRange = getSliceRange(slice, 0)
-      boundaries.push({
-        partitionId: slice.partitionId,
-        ranges: slice.ranges,
-        sortKeyFrom: primaryRange.from,
-        sortKeyTo: primaryRange.to,
-        estimatedBytes: slice.estimatedBytes,
-        estimatedRows: slice.estimatedRows,
-        isHotKey: slice.isHotKey,
-        hotDimensionIndex: slice.hotDimensionIndex,
-        hotKeyValue: slice.hotKeyValue,
-        estimateConfidence: slice.estimateConfidence,
-        estimateReason: slice.estimateReason,
-        lineage: slice.lineage,
-      })
-    }
-
-    const estimatedRowSum = merged.reduce((sum, slice) => sum + slice.estimatedRows, 0)
-    const estimateToExactRatio = partition.rows > 0 ? estimatedRowSum / partition.rows : 1
-    partitionDiagnostics.push({
-      partitionId: partition.partitionId,
-      estimatedRowSum,
-      exactPartitionRows: partition.rows,
-      estimateToExactRatio,
-      suspiciousEstimate: estimateToExactRatio < 0.7 || estimateToExactRatio > 1.3,
-      lowConfidenceChunkCount: merged.filter((slice) => slice.estimateConfidence === 'low').length,
-      usedDistributionFallback: merged.some((slice) =>
-        slice.estimateReason === 'string-prefix-distribution' ||
-        slice.estimateReason === 'temporal-distribution' ||
-        slice.estimateReason === 'equal-width-distribution'
-      ),
-      usedLowConfidenceChunkRefinement: false,
-      usedExactCountFallback: false,
-    })
-  }
-
-  return { partitions, sortKey, sortKeys, boundaries, partitionDiagnostics }
-}
-
-export function buildPlannedChunks(input: {
-  planId: string
-  partitions: PartitionInfo[]
-  sortKeys: SortKeyInfo[]
-  boundaries: ChunkBoundary[]
-  requireIdempotencyToken: boolean
-}): PlannedChunk[] {
-  const chunks: PlannedChunk[] = []
-  const partitionIndex = new Map<string, number>()
-
-  for (const boundary of input.boundaries) {
-    const idx = partitionIndex.get(boundary.partitionId) ?? 0
-    partitionIndex.set(boundary.partitionId, idx + 1)
-
-    const idSeed = `${input.planId}:${boundary.partitionId}:${idx}`
-    const chunkId = hashId(`chunk:${idSeed}`).slice(0, 16)
-    const token = input.requireIdempotencyToken ? hashId(`token:${idSeed}`) : ''
-
-    const partition = input.partitions.find((candidate) => candidate.partitionId === boundary.partitionId)
-    const { from, to } = deriveChunkWindow(boundary.ranges ?? [], input.sortKeys, partition)
-
-    chunks.push({
-      id: chunkId,
-      partitionId: boundary.partitionId,
-      ranges: boundary.ranges,
-      sortKeyFrom: boundary.sortKeyFrom,
-      sortKeyTo: boundary.sortKeyTo,
-      estimatedBytes: boundary.estimatedBytes,
-      estimatedRows: boundary.estimatedRows,
-      idempotencyToken: token,
-      from,
-      to,
-      isHotKey: boundary.isHotKey,
-      hotDimensionIndex: boundary.hotDimensionIndex,
-      hotKeyValue: boundary.hotKeyValue,
-      estimateConfidence: boundary.estimateConfidence,
-      estimateReason: boundary.estimateReason,
-      lineage: boundary.lineage,
-    })
-  }
-
-  return chunks
-}
-
-async function planPartition(
-  context: QueryContext,
-  partition: PartitionInfo,
-  maxChunkBytes: number,
-): Promise<PartitionSlice[]> {
-  if (partition.bytesOnDisk <= maxChunkBytes || context.sortKeys.length === 0) {
-    return [buildRootSlice(partition)]
-  }
-
-  const rootSlice = buildRootSlice(partition)
-  return splitSliceRecursively(context, partition, rootSlice, maxChunkBytes, 0)
-}
-
-async function splitSliceRecursively(
-  context: QueryContext,
-  partition: PartitionInfo,
-  slice: PartitionSlice,
-  maxChunkBytes: number,
-  depth: number,
-): Promise<PartitionSlice[]> {
-  if (slice.estimatedBytes <= maxChunkBytes * STOP_SPLIT_FUZZ_FACTOR) {
-    return [slice]
-  }
-
-  if (depth >= context.sortKeys.length * MAX_SPLIT_DEPTH_MULTIPLIER) {
-    return [slice]
-  }
-
-  const children = await splitOversizedSlice(context, partition, slice, maxChunkBytes, depth)
-  if (children.length <= 1) {
-    return [slice]
-  }
-
-  const finalChildren: PartitionSlice[] = []
-  for (const child of children) {
-    finalChildren.push(...await splitSliceRecursively(context, partition, child, maxChunkBytes, depth + 1))
-  }
-  return finalChildren
-}
-
-async function splitOversizedSlice(
-  context: QueryContext,
-  partition: PartitionInfo,
-  slice: PartitionSlice,
-  maxChunkBytes: number,
-  depth: number,
-): Promise<PartitionSlice[]> {
-  for (const dimensionIndex of getCandidateDimensions(context.sortKeys, slice)) {
-    const preparedSlice = await hydrateSliceRange(context, slice, dimensionIndex)
-    if (!preparedSlice) continue
-
-    const sortKey = context.sortKeys[dimensionIndex]
-    if (!sortKey) continue
-
-    const rootLike = depth === 0
-    const hotIdentity = findHotIdentity(preparedSlice, context.sortKeys)
-
-    if (sortKey.category === 'string') {
-      const stringSlices = await splitSliceWithStringPrefixes(
-        context,
-        partition,
-        preparedSlice,
-        dimensionIndex,
-        maxChunkBytes,
-        STRING_PREFIX_START_DEPTH,
-      )
-      if (isEffectiveSplit(preparedSlice, stringSlices)) {
-        return applyHotIdentity(stringSlices, hotIdentity)
-      }
-    }
-
-    if (sortKey.category === 'datetime' && (!rootLike || hotIdentity !== undefined)) {
-      const temporalSlices = await splitSliceWithTemporalBuckets(
-        context,
-        partition,
-        markHotSlice(preparedSlice, hotIdentity),
-        dimensionIndex,
-        maxChunkBytes,
-      )
-      if (isEffectiveSplit(preparedSlice, temporalSlices)) {
-        return applyHotIdentity(temporalSlices, hotIdentity)
-      }
-    }
-
-    const quantileSlices = await splitWithRanges(
-      context,
-      partition,
-      preparedSlice,
-      dimensionIndex,
-      maxChunkBytes,
-    )
-    if (isEffectiveSplit(preparedSlice, quantileSlices)) {
-      return applyHotIdentity(quantileSlices, hotIdentity)
-    }
-  }
-
-  return [slice]
-}
-
-async function splitWithRanges(
-  context: QueryContext,
-  partition: PartitionInfo,
-  slice: PartitionSlice,
-  dimensionIndex: number,
-  maxChunkBytes: number,
-): Promise<PartitionSlice[]> {
-  const sortKey = context.sortKeys[dimensionIndex]
-  const range = getSliceRange(slice, dimensionIndex)
-  if (!sortKey || range.from === undefined || range.to === undefined) return [slice]
-  if (sortKey.category === 'string' && isExactSliceRange(range)) return [slice]
-
-  const subCount = Math.ceil(slice.estimatedBytes / maxChunkBytes)
-  if (subCount <= 1) return [slice]
-
-  const boundaries = await buildQuantileBoundaries(context, slice, dimensionIndex, subCount)
-  if (boundaries) {
-    return splitSliceWithBoundaries(
-      context,
-      partition,
-      slice,
-      dimensionIndex,
-      boundaries,
-      'quantile-range-split',
-      'split slice into quantile-aligned ranges',
-      'quantile-estimate',
-      'high',
-    )
-  }
-
-  const equalWidthBoundaries = buildEvenlySpacedBoundaries(range.from, range.to, subCount, sortKey)
-  return splitSliceWithBoundaries(
-    context,
-    partition,
-    slice,
-    dimensionIndex,
-    equalWidthBoundaries,
-    'equal-width-split',
-    'fallback to equal-width ranges',
-    'equal-width-distribution',
-    'low',
-  )
-}
-
-async function splitSliceWithBoundaries(
-  context: QueryContext,
-  partition: PartitionInfo,
-  slice: PartitionSlice,
-  dimensionIndex: number,
-  boundaries: string[],
-  strategyId: string,
-  reason: string,
-  estimateReason: EstimateReason,
-  estimateConfidence: EstimateConfidence,
-): Promise<PartitionSlice[]> {
-  const slices: PartitionSlice[] = []
-
-  for (let index = 0; index < boundaries.length - 1; index++) {
-    const from = boundaries[index]
-    const to = boundaries[index + 1]
-    if (from === undefined || to === undefined || from === to) {
-      continue
-    }
-
-    const ranges = replaceSliceRange(slice, dimensionIndex, from, to)
-    const estimatedRows = await countRows(context, partition.partitionId, ranges)
-    if (estimatedRows <= 0) {
-      continue
-    }
-
-    slices.push(buildSliceFromRows(partition, {
-      ranges,
-      estimatedRows,
-      isHotKey: false,
-      hotDimensionIndex: undefined,
-      hotKeyValue: undefined,
-      estimateConfidence,
-      estimateReason,
-      lineage: slice.lineage.concat([{ strategyId, dimensionIndex, reason }]),
-    }))
-  }
-
-  return slices
-}
-
-async function splitSliceWithStringPrefixes(
-  context: QueryContext,
-  partition: PartitionInfo,
-  slice: PartitionSlice,
-  dimensionIndex: number,
-  maxChunkBytes: number,
-  depth: number,
-): Promise<PartitionSlice[]> {
-  const sortKey = context.sortKeys[dimensionIndex]
-  const range = getSliceRange(slice, dimensionIndex)
-  if (!sortKey || sortKey.category !== 'string' || range.from === undefined || range.to === undefined) {
-    return []
-  }
-
-  const rows = await context.query<{ prefix: string; cnt: string }>(`
-SELECT
-  substring(${sortKey.column}, 1, ${depth}) AS prefix,
-  count() AS cnt
-FROM ${context.database}.${context.table}
-WHERE ${buildWhereClause(partition.partitionId, replaceSliceRange(slice, dimensionIndex, range.from, range.to), context.sortKeys)}
-GROUP BY prefix
-ORDER BY prefix`)
-
-  const slices: PartitionSlice[] = []
-
-  for (const row of rows) {
-    const bucket = {
-      value: row.prefix,
-      rowCount: Number(row.cnt),
-      isExactValue: Buffer.from(row.prefix, 'latin1').length < depth,
-    }
-    if (bucket.rowCount <= 0) continue
-
-    const bucketFrom = maxBinaryString(range.from, bucket.value)
-    const bucketUpper = bucket.isExactValue ? `${bucket.value}\0` : nextPrefixValue(bucket.value)
-    if (!bucketUpper) continue
-
-    const bucketTo = minBinaryString(range.to, bucketUpper)
-    const bucketSlice = buildSliceFromRows(partition, {
-      ranges: replaceSliceRange(slice, dimensionIndex, bucketFrom, bucketTo),
-      estimatedRows: bucket.rowCount,
-      isHotKey: false,
-      hotDimensionIndex: undefined,
-      hotKeyValue: undefined,
-      estimateConfidence: 'high',
-      estimateReason: 'string-prefix-distribution',
-      lineage: slice.lineage.concat([{
-        strategyId: 'string-prefix-split',
-        dimensionIndex,
-        reason: 'split slice using string prefix distribution',
-      }]),
-    })
-
-    if (bucketSlice.estimatedBytes <= maxChunkBytes * TARGET_BYTES_FUZZ_FACTOR) {
-      slices.push(bucketSlice)
-      continue
-    }
-
-    if (!bucket.isExactValue && depth < STRING_PREFIX_MAX_DEPTH) {
-      slices.push(...await splitSliceWithStringPrefixes(
-        context,
-        partition,
-        bucketSlice,
-        dimensionIndex,
-        maxChunkBytes,
-        depth + 1,
-      ))
-      continue
-    }
-
-    slices.push(bucketSlice)
-  }
-
-  return slices
-}
-
-async function splitSliceWithTemporalBuckets(
-  context: QueryContext,
-  partition: PartitionInfo,
-  slice: PartitionSlice,
-  dimensionIndex: number,
-  maxChunkBytes: number,
-): Promise<PartitionSlice[]> {
-  const dayBuckets = await probeTemporalBuckets(context, partition.partitionId, slice.ranges, dimensionIndex, 'day')
-  if (dayBuckets.length === 0) return [slice]
-
-  const daySlices = buildTemporalSlices(partition, slice, dimensionIndex, dayBuckets, maxChunkBytes)
-  if (daySlices.every((candidate) => candidate.estimatedBytes <= maxChunkBytes * TARGET_BYTES_FUZZ_FACTOR)) {
-    return daySlices
-  }
-
-  const hourBuckets = await probeTemporalBuckets(context, partition.partitionId, slice.ranges, dimensionIndex, 'hour')
-  if (hourBuckets.length === 0) return daySlices
-  return buildTemporalSlices(partition, slice, dimensionIndex, hourBuckets, maxChunkBytes)
-}
-
-async function probeTemporalBuckets(
-  context: QueryContext,
-  partitionId: string,
-  ranges: SliceRange[],
-  dimensionIndex: number,
-  grain: 'day' | 'hour',
-): Promise<Array<{ start: string; rowCount: number }>> {
-  const sortKey = context.sortKeys[dimensionIndex]
-  if (!sortKey || sortKey.category !== 'datetime') return []
-
-  const bucketExpression = grain === 'day'
-    ? `toStartOfDay(${sortKey.column})`
-    : `toStartOfHour(${sortKey.column})`
-
-  const rows = await context.query<{ bucket: string; cnt: string }>(`
-SELECT
-  formatDateTime(${bucketExpression}, '%Y-%m-%dT%H:%i:%sZ') AS bucket,
-  count() AS cnt
-FROM ${context.database}.${context.table}
-WHERE ${buildWhereClause(partitionId, ranges, context.sortKeys)}
-GROUP BY bucket
-ORDER BY bucket`)
-
-  return rows.map((row) => ({
-    start: row.bucket,
-    rowCount: Number(row.cnt),
-  }))
-}
-
-function buildTemporalSlices(
-  partition: PartitionInfo,
-  parentSlice: PartitionSlice,
-  dimensionIndex: number,
-  buckets: Array<{ start: string; rowCount: number }>,
-  maxChunkBytes: number,
-): PartitionSlice[] {
-  const targetChunkRows = getTargetChunkRows(partition, maxChunkBytes)
-  const slices: PartitionSlice[] = []
-  let currentStart: string | undefined
-  let currentRows = 0
-  const parentRange = getSliceRange(parentSlice, dimensionIndex)
-  const sliceEnd = parentRange.to ?? getPartitionEndExclusive(partition)
-
-  for (let index = 0; index < buckets.length; index++) {
-    const bucket = buckets[index]
-    if (!bucket) continue
-
-    if (currentStart === undefined) currentStart = bucket.start
-
-    const wouldExceed = currentRows > 0 && currentRows + bucket.rowCount > targetChunkRows * TARGET_BYTES_FUZZ_FACTOR
-    if (wouldExceed && currentStart !== undefined) {
-      slices.push(buildSliceFromRows(partition, {
-        ranges: replaceSliceRange(parentSlice, dimensionIndex, currentStart, bucket.start),
-        estimatedRows: currentRows,
-        isHotKey: parentSlice.isHotKey,
-        hotDimensionIndex: parentSlice.hotDimensionIndex,
-        hotKeyValue: parentSlice.hotKeyValue,
-        estimateConfidence: 'low',
-        estimateReason: 'temporal-distribution',
-        lineage: parentSlice.lineage.concat([{
-          strategyId: 'temporal-bucket-split',
-          dimensionIndex,
-          reason: 'split slice using temporal distribution buckets',
-        }]),
-      }))
-      currentStart = bucket.start
-      currentRows = 0
-    }
-
-    currentRows += bucket.rowCount
-
-    if (index === buckets.length - 1 && currentStart !== undefined) {
-      slices.push(buildSliceFromRows(partition, {
-        ranges: replaceSliceRange(parentSlice, dimensionIndex, currentStart, sliceEnd),
-        estimatedRows: currentRows,
-        isHotKey: parentSlice.isHotKey,
-        hotDimensionIndex: parentSlice.hotDimensionIndex,
-        hotKeyValue: parentSlice.hotKeyValue,
-        estimateConfidence: 'low',
-        estimateReason: 'temporal-distribution',
-        lineage: parentSlice.lineage.concat([{
-          strategyId: 'temporal-bucket-split',
-          dimensionIndex,
-          reason: 'split slice using temporal distribution buckets',
-        }]),
-      }))
-    }
-  }
-
-  return slices
-}
-
-async function buildQuantileBoundaries(
-  context: QueryContext,
-  slice: PartitionSlice,
-  dimensionIndex: number,
-  subCount: number,
-): Promise<string[] | undefined> {
-  const range = getSliceRange(slice, dimensionIndex)
-  if (range.from === undefined || range.to === undefined) return undefined
-
-  const boundaries = [range.from]
-  for (let step = 1; step < subCount; step++) {
-    const targetCumRows = Math.round((slice.estimatedRows * step) / subCount)
-    boundaries.push(await findQuantileBoundaryOnDimension(context, slice, dimensionIndex, targetCumRows))
-  }
-
-  const uniqueBoundaryCount = new Set(boundaries).size
-  if (uniqueBoundaryCount <= Math.max(2, Math.ceil(subCount / 3))) {
-    return undefined
-  }
-
-  return boundaries.concat([range.to])
-}
-
-async function findQuantileBoundaryOnDimension(
-  context: QueryContext,
-  slice: PartitionSlice,
-  dimensionIndex: number,
-  targetCumRows: number,
-): Promise<string> {
-  const sortKey = context.sortKeys[dimensionIndex]
-  const range = getSliceRange(slice, dimensionIndex)
-  if (!sortKey || range.from === undefined || range.to === undefined) {
-    throw new Error(`Missing range for quantile split on dimension ${dimensionIndex}`)
-  }
-
-  if (sortKey.category === 'string') {
-    let low = strToBigInt(range.from, 8)
-    let high = strToBigInt(range.to, 8)
-
-    for (let step = 0; step < BINARY_SEARCH_STEPS; step++) {
-      const midpoint = (low + high) / 2n
-      if (midpoint === low || midpoint === high) break
-
-      const mid = bigIntToStr(midpoint, 8)
-      const rows = await countRows(context, slice.partitionId, replaceSliceRange(slice, dimensionIndex, range.from, mid))
-      if (rows < targetCumRows) low = midpoint
-      else high = midpoint
-    }
-
-    return bigIntToStr((low + high) / 2n, 8)
-  }
-
-  if (sortKey.category === 'datetime') {
-    let low = parsePlannerDateTime(range.from)
-    let high = parsePlannerDateTime(range.to)
-
-    for (let step = 0; step < BINARY_SEARCH_STEPS; step++) {
-      const midpoint = Math.floor((low + high) / 2)
-      if (midpoint === low || midpoint === high) break
-
-      const mid = new Date(midpoint).toISOString()
-      const rows = await countRows(context, slice.partitionId, replaceSliceRange(slice, dimensionIndex, range.from, mid))
-      if (rows < targetCumRows) low = midpoint
-      else high = midpoint
-    }
-
-    return new Date(Math.floor((low + high) / 2)).toISOString()
-  }
-
-  let low = Number(range.from)
-  let high = Number(range.to)
-  for (let step = 0; step < BINARY_SEARCH_STEPS; step++) {
-    const midpoint = Math.floor((low + high) / 2)
-    if (midpoint === low || midpoint === high) break
-
-    const rows = await countRows(context, slice.partitionId, replaceSliceRange(slice, dimensionIndex, range.from, String(midpoint)))
-    if (rows < targetCumRows) low = midpoint
-    else high = midpoint
-  }
-
-  return String(Math.floor((low + high) / 2))
-}
-
-async function hydrateSliceRange(
-  context: QueryContext,
-  slice: PartitionSlice,
-  dimensionIndex: number,
-): Promise<PartitionSlice | undefined> {
-  const currentRange = getSliceRange(slice, dimensionIndex)
-  if (currentRange.from !== undefined && currentRange.to !== undefined) return slice
-
-  const sortKey = context.sortKeys[dimensionIndex]
-  if (!sortKey) return undefined
-
-  const rows = await context.query<{ minVal: string; maxVal: string }>(`
-SELECT
-  toString(min(${sortKey.column})) AS minVal,
-  toString(max(${sortKey.column})) AS maxVal
-FROM ${context.database}.${context.table}
-WHERE ${buildWhereClause(slice.partitionId, slice.ranges, context.sortKeys)}`)
-
-  const observed = rows[0]
-  if (!observed) return undefined
-
-  return {
-    ...slice,
-    ranges: replaceSliceRange(slice, dimensionIndex, observed.minVal, toExclusiveUpperBound(observed.maxVal, sortKey)),
-  }
-}
-
-function buildRootSlice(partition: PartitionInfo): PartitionSlice {
-  return {
-    partitionId: partition.partitionId,
-    ranges: [],
-    estimatedRows: partition.rows,
-    estimatedBytes: partition.bytesOnDisk,
-    isHotKey: false,
-    estimateConfidence: 'high',
-    estimateReason: 'partition-metadata',
-    lineage: [],
-  }
-}
-
-function buildSliceFromRows(
-  partition: PartitionInfo,
-  input: {
-    ranges: SliceRange[]
-    estimatedRows: number
-    isHotKey: boolean
-    hotDimensionIndex?: number
-    hotKeyValue?: string
-    estimateConfidence: EstimateConfidence
-    estimateReason: EstimateReason
-    lineage: SliceLineageStep[]
-  },
-): PartitionSlice {
-  return {
-    partitionId: partition.partitionId,
-    ranges: input.ranges,
-    estimatedRows: input.estimatedRows,
-    estimatedBytes: partition.rows > 0
-      ? Math.round((input.estimatedRows / partition.rows) * partition.bytesOnDisk)
-      : 0,
-    isHotKey: input.isHotKey,
-    hotDimensionIndex: input.hotDimensionIndex,
-    hotKeyValue: input.hotKeyValue,
-    estimateConfidence: input.estimateConfidence,
-    estimateReason: input.estimateReason,
-    lineage: input.lineage,
-  }
-}
-
-function getTargetChunkRows(partition: PartitionInfo, maxChunkBytes: number): number {
-  if (partition.bytesOnDisk <= 0) return partition.rows
-  return (maxChunkBytes * partition.rows) / partition.bytesOnDisk
-}
-
-function mergeAdjacentSlices(slices: PartitionSlice[], maxChunkBytes: number): PartitionSlice[] {
-  if (slices.length <= 1) return slices
-
-  const merged: PartitionSlice[] = []
-  let current: PartitionSlice | undefined
-
-  for (const slice of slices) {
-    if (!current) {
-      current = slice
-      continue
-    }
-
-    const canMerge =
-      !current.isHotKey &&
-      !slice.isHotKey &&
-      haveSameTrailingRanges(current.ranges, slice.ranges) &&
-      current.estimatedBytes + slice.estimatedBytes <= maxChunkBytes * 1.1
-
-    if (!canMerge) {
-      merged.push(current)
-      current = slice
-      continue
-    }
-
-    current = {
-      ...current,
-      ranges: mergeRanges(current.ranges, slice.ranges),
-      estimatedRows: current.estimatedRows + slice.estimatedRows,
-      estimatedBytes: current.estimatedBytes + slice.estimatedBytes,
-    }
-  }
-
-  if (current) merged.push(current)
-  return merged
-}
-
-function mergeRanges(left: SliceRange[], right: SliceRange[]): SliceRange[] {
-  return left.map((leftRange) => {
-    const rightRange = right.find((candidate) => candidate.dimensionIndex === leftRange.dimensionIndex)
-    return rightRange === undefined
-      ? leftRange
-      : {
-        dimensionIndex: leftRange.dimensionIndex,
-        from: leftRange.from,
-        to: rightRange.to,
-      }
-  })
-}
-
-function haveSameTrailingRanges(left: SliceRange[], right: SliceRange[]): boolean {
-  if (left.length !== right.length) return false
-
-  let differingDimensions = 0
-  for (const leftRange of left) {
-    const rightRange = right.find((candidate) => candidate.dimensionIndex === leftRange.dimensionIndex)
-    if (!rightRange) return false
-
-    const same = leftRange.from === rightRange.from && leftRange.to === rightRange.to
-    if (!same) {
-      differingDimensions += 1
-      if (leftRange.to !== rightRange.from) return false
-    }
-  }
-
-  return differingDimensions <= 1
-}
-
-function getCandidateDimensions(sortKeys: SortKeyInfo[], slice: PartitionSlice): number[] {
-  return sortKeys
-    .map((sortKey, index) => ({
-      index,
-      priority: getDimensionPriority(sortKey.category, slice.isHotKey, slice.hotDimensionIndex, index),
-    }))
-    .sort((left, right) => left.priority - right.priority)
-    .map((candidate) => candidate.index)
-}
-
-function getDimensionPriority(
-  category: SortKeyInfo['category'],
-  isHotKey: boolean,
-  hotDimensionIndex: number | undefined,
-  dimensionIndex: number,
-): number {
-  if (isHotKey && hotDimensionIndex === dimensionIndex) return 100
-  if (category === 'string') return 0
-  if (category === 'datetime') return 1
-  return 2
-}
-
-function getSliceRange(slice: Pick<PartitionSlice, 'ranges'>, dimensionIndex: number): SliceRange {
-  return slice.ranges.find((range) => range.dimensionIndex === dimensionIndex)
-    ?? { dimensionIndex, from: undefined, to: undefined }
-}
-
-function replaceSliceRange(
-  slice: Pick<PartitionSlice, 'ranges'>,
-  dimensionIndex: number,
-  from: string | undefined,
-  to: string | undefined,
-): SliceRange[] {
-  return slice.ranges
-    .filter((range) => range.dimensionIndex !== dimensionIndex)
-    .concat([{ dimensionIndex, from, to }])
-    .sort((left, right) => left.dimensionIndex - right.dimensionIndex)
-}
-
-function isExactSliceRange(range: Pick<SliceRange, 'from' | 'to'>): boolean {
-  if (range.from === undefined || range.to === undefined) return false
-  return range.to === `${range.from}\0`
-}
-
-function findHotIdentity(
-  slice: PartitionSlice,
-  sortKeys: SortKeyInfo[],
-): { dimensionIndex: number; value: string } | undefined {
-  for (const range of slice.ranges) {
-    const sortKey = sortKeys[range.dimensionIndex]
-    if (sortKey?.category !== 'string') continue
-    if (isExactSliceRange(range) && range.from !== undefined) {
-      return { dimensionIndex: range.dimensionIndex, value: range.from }
-    }
-  }
-}
-
-function applyHotIdentity(
-  slices: PartitionSlice[],
-  hotIdentity: { dimensionIndex: number; value: string } | undefined,
-): PartitionSlice[] {
-  if (!hotIdentity) return slices
-  return slices.map((slice) => markHotSlice(slice, hotIdentity))
-}
-
-function markHotSlice(
-  slice: PartitionSlice,
-  hotIdentity: { dimensionIndex: number; value: string } | undefined,
-): PartitionSlice {
-  if (!hotIdentity) return slice
-  return {
-    ...slice,
-    isHotKey: true,
-    hotDimensionIndex: hotIdentity.dimensionIndex,
-    hotKeyValue: hotIdentity.value,
-  }
-}
-
-function isEffectiveSplit(parent: PartitionSlice, children: PartitionSlice[]): boolean {
-  if (children.length <= 1) return false
-  return children.some((child) =>
-    child.estimatedRows !== parent.estimatedRows ||
-    JSON.stringify(child.ranges) !== JSON.stringify(parent.ranges)
-  )
-}
-
-function toExclusiveUpperBound(value: string, sortKey: SortKeyInfo): string {
-  if (sortKey.category === 'string') return `${value}\0`
-  if (sortKey.category === 'datetime') return new Date(parsePlannerDateTime(value) + 1000).toISOString()
-  return String(Number(value) + 1)
-}
-
-function getPartitionEndExclusive(partition: PartitionInfo): string {
-  return new Date(parsePlannerDateTime(partition.maxTime) + 1000).toISOString()
-}
-
-function deriveChunkWindow(
-  ranges: SliceRange[],
-  sortKeys: SortKeyInfo[],
-  partition: PartitionInfo | undefined,
-): { from: string; to: string } {
-  for (const range of ranges) {
-    const sortKey = sortKeys[range.dimensionIndex]
-    if (sortKey?.category !== 'datetime') continue
-    return {
-      from: range.from ?? partition?.minTime ?? '',
-      to: range.to ?? partition?.maxTime ?? '',
-    }
-  }
-
-  return {
-    from: partition?.minTime ?? '',
-    to: partition?.maxTime ?? '',
-  }
-}
-
-async function countRows(context: QueryContext, partitionId: string, ranges: SliceRange[]): Promise<number> {
-  const rows = await context.query<{ cnt: string }>(`
-SELECT count() AS cnt
-FROM ${context.database}.${context.table}
-WHERE ${buildWhereClause(partitionId, ranges, context.sortKeys)}`)
-  return Number(rows[0]?.cnt ?? 0)
-}
-
-function buildWhereClause(partitionId: string, ranges: SliceRange[], sortKeys: SortKeyInfo[]): string {
-  const conditions = [`_partition_id = ${quoteSqlString(partitionId)}`]
-
-  for (const range of ranges) {
-    const sortKey = sortKeys[range.dimensionIndex]
-    if (!sortKey) continue
-    if (range.from !== undefined) conditions.push(`${sortKey.column} >= ${formatBound(range.from, sortKey)}`)
-    if (range.to !== undefined) conditions.push(`${sortKey.column} < ${formatBound(range.to, sortKey)}`)
-  }
-
-  return conditions.join('\n  AND ')
-}
-
-function quoteSqlString(value: string): string {
-  return `'${value.replaceAll('\\', '\\\\').replaceAll('\'', '\\\'')}'`
-}
-
-function formatBound(value: string, sortKey: SortKeyInfo): string {
-  if (sortKey.category === 'datetime') {
-    return `parseDateTimeBestEffort(${quoteSqlString(value)})`
-  }
-  if (sortKey.category === 'string') {
-    return `unhex('${Buffer.from(value, 'latin1').toString('hex')}')`
-  }
-  return value
-}
-
-function buildEvenlySpacedBoundaries(
-  from: string,
-  to: string,
-  subCount: number,
-  sortKey: SortKeyInfo,
-): string[] {
-  if (sortKey.category === 'datetime') {
-    const start = parsePlannerDateTime(from)
-    const end = parsePlannerDateTime(to)
-    return uniqueBoundaries(Array.from({ length: subCount + 1 }, (_, index) =>
-      new Date(start + Math.floor(((end - start) * index) / subCount)).toISOString()
-    ))
-  }
-
-  if (sortKey.category === 'numeric') {
-    const start = Number(from)
-    const end = Number(to)
-    return uniqueBoundaries(Array.from({ length: subCount + 1 }, (_, index) =>
-      String(start + Math.floor(((end - start) * index) / subCount))
-    ))
-  }
-
-  const start = strToBigInt(from, 8)
-  const end = strToBigInt(to, 8)
-  return uniqueBoundaries(Array.from({ length: subCount + 1 }, (_, index) =>
-    bigIntToStr(start + ((end - start) * BigInt(index)) / BigInt(subCount), 8)
-  ))
-}
-
-function uniqueBoundaries(boundaries: string[]): string[] {
-  const unique: string[] = []
-  for (const boundary of boundaries) {
-    if (unique[unique.length - 1] !== boundary) {
-      unique.push(boundary)
-    }
-  }
-  return unique
-}
-
-function parsePlannerDateTime(value: string): number {
-  const normalized = value.includes('T') ? value : value.replace(' ', 'T')
-  return Date.parse(normalized.endsWith('Z') ? normalized : `${normalized}Z`)
-}
-
-function strToBigInt(value: string, padTo: number): bigint {
-  const buffer = Buffer.from(value, 'latin1')
-  let result = 0n
-  for (let index = 0; index < padTo; index++) {
-    const byte = index < buffer.length ? (buffer[index] ?? 0) : 0
-    result = (result << 8n) | BigInt(byte)
-  }
-  return result
-}
-
-function bigIntToStr(value: bigint, length: number): string {
-  const buffer = Buffer.alloc(length)
-  let remaining = value
-  for (let index = length - 1; index >= 0; index--) {
-    buffer[index] = Number(remaining & 0xffn)
-    remaining >>= 8n
-  }
-  return buffer.toString('latin1')
-}
-
-function compareBinaryStrings(left: string, right: string): number {
-  return Buffer.from(left, 'latin1').compare(Buffer.from(right, 'latin1'))
-}
-
-function minBinaryString(left: string, right: string): string {
-  return compareBinaryStrings(left, right) <= 0 ? left : right
-}
-
-function maxBinaryString(left: string, right: string): string {
-  return compareBinaryStrings(left, right) >= 0 ? left : right
-}
-
-function nextPrefixValue(prefix: string): string | undefined {
-  if (prefix === '') return undefined
-
-  const buffer = Buffer.from(prefix, 'latin1')
-  for (let index = buffer.length - 1; index >= 0; index--) {
-    const byte = buffer[index]
-    if (byte === undefined) continue
-    if (byte === 0xff) continue
-
-    const next = Buffer.from(buffer.subarray(0, index + 1))
-    next[index] = (next[index] ?? 0) + 1
-    return next.toString('latin1')
-  }
-
-  return undefined
+  return analyzeAndChunk(input)
 }
diff --git a/packages/plugin-backfill/src/chunking/boundary-codec.ts b/packages/plugin-backfill/src/chunking/boundary-codec.ts
new file mode 100644
index 0000000..73b8984
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/boundary-codec.ts
@@ -0,0 +1,109 @@
+import type {
+  Chunk,
+  ChunkPlan,
+  ChunkRange,
+  FocusedValue,
+  SortKey,
+} from './types.js'
+
+/**
+ * Converts a raw boundary into its persisted form. When the sort key declares
+ * `boundaryEncoding: 'hex-latin1'` the value's latin1 bytes are written as hex;
+ * an undefined value, an undefined sort key, or any other encoding returns
+ * the input unchanged.
+ */
+export function encodeBoundary(value: string | undefined, sortKey: SortKey | undefined): string | undefined {
+  if (value === undefined || sortKey?.boundaryEncoding !== 'hex-latin1') return value
+  return Buffer.from(value, 'latin1').toString('hex')
+}
+
+/**
+ * Restores a persisted boundary to its raw form. When the sort key declares
+ * `boundaryEncoding: 'hex-latin1'` the value is read as hex and its bytes are
+ * returned as a latin1 string; an undefined value, an undefined sort key, or
+ * any other encoding returns the input unchanged.
+ */
+export function decodeBoundary(value: string | undefined, sortKey: SortKey | undefined): string | undefined {
+  if (value === undefined || sortKey?.boundaryEncoding !== 'hex-latin1') return value
+  return Buffer.from(value, 'hex').toString('latin1')
+}
+
+/** Encodes both bounds of every range using the sort key at the range's `dimensionIndex`. */
+export function encodeRangesForPlan(
+  ranges: ChunkRange[],
+  sortKeys: SortKey[],
+): ChunkRange[] {
+  return ranges.map(({ dimensionIndex, from, to }) => {
+    const sortKey = sortKeys[dimensionIndex]
+    return { dimensionIndex, from: encodeBoundary(from, sortKey), to: encodeBoundary(to, sortKey) }
+  })
+}
+
+/** Decodes both bounds of every range using the sort key at the range's `dimensionIndex`. */
+export function decodeRangesFromPlan(
+  ranges: ChunkRange[],
+  sortKeys: SortKey[],
+): ChunkRange[] {
+  return ranges.map(({ dimensionIndex, from, to }) => {
+    const sortKey = sortKeys[dimensionIndex]
+    return { dimensionIndex, from: decodeBoundary(from, sortKey), to: decodeBoundary(to, sortKey) }
+  })
+}
+
+/** Encodes a focused value with its dimension's sort key; keeps the raw value if encoding yields undefined. */
+function encodeFocusedValue(
+  focusedValue: FocusedValue | undefined,
+  sortKeys: SortKey[],
+): FocusedValue | undefined {
+  if (!focusedValue) return undefined
+  const { dimensionIndex, value } = focusedValue
+  const encoded = encodeBoundary(value, sortKeys[dimensionIndex])
+  return { dimensionIndex, value: encoded ?? value }
+}
+
+/** Decodes a focused value with its dimension's sort key; keeps the stored value if decoding yields undefined. */
+function decodeFocusedValue(
+  focusedValue: FocusedValue | undefined,
+  sortKeys: SortKey[],
+): FocusedValue | undefined {
+  if (!focusedValue) return undefined
+  const { dimensionIndex, value } = focusedValue
+  const decoded = decodeBoundary(value, sortKeys[dimensionIndex])
+  return { dimensionIndex, value: decoded ?? value }
+}
+
+/**
+ * Returns a copy of the chunk with its ranges and `analysis.focusedValue`
+ * encoded for persistence; every other field is copied through unchanged.
+ */
+export function encodeChunkForPlan(chunk: Chunk, sortKeys: SortKey[]): Chunk {
+  const ranges = encodeRangesForPlan(chunk.ranges, sortKeys)
+  const focusedValue = encodeFocusedValue(chunk.analysis.focusedValue, sortKeys)
+  const analysis = { ...chunk.analysis, focusedValue }
+  return { ...chunk, ranges, analysis }
+}
+
+/**
+ * Returns a copy of the chunk with its ranges and `analysis.focusedValue`
+ * decoded from their persisted form; every other field is copied through unchanged.
+ */
+export function decodeChunkFromPlan(chunk: Chunk, sortKeys: SortKey[]): Chunk {
+  const ranges = decodeRangesFromPlan(chunk.ranges, sortKeys)
+  const focusedValue = decodeFocusedValue(chunk.analysis.focusedValue, sortKeys)
+  const analysis = { ...chunk.analysis, focusedValue }
+  return { ...chunk, ranges, analysis }
+}
+
+/** Returns a copy of the plan whose chunks are encoded with `plan.table.sortKeys`. */
+export function encodeChunkPlanForPersistence(plan: ChunkPlan): ChunkPlan {
+  const encodeWithPlanKeys = (chunk: Chunk): Chunk => encodeChunkForPlan(chunk, plan.table.sortKeys)
+  const chunks = plan.chunks.map(encodeWithPlanKeys)
+  return { ...plan, chunks }
+}
+
+/** Returns a copy of the plan whose chunks are decoded with `plan.table.sortKeys`. */
+export function decodeChunkPlanFromPersistence(plan: ChunkPlan): ChunkPlan {
+  const decodeWithPlanKeys = (chunk: Chunk): Chunk => decodeChunkFromPlan(chunk, plan.table.sortKeys)
+  const chunks = plan.chunks.map(decodeWithPlanKeys)
+  return { ...plan, chunks }
+}
diff --git a/packages/plugin-backfill/src/chunking/build.test.ts b/packages/plugin-backfill/src/chunking/build.test.ts
deleted file mode 100644
index 71aa47e..0000000
--- a/packages/plugin-backfill/src/chunking/build.test.ts
+++ /dev/null
@@ -1,135 +0,0 @@
-import { describe, expect, test } from 'bun:test'
-
-import { buildChunkBoundaries } from './build.js'
-import type { PartitionInfo, SortKeyInfo } from './types.js'
-
-const GiB = 1024 ** 3
-
-describe('buildChunkBoundaries', () => {
-  test('small partition produces one chunk boundary', () => {
-    const partitions: PartitionInfo[] = [
-      { partitionId: '202501', rows: 1000, bytesOnDisk: 5 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T23:59:59.000Z' },
-    ]
-
-    const boundaries = buildChunkBoundaries({
-      partitions,
-      maxChunkBytes: 10 * GiB,
-    })
-
-    expect(boundaries).toHaveLength(1)
-    expect(boundaries[0]?.partitionId).toBe('202501')
-    expect(boundaries[0]?.sortKeyFrom).toBeUndefined()
-    expect(boundaries[0]?.sortKeyTo).toBeUndefined()
-    expect(boundaries[0]?.estimatedBytes).toBe(5 * GiB)
-  })
-
-  test('large partition produces multiple sub-chunks with sort key ranges', () => {
-    const partitions: PartitionInfo[] = [
-      { partitionId: '202501', rows: 10000, bytesOnDisk: 30 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' },
-    ]
-    const sortKey: SortKeyInfo = { column: 'event_time', type: 'DateTime', category: 'datetime' }
-    const sortKeyRanges = new Map([
-      ['202501', { min: '2025-01-01 00:00:00', max: '2025-01-31 00:00:00' }],
-    ])
-
-    const boundaries = buildChunkBoundaries({
-      partitions,
-      maxChunkBytes: 10 * GiB,
-      sortKey,
-      sortKeyRanges,
-    })
-
-    expect(boundaries).toHaveLength(3)
-    for (const b of boundaries) {
-      expect(b.partitionId).toBe('202501')
-      expect(b.sortKeyFrom).toBeDefined()
-      expect(b.sortKeyTo).toBeDefined()
-    }
-  })
-
-  test('large partition without sort key produces single chunk', () => {
-    const partitions: PartitionInfo[] = [
-      { partitionId: '202501', rows: 10000, bytesOnDisk: 30 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' },
-    ]
-
-    const boundaries = buildChunkBoundaries({
-      partitions,
-      maxChunkBytes: 10 * GiB,
-    })
-
-    expect(boundaries).toHaveLength(1)
-    expect(boundaries[0]?.estimatedBytes).toBe(30 * GiB)
-  })
-
-  test('mixed sizes produce correct boundary counts', () => {
-    const partitions: PartitionInfo[] = [
-      { partitionId: '202501', rows: 500, bytesOnDisk: 5 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' },
-      { partitionId: '202502', rows: 5000, bytesOnDisk: 25 * GiB, minTime: '2025-02-01T00:00:00.000Z', maxTime: '2025-02-28T00:00:00.000Z' },
-    ]
-    const sortKey: SortKeyInfo = { column: 'event_time', type: 'DateTime', category: 'datetime' }
-    const sortKeyRanges = new Map([
-      ['202502', { min: '2025-02-01 00:00:00', max: '2025-02-28 00:00:00' }],
-    ])
-
-    const boundaries = buildChunkBoundaries({
-      partitions,
-      maxChunkBytes: 10 * GiB,
-      sortKey,
-      sortKeyRanges,
-    })
-
-    // First partition: 5 GiB < 10 GiB -> 1 boundary
-    // Second partition: 25 GiB / 10 GiB = 3 sub-boundaries
-    expect(boundaries).toHaveLength(4)
-
-    const p1 = boundaries.filter((b) => b.partitionId === '202501')
-    const p2 = boundaries.filter((b) => b.partitionId === '202502')
-    expect(p1).toHaveLength(1)
-    expect(p2).toHaveLength(3)
-  })
-
-  test('large partition with min === max sort key produces single chunk', () => {
-    const partitions: PartitionInfo[] = [
-      { partitionId: '202501', rows: 10000, bytesOnDisk: 30 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' },
-    ]
-    const sortKey: SortKeyInfo = { column: 'event_type', type: 'String', category: 'string' }
-    const sortKeyRanges = new Map([
-      ['202501', { min: 'click', max: 'click' }],
-    ])
-
-    const boundaries = buildChunkBoundaries({
-      partitions,
-      maxChunkBytes: 10 * GiB,
-      sortKey,
-      sortKeyRanges,
-    })
-
-    expect(boundaries).toHaveLength(1)
-    expect(boundaries[0]?.partitionId).toBe('202501')
-    expect(boundaries[0]?.sortKeyFrom).toBeUndefined()
-    expect(boundaries[0]?.sortKeyTo).toBeUndefined()
-  })
-
-  test('numeric sort key produces numeric range sub-chunks', () => {
-    const partitions: PartitionInfo[] = [
-      { partitionId: '202501', rows: 10000, bytesOnDisk: 20 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' },
-    ]
-    const sortKey: SortKeyInfo = { column: 'id', type: 'UInt64', category: 'numeric' }
-    const sortKeyRanges = new Map([
-      ['202501', { min: '100', max: '200' }],
-    ])
-
-    const boundaries = buildChunkBoundaries({
-      partitions,
-      maxChunkBytes: 10 * GiB,
-      sortKey,
-      sortKeyRanges,
-    })
-
-    expect(boundaries).toHaveLength(2)
-    expect(boundaries[0]?.sortKeyFrom).toBe('100')
-    expect(boundaries[0]?.sortKeyTo).toBe('150')
-    expect(boundaries[1]?.sortKeyFrom).toBe('150')
-    expect(boundaries[1]?.sortKeyTo).toBe('201')
-  })
-})
diff --git a/packages/plugin-backfill/src/chunking/build.ts b/packages/plugin-backfill/src/chunking/build.ts
deleted file mode 100644
index cc6693b..0000000
--- a/packages/plugin-backfill/src/chunking/build.ts
+++ /dev/null
@@ -1,60 +0,0 @@
-import { splitSortKeyRange } from './splitter.js'
-import type { ChunkBoundary, PartitionInfo, SortKeyInfo } from './types.js'
-
-export function buildChunkBoundaries(input: {
-  partitions: PartitionInfo[]
-  maxChunkBytes: number
-  sortKey?: SortKeyInfo
-  sortKeyRanges?: Map<string, { min: string; max: string }>
-}): ChunkBoundary[] {
-  const boundaries: ChunkBoundary[] = []
-
-  for (const partition of input.partitions) {
-    if (partition.bytesOnDisk <= input.maxChunkBytes) {
-      boundaries.push({
-        partitionId: partition.partitionId,
-        estimatedBytes: partition.bytesOnDisk,
-      })
-    } else if (input.sortKey && input.sortKeyRanges) {
-      const range = input.sortKeyRanges.get(partition.partitionId)
-      if (!range) {
-        // No range data — emit as single chunk
-        boundaries.push({
-          partitionId: partition.partitionId,
-          estimatedBytes: partition.bytesOnDisk,
-        })
-        continue
-      }
-
-      // If min === max, splitting would produce empty sub-ranges; emit as single chunk
-      if (range.min === range.max) {
-        boundaries.push({
-          partitionId: partition.partitionId,
-          estimatedBytes: partition.bytesOnDisk,
-        })
-        continue
-      }
-
-      const subCount = Math.ceil(partition.bytesOnDisk / input.maxChunkBytes)
-      const subRanges = splitSortKeyRange(input.sortKey.category, range.min, range.max, subCount)
-      const estimatedBytesPerSub = Math.ceil(partition.bytesOnDisk / subCount)
-
-      for (const sub of subRanges) {
-        boundaries.push({
-          partitionId: partition.partitionId,
-          sortKeyFrom: sub.from,
-          sortKeyTo: sub.to,
-          estimatedBytes: estimatedBytesPerSub,
-        })
-      }
-    } else {
-      // No sort key info — emit as single chunk despite being oversized
-      boundaries.push({
-        partitionId: partition.partitionId,
-        estimatedBytes: partition.bytesOnDisk,
-      })
-    }
-  }
-
-  return boundaries
-}
diff --git a/packages/plugin-backfill/src/chunking/introspect.test.ts b/packages/plugin-backfill/src/chunking/introspect.test.ts
deleted file mode 100644
index b40822b..0000000
--- a/packages/plugin-backfill/src/chunking/introspect.test.ts
+++ /dev/null
@@ -1,274 +0,0 @@
-import { describe, expect, test } from 'bun:test'
-
-import { introspectTable, queryPartitionInfo, querySortKeyInfo, querySortKeyRanges, querySortKeys } from './introspect.js'
-
-describe('queryPartitionInfo', () => {
-  test('maps system.parts rows to PartitionInfo array', async () => {
-    const mockRows = [
-      { partition_id: '202501', total_rows: '1000', total_bytes: '5000000', min_time: '2025-01-01 00:00:00', max_time: '2025-01-31 23:59:59' },
-      { partition_id: '202502', total_rows: '2000', total_bytes: '8000000', min_time: '2025-02-01 00:00:00', max_time: '2025-02-28 23:59:59' },
-    ]
-
-    const result = await queryPartitionInfo({
-      database: 'default',
-      table: 'events',
-      query: async () => mockRows as never,
-    })
-
-    expect(result).toHaveLength(2)
-    expect(result[0]?.partitionId).toBe('202501')
-    expect(result[0]?.rows).toBe(1000)
-    expect(result[0]?.bytesOnDisk).toBe(5000000)
-    expect(result[1]?.partitionId).toBe('202502')
-    expect(result[1]?.rows).toBe(2000)
-  })
-
-  test('filters out partitions before --from', async () => {
-    const mockRows = [
-      { partition_id: '202501', total_rows: '1000', total_bytes: '5000000', min_time: '2025-01-01 00:00:00', max_time: '2025-01-31 23:59:59' },
-      { partition_id: '202503', total_rows: '3000', total_bytes: '9000000', min_time: '2025-03-01 00:00:00', max_time: '2025-03-31 23:59:59' },
-    ]
-
-    const result = await queryPartitionInfo({
-      database: 'default',
-      table: 'events',
-      from: '2025-02-01T00:00:00.000Z',
-      query: async () => mockRows as never,
-    })
-
-    expect(result).toHaveLength(1)
-    expect(result[0]?.partitionId).toBe('202503')
-  })
-
-  test('filters out partitions at or after --to', async () => {
-    const mockRows = [
-      { partition_id: '202501', total_rows: '1000', total_bytes: '5000000', min_time: '2025-01-01 00:00:00', max_time: '2025-01-31 23:59:59' },
-      { partition_id: '202503', total_rows: '3000', total_bytes: '9000000', min_time: '2025-03-01 00:00:00', max_time: '2025-03-31 23:59:59' },
-    ]
-
-    const result = await queryPartitionInfo({
-      database: 'default',
-      table: 'events',
-      to: '2025-03-01T00:00:00.000Z',
-      query: async () => mockRows as never,
-    })
-
-    expect(result).toHaveLength(1)
-    expect(result[0]?.partitionId).toBe('202501')
-  })
-})
-
-describe('querySortKeyInfo', () => {
-  test('returns sort key info for table with DateTime sorting key', async () => {
-    const query = async <T>(sql: string) => {
-      if (sql.includes('system.tables')) {
-        return [{ sorting_key: 'event_time' }] as T[]
-      }
-      if (sql.includes('system.columns')) {
-        return [{ type: 'DateTime' }] as T[]
-      }
-      return [] as T[]
-    }
-
-    const result = await querySortKeyInfo({
-      database: 'default',
-      table: 'events',
-      query,
-    })
-
-    expect(result).toBeDefined()
-    expect(result?.column).toBe('event_time')
-    expect(result?.type).toBe('DateTime')
-    expect(result?.category).toBe('datetime')
-  })
-
-  test('returns numeric category for Int64 sorting key', async () => {
-    const query = async <T>(sql: string) => {
-      if (sql.includes('system.tables')) return [{ sorting_key: 'id' }] as T[]
-      if (sql.includes('system.columns')) return [{ type: 'Int64' }] as T[]
-      return [] as T[]
-    }
-
-    const result = await querySortKeyInfo({ database: 'default', table: 'events', query })
-
-    expect(result?.category).toBe('numeric')
-  })
-
-  test('returns string category for String sorting key', async () => {
-    const query = async <T>(sql: string) => {
-      if (sql.includes('system.tables')) return [{ sorting_key: 'name' }] as T[]
-      if (sql.includes('system.columns')) return [{ type: 'String' }] as T[]
-      return [] as T[]
-    }
-
-    const result = await querySortKeyInfo({ database: 'default', table: 'events', query })
-
-    expect(result?.category).toBe('string')
-  })
-
-  test('extracts column name from function expression', async () => {
-    const query = async <T>(sql: string) => {
-      if (sql.includes('system.tables')) return [{ sorting_key: 'toDate(event_time)' }] as T[]
-      if (sql.includes('system.columns')) return [{ type: 'DateTime' }] as T[]
-      return [] as T[]
-    }
-
-    const result = await querySortKeyInfo({ database: 'default', table: 'events', query })
-
-    expect(result?.column).toBe('event_time')
-  })
-
-  test('returns undefined when table has no sorting key', async () => {
-    const query = async <T>(sql: string) => {
-      if (sql.includes('system.tables')) return [{ sorting_key: '' }] as T[]
-      return [] as T[]
-    }
-
-    const result = await querySortKeyInfo({ database: 'default', table: 'events', query })
-
-    expect(result).toBeUndefined()
-  })
-
-  test('returns first column from multi-column sorting key', async () => {
-    const query = async <T>(sql: string) => {
-      if (sql.includes('system.tables')) return [{ sorting_key: 'event_time, id' }] as T[]
-      if (sql.includes('system.columns')) return [{ name: 'event_time', type: 'DateTime' }, { name: 'id', type: 'UInt64' }] as T[]
-      return [] as T[]
-    }
-
-    const result = await querySortKeyInfo({ database: 'default', table: 'events', query })
-
-    expect(result?.column).toBe('event_time')
-  })
-
-  test('extracts a single referenced column from function expressions with commas', async () => {
-    const query = async <T>(sql: string) => {
-      if (sql.includes('system.tables')) {
-        return [{ sorting_key: 'toStartOfInterval(ts, INTERVAL 5 MINUTE), user_id' }] as T[]
-      }
-      if (sql.includes('system.columns')) {
-        return [
-          { name: 'ts', type: 'DateTime' },
-          { name: 'user_id', type: 'String' },
-        ] as T[]
-      }
-      return [] as T[]
-    }
-
-    const result = await querySortKeys({ database: 'default', table: 'events', query })
-
-    expect(result.map((key) => key.column)).toEqual(['ts', 'user_id'])
-    expect(result.map((key) => key.category)).toEqual(['datetime', 'string'])
-  })
-
-  test('skips ambiguous tuple expressions that do not map to one physical column', async () => {
-    const query = async <T>(sql: string) => {
-      if (sql.includes('system.tables')) {
-        return [{ sorting_key: 'tuple(user_id, session_id), event_time' }] as T[]
-      }
-      if (sql.includes('system.columns')) {
-        return [
-          { name: 'user_id', type: 'String' },
-          { name: 'session_id', type: 'String' },
-          { name: 'event_time', type: 'DateTime' },
-        ] as T[]
-      }
-      return [] as T[]
-    }
-
-    const result = await querySortKeys({ database: 'default', table: 'events', query })
-
-    expect(result.map((key) => key.column)).toEqual(['event_time'])
-  })
-})
-
-describe('querySortKeyRanges', () => {
-  test('returns min/max per partition', async () => {
-    const query = async <T>() => {
-      return [
-        { partition_id: '202501', min_val: '2025-01-01 00:00:00', max_val: '2025-01-31 23:59:59' },
-        { partition_id: '202502', min_val: '2025-02-01 00:00:00', max_val: '2025-02-28 23:59:59' },
-      ] as T[]
-    }
-
-    const result = await querySortKeyRanges({
-      database: 'default',
-      table: 'events',
-      sortKeyColumn: 'event_time',
-      partitionIds: ['202501', '202502'],
-      query,
-    })
-
-    expect(result.size).toBe(2)
-    expect(result.get('202501')?.min).toBe('2025-01-01 00:00:00')
-    expect(result.get('202502')?.max).toBe('2025-02-28 23:59:59')
-  })
-
-  test('returns empty map for empty partition list', async () => {
-    const query = async <T>() => [] as T[]
-
-    const result = await querySortKeyRanges({
-      database: 'default',
-      table: 'events',
-      sortKeyColumn: 'event_time',
-      partitionIds: [],
-      query,
-    })
-
-    expect(result.size).toBe(0)
-  })
-})
-
-describe('introspectTable', () => {
-  test('returns partitions and sort key in a single call', async () => {
-    const query = async <T>(sql: string) => {
-      if (sql.includes('system.parts')) {
-        return [
-          { partition_id: '202501', total_rows: '1000', total_bytes: '5000000', min_time: '2025-01-01 00:00:00', max_time: '2025-01-31 23:59:59' },
-        ] as T[]
-      }
-      if (sql.includes('system.tables')) {
-        return [{ sorting_key: 'event_time' }] as T[]
-      }
-      if (sql.includes('system.columns')) {
-        return [{ type: 'DateTime' }] as T[]
-      }
-      return [] as T[]
-    }
-
-    const result = await introspectTable({
-      database: 'default',
-      table: 'events',
-      query,
-    })
-
-    expect(result.partitions).toHaveLength(1)
-    expect(result.partitions[0]?.partitionId).toBe('202501')
-    expect(result.sortKey).toBeDefined()
-    expect(result.sortKey?.column).toBe('event_time')
-    expect(result.sortKey?.category).toBe('datetime')
-  })
-
-  test('returns undefined sortKey when table has no sorting key', async () => {
-    const query = async <T>(sql: string) => {
-      if (sql.includes('system.parts')) {
-        return [
-          { partition_id: '202501', total_rows: '1000', total_bytes: '5000000', min_time: '2025-01-01 00:00:00', max_time: '2025-01-31 23:59:59' },
-        ] as T[]
-      }
-      if (sql.includes('system.tables')) {
-        return [{ sorting_key: '' }] as T[]
-      }
-      return [] as T[]
-    }
-
-    const result = await introspectTable({
-      database: 'default',
-      table: 'events',
-      query,
-    })
-
-    expect(result.partitions).toHaveLength(1)
-    expect(result.sortKey).toBeUndefined()
-  })
-})
diff --git a/packages/plugin-backfill/src/chunking/introspect.ts b/packages/plugin-backfill/src/chunking/introspect.ts
deleted file mode 100644
index 16f0c79..0000000
--- a/packages/plugin-backfill/src/chunking/introspect.ts
+++ /dev/null
@@ -1,262 +0,0 @@
-import type { PartitionInfo, SortKeyInfo } from './types.js'
-
-const NUMERIC_TYPES = new Set([
-  'Int8', 'Int16', 'Int32', 'Int64', 'Int128', 'Int256',
-  'UInt8', 'UInt16', 'UInt32', 'UInt64', 'UInt128', 'UInt256',
-  'Float32', 'Float64',
-])
-
-const DATETIME_TYPES = new Set(['Date', 'Date32', 'DateTime', 'DateTime64'])
-
-function classifySortKeyType(type: string): SortKeyInfo['category'] {
-  if (NUMERIC_TYPES.has(type)) return 'numeric'
-  if (DATETIME_TYPES.has(type)) return 'datetime'
-  if (type.startsWith('DateTime64(')) return 'datetime'
-  if (type.startsWith("DateTime('")) return 'datetime'
-  return 'string'
-}
-
-export async function queryPartitionInfo(input: {
-  database: string
-  table: string
-  from?: string
-  to?: string
-  query: <T>(sql: string) => Promise<T[]>
-}): Promise<PartitionInfo[]> {
-  // Force replica sync on the target table before reading system.parts.
-  // select_sequential_consistency is only effective on user tables, not system
-  // tables, so this preliminary query ensures the replica has caught up with
-  // all pending writes before we inspect part metadata.
-  await input.query(
-    `SELECT 1 FROM ${input.database}.${input.table} LIMIT 1 SETTINGS select_sequential_consistency = 1`
-  )
-
-  const rows = await input.query<{
-    partition_id: string
-    total_rows: string
-    total_bytes: string
-    total_uncompressed_bytes?: string
-    min_time: string
-    max_time: string
-  }>(
-    `SELECT
-  partition_id,
-  toString(sum(rows)) AS total_rows,
-  toString(sum(bytes_on_disk)) AS total_bytes,
-  toString(sum(data_uncompressed_bytes)) AS total_uncompressed_bytes,
-  toString(min(min_time)) AS min_time,
-  toString(max(max_time)) AS max_time
-FROM system.parts
-WHERE database = '${input.database}'
-  AND table = '${input.table}'
-  AND active = 1
-GROUP BY partition_id
-ORDER BY partition_id
-SETTINGS select_sequential_consistency = 1`
-  )
-
-  const partitions: PartitionInfo[] = rows.map((row) => ({
-    partitionId: row.partition_id,
-    rows: Number(row.total_rows),
-    bytesOnDisk: Number(row.total_bytes),
-    bytesUncompressed: Number(row.total_uncompressed_bytes ?? row.total_bytes),
-    minTime: new Date(row.min_time).toISOString(),
-    maxTime: new Date(row.max_time).toISOString(),
-  }))
-
-  return partitions.filter((p) => {
-    if (input.from && p.maxTime < input.from) return false
-    if (input.to && p.minTime >= input.to) return false
-    return true
-  })
-}
-
-function extractSortKeyColumns(sortingKey: string): string[] {
-  return splitTopLevelCsv(sortingKey)
-    .map((part) => part.trim())
-    .filter((part): part is string => part.length > 0)
-}
-
-function splitTopLevelCsv(input: string): string[] {
-  const parts: string[] = []
-  let current = ''
-  let depth = 0
-  let quote: "'" | '"' | undefined
-
-  for (let index = 0; index < input.length; index++) {
-    const char = input[index]
-    if (char === undefined) continue
-
-    if (quote) {
-      current += char
-      if (char === quote && input[index - 1] !== '\\') {
-        quote = undefined
-      }
-      continue
-    }
-
-    if (char === '\'' || char === '"') {
-      quote = char
-      current += char
-      continue
-    }
-
-    if (char === '(') {
-      depth += 1
-      current += char
-      continue
-    }
-
-    if (char === ')') {
-      depth = Math.max(0, depth - 1)
-      current += char
-      continue
-    }
-
-    if (char === ',' && depth === 0) {
-      parts.push(current.trim())
-      current = ''
-      continue
-    }
-
-    current += char
-  }
-
-  if (current.trim().length > 0) {
-    parts.push(current.trim())
-  }
-
-  return parts
-}
-
-function resolveSortKeyColumn(expression: string, knownColumns: Set<string>): string | undefined {
-  const trimmed = expression.trim()
-  if (knownColumns.has(trimmed)) {
-    return trimmed
-  }
-
-  const identifiers = Array.from(trimmed.matchAll(/\b[A-Za-z_][A-Za-z0-9_]*\b/g))
-    .map((match) => match[0])
-    .filter((identifier): identifier is string => Boolean(identifier))
-
-  const matches = Array.from(new Set(identifiers.filter((identifier) => knownColumns.has(identifier))))
-  if (matches.length === 1) {
-    return matches[0]
-  }
-
-  return undefined
-}
-
-function resolveSortKeyColumnWithoutSchema(expression: string): string | undefined {
-  const trimmed = expression.trim()
-  if (/^[A-Za-z_][A-Za-z0-9_]*$/.test(trimmed)) {
-    return trimmed
-  }
-
-  const identifiers = Array.from(trimmed.matchAll(/\b[A-Za-z_][A-Za-z0-9_]*\b/g))
-    .map((match) => match[0])
-    .filter((identifier): identifier is string => Boolean(identifier))
-
-  return identifiers.length > 0 ? identifiers[identifiers.length - 1] : undefined
-}
-
-export async function querySortKeys(input: {
-  database: string
-  table: string
-  query: <T>(sql: string) => Promise<T[]>
-}): Promise<SortKeyInfo[]> {
-  const tableRows = await input.query<{ sorting_key: string }>(
-    `SELECT sorting_key FROM system.tables WHERE database = '${input.database}' AND name = '${input.table}'`
-  )
-
-  const sortingKey = tableRows[0]?.sorting_key
-  if (!sortingKey) return []
-
-  const expressions = extractSortKeyColumns(sortingKey)
-  if (expressions.length === 0) return []
-
-  const columnRows = await input.query<{ name?: string; type: string }>(
-    `SELECT name, type FROM system.columns WHERE database = '${input.database}' AND table = '${input.table}'`
-  )
-  const typeByName = new Map(
-    columnRows
-      .filter((row): row is { name: string; type: string } => Boolean(row.name))
-      .map((row) => [row.name, row.type])
-  )
-  const knownColumns = new Set(typeByName.keys())
-
-  if (knownColumns.size === 0) {
-    return expressions.flatMap((expression, index) => {
-      const column = resolveSortKeyColumnWithoutSchema(expression)
-      const type = columnRows[index]?.type ?? columnRows[0]?.type
-      if (!column || !type) return []
-
-      return [{
-        column,
-        type,
-        category: classifySortKeyType(type),
-      }]
-    })
-  }
-
-  return expressions.flatMap((expression) => {
-    const column = resolveSortKeyColumn(expression, knownColumns)
-    if (!column) return []
-    const type = typeByName.get(column) ?? 'String'
-    return [{
-      column,
-      type,
-      category: classifySortKeyType(type),
-    }]
-  })
-}
-
-export async function querySortKeyInfo(input: {
-  database: string
-  table: string
-  query: <T>(sql: string) => Promise<T[]>
-}): Promise<SortKeyInfo | undefined> {
-  return (await querySortKeys(input))[0]
-}
-
-export async function querySortKeyRanges(input: {
-  database: string
-  table: string
-  sortKeyColumn: string
-  partitionIds: string[]
-  query: <T>(sql: string) => Promise<T[]>
-}): Promise<Map<string, { min: string; max: string }>> {
-  if (input.partitionIds.length === 0) return new Map()
-
-  const inList = input.partitionIds.map((id) => `'${id}'`).join(', ')
-  const rows = await input.query<{
-    partition_id: string
-    min_val: string
-    max_val: string
-  }>(
-    `SELECT _partition_id AS partition_id, toString(min(${input.sortKeyColumn})) AS min_val, toString(max(${input.sortKeyColumn})) AS max_val FROM ${input.database}.${input.table} WHERE _partition_id IN (${inList}) GROUP BY _partition_id SETTINGS select_sequential_consistency = 1`
-  )
-
-  const result = new Map<string, { min: string; max: string }>()
-  for (const row of rows) {
-    result.set(row.partition_id, { min: row.min_val, max: row.max_val })
-  }
-  return result
-}
-
-export async function introspectTable(input: {
-  database: string
-  table: string
-  from?: string
-  to?: string
-  query: <T>(sql: string) => Promise<T[]>
-}): Promise<{ partitions: PartitionInfo[]; sortKey?: SortKeyInfo; sortKeys: SortKeyInfo[] }> {
-  const partitions = await queryPartitionInfo(input)
-  const sortKeys = await querySortKeys({
-    database: input.database,
-    table: input.table,
-    query: input.query,
-  })
-
-  return { partitions, sortKey: sortKeys[0], sortKeys }
-}
diff --git a/packages/plugin-backfill/src/chunking/partition-slices.ts b/packages/plugin-backfill/src/chunking/partition-slices.ts
new file mode 100644
index 0000000..18099b0
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/partition-slices.ts
@@ -0,0 +1,153 @@
+import type {
+  ChunkEstimate,
+  EstimateConfidence,
+  EstimateReason,
+  Partition,
+  PartitionSlice,
+  ChunkDerivationStep,
+  ChunkRange,
+} from './types.js'
+
+/**
+ * Builds the unsplit slice for a whole partition: no range restrictions,
+ * an empty lineage, and an estimate copied from the partition's own
+ * row/byte counts, tagged `high` confidence / `partition-metadata`.
+ */
+export function buildRootSlice(partition: Partition): PartitionSlice {
+  const { partitionId, rows, bytesCompressed, bytesUncompressed } = partition
+  const estimate: PartitionSlice['estimate'] = {
+    rows,
+    bytesCompressed,
+    bytesUncompressed,
+    confidence: 'high',
+    reason: 'partition-metadata',
+  }
+  return { partitionId, ranges: [], estimate, analysis: { lineage: [] } }
+}
+
+/**
+ * Estimates the size of a slice holding `rows` rows by scaling the
+ * partition's compressed/uncompressed byte totals by `rows / partition.rows`
+ * and rounding. A partition whose row count is not positive yields zero bytes.
+ *
+ * @returns The estimate, carrying `rows`, `confidence` and `reason` through unchanged.
+ */
+export function buildSliceEstimate(
+  partition: Partition,
+  rows: number,
+  confidence: EstimateConfidence,
+  reason: EstimateReason,
+): ChunkEstimate {
+  // Guard the division: with partition.rows <= 0 there is no usable ratio.
+  const scale = (totalBytes: number): number =>
+    partition.rows > 0 ? Math.round((rows / partition.rows) * totalBytes) : 0
+  const bytesCompressed = scale(partition.bytesCompressed)
+  const bytesUncompressed = scale(partition.bytesUncompressed)
+
+  return { rows, bytesCompressed, bytesUncompressed, confidence, reason }
+}
+
+/**
+ * Assembles a slice of `partition` from a row count plus the ranges that
+ * bound it. The byte estimate comes from `buildSliceEstimate`; `ranges`,
+ * `focusedValue` and `lineage` are stored as given (not copied).
+ * `analysis.focusedValue` is always set, to `undefined` when omitted.
+ */
+export function buildSliceFromRows(
+  partition: Partition,
+  input: {
+    ranges: ChunkRange[]
+    rows: number
+    focusedValue?: PartitionSlice['analysis']['focusedValue']
+    confidence: EstimateConfidence
+    reason: EstimateReason
+    lineage: ChunkDerivationStep[]
+  },
+): PartitionSlice {
+  const { ranges, rows, focusedValue, confidence, reason, lineage } = input
+  const estimate = buildSliceEstimate(partition, rows, confidence, reason)
+  return { partitionId: partition.partitionId, ranges, estimate, analysis: { focusedValue, lineage } }
+}
+
+/**
+ * Row count (possibly fractional) whose compressed size matches `targetChunkBytes` at this partition's bytes-per-row ratio; all rows when `bytesCompressed` is not positive.
+ */
+export function getTargetChunkRows(partition: Partition, targetChunkBytes: number): number {
+  const { rows, bytesCompressed } = partition
+  return bytesCompressed <= 0 ? rows : (targetChunkBytes * rows) / bytesCompressed
+}
+
+/**
+ * Greedily merges neighbouring slices, in input order, while the combined
+ * compressed size stays within 110% of `targetChunkBytes`.
+ *
+ * Slices merge only when they share a partition, neither has a `focusedValue`,
+ * and `haveSameTrailingRanges` accepts their ranges. A merged slice keeps the
+ * left slice's confidence, reason and analysis; rows and bytes are summed.
+ * Inputs of at most one slice are returned as-is (same array).
+ */
+export function mergeAdjacentSlices(slices: PartitionSlice[], targetChunkBytes: number): PartitionSlice[] {
+  if (slices.length <= 1) return slices
+  const fits = (left: PartitionSlice, right: PartitionSlice): boolean =>
+    // A merged slice carries one partitionId, so never merge across partitions.
+    left.partitionId === right.partitionId &&
+    !left.analysis.focusedValue &&
+    !right.analysis.focusedValue &&
+    haveSameTrailingRanges(left.ranges, right.ranges) &&
+    left.estimate.bytesCompressed + right.estimate.bytesCompressed <= targetChunkBytes * 1.1
+
+  const merged: PartitionSlice[] = []
+  let current: PartitionSlice | undefined
+
+  for (const slice of slices) {
+    if (current === undefined || !fits(current, slice)) {
+      if (current !== undefined) merged.push(current)
+      current = slice
+      continue
+    }
+    current = {
+      ...current,
+      ranges: mergeRanges(current.ranges, slice.ranges),
+      estimate: {
+        ...current.estimate,
+        rows: current.estimate.rows + slice.estimate.rows,
+        bytesCompressed: current.estimate.bytesCompressed + slice.estimate.bytesCompressed,
+        bytesUncompressed: current.estimate.bytesUncompressed + slice.estimate.bytesUncompressed,
+      },
+    }
+  }
+  if (current) merged.push(current)
+  return merged
+}
+
+/**
+ * For each left range, extends its upper bound to that of the right range on
+ * the same dimension; left ranges with no counterpart are kept unchanged.
+ */
+function mergeRanges(left: ChunkRange[], right: ChunkRange[]): ChunkRange[] {
+  return left.map((range) => {
+    const match = right.find((other) => other.dimensionIndex === range.dimensionIndex)
+    if (match === undefined) return range
+    // Same dimension on both sides: span from the left start to the right end.
+    return { dimensionIndex: range.dimensionIndex, from: range.from, to: match.to }
+  })
+}
+
+/**
+ * True when both lists have the same length, every left range has a counterpart
+ * on the same dimension, and at most one pair differs. Any differing pair must be
+ * contiguous (left `to` equals right `from`). Two empty lists qualify.
+ */
+function haveSameTrailingRanges(left: ChunkRange[], right: ChunkRange[]): boolean {
+  if (left.length !== right.length) return false
+  let mismatches = 0
+  for (const a of left) {
+    const b = right.find((candidate) => candidate.dimensionIndex === a.dimensionIndex)
+    if (!b) return false
+    if (a.from === b.from && a.to === b.to) continue
+    // Differing ranges must touch end-to-start to be mergeable.
+    if (a.to !== b.from) return false
+    mismatches += 1
+  }
+  return mismatches <= 1
+}
diff --git a/packages/plugin-backfill/src/chunking/planner.ts b/packages/plugin-backfill/src/chunking/planner.ts
new file mode 100644
index 0000000..116d3a9
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/planner.ts
@@ -0,0 +1,358 @@
+import { buildRootSlice, mergeAdjacentSlices } from './partition-slices.js'
+import { introspectPartitions, introspectSortKeys } from './services/metadata-source.js'
+import { getRowProbeStrategy, getSortKeyRange, parsePlannerDateTime } from './services/row-probe.js'
+import { splitSliceWithEqualWidthRanges } from './strategies/equal-width-split.js'
+import { buildSingleChunkPartition } from './strategies/metadata-single-chunk.js'
+import {
+  findQuantileBoundaryOnDimension,
+  splitSliceWithQuantiles,
+} from './strategies/quantile-range-split.js'
+import { refinePartitionSlices } from './strategies/refinement.js'
+import { buildRootStringUpperBound, splitSliceWithStringPrefixes } from './strategies/string-prefix-split.js'
+import { splitSliceWithTemporalBuckets } from './strategies/temporal-bucket-split.js'
+import { getCandidateDimensions } from './strategy-policy.js'
+import type {
+  Chunk,
+  ChunkPlan,
+  GenerateChunkPlanInput,
+  Partition,
+  PartitionBuildResult,
+  PartitionSlice,
+  PlannerContext,
+  SortKey,
+  TableProfile,
+} from './types.js'
+import { generateChunkId, generatePlanId } from './utils/ids.js'
+import { getChunkRange, isExactChunkRange, replaceChunkRange } from './utils/ranges.js'
+
+const MAX_SPLIT_DEPTH_MULTIPLIER = 3 // splitSliceRecursively stops once depth >= sortKeys.length * this value
+const STOP_SPLIT_FUZZ_FACTOR = 1.5 // slices up to 1.5x targetChunkBytes are accepted without further splitting
+
+/**
+ * Builds the chunk plan for one table: reads partition and sort-key metadata,
+ * plans each partition in turn, assigns chunk ids and aggregates plan-level
+ * totals and statistics. `rowProbeStrategy` falls back to `'count'` when unset.
+ */
+export async function generateChunkPlan(input: GenerateChunkPlanInput): Promise<ChunkPlan> {
+  const context: PlannerContext = {
+    database: input.database,
+    table: input.table,
+    from: input.from,
+    to: input.to,
+    targetChunkBytes: input.targetChunkBytes,
+    query: input.query,
+    rowProbeStrategy: input.rowProbeStrategy ?? 'count',
+  }
+
+  const partitions = await introspectPartitions(context)
+  const sortKeys = await introspectSortKeys(context)
+  const table: TableProfile = { database: input.database, table: input.table, sortKeys }
+  const planId = generatePlanId()
+
+  const slices: PartitionSlice[] = []
+  const plannedPartitions: Partition[] = []
+  for (const partition of partitions) {
+    const result = await planPartition(context, partition, table)
+    // Append one by one: `push(...result.slices)` can exceed the engine's argument limit on huge partitions.
+    for (const slice of result.slices) slices.push(slice)
+    plannedPartitions.push({ ...partition, diagnostics: result.diagnostics })
+  }
+
+  const chunks = assignChunkIds(planId, slices)
+  const chunkBytes = chunks.map((chunk) => chunk.estimate.bytesCompressed)
+
+  return {
+    planId,
+    generatedAt: new Date().toISOString(),
+    rowProbeStrategy: getRowProbeStrategy(context),
+    targetChunkBytes: context.targetChunkBytes,
+    table,
+    partitions: plannedPartitions,
+    chunks,
+    totalRows: partitions.reduce((sum, partition) => sum + partition.rows, 0),
+    totalBytesCompressed: partitions.reduce((sum, partition) => sum + partition.bytesCompressed, 0),
+    totalBytesUncompressed: partitions.reduce((sum, partition) => sum + partition.bytesUncompressed, 0),
+    stats: {
+      totalPartitions: partitions.length,
+      oversizedPartitions: partitions.filter((partition) => partition.bytesCompressed > context.targetChunkBytes).length,
+      focusedChunks: chunks.filter((chunk) => chunk.analysis.focusedValue !== undefined).length,
+      totalChunks: chunks.length,
+      avgChunkBytes: chunkBytes.length > 0
+        ? Math.round(chunkBytes.reduce((sum, value) => sum + value, 0) / chunkBytes.length)
+        : 0,
+      // Reduce instead of Math.max(...chunkBytes)/Math.min(...): spreading a very large array throws RangeError.
+      maxChunkBytes: chunkBytes.length > 0 ? chunkBytes.reduce((max, value) => Math.max(max, value), -Infinity) : 0,
+      minChunkBytes: chunkBytes.length > 0 ? chunkBytes.reduce((min, value) => Math.min(min, value), Infinity) : 0,
+    },
+  }
+}
+
+/**
+ * Plans a single partition. A partition at or below `targetChunkBytes` — or
+ * any partition of a table without sort keys — is handed to refinement as the
+ * result of `buildSingleChunkPartition`. Anything larger is split recursively
+ * from its root slice and adjacent slices are merged again; refinement is then
+ * told whether any merged slice reports one of the three `*-distribution`
+ * estimate reasons.
+ */
+async function planPartition(
+  context: PlannerContext,
+  partition: Partition,
+  table: TableProfile,
+): Promise<PartitionBuildResult> {
+  const { sortKeys } = table
+
+  const fitsSingleChunk = partition.bytesCompressed <= context.targetChunkBytes || sortKeys.length === 0
+  if (fitsSingleChunk) {
+    const single = buildSingleChunkPartition(partition)
+    return refinePartitionSlices(context, partition, single, sortKeys, false)
+  }
+
+  // Oversized: split from the root slice, then let mergeAdjacentSlices recombine neighbours.
+  const pieces = await splitSliceRecursively(context, partition, buildRootSlice(partition), sortKeys, 0)
+  const merged = mergeAdjacentSlices(pieces, context.targetChunkBytes)
+  const usedDistributionFallback = merged.some(({ estimate }) =>
+    estimate.reason === 'string-prefix-distribution' ||
+    estimate.reason === 'temporal-distribution' ||
+    estimate.reason === 'equal-width-distribution'
+  )
+
+  return refinePartitionSlices(context, partition, merged, sortKeys, usedDistributionFallback)
+}
+
+/**
+ * Depth-first splitting of one slice. The slice is returned unsplit when it is
+ * within `targetChunkBytes * STOP_SPLIT_FUZZ_FACTOR`, when `depth` has reached
+ * `sortKeys.length * MAX_SPLIT_DEPTH_MULTIPLIER`, or when a split attempt
+ * yields at most one child; otherwise every child is split further.
+ */
+async function splitSliceRecursively(
+  context: PlannerContext,
+  partition: Partition,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  depth: number,
+): Promise<PartitionSlice[]> {
+  const smallEnough = slice.estimate.bytesCompressed <= context.targetChunkBytes * STOP_SPLIT_FUZZ_FACTOR
+  const tooDeep = depth >= sortKeys.length * MAX_SPLIT_DEPTH_MULTIPLIER
+  if (smallEnough || tooDeep) return [slice]
+
+  const children = await splitOversizedSlice(context, partition, slice, sortKeys, depth)
+  if (children.length <= 1) return [slice]
+
+  const leaves: PartitionSlice[] = []
+  for (const child of children) {
+    const descendants = await splitSliceRecursively(context, partition, child, sortKeys, depth + 1)
+    leaves.push(...descendants)
+  }
+  return leaves
+}
+
+/**
+ * Tries to split an oversized slice, walking the candidate dimensions in the
+ * order given by `getCandidateDimensions` and returning the first effective
+ * split. Per dimension the attempts are: string-prefix split (string keys),
+ * temporal buckets (datetime keys; skipped at depth 0 unless the slice already
+ * has a focused value), then range splitting. Children of an effective split
+ * are tagged with the slice's focused value, if any. Returns `[slice]` when no
+ * dimension produces an effective split.
+ */
+async function splitOversizedSlice(
+  context: PlannerContext,
+  partition: Partition,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  depth: number,
+): Promise<PartitionSlice[]> {
+  for (const dimensionIndex of getCandidateDimensions(sortKeys, slice)) {
+    // Make sure the slice has concrete bounds on this dimension before splitting on it.
+    const prepared = await hydrateSliceRange(context, slice, sortKeys, dimensionIndex)
+    const sortKey = sortKeys[dimensionIndex]
+    if (!prepared || !sortKey) continue
+
+    const focused = findFocusedValue(prepared, sortKeys)
+    const { category } = sortKey
+
+    if (category === 'string') {
+      const byPrefix = await splitSliceWithStringPrefixes(context, partition, prepared, sortKeys, dimensionIndex)
+      if (isEffectiveSplit(prepared, byPrefix)) return applyFocusedValue(byPrefix, focused)
+    }
+
+    // At the root (depth 0) temporal bucketing is only used once a focused value exists.
+    const temporalAllowed = depth !== 0 || focused !== undefined
+    if (category === 'datetime' && temporalAllowed) {
+      const seed = markFocusedSlice(prepared, focused)
+      const byTime = await splitSliceWithTemporalBuckets(context, partition, seed, sortKeys, dimensionIndex)
+      if (isEffectiveSplit(prepared, byTime)) return applyFocusedValue(byTime, focused)
+    }
+
+    // Range splitting is attempted for every category.
+    const byRange = await splitWithRanges(context, partition, prepared, sortKeys, dimensionIndex)
+    if (isEffectiveSplit(prepared, byRange)) {
+      return applyFocusedValue(byRange, focused)
+    }
+  }
+
+  return [slice]
+}
+
+/**
+ * Splits a slice on one dimension into `ceil(bytesCompressed / targetChunkBytes)`
+ * sub-ranges, using quantile boundaries when `buildQuantileBoundaries` returns
+ * them and equal-width ranges otherwise. Returns `[slice]` unchanged when the
+ * sort key is unknown, a bound is missing, the range is an exact string
+ * value, or only a single sub-range would be needed.
+ */
+async function splitWithRanges(
+  context: PlannerContext,
+  partition: Partition,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+): Promise<PartitionSlice[]> {
+  const sortKey = sortKeys[dimensionIndex]
+  if (!sortKey) return [slice]
+
+  const range = getChunkRange(slice, dimensionIndex)
+  const { from, to } = range
+  if (from === undefined || to === undefined) return [slice]
+  if (sortKey.category === 'string' && isExactChunkRange(range)) return [slice]
+
+  const parts = Math.ceil(slice.estimate.bytesCompressed / context.targetChunkBytes)
+  if (parts <= 1) return [slice]
+
+  const boundaries = await buildQuantileBoundaries(context, slice, sortKeys, dimensionIndex, parts)
+  if (boundaries) {
+    return splitSliceWithQuantiles(context, partition, slice, sortKeys, dimensionIndex, boundaries)
+  }
+
+  // No quantile boundaries available: hand the raw bounds to the equal-width splitter instead.
+  return splitSliceWithEqualWidthRanges(context, partition, slice, sortKeys, dimensionIndex, from, to, parts)
+}
+
+/**
+ * Probes `subCount - 1` row-quantile boundaries on one dimension and returns
+ * `[from, ...boundaries, to]`. Resolves to `undefined` when a bound is missing
+ * or when the lower bound plus the probed boundaries contain no more than
+ * `max(2, ceil(subCount / 3))` distinct values.
+ */
+async function buildQuantileBoundaries(
+  context: PlannerContext,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+  subCount: number,
+): Promise<string[] | undefined> {
+  const { from, to } = getChunkRange(slice, dimensionIndex)
+  if (from === undefined || to === undefined) return undefined
+
+  const boundaries: string[] = [from]
+  for (let step = 1; step < subCount; step += 1) {
+    // Cumulative row count at which the step-th of `subCount` equal parts ends.
+    const targetCumRows = Math.round((slice.estimate.rows * step) / subCount)
+    boundaries.push(
+      await findQuantileBoundaryOnDimension(context, slice, sortKeys, dimensionIndex, targetCumRows)
+    )
+  }
+
+  const distinct = new Set(boundaries).size
+  const tooFewDistinct = distinct <= Math.max(2, Math.ceil(subCount / 3))
+
+  return tooFewDistinct ? undefined : [...boundaries, to]
+}
+
+/**
+ * Ensures the slice has both bounds on `dimensionIndex`. A slice that already
+ * has them is returned as-is. Otherwise the observed min/max of the sort key
+ * is queried and the range becomes `min` .. exclusive upper bound derived from
+ * `max`. Resolves to `undefined` when the sort key is unknown or no range was
+ * observed.
+ */
+async function hydrateSliceRange(
+  context: PlannerContext,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+): Promise<PartitionSlice | undefined> {
+  const { from, to } = getChunkRange(slice, dimensionIndex)
+  if (from !== undefined && to !== undefined) return slice
+
+  const sortKey = sortKeys[dimensionIndex]
+  if (!sortKey) return undefined
+
+  const observed = await getSortKeyRange(context, slice.partitionId, slice.ranges, sortKeys, sortKey)
+  if (!observed) return undefined
+
+  // `observed.max` is an inclusive value; the stored upper bound is its exclusive counterpart.
+  const upperExclusive = toExclusiveUpperBound(observed.max, sortKey)
+  const ranges = replaceChunkRange(slice, dimensionIndex, observed.min, upperExclusive)
+  return { ...slice, ranges }
+}
+
+/** Converts an observed inclusive maximum into an exclusive upper bound for the sort key's category. */
+function toExclusiveUpperBound(value: string, sortKey: SortKey): string {
+  if (sortKey.category === 'string') return buildRootStringUpperBound(value)
+  if (sortKey.category === 'datetime') return new Date(parsePlannerDateTime(value) + 1000).toISOString()
+
+  // Integer maxima are incremented as BigInt: beyond 2^53 `Number(value) + 1` rounds and can
+  // land on or below the maximum itself, leaving the largest rows outside the bound.
+  return /^-?\d+$/.test(value) ? (BigInt(value) + BigInt(1)).toString() : String(Number(value) + 1)
+}
+
+/** A split is effective when it has 2+ children and some child differs from the parent in row estimate or ranges. */
+function isEffectiveSplit(parentSlice: PartitionSlice, childSlices: PartitionSlice[]): boolean {
+  if (childSlices.length < 2) return false
+
+  const parentRows = parentSlice.estimate.rows
+  const parentRanges = JSON.stringify(parentSlice.ranges)
+  return childSlices.some(({ estimate, ranges }) => estimate.rows !== parentRows || JSON.stringify(ranges) !== parentRanges)
+}
+
+/** Returns the first string-keyed range that is pinned to one exact value (with a defined `from`), if any. */
+function findFocusedValue(
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+): { dimensionIndex: number; value: string } | undefined {
+  const pinned = slice.ranges.find((range) =>
+    sortKeys[range.dimensionIndex]?.category === 'string' &&
+    isExactChunkRange(range) &&
+    range.from !== undefined
+  )
+  if (pinned === undefined || pinned.from === undefined) return undefined
+  return { dimensionIndex: pinned.dimensionIndex, value: pinned.from }
+}
+
+/** Tags every slice with the focused value; without one the input array itself is returned. */
+function applyFocusedValue(
+  slices: PartitionSlice[],
+  focusedValue: { dimensionIndex: number; value: string } | undefined,
+): PartitionSlice[] {
+  return focusedValue ? slices.map((slice) => markFocusedSlice(slice, focusedValue)) : slices
+}
+
+/**
+ * Returns a copy of the slice with `analysis.focusedValue` set to the given
+ * value. Without a focused value the very same slice object is returned.
+ */
+function markFocusedSlice(
+  slice: PartitionSlice,
+  focusedValue: { dimensionIndex: number; value: string } | undefined,
+): PartitionSlice {
+  if (!focusedValue) return slice
+
+  const analysis = { ...slice.analysis, focusedValue }
+  return { ...slice, analysis }
+}
+
+/** Converts slices to chunks, numbering them from 0 within each partition in input order. */
+function assignChunkIds(planId: string, slices: PartitionSlice[]): Chunk[] {
+  const nextIndexByPartition = new Map<string, number>()
+  const chunks: Chunk[] = []
+
+  for (const slice of slices) {
+    const index = nextIndexByPartition.get(slice.partitionId) ?? 0
+    nextIndexByPartition.set(slice.partitionId, index + 1)
+    chunks.push({ ...slice, id: generateChunkId(planId, slice.partitionId, index) })
+  }
+  return chunks
+}
diff --git a/packages/plugin-backfill/src/chunking/services/distribution-source.ts b/packages/plugin-backfill/src/chunking/services/distribution-source.ts
new file mode 100644
index 0000000..98c56ad
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/services/distribution-source.ts
@@ -0,0 +1,66 @@
+import { buildWhereClauseFromRanges } from '../sql.js'
+import type {
+  ChunkRange,
+  PlannerContext,
+  SortKey,
+  StringPrefixBucket,
+  TemporalBucket,
+} from '../types.js'
+
+/**
+ * Counts rows per `substring(<sort key>, 1, depth)` prefix inside `ranges`, ordered by prefix. A prefix
+ * shorter than `depth` is flagged `isExactValue`. Returns `[]` without querying when no usable range exists.
+ */
+export async function probeStringPrefixDistribution(
+  context: Pick<PlannerContext, 'database' | 'table' | 'query'>,
+  partitionId: string,
+  ranges: ChunkRange[],
+  sortKey: SortKey,
+  dimensionIndex: number,
+  depth: number,
+  sortKeys: SortKey[],
+): Promise<StringPrefixBucket[]> {
+  const range = ranges.find((candidate) => candidate.dimensionIndex === dimensionIndex)
+  // `''` is a valid lower bound (the smallest possible string), so only a missing `from` skips the probe.
+  if (range?.from === undefined || !range.to) return []
+
+  const rows = await context.query<{ prefix: string; cnt: string }>(`
+SELECT
+  substring(${sortKey.name}, 1, ${depth}) AS prefix,
+  count() AS cnt
+FROM ${context.database}.${context.table}
+WHERE ${buildWhereClauseFromRanges(partitionId, ranges, sortKeys)}
+GROUP BY prefix
+ORDER BY prefix`)
+  return rows.map(({ prefix, cnt }) => ({ value: prefix, rowCount: Number(cnt), isExactValue: Buffer.from(prefix, 'latin1').length < depth }))
+}
+
+/**
+ * Counts rows per day or hour bucket of a datetime sort key inside `ranges`, ordered by bucket.
+ * Returns `[]` without querying when the dimension is not a datetime sort key.
+ * NOTE(review): bucket labels end in a literal `Z`; presumably the server computes them in UTC — confirm.
+ */
+export async function probeTemporalDistribution(
+  context: Pick<PlannerContext, 'database' | 'table' | 'query'>,
+  partitionId: string,
+  ranges: ChunkRange[],
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+  grain: 'day' | 'hour',
+): Promise<TemporalBucket[]> {
+  const sortKey = sortKeys[dimensionIndex]
+  if (sortKey?.category !== 'datetime') return []
+
+  const bucketExpression = grain === 'day' ? `toStartOfDay(${sortKey.name})` : `toStartOfHour(${sortKey.name})`
+
+  const rows = await context.query<{ bucket: string; cnt: string }>(`
+SELECT
+  formatDateTime(${bucketExpression}, '%Y-%m-%dT%H:%i:%sZ') AS bucket,
+  count() AS cnt
+FROM ${context.database}.${context.table}
+WHERE ${buildWhereClauseFromRanges(partitionId, ranges, sortKeys)}
+GROUP BY bucket
+ORDER BY bucket`)
+
+  return rows.map(({ bucket, cnt }) => ({ start: bucket, rowCount: Number(cnt) }))
+}
diff --git a/packages/plugin-backfill/src/chunking/services/metadata-source.ts b/packages/plugin-backfill/src/chunking/services/metadata-source.ts
new file mode 100644
index 0000000..cea3a4d
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/services/metadata-source.ts
@@ -0,0 +1,163 @@
+import type { Partition, PlannerContext, SortKey, SortKeyCategory } from '../types.js'
+
+const NUMERIC_TYPES = /^(U?Int|Float|Decimal)/ // prefix match on the type name: Int*/UInt*/Float*/Decimal*
+const DATETIME_TYPES = /^(Date|DateTime)/ // prefix match, so any type name starting with "Date" qualifies
+
+/** Buckets a column type name into numeric / datetime by prefix; every other type is treated as 'string'. */
+function classifySortKeyType(type: string): SortKeyCategory {
+  if (NUMERIC_TYPES.test(type)) return 'numeric'
+  return DATETIME_TYPES.test(type) ? 'datetime' : 'string'
+}
+
+function boundaryEncodingForCategory(category: SortKeyCategory): SortKey['boundaryEncoding'] {
+  return category === 'string' ? 'hex-latin1' : 'literal' // only string keys get 'hex-latin1'; all others use 'literal'
+}
+
+/**
+ * Splits a comma-separated expression list on top-level commas only. Commas
+ * nested in parentheses or inside quoted text ('…', "…" or `…`) do not split.
+ * Inside quotes a backslash escapes the following character, so a literal
+ * that ends in an escaped backslash (`'\\'`) still closes correctly.
+ * Every part is trimmed; a blank part after the last comma is dropped.
+ */
+function splitTopLevelCsv(input: string): string[] {
+  const parts: string[] = []
+  let current = ''
+  let depth = 0
+  let quote: string | undefined
+  let escaped = false
+
+  for (const char of input) {
+    if (quote !== undefined) {
+      current += char
+      if (escaped) {
+        escaped = false
+      } else if (char === '\\') {
+        escaped = true
+      } else if (char === quote) {
+        quote = undefined
+      }
+      continue
+    }
+
+    if (char === '\'' || char === '"' || char === '`') {
+      quote = char
+      current += char
+      continue
+    }
+
+    if (char === ',' && depth === 0) {
+      parts.push(current.trim())
+      current = ''
+      continue
+    }
+
+    if (char === '(') depth += 1
+    // Clamp at zero so a stray ")" cannot push the depth negative.
+    if (char === ')') depth = Math.max(0, depth - 1)
+    current += char
+  }
+
+  const tail = current.trim()
+  if (tail.length > 0) parts.push(tail)
+  return parts
+}
+
+/**
+ * Resolves a sorting-key expression to one column name: an exact column match
+ * wins, otherwise the expression must mention exactly one known column. With
+ * no column metadata at all, the last identifier in the expression is used.
+ */
+function resolveSortKeyColumn(expression: string, knownColumns: Set<string>): string | undefined {
+  const trimmed = expression.trim()
+  if (knownColumns.has(trimmed)) return trimmed
+
+  const identifiers: string[] = trimmed.match(/\b[A-Za-z_][A-Za-z0-9_]*\b/g) ?? []
+  const referenced = [...new Set(identifiers.filter((identifier) => knownColumns.has(identifier)))]
+  if (referenced.length === 1) return referenced[0]
+  if (knownColumns.size === 0) return identifiers[identifiers.length - 1]
+  return undefined
+}
+
+/**
+ * Lists the table's active partitions from `system.parts` (summed rows and bytes, min/max time) and
+ * keeps those overlapping the optional `from`/`to` window. A `SELECT 1 … LIMIT 1` with
+ * `select_sequential_consistency = 1` is issued against the table first.
+ * NOTE(review): `new Date("YYYY-MM-DD hh:mm:ss")` is parsed engine-dependently (typically local time) — confirm.
+ */
+export async function introspectPartitions(context: PlannerContext): Promise<Partition[]> {
+  await context.query(
+    `SELECT 1 FROM ${context.database}.${context.table} LIMIT 1 SETTINGS select_sequential_consistency = 1`
+  )
+
+  const rows = await context.query<{
+    partition_id: string; total_rows: string; total_bytes: string
+    total_uncompressed_bytes?: string; min_time: string; max_time: string
+  }>(`SELECT
+  partition_id,
+  toString(sum(rows)) AS total_rows,
+  toString(sum(bytes_on_disk)) AS total_bytes,
+  toString(sum(data_uncompressed_bytes)) AS total_uncompressed_bytes,
+  toString(min(min_time)) AS min_time,
+  toString(max(max_time)) AS max_time
+FROM system.parts
+WHERE database = '${context.database}'
+  AND table = '${context.table}'
+  AND active = 1
+GROUP BY partition_id
+ORDER BY partition_id
+SETTINGS select_sequential_consistency = 1`)
+
+  return rows
+    .map((part) => ({
+      partitionId: part.partition_id,
+      rows: Number(part.total_rows),
+      bytesCompressed: Number(part.total_bytes),
+      bytesUncompressed: Number(part.total_uncompressed_bytes ?? part.total_bytes),
+      minTime: new Date(part.min_time).toISOString(),
+      maxTime: new Date(part.max_time).toISOString(),
+    }))
+    .filter(({ minTime, maxTime }) =>
+      !(context.from && maxTime < context.from) && !(context.to && minTime >= context.to)
+    )
+}
+
+/**
+ * Reads the table's sorting key from `system.tables` and resolves each
+ * top-level key expression to a column with its type, category and boundary
+ * encoding. Expressions for which no single column or no type can be
+ * determined are left out. Returns `[]` when the table has no sorting key.
+ */
+export async function introspectSortKeys(context: PlannerContext): Promise<SortKey[]> {
+  const tableRows = await context.query<{ sorting_key: string }>(
+    `SELECT sorting_key FROM system.tables WHERE database = '${context.database}' AND name = '${context.table}'`
+  )
+
+  const sortingKey = tableRows[0]?.sorting_key
+  if (!sortingKey) return []
+
+  const expressions = splitTopLevelCsv(sortingKey)
+  if (expressions.length === 0) return []
+
+  const columnRows = await context.query<{ name?: string; type: string }>(
+    `SELECT name, type FROM system.columns WHERE database = '${context.database}' AND table = '${context.table}'`
+  )
+
+  const typeByName = new Map<string, string>()
+  for (const { name, type } of columnRows) {
+    if (name) typeByName.set(name, type)
+  }
+  const knownColumns = new Set(typeByName.keys())
+
+  const sortKeys: SortKey[] = []
+  expressions.forEach((expression, index) => {
+    const name = resolveSortKeyColumn(expression, knownColumns)
+    if (!name) return
+    // Type lookup by name first; the positional fallbacks only matter when the name is not in the map.
+    const type = typeByName.get(name) ?? columnRows[index]?.type ?? columnRows[0]?.type
+    if (!type) return
+    const category = classifySortKeyType(type)
+    sortKeys.push({ name, type, category, boundaryEncoding: boundaryEncodingForCategory(category) })
+  })
+  return sortKeys
+}
diff --git a/packages/plugin-backfill/src/chunking/services/row-probe.ts b/packages/plugin-backfill/src/chunking/services/row-probe.ts
new file mode 100644
index 0000000..989ce32
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/services/row-probe.ts
@@ -0,0 +1,102 @@
+import { buildCountSql, buildEstimateSql, buildWhereClauseFromRanges } from '../sql.js'
+import type {
+  ChunkRange,
+  EstimateFilter,
+  PlannerContext,
+  RowProbeStrategy,
+  SortKey,
+} from '../types.js'
+
+export function getRowProbeStrategy(context: Pick<PlannerContext, 'rowProbeStrategy'>): RowProbeStrategy {
+  return context.rowProbeStrategy // plain accessor: the configured value is returned unchanged
+}
+
+/**
+ * Estimates how many rows match `filter`. With the `'count'` strategy this is
+ * an exact count. Any other strategy runs the SQL from `buildEstimateSql` and
+ * reads the first result row: a finite number in a column whose name contains
+ * "row" (case-insensitive) wins, otherwise the first finite number in the row.
+ * Resolves to 0 when there is no row or no finite number.
+ */
+export async function estimateRows(
+  context: PlannerContext,
+  filter: EstimateFilter,
+  sortKeys: SortKey[],
+): Promise<number> {
+  const strategy = getRowProbeStrategy(context)
+  if (strategy === 'count') return countRowsExact(context, filter, sortKeys)
+
+  const [firstRow] = await context.query<Record<string, string | number | undefined>>(
+    buildEstimateSql(filter, sortKeys, context, strategy)
+  )
+  if (!firstRow) return 0
+
+  // Missing values parse as 0 (and therefore count as finite);
+  // values that do not parse become NaN and are filtered out.
+  const finite = Object.entries(firstRow)
+    .map(([key, value]) => ({ key, parsed: Number(value ?? 0) }))
+    .filter(({ parsed }) => Number.isFinite(parsed))
+
+  const preferred = finite.find(({ key }) => key.toLowerCase().includes('row')) ?? finite[0]
+  return preferred?.parsed ?? 0
+}
+
+export async function countRowsExact(
+  context: Pick<PlannerContext, 'database' | 'table' | 'query'>,
+  filter: EstimateFilter,
+  sortKeys: SortKey[],
+): Promise<number> {
+  const rows = await context.query<{ cnt: string }>(buildCountSql(filter, sortKeys, context)) // SQL text comes from buildCountSql
+  return Number(rows[0]?.cnt ?? 0) // a missing first row or `cnt` counts as 0
+}
+
+/**
+ * Exact row count for one partition restricted to `ranges`; the filter carries
+ * no exact-value constraint (`exactDimensionIndex`/`exactValue` stay undefined).
+ */
+export async function countRows(
+  context: Pick<PlannerContext, 'database' | 'table' | 'query'>,
+  partitionId: string,
+  ranges: ChunkRange[],
+  sortKeys: SortKey[],
+): Promise<number> {
+  const filter: EstimateFilter = { partitionId, ranges, exactDimensionIndex: undefined, exactValue: undefined }
+
+  return countRowsExact(context, filter, sortKeys)
+}
+
+/** Counts every row of one partition by filtering on `_partition_id`; a missing result row counts as 0. */
+export async function countPartitionRows(
+  context: Pick<PlannerContext, 'database' | 'table' | 'query'>,
+  partitionId: string,
+): Promise<number> {
+  const sql = `SELECT count() AS cnt FROM ${context.database}.${context.table} WHERE _partition_id = '${partitionId}'`
+  const [first] = await context.query<{ cnt: string }>(sql)
+  return Number(first?.cnt ?? 0)
+}
+
+/**
+ * Queries `min`/`max` of one sort key (both rendered via `toString`) over the rows
+ * selected by `partitionId` and `ranges`. Resolves to `undefined` only when the query
+ * returns no row at all; missing fields fall back to `''`.
+ */
+export async function getSortKeyRange(
+  context: Pick<PlannerContext, 'database' | 'table' | 'query'>,
+  partitionId: string,
+  ranges: ChunkRange[],
+  sortKeys: SortKey[],
+  sortKey: SortKey,
+): Promise<{ min: string; max: string } | undefined> {
+  const rows = await context.query<{ minVal: string; maxVal: string }>(`
+SELECT
+  toString(min(${sortKey.name})) AS minVal,
+  toString(max(${sortKey.name})) AS maxVal
+FROM ${context.database}.${context.table}
+WHERE ${buildWhereClauseFromRanges(partitionId, ranges, sortKeys)}`)
+  return rows.length === 0 ? undefined : { min: rows[0]?.minVal ?? '', max: rows[0]?.maxVal ?? '' }
+}
+
+/** Parses a `YYYY-MM-DD[ hh:mm:ss]` or ISO timestamp as UTC epoch milliseconds; date-only input means UTC midnight. */
+export function parsePlannerDateTime(value: string): number {
+  return Date.parse(value.replace(' ', 'T').replace(/T[^Z]*$/, '$&Z')) // only zone-less date-times get an explicit "Z"
+}
diff --git a/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts b/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts
index e362f50..a1398ec 100644
--- a/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts
+++ b/packages/plugin-backfill/src/chunking/smart-chunking.integration.test.ts
@@ -1,8 +1,8 @@
 import { describe, expect, test } from 'bun:test'
 
 import { analyzeAndChunk } from './analyze.js'
-import { buildChunkSql } from './sql.js'
-import type { SortKeyInfo } from './types.js'
+import { buildChunkExecutionSql } from './sql.js'
+import type { Chunk, ChunkPlan } from './types.js'
 
 const MiB = 1024 ** 2
 
@@ -35,8 +35,7 @@ function createFixtureQuery(input: {
     }
 
     if (sql.includes('FROM system.parts')) {
-      const partitions = summarizePartitions(input.rows, bytesPerRow, uncompressedBytesPerRow)
-      return partitions as T[]
+      return summarizePartitions(input.rows, bytesPerRow, uncompressedBytesPerRow) as T[]
     }
 
     if (sql.includes('FROM system.tables')) {
@@ -75,9 +74,7 @@ function createFixtureQuery(input: {
 
       const grouped = new Map<string, number>()
       for (const row of filteredRows) {
-        const bucket = grain === 'day'
-          ? toStartOfDay(String(row[column]))
-          : toStartOfHour(String(row[column]))
+        const bucket = grain === 'day' ? toStartOfDay(String(row[column])) : toStartOfHour(String(row[column]))
         grouped.set(bucket, (grouped.get(bucket) ?? 0) + 1)
       }
 
@@ -150,10 +147,10 @@ function evaluateClause(clause: string, row: FixtureRow): boolean {
   match = clause.match(/^(\w+) < parseDateTimeBestEffort\('([^']+)'\)$/)
   if (match) return Date.parse(String(row[match[1]])) < Date.parse(match[2])
 
-  match = clause.match(/^(\w+) >= unhex\('([0-9a-f]+)'\)$/i)
+  match = clause.match(/^(\w+) >= unhex\('([0-9a-f]*)'\)$/i)
   if (match) return compareLatin1(String(row[match[1]] ?? ''), Buffer.from(match[2], 'hex').toString('latin1')) >= 0
 
-  match = clause.match(/^(\w+) < unhex\('([0-9a-f]+)'\)$/i)
+  match = clause.match(/^(\w+) < unhex\('([0-9a-f]*)'\)$/i)
   if (match) return compareLatin1(String(row[match[1]] ?? ''), Buffer.from(match[2], 'hex').toString('latin1')) < 0
 
   match = clause.match(/^(\w+) >= '([^']+)'$/)
@@ -182,7 +179,7 @@ function compareValues(left: RowValue, right: RowValue): number {
 }
 
 function formatValueForMinMax(value: RowValue): string {
-  return typeof value === 'number' ? String(value) : String(value)
+  return String(value)
 }
 
 function compareLatin1(left: string, right: string): number {
@@ -196,21 +193,14 @@ function toStartOfDay(value: string): string {
 
 function toStartOfHour(value: string): string {
   const date = new Date(value)
-  return new Date(Date.UTC(
-    date.getUTCFullYear(),
-    date.getUTCMonth(),
-    date.getUTCDate(),
-    date.getUTCHours(),
-    0,
-    0,
-  )).toISOString()
+  return new Date(Date.UTC(date.getUTCFullYear(), date.getUTCMonth(), date.getUTCDate(), date.getUTCHours(), 0, 0)).toISOString()
 }
 
 async function planFixture(input: {
   rows: FixtureRow[]
   sortKeys: Array<{ column: string; type: string }>
   maxChunkBytes: number
-}) {
+}): Promise<ChunkPlan> {
   const query = createFixtureQuery({
     database: 'app',
     table: 'events',
@@ -221,23 +211,22 @@ async function planFixture(input: {
   return analyzeAndChunk({
     database: 'app',
     table: 'events',
-    maxChunkBytes: input.maxChunkBytes,
-    requireIdempotencyToken: true,
+    targetChunkBytes: input.maxChunkBytes,
     query,
   })
 }
 
-function strategyIds(chunk: { lineage?: Array<{ strategyId: string }> }): string[] {
-  return chunk.lineage?.map((step) => step.strategyId) ?? []
+function strategyIds(chunk: Pick<Chunk, 'analysis'>): string[] {
+  return chunk.analysis.lineage.map((step) => step.strategyId)
 }
 
-function buildSqlForChunk(chunk: Awaited<ReturnType<typeof planFixture>>['chunks'][number], sortKeys: SortKeyInfo[]) {
-  return buildChunkSql({
+function buildSqlForChunk(plan: ChunkPlan, chunk: Chunk): string {
+  return buildChunkExecutionSql({
     planId: 'fixture-plan',
     chunk,
     target: 'app.events',
-    sortKey: sortKeys[0],
-    sortKeys,
+    sourceTarget: 'app.events',
+    table: plan.table,
   })
 }
 
@@ -256,15 +245,15 @@ describe('smart chunking integration', () => {
       id: index,
     }))
 
-    const result = await planFixture({
+    const plan = await planFixture({
       rows,
       sortKeys: [{ column: 'id', type: 'UInt64' }],
       maxChunkBytes: 64 * MiB,
     })
 
-    expect(result.chunks).toHaveLength(1)
-    expect(result.chunks[0]?.estimateReason).toBe('partition-metadata')
-    expect(strategyIds(result.chunks[0] ?? {})).toHaveLength(0)
+    expect(plan.chunks).toHaveLength(1)
+    expect(plan.chunks[0]?.estimate.reason).toBe('partition-metadata')
+    expect(strategyIds(requireChunk(plan.chunks[0], 'metadata chunk'))).toHaveLength(0)
   })
 
   test('uses quantile range splitting for wide numeric distributions', async () => {
@@ -274,15 +263,16 @@ describe('smart chunking integration', () => {
       id: index,
     }))
 
-    const result = await planFixture({
+    const plan = await planFixture({
       rows,
       sortKeys: [{ column: 'id', type: 'UInt64' }],
       maxChunkBytes: 30 * 1024,
     })
 
-    expect(result.chunks.length).toBeGreaterThanOrEqual(3)
-    expect(result.chunks.every((chunk) => strategyIds(chunk).includes('quantile-range-split'))).toBe(true)
-    const estimatedRows = result.chunks.map((chunk) => chunk.estimatedRows ?? 0)
+    expect(plan.chunks.length).toBeGreaterThanOrEqual(3)
+    expect(plan.chunks.every((chunk) => strategyIds(chunk).includes('quantile-range-split'))).toBe(true)
+
+    const estimatedRows = plan.chunks.map((chunk) => chunk.estimate.rows)
     expect(Math.max(...estimatedRows) - Math.min(...estimatedRows)).toBeLessThanOrEqual(4)
   })
 
@@ -293,17 +283,17 @@ describe('smart chunking integration', () => {
       id: 100 + (index % 2),
     }))
 
-    const result = await planFixture({
+    const plan = await planFixture({
       rows,
       sortKeys: [{ column: 'id', type: 'UInt64' }],
       maxChunkBytes: 20 * 1024,
     })
 
-    expect(result.chunks.length).toBeGreaterThan(1)
-    expect(result.chunks.some((chunk) => strategyIds(chunk).includes('equal-width-split'))).toBe(true)
-    expect(result.chunks.every((chunk) => (chunk.estimatedRows ?? 0) > 0)).toBe(true)
-    expect(result.chunks.every((chunk) =>
-      chunk.ranges?.every((range) => range.from !== range.to) ?? true
+    expect(plan.chunks.length).toBeGreaterThan(1)
+    expect(plan.chunks.some((chunk) => strategyIds(chunk).includes('equal-width-split'))).toBe(true)
+    expect(plan.chunks.every((chunk) => chunk.estimate.rows > 0)).toBe(true)
+    expect(plan.chunks.every((chunk) =>
+      chunk.ranges.every((range) => range.from !== range.to)
     )).toBe(true)
   })
 
@@ -319,20 +309,20 @@ describe('smart chunking integration', () => {
       }
     }
 
-    const result = await planFixture({
+    const plan = await planFixture({
       rows,
       sortKeys: [{ column: 'slug', type: 'String' }],
       maxChunkBytes: 24 * 1024,
     })
 
-    expect(result.chunks.length).toBeGreaterThan(2)
-    expect(result.chunks.some((chunk) => strategyIds(chunk).includes('string-prefix-split'))).toBe(true)
+    expect(plan.chunks.length).toBeGreaterThan(2)
+    expect(plan.chunks.some((chunk) => strategyIds(chunk).includes('string-prefix-split'))).toBe(true)
 
-    const sql = buildSqlForChunk(requireChunk(result.chunks[0], 'string-prefix first chunk'), result.sortKeys)
+    const sql = buildSqlForChunk(plan, requireChunk(plan.chunks[0], 'string-prefix first chunk'))
     expect(sql).toContain("unhex('")
   })
 
-  test('combines string-prefix and temporal splitting for hot-key time windows', async () => {
+  test('combines string-prefix and temporal splitting for focused time windows', async () => {
     const rows: FixtureRow[] = []
 
     for (let day = 1; day <= 3; day++) {
@@ -355,7 +345,7 @@ describe('smart chunking integration', () => {
       })
     }
 
-    const result = await planFixture({
+    const plan = await planFixture({
       rows,
       sortKeys: [
         { column: 'user_id', type: 'String' },
@@ -364,22 +354,22 @@ describe('smart chunking integration', () => {
       maxChunkBytes: 18 * 1024,
     })
 
-    const hotChunks = result.chunks.filter((chunk) =>
+    const hotChunks = plan.chunks.filter((chunk) =>
       strategyIds(chunk).includes('temporal-bucket-split') &&
-      (chunk.ranges?.some((range) => range.dimensionIndex === 0) ?? false) &&
-      (chunk.ranges?.some((range) => range.dimensionIndex === 1) ?? false)
+      chunk.ranges.some((range) => range.dimensionIndex === 0) &&
+      chunk.ranges.some((range) => range.dimensionIndex === 1)
     )
 
     expect(hotChunks.length).toBeGreaterThan(0)
-    expect(hotChunks.every((chunk) => chunk.isHotKey || (chunk.hotKeyValue !== undefined))).toBe(true)
+    expect(hotChunks.every((chunk) => chunk.analysis.focusedValue?.value === 'hot')).toBe(true)
 
-    const sql = buildSqlForChunk(requireChunk(hotChunks[0], 'temporal combo chunk'), result.sortKeys)
+    const sql = buildSqlForChunk(plan, requireChunk(hotChunks[0], 'temporal combo chunk'))
     expect(sql).toContain('user_id >=')
     expect(sql).toContain('event_time >=')
     expect(sql).toContain('parseDateTimeBestEffort')
 
     const temporalRanges = hotChunks
-      .map((chunk) => chunk.ranges?.find((range) => range.dimensionIndex === 1))
+      .map((chunk) => chunk.ranges.find((range) => range.dimensionIndex === 1))
       .filter((range): range is NonNullable<typeof range> => Boolean(range))
       .sort((left, right) => String(left.from).localeCompare(String(right.from)))
 
@@ -409,7 +399,7 @@ describe('smart chunking integration', () => {
       })
     }
 
-    const result = await planFixture({
+    const plan = await planFixture({
       rows,
       sortKeys: [
         { column: 'account', type: 'String' },
@@ -418,15 +408,15 @@ describe('smart chunking integration', () => {
       maxChunkBytes: 24 * 1024,
     })
 
-    const comboChunks = result.chunks.filter((chunk) =>
+    const comboChunks = plan.chunks.filter((chunk) =>
       strategyIds(chunk).includes('quantile-range-split') &&
-      (chunk.ranges?.some((range) => range.dimensionIndex === 0) ?? false) &&
-      (chunk.ranges?.some((range) => range.dimensionIndex === 1) ?? false)
+      chunk.ranges.some((range) => range.dimensionIndex === 0) &&
+      chunk.ranges.some((range) => range.dimensionIndex === 1)
     )
 
     expect(comboChunks.length).toBeGreaterThan(0)
 
-    const sql = buildSqlForChunk(requireChunk(comboChunks[0], 'numeric combo chunk'), result.sortKeys)
+    const sql = buildSqlForChunk(plan, requireChunk(comboChunks[0], 'numeric combo chunk'))
     expect(sql).toContain('account >=')
     expect(sql).toContain("seq >= '")
   })
diff --git a/packages/plugin-backfill/src/chunking/splitter.test.ts b/packages/plugin-backfill/src/chunking/splitter.test.ts
deleted file mode 100644
index 16f4f3f..0000000
--- a/packages/plugin-backfill/src/chunking/splitter.test.ts
+++ /dev/null
@@ -1,64 +0,0 @@
-import { describe, expect, test } from 'bun:test'
-
-import { splitSortKeyRange, stringToUint64, uint64ToString } from './splitter.js'
-
-describe('splitSortKeyRange', () => {
-  test('numeric: splits into equal-width ranges', () => {
-    const ranges = splitSortKeyRange('numeric', '100', '200', 2)
-
-    expect(ranges).toHaveLength(2)
-    expect(ranges[0]?.from).toBe('100')
-    expect(ranges[0]?.to).toBe('150')
-    expect(ranges[1]?.from).toBe('150')
-    expect(ranges[1]?.to).toBe('201')
-  })
-
-  test('datetime: splits into equal-width time ranges', () => {
-    const ranges = splitSortKeyRange('datetime', '2025-01-01 00:00:00', '2025-01-31 00:00:00', 3)
-
-    expect(ranges).toHaveLength(3)
-    for (const r of ranges) {
-      expect(r.from).toBeDefined()
-      expect(r.to).toBeDefined()
-    }
-  })
-
-  test('string: round-trips through uint64 conversion', () => {
-    const ranges = splitSortKeyRange('string', 'aaa', 'zzz', 2)
-
-    expect(ranges).toHaveLength(2)
-    expect(ranges[0]?.from).toBeDefined()
-    expect(ranges[1]?.to).toBeDefined()
-  })
-})
-
-describe('stringToUint64 / uint64ToString', () => {
-  test('round-trips short strings', () => {
-    const original = 'abc'
-    const n = stringToUint64(original)
-    const back = uint64ToString(n)
-    expect(back).toBe(original)
-  })
-
-  test('round-trips 8-byte strings', () => {
-    const original = 'abcdefgh'
-    const n = stringToUint64(original)
-    const back = uint64ToString(n)
-    expect(back).toBe(original)
-  })
-
-  test('truncates strings longer than 8 bytes', () => {
-    const n = stringToUint64('abcdefghijklmnop')
-    const back = uint64ToString(n)
-    expect(back).toBe('abcdefgh')
-  })
-
-  test('handles embedded zero bytes from arithmetic', () => {
-    // Simulates a computed intermediate where a middle byte is 0x00
-    // e.g. 0x6200000000000001 has zero bytes between 'b' and the trailing 0x01
-    const n = 0x6200000000000001n
-    const result = uint64ToString(n)
-    expect(result).toBe('b\0\0\0\0\0\0\x01')
-    expect(result.length).toBe(8)
-  })
-})
diff --git a/packages/plugin-backfill/src/chunking/splitter.ts b/packages/plugin-backfill/src/chunking/splitter.ts
deleted file mode 100644
index c3a3e5c..0000000
--- a/packages/plugin-backfill/src/chunking/splitter.ts
+++ /dev/null
@@ -1,86 +0,0 @@
-import type { SortKeyInfo } from './types.js'
-
-export function splitNumericRange(min: number, max: number, count: number): Array<{ from: string; to: string }> {
-  const span = max - min
-  const step = span / count
-  const ranges: Array<{ from: string; to: string }> = []
-  for (let i = 0; i < count; i++) {
-    const from = min + i * step
-    const to = i === count - 1 ? max + 1 : min + (i + 1) * step
-    ranges.push({ from: String(from), to: String(to) })
-  }
-  return ranges
-}
-
-export function splitDateTimeRange(min: string, max: string, count: number): Array<{ from: string; to: string }> {
-  const minMs = new Date(min).getTime()
-  const maxMs = new Date(max).getTime()
-  const span = maxMs - minMs
-  const step = span / count
-  const ranges: Array<{ from: string; to: string }> = []
-  for (let i = 0; i < count; i++) {
-    const from = new Date(minMs + i * step).toISOString()
-    const to = i === count - 1
-      ? new Date(maxMs + 1).toISOString()
-      : new Date(minMs + (i + 1) * step).toISOString()
-    ranges.push({ from, to })
-  }
-  return ranges
-}
-
-export function stringToUint64(s: string): bigint {
-  let result = 0n
-  const bytes = Math.min(s.length, 8)
-  for (let i = 0; i < bytes; i++) {
-    result = (result << 8n) | BigInt(s.charCodeAt(i))
-  }
-  // Pad remaining bytes with zeros
-  for (let i = bytes; i < 8; i++) {
-    result = result << 8n
-  }
-  return result
-}
-
-export function uint64ToString(n: bigint): string {
-  const chars: string[] = []
-  for (let i = 7; i >= 0; i--) {
-    const byte = Number((n >> BigInt(i * 8)) & 0xffn)
-    chars.push(String.fromCharCode(byte))
-  }
-  // Trim trailing NUL bytes (padding from stringToUint64 for short strings)
-  let end = chars.length
-  while (end > 0 && chars[end - 1] === '\0') end--
-  return chars.slice(0, end).join('')
-}
-
-export function splitStringRange(min: string, max: string, count: number): Array<{ from: string; to: string }> {
-  const minVal = stringToUint64(min)
-  const maxVal = stringToUint64(max)
-  const span = maxVal - minVal
-  const step = span / BigInt(count)
-  const ranges: Array<{ from: string; to: string }> = []
-  for (let i = 0; i < count; i++) {
-    const from = uint64ToString(minVal + BigInt(i) * step)
-    const to = i === count - 1
-      ? uint64ToString(maxVal + 1n)
-      : uint64ToString(minVal + BigInt(i + 1) * step)
-    ranges.push({ from, to })
-  }
-  return ranges
-}
-
-export function splitSortKeyRange(
-  category: SortKeyInfo['category'],
-  min: string,
-  max: string,
-  count: number,
-): Array<{ from: string; to: string }> {
-  switch (category) {
-    case 'numeric':
-      return splitNumericRange(Number(min), Number(max), count)
-    case 'datetime':
-      return splitDateTimeRange(min, max, count)
-    case 'string':
-      return splitStringRange(min, max, count)
-  }
-}
diff --git a/packages/plugin-backfill/src/chunking/sql.ts b/packages/plugin-backfill/src/chunking/sql.ts
index 0475815..01817f0 100644
--- a/packages/plugin-backfill/src/chunking/sql.ts
+++ b/packages/plugin-backfill/src/chunking/sql.ts
@@ -1,71 +1,98 @@
-import type { PlannedChunk, SortKeyInfo } from './types.js'
-
-function buildSettingsClause(token: string): string {
-  if (token) {
-    return `SETTINGS async_insert=0, insert_deduplication_token='${token}'`
-  }
-  return `SETTINGS async_insert=0`
-}
-
-function quoteSqlString(value: string): string {
+import type {
+  Chunk,
+  ChunkRange,
+  EstimateFilter,
+  PlannerContext,
+  RowProbeStrategy,
+  SortKey,
+  TableProfile,
+} from './types.js'
+
+export function quoteSqlString(value: string): string {
   return `'${value.replaceAll('\\', '\\\\').replaceAll('\'', '\\\'')}'`
 }
 
-function formatBound(value: string, sortKey: SortKeyInfo): string {
+export function formatBound(value: string, sortKey: SortKey): string {
   if (sortKey.category === 'datetime') {
     return `parseDateTimeBestEffort(${quoteSqlString(value)})`
   }
+
   if (sortKey.category === 'string') {
     return `unhex('${Buffer.from(value, 'latin1').toString('hex')}')`
   }
+
   return quoteSqlString(value)
 }
 
-function buildChunkConditions(chunk: PlannedChunk, sortKeys: SortKeyInfo[]): string[] {
-  if (chunk.ranges?.length) {
-    return chunk.ranges.flatMap((range) => {
-      const sortKey = sortKeys[range.dimensionIndex]
-      if (!sortKey) return []
+export function buildWhereClauseFromRanges(
+  partitionId: string,
+  ranges: ChunkRange[],
+  sortKeys: SortKey[],
+): string {
+  const conditions = [`_partition_id = ${quoteSqlString(partitionId)}`]
 
-      const conditions: string[] = []
-      if (range.from !== undefined) {
-        conditions.push(`${sortKey.column} >= ${formatBound(range.from, sortKey)}`)
-      }
-      if (range.to !== undefined) {
-        conditions.push(`${sortKey.column} < ${formatBound(range.to, sortKey)}`)
-      }
-      return conditions
-    })
+  for (const range of ranges) {
+    const sortKey = sortKeys[range.dimensionIndex]
+    if (!sortKey) continue
+
+    if (range.from !== undefined) {
+      conditions.push(`${sortKey.name} >= ${formatBound(range.from, sortKey)}`)
+    }
+    if (range.to !== undefined) {
+      conditions.push(`${sortKey.name} < ${formatBound(range.to, sortKey)}`)
+    }
   }
 
-  if (chunk.sortKeyFrom !== undefined && chunk.sortKeyTo !== undefined && sortKeys[0]) {
-    return [
-      `${sortKeys[0].column} >= ${formatBound(chunk.sortKeyFrom, sortKeys[0])}`,
-      `${sortKeys[0].column} < ${formatBound(chunk.sortKeyTo, sortKeys[0])}`,
-    ]
+  return conditions.join('\n  AND ')
+}
+
+/** Builds the `_partition_id` + sort-key-range predicate for `chunk`, reading dimensions from `table.sortKeys`. */
+export function buildWhereClauseFromChunk(
+  chunk: Pick<Chunk, 'partitionId' | 'ranges'>, table: Pick<TableProfile, 'sortKeys'>,
+): string {
+  return buildWhereClauseFromRanges(chunk.partitionId, chunk.ranges, table.sortKeys)
+}
+
+function buildSettingsClause(token: string): string { // SETTINGS suffix appended to a chunk INSERT
+  if (token) { // an empty token leaves the deduplication setting out entirely
+    return `SETTINGS async_insert=0, insert_deduplication_token=${quoteSqlString(token)}`
   }
+  return 'SETTINGS async_insert=0'
+}
 
-  return []
+/**
+ * Lists the sort-key conditions of a chunk: a `>= from` and/or `< to` comparison per range,
+ * with each bound rendered through `formatBound`. Ranges whose dimension index has no entry
+ * in `sortKeys` contribute nothing.
+ */
+function buildChunkConditions(chunk: Pick<Chunk, 'ranges'>, sortKeys: SortKey[]): string[] {
+  return chunk.ranges.flatMap((range) => {
+    const key = sortKeys[range.dimensionIndex]
+    if (!key) return []
+
+    const lower = range.from === undefined ? [] : [`${key.name} >= ${formatBound(range.from, key)}`]
+    const upper = range.to === undefined ? [] : [`${key.name} < ${formatBound(range.to, key)}`]
+    return [...lower, ...upper]
+  })
 }
 
-export function buildChunkSql(input: {
+export function buildChunkExecutionSql(input: {
   planId: string
-  chunk: PlannedChunk
+  chunk: Chunk
   target: string
-  sortKey?: SortKeyInfo
-  sortKeys?: SortKeyInfo[]
+  table: Pick<TableProfile, 'sortKeys'>
+  sourceTarget?: string
   mvAsQuery?: string
   targetColumns?: string[]
+  idempotencyToken?: string
 }): string {
-  const header = `/* chkit backfill plan=${input.planId} chunk=${input.chunk.id} token=${input.chunk.idempotencyToken} */`
-  const settings = buildSettingsClause(input.chunk.idempotencyToken)
-  const { chunk } = input
-  const sortKeys = input.sortKeys ?? (input.sortKey ? [input.sortKey] : [])
-  const chunkConditions = buildChunkConditions(chunk, sortKeys)
+  const sourceTarget = input.sourceTarget ?? input.target
+  const header = `/* chkit backfill plan=${input.planId} chunk=${input.chunk.id} token=${input.idempotencyToken ?? ''} */`
+  const settings = buildSettingsClause(input.idempotencyToken ?? '')
+  const chunkConditions = buildChunkConditions(input.chunk, input.table.sortKeys)
 
   if (input.mvAsQuery) {
-    // MV replay: inject partition + sort key filters into the MV's AS query
-    let filtered = injectPartitionFilter(input.mvAsQuery, chunk.partitionId)
+    let filtered = injectPartitionFilter(input.mvAsQuery, input.chunk.partitionId)
     for (const condition of chunkConditions) {
       filtered = injectWhereCondition(filtered, condition)
     }
@@ -75,13 +102,12 @@ export function buildChunkSql(input: {
     return [header, `INSERT INTO ${input.target}`, filtered, settings].join('\n')
   }
 
-  // Direct table copy
   const lines = [
     header,
     `INSERT INTO ${input.target}`,
-    `SELECT *`,
-    `FROM ${input.target}`,
-    `WHERE _partition_id = '${chunk.partitionId}'`,
+    'SELECT *',
+    `FROM ${sourceTarget}`,
+    `WHERE _partition_id = ${quoteSqlString(input.chunk.partitionId)}`,
   ]
 
   for (const condition of chunkConditions) {
@@ -92,28 +118,80 @@ export function buildChunkSql(input: {
   return lines.join('\n')
 }
 
-// --- SQL helpers ---
+/** Builds the row probe for `filter`: an exact `count()` query, or `EXPLAIN ESTIMATE` of it otherwise. */
+export function buildEstimateSql(
+  filter: EstimateFilter,
+  sortKeys: SortKey[],
+  context: PlannerContext,
+  rowProbeStrategy: RowProbeStrategy,
+): string {
+  const predicate = buildWhereClauseFromFilter(filter, sortKeys)
+  const source = `${context.database}.${context.table}`
+  if (rowProbeStrategy !== 'count') return `EXPLAIN ESTIMATE SELECT count() FROM ${source} WHERE ${predicate}`
+  return `SELECT count() AS cnt FROM ${source} WHERE ${predicate}`
+}
+
+/** Builds an exact `SELECT count() AS cnt` probe over the rows matched by `filter`. */
+export function buildCountSql(
+  filter: EstimateFilter, sortKeys: SortKey[], context: Pick<PlannerContext, 'database' | 'table'>,
+): string {
+  const source = `${context.database}.${context.table}`
+  return `SELECT count() AS cnt FROM ${source} WHERE ${buildWhereClauseFromFilter(filter, sortKeys)}`
+}
+
+/**
+ * Renders an estimate filter as a single-line, `AND`-joined predicate.
+ *
+ * The `_partition_id` equality comes first; each range whose dimension resolves to a sort
+ * key then adds its `>= from` / `< to` bounds. Ranges pointing at a missing sort key are
+ * ignored.
+ */
+function buildWhereClauseFromFilter(filter: EstimateFilter, sortKeys: SortKey[]): string {
+  const partitionClause = `_partition_id = ${quoteSqlString(filter.partitionId)}`
+  const rangeClauses = filter.ranges.flatMap((range) => {
+    const key = sortKeys[range.dimensionIndex]
+    if (!key) return []
+
+    // On the filter's exact dimension an equality on `exactValue` replaces the range bounds.
+    if (filter.exactDimensionIndex === range.dimensionIndex && filter.exactValue !== undefined) {
+      return [`${key.name} = ${formatBound(filter.exactValue, key)}`]
+    }
+
+    const lower = range.from === undefined ? [] : [`${key.name} >= ${formatBound(range.from, key)}`]
+    const upper = range.to === undefined ? [] : [`${key.name} < ${formatBound(range.to, key)}`]
+    return [...lower, ...upper]
+  })
+
+  return [partitionClause, ...rangeClauses].join(' AND ')
+}
 
 function injectPartitionFilter(query: string, partitionId: string): string {
-  const condition = `_partition_id = '${partitionId}'`
-  return injectWhereCondition(query, condition)
+  return injectWhereCondition(query, `_partition_id = ${quoteSqlString(partitionId)}`)
 }
 
 export function injectSortKeyFilter(
   query: string,
   sortKeyColumn: string,
-  category: SortKeyInfo['category'],
+  category: SortKey['category'],
   from: string,
   to: string,
 ): string {
   let condition: string
+
   if (category === 'datetime') {
-    condition = `${sortKeyColumn} >= parseDateTimeBestEffort(${quoteSqlString(from)})\n  AND ${sortKeyColumn} < parseDateTimeBestEffort(${quoteSqlString(to)})`
+    condition =
+      `${sortKeyColumn} >= parseDateTimeBestEffort(${quoteSqlString(from)})\n` +
+      `  AND ${sortKeyColumn} < parseDateTimeBestEffort(${quoteSqlString(to)})`
   } else if (category === 'string') {
-    condition = `${sortKeyColumn} >= unhex('${Buffer.from(from, 'latin1').toString('hex')}')\n  AND ${sortKeyColumn} < unhex('${Buffer.from(to, 'latin1').toString('hex')}')`
+    condition =
+      `${sortKeyColumn} >= unhex('${Buffer.from(from, 'latin1').toString('hex')}')\n` +
+      `  AND ${sortKeyColumn} < unhex('${Buffer.from(to, 'latin1').toString('hex')}')`
   } else {
-    condition = `${sortKeyColumn} >= ${quoteSqlString(from)}\n  AND ${sortKeyColumn} < ${quoteSqlString(to)}`
+    condition =
+      `${sortKeyColumn} >= ${quoteSqlString(from)}\n` +
+      `  AND ${sortKeyColumn} < ${quoteSqlString(to)}`
   }
+
   return injectWhereCondition(query, condition)
 }
 
@@ -121,40 +199,51 @@ function injectWhereCondition(query: string, condition: string): string {
   const trimmed = query.trimEnd()
   const upper = trimmed.toUpperCase()
 
-  interface KWHit { keyword: string; position: number }
-  const hits: KWHit[] = []
+  interface KeywordHit {
+    keyword: string
+    position: number
+  }
+
+  const hits: KeywordHit[] = []
   let depth = 0
 
-  for (let i = 0; i < trimmed.length; i++) {
-    const ch = trimmed[i]
-    if (ch === '(') { depth++; continue }
-    if (ch === ')') { depth--; continue }
-    if (ch === "'") {
-      i++
-      while (i < trimmed.length && trimmed[i] !== "'") {
-        if (trimmed[i] === '\\') i++
-        i++
+  for (let index = 0; index < trimmed.length; index++) {
+    const char = trimmed[index]
+    if (char === '(') {
+      depth += 1
+      continue
+    }
+    if (char === ')') {
+      depth -= 1
+      continue
+    }
+    if (char === '\'') {
+      index += 1
+      while (index < trimmed.length && trimmed[index] !== '\'') {
+        if (trimmed[index] === '\\') index += 1
+        index += 1
       }
       continue
     }
     if (depth !== 0) continue
-
-    if (i > 0 && /\S/.test(trimmed[i - 1] ?? '')) continue
-
-    const rest = upper.slice(i)
-    for (const kw of ['WHERE', 'GROUP BY', 'HAVING', 'ORDER BY', 'QUALIFY', 'LIMIT', 'SETTINGS']) {
-      if (rest.startsWith(kw) && (i + kw.length >= trimmed.length || /\s/.test(trimmed[i + kw.length] ?? ''))) {
-        hits.push({ keyword: kw, position: i })
+    if (index > 0 && /\S/.test(trimmed[index - 1] ?? '')) continue
+
+    const rest = upper.slice(index)
+    for (const keyword of ['WHERE', 'GROUP BY', 'HAVING', 'ORDER BY', 'QUALIFY', 'LIMIT', 'SETTINGS']) {
+      if (
+        rest.startsWith(keyword) &&
+        (index + keyword.length >= trimmed.length || /\s/.test(trimmed[index + keyword.length] ?? ''))
+      ) {
+        hits.push({ keyword, position: index })
         break
       }
     }
   }
 
-  const whereHit = hits.find(h => h.keyword === 'WHERE')
-  const trailingKeywords = ['GROUP BY', 'HAVING', 'ORDER BY', 'QUALIFY', 'LIMIT', 'SETTINGS']
+  const whereHit = hits.find((hit) => hit.keyword === 'WHERE')
   const firstTrailing = hits
-    .filter(h => trailingKeywords.includes(h.keyword))
-    .filter(h => !whereHit || h.position > whereHit.position)[0]
+    .filter((hit) => hit.keyword !== 'WHERE')
+    .filter((hit) => !whereHit || hit.position > whereHit.position)[0]
 
   const insertAt = firstTrailing ? firstTrailing.position : trimmed.length
   const before = trimmed.slice(0, insertAt).trimEnd()
@@ -163,6 +252,7 @@ function injectWhereCondition(query: string, condition: string): string {
   if (whereHit) {
     return `${before}\n  AND ${condition}${after ? `\n${after}` : ''}`
   }
+
   return `${before}\nWHERE ${condition}${after ? `\n${after}` : ''}`
 }
 
@@ -174,57 +264,85 @@ export function rewriteSelectColumns(query: string, targetColumns: string[]): st
   let fromPos = -1
   let depth = 0
 
-  for (let i = 0; i < trimmed.length; i++) {
-    const ch = trimmed[i]
-    if (ch === '(') { depth++; continue }
-    if (ch === ')') { depth--; continue }
-    if (ch === "'") {
-      i++
-      while (i < trimmed.length && trimmed[i] !== "'") {
-        if (trimmed[i] === '\\') i++
-        i++
+  for (let index = 0; index < trimmed.length; index++) {
+    const char = trimmed[index]
+    if (char === '(') {
+      depth += 1
+      continue
+    }
+    if (char === ')') {
+      depth -= 1
+      continue
+    }
+    if (char === '\'') {
+      index += 1
+      while (index < trimmed.length && trimmed[index] !== '\'') {
+        if (trimmed[index] === '\\') index += 1
+        index += 1
       }
       continue
     }
     if (depth !== 0) continue
-
-    if (i > 0 && /\S/.test(trimmed[i - 1] ?? '')) continue
-
-    const rest = upper.slice(i)
-    if (selectPos === -1 && rest.startsWith('SELECT') && (i + 6 >= trimmed.length || /\s/.test(trimmed[i + 6] ?? ''))) {
-      selectPos = i
-    } else if (selectPos !== -1 && fromPos === -1 && rest.startsWith('FROM') && (i + 4 >= trimmed.length || /\s/.test(trimmed[i + 4] ?? ''))) {
-      fromPos = i
+    if (index > 0 && /\S/.test(trimmed[index - 1] ?? '')) continue
+
+    const rest = upper.slice(index)
+    if (
+      selectPos === -1 &&
+      rest.startsWith('SELECT') &&
+      (index + 6 >= trimmed.length || /\s/.test(trimmed[index + 6] ?? ''))
+    ) {
+      selectPos = index
+    } else if (
+      selectPos !== -1 &&
+      fromPos === -1 &&
+      rest.startsWith('FROM') &&
+      (index + 4 >= trimmed.length || /\s/.test(trimmed[index + 4] ?? ''))
+    ) {
+      fromPos = index
     }
   }
 
   if (selectPos === -1 || fromPos === -1) return query
 
-  const projStart = selectPos + 6
-  const projText = trimmed.slice(projStart, fromPos).trim()
+  const projectionStart = selectPos + 6
+  const rawProjection = trimmed.slice(projectionStart, fromPos).trim()
+  let projectionPrefix = ''
+  let projection = rawProjection
+
+  const distinctMatch = rawProjection.match(/^DISTINCT\b\s*/i)
+  if (distinctMatch) {
+    projectionPrefix = distinctMatch[0] ?? ''
+    projection = rawProjection.slice(projectionPrefix.length).trim()
+  }
 
   const items: string[] = []
   let itemStart = 0
   depth = 0
 
-  for (let i = 0; i < projText.length; i++) {
-    const ch = projText[i]
-    if (ch === '(') { depth++; continue }
-    if (ch === ')') { depth--; continue }
-    if (ch === "'") {
-      i++
-      while (i < projText.length && projText[i] !== "'") {
-        if (projText[i] === '\\') i++
-        i++
+  for (let index = 0; index < projection.length; index++) {
+    const char = projection[index]
+    if (char === '(') {
+      depth += 1
+      continue
+    }
+    if (char === ')') {
+      depth -= 1
+      continue
+    }
+    if (char === '\'') {
+      index += 1
+      while (index < projection.length && projection[index] !== '\'') {
+        if (projection[index] === '\\') index += 1
+        index += 1
       }
       continue
     }
-    if (depth === 0 && ch === ',') {
-      items.push(projText.slice(itemStart, i).trim())
-      itemStart = i + 1
+    if (depth === 0 && char === ',') {
+      items.push(projection.slice(itemStart, index).trim())
+      itemStart = index + 1
     }
   }
-  items.push(projText.slice(itemStart).trim())
+  items.push(projection.slice(itemStart).trim())
 
   const aliasMap = new Map<string, string>()
   for (const item of items) {
@@ -232,38 +350,43 @@ export function rewriteSelectColumns(query: string, targetColumns: string[]): st
 
     const itemUpper = item.toUpperCase()
     let asPos = -1
-    let d = 0
-
-    for (let i = 0; i < item.length; i++) {
-      const ch = item[i]
-      if (ch === '(') { d++; continue }
-      if (ch === ')') { d--; continue }
-      if (ch === "'") {
-        i++
-        while (i < item.length && item[i] !== "'") {
-          if (item[i] === '\\') i++
-          i++
+    let itemDepth = 0
+
+    for (let index = 0; index < item.length; index++) {
+      const char = item[index]
+      if (char === '(') {
+        itemDepth += 1
+        continue
+      }
+      if (char === ')') {
+        itemDepth -= 1
+        continue
+      }
+      if (char === '\'') {
+        index += 1
+        while (index < item.length && item[index] !== '\'') {
+          if (item[index] === '\\') index += 1
+          index += 1
         }
         continue
       }
-      if (d !== 0) continue
-      if (i > 0 && /\S/.test(item[i - 1] ?? '')) continue
-
-      const rest = itemUpper.slice(i)
-      if (rest.startsWith('AS') && (i + 2 >= item.length || /\s/.test(item[i + 2] ?? ''))) {
-        asPos = i
+      if (itemDepth !== 0) continue
+      if (index > 0 && /\S/.test(item[index - 1] ?? '')) continue
+
+      const rest = itemUpper.slice(index)
+      if (
+        rest.startsWith('AS') &&
+        (index + 2 >= item.length || /\s/.test(item[index + 2] ?? ''))
+      ) {
+        asPos = index
       }
     }
 
     if (asPos !== -1) {
-      const alias = item.slice(asPos + 2).trim()
-      aliasMap.set(alias, item)
+      aliasMap.set(item.slice(asPos + 2).trim(), item)
     }
   }
 
-  const rewrittenCols = targetColumns.map(col => aliasMap.get(col) ?? col)
-
-  const before = trimmed.slice(0, projStart)
-  const after = trimmed.slice(fromPos)
-  return `${before} ${rewrittenCols.join(', ')}\n${after}`
+  const rewrittenProjection = targetColumns.map((column) => aliasMap.get(column) ?? column)
+  return `${trimmed.slice(0, projectionStart)} ${projectionPrefix}${rewrittenProjection.join(', ')}\n${trimmed.slice(fromPos)}`
 }
diff --git a/packages/plugin-backfill/src/chunking/strategies/equal-width-split.ts b/packages/plugin-backfill/src/chunking/strategies/equal-width-split.ts
new file mode 100644
index 0000000..f98778f
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/strategies/equal-width-split.ts
@@ -0,0 +1,67 @@
+import { buildSliceFromRows } from '../partition-slices.js'
+import { estimateRows } from '../services/row-probe.js'
+import type {
+  Partition,
+  PartitionSlice,
+  PlannerContext,
+  SortKey,
+} from '../types.js'
+import { replaceChunkRange } from '../utils/ranges.js'
+import { buildEvenlySpacedBoundaries } from './quantile-range-split.js'
+
+/**
+ * Fallback splitter: cuts `rangeFrom`..`rangeTo` on one dimension at evenly spaced boundaries
+ * and probes every resulting sub-range for its row count.
+ *
+ * The input slice is returned unchanged (as a one-element array) when the dimension has no
+ * sort key, when de-duplication leaves at most two boundaries, or when no sub-range yields
+ * rows. Sub-ranges that probe as empty are dropped.
+ */
+export async function splitSliceWithEqualWidthRanges(
+  context: PlannerContext,
+  partition: Partition,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+  rangeFrom: string,
+  rangeTo: string,
+  subCount: number,
+): Promise<PartitionSlice[]> {
+  const key = sortKeys[dimensionIndex]
+  if (!key) return [slice]
+
+  // Evenly spaced boundaries can repeat; the Set keeps only the first occurrence of each.
+  const edges = [...new Set(buildEvenlySpacedBoundaries(rangeFrom, rangeTo, subCount, key))]
+  if (edges.length <= 2) return [slice]
+
+  const result: PartitionSlice[] = []
+  for (let i = 0; i + 1 < edges.length; i += 1) {
+    const from = edges[i]
+    const to = edges[i + 1]
+    if (from === undefined || to === undefined || from === to) continue
+
+    const ranges = replaceChunkRange(slice, dimensionIndex, from, to)
+    const rows = await estimateRows(context, { partitionId: partition.partitionId, ranges }, sortKeys)
+    if (rows <= 0) continue
+
+    const exact = context.rowProbeStrategy === 'count'
+    result.push(
+      buildSliceFromRows(partition, {
+        ranges,
+        rows,
+        focusedValue: slice.analysis.focusedValue,
+        confidence: exact ? 'exact' : 'low',
+        reason: exact ? 'exact-count' : 'equal-width-distribution',
+        lineage: slice.analysis.lineage.concat([
+          {
+            strategyId: 'equal-width-split',
+            dimensionIndex,
+            reason: 'fallback to equal-width ranges',
+          },
+        ]),
+      })
+    )
+  }
+
+  return result.length > 0 ? result : [slice]
+}
diff --git a/packages/plugin-backfill/src/chunking/strategies/metadata-single-chunk.ts b/packages/plugin-backfill/src/chunking/strategies/metadata-single-chunk.ts
new file mode 100644
index 0000000..86dca54
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/strategies/metadata-single-chunk.ts
@@ -0,0 +1,6 @@
+import { buildRootSlice } from '../partition-slices.js'
+import type { Partition, PartitionSlice } from '../types.js'
+
+export function buildSingleChunkPartition(partition: Partition): PartitionSlice[] {
+  return [buildRootSlice(partition)] // one-element plan: only the slice buildRootSlice derives from the partition
+}
diff --git a/packages/plugin-backfill/src/chunking/strategies/quantile-range-split.ts b/packages/plugin-backfill/src/chunking/strategies/quantile-range-split.ts
new file mode 100644
index 0000000..4c586a1
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/strategies/quantile-range-split.ts
@@ -0,0 +1,208 @@
+import { buildSliceFromRows } from '../partition-slices.js'
+import { estimateRows, parsePlannerDateTime } from '../services/row-probe.js'
+import type {
+  Partition,
+  PartitionSlice,
+  PlannerContext,
+  SortKey,
+} from '../types.js'
+import { bigIntToStr, strToBigInt } from '../utils/binary-string.js'
+import { getChunkRange, replaceChunkRange } from '../utils/ranges.js'
+
+const BINARY_SEARCH_STEPS = 24 // cap on bisection iterations (one row probe each) per boundary search
+
+/**
+ * Splits `slice` along one dimension at the supplied boundaries: every adjacent pair of
+ * boundaries becomes a candidate range that is probed for its row count.
+ *
+ * Pairs with an undefined or repeated boundary and ranges that probe as empty are skipped,
+ * so the result can hold fewer than `boundaries.length - 1` slices, including none at all.
+ */
+export async function splitSliceWithQuantiles(
+  context: PlannerContext,
+  partition: Partition,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+  boundaries: string[],
+): Promise<PartitionSlice[]> {
+  const result: PartitionSlice[] = []
+  for (let i = 0; i + 1 < boundaries.length; i += 1) {
+    const from = boundaries[i]
+    const to = boundaries[i + 1]
+    if (from === undefined || to === undefined || from === to) continue
+
+    const ranges = replaceChunkRange(slice, dimensionIndex, from, to)
+    const rows = await estimateRows(context, { partitionId: partition.partitionId, ranges }, sortKeys)
+    if (rows <= 0) continue
+
+    const exact = context.rowProbeStrategy === 'count'
+    result.push(
+      buildSliceFromRows(partition, {
+        ranges,
+        rows,
+        focusedValue: slice.analysis.focusedValue,
+        confidence: exact ? 'exact' : 'high',
+        reason: exact ? 'exact-count' : 'quantile-estimate',
+        lineage: slice.analysis.lineage.concat([
+          {
+            strategyId: 'quantile-range-split',
+            dimensionIndex,
+            reason: 'split slice into quantile-aligned ranges',
+          },
+        ]),
+      })
+    )
+  }
+
+  return result
+}
+
+/**
+ * Searches one dimension of `slice` for the boundary value at which the cumulative row count
+ * reaches roughly `targetCumRows`, using the search routine that matches the sort key category.
+ *
+ * Rejects with an Error when the dimension has no sort key or the slice's range on it is open.
+ */
+export async function findQuantileBoundaryOnDimension(
+  context: PlannerContext,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+  targetCumRows: number,
+): Promise<string> {
+  const key = sortKeys[dimensionIndex]
+  if (!key) throw new Error(`Missing sort key at dimension ${dimensionIndex}`)
+
+  const { from, to } = getChunkRange(slice, dimensionIndex)
+  if (from === undefined || to === undefined) throw new Error(`Missing range for quantile split on dimension ${dimensionIndex}`)
+
+  const search =
+    key.category === 'string' ? findStringBoundary
+    : key.category === 'datetime' ? findDateTimeBoundary
+    : findNumericBoundary
+  return search(context, slice, sortKeys, dimensionIndex, from, to, targetCumRows)
+}
+
+/**
+ * Produces `subCount + 1` boundaries spaced evenly from `rangeFrom` to `rangeTo`: ISO strings
+ * for datetimes, floored numbers for numerics, `strToBigInt(..., 8)`-based strings otherwise.
+ * With `subCount <= 1` the two endpoints are returned as given.
+ */
+export function buildEvenlySpacedBoundaries(
+  rangeFrom: string,
+  rangeTo: string,
+  subCount: number,
+  sortKey: SortKey,
+): string[] {
+  if (subCount <= 1) return [rangeFrom, rangeTo]
+  const steps = Array.from({ length: subCount + 1 }, (_, step) => step)
+
+  if (sortKey.category === 'datetime') {
+    const first = parsePlannerDateTime(rangeFrom)
+    const span = parsePlannerDateTime(rangeTo) - first
+    return steps.map((step) => new Date(first + Math.floor((span * step) / subCount)).toISOString())
+  }
+
+  if (sortKey.category === 'numeric') {
+    const first = Number(rangeFrom)
+    const span = Number(rangeTo) - first
+    return steps.map((step) => String(first + Math.floor((span * step) / subCount)))
+  }
+
+  const first = strToBigInt(rangeFrom, 8)
+  const span = strToBigInt(rangeTo, 8) - first
+  return steps.map((step) => bigIntToStr(first + (span * BigInt(step)) / BigInt(subCount), 8))
+}
+
+/**
+ * Bisects the string range, via its `strToBigInt` encoding, for the point where the rows
+ * between `rangeFrom` and the point reach `targetCumRows`; at most `BINARY_SEARCH_STEPS` probes.
+ */
+async function findStringBoundary(
+  context: PlannerContext,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+  rangeFrom: string,
+  rangeTo: string,
+  targetCumRows: number,
+): Promise<string> {
+  let lower = strToBigInt(rangeFrom, 8)
+  let upper = strToBigInt(rangeTo, 8)
+  for (let probes = 0; probes < BINARY_SEARCH_STEPS; probes += 1) {
+    const middle = (lower + upper) / 2n
+    if (middle === lower || middle === upper) break // the interval can no longer be halved
+    const rows = await estimateRowsUntil(context, slice, sortKeys, dimensionIndex, rangeFrom, bigIntToStr(middle, 8))
+    if (rows < targetCumRows) lower = middle
+    else upper = middle
+  }
+  return bigIntToStr((lower + upper) / 2n, 8)
+}
+
+/**
+ * Bisects the datetime range, on the numbers `parsePlannerDateTime` yields, for the point where
+ * the rows between `rangeFrom` and the point reach `targetCumRows`; returns it as an ISO string.
+ */
+async function findDateTimeBoundary(
+  context: PlannerContext,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+  rangeFrom: string,
+  rangeTo: string,
+  targetCumRows: number,
+): Promise<string> {
+  let lower = parsePlannerDateTime(rangeFrom)
+  let upper = parsePlannerDateTime(rangeTo)
+  for (let probes = 0; probes < BINARY_SEARCH_STEPS; probes += 1) {
+    const middle = Math.floor((lower + upper) / 2)
+    if (middle === lower || middle === upper) break // the interval can no longer be halved
+    const rows = await estimateRowsUntil(context, slice, sortKeys, dimensionIndex, rangeFrom, new Date(middle).toISOString())
+    if (rows < targetCumRows) lower = middle
+    else upper = middle
+  }
+  return new Date(Math.floor((lower + upper) / 2)).toISOString()
+}
+
+/**
+ * Bisects the numeric range on floored midpoints until the rows from `rangeFrom` reach `targetCumRows`.
+ */
+async function findNumericBoundary(
+  context: PlannerContext,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+  rangeFrom: string,
+  rangeTo: string,
+  targetCumRows: number,
+): Promise<string> {
+  let lower = Number(rangeFrom)
+  let upper = Number(rangeTo)
+  for (let probes = 0; probes < BINARY_SEARCH_STEPS; probes += 1) {
+    const middle = Math.floor((lower + upper) / 2)
+    if (middle === lower || middle === upper) break // the interval can no longer be halved
+    const rows = await estimateRowsUntil(context, slice, sortKeys, dimensionIndex, rangeFrom, String(middle))
+    if (rows < targetCumRows) lower = middle
+    else upper = middle
+  }
+  return String(Math.floor((lower + upper) / 2))
+}
+
+/**
+ * Probes the row count of `slice` with its `dimensionIndex` range swapped for `rangeFrom`..`rangeTo`.
+ */
+async function estimateRowsUntil(
+  context: PlannerContext,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+  rangeFrom: string,
+  rangeTo: string,
+): Promise<number> {
+  const probe = {
+    partitionId: slice.partitionId,
+    ranges: replaceChunkRange(slice, dimensionIndex, rangeFrom, rangeTo),
+  }
+  return estimateRows(context, probe, sortKeys)
+}
diff --git a/packages/plugin-backfill/src/chunking/strategies/refinement.ts b/packages/plugin-backfill/src/chunking/strategies/refinement.ts
new file mode 100644
index 0000000..4d27c2a
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/strategies/refinement.ts
@@ -0,0 +1,128 @@
+import { buildSliceEstimate } from '../partition-slices.js'
+import { countRowsExact, getRowProbeStrategy } from '../services/row-probe.js'
+import type {
+  Partition,
+  PartitionBuildResult,
+  PartitionDiagnostics,
+  PartitionSlice,
+  PlannerContext,
+  SortKey,
+} from '../types.js'
+
+const ESTIMATE_RATIO_MIN = 0.7
+const ESTIMATE_RATIO_MAX = 1.3
+
+/**
+ * Re-counts 'low' confidence slices exactly, then — only for the 'explain-estimate' probe — falls
+ * back to exact counts for every slice when the summed estimate is flagged as suspicious.
+ */
+export async function refinePartitionSlices(
+  context: PlannerContext,
+  partition: Partition,
+  slices: PartitionSlice[],
+  sortKeys: SortKey[],
+  usedDistributionFallback: boolean,
+): Promise<PartitionBuildResult> {
+  // Targeted pass: runs only when at least one incoming slice is marked 'low'.
+  const usedLowConfidenceChunkRefinement = slices.some(({ estimate }) => estimate.confidence === 'low')
+  const workingSlices = usedLowConfidenceChunkRefinement
+    ? await refineLowConfidenceSlices(context, partition, slices, sortKeys)
+    : slices
+
+  // Both diagnostics snapshots share everything except the slice list and the exact-fallback flag.
+  const diagnose = (candidates: PartitionSlice[], usedExactCountFallback: boolean): PartitionDiagnostics =>
+    buildPartitionDiagnostics(
+      partition,
+      candidates,
+      usedDistributionFallback,
+      usedLowConfidenceChunkRefinement,
+      usedExactCountFallback
+    )
+
+  const diagnostics = diagnose(workingSlices, false)
+  // The probe strategy is checked first; the suspicious flag only matters for 'explain-estimate'.
+  const needsExactFallback =
+    getRowProbeStrategy(context) === 'explain-estimate' && diagnostics.suspiciousEstimate
+  if (!needsExactFallback) {
+    return { slices: workingSlices, diagnostics }
+  }
+
+  // Global pass: hand every slice to refineAllSlices and rebuild diagnostics from its result.
+  const refinedSlices = await refineAllSlices(context, partition, workingSlices, sortKeys)
+  return {
+    slices: refinedSlices,
+    diagnostics: diagnose(refinedSlices, true),
+  }
+}
+
+/** Compares the slices' summed row estimates with the partition's row count and records the fallback flags. */
+export function buildPartitionDiagnostics(
+  partition: Partition,
+  slices: PartitionSlice[],
+  usedDistributionFallback: boolean,
+  usedLowConfidenceChunkRefinement: boolean,
+  usedExactCountFallback: boolean,
+): PartitionDiagnostics {
+  const estimatedRowSum = slices.reduce((total, { estimate }) => total + estimate.rows, 0)
+  // With no partition rows there is nothing to divide by, so the ratio defaults to 1 (never suspicious).
+  const ratio = partition.rows > 0 ? estimatedRowSum / partition.rows : 1
+  return {
+    estimatedRowSum,
+    exactPartitionRows: partition.rows,
+    estimateToExactRatio: ratio,
+    suspiciousEstimate: ratio < ESTIMATE_RATIO_MIN || ratio > ESTIMATE_RATIO_MAX,
+    lowConfidenceChunkCount: slices.filter(({ estimate }) => estimate.confidence === 'low').length,
+    usedDistributionFallback,
+    usedLowConfidenceChunkRefinement,
+    usedExactCountFallback,
+  }
+}
+
+/** Exact-counts the 'low' confidence slices one at a time; every other slice passes through as-is. */
+async function refineLowConfidenceSlices(
+  context: PlannerContext,
+  partition: Partition,
+  slices: PartitionSlice[],
+  sortKeys: SortKey[],
+): Promise<PartitionSlice[]> {
+  const result: PartitionSlice[] = []
+
+  for (const candidate of slices) {
+    const isLow = candidate.estimate.confidence === 'low'
+    // Awaiting inside the loop keeps the refinement calls sequential and the output in
+    // input order.
+    result.push(isLow ? await refineSlice(context, partition, candidate, sortKeys) : candidate)
+  }
+
+  return result
+}
+
+/** Exact-counts every slice whose estimate is not already 'exact', so no count query is repeated. */
+async function refineAllSlices(
+  context: PlannerContext, partition: Partition, slices: PartitionSlice[], sortKeys: SortKey[],
+): Promise<PartitionSlice[]> {
+  const refineIfNeeded = (slice: PartitionSlice): PartitionSlice | Promise<PartitionSlice> =>
+    slice.estimate.confidence === 'exact' ? slice : refineSlice(context, partition, slice, sortKeys)
+  return Promise.all(slices.map(refineIfNeeded))
+}
+
+/**
+ * Swaps the slice's estimate for one built from an exact row count over the slice's ranges.
+ */
+async function refineSlice(
+  context: PlannerContext,
+  partition: Partition,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+): Promise<PartitionSlice> {
+  // Filter by the partition's id together with the slice's own ranges.
+  const filter = {
+    partitionId: partition.partitionId,
+    ranges: slice.ranges,
+  }
+
+  const exactRows = await countRowsExact(context, filter, sortKeys)
+  const estimate = buildSliceEstimate(partition, exactRows, 'exact', 'exact-count')
+
+  return { ...slice, estimate }
+}
diff --git a/packages/plugin-backfill/src/chunking/strategies/string-prefix-split.ts b/packages/plugin-backfill/src/chunking/strategies/string-prefix-split.ts
new file mode 100644
index 0000000..bed8d57
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/strategies/string-prefix-split.ts
@@ -0,0 +1,144 @@
+import { buildSliceFromRows } from '../partition-slices.js'
+import { probeStringPrefixDistribution } from '../services/distribution-source.js'
+import type {
+  Partition,
+  PartitionSlice,
+  PlannerContext,
+  SortKey,
+  StringPrefixBucket,
+} from '../types.js'
+import {
+  buildObservedStringUpperBound,
+  maxBinaryString,
+  minBinaryString,
+  nextPrefixValue,
+} from '../utils/binary-string.js'
+import { getChunkRange, replaceChunkRange } from '../utils/ranges.js'
+
+const TARGET_BYTES_FUZZ_FACTOR = 1.15
+const PREFIX_START_DEPTH = 1
+const PREFIX_MAX_DEPTH = 4
+
+/** Splits `slice` along the string sort key at `dimensionIndex`; yields [] when that is not possible. */
+export async function splitSliceWithStringPrefixes(
+  context: PlannerContext,
+  partition: Partition,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+): Promise<PartitionSlice[]> {
+  if (sortKeys[dimensionIndex]?.category !== 'string') return [] // missing or non-string sort key
+
+  const { from, to } = getChunkRange(slice, dimensionIndex)
+  if (from === undefined || to === undefined) return [] // range is open on at least one side
+
+  return buildPrefixSlices(
+    context,
+    partition,
+    slice,
+    sortKeys,
+    dimensionIndex,
+    from,
+    to,
+    PREFIX_START_DEPTH
+  )
+}
+
+export function buildRootStringUpperBound(maxValue: string): string {
+  return buildObservedStringUpperBound(maxValue) // thin alias over the binary-string helper
+}
+
+/**
+ * Probes the string-prefix distribution of rangeFrom..rangeTo at `depth` and turns each non-empty
+ * bucket into a slice, recursing one level deeper for oversized prefix buckets up to PREFIX_MAX_DEPTH.
+ */
+async function buildPrefixSlices(
+  context: PlannerContext,
+  partition: Partition,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+  rangeFrom: string,
+  rangeTo: string,
+  depth: number,
+): Promise<PartitionSlice[]> {
+  const sortKey = sortKeys[dimensionIndex]
+  if (!sortKey) return []
+
+  const buckets = await probeStringPrefixDistribution(
+    context,
+    partition.partitionId,
+    replaceChunkRange(slice, dimensionIndex, rangeFrom, rangeTo),
+    sortKey,
+    dimensionIndex,
+    depth,
+    sortKeys
+  )
+
+  const maxBytes = context.targetChunkBytes * TARGET_BYTES_FUZZ_FACTOR
+  const slices: PartitionSlice[] = []
+  for (const bucket of buckets) {
+    if (bucket.rowCount <= 0) continue
+
+    const bucketSlice = buildBucketSlice(partition, slice, dimensionIndex, rangeFrom, rangeTo, bucket)
+    if (!bucketSlice) continue
+
+    // Exact-value buckets cannot be narrowed further, and depth is capped at PREFIX_MAX_DEPTH.
+    const canDeepen = !bucket.isExactValue && depth < PREFIX_MAX_DEPTH
+    const fitsTarget = bucketSlice.estimate.bytesCompressed <= maxBytes
+    const { from, to } = getChunkRange(bucketSlice, dimensionIndex)
+    if (fitsTarget || !canDeepen || from === undefined || to === undefined) {
+      slices.push(bucketSlice)
+      continue
+    }
+    const nested = await buildPrefixSlices(
+      context,
+      partition,
+      slice,
+      sortKeys,
+      dimensionIndex,
+      from,
+      to,
+      depth + 1
+    )
+    // A deeper probe that yields nothing must not drop the bucket's rows: keep the oversized slice.
+    slices.push(...(nested.length > 0 ? nested : [bucketSlice]))
+  }
+
+  return slices
+}
+
+/** Builds one bucket's slice clamped to rangeFrom..rangeTo; undefined if no usable range results. */
+function buildBucketSlice(
+  partition: Partition,
+  parentSlice: PartitionSlice,
+  dimensionIndex: number,
+  rangeFrom: string,
+  rangeTo: string,
+  bucket: StringPrefixBucket,
+): PartitionSlice | undefined {
+  const { value, isExactValue, rowCount } = bucket
+
+  // An exact value ends right after itself (value + NUL); a prefix ends at its successor prefix.
+  const upperBound = isExactValue ? `${value}\0` : nextPrefixValue(value)
+  if (upperBound === undefined) return undefined
+
+  const lowerClamped = maxBinaryString(rangeFrom, value)
+  const upperClamped = minBinaryString(rangeTo, upperBound)
+  if (lowerClamped === upperClamped) return undefined
+
+  const splitStep = {
+    strategyId: 'string-prefix-split',
+    dimensionIndex,
+    reason: 'split slice using string prefix distribution',
+  }
+
+  return buildSliceFromRows(partition, {
+    ranges: replaceChunkRange(parentSlice, dimensionIndex, lowerClamped, upperClamped),
+    rows: rowCount,
+    focusedValue: isExactValue ? { dimensionIndex, value } : parentSlice.analysis.focusedValue,
+    confidence: 'high',
+    reason: 'string-prefix-distribution',
+    lineage: parentSlice.analysis.lineage.concat([splitStep]),
+  })
+}
diff --git a/packages/plugin-backfill/src/chunking/strategies/temporal-bucket-split.ts b/packages/plugin-backfill/src/chunking/strategies/temporal-bucket-split.ts
new file mode 100644
index 0000000..b8c16db
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/strategies/temporal-bucket-split.ts
@@ -0,0 +1,117 @@
+import { buildSliceFromRows, getTargetChunkRows } from '../partition-slices.js'
+import { probeTemporalDistribution } from '../services/distribution-source.js'
+import { parsePlannerDateTime } from '../services/row-probe.js'
+import type {
+  Partition,
+  PartitionSlice,
+  PlannerContext,
+  SortKey,
+  TemporalBucket,
+} from '../types.js'
+import { getChunkRange, replaceChunkRange } from '../utils/ranges.js'
+
+const TARGET_BYTES_FUZZ_FACTOR = 1.15
+
+/**
+ * Splits `slice` using day buckets; when any resulting slice exceeds the fuzzed byte target the
+ * split is redone with hour buckets. Returns [slice] when the day probe yields no buckets.
+ */
+export async function splitSliceWithTemporalBuckets(
+  context: PlannerContext,
+  partition: Partition,
+  slice: PartitionSlice,
+  sortKeys: SortKey[],
+  dimensionIndex: number,
+): Promise<PartitionSlice[]> {
+  const probe = (granularity: 'day' | 'hour') =>
+    probeTemporalDistribution(
+      context,
+      partition.partitionId,
+      slice.ranges,
+      sortKeys,
+      dimensionIndex,
+      granularity
+    )
+  const toSlices = (buckets: TemporalBucket[]): PartitionSlice[] =>
+    buildTemporalSlices(partition, slice, dimensionIndex, buckets, context.targetChunkBytes)
+
+  const dayBuckets = await probe('day')
+  if (dayBuckets.length === 0) return [slice]
+
+  const daySlices = toSlices(dayBuckets)
+  const byteLimit = context.targetChunkBytes * TARGET_BYTES_FUZZ_FACTOR
+  const allFit = daySlices.every((candidate) => candidate.estimate.bytesCompressed <= byteLimit)
+  if (allFit) return daySlices
+
+  const hourBuckets = await probe('hour')
+  return hourBuckets.length === 0 ? daySlices : toSlices(hourBuckets)
+}
+
+export function getPartitionEndExclusive(partition: Partition): string {
+  return new Date(parsePlannerDateTime(partition.maxTime) + 1000).toISOString() // maxTime + 1000 (ms for Date), as ISO
+}
+
+/** Greedily packs consecutive temporal buckets into slices of roughly the target row count. */
+function buildTemporalSlices(
+  partition: Partition,
+  parentSlice: PartitionSlice,
+  dimensionIndex: number,
+  buckets: TemporalBucket[],
+  targetChunkBytes: number,
+): PartitionSlice[] {
+  const rowLimit = getTargetChunkRows(partition, targetChunkBytes) * TARGET_BYTES_FUZZ_FACTOR
+  const { from: sliceStart, to: parentEnd } = getChunkRange(parentSlice, dimensionIndex)
+  const sliceEnd = parentEnd ?? getPartitionEndExclusive(partition)
+  const emit = (from: string, to: string, rows: number): PartitionSlice =>
+    buildSlice(parentSlice, partition, dimensionIndex, from, to, rows)
+
+  const result: PartitionSlice[] = []
+  let openStart: string | undefined
+  let openRows = 0
+  for (const [position, bucket] of buckets.entries()) {
+    if (!bucket) continue
+
+    // Buckets that begin before the parent's lower bound are clamped to it.
+    const bucketStart = sliceStart && bucket.start < sliceStart ? sliceStart : bucket.start
+    if (openStart === undefined) openStart = bucketStart
+
+    const overflows = openRows > 0 && openRows + bucket.rowCount > rowLimit
+    if (overflows && openStart !== undefined && openStart < bucketStart) {
+      result.push(emit(openStart, bucketStart, openRows))
+      openStart = bucketStart
+      openRows = 0
+    }
+    openRows += bucket.rowCount
+
+    const isLast = position === buckets.length - 1
+    if (isLast && openStart !== undefined && openStart < sliceEnd) {
+      result.push(emit(openStart, sliceEnd, openRows))
+    }
+  }
+
+  return result.length > 0 ? result : [parentSlice]
+}
+
+/** Derives a 'low' confidence child of `parentSlice` spanning from..to on `dimensionIndex`. */
+function buildSlice(
+  parentSlice: PartitionSlice,
+  partition: Partition,
+  dimensionIndex: number,
+  from: string,
+  to: string,
+  rows: number,
+): PartitionSlice {
+  const temporalStep = {
+    strategyId: 'temporal-bucket-split',
+    dimensionIndex,
+    reason: 'split slice using temporal distribution buckets',
+  }
+  return buildSliceFromRows(partition, {
+    ranges: replaceChunkRange(parentSlice, dimensionIndex, from, to),
+    rows,
+    focusedValue: parentSlice.analysis.focusedValue,
+    confidence: 'low',
+    reason: 'temporal-distribution',
+    lineage: parentSlice.analysis.lineage.concat([temporalStep]),
+  })
+}
diff --git a/packages/plugin-backfill/src/chunking/strategy-policy.test.ts b/packages/plugin-backfill/src/chunking/strategy-policy.test.ts
new file mode 100644
index 0000000..04880b7
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/strategy-policy.test.ts
@@ -0,0 +1,13 @@
+import { describe, expect, test } from 'bun:test'
+
+import { getCandidateDimensions } from './strategy-policy.js'
+
+describe('getCandidateDimensions', () => {
+  test('preserves declared sort-key order regardless of type', () => {
+    expect(getCandidateDimensions([
+      { name: 'event_time', type: 'DateTime', category: 'datetime', boundaryEncoding: 'literal' },
+      { name: 'account_id', type: 'String', category: 'string', boundaryEncoding: 'hex-latin1' },
+      { name: 'seq', type: 'UInt64', category: 'numeric', boundaryEncoding: 'literal' },
+    ])).toEqual([0, 1, 2]) // one index per key, in declaration order
+  })
+})
diff --git a/packages/plugin-backfill/src/chunking/strategy-policy.ts b/packages/plugin-backfill/src/chunking/strategy-policy.ts
new file mode 100644
index 0000000..0b1a4d1
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/strategy-policy.ts
@@ -0,0 +1,8 @@
+import type { PartitionSlice, SortKey } from './types.js'
+
+export function getCandidateDimensions(
+  sortKeys: SortKey[],
+  _slice?: PartitionSlice,
+): number[] {
+  return Array.from(sortKeys, (_key, position) => position) // every index, in declared order; `_slice` is unused
+}
diff --git a/packages/plugin-backfill/src/chunking/types.ts b/packages/plugin-backfill/src/chunking/types.ts
index 220da04..f45b7fe 100644
--- a/packages/plugin-backfill/src/chunking/types.ts
+++ b/packages/plugin-backfill/src/chunking/types.ts
@@ -1,76 +1,65 @@
-export interface PartitionInfo {
-  partitionId: string
-  rows: number
-  bytesOnDisk: number
-  bytesUncompressed?: number
-  minTime: string
-  maxTime: string
-}
+export type RowProbeStrategy = 'explain-estimate' | 'count'
+
+export type SortKeyCategory = 'numeric' | 'datetime' | 'string'
+
+export type SortKeyBoundaryEncoding = 'literal' | 'hex-latin1'
+
+export type EstimateConfidence = 'high' | 'low' | 'exact'
+
+export type EstimateReason =
+  | 'partition-metadata'
+  | 'quantile-estimate'
+  | 'string-prefix-distribution'
+  | 'temporal-distribution'
+  | 'equal-width-distribution'
+  | 'exact-count'
 
-export interface SortKeyInfo {
-  column: string
+export interface SortKey {
+  name: string
   type: string
-  category: 'numeric' | 'datetime' | 'string'
+  category: SortKeyCategory
+  boundaryEncoding: SortKeyBoundaryEncoding
 }
 
-export interface SliceRange {
+export interface ChunkRange {
   dimensionIndex: number
   from?: string
   to?: string
 }
 
-export interface SliceLineageStep {
+export interface ChunkDerivationStep {
   strategyId: string
   dimensionIndex?: number
   reason: string
 }
 
-export type EstimateConfidence = 'high' | 'low' | 'exact'
+export interface ChunkEstimate {
+  rows: number
+  bytesCompressed: number
+  bytesUncompressed: number
+  confidence: EstimateConfidence
+  reason: EstimateReason
+}
 
-export type EstimateReason =
-  | 'partition-metadata'
-  | 'quantile-estimate'
-  | 'string-prefix-distribution'
-  | 'temporal-distribution'
-  | 'equal-width-distribution'
-  | 'exact-count'
+export interface FocusedValue {
+  dimensionIndex: number
+  value: string
+}
 
-export interface ChunkBoundary {
-  partitionId: string
-  ranges?: SliceRange[]
-  sortKeyFrom?: string
-  sortKeyTo?: string
-  estimatedBytes: number
-  estimatedRows?: number
-  isHotKey?: boolean
-  hotDimensionIndex?: number
-  hotKeyValue?: string
-  estimateConfidence?: EstimateConfidence
-  estimateReason?: EstimateReason
-  lineage?: SliceLineageStep[]
-}
-
-export interface PlannedChunk {
+export interface ChunkAnalysis {
+  focusedValue?: FocusedValue
+  lineage: ChunkDerivationStep[]
+}
+
+export interface Chunk {
   id: string
   partitionId: string
-  ranges?: SliceRange[]
-  sortKeyFrom?: string
-  sortKeyTo?: string
-  estimatedBytes: number
-  estimatedRows?: number
-  idempotencyToken: string
-  from: string
-  to: string
-  isHotKey?: boolean
-  hotDimensionIndex?: number
-  hotKeyValue?: string
-  estimateConfidence?: EstimateConfidence
-  estimateReason?: EstimateReason
-  lineage?: SliceLineageStep[]
+  ranges: ChunkRange[]
+  estimate: ChunkEstimate
+  analysis: ChunkAnalysis
 }
 
 export interface PartitionDiagnostics {
-  partitionId: string
   estimatedRowSum: number
   exactPartitionRows: number
   estimateToExactRatio: number
@@ -80,3 +69,99 @@ export interface PartitionDiagnostics {
   usedLowConfidenceChunkRefinement: boolean
   usedExactCountFallback: boolean
 }
+
+export interface Partition {
+  partitionId: string
+  rows: number
+  bytesCompressed: number
+  bytesUncompressed: number
+  minTime: string
+  maxTime: string
+  diagnostics?: PartitionDiagnostics
+}
+
+export interface TableProfile {
+  database: string
+  table: string
+  sortKeys: SortKey[]
+}
+
+export interface ChunkPlanStats {
+  totalPartitions: number
+  oversizedPartitions: number
+  focusedChunks: number
+  totalChunks: number
+  avgChunkBytes: number
+  maxChunkBytes: number
+  minChunkBytes: number
+}
+
+export interface ChunkPlan {
+  planId: string
+  generatedAt: string
+  rowProbeStrategy: RowProbeStrategy
+  targetChunkBytes: number
+  table: TableProfile
+  partitions: Partition[]
+  chunks: Chunk[]
+  totalRows: number
+  totalBytesCompressed: number
+  totalBytesUncompressed: number
+  stats: ChunkPlanStats
+}
+
+export type PlannerQuery = <T>(sql: string) => Promise<T[]>
+
+export interface PlannerContext {
+  database: string
+  table: string
+  from?: string
+  to?: string
+  targetChunkBytes: number
+  query: PlannerQuery
+  rowProbeStrategy: RowProbeStrategy
+}
+
+export interface EstimateFilter {
+  partitionId: string
+  ranges: ChunkRange[]
+  exactDimensionIndex?: number
+  exactValue?: string
+}
+
+export interface StringPrefixBucket {
+  value: string // a prefix, or a complete value when isExactValue is true
+  rowCount: number
+  isExactValue: boolean // the string-prefix split bounds such buckets with value + NUL and never deepens them
+}
+
+export interface TemporalBucket {
+  start: string // bucket lower bound; the temporal split compares it as a string against slice bounds
+  rowCount: number
+}
+
+export interface PartitionSlice {
+  partitionId: string
+  ranges: ChunkRange[]
+  estimate: ChunkEstimate
+  analysis: ChunkAnalysis
+}
+
+export interface PartitionBuildResult {
+  slices: PartitionSlice[]
+  diagnostics: PartitionDiagnostics
+}
+
+export interface PlanChunkOptions {
+  requireIdempotencyToken: boolean
+}
+
+export interface GenerateChunkPlanInput {
+  database: string
+  table: string
+  from?: string
+  to?: string
+  targetChunkBytes: number
+  query: PlannerQuery
+  rowProbeStrategy?: RowProbeStrategy
+}
diff --git a/packages/plugin-backfill/src/chunking/utils/binary-string.ts b/packages/plugin-backfill/src/chunking/utils/binary-string.ts
new file mode 100644
index 0000000..51fb8d6
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/utils/binary-string.ts
@@ -0,0 +1,55 @@
+export function compareBinaryStrings(left: string, right: string): number {
+  return Buffer.compare(Buffer.from(left, 'latin1'), Buffer.from(right, 'latin1')) // latin1 byte order
+}
+
+export function minBinaryString(left: string, right: string): string {
+  return compareBinaryStrings(left, right) > 0 ? right : left // ties resolve to `left`
+}
+
+export function maxBinaryString(left: string, right: string): string {
+  return compareBinaryStrings(left, right) < 0 ? right : left // ties resolve to `left`
+}
+
+/**
+ * Returns the smallest latin1 byte string that sorts after every string starting with `prefix`:
+ * trailing 0xff bytes are dropped and the last remaining byte is incremented.
+ */
+export function nextPrefixValue(prefix: string): string | undefined {
+  const bytes = Buffer.from(prefix, 'latin1')
+  let end = bytes.length
+  while (end > 0 && bytes[end - 1] === 0xff) end--
+  if (end === 0) return undefined // an empty or all-0xff prefix has no successor
+
+  const successor = Buffer.from(bytes.subarray(0, end))
+  // The byte at end - 1 is below 0xff here, so the increment cannot overflow.
+  successor[end - 1] = (bytes[end - 1] ?? 0) + 1
+  return successor.toString('latin1')
+}
+
+export function buildObservedStringUpperBound(maxValue: string): string {
+  return maxValue.concat('\0') // appending NUL yields the next string after maxValue in byte order
+}
+
+/** Packs the first `padTo` latin1 bytes of `value`, most significant first, into a bigint. */
+export function strToBigInt(value: string, padTo: number): bigint {
+  const bytes = Buffer.from(value, 'latin1')
+  let packed = 0n
+  for (let position = 0; position < padTo; position++) {
+    // Positions past the end of the input contribute a zero byte (right padding).
+    packed = packed * 256n + BigInt(bytes[position] ?? 0)
+  }
+
+  return packed
+}
+
+/** Encodes the low `length` bytes of `value`, most significant first, as a latin1 string. */
+export function bigIntToStr(value: bigint, length: number): string {
+  const bytes = Buffer.alloc(length)
+  let rest = value
+  for (let offset = length - 1; offset >= 0; offset--) {
+    bytes[offset] = Number(rest & 255n)
+    rest = rest >> 8n
+  }
+
+  return bytes.toString('latin1')
+}
diff --git a/packages/plugin-backfill/src/chunking/utils/ids.ts b/packages/plugin-backfill/src/chunking/utils/ids.ts
new file mode 100644
index 0000000..32c0ff3
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/utils/ids.ts
@@ -0,0 +1,17 @@
+import { hashId, randomPlanId } from '../../state.js'
+
+export function generatePlanId(): string {
+  return randomPlanId() // delegates to the state module's plan-id generator
+}
+
+export function generateChunkId(
+  planId: string,
+  partitionId: string,
+  index: number,
+): string {
+  return hashId('chunk:' + planId + ':' + partitionId + ':' + index).slice(0, 16) // first 16 chars of the hash
+}
+
+export function generateIdempotencyToken(planId: string, chunkId: string): string {
+  return hashId('token:' + planId + ':' + chunkId) // full hash of the plan id and chunk id
+}
diff --git a/packages/plugin-backfill/src/chunking/utils/ranges.ts b/packages/plugin-backfill/src/chunking/utils/ranges.ts
new file mode 100644
index 0000000..3af1571
--- /dev/null
+++ b/packages/plugin-backfill/src/chunking/utils/ranges.ts
@@ -0,0 +1,31 @@
+import type { ChunkRange, PartitionSlice } from '../types.js'
+
+/** Returns the slice's range for `dimensionIndex`, or an unbounded placeholder when none is recorded. */
+export function getChunkRange(
+  slice: Pick<PartitionSlice, 'ranges'>,
+  dimensionIndex: number,
+): ChunkRange {
+  for (const candidate of slice.ranges) {
+    if (candidate.dimensionIndex === dimensionIndex) return candidate
+  }
+
+  // No entry for this dimension: both bounds are left undefined.
+  return { dimensionIndex, from: undefined, to: undefined }
+}
+
+/** Copy of the slice's ranges with `dimensionIndex` set to from..to, ordered by dimension index. */
+export function replaceChunkRange(
+  slice: Pick<PartitionSlice, 'ranges'>,
+  dimensionIndex: number,
+  from: string | undefined,
+  to: string | undefined,
+): ChunkRange[] {
+  const kept = slice.ranges.filter((range) => range.dimensionIndex !== dimensionIndex)
+  const updated: ChunkRange[] = [...kept, { dimensionIndex, from, to }]
+  return updated.sort((a, b) => a.dimensionIndex - b.dimensionIndex)
+}
+
+export function isExactChunkRange(range: Pick<ChunkRange, 'from' | 'to'>): boolean {
+  const { from, to } = range
+  return from !== undefined && to !== undefined && to === `${from}\0` // exact: `to` is `from` + NUL
+}
diff --git a/packages/plugin-backfill/src/partition-planner.test.ts b/packages/plugin-backfill/src/partition-planner.test.ts
deleted file mode 100644
index b10c8da..0000000
--- a/packages/plugin-backfill/src/partition-planner.test.ts
+++ /dev/null
@@ -1,185 +0,0 @@
-import { describe, expect, test } from 'bun:test'
-
-import { buildChunkBoundaries } from './chunking/build.js'
-import { buildChunkSql } from './chunking/sql.js'
-import { buildPlannedChunks } from './chunking/analyze.js'
-import type { PartitionInfo, SortKeyInfo } from './types.js'
-
-const GiB = 1024 ** 3
-
-function buildChunksWithSql(input: {
-  planId: string
-  target: string
-  partitions: PartitionInfo[]
-  maxChunkBytes: number
-  sortKey?: SortKeyInfo
-  sortKeyRanges?: Map<string, { min: string; max: string }>
-  requireIdempotencyToken: boolean
-  mvAsQuery?: string
-  targetColumns?: string[]
-}) {
-  const boundaries = buildChunkBoundaries({
-    partitions: input.partitions,
-    maxChunkBytes: input.maxChunkBytes,
-    sortKey: input.sortKey,
-    sortKeyRanges: input.sortKeyRanges,
-  })
-
-  const planned = buildPlannedChunks({
-    planId: input.planId,
-    partitions: input.partitions,
-    boundaries,
-    requireIdempotencyToken: input.requireIdempotencyToken,
-  })
-
-  return planned.map(chunk => ({
-    ...chunk,
-    sqlTemplate: buildChunkSql({
-      planId: input.planId,
-      chunk,
-      target: input.target,
-      sortKey: input.sortKey,
-      mvAsQuery: input.mvAsQuery,
-      targetColumns: input.targetColumns,
-    }),
-  }))
-}
-
-describe('buildChunksWithSql', () => {
-  const basePlanId = 'abc1234567890123'
-
-  test('small partition produces one chunk with _partition_id filter only', () => {
-    const partitions: PartitionInfo[] = [
-      { partitionId: '202501', rows: 1000, bytesOnDisk: 5 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T23:59:59.000Z' },
-    ]
-
-    const chunks = buildChunksWithSql({
-      planId: basePlanId,
-      target: 'default.events',
-      partitions,
-      maxChunkBytes: 10 * GiB,
-      requireIdempotencyToken: true,
-    })
-
-    expect(chunks).toHaveLength(1)
-    expect(chunks[0]?.sqlTemplate).toContain("WHERE _partition_id = '202501'")
-    expect(chunks[0]?.partitionId).toBe('202501')
-    expect(chunks[0]?.estimatedBytes).toBe(5 * GiB)
-  })
-
-  test('large partition with datetime sort key produces sub-chunks with parseDateTimeBestEffort', () => {
-    const partitions: PartitionInfo[] = [
-      { partitionId: '202501', rows: 10000, bytesOnDisk: 30 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' },
-    ]
-    const sortKey: SortKeyInfo = { column: 'event_time', type: 'DateTime', category: 'datetime' }
-    const sortKeyRanges = new Map([
-      ['202501', { min: '2025-01-01 00:00:00', max: '2025-01-31 00:00:00' }],
-    ])
-
-    const chunks = buildChunksWithSql({
-      planId: basePlanId,
-      target: 'default.events',
-      partitions,
-      maxChunkBytes: 10 * GiB,
-      sortKey,
-      sortKeyRanges,
-      requireIdempotencyToken: true,
-    })
-
-    expect(chunks).toHaveLength(3)
-    for (const chunk of chunks) {
-      expect(chunk.sqlTemplate).toContain("WHERE _partition_id = '202501'")
-      expect(chunk.sqlTemplate).toContain('event_time >= parseDateTimeBestEffort(')
-      expect(chunk.sqlTemplate).toContain('event_time < parseDateTimeBestEffort(')
-      expect(chunk.partitionId).toBe('202501')
-    }
-  })
-
-  test('chunk IDs are deterministic for same input', () => {
-    const partitions: PartitionInfo[] = [
-      { partitionId: '202501', rows: 1000, bytesOnDisk: 5 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' },
-    ]
-
-    const first = buildChunksWithSql({
-      planId: basePlanId,
-      target: 'default.events',
-      partitions,
-      maxChunkBytes: 10 * GiB,
-      requireIdempotencyToken: true,
-    })
-
-    const second = buildChunksWithSql({
-      planId: basePlanId,
-      target: 'default.events',
-      partitions,
-      maxChunkBytes: 10 * GiB,
-      requireIdempotencyToken: true,
-    })
-
-    expect(first[0]?.id).toBe(second[0]?.id)
-    expect(first[0]?.idempotencyToken).toBe(second[0]?.idempotencyToken)
-  })
-
-  test('idempotency tokens are empty when not required', () => {
-    const partitions: PartitionInfo[] = [
-      { partitionId: '202501', rows: 1000, bytesOnDisk: 5 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' },
-    ]
-
-    const chunks = buildChunksWithSql({
-      planId: basePlanId,
-      target: 'default.events',
-      partitions,
-      maxChunkBytes: 10 * GiB,
-      requireIdempotencyToken: false,
-    })
-
-    expect(chunks[0]?.idempotencyToken).toBe('')
-    expect(chunks[0]?.sqlTemplate).not.toContain('insert_deduplication_token')
-  })
-
-  test('SQL templates include correct INSERT and SELECT structure', () => {
-    const partitions: PartitionInfo[] = [
-      { partitionId: '202501', rows: 1000, bytesOnDisk: 5 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' },
-    ]
-
-    const chunks = buildChunksWithSql({
-      planId: basePlanId,
-      target: 'default.events',
-      partitions,
-      maxChunkBytes: 10 * GiB,
-      requireIdempotencyToken: true,
-    })
-
-    const sql = chunks[0]?.sqlTemplate ?? ''
-    expect(sql).toContain(`/* chkit backfill plan=${basePlanId}`)
-    expect(sql).toContain('INSERT INTO default.events')
-    expect(sql).toContain('SELECT *')
-    expect(sql).toContain('FROM default.events')
-    expect(sql).toContain('SETTINGS async_insert=0')
-  })
-
-  test('numeric sort key sub-chunks use direct comparison', () => {
-    const partitions: PartitionInfo[] = [
-      { partitionId: '202501', rows: 10000, bytesOnDisk: 20 * GiB, minTime: '2025-01-01T00:00:00.000Z', maxTime: '2025-01-31T00:00:00.000Z' },
-    ]
-    const sortKey: SortKeyInfo = { column: 'id', type: 'UInt64', category: 'numeric' }
-    const sortKeyRanges = new Map([
-      ['202501', { min: '100', max: '200' }],
-    ])
-
-    const chunks = buildChunksWithSql({
-      planId: basePlanId,
-      target: 'default.events',
-      partitions,
-      maxChunkBytes: 10 * GiB,
-      sortKey,
-      sortKeyRanges,
-      requireIdempotencyToken: false,
-    })
-
-    expect(chunks).toHaveLength(2)
-    expect(chunks[0]?.sqlTemplate).toContain("id >= '100'")
-    expect(chunks[0]?.sqlTemplate).toContain("id < '150'")
-    expect(chunks[0]?.sqlTemplate).not.toContain('parseDateTimeBestEffort')
-  })
-})
diff --git a/packages/plugin-backfill/src/payload.ts b/packages/plugin-backfill/src/payload.ts
index f17e096..f79d2aa 100644
--- a/packages/plugin-backfill/src/payload.ts
+++ b/packages/plugin-backfill/src/payload.ts
@@ -27,15 +27,13 @@ export function planPayload(output: BuildBackfillPlanOutput): {
     target: output.plan.target,
     from: output.plan.from,
     to: output.plan.to,
-    chunkCount: output.plan.chunks.length,
+    chunkCount: output.plan.chunkPlan.chunks.length,
     maxChunkBytes: output.plan.options.maxChunkBytes,
     sortKeyColumn: output.plan.options.sortKeyColumn,
     planPath: output.planPath,
-    strategy: output.plan.strategy,
-    partitionCount: output.plan.partitions?.length,
-    totalBytes: output.plan.partitions
-      ? output.plan.partitions.reduce((sum, p) => sum + p.bytesOnDisk, 0)
-      : undefined,
+    strategy: output.plan.execution.mode,
+    partitionCount: output.plan.chunkPlan.partitions.length,
+    totalBytes: output.plan.chunkPlan.totalBytesCompressed,
   }
 }
 
diff --git a/packages/plugin-backfill/src/planner.test.ts b/packages/plugin-backfill/src/planner.test.ts
index be8f8cb..4d83103 100644
--- a/packages/plugin-backfill/src/planner.test.ts
+++ b/packages/plugin-backfill/src/planner.test.ts
@@ -1,39 +1,52 @@
 import { describe, expect, test } from 'bun:test'
-import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'
-import { join, resolve } from 'node:path'
+import { mkdir, mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'
+import { dirname, join, resolve } from 'node:path'
 import { tmpdir } from 'node:os'
 
 import { resolveConfig } from '@chkit/core'
 
+import { buildChunkExecutionSql, rewriteSelectColumns } from './chunking/sql.js'
+import { generateIdempotencyToken } from './chunking/utils/ids.js'
 import { PlanSchema } from './options.js'
 import { buildBackfillPlan } from './planner.js'
-import { injectSortKeyFilter, rewriteSelectColumns } from './chunking/sql.js'
-import { computeBackfillStateDir, computeEnvironmentFingerprint } from './state.js'
+import { backfillPaths, computeBackfillStateDir, readPlan } from './state.js'
 
 function createMockQuery(opts: {
-  partitions?: Array<{ partition_id: string; total_rows: string; total_bytes: string; min_time: string; max_time: string }>
+  partitions?: Array<{
+    partition_id: string
+    total_rows: string
+    total_bytes: string
+    total_uncompressed_bytes?: string
+    min_time: string
+    max_time: string
+  }>
   sortingKey?: string
-  sortKeyType?: string
-  sortKeyRanges?: Array<{ partition_id: string; min_val: string; max_val: string }>
+  columnRows?: Array<{ name: string; type: string }>
 } = {}): <T>(sql: string) => Promise<T[]> {
   const partitions = opts.partitions ?? [
-    { partition_id: '202601', total_rows: '1000', total_bytes: '500000', min_time: '2026-01-01 00:00:00', max_time: '2026-01-01 18:00:00' },
+    {
+      partition_id: '202601',
+      total_rows: '1000',
+      total_bytes: '500000',
+      total_uncompressed_bytes: '1000000',
+      min_time: '2026-01-01 00:00:00',
+      max_time: '2026-01-01 18:00:00',
+    },
   ]
   const sortingKey = opts.sortingKey ?? 'event_time'
-  const sortKeyType = opts.sortKeyType ?? 'DateTime'
-  const sortKeyRanges = opts.sortKeyRanges ?? []
+  const columnRows = opts.columnRows ?? [{ name: 'event_time', type: 'DateTime' }]
 
   return async <T>(sql: string) => {
-    if (sql.includes('system.parts')) return partitions as T[]
-    if (sql.includes('system.tables')) return [{ sorting_key: sortingKey }] as T[]
-    if (sql.includes('system.columns')) return [{ type: sortKeyType }] as T[]
-    if (sql.includes('min(') && sql.includes('max(')) return sortKeyRanges as T[]
+    if (sql.includes('SELECT 1 FROM')) return [{ ok: 1 }] as T[]
+    if (sql.includes('FROM system.parts')) return partitions as T[]
+    if (sql.includes('FROM system.tables')) return [{ sorting_key: sortingKey }] as T[]
+    if (sql.includes('FROM system.columns')) return columnRows as T[]
     return [] as T[]
   }
 }
 
 describe('@chkit/plugin-backfill planning', () => {
-  test('each plan gets a unique random id', async () => {
+  test('each plan gets a unique random id and canonical chunk plan', async () => {
     const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-'))
     const configPath = join(dir, 'clickhouse.config.ts')
 
@@ -42,12 +55,37 @@ describe('@chkit/plugin-backfill planning', () => {
         schema: './schema.ts',
         metaDir: './chkit/meta',
       })
-      const opts = PlanSchema.parse({ target: 'app.events', from: '2026-01-01T00:00:00.000Z', to: '2026-01-01T18:00:00.000Z' })
+      const opts = PlanSchema.parse({
+        target: 'app.events',
+        from: '2026-01-01T00:00:00.000Z',
+        to: '2026-01-01T18:00:00.000Z',
+      })
       const mockQuery = createMockQuery({
         partitions: [
-          { partition_id: '202601a', total_rows: '500', total_bytes: '250000', min_time: '2026-01-01 00:00:00', max_time: '2026-01-01 06:00:00' },
-          { partition_id: '202601b', total_rows: '500', total_bytes: '250000', min_time: '2026-01-01 06:00:00', max_time: '2026-01-01 12:00:00' },
-          { partition_id: '202601c', total_rows: '500', total_bytes: '250000', min_time: '2026-01-01 12:00:00', max_time: '2026-01-01 18:00:00' },
+          {
+            partition_id: '202601a',
+            total_rows: '500',
+            total_bytes: '250000',
+            total_uncompressed_bytes: '500000',
+            min_time: '2026-01-01 00:00:00',
+            max_time: '2026-01-01 06:00:00',
+          },
+          {
+            partition_id: '202601b',
+            total_rows: '500',
+            total_bytes: '250000',
+            total_uncompressed_bytes: '500000',
+            min_time: '2026-01-01 06:00:00',
+            max_time: '2026-01-01 12:00:00',
+          },
+          {
+            partition_id: '202601c',
+            total_rows: '500',
+            total_bytes: '250000',
+            total_uncompressed_bytes: '500000',
+            min_time: '2026-01-01 12:00:00',
+            max_time: '2026-01-01 18:00:00',
+          },
         ],
       })
 
@@ -56,12 +94,24 @@ describe('@chkit/plugin-backfill planning', () => {
 
       expect(first.plan.planId).not.toBe(second.plan.planId)
       expect(first.plan.planId).toMatch(/^[a-f0-9]{16}$/)
-      expect(first.plan.chunks).toHaveLength(3)
-
-      const chunk = first.plan.chunks[0]
-      expect(chunk?.idempotencyToken.length).toBe(64)
-      expect(chunk?.sqlTemplate).toContain('INSERT INTO app.events')
-      expect(chunk?.sqlTemplate).toContain(`insert_deduplication_token='${chunk?.idempotencyToken}'`)
+      expect(first.plan.chunkPlan.chunks).toHaveLength(3)
+
+      const chunk = first.plan.chunkPlan.chunks[0]
+      const token = chunk ? generateIdempotencyToken(first.plan.planId, chunk.id) : ''
+      const sql = chunk
+        ? buildChunkExecutionSql({
+          planId: first.plan.planId,
+          chunk,
+          target: first.plan.target,
+          sourceTarget: first.plan.execution.sourceTarget,
+          table: first.plan.chunkPlan.table,
+          idempotencyToken: token,
+        })
+        : ''
+
+      expect(token).toHaveLength(64)
+      expect(sql).toContain('INSERT INTO app.events')
+      expect(sql).toContain(`insert_deduplication_token='${token}'`)
     } finally {
       await rm(dir, { recursive: true, force: true })
     }
@@ -76,22 +126,13 @@ describe('@chkit/plugin-backfill planning', () => {
         schema: './schema.ts',
         metaDir: './chkit/meta',
       })
-      const opts = PlanSchema.parse({ target: 'app.events', from: '2026-01-01T00:00:00.000Z', to: '2026-01-01T07:00:00.000Z' })
-      const mockQuery = createMockQuery({
-        partitions: [
-          { partition_id: '202601a', total_rows: '250', total_bytes: '125000', min_time: '2026-01-01 00:00:00', max_time: '2026-01-01 02:00:00' },
-          { partition_id: '202601b', total_rows: '250', total_bytes: '125000', min_time: '2026-01-01 02:00:00', max_time: '2026-01-01 04:00:00' },
-          { partition_id: '202601c', total_rows: '250', total_bytes: '125000', min_time: '2026-01-01 04:00:00', max_time: '2026-01-01 06:00:00' },
-          { partition_id: '202601d', total_rows: '250', total_bytes: '125000', min_time: '2026-01-01 06:00:00', max_time: '2026-01-01 07:00:00' },
-        ],
-      })
-
-      const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery })
+      const opts = PlanSchema.parse({ target: 'app.events' })
+      const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: createMockQuery() })
 
       const raw = await readFile(output.planPath, 'utf8')
-      const persisted = JSON.parse(raw) as { planId: string; chunks: Array<{ id: string }> }
+      const persisted = JSON.parse(raw) as { planId: string; chunkPlan: { chunks: Array<{ id: string }> } }
       expect(persisted.planId).toBe(output.plan.planId)
-      expect(persisted.chunks.length).toBe(4)
+      expect(persisted.chunkPlan.chunks.length).toBe(1)
       expect(output.planPath).toContain('/plans/')
     } finally {
       await rm(dir, { recursive: true, force: true })
@@ -108,45 +149,24 @@ describe('@chkit/plugin-backfill planning', () => {
         metaDir: './chkit/meta',
       })
       const opts = PlanSchema.parse({ target: 'app.events' })
-      const mockQuery = createMockQuery({
-        sortingKey: 'session_date',
-        sortKeyType: 'Date',
+      const output = await buildBackfillPlan({
+        opts,
+        configPath,
+        config,
+        clickhouseQuery: createMockQuery({
+          sortingKey: 'session_date',
+          columnRows: [{ name: 'session_date', type: 'Date' }],
+        }),
       })
 
-      const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery })
-
-      expect(output.plan.sortKey?.column).toBe('session_date')
-      expect(output.plan.sortKey?.category).toBe('datetime')
+      expect(output.plan.chunkPlan.table.sortKeys[0]?.name).toBe('session_date')
+      expect(output.plan.chunkPlan.table.sortKeys[0]?.category).toBe('datetime')
       expect(output.plan.options.sortKeyColumn).toBe('session_date')
     } finally {
       await rm(dir, { recursive: true, force: true })
     }
   })
 
-  test('chunk IDs are deterministic within a plan (derived from planId)', async () => {
-    const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-'))
-    const configPath = join(dir, 'clickhouse.config.ts')
-
-    try {
-      const config = resolveConfig({
-        schema: './schema.ts',
-        metaDir: './chkit/meta',
-      })
-      const opts = PlanSchema.parse({ target: 'app.events' })
-
-      const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: createMockQuery() })
-
-      const chunkIds = output.plan.chunks.map(c => c.id)
-      const uniqueIds = new Set(chunkIds)
-      expect(uniqueIds.size).toBe(chunkIds.length)
-      for (const id of chunkIds) {
-        expect(id).toMatch(/^[a-f0-9]{16}$/)
-      }
-    } finally {
-      await rm(dir, { recursive: true, force: true })
-    }
-  })
-
   test('computes state dir from config by default and plugin override', () => {
     const config = resolveConfig({
       schema: './schema.ts',
@@ -161,7 +181,7 @@ describe('@chkit/plugin-backfill planning', () => {
     expect(overriddenDir).toBe(resolve('/tmp/project/custom-state'))
   })
 
-  test('generates MV replay SQL when schema contains materialized view', async () => {
+  test('generates MV replay execution metadata and SQL when schema contains materialized view', async () => {
     const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-'))
     const configPath = join(dir, 'clickhouse.config.ts')
     const schemaPath = join(dir, 'schema.ts')
@@ -196,348 +216,56 @@ export const events_mv = {
         metaDir: './chkit/meta',
       })
       const opts = PlanSchema.parse({ target: 'app.events_agg' })
-      const mockQuery = createMockQuery()
-
-      const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery })
-
-      expect(output.plan.strategy).toBe('mv_replay')
-
-      const chunk = output.plan.chunks[0]
-      expect(chunk?.sqlTemplate).toContain('INSERT INTO app.events_agg')
-      expect(chunk?.sqlTemplate).not.toContain('WITH _backfill_source AS (')
-      expect(chunk?.sqlTemplate).toContain('SELECT toStartOfHour(event_time)')
-      expect(chunk?.sqlTemplate).toContain('FROM app.events')
-      expect(chunk?.sqlTemplate).toContain('GROUP BY event_time')
-      expect(chunk?.sqlTemplate).toContain('SETTINGS async_insert=0')
-      expect(chunk?.sqlTemplate).toContain(`insert_deduplication_token='${chunk?.idempotencyToken}'`)
-      expect(chunk?.sqlTemplate).not.toContain('FROM app.events_agg')
-    } finally {
-      await rm(dir, { recursive: true, force: true })
-    }
-  })
-
-  test('MV replay rewrites SELECT columns to match target table order', async () => {
-    const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-'))
-    const configPath = join(dir, 'clickhouse.config.ts')
-    const schemaPath = join(dir, 'schema.ts')
-
-    try {
-      await writeFile(
-        schemaPath,
-        `export const sessions = {
-  kind: 'table',
-  database: 'app',
-  name: 'session_analytics',
-  columns: [
-    { name: 'session_date', type: 'Date' },
-    { name: 'session_id', type: 'String' },
-    { name: 'skills', type: 'Array(String)' },
-    { name: 'slash_commands', type: 'Array(String)' },
-    { name: 'ingested_at', type: 'DateTime' },
-  ],
-  engine: 'MergeTree',
-  primaryKey: ['session_date'],
-  orderBy: ['session_date', 'session_id'],
-}
-export const sessions_mv = {
-  kind: 'materialized_view',
-  database: 'app',
-  name: 'sessions_mv',
-  to: { database: 'app', name: 'session_analytics' },
-  as: "SELECT *, extractAll(content, 'skill') AS skills, extractAll(content, 'cmd') AS slash_commands FROM app.raw_sessions",
-}
-`
-      )
-
-      const config = resolveConfig({
-        schema: './schema.ts',
-        metaDir: './chkit/meta',
-      })
-      const opts = PlanSchema.parse({ target: 'app.session_analytics' })
-      const mockQuery = createMockQuery()
-
-      const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery })
-
-      expect(output.plan.strategy).toBe('mv_replay')
-
-      const chunk = output.plan.chunks[0]
-      expect(chunk?.sqlTemplate).toContain('INSERT INTO app.session_analytics')
-      expect(chunk?.sqlTemplate).not.toContain('INSERT INTO app.session_analytics (')
-      expect(chunk?.sqlTemplate).toContain(
-        "SELECT session_date, session_id, extractAll(content, 'skill') AS skills, extractAll(content, 'cmd') AS slash_commands, ingested_at"
-      )
-    } finally {
-      await rm(dir, { recursive: true, force: true })
-    }
-  })
-
-  test('omits insert_deduplication_token when requireIdempotencyToken is false', async () => {
-    const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-'))
-    const configPath = join(dir, 'clickhouse.config.ts')
-
-    try {
-      const config = resolveConfig({
-        schema: './schema.ts',
-        metaDir: './chkit/meta',
-      })
-      const opts = PlanSchema.parse({ target: 'app.events', requireIdempotencyToken: false })
-      const mockQuery = createMockQuery()
-
-      const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery })
-
-      const chunk = output.plan.chunks[0]
-      expect(chunk?.idempotencyToken).toBe('')
-      expect(chunk?.sqlTemplate).toContain('SETTINGS async_insert=0')
-      expect(chunk?.sqlTemplate).not.toContain('insert_deduplication_token')
-    } finally {
-      await rm(dir, { recursive: true, force: true })
-    }
-  })
-
-  test('uses partition strategy when no MV is found', async () => {
-    const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-'))
-    const configPath = join(dir, 'clickhouse.config.ts')
-
-    try {
-      const config = resolveConfig({
-        schema: './schema.ts',
-        metaDir: './chkit/meta',
-      })
-      const opts = PlanSchema.parse({ target: 'app.events' })
-      const mockQuery = createMockQuery()
-
-      const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery })
-
-      expect(output.plan.strategy).toBe('partition')
-
-      const chunk = output.plan.chunks[0]
-      expect(chunk?.sqlTemplate).toContain('INSERT INTO app.events')
-      expect(chunk?.sqlTemplate).toContain('FROM app.events')
-      expect(chunk?.sqlTemplate).toContain('_partition_id')
-      expect(chunk?.sqlTemplate).toContain('SETTINGS async_insert=0')
-    } finally {
-      await rm(dir, { recursive: true, force: true })
-    }
-  })
-
-  test('throws when no partitions found', async () => {
-    const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-'))
-    const configPath = join(dir, 'clickhouse.config.ts')
-
-    try {
-      const config = resolveConfig({
-        schema: './schema.ts',
-        metaDir: './chkit/meta',
-      })
-      const opts = PlanSchema.parse({ target: 'app.events' })
-      const mockQuery = createMockQuery({ partitions: [] })
+      const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: createMockQuery() })
 
-      await expect(
-        buildBackfillPlan({ opts, configPath, config, clickhouseQuery: mockQuery })
-      ).rejects.toThrow('No partitions found')
+      expect(output.plan.execution.mode).toBe('mv_replay')
+
+      const chunk = output.plan.chunkPlan.chunks[0]
+      const sql = chunk
+        ? buildChunkExecutionSql({
+          planId: output.plan.planId,
+          chunk,
+          target: output.plan.target,
+          sourceTarget: output.plan.execution.sourceTarget,
+          table: output.plan.chunkPlan.table,
+          mvAsQuery: output.plan.execution.mvAsQuery,
+          targetColumns: output.plan.execution.targetColumns,
+          idempotencyToken: generateIdempotencyToken(output.plan.planId, chunk.id),
+        })
+        : ''
+
+      expect(sql).toContain('INSERT INTO app.events_agg')
+      expect(sql).toContain('SELECT toStartOfHour(event_time)')
+      expect(sql).toContain('FROM app.events')
+      expect(sql).toContain('GROUP BY event_time')
+      expect(sql).toContain('SETTINGS async_insert=0')
+      expect(sql).not.toContain('FROM app.events_agg')
     } finally {
       await rm(dir, { recursive: true, force: true })
     }
   })
-})
-
-describe('rewriteSelectColumns', () => {
-  test('reorders SELECT columns to match target table order', () => {
-    const query = 'SELECT *, _foo as bar, _baz as qux FROM source WHERE status = 1'
-    const result = rewriteSelectColumns(query, ['col_a', 'bar', 'col_b', 'qux'])
-
-    expect(result).toContain('SELECT col_a, _foo as bar, col_b, _baz as qux')
-    expect(result).toContain('FROM source')
-    expect(result).toContain('WHERE status = 1')
-  })
-
-  test('preserves WITH clause when rewriting SELECT', () => {
-    const query = [
-      'WITH',
-      "  arrayDistinct(extractAll(content, '\\w+')) AS _skills,",
-      "  toUInt64(JSONExtractFloat(meta, 'input')) AS _input_tokens",
-      'SELECT *, _skills as skills, _input_tokens as input_tokens',
-      'FROM app.sessions',
-      'WHERE length(content) > 0',
-    ].join('\n')
-
-    const result = rewriteSelectColumns(query, ['session_id', 'skills', 'content', 'input_tokens'])
-
-    expect(result).toContain('arrayDistinct')
-    expect(result).toContain('_input_tokens')
-    expect(result).toContain('SELECT session_id, _skills as skills, content, _input_tokens as input_tokens')
-    expect(result).toContain('FROM app.sessions')
-    expect(result).toContain('WHERE length(content) > 0')
-  })
 
-  test('handles SELECT without star expansion', () => {
-    const query = 'SELECT toStartOfHour(event_time) AS event_time, count() AS cnt FROM events GROUP BY event_time'
-    const result = rewriteSelectColumns(query, ['cnt', 'event_time'])
+  test('MV replay rewrites SELECT columns to match target table order', () => {
+    const rewritten = rewriteSelectColumns(
+      "SELECT *, extractAll(content, 'skill') AS skills, extractAll(content, 'cmd') AS slash_commands FROM app.raw_sessions",
+      ['session_date', 'session_id', 'skills', 'slash_commands', 'ingested_at']
+    )
 
-    expect(result).toContain('SELECT count() AS cnt, toStartOfHour(event_time) AS event_time')
-    expect(result).toContain('FROM events')
-    expect(result).toContain('GROUP BY event_time')
+    expect(rewritten).toContain('SELECT session_date, session_id, extractAll(content, \'skill\') AS skills, extractAll(content, \'cmd\') AS slash_commands, ingested_at')
+    expect(rewritten).toContain('FROM app.raw_sessions')
   })
 
-  test('returns query unchanged when SELECT/FROM cannot be found', () => {
-    const query = 'INSERT INTO t VALUES (1, 2)'
-    const result = rewriteSelectColumns(query, ['a', 'b'])
+  test('MV replay preserves DISTINCT when rewriting projection columns', () => {
+    const rewritten = rewriteSelectColumns(
+      'SELECT DISTINCT event_time AS ts, user_id AS uid FROM app.events',
+      ['uid', 'ts']
+    )
 
-    expect(result).toBe(query)
+    expect(rewritten).toContain('SELECT DISTINCT user_id AS uid, event_time AS ts')
+    expect(rewritten).toContain('FROM app.events')
   })
-})
-
-describe('injectSortKeyFilter', () => {
-  const from = '2025-01-01T00:00:00.000Z'
-  const to = '2025-01-01T06:00:00.000Z'
-
-  test('injects WHERE before GROUP BY for datetime filter', () => {
-    const query = 'SELECT toStartOfHour(event_time) AS event_time, count() AS count FROM app.events GROUP BY event_time'
-    const result = injectSortKeyFilter(query, 'event_time', 'datetime', from, to)
-
-    expect(result).toContain("WHERE event_time >= parseDateTimeBestEffort('2025-01-01T00:00:00.000Z')")
-    expect(result).toContain("AND event_time < parseDateTimeBestEffort('2025-01-01T06:00:00.000Z')")
-    expect(result).toContain('GROUP BY event_time')
-    expect(result.indexOf('WHERE')).toBeLessThan(result.indexOf('GROUP BY'))
-  })
-
-  test('appends AND to existing WHERE clause', () => {
-    const query = 'SELECT * FROM app.events WHERE status = 1'
-    const result = injectSortKeyFilter(query, 'event_time', 'datetime', from, to)
-
-    expect(result).toContain('WHERE status = 1')
-    expect(result).toContain("AND event_time >= parseDateTimeBestEffort('")
-    expect(result).toContain("AND event_time < parseDateTimeBestEffort('")
-    expect(result.match(/WHERE/g)?.length).toBe(1)
-  })
-
-  test('numeric sort key uses direct comparison', () => {
-    const query = 'SELECT * FROM app.events WHERE status = 1'
-    const result = injectSortKeyFilter(query, 'id', 'numeric', '100', '200')
-
-    expect(result).toContain("AND id >= '100'")
-    expect(result).toContain("AND id < '200'")
-    expect(result).not.toContain('parseDateTimeBestEffort')
-  })
-
-  test('handles query with WHERE and QUALIFY', () => {
-    const query = [
-      'SELECT *, skills',
-      'FROM app.sessions AS s',
-      'WHERE length(timestamps) > 0',
-      "QUALIFY ROW_NUMBER() OVER (PARTITION BY s.id ORDER BY s.ts DESC) = 1",
-    ].join('\n')
-    const result = injectSortKeyFilter(query, 'session_date', 'datetime', from, to)
-
-    expect(result).toContain('WHERE length(timestamps) > 0')
-    expect(result).toContain("AND session_date >= parseDateTimeBestEffort('")
-    expect(result.indexOf('AND session_date')).toBeLessThan(result.indexOf('QUALIFY'))
-  })
-
-  test('handles MV query with WITH column expressions', () => {
-    const query = [
-      'WITH',
-      "  arrayDistinct(arrayFilter(x -> x != '', extractAll(content, '\\\\w+'))) AS _skills",
-      'SELECT',
-      '  id,',
-      '  _skills as skills,',
-      '  ts',
-      'FROM app.sessions',
-      'WHERE length(content) > 0',
-    ].join('\n')
-    const result = injectSortKeyFilter(query, 'ts', 'datetime', from, to)
-
-    expect(result.match(/WHERE/g)?.length).toBe(1)
-    expect(result).toContain("AND ts >= parseDateTimeBestEffort('")
-    expect(result).toContain('arrayDistinct')
-  })
-
-  test('injects WHERE at end when query has no WHERE and no trailing clauses', () => {
-    const query = 'SELECT * FROM app.events'
-    const result = injectSortKeyFilter(query, 'event_time', 'datetime', from, to)
-
-    expect(result).toContain("WHERE event_time >= parseDateTimeBestEffort('")
-    expect(result).toContain("AND event_time < parseDateTimeBestEffort('")
-  })
-
-  test('ignores WHERE inside parenthesized subquery', () => {
-    const query = 'SELECT * FROM (SELECT * FROM app.events WHERE inner = 1) AS sub GROUP BY id'
-    const result = injectSortKeyFilter(query, 'ts', 'datetime', from, to)
-
-    expect(result).toContain("WHERE ts >= parseDateTimeBestEffort('")
-    expect(result.indexOf("WHERE ts")).toBeLessThan(result.indexOf('GROUP BY'))
-    expect(result).toContain('WHERE inner = 1')
-  })
-})
-
-describe('computeEnvironmentFingerprint', () => {
-  test('returns undefined when clickhouse is undefined', () => {
-    expect(computeEnvironmentFingerprint(undefined)).toBeUndefined()
-  })
-
-  test('returns correct structure with fingerprint, url origin, and database', () => {
-    const env = computeEnvironmentFingerprint({
-      url: 'https://my-cluster.clickhouse.cloud:8443/some/path',
-      database: 'analytics',
-    })
 
-    expect(env).toBeDefined()
-    expect(env?.fingerprint).toMatch(/^[a-f0-9]{16}$/)
-    expect(env?.url).toBe('https://my-cluster.clickhouse.cloud:8443')
-    expect(env?.database).toBe('analytics')
-  })
-
-  test('same URL+database produces same fingerprint', () => {
-    const a = computeEnvironmentFingerprint({ url: 'https://host:8443/path', database: 'db1' })
-    const b = computeEnvironmentFingerprint({ url: 'https://host:8443/other', database: 'db1' })
-
-    expect(a?.fingerprint).toBe(b?.fingerprint)
-  })
-
-  test('different database produces different fingerprint', () => {
-    const a = computeEnvironmentFingerprint({ url: 'https://host:8443', database: 'staging' })
-    const b = computeEnvironmentFingerprint({ url: 'https://host:8443', database: 'production' })
-
-    expect(a?.fingerprint).not.toBe(b?.fingerprint)
-  })
-
-  test('different host produces different fingerprint', () => {
-    const a = computeEnvironmentFingerprint({ url: 'https://staging.ch.cloud:8443', database: 'db' })
-    const b = computeEnvironmentFingerprint({ url: 'https://prod.ch.cloud:8443', database: 'db' })
-
-    expect(a?.fingerprint).not.toBe(b?.fingerprint)
-  })
-})
-
-describe('environment binding in plan', () => {
-  test('plan includes environment when clickhouse is provided', async () => {
-    const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-'))
-    const configPath = join(dir, 'clickhouse.config.ts')
-
-    try {
-      const config = resolveConfig({
-        schema: './schema.ts',
-        metaDir: './chkit/meta',
-      })
-
-      const output = await buildBackfillPlan({
-        opts: PlanSchema.parse({ target: 'app.events' }),
-        configPath,
-        config,
-        clickhouse: { url: 'https://my-cluster.ch.cloud:8443', database: 'analytics' },
-        clickhouseQuery: createMockQuery(),
-      })
-
-      expect(output.plan.environment).toBeDefined()
-      expect(output.plan.environment?.fingerprint).toMatch(/^[a-f0-9]{16}$/)
-      expect(output.plan.environment?.url).toBe('https://my-cluster.ch.cloud:8443')
-      expect(output.plan.environment?.database).toBe('analytics')
-    } finally {
-      await rm(dir, { recursive: true, force: true })
-    }
-  })
-
-  test('plan omits environment when clickhouse connection info is not provided', async () => {
+  test('omits idempotency token when disabled', async () => {
     const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-'))
     const configPath = join(dir, 'clickhouse.config.ts')
 
@@ -546,50 +274,57 @@ describe('environment binding in plan', () => {
         schema: './schema.ts',
         metaDir: './chkit/meta',
       })
+      const opts = PlanSchema.parse({ target: 'app.events', requireIdempotencyToken: false })
+      const output = await buildBackfillPlan({ opts, configPath, config, clickhouseQuery: createMockQuery() })
 
-      const output = await buildBackfillPlan({
-        opts: PlanSchema.parse({ target: 'app.events' }),
-        configPath,
-        config,
-        clickhouseQuery: createMockQuery(),
-      })
-
-      expect(output.plan.environment).toBeUndefined()
+      const chunk = output.plan.chunkPlan.chunks[0]
+      const sql = chunk
+        ? buildChunkExecutionSql({
+          planId: output.plan.planId,
+          chunk,
+          target: output.plan.target,
+          sourceTarget: output.plan.execution.sourceTarget,
+          table: output.plan.chunkPlan.table,
+          idempotencyToken: '',
+        })
+        : ''
+
+      expect(output.plan.execution.requireIdempotencyToken).toBe(false)
+      expect(sql).toContain('SETTINGS async_insert=0')
+      expect(sql).not.toContain('insert_deduplication_token')
     } finally {
       await rm(dir, { recursive: true, force: true })
     }
   })
 
-  test('plan includes environment from different clickhouse configs', async () => {
+  test('rejects persisted legacy plans with an actionable error', async () => {
     const dir = await mkdtemp(join(tmpdir(), 'chkit-backfill-plugin-'))
     const configPath = join(dir, 'clickhouse.config.ts')
+    const planId = 'deadbeefdeadbeef'
 
     try {
       const config = resolveConfig({
         schema: './schema.ts',
         metaDir: './chkit/meta',
       })
-      const opts = PlanSchema.parse({ target: 'app.events' })
-
-      const staging = await buildBackfillPlan({
-        opts,
-        configPath,
-        config,
-        clickhouse: { url: 'https://staging.ch.cloud:8443', database: 'analytics' },
-        clickhouseQuery: createMockQuery(),
-      })
-
-      const production = await buildBackfillPlan({
-        opts,
+      const stateDir = computeBackfillStateDir(config, configPath)
+      const { planPath } = backfillPaths(stateDir, planId)
+      await mkdir(dirname(planPath), { recursive: true })
+
+      await writeFile(planPath, JSON.stringify({
+        planId,
+        target: 'app.events',
+        createdAt: '2026-01-01T00:00:00.000Z',
+        from: '2026-01-01T00:00:00.000Z',
+        to: '2026-01-01T01:00:00.000Z',
+        chunks: [],
+      }))
+
+      await expect(readPlan({
+        planId,
         configPath,
         config,
-        clickhouse: { url: 'https://prod.ch.cloud:8443', database: 'analytics' },
-        clickhouseQuery: createMockQuery(),
-      })
-
-      expect(staging.plan.environment?.url).toBe('https://staging.ch.cloud:8443')
-      expect(production.plan.environment?.url).toBe('https://prod.ch.cloud:8443')
-      expect(staging.plan.environment?.fingerprint).not.toBe(production.plan.environment?.fingerprint)
+      })).rejects.toThrow('uses a previous chunking format')
     } finally {
       await rm(dir, { recursive: true, force: true })
     }
diff --git a/packages/plugin-backfill/src/planner.ts b/packages/plugin-backfill/src/planner.ts
index 9f586c6..eb5029c 100644
--- a/packages/plugin-backfill/src/planner.ts
+++ b/packages/plugin-backfill/src/planner.ts
@@ -1,10 +1,10 @@
 import { dirname } from 'node:path'
 
-import { loadSchemaDefinitions } from '@chkit/core/schema-loader'
 import type { ResolvedChxConfig } from '@chkit/core'
+import { loadSchemaDefinitions } from '@chkit/core/schema-loader'
 
-import { analyzeAndChunk } from './chunking/analyze.js'
-import { buildChunkSql } from './chunking/sql.js'
+import { encodeChunkPlanForPersistence } from './chunking/boundary-codec.js'
+import { generateChunkPlan } from './chunking/planner.js'
 import { findMvForTarget } from './detect.js'
 import { BackfillConfigError } from './errors.js'
 import type { PlanOptions } from './options.js'
@@ -12,13 +12,10 @@ import {
   backfillPaths,
   computeBackfillStateDir,
   computeEnvironmentFingerprint,
+  nowIso,
   writeJson,
 } from './state.js'
-import type {
-  BackfillChunk,
-  BuildBackfillPlanOutput,
-  PartitionInfo,
-} from './types.js'
+import type { BuildBackfillPlanOutput } from './types.js'
 
 export async function buildBackfillPlan(input: {
   opts: PlanOptions
@@ -33,40 +30,35 @@ export async function buildBackfillPlan(input: {
     throw new BackfillConfigError('Invalid target format. Expected <database.table>.')
   }
 
-  const env = computeEnvironmentFingerprint(input.clickhouse)
-
-  // 1. Analyze table and build planned chunks
-  const {
-    planId,
-    partitions,
-    sortKey,
-    sortKeys,
-    chunks: plannedChunks,
-    partitionDiagnostics,
-  } = await analyzeAndChunk({
+  const chunkPlan = await generateChunkPlan({
     database,
     table,
     from: opts.from,
     to: opts.to,
-    maxChunkBytes: opts.maxChunkBytes,
-    requireIdempotencyToken: opts.requireIdempotencyToken,
+    targetChunkBytes: opts.maxChunkBytes,
     query: input.clickhouseQuery,
   })
 
-  if (partitions.length === 0) {
+  const firstPartition = chunkPlan.partitions[0]
+  if (!firstPartition) {
     throw new BackfillConfigError(
       `No partitions found for ${opts.target}${opts.from || opts.to ? ' within the specified time range' : ''}. The table may be empty.`
     )
   }
 
-  const firstPartition = partitions[0] as PartitionInfo
-  const derivedFrom = opts.from ?? partitions.reduce((min, p) => (p.minTime < min ? p.minTime : min), firstPartition.minTime)
-  const derivedTo = opts.to ?? partitions.reduce((max, p) => (p.maxTime > max ? p.maxTime : max), firstPartition.maxTime)
+  const env = computeEnvironmentFingerprint(input.clickhouse)
+  const derivedFrom = opts.from ?? chunkPlan.partitions.reduce(
+    (min, partition) => (partition.minTime < min ? partition.minTime : min),
+    firstPartition.minTime
+  )
+  const derivedTo = opts.to ?? chunkPlan.partitions.reduce(
+    (max, partition) => (partition.maxTime > max ? partition.maxTime : max),
+    firstPartition.maxTime
+  )
 
   const stateDir = computeBackfillStateDir(input.config, input.configPath, opts.stateDir)
-  const paths = backfillPaths(stateDir, planId)
+  const paths = backfillPaths(stateDir, chunkPlan.planId)
 
-  // 2. Detect MV for replay strategy
   let mvAsQuery: string | undefined
   let targetColumns: string[] | undefined
 
@@ -78,73 +70,37 @@ export async function buildBackfillPlan(input: {
     if (mv) {
       mvAsQuery = mv.as
       const tableDef = definitions.find(
-        (d) => d.kind === 'table' && d.database === database && d.name === table
+        (definition) => definition.kind === 'table' && definition.database === database && definition.name === table
       )
-      if (tableDef && tableDef.kind === 'table') {
-        targetColumns = tableDef.columns.map((c) => c.name)
+      if (tableDef?.kind === 'table') {
+        targetColumns = tableDef.columns.map((column) => column.name)
       }
     }
   } catch {
-    // Schema load failed — fall back to direct copy
+    // Schema load failed, fall back to direct copy.
   }
 
-  // 3. Stamp SQL on each planned chunk to produce BackfillChunk[]
-  const chunks: BackfillChunk[] = plannedChunks.map(planned => {
-    const sqlTemplate = buildChunkSql({
-      planId,
-      chunk: planned,
-      target: opts.target,
-      sortKey,
-      sortKeys,
-      mvAsQuery,
-      targetColumns,
-    })
-
-    return {
-      id: planned.id,
-      from: planned.from,
-      to: planned.to,
-      status: 'pending' as const,
-      attempts: 0,
-      idempotencyToken: planned.idempotencyToken,
-      sqlTemplate,
-      partitionId: planned.partitionId,
-      estimatedBytes: planned.estimatedBytes,
-      ...(planned.estimatedRows !== undefined ? { estimatedRows: planned.estimatedRows } : {}),
-      ...(planned.ranges ? { ranges: planned.ranges } : {}),
-      ...(planned.sortKeyFrom !== undefined ? { sortKeyFrom: planned.sortKeyFrom } : {}),
-      ...(planned.sortKeyTo !== undefined ? { sortKeyTo: planned.sortKeyTo } : {}),
-      ...(planned.isHotKey !== undefined ? { isHotKey: planned.isHotKey } : {}),
-      ...(planned.hotDimensionIndex !== undefined ? { hotDimensionIndex: planned.hotDimensionIndex } : {}),
-      ...(planned.hotKeyValue !== undefined ? { hotKeyValue: planned.hotKeyValue } : {}),
-      ...(planned.estimateConfidence !== undefined ? { estimateConfidence: planned.estimateConfidence } : {}),
-      ...(planned.estimateReason !== undefined ? { estimateReason: planned.estimateReason } : {}),
-      ...(planned.lineage ? { lineage: planned.lineage } : {}),
-    }
-  })
-
-  const strategy = mvAsQuery ? 'mv_replay' : 'partition'
-
   const plan = {
-    planId,
+    planId: chunkPlan.planId,
     target: opts.target,
-    createdAt: '1970-01-01T00:00:00.000Z',
-    status: 'planned' as const,
-    strategy: strategy as 'partition' | 'mv_replay',
+    createdAt: nowIso(),
     ...(env ? { environment: env } : {}),
     from: derivedFrom,
     to: derivedTo,
-    chunks,
-    partitions,
-    sortKey,
-    sortKeys,
-    partitionDiagnostics,
+    chunkPlan,
+    execution: {
+      mode: mvAsQuery ? 'mv_replay' as const : 'copy' as const,
+      sourceTarget: opts.target,
+      ...(mvAsQuery ? { mvAsQuery } : {}),
+      ...(targetColumns ? { targetColumns } : {}),
+      requireIdempotencyToken: opts.requireIdempotencyToken,
+    },
     options: {
       maxChunkBytes: opts.maxChunkBytes,
       maxParallelChunks: opts.maxParallelChunks,
       maxRetriesPerChunk: opts.maxRetriesPerChunk,
       requireIdempotencyToken: opts.requireIdempotencyToken,
-      sortKeyColumn: sortKey?.column,
+      sortKeyColumn: chunkPlan.table.sortKeys[0]?.name,
     },
     policy: {
       requireDryRunBeforeRun: opts.requireDryRunBeforeRun,
@@ -158,7 +114,10 @@ export async function buildBackfillPlan(input: {
     },
   }
 
-  await writeJson(paths.planPath, plan)
+  await writeJson(paths.planPath, {
+    ...plan,
+    chunkPlan: encodeChunkPlanForPersistence(plan.chunkPlan),
+  })
 
   return {
     plan,
diff --git a/packages/plugin-backfill/src/plugin.test.ts b/packages/plugin-backfill/src/plugin.test.ts
index 3e25054..a3b36e5 100644
--- a/packages/plugin-backfill/src/plugin.test.ts
+++ b/packages/plugin-backfill/src/plugin.test.ts
@@ -1,9 +1,16 @@
 import { describe, expect, test } from 'bun:test'
+import { readFileSync } from 'node:fs'
 
 import * as sdk from './sdk.js'
 import * as root from './index.js'
 import { backfill, createBackfillPlugin } from './plugin.js'
 
+const pluginBackfillPackage = JSON.parse(
+  readFileSync(new URL('../package.json', import.meta.url), 'utf8')
+) as {
+  exports: Record<string, { source: string; types: string; default: string }>
+}
+
 describe('@chkit/plugin-backfill plugin surface', () => {
   test('exposes commands and typed registration helper', () => {
     const plugin = createBackfillPlugin()
@@ -29,17 +36,21 @@ describe('@chkit/plugin-backfill plugin surface', () => {
     expect(root).not.toHaveProperty('executeBackfill')
 
     expect(sdk).toHaveProperty('analyzeAndChunk')
+    expect(sdk).toHaveProperty('generateChunkPlan')
     expect(sdk).toHaveProperty('executeBackfill')
-    expect(sdk).toHaveProperty('buildChunkSql')
+    expect(sdk).toHaveProperty('buildChunkExecutionSql')
   })
 
-  test('package exports resolve root and sdk subpath separately', async () => {
-    const packageRoot = await import('@chkit/plugin-backfill')
-    const packageSdk = await import('@chkit/plugin-backfill/sdk')
-
-    expect(packageRoot).toHaveProperty('backfill')
-    expect(packageRoot).not.toHaveProperty('analyzeAndChunk')
-    expect(packageSdk).toHaveProperty('analyzeAndChunk')
-    expect(packageSdk).toHaveProperty('executeBackfill')
+  test('package exports declare root and sdk subpath separately', () => {
+    expect(pluginBackfillPackage.exports['.']).toEqual({
+      source: './src/index.ts',
+      types: './dist/index.d.ts',
+      default: './dist/index.js',
+    })
+    expect(pluginBackfillPackage.exports['./sdk']).toEqual({
+      source: './src/sdk.ts',
+      types: './dist/sdk.d.ts',
+      default: './dist/sdk.js',
+    })
   })
 })
diff --git a/packages/plugin-backfill/src/plugin.ts b/packages/plugin-backfill/src/plugin.ts
index 53079d8..af5b64b 100644
--- a/packages/plugin-backfill/src/plugin.ts
+++ b/packages/plugin-backfill/src/plugin.ts
@@ -2,6 +2,8 @@ import { createClickHouseExecutor } from '@chkit/clickhouse'
 import { wrapPluginRun } from '@chkit/core'
 
 import { executeBackfill, type BackfillProgress } from './async-backfill.js'
+import { buildChunkExecutionSql } from './chunking/sql.js'
+import { generateIdempotencyToken } from './chunking/utils/ids.js'
 import { BackfillConfigError } from './errors.js'
 import {
   PLAN_FLAGS,
@@ -112,11 +114,22 @@ async function runBackfill(input: {
     const result = await executeBackfill({
       executor: db,
       planId: plan.planId,
-      chunks: plan.chunks.map((c) => ({ id: c.id, from: c.from, to: c.to })),
+      chunks: plan.chunkPlan.chunks.map((chunk) => ({ id: chunk.id })),
       buildQuery: (chunk) => {
-        const planChunk = plan.chunks.find((c) => c.id === chunk.id)
+        const planChunk = plan.chunkPlan.chunks.find((candidate) => candidate.id === chunk.id)
         if (!planChunk) throw new Error(`Chunk ${chunk.id} not found in plan`)
-        return planChunk.sqlTemplate
+        return buildChunkExecutionSql({
+          planId: plan.planId,
+          chunk: planChunk,
+          target: plan.target,
+          sourceTarget: plan.execution.sourceTarget,
+          table: plan.chunkPlan.table,
+          mvAsQuery: plan.execution.mvAsQuery,
+          targetColumns: plan.execution.targetColumns,
+          idempotencyToken: plan.execution.requireIdempotencyToken
+            ? generateIdempotencyToken(plan.planId, planChunk.id)
+            : '',
+        })
       },
       concurrency: input.concurrency,
       pollIntervalMs: input.pollIntervalMs,
@@ -215,12 +228,11 @@ export function createBackfillPlugin(options: PluginConfig = {}): BackfillPlugin
                 if (context.jsonMode) {
                   context.print(payload)
                 } else {
-                  const partitionCount = output.plan.partitions?.length ?? 0
-                  const totalBytes = output.plan.partitions
-                    ? formatBytes(output.plan.partitions.reduce((sum, p) => sum + p.bytesOnDisk, 0))
-                    : 'unknown'
-                  const sortKeyLabel = output.plan.sortKey
-                    ? `, sort key: ${output.plan.sortKey.column} (${output.plan.sortKey.category})`
+                  const partitionCount = output.plan.chunkPlan.partitions.length
+                  const totalBytes = formatBytes(output.plan.chunkPlan.totalBytesCompressed)
+                  const primarySortKey = output.plan.chunkPlan.table.sortKeys[0]
+                  const sortKeyLabel = primarySortKey
+                    ? `, sort key: ${primarySortKey.name} (${primarySortKey.category})`
                     : ''
                   context.print(
                     `Backfill plan ${payload.planId} for ${payload.target} (${payload.chunkCount} chunks across ${partitionCount} partitions, ~${totalBytes}${sortKeyLabel}) -> ${payload.planPath}`
diff --git a/packages/plugin-backfill/src/queries.ts b/packages/plugin-backfill/src/queries.ts
index 66780ef..ffdef75 100644
--- a/packages/plugin-backfill/src/queries.ts
+++ b/packages/plugin-backfill/src/queries.ts
@@ -35,8 +35,8 @@ export async function getBackfillStatus(input: {
       target: plan.target,
       status: 'planned',
       totals: {
-        total: plan.chunks.length,
-        pending: plan.chunks.length,
+        total: plan.chunkPlan.chunks.length,
+        pending: plan.chunkPlan.chunks.length,
         submitted: 0,
         running: 0,
         done: 0,
@@ -108,7 +108,7 @@ export async function getBackfillDoctorReport(input: {
         planId: plan.planId,
         target: plan.target,
         status: 'planned' as const,
-        totals: { total: plan.chunks.length, pending: plan.chunks.length, submitted: 0, running: 0, done: 0, failed: 0 },
+        totals: { total: plan.chunkPlan.chunks.length, pending: plan.chunkPlan.chunks.length, submitted: 0, running: 0, done: 0, failed: 0 },
         rowsWritten: 0,
         updatedAt: plan.createdAt,
         runPath: paths.runPath,
diff --git a/packages/plugin-backfill/src/sdk.ts b/packages/plugin-backfill/src/sdk.ts
index 0570001..9edf9df 100644
--- a/packages/plugin-backfill/src/sdk.ts
+++ b/packages/plugin-backfill/src/sdk.ts
@@ -1,6 +1,7 @@
 export { executeBackfill, syncProgress } from './async-backfill.js'
-export { analyzeAndChunk, analyzeTable, buildPlannedChunks } from './chunking/analyze.js'
-export { buildChunkSql, injectSortKeyFilter, rewriteSelectColumns } from './chunking/sql.js'
+export { analyzeAndChunk, analyzeTable } from './chunking/analyze.js'
+export { generateChunkPlan } from './chunking/planner.js'
+export { buildChunkExecutionSql, injectSortKeyFilter, rewriteSelectColumns } from './chunking/sql.js'
 
 export type {
   BackfillOptions,
@@ -17,13 +18,14 @@ export type {
 } from './chunking/analyze.js'
 
 export type {
-  ChunkBoundary,
+  Chunk,
+  ChunkDerivationStep,
+  ChunkPlan,
+  ChunkRange,
   EstimateConfidence,
   EstimateReason,
+  FocusedValue,
+  Partition,
   PartitionDiagnostics,
-  PartitionInfo,
-  PlannedChunk,
-  SliceLineageStep,
-  SliceRange,
-  SortKeyInfo,
+  SortKey,
 } from './chunking/types.js'
diff --git a/packages/plugin-backfill/src/state.ts b/packages/plugin-backfill/src/state.ts
index 45dd900..2b47d52 100644
--- a/packages/plugin-backfill/src/state.ts
+++ b/packages/plugin-backfill/src/state.ts
@@ -5,6 +5,7 @@ import { dirname, join, resolve } from 'node:path'
 
 import type { ResolvedChxConfig } from '@chkit/core'
 
+import { decodeChunkPlanFromPersistence } from './chunking/boundary-codec.js'
 import { BackfillConfigError } from './errors.js'
 import type {
   BackfillEnvironment,
@@ -89,6 +90,13 @@ async function readJsonMaybe<T>(filePath: string): Promise<T | null> {
   return JSON.parse(await readFile(filePath, 'utf8')) as T
 }
 
+function decodePlan(plan: BackfillPlanState): BackfillPlanState {
+  return {
+    ...plan,
+    chunkPlan: decodeChunkPlanFromPersistence(plan.chunkPlan),
+  }
+}
+
 export async function writeJson(filePath: string, value: unknown): Promise<void> {
   await mkdir(dirname(filePath), { recursive: true })
   await writeFile(filePath, `${JSON.stringify(value, null, 2)}\n`, 'utf8')
@@ -102,12 +110,21 @@ export async function readPlan(input: {
 }): Promise<ReadPlanOutput> {
   const stateDir = computeBackfillStateDir(input.config, input.configPath, input.stateDir)
   const paths = backfillPaths(stateDir, input.planId)
-  const plan = await readJsonMaybe<BackfillPlanState>(paths.planPath)
-  if (!plan) {
+  const rawPlan = await readJsonMaybe<Record<string, unknown>>(paths.planPath)
+  if (!rawPlan) {
     throw new BackfillConfigError(`Backfill plan not found: ${paths.planPath}`)
   }
+
+  if (!('chunkPlan' in rawPlan)) {
+    throw new BackfillConfigError(
+      `Backfill plan ${input.planId} uses a previous chunking format and can no longer be loaded. Recreate the plan.`
+    )
+  }
+
+  const plan = rawPlan as unknown as BackfillPlanState
+
   return {
-    plan,
+    plan: decodePlan(plan),
     planPath: paths.planPath,
     stateDir,
   }
@@ -132,7 +149,7 @@ export function summarizeRunStatus(
   plan: BackfillPlanState,
 ): BackfillStatusSummary {
   const totals = {
-    total: plan.chunks.length,
+    total: plan.chunkPlan.chunks.length,
     pending: 0,
     submitted: 0,
     running: 0,
@@ -141,7 +158,7 @@ export function summarizeRunStatus(
   }
 
   let rowsWritten = 0
-  for (const chunk of plan.chunks) {
+  for (const chunk of plan.chunkPlan.chunks) {
     const state = run.progress[chunk.id]
     if (!state) {
       totals.pending += 1
diff --git a/packages/plugin-backfill/src/types.ts b/packages/plugin-backfill/src/types.ts
index cf812ea..4f02865 100644
--- a/packages/plugin-backfill/src/types.ts
+++ b/packages/plugin-backfill/src/types.ts
@@ -2,13 +2,7 @@ import type { ChxInlinePluginRegistration, ResolvedChxConfig } from '@chkit/core
 
 import type { BackfillProgress } from './async-backfill.js'
 import type {
-  PartitionDiagnostics,
-  PartitionInfo,
-  SliceLineageStep,
-  SliceRange,
-  SortKeyInfo,
-  EstimateConfidence,
-  EstimateReason,
+  ChunkPlan,
 } from './chunking/types.js'
 import type { PluginConfig } from './options.js'
 
@@ -24,52 +18,41 @@ export interface BackfillEnvironment {
 
 export type BackfillPlanStatus = 'planned' | 'running' | 'paused' | 'completed' | 'failed' | 'cancelled'
 
-export type { ChunkBoundary, PartitionInfo, PlannedChunk, SortKeyInfo } from './chunking/types.js'
+export type {
+  Chunk,
+  ChunkDerivationStep,
+  ChunkPlan,
+  ChunkRange,
+  EstimateConfidence,
+  EstimateReason,
+  FocusedValue,
+  Partition,
+  PartitionDiagnostics,
+  SortKey,
+} from './chunking/types.js'
 
-export interface BackfillChunk {
-  id: string
-  from: string
-  to: string
-  status: 'pending' | 'running' | 'done' | 'failed' | 'skipped'
-  attempts: number
-  idempotencyToken: string
-  sqlTemplate: string
-  lastError?: string
-  partitionId: string
-  estimatedBytes: number
-  estimatedRows?: number
-  ranges?: SliceRange[]
-  sortKeyFrom?: string
-  sortKeyTo?: string
-  isHotKey?: boolean
-  hotDimensionIndex?: number
-  hotKeyValue?: string
-  estimateConfidence?: EstimateConfidence
-  estimateReason?: EstimateReason
-  lineage?: SliceLineageStep[]
+export interface BackfillExecutionPlan {
+  mode: 'copy' | 'mv_replay'
+  sourceTarget: string
+  mvAsQuery?: string
+  targetColumns?: string[]
+  requireIdempotencyToken: boolean
 }
 
 export interface BackfillPlanState {
   planId: string
   target: string
   createdAt: string
-  status: BackfillPlanStatus
-  strategy?: 'table' | 'mv_replay' | 'partition'
   environment?: BackfillEnvironment
   from: string
   to: string
-  chunks: BackfillChunk[]
-  partitions?: PartitionInfo[]
-  sortKey?: SortKeyInfo
-  sortKeys?: SortKeyInfo[]
-  partitionDiagnostics?: PartitionDiagnostics[]
+  chunkPlan: ChunkPlan
+  execution: BackfillExecutionPlan
   options: {
-    chunkHours?: number
     maxChunkBytes?: number
     maxParallelChunks: number
     maxRetriesPerChunk: number
     requireIdempotencyToken: boolean
-    timeColumn?: string
     sortKeyColumn?: string
   }
   policy: {

From 3f9982011b813b60d11ac0c663b0044b552ce530 Mon Sep 17 00:00:00 2001
From: KeKs0r <marc.hoeffl@laserfocus.io>
Date: Thu, 2 Apr 2026 15:50:44 +0200
Subject: [PATCH 4/5] Fix CI: add @chkit/clickhouse dependency to plugin-obsessiondb

---
 bun.lock                                 | 1 +
 packages/plugin-obsessiondb/package.json | 1 +
 2 files changed, 2 insertions(+)

diff --git a/bun.lock b/bun.lock
index 1191f8c..0ddc569 100644
--- a/bun.lock
+++ b/bun.lock
@@ -93,6 +93,7 @@
       "name": "@chkit/plugin-obsessiondb",
       "version": "0.1.0-beta.19",
       "dependencies": {
+        "@chkit/clickhouse": "workspace:*",
         "@chkit/core": "workspace:*",
         "@orpc/client": "1.13.4",
         "@orpc/contract": "1.13.4",
diff --git a/packages/plugin-obsessiondb/package.json b/packages/plugin-obsessiondb/package.json
index 3e4eee5..01b459a 100644
--- a/packages/plugin-obsessiondb/package.json
+++ b/packages/plugin-obsessiondb/package.json
@@ -41,6 +41,7 @@
     "clean": "rm -rf dist"
   },
   "dependencies": {
+    "@chkit/clickhouse": "workspace:*",
     "@chkit/core": "workspace:*",
     "@orpc/client": "1.13.4",
     "@orpc/contract": "1.13.4",

From fbd3a1986865674ef2156778e92d38e44bedaa1d Mon Sep 17 00:00:00 2001
From: KeKs0r <marc.hoeffl@laserfocus.io>
Date: Thu, 2 Apr 2026 17:47:39 +0200
Subject: [PATCH 5/5] Export backfill SDK helpers and obsessiondb service types

---
 packages/plugin-backfill/README.md          |  9 ++++++++-
 packages/plugin-backfill/src/plugin.test.ts |  4 ++++
 packages/plugin-backfill/src/sdk.ts         | 12 +++++++++++-
 packages/plugin-obsessiondb/src/index.ts    |  4 ++++
 4 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/packages/plugin-backfill/README.md b/packages/plugin-backfill/README.md
index 4b3f42f..112a391 100644
--- a/packages/plugin-backfill/README.md
+++ b/packages/plugin-backfill/README.md
@@ -42,7 +42,14 @@ The package root is limited to the plugin registration API.
 Chunk-planning and async execution internals are exposed from the SDK subpath:
 
 ```ts
-import { analyzeAndChunk, executeBackfill } from '@chkit/plugin-backfill/sdk'
+import {
+  analyzeAndChunk,
+  buildWhereClauseFromChunk,
+  decodeChunkPlanFromPersistence,
+  encodeChunkPlanForPersistence,
+  executeBackfill,
+  generateIdempotencyToken,
+} from '@chkit/plugin-backfill/sdk'
 ```
 
 ## License
diff --git a/packages/plugin-backfill/src/plugin.test.ts b/packages/plugin-backfill/src/plugin.test.ts
index a3b36e5..cf0f841 100644
--- a/packages/plugin-backfill/src/plugin.test.ts
+++ b/packages/plugin-backfill/src/plugin.test.ts
@@ -39,6 +39,10 @@ describe('@chkit/plugin-backfill plugin surface', () => {
     expect(sdk).toHaveProperty('generateChunkPlan')
     expect(sdk).toHaveProperty('executeBackfill')
     expect(sdk).toHaveProperty('buildChunkExecutionSql')
+    expect(sdk).toHaveProperty('buildWhereClauseFromChunk')
+    expect(sdk).toHaveProperty('encodeChunkPlanForPersistence')
+    expect(sdk).toHaveProperty('decodeChunkPlanFromPersistence')
+    expect(sdk).toHaveProperty('generateIdempotencyToken')
   })
 
   test('package exports declare root and sdk subpath separately', () => {
diff --git a/packages/plugin-backfill/src/sdk.ts b/packages/plugin-backfill/src/sdk.ts
index 9edf9df..233ff5a 100644
--- a/packages/plugin-backfill/src/sdk.ts
+++ b/packages/plugin-backfill/src/sdk.ts
@@ -1,7 +1,17 @@
 export { executeBackfill, syncProgress } from './async-backfill.js'
 export { analyzeAndChunk, analyzeTable } from './chunking/analyze.js'
+export {
+  decodeChunkPlanFromPersistence,
+  encodeChunkPlanForPersistence,
+} from './chunking/boundary-codec.js'
 export { generateChunkPlan } from './chunking/planner.js'
-export { buildChunkExecutionSql, injectSortKeyFilter, rewriteSelectColumns } from './chunking/sql.js'
+export {
+  buildChunkExecutionSql,
+  buildWhereClauseFromChunk,
+  injectSortKeyFilter,
+  rewriteSelectColumns,
+} from './chunking/sql.js'
+export { generateIdempotencyToken } from './chunking/utils/ids.js'
 
 export type {
   BackfillOptions,
diff --git a/packages/plugin-obsessiondb/src/index.ts b/packages/plugin-obsessiondb/src/index.ts
index 77efd28..d17c6bd 100644
--- a/packages/plugin-obsessiondb/src/index.ts
+++ b/packages/plugin-obsessiondb/src/index.ts
@@ -12,6 +12,10 @@ import { loadSelectedService } from './service/storage.js'
 
 export { loadCredentials, resolveBaseUrl, type Credentials } from './auth/index.js'
 export { createJobsClient, type JobsClient } from './backfill/index.js'
+export {
+  loadSelectedService,
+} from './service/storage.js'
+export type { SelectedService } from './service/types.js'
 
 export type ObsessionDBPluginOptions = Record<string, never>