Skip to content

Commit 2c67555

Browse files
committed
Add smarter notebook truncation
1 parent 43173a0 commit 2c67555

File tree

2 files changed

+203
-3
lines changed

2 files changed

+203
-3
lines changed

extensions/positron-assistant/src/notebookContextFilter.ts

Lines changed: 196 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,18 @@ export const MAX_CELLS_FOR_ALL_CELLS_CONTEXT = 20;
1818
*/
1919
const SLIDING_WINDOW_SIZE = 10;
2020

21+
/**
 * Upper bound, in characters, on the serialized notebook context.
 * At roughly 4 characters per token this works out to about 12K tokens.
 */
export const MAX_NOTEBOOK_CONTEXT_CHARS = 50_000;

/**
 * Per-cell content cap, in characters, applied to cells that are not selected.
 * Deliberately small so that selected cells can be kept in full.
 */
export const MAX_NON_SELECTED_CELL_CONTENT_CHARS = 2_000;
32+
2133
/**
2234
* Calculates a sliding window of cells around an anchor cell index.
2335
* @param totalCells Total number of cells in the notebook
@@ -35,6 +47,157 @@ export function calculateSlidingWindow(
3547
return { startIndex, endIndex };
3648
}
3749

50+
/**
 * A notebook cell that can additionally carry bookkeeping about content
 * truncation. Intended for internal use when cell content is shortened.
 */
interface TruncatedNotebookCell extends positron.notebooks.NotebookCell {
	/** Length of the cell content before it was shortened; set only if truncation occurred. */
	originalContentLength?: number;
}
58+
59+
/**
 * Type guard to check if a cell has truncation metadata.
 *
 * A cell counts as carrying metadata only when `originalContentLength` holds a
 * number. A property that is present but set to `undefined` (which is what an
 * object literal containing `originalContentLength: undefined` produces) means
 * "not truncated" and does not satisfy the guard; a plain `in` check would
 * wrongly accept it.
 *
 * @param cell The cell to check
 * @returns True if the cell has a numeric originalContentLength property
 */
export function hasTruncationMetadata(cell: positron.notebooks.NotebookCell): cell is TruncatedNotebookCell {
	return typeof (cell as TruncatedNotebookCell).originalContentLength === 'number';
}
68+
69+
/**
 * Looks up how long a cell's content was before it was truncated.
 *
 * @param cell The cell to inspect
 * @returns The recorded pre-truncation content length, or undefined when none is recorded
 */
export function getOriginalContentLength(cell: positron.notebooks.NotebookCell): number | undefined {
	if (!hasTruncationMetadata(cell)) {
		return undefined;
	}
	return cell.originalContentLength;
}
78+
79+
/**
 * Truncates cell content to a maximum length, adding a truncation indicator.
 *
 * The returned string never exceeds `maxLength` characters (UTF-16 code units).
 * When `maxLength` is too small to hold any content plus the full indicator,
 * only the leading part of the indicator that fits is returned. The cut point
 * is moved back by one unit if it would otherwise split a surrogate pair, so
 * the result never ends its content portion with a lone high surrogate.
 *
 * @param content The cell content to truncate
 * @param maxLength Maximum length for the returned string
 * @returns Truncated content with indicator, or original content if within limit
 */
function truncateCellContent(content: string, maxLength: number): string {
	if (content.length <= maxLength) {
		return content;
	}

	const truncationIndicator = '... [truncated]';

	// Not enough room for content plus the indicator: honor the limit by
	// returning as much of the indicator as fits (possibly nothing).
	if (maxLength <= truncationIndicator.length) {
		return truncationIndicator.substring(0, Math.max(0, maxLength));
	}

	// Reserve space for the indicator so the total stays within maxLength.
	let cutIndex = maxLength - truncationIndicator.length;

	// If the last kept code unit is a high surrogate, its low surrogate would
	// be cut off; drop the high surrogate too rather than emit a broken character.
	const lastKeptUnit = content.charCodeAt(cutIndex - 1);
	if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
		cutIndex -= 1;
	}

	return content.substring(0, cutIndex) + truncationIndicator;
}
95+
96+
/**
 * Produces a rough character-count estimate for a list of cells once they
 * are serialized as XML: counted content length plus a flat per-cell overhead.
 *
 * @param cells Array of notebook cells to estimate
 * @param selectedIndices Set of cell indices that are selected (counted at full length)
 * @returns Estimated total character count for serialized output
 */
function estimateContextSize(
	cells: positron.notebooks.NotebookCell[],
	selectedIndices: Set<number>
): number {
	// Flat allowance per cell for tags, attributes, etc. - rough estimate
	const perCellOverhead = 200;

	return cells.reduce((runningTotal, cell) => {
		// Selected cells count in full; others count at most the non-selected cap
		const countedLength = selectedIndices.has(cell.index)
			? cell.content.length
			: Math.min(cell.content.length, MAX_NON_SELECTED_CELL_CONTENT_CHARS);
		return runningTotal + countedLength + perCellOverhead;
	}, 0);
}
123+
124+
/**
 * Applies content budget limiting to cells, preserving selected cells fully
 * while truncating non-selected cells and potentially reducing cell count.
 *
 * Steps:
 * 1. Every non-selected cell has its content truncated to
 *    MAX_NON_SELECTED_CELL_CONTENT_CHARS. `originalContentLength` is recorded
 *    only on cells whose content was actually shortened; untouched cells do
 *    not get the property at all.
 * 2. If the estimated size still exceeds the budget, non-selected cells are
 *    kept in their existing order until the first one that no longer fits;
 *    that cell and all later non-selected cells are dropped. Selected cells
 *    are always kept, even if they alone exceed the budget.
 *
 * The input array and its cell objects are not mutated; returned cells are copies.
 *
 * @param cells Array of notebook cells to apply budget to
 * @param selectedIndices Set of cell indices that are selected (must be preserved)
 * @param budget Maximum total character budget
 * @returns Array of cells, ordered by index, with content truncated as needed to fit budget
 */
function applyContentBudget(
	cells: positron.notebooks.NotebookCell[],
	selectedIndices: Set<number>,
	budget: number
): TruncatedNotebookCell[] {
	// First pass: truncate non-selected cell content
	const truncatedCells = cells.map((cell): TruncatedNotebookCell => {
		if (selectedIndices.has(cell.index)) {
			// Preserve selected cells fully
			return { ...cell };
		}

		const truncatedContent = truncateCellContent(cell.content, MAX_NON_SELECTED_CELL_CONTENT_CHARS);
		const truncated: TruncatedNotebookCell = { ...cell, content: truncatedContent };

		// Attach the metadata only when something was cut. Writing an explicit
		// `originalContentLength: undefined` would make property-existence checks
		// (`'originalContentLength' in cell`) report untruncated cells as truncated.
		if (cell.content.length > truncatedContent.length) {
			truncated.originalContentLength = cell.content.length;
		}
		return truncated;
	});

	// Estimate size after truncation
	const currentSize = estimateContextSize(truncatedCells, selectedIndices);

	// If still over budget, reduce non-selected cells (but always keep selected cells)
	if (currentSize > budget) {
		const selectedCells: TruncatedNotebookCell[] = [];
		const nonSelectedCells: TruncatedNotebookCell[] = [];
		for (const cell of truncatedCells) {
			(selectedIndices.has(cell.index) ? selectedCells : nonSelectedCells).push(cell);
		}

		// Whatever the selected cells do not consume is available to the rest
		const selectedCellsSize = estimateContextSize(selectedCells, selectedIndices);
		const availableBudget = Math.max(0, budget - selectedCellsSize);

		// Non-selected cells are sized one at a time against an empty selection
		const noSelection = new Set<number>();
		const keptNonSelectedCells: TruncatedNotebookCell[] = [];
		let usedBudget = 0;
		for (const cell of nonSelectedCells) {
			const cellSize = estimateContextSize([cell], noSelection);
			if (usedBudget + cellSize <= availableBudget) {
				keptNonSelectedCells.push(cell);
				usedBudget += cellSize;
			} else {
				// Stop adding cells once budget is exceeded
				break;
			}
		}

		// Combine selected cells (always included) with kept non-selected cells,
		// restoring notebook order by sorting on index
		return [...selectedCells, ...keptNonSelectedCells].sort((a, b) => a.index - b.index);
	}

	return truncatedCells;
}
200+
38201
/**
39202
* Filters notebook context based on notebook size and selection state.
40203
*
@@ -43,6 +206,11 @@ export function calculateSlidingWindow(
43206
* - Large notebooks (>=20 cells) with selection: Apply sliding window around last selected cell
44207
* - Large notebooks (>=20 cells) without selection: Remove allCells field
45208
*
209+
* Additionally applies content-aware size limiting to prevent exceeding character budget:
210+
* - Preserves selected cells fully
211+
* - Truncates non-selected cell content aggressively
212+
* - Reduces included cell count if still over budget
213+
*
46214
* @param context The notebook context to filter
47215
* @returns Filtered notebook context
48216
*/
@@ -55,9 +223,22 @@ export function filterNotebookContext(
55223
}
56224

57225
const totalCells = context.cellCount;
226+
const selectedIndices = new Set(context.selectedCells.map(cell => cell.index));
58227

59-
// Small notebooks: keep all cells
228+
// Small notebooks: keep all cells, but still apply content budget if needed
60229
if (totalCells < MAX_CELLS_FOR_ALL_CELLS_CONTEXT) {
230+
// Estimate total content size
231+
const totalContentSize = estimateContextSize(context.allCells, selectedIndices);
232+
233+
// If over budget, apply content-aware filtering
234+
if (totalContentSize > MAX_NOTEBOOK_CONTEXT_CHARS) {
235+
const budgetedCells = applyContentBudget(context.allCells, selectedIndices, MAX_NOTEBOOK_CONTEXT_CHARS);
236+
return {
237+
...context,
238+
allCells: budgetedCells
239+
};
240+
}
241+
61242
return context;
62243
}
63244

@@ -73,7 +254,20 @@ export function filterNotebookContext(
73254
const lastSelectedIndex = Math.max(...context.selectedCells.map(cell => cell.index));
74255
const { startIndex, endIndex } = calculateSlidingWindow(totalCells, lastSelectedIndex);
75256

76-
const filteredCells = context.allCells.slice(startIndex, endIndex);
257+
let filteredCells = context.allCells.slice(startIndex, endIndex);
258+
259+
// Update selectedIndices to only include cells that are actually in the filtered window
260+
const filteredSelectedIndices = new Set(
261+
filteredCells
262+
.filter(cell => selectedIndices.has(cell.index))
263+
.map(cell => cell.index)
264+
);
265+
266+
// Apply content-aware budget limiting
267+
const totalContentSize = estimateContextSize(filteredCells, filteredSelectedIndices);
268+
if (totalContentSize > MAX_NOTEBOOK_CONTEXT_CHARS) {
269+
filteredCells = applyContentBudget(filteredCells, filteredSelectedIndices, MAX_NOTEBOOK_CONTEXT_CHARS);
270+
}
77271

78272
return {
79273
...context,

extensions/positron-assistant/src/tools/notebookUtils.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import * as vscode from 'vscode';
77
import * as positron from 'positron';
88
import * as xml from '../xml.js';
9-
import { calculateSlidingWindow, filterNotebookContext, MAX_CELLS_FOR_ALL_CELLS_CONTEXT } from '../notebookContextFilter.js';
9+
import { calculateSlidingWindow, filterNotebookContext, MAX_CELLS_FOR_ALL_CELLS_CONTEXT, getOriginalContentLength } from '../notebookContextFilter.js';
1010
import { isRuntimeSessionReference } from '../utils.js';
1111
import { log } from '../extension.js';
1212

@@ -169,11 +169,17 @@ export function formatCells(options: FormatCellsOptions): string {
169169
const cellLabel = cells.length === 1
170170
? prefix
171171
: `${prefix} ${idx + 1}`;
172+
173+
// Check if cell content was truncated (has originalContentLength property)
174+
const originalLength = getOriginalContentLength(cell);
175+
const wasTruncated = originalLength !== undefined && originalLength > cell.content.length;
176+
172177
const parts = [
173178
`<cell index="${cell.index}" type="${cell.type}">`,
174179
` <label>${cellLabel}</label>`,
175180
` <status>${statusInfo}</status>`,
176181
includeContent ? `<content>${cell.content}</content>` : '',
182+
wasTruncated ? ` <truncated original-length="${originalLength}" />` : '',
177183
`</cell>`
178184
];
179185
return parts.filter(Boolean).join('\n');

0 commit comments

Comments
 (0)