Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions packages/cli/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@botpress/cli",
"version": "5.5.2",
"version": "5.5.3",
"description": "Botpress CLI",
"scripts": {
"build": "pnpm run build:types && pnpm run bundle && pnpm run template:gen",
Expand Down Expand Up @@ -28,7 +28,7 @@
"@apidevtools/json-schema-ref-parser": "^11.7.0",
"@botpress/chat": "0.5.4",
"@botpress/client": "1.33.0",
"@botpress/sdk": "5.3.4",
"@botpress/sdk": "5.4.0",
"@bpinternal/const": "^0.1.0",
"@bpinternal/tunnel": "^0.1.1",
"@bpinternal/verel": "^0.2.0",
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/templates/empty-bot/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"private": true,
"dependencies": {
"@botpress/client": "1.33.0",
"@botpress/sdk": "5.3.4"
"@botpress/sdk": "5.4.0"
},
"devDependencies": {
"@types/node": "^22.16.4",
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/templates/empty-integration/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"private": true,
"dependencies": {
"@botpress/client": "1.33.0",
"@botpress/sdk": "5.3.4"
"@botpress/sdk": "5.4.0"
},
"devDependencies": {
"@types/node": "^22.16.4",
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/templates/empty-plugin/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
},
"private": true,
"dependencies": {
"@botpress/sdk": "5.3.4"
"@botpress/sdk": "5.4.0"
},
"devDependencies": {
"@types/node": "^22.16.4",
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/templates/hello-world/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"private": true,
"dependencies": {
"@botpress/client": "1.33.0",
"@botpress/sdk": "5.3.4"
"@botpress/sdk": "5.4.0"
},
"devDependencies": {
"@types/node": "^22.16.4",
Expand Down
2 changes: 1 addition & 1 deletion packages/cli/templates/webhook-message/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"private": true,
"dependencies": {
"@botpress/client": "1.33.0",
"@botpress/sdk": "5.3.4",
"@botpress/sdk": "5.4.0",
"axios": "^1.6.8"
},
"devDependencies": {
Expand Down
2 changes: 1 addition & 1 deletion packages/cognitive/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@botpress/cognitive",
"version": "0.3.10",
"version": "0.3.11",
"description": "Wrapper around the Botpress Client to call LLMs",
"main": "./dist/index.cjs",
"module": "./dist/index.mjs",
Expand Down
4 changes: 2 additions & 2 deletions packages/llmz/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "llmz",
"type": "module",
"description": "LLMz - An LLM-native Typescript VM built on top of Zui",
"version": "0.0.48",
"version": "0.0.50",
"types": "./dist/index.d.ts",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
Expand Down Expand Up @@ -72,7 +72,7 @@
},
"peerDependencies": {
"@botpress/client": "1.33.0",
"@botpress/cognitive": "0.3.10",
"@botpress/cognitive": "0.3.11",
"@bpinternal/thicktoken": "^1.0.5",
"@bpinternal/zui": "^1.3.2"
},
Expand Down
190 changes: 113 additions & 77 deletions packages/llmz/src/truncator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@ const DEFAULT_REMOVE_CHUNK = 250
const WRAP_OPEN_TAG_1 = '【TRUNCATE'
const WRAP_OPEN_TAG_2 = '】'
const WRAP_CLOSE_TAG = '【/TRUNCATE】'
const getRegex = () =>
new RegExp(`(${WRAP_OPEN_TAG_1}(?:\\s+[\\w:]+)*\\s*${WRAP_OPEN_TAG_2})([\\s\\S]*?)(${WRAP_CLOSE_TAG})`, 'g')
const REGEXP = `(${WRAP_OPEN_TAG_1}(?:\\s+[\\w:]+)*\\s*${WRAP_OPEN_TAG_2})([\\s\\S]*?)(${WRAP_CLOSE_TAG})`

/**
 * Result of one successful `_MessageContentParser.parse` call: a single
 * truncation wrapper found in the content, plus the plain text located
 * between the previous wrapper (or the start of the content) and this one.
 */
type ParsedMessageContent = {
/** key/value pairs read from the wrapper's open tag, still in string form */
attributes: SerializedTruncateOptions
/** text captured between the wrapper's open and close tags */
wrappedContent: string | undefined
/** text preceding this wrapper that is outside any wrapper; undefined when there is none */
nonTruncatableContent: string | undefined
}

type TruncateOptions = {
preserve: 'top' | 'bottom' | 'both'
Expand All @@ -27,6 +32,22 @@ type TruncateOptions = {
minTokens: number
}

/**
 * Truncate options as they are read from a wrapper's open tag, i.e. before
 * any numeric conversion. The parser produces this shape with a type
 * assertion, so the fields are not validated at runtime.
 */
type SerializedTruncateOptions = {
preserve: 'top' | 'bottom' | 'both'
/** string form of the `flex` option as written in the tag */
flex: string
/** string form of the minimum token count; consumers convert it with `Number()` to obtain `minTokens` */
min: string
}

/**
 * One contiguous slice of a message's content, tracked together with its
 * token count while the truncation loop shrinks the truncatable slices.
 */
type Part = {
/** the current remaining content */
content: string
/** the current remaining tokens */
tokens: number
/** if part is inside a <WRAPPER></WRAPPER> tag, then it's truncatable. when outside the wrapper, it's not truncatable */
truncatable: boolean
/** truncate options attached to the part; only populated for parts built from a wrapper */
attributes?: Partial<TruncateOptions>
}

const DEFAULT_TRUNCATE_OPTIONS: TruncateOptions = {
preserve: 'top',
flex: 1,
Expand Down Expand Up @@ -143,52 +164,33 @@ export function truncateWrappedContent<T extends MessageLike>({
}: Options<T>): T[] {
const tokenizer = getTokenizer()

type Part = {
/** the current remaining content */
content: string
/** the current remaining tokens */
tokens: number
/** if part is inside a <WRAPPER></WRAPPER> tag, then it's truncatable. when outside the wrapper, it's not truncatable */
truncatable: boolean
attributes?: Partial<TruncateOptions>
}

/**
* Before { content: 'content', tokens: 10, truncatable: false }
* <WRAPPER>content</WRAPPER> { content: 'content', tokens: 10, truncatable: true }
* After { content: 'content', tokens: 10, truncatable: false }
*/

const parts: Array<Part[]> = []

const parts: Part[][] = []
// Split messages into parts and calculate initial tokens
for (const msg of messages) {
const current: Part[] = []

const content = typeof msg.content === 'string' ? msg.content : ''
let match
const regex = getRegex()
let lastIndex = 0
let match: ParsedMessageContent | null
const parser = new _MessageContentParser()

while ((match = regex.exec(content)) !== null) {
while ((match = parser.parse(content)) !== null) {
// Extract attributes from the open tag
const attributes = match[1]!
.split(/\s+/)
.slice(1)
.filter((x) => x !== WRAP_OPEN_TAG_2)
.map((x) => x.split(':'))
.reduce((acc, [key, value]) => ({ ...acc, [key!]: value }), {} as Record<string, any>)

if (match.index > lastIndex) {
const nonTruncatableContent = content.slice(lastIndex, match.index)
const { attributes, nonTruncatableContent, wrappedContent } = match

if (nonTruncatableContent) {
current.push({
content: nonTruncatableContent,
tokens: tokenizer.count(nonTruncatableContent),
truncatable: false,
})
}

const wrappedContent = match[2]
current.push({
content: wrappedContent!,
tokens: tokenizer.count(wrappedContent!),
Expand All @@ -199,12 +201,10 @@ export function truncateWrappedContent<T extends MessageLike>({
minTokens: Number(attributes.min) || DEFAULT_TRUNCATE_OPTIONS.minTokens,
},
})

lastIndex = regex.lastIndex
}

if (lastIndex < content.length) {
const remainingContent = content.slice(lastIndex)
const remainingContent = parser.getRemainingContent(content)
if (remainingContent) {
current.push({
content: remainingContent,
tokens: tokenizer.count(remainingContent),
Expand All @@ -215,39 +215,13 @@ export function truncateWrappedContent<T extends MessageLike>({
parts.push(current)
}

const getCount = () => parts.reduce((acc, x) => acc + x.reduce((acc, y) => acc + y.tokens, 0), 0)
const getTwoBiggestTruncatables = () => {
let biggest: Part | null = null
let secondBiggest: Part | null = null

for (const part of parts.flat()) {
if (part.truncatable) {
const flex = part.attributes?.flex ?? DEFAULT_TRUNCATE_OPTIONS.flex
const tokens = part.tokens * flex

if (part.tokens <= (part.attributes?.minTokens ?? 0)) {
continue
}

if (!biggest || tokens > biggest.tokens) {
secondBiggest = biggest
biggest = part
} else if (!secondBiggest || tokens > secondBiggest.tokens) {
secondBiggest = part
}
}
}

return { biggest, secondBiggest }
}

let currentCount = getCount()
let currentCount = _countTotalTokens(parts)
while (currentCount > tokenLimit) {
const { biggest, secondBiggest } = getTwoBiggestTruncatables()
const { biggest, secondBiggest } = _getTwoBiggestTruncables(parts)

if (!biggest || !biggest.truncatable || biggest.tokens <= 0) {
if (throwOnFailure) {
throw new Error(`Cannot truncate further, current count: ${getCount()}`)
throw new Error(`Cannot truncate further, current count: ${currentCount}`)
} else {
break
}
Expand All @@ -259,7 +233,7 @@ export function truncateWrappedContent<T extends MessageLike>({

if (toRemove <= 0) {
if (throwOnFailure) {
throw new Error(`Cannot truncate further, current count: ${getCount()}`)
throw new Error(`Cannot truncate further, current count: ${currentCount}`)
} else {
break
}
Expand Down Expand Up @@ -290,29 +264,91 @@ export function truncateWrappedContent<T extends MessageLike>({
currentCount -= toRemove
}

const removeRedundantWrappers = (content: string) => {
return content.replace(getRegex(), '$2')
}

// Reconstruct the messages
return messages.map((msg, i) => {
const p = parts[i]!
return {
...msg,
content:
typeof msg.content === 'string'
? removeRedundantWrappers(
p
.map((part) => {
if (part.truncatable) {
return part.content
}

return part.content
})
.join('')
)
? _renderRemainingWrappers(p.map((part) => part.content).join(''))
: msg.content,
}
})
}

/**
 * Stateful cursor that walks through a message's content and yields one
 * truncation wrapper per `parse` call.
 *
 * The instance remembers how far it has read (both through the global regex's
 * own `lastIndex` and through `_lastIndex`), so every call made on a given
 * instance must receive the same `content` string. Create a new parser for
 * each message.
 */
class _MessageContentParser {
  private _regex: RegExp
  private _lastIndex: number = 0

  public constructor() {
    this._regex = _createRegex()
  }

  /**
   * Finds the next wrapper in `content`, starting where the previous call stopped.
   *
   * @param content the full message content (the same string on every call)
   * @returns the wrapper's attributes and wrapped text, along with any plain
   * text found between the previous wrapper and this one; `null` once no
   * wrapper is left
   */
  public parse(content: string): ParsedMessageContent | null {
    const match = this._regex.exec(content)
    if (!match) {
      return null
    }

    const attributes = this._parseAttributes(match[1]!)

    // Anything between the end of the previous wrapper and the start of this one
    // lives outside of a wrapper and therefore cannot be truncated.
    const nonTruncatableContent =
      match.index > this._lastIndex ? content.slice(this._lastIndex, match.index) : undefined

    const wrappedContent = match[2]

    this._lastIndex = this._regex.lastIndex
    return { attributes, nonTruncatableContent, wrappedContent }
  }

  /**
   * Returns the text located after the last wrapper consumed by `parse`.
   *
   * @param content the full message content (the same string given to `parse`)
   * @returns the trailing text, or `null` when the cursor is already at the end
   */
  public getRemainingContent(content: string): string | null {
    if (this._lastIndex < content.length) {
      return content.slice(this._lastIndex)
    }
    return null
  }

  /**
   * Reads the `key:value` tokens of a wrapper's open tag.
   *
   * The closing delimiter is stripped before tokenizing: the wrapper regex
   * accepts it directly after the last attribute (no whitespace in between),
   * in which case it would otherwise stay glued to that attribute's value.
   */
  private _parseAttributes(openTag: string): SerializedTruncateOptions {
    const inner = openTag.endsWith(WRAP_OPEN_TAG_2) ? openTag.slice(0, -WRAP_OPEN_TAG_2.length) : openTag

    const attributes: Record<string, string | undefined> = {}
    // The first token is the tag name itself, the rest are attributes.
    for (const token of inner.split(/\s+/).slice(1)) {
      if (!token) {
        // trailing whitespace before the closing delimiter yields an empty token
        continue
      }
      const [key, value] = token.split(':')
      attributes[key!] = value
    }

    // The tag content is not validated; consumers fall back to defaults for missing or invalid values.
    return attributes as SerializedTruncateOptions
  }
}

/**
 * Builds a fresh global RegExp from the wrapper pattern. A new instance is
 * returned on every call so that callers never share `lastIndex` state.
 */
const _createRegex = (): RegExp => {
  return new RegExp(REGEXP, 'g')
}

/**
 * Replaces every wrapper still present in `content` with the text it wraps
 * (the pattern's second capture group), dropping the open and close tags.
 */
const _renderRemainingWrappers = (content: string): string => {
  const wrapperPattern = _createRegex()
  return content.replace(wrapperPattern, '$2')
}

/**
 * Sums the `tokens` of every part across all messages.
 *
 * @param parts one array of parts per message
 * @returns the total token count (0 when there are no parts)
 */
const _countTotalTokens = (parts: Part[][]) => {
  let total = 0
  for (const messageParts of parts) {
    let messageTokens = 0
    for (const part of messageParts) {
      messageTokens += part.tokens
    }
    total += messageTokens
  }
  return total
}

/**
 * Picks the two truncatable parts with the largest flex-weighted size
 * (`tokens * flex`), looking across all messages.
 *
 * Non-truncatable parts are ignored, and so are parts that are already at or
 * below their `minTokens` floor. Candidates are ranked by weighted size on
 * both sides of the comparison, so parts with different `flex` values are
 * compared consistently.
 *
 * @param parts one array of parts per message
 * @returns the largest and second largest candidates; each is `null` when no
 * such part exists
 */
const _getTwoBiggestTruncables = (parts: Part[][]): { biggest: Part | null; secondBiggest: Part | null } => {
  let biggest: Part | null = null
  let secondBiggest: Part | null = null
  // Weighted sizes of the two parts above, kept alongside them so that a
  // candidate's weight is never compared against a raw token count.
  let biggestWeight = 0
  let secondBiggestWeight = 0

  for (const part of parts.flat()) {
    if (!part.truncatable) {
      continue
    }

    // A part that already reached its floor cannot give up any more tokens.
    if (part.tokens <= (part.attributes?.minTokens ?? 0)) {
      continue
    }

    const flex = part.attributes?.flex ?? DEFAULT_TRUNCATE_OPTIONS.flex
    const weight = part.tokens * flex

    if (!biggest || weight > biggestWeight) {
      // the previous leader is demoted to second place
      secondBiggest = biggest
      secondBiggestWeight = biggestWeight
      biggest = part
      biggestWeight = weight
    } else if (!secondBiggest || weight > secondBiggestWeight) {
      secondBiggest = part
      secondBiggestWeight = weight
    }
  }

  return { biggest, secondBiggest }
}
2 changes: 1 addition & 1 deletion packages/sdk/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@botpress/sdk",
"version": "5.3.4",
"version": "5.4.0",
"description": "Botpress SDK",
"main": "./dist/index.cjs",
"module": "./dist/index.mjs",
Expand Down
3 changes: 3 additions & 0 deletions packages/sdk/src/message.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,17 @@ const markdownMessageSchema = z.object({

// Schema for image messages: a required `imageUrl` and an optional `title`,
// both checked with `NonEmptyString` (defined outside this view).
const imageMessageSchema = z.object({
imageUrl: NonEmptyString,
title: NonEmptyString.optional(),
})

// Schema for audio messages: a required `audioUrl` and an optional `title`,
// both checked with `NonEmptyString` (defined outside this view).
const audioMessageSchema = z.object({
audioUrl: NonEmptyString,
title: NonEmptyString.optional(),
})

// Schema for video messages: a required `videoUrl` and an optional `title`,
// both checked with `NonEmptyString` (defined outside this view).
const videoMessageSchema = z.object({
videoUrl: NonEmptyString,
title: NonEmptyString.optional(),
})

const fileMessageSchema = z.object({
Expand Down
2 changes: 1 addition & 1 deletion packages/vai/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@botpress/vai",
"version": "0.0.13",
"version": "0.0.14",
"description": "Vitest AI (vai) – a vitest extension for testing with LLMs",
"types": "./dist/index.d.ts",
"exports": {
Expand Down
Loading
Loading