Skip to content

Commit fd87c40

Browse files
committed
RDBC-946 Add support for querying pre-generated embeddings with task identifiers
- Introduced `embeddingsGenerationTaskIdentifier` parameter in vector search methods (`byText`, `byTexts`).
- Enhanced `VectorSearchToken` to validate and generate embedding expressions with task identifiers.
- Updated exception handling for invalid task identifier usage in vector search logic.
- Added tests to verify correct RQL generation and error scenarios for task-based embedding queries.
1 parent ca6a717 commit fd87c40

File tree

6 files changed

+172
-18
lines changed

6 files changed

+172
-18
lines changed

src/Documents/Queries/VectorSearch/Fields/VectorEmbeddingField.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { IVectorEmbeddingField, IVectorEmbeddingFieldFactoryAccessor } from "../../../Session/VectorFieldFactory.js";
22
import {VectorEmbeddingType} from "../VectorEmbeddingType.js";
33
import { Field } from "../../../../Types/index.js";
4+
import { throwError } from "../../../../Exceptions/index.js";
45

56
export class VectorEmbeddingField<T> implements
67
IVectorEmbeddingField,
@@ -23,15 +24,15 @@ export class VectorEmbeddingField<T> implements
2324

2425
public targetQuantization(targetEmbeddingQuantization: VectorEmbeddingType): IVectorEmbeddingField {
2526
if (targetEmbeddingQuantization === "Text") {
26-
throw new Error("Cannot quantize the embedding to Text. This option is only available for sourceQuantizationType.");
27+
throwError("InvalidOperationException", "Cannot quantize the embedding to Text. This option is only available for sourceQuantizationType.");
2728
}
2829

2930
this.destinationQuantizationType = targetEmbeddingQuantization;
3031

3132
if ((this.sourceQuantizationType === "Int8" ||
3233
this.sourceQuantizationType === "Binary") &&
3334
this.destinationQuantizationType !== this.sourceQuantizationType) {
34-
throw new Error(`Cannot quantize already quantized embeddings. Source VectorEmbeddingType is ${this.sourceQuantizationType}; however the destination is ${this.destinationQuantizationType}.`);
35+
throwError("InvalidOperationException", `Cannot quantize already quantized embeddings. Source VectorEmbeddingType is ${this.sourceQuantizationType}; however the destination is ${this.destinationQuantizationType}.`);
3536
}
3637

3738
return this;

src/Documents/Queries/VectorSearch/Fields/VectorEmbeddingTextField.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
} from "../../../Session/VectorFieldFactory.js";
55
import { VectorEmbeddingType } from "../VectorEmbeddingType.js";
66
import { Field } from "../../../../Types/index.js";
7+
import { throwError } from "../../../../Exceptions/index.js";
78

89
export class VectorEmbeddingTextField<T> implements
910
IVectorEmbeddingTextField,
@@ -22,7 +23,7 @@ export class VectorEmbeddingTextField<T> implements
2223

2324
public targetQuantization(targetEmbeddingQuantization: VectorEmbeddingType): IVectorEmbeddingTextField {
2425
if (targetEmbeddingQuantization === "Text") {
25-
throw new Error("Cannot quantize the embedding to Text. This option is only available for sourceQuantizationType.");
26+
throwError("InvalidOperationException","Cannot quantize the embedding to Text. This option is only available for sourceQuantizationType.");
2627
}
2728

2829
this.destinationQuantizationType = targetEmbeddingQuantization;
@@ -31,7 +32,7 @@ export class VectorEmbeddingTextField<T> implements
3132

3233
public usingTask(embeddingsGenerationTaskIdentifier: string): IVectorEmbeddingTextField {
3334
if (this.sourceQuantizationType !== "Text") {
34-
throw new Error("The usingTask method can only be used with text embeddings (withText)");
35+
throwError("InvalidOperationException", "The usingTask method can only be used with text embeddings (withText)");
3536
}
3637

3738
this.embeddingsGenerationTaskIdentifier = embeddingsGenerationTaskIdentifier;

src/Documents/Session/AbstractDocumentQuery.ts

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2579,7 +2579,7 @@ export abstract class AbstractDocumentQuery<T extends object, TSelf extends Abst
25792579
this._assertMethodIsCurrentlySupported("vectorSearch");
25802580

25812581
const fieldAccessor = this._resolveVectorSearchFieldAccessor(fieldName);
2582-
const {value, isDocumentId} = this._resolveVectorSearchValueFactory(valueOrFactory);
2582+
const {value, isDocumentId, embeddingsGenerationTaskIdentifierByValue} = this._resolveVectorSearchValueFactory(valueOrFactory);
25832583

25842584
const tokens = this._getCurrentWhereTokens();
25852585
this._appendOperatorIfNeeded(tokens);
@@ -2600,7 +2600,8 @@ export abstract class AbstractDocumentQuery<T extends object, TSelf extends Abst
26002600
options?.numberOfCandidates || null,
26012601
options?.isExact || VectorSearchToken.DEFAULT_IS_EXACT,
26022602
isDocumentId,
2603-
taskIdentifier
2603+
taskIdentifier,
2604+
embeddingsGenerationTaskIdentifierByValue
26042605
);
26052606

26062607
tokens.push(vectorSearchToken);
@@ -2631,9 +2632,13 @@ export abstract class AbstractDocumentQuery<T extends object, TSelf extends Abst
26312632
throwError("InvalidOperationException", "No value was provided in the valueFactory");
26322633
}
26332634

2634-
return {value, isDocumentId: !!fieldValueFactory.byId};
2635+
return {
2636+
value,
2637+
isDocumentId: !!fieldValueFactory.byId,
2638+
embeddingsGenerationTaskIdentifierByValue: fieldValueFactory.embeddingsGenerationTaskIdentifier
2639+
};
26352640
} else {
2636-
return {value: valueOrFactory, isDocumentId: false};
2641+
return {value: valueOrFactory, isDocumentId: false, embeddingsGenerationTaskIdentifierByValue: null};
26372642
}
26382643
}
26392644
}

src/Documents/Session/Tokens/VectorSearchToken.ts

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { VectorEmbeddingType } from "../../Queries/VectorSearch/VectorEmbeddingT
33
import { StringBuilder } from "../../../Utility/StringBuilder.js";
44
import { vectorSearchConfigurationToMethodName } from "../../../Utility/VectorSearchUtil.js";
55
import { IVectorEmbeddingFieldFactoryAccessor } from "../VectorFieldFactory.js";
6+
import { throwError } from "../../../Exceptions/index.js";
67

78
export class VectorSearchToken extends WhereToken {
89
static readonly EMBEDDING_PREFIX = "embedding.";
@@ -25,6 +26,7 @@ export class VectorSearchToken extends WhereToken {
2526
private readonly _numberOfCandidatesForQuerying: number | null;
2627
private readonly _isDocumentId: boolean;
2728
private readonly _embeddingsGenerationTaskIdentifier: string | null;
29+
private readonly _embeddingsGenerationTaskIdentifierByValue: string | null;
2830

2931
public constructor(
3032
fieldName: string,
@@ -35,7 +37,8 @@ export class VectorSearchToken extends WhereToken {
3537
numberOfCandidatesForQuerying: number | null,
3638
isExact: boolean,
3739
isDocumentId: boolean,
38-
embeddingsGenerationTaskIdentifier: string | null
40+
embeddingsGenerationTaskIdentifier: string | null,
41+
embeddingsGenerationTaskIdentifierByValue: string | null
3942
) {
4043
super();
4144
this.fieldName = fieldName;
@@ -47,6 +50,11 @@ export class VectorSearchToken extends WhereToken {
4750
this._numberOfCandidatesForQuerying = numberOfCandidatesForQuerying;
4851
this._isDocumentId = isDocumentId;
4952
this._embeddingsGenerationTaskIdentifier = embeddingsGenerationTaskIdentifier;
53+
this._embeddingsGenerationTaskIdentifierByValue = embeddingsGenerationTaskIdentifierByValue;
54+
55+
if (embeddingsGenerationTaskIdentifier != null && embeddingsGenerationTaskIdentifierByValue != null) {
56+
throwError("InvalidOperationException", "Embeddings generation task identifier set in value factory cannot be used with field factory. It solely purpose to use already generated embeddings.");
57+
}
5058

5159
this.options = {
5260
exact: isExact,
@@ -109,11 +117,7 @@ export class VectorSearchToken extends WhereToken {
109117

110118
writer.append(", ");
111119

112-
if (this._isDocumentId) {
113-
writer.append(`${VectorSearchToken.EMBEDDING_FOR_DOCUMENT}($${this.parameterName})`);
114-
} else {
115-
writer.append(`$${this.parameterName}`);
116-
}
120+
writer.append(this.getEmbeddingExpression());
117121

118122
const parametersAreDefault = this._similarityThreshold == null &&
119123
this._numberOfCandidatesForQuerying == null;
@@ -133,4 +137,14 @@ export class VectorSearchToken extends WhereToken {
133137
writer.append(`, ${this.options.boost.toString()})`);
134138
}
135139
}
140+
141+
private getEmbeddingExpression() {
142+
if (this._isDocumentId) {
143+
return `${VectorSearchToken.EMBEDDING_FOR_DOCUMENT}($${this.parameterName})`;
144+
}
145+
if (this._embeddingsGenerationTaskIdentifierByValue != null) {
146+
return `${VectorSearchToken.EMBEDDING_TEXT}($${this.parameterName}, ${VectorSearchToken.AI_TASK_METHOD_NAME}('${this._embeddingsGenerationTaskIdentifierByValue}'))`;
147+
}
148+
return `$${this.parameterName}`;
149+
}
136150
}

src/Documents/Session/VectorFieldFactory.ts

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,9 @@ export interface IVectorEmbeddingTextFieldValueFactory {
8181
/**
8282
* Defines queried text.
8383
* @param text Queried text
84+
* @param embeddingsGenerationTaskIdentifier Task identifier for embeddings generation
8485
*/
85-
byText(text: string): void;
86+
byText(text: string, embeddingsGenerationTaskIdentifier?: string): void;
8687

8788
/**
8889
* Query by the embedding(s) indexed from the specified document for the queried field.
@@ -93,8 +94,9 @@ export interface IVectorEmbeddingTextFieldValueFactory {
9394
/**
9495
* Defines queried texts.
9596
* @param texts Queried texts
97+
* @param embeddingsGenerationTaskIdentifier Task identifier for embeddings generation
9698
*/
97-
byTexts(texts: string[]): void;
99+
byTexts(texts: string[], embeddingsGenerationTaskIdentifier?: string): void;
98100
}
99101

100102
export interface IVectorEmbeddingFieldValueFactory {
@@ -143,6 +145,8 @@ export interface IVectorFieldValueFactoryAccessor {
143145
texts: string[];
144146

145147
byId: string;
148+
149+
embeddingsGenerationTaskIdentifier: string;
146150
}
147151

148152
export class VectorEmbeddingFieldValueFactory implements IVectorEmbeddingFieldValueFactory,
@@ -153,6 +157,7 @@ export class VectorEmbeddingFieldValueFactory implements IVectorEmbeddingFieldVa
153157
public text: string = null;
154158
public texts: string[] = null;
155159
public byId: string = null;
160+
public embeddingsGenerationTaskIdentifier: string = null;
156161

157162
public byEmbedding<T extends number>(embedding: T[]): void;
158163
public byEmbedding<T extends number>(embedding: { "@vector": IRavenVector<T> }): void;
@@ -179,17 +184,21 @@ export class VectorEmbeddingFieldValueFactory implements IVectorEmbeddingFieldVa
179184
/**
180185
* Defines queried text.
181186
* @param text Queried text
187+
* @param embeddingsGenerationTaskIdentifier Task identifier for embeddings generation
182188
*/
183-
public byText(text: string): void {
189+
public byText(text: string, embeddingsGenerationTaskIdentifier?: string): void {
184190
this.text = text;
191+
this.embeddingsGenerationTaskIdentifier = embeddingsGenerationTaskIdentifier;
185192
}
186193

187194
/**
188195
* Defines queried texts.
189196
* @param texts Queried texts
197+
* @param embeddingsGenerationTaskIdentifier Task identifier for embeddings generation
190198
*/
191-
public byTexts(texts: string[]): void {
199+
public byTexts(texts: string[], embeddingsGenerationTaskIdentifier?: string): void {
192200
this.texts = texts;
201+
this.embeddingsGenerationTaskIdentifier = embeddingsGenerationTaskIdentifier;
193202
}
194203

195204
public forDocument(documentId: string): void {

test/Issues/RDBC-946.ts

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import assert from "node:assert";
2+
import {IDocumentStore, IndexDefinition, PutIndexesOperation} from "../../src/index.js";
3+
import {disposeTestDocumentStore, RavenTestContext, testContext} from "../Utils/TestUtil.js";
4+
import {INDEXES} from "../../src/Constants.js";
5+
6+
(RavenTestContext.isRavenDbServerVersion("7.0") ? describe : describe.skip)("[RDBC-946]", () => {
7+
let store: IDocumentStore;
8+
9+
beforeEach(async function () {
10+
store = await testContext.getDocumentStore("RDBC-946", false, null, (record) => {
11+
record.settings[INDEXES.INDEXING_AUTO_SEARCH_ENGINE_TYPE] = "Corax";
12+
record.settings[INDEXES.INDEXING_STATIC_SEARCH_ENGINE_TYPE] = "Corax";
13+
});
14+
});
15+
16+
afterEach(async () =>
17+
await disposeTestDocumentStore(store));
18+
19+
it("can use task to query pregenerated embedding - RQL generation with byText", async () => {
20+
await setupVectorIndex(store);
21+
22+
const session = store.openSession();
23+
24+
const rqlResult = session.query({indexName: "VectorIndex"})
25+
.vectorSearch(f => f.withField("Vector"),
26+
v => v.byText("car", "localaitask"))
27+
.toString();
28+
29+
assert.strictEqual(rqlResult, "from index 'VectorIndex' where vector.search(Vector, embedding.text($p0, ai.task('localaitask')))");
30+
});
31+
32+
it("can use task to query pregenerated embedding - RQL generation with byTexts", async () => {
33+
await setupVectorIndex(store);
34+
35+
const session = store.openSession();
36+
37+
const rqlResult = session.query({indexName: "VectorIndex"})
38+
.vectorSearch(f => f.withField("Vector"),
39+
v => v.byTexts(["car", "planet"], "localaitask"))
40+
.toString();
41+
42+
assert.strictEqual(rqlResult, "from index 'VectorIndex' where vector.search(Vector, embedding.text($p0, ai.task('localaitask')))");
43+
});
44+
45+
it("can use task to query pregenerated embedding - document query with byText", async () => {
46+
await setupVectorIndex(store);
47+
48+
const session = store.openSession();
49+
50+
const rqlResult = session.advanced.documentQuery({indexName: "VectorIndex"})
51+
.vectorSearch(f => f.withField("Vector"),
52+
v => v.byText("animal", "localaitask"))
53+
.toString();
54+
55+
assert.strictEqual(rqlResult, "from index 'VectorIndex' where vector.search(Vector, embedding.text($p0, ai.task('localaitask')))");
56+
});
57+
58+
it("can use task to query pregenerated embedding - document query with byTexts", async () => {
59+
await setupVectorIndex(store);
60+
61+
const session = store.openSession();
62+
63+
const rqlResult = session.advanced.documentQuery({indexName: "VectorIndex"})
64+
.vectorSearch(f => f.withField("Vector"),
65+
v => v.byTexts(["car", "cosmos"], "localaitask"))
66+
.toString();
67+
68+
assert.strictEqual(rqlResult, "from index 'VectorIndex' where vector.search(Vector, embedding.text($p0, ai.task('localaitask')))");
69+
});
70+
71+
it("should generate RQL for vector search with byText, task identifier and similarity options", async () => {
72+
await setupVectorIndex(store);
73+
74+
const session = store.openSession();
75+
76+
const query = session.query({indexName: "VectorIndex"})
77+
.vectorSearch(field => field.withField("Vector"),
78+
factory => factory.byText("query text", "openai-task"), {
79+
similarity: 0.8,
80+
numberOfCandidates: 50
81+
})
82+
.toString();
83+
84+
assert.strictEqual(query, "from index 'VectorIndex' where vector.search(Vector, embedding.text($p0, ai.task('openai-task')), 0.8, 50)");
85+
});
86+
87+
it("should generate RQL for vector search with byTexts, task identifier and exact matching", async () => {
88+
await setupVectorIndex(store);
89+
90+
const session = store.openSession();
91+
92+
const query = session.query({indexName: "VectorIndex"})
93+
.vectorSearch(field => field.withField("Vector"),
94+
factory => factory.byTexts(["query one", "query two"], "embedding-task"), {
95+
isExact: true
96+
})
97+
.toString();
98+
99+
assert.strictEqual(query, "from index 'VectorIndex' where exact(vector.search(Vector, embedding.text($p0, ai.task('embedding-task'))))");
100+
});
101+
102+
it("should throw error when both field factory and value factory task identifiers are set", async () => {
103+
const session = store.openSession();
104+
105+
assert.throws(() => {
106+
session.query({collection: "Dtos"})
107+
.vectorSearch(field => field.withText("TextualValue").usingTask("field-task"),
108+
factory => factory.byText("query text", "value-task"))
109+
.toString();
110+
}, /Embeddings generation task identifier set in value factory cannot be used with field factory/);
111+
});
112+
113+
async function setupVectorIndex(store: IDocumentStore) {
114+
const indexDefinition = new IndexDefinition();
115+
indexDefinition.name = "VectorIndex";
116+
indexDefinition.maps = new Set([`
117+
from dto in docs.Dtos
118+
let attachment = LoadAttachment(dto, "vector")
119+
select new { Vector = CreateVector(attachment.GetContentAsStream())}`]);
120+
121+
const putIndexesOperation = new PutIndexesOperation(indexDefinition);
122+
await store.maintenance.send(putIndexesOperation);
123+
}
124+
});

0 commit comments

Comments
 (0)