Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/python/vectorize_client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@
from vectorize_client.models.get_pipelines_response import GetPipelinesResponse
from vectorize_client.models.get_source_connectors200_response import GetSourceConnectors200Response
from vectorize_client.models.get_upload_files_response import GetUploadFilesResponse
from vectorize_client.models.metadata_extraction_strategy import MetadataExtractionStrategy
from vectorize_client.models.metadata_extraction_strategy_schema import MetadataExtractionStrategySchema
from vectorize_client.models.n8_n_config import N8NConfig
from vectorize_client.models.pipeline_configuration_schema import PipelineConfigurationSchema
from vectorize_client.models.pipeline_events import PipelineEvents
Expand Down
2 changes: 2 additions & 0 deletions src/python/vectorize_client/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@
from vectorize_client.models.get_pipelines_response import GetPipelinesResponse
from vectorize_client.models.get_source_connectors200_response import GetSourceConnectors200Response
from vectorize_client.models.get_upload_files_response import GetUploadFilesResponse
from vectorize_client.models.metadata_extraction_strategy import MetadataExtractionStrategy
from vectorize_client.models.metadata_extraction_strategy_schema import MetadataExtractionStrategySchema
from vectorize_client.models.n8_n_config import N8NConfig
from vectorize_client.models.pipeline_configuration_schema import PipelineConfigurationSchema
from vectorize_client.models.pipeline_events import PipelineEvents
Expand Down
12 changes: 10 additions & 2 deletions src/python/vectorize_client/models/extraction_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import re # noqa: F401
import json

from pydantic import BaseModel, ConfigDict, StrictBool, StrictStr
from pydantic import BaseModel, ConfigDict, Field, StrictBool, StrictStr
from typing import Any, ClassVar, Dict, List, Optional
from typing import Optional, Set
from typing_extensions import Self
Expand All @@ -29,8 +29,12 @@ class ExtractionResult(BaseModel):
success: StrictBool
chunks: Optional[List[StrictStr]] = None
text: Optional[StrictStr] = None
metadata: Optional[StrictStr] = None
metadata_schema: Optional[StrictStr] = Field(default=None, alias="metadataSchema")
chunks_metadata: Optional[List[StrictStr]] = Field(default=None, alias="chunksMetadata")
chunks_schema: Optional[List[StrictStr]] = Field(default=None, alias="chunksSchema")
error: Optional[StrictStr] = None
__properties: ClassVar[List[str]] = ["success", "chunks", "text", "error"]
__properties: ClassVar[List[str]] = ["success", "chunks", "text", "metadata", "metadataSchema", "chunksMetadata", "chunksSchema", "error"]

model_config = ConfigDict(
populate_by_name=True,
Expand Down Expand Up @@ -86,6 +90,10 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
"success": obj.get("success"),
"chunks": obj.get("chunks"),
"text": obj.get("text"),
"metadata": obj.get("metadata"),
"metadataSchema": obj.get("metadataSchema"),
"chunksMetadata": obj.get("chunksMetadata"),
"chunksSchema": obj.get("chunksSchema"),
"error": obj.get("error")
})
return _obj
Expand Down
10 changes: 8 additions & 2 deletions src/python/vectorize_client/models/start_extraction_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from typing import Any, ClassVar, Dict, List, Optional, Union
from vectorize_client.models.extraction_chunking_strategy import ExtractionChunkingStrategy
from vectorize_client.models.extraction_type import ExtractionType
from vectorize_client.models.metadata_extraction_strategy import MetadataExtractionStrategy
from typing import Optional, Set
from typing_extensions import Self

Expand All @@ -32,7 +33,8 @@ class StartExtractionRequest(BaseModel):
type: Optional[ExtractionType] = ExtractionType.IRIS
chunking_strategy: Optional[ExtractionChunkingStrategy] = Field(default=ExtractionChunkingStrategy.MARKDOWN, alias="chunkingStrategy")
chunk_size: Optional[Union[StrictFloat, StrictInt]] = Field(default=256, alias="chunkSize")
__properties: ClassVar[List[str]] = ["fileId", "type", "chunkingStrategy", "chunkSize"]
metadata: Optional[MetadataExtractionStrategy] = None
__properties: ClassVar[List[str]] = ["fileId", "type", "chunkingStrategy", "chunkSize", "metadata"]

model_config = ConfigDict(
populate_by_name=True,
Expand Down Expand Up @@ -73,6 +75,9 @@ def to_dict(self) -> Dict[str, Any]:
exclude=excluded_fields,
exclude_none=True,
)
# override the default output from pydantic by calling `to_dict()` of metadata
if self.metadata:
_dict['metadata'] = self.metadata.to_dict()
return _dict

@classmethod
Expand All @@ -88,7 +93,8 @@ def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
"fileId": obj.get("fileId"),
"type": obj.get("type") if obj.get("type") is not None else ExtractionType.IRIS,
"chunkingStrategy": obj.get("chunkingStrategy") if obj.get("chunkingStrategy") is not None else ExtractionChunkingStrategy.MARKDOWN,
"chunkSize": obj.get("chunkSize") if obj.get("chunkSize") is not None else 256
"chunkSize": obj.get("chunkSize") if obj.get("chunkSize") is not None else 256,
"metadata": MetadataExtractionStrategy.from_dict(obj["metadata"]) if obj.get("metadata") is not None else None
})
return _obj

Expand Down
32 changes: 32 additions & 0 deletions src/ts/src/models/ExtractionResult.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,30 @@ export interface ExtractionResult {
* @memberof ExtractionResult
*/
text?: string;
/**
*
* @type {string}
* @memberof ExtractionResult
*/
metadata?: string;
/**
*
* @type {string}
* @memberof ExtractionResult
*/
metadataSchema?: string;
/**
*
* @type {Array<string>}
* @memberof ExtractionResult
*/
chunksMetadata?: Array<string>;
/**
*
* @type {Array<string>}
* @memberof ExtractionResult
*/
chunksSchema?: Array<string>;
/**
*
* @type {string}
Expand Down Expand Up @@ -66,6 +90,10 @@ export function ExtractionResultFromJSONTyped(json: any, ignoreDiscriminator: bo
'success': json['success'],
'chunks': json['chunks'] == null ? undefined : json['chunks'],
'text': json['text'] == null ? undefined : json['text'],
'metadata': json['metadata'] == null ? undefined : json['metadata'],
'metadataSchema': json['metadataSchema'] == null ? undefined : json['metadataSchema'],
'chunksMetadata': json['chunksMetadata'] == null ? undefined : json['chunksMetadata'],
'chunksSchema': json['chunksSchema'] == null ? undefined : json['chunksSchema'],
'error': json['error'] == null ? undefined : json['error'],
};
}
Expand All @@ -84,6 +112,10 @@ export function ExtractionResultToJSONTyped(value?: ExtractionResult | null, ign
'success': value['success'],
'chunks': value['chunks'],
'text': value['text'],
'metadata': value['metadata'],
'metadataSchema': value['metadataSchema'],
'chunksMetadata': value['chunksMetadata'],
'chunksSchema': value['chunksSchema'],
'error': value['error'],
};
}
Expand Down
15 changes: 15 additions & 0 deletions src/ts/src/models/StartExtractionRequest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,13 @@
*/

import { mapValues } from '../runtime';
import type { MetadataExtractionStrategy } from './MetadataExtractionStrategy';
import {
MetadataExtractionStrategyFromJSON,
MetadataExtractionStrategyFromJSONTyped,
MetadataExtractionStrategyToJSON,
MetadataExtractionStrategyToJSONTyped,
} from './MetadataExtractionStrategy';
import type { ExtractionType } from './ExtractionType';
import {
ExtractionTypeFromJSON,
Expand Down Expand Up @@ -58,6 +65,12 @@ export interface StartExtractionRequest {
* @memberof StartExtractionRequest
*/
chunkSize?: number;
/**
*
* @type {MetadataExtractionStrategy}
* @memberof StartExtractionRequest
*/
metadata?: MetadataExtractionStrategy;
}


Expand All @@ -84,6 +97,7 @@ export function StartExtractionRequestFromJSONTyped(json: any, ignoreDiscriminat
'type': json['type'] == null ? undefined : ExtractionTypeFromJSON(json['type']),
'chunkingStrategy': json['chunkingStrategy'] == null ? undefined : ExtractionChunkingStrategyFromJSON(json['chunkingStrategy']),
'chunkSize': json['chunkSize'] == null ? undefined : json['chunkSize'],
'metadata': json['metadata'] == null ? undefined : MetadataExtractionStrategyFromJSON(json['metadata']),
};
}

Expand All @@ -102,6 +116,7 @@ export function StartExtractionRequestToJSONTyped(value?: StartExtractionRequest
'type': ExtractionTypeToJSON(value['type']),
'chunkingStrategy': ExtractionChunkingStrategyToJSON(value['chunkingStrategy']),
'chunkSize': value['chunkSize'],
'metadata': MetadataExtractionStrategyToJSON(value['metadata']),
};
}

2 changes: 2 additions & 0 deletions src/ts/src/models/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ export * from './GetPipelines400Response';
export * from './GetPipelinesResponse';
export * from './GetSourceConnectors200Response';
export * from './GetUploadFilesResponse';
export * from './MetadataExtractionStrategy';
export * from './MetadataExtractionStrategySchema';
export * from './N8NConfig';
export * from './PipelineConfigurationSchema';
export * from './PipelineEvents';
Expand Down
2 changes: 1 addition & 1 deletion tests/ts/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions tests/ts/tests/extraction.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import {beforeEach, describe, it, expect} from "vitest";
import {createTestContext, TestContext} from "./testContext";
import {ExtractionApi, FilesApi} from "@vectorize-io/vectorize-client";
import * as fs from "node:fs";
import exp from "node:constants";

export let testContext: TestContext;

Expand Down Expand Up @@ -39,6 +40,9 @@ describe("extraction", () => {
startExtractionRequest: {
fileId: startResponse.fileId,
chunkSize: 512,
metadata: {
inferSchema: true
}
}
})
await pollExtraction(extractionApi, response.extractionId)
Expand All @@ -58,10 +62,17 @@ describe("extraction", () => {
extractionId: extractionId
})
if (result.ready) {

expect(result.data?.success).toBe(true)
expect(result.data?.error).toBeFalsy()
expect(result.data?.chunks?.length).toBeGreaterThan(2)
expect(result.data?.text).toBeTruthy()
expect(result.data?.metadata).toBeTruthy()
const parsedMeta = JSON.parse(result.data!.metadata!)
console.log(parsedMeta)
expect(result.data?.metadataSchema).toBe("generated")
expect(result.data?.chunksSchema?.length).toBe(result.data?.chunks?.length)
expect(result.data?.chunksMetadata?.length).toBe(result.data?.chunks?.length)
break
} else {
console.log("not ready")
Expand Down
2 changes: 1 addition & 1 deletion vectorize_api.json

Large diffs are not rendered by default.

Loading