diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 7a7342cb..8380eca4 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -8,10 +8,9 @@ on: - 'App/frontend-app/**' - 'App/kernel-memory/**' - '.github/workflows/codeql.yml' - paths-ignore: - - '**/.gitignore' - - '**/Dockerfile' - - '**/.dockerignore' + - '!**/.gitignore' + - '!**/Dockerfile' + - '!**/.dockerignore' pull_request: branches: [ "main", "dev", "demo" ] paths: @@ -19,10 +18,9 @@ on: - 'App/frontend-app/**' - 'App/kernel-memory/**' - '.github/workflows/codeql.yml' - paths-ignore: - - '**/.gitignore' - - '**/Dockerfile' - - '**/.dockerignore' + - '!**/.gitignore' + - '!**/Dockerfile' + - '!**/.dockerignore' schedule: - cron: '37 2 * * 5' @@ -55,6 +53,9 @@ jobs: with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} + config: | + queries: + - uses: security-extended - if: matrix.build-mode == 'manual' shell: bash @@ -65,4 +66,4 @@ jobs: - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v3 with: - category: "/language:${{matrix.language}}" + category: "/language:${{matrix.language}}" \ No newline at end of file diff --git a/App/frontend-app/package.json b/App/frontend-app/package.json index 2fa7341a..f0d2e0cc 100644 --- a/App/frontend-app/package.json +++ b/App/frontend-app/package.json @@ -90,6 +90,10 @@ "typescript": "^5.9.3", "vite": "^7.1.7" }, + "resolutions": { + "qs": "^6.14.1", + "glob": "^10.5.0" + }, "volta": { "node": "18.16.0", "yarn": "1.22.19" diff --git a/App/frontend-app/public/web.config b/App/frontend-app/public/web.config index 89375a82..7a2fcc88 100644 --- a/App/frontend-app/public/web.config +++ b/App/frontend-app/public/web.config @@ -17,5 +17,14 @@ + + + + + + + + + \ No newline at end of file diff --git a/App/frontend-app/src/components/chat/chatRoom.tsx b/App/frontend-app/src/components/chat/chatRoom.tsx index 89952cb9..9116aed6 100644 --- a/App/frontend-app/src/components/chat/chatRoom.tsx 
/**
 * Generates a random RFC 4122 version-4 UUID string (used for session IDs).
 *
 * Randomness comes exclusively from the Web Crypto API. The crypto object is
 * looked up on `globalThis` rather than `window`, so the function also works
 * where `window` is undefined but Web Crypto is present (Web Workers, SSR,
 * Node-based test runners).
 *
 * @returns A lowercase UUID of the form `xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx`.
 * @throws Error when neither `crypto.randomUUID` nor `crypto.getRandomValues`
 *   is available. There is deliberately no `Math.random` fallback.
 */
function uuidv4(): string {
  const cryptoObj: Crypto | undefined =
    typeof globalThis !== "undefined" ? globalThis.crypto : undefined;

  // Prefer the native generator when the runtime exposes it.
  if (cryptoObj && typeof cryptoObj.randomUUID === "function") {
    return cryptoObj.randomUUID();
  }

  if (cryptoObj && typeof cryptoObj.getRandomValues === "function") {
    const bytes = new Uint8Array(16);
    cryptoObj.getRandomValues(bytes);

    // RFC 4122 v4 and variant bits
    bytes[6] = (bytes[6] & 0x0f) | 0x40; // version 4
    bytes[8] = (bytes[8] & 0x3f) | 0x80; // variant RFC 4122

    const hex = Array.from(bytes, (b) => b.toString(16).padStart(2, "0"));
    return [
      hex.slice(0, 4).join(""),
      hex.slice(4, 6).join(""),
      hex.slice(6, 8).join(""),
      hex.slice(8, 10).join(""),
      hex.slice(10, 16).join(""),
    ].join("-");
  }

  // Avoid insecure fallback (Math.random); fail fast if unsupported.
  throw new Error("Secure UUID generation not supported in this environment.");
}
diff --git a/App/frontend-app/yarn.lock b/App/frontend-app/yarn.lock index f531e3d5..85a0ef1e 100644 --- a/App/frontend-app/yarn.lock +++ b/App/frontend-app/yarn.lock @@ -5132,11 +5132,6 @@ fresh@^2.0.0: resolved "https://registry.yarnpkg.com/fresh/-/fresh-2.0.0.tgz#8dd7df6a1b3a1b3a5cf186c05a5dd267622635a4" integrity sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A== -fs.realpath@^1.0.0: - version "1.0.0" - resolved "https://registry.yarnpkg.com/fs.realpath/-/fs.realpath-1.0.0.tgz#1504ad2523158caa40db4a2787cb01411994ea4f" - integrity sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw== - fsevents@^2.3.3, fsevents@~2.3.2, fsevents@~2.3.3: version "2.3.3" resolved "https://registry.yarnpkg.com/fsevents/-/fsevents-2.3.3.tgz#cac6407785d03675a2a5e1a5305c697b347d90d6" @@ -5243,10 +5238,10 @@ glob-parent@^6.0.2: dependencies: is-glob "^4.0.3" -glob@^10.3.10: - version "10.4.5" - resolved "https://registry.yarnpkg.com/glob/-/glob-10.4.5.tgz#f4d9f0b90ffdbab09c9d77f5f29b4262517b0956" - integrity sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg== +glob@^10.3.10, glob@^10.5.0, glob@^7.1.4: + version "10.5.0" + resolved "https://registry.yarnpkg.com/glob/-/glob-10.5.0.tgz#8ec0355919cd3338c28428a23d4f24ecc5fe738c" + integrity sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg== dependencies: foreground-child "^3.1.0" jackspeak "^3.1.2" @@ -5255,18 +5250,6 @@ glob@^10.3.10: package-json-from-dist "^1.0.0" path-scurry "^1.11.1" -glob@^7.1.4: - version "7.2.3" - resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.3.tgz#b8df0fb802bbfa8e89bd1d938b4e16578ed44f2b" - integrity sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q== - dependencies: - fs.realpath "^1.0.0" - inflight "^1.0.4" - inherits "2" - minimatch "^3.1.1" - once "^1.3.0" - path-is-absolute "^1.0.0" - 
globals@^14.0.0: version "14.0.0" resolved "https://registry.yarnpkg.com/globals/-/globals-14.0.0.tgz#898d7413c29babcf6bafe56fcadded858ada724e" @@ -5532,15 +5515,7 @@ indent-string@^4.0.0: resolved "https://registry.yarnpkg.com/indent-string/-/indent-string-4.0.0.tgz#624f8f4497d619b2d9768531d58f4122854d7251" integrity sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg== -inflight@^1.0.4: - version "1.0.6" - resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9" - integrity sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA== - dependencies: - once "^1.3.0" - wrappy "1" - -inherits@2, inherits@2.0.4: +inherits@2.0.4: version "2.0.4" resolved "https://registry.yarnpkg.com/inherits/-/inherits-2.0.4.tgz#0fa2c64f932917c3433a0ded55363aae37416b7c" integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== @@ -6399,7 +6374,7 @@ keyv@^4.5.4: i18next "^25.5.3" i18next-browser-languagedetector "^8.2.0" i18next-http-backend "^3.0.2" - km-app "file:../../../../../../../AppData/Local/Yarn/Cache/v6/npm-km-app-1.0.0-3364c375-9406-4ea7-a926-82d2ee61d0ba-1760537549847/node_modules/km-app" + km-app "file:../../../../../AppData/Local/Yarn/Cache/v6/npm-km-app-1.0.0-40f9f2ea-ee53-4cb7-ba64-cdd6b8e2ba8b-1767243556655/node_modules/km-app" marked "^16.3.0" notistack "^3.0.2" pdfjs-dist "^5.4.149" @@ -6891,7 +6866,7 @@ min-indent@^1.0.0: resolved "https://registry.yarnpkg.com/min-indent/-/min-indent-1.0.1.tgz#a63f681673b30571fbe8bc25686ae746eefa9869" integrity sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg== -minimatch@^3.0.4, minimatch@^3.1.1, minimatch@^3.1.2: +minimatch@^3.0.4, minimatch@^3.1.2: version "3.1.2" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b" integrity 
sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw== @@ -7100,7 +7075,7 @@ on-finished@^2.4.1: dependencies: ee-first "1.1.1" -once@^1.3.0, once@^1.4.0: +once@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/once/-/once-1.4.0.tgz#583b1aa775961d4b113ac17d9c50baef9dd76bd1" integrity sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w== @@ -7225,11 +7200,6 @@ path-exists@^4.0.0: resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-4.0.0.tgz#513bdbe2d3b95d7762e8c1137efa195c6c61b5b3" integrity sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w== -path-is-absolute@^1.0.0: - version "1.0.1" - resolved "https://registry.yarnpkg.com/path-is-absolute/-/path-is-absolute-1.0.1.tgz#174b9268735534ffbc7ace6bf53a5a9e1b5c5f5f" - integrity sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg== - path-key@^3.0.0, path-key@^3.1.0: version "3.1.1" resolved "https://registry.yarnpkg.com/path-key/-/path-key-3.1.1.tgz#581f6ade658cbba65a0d3380de7753295054f375" @@ -7425,10 +7395,10 @@ pure-rand@^7.0.0: resolved "https://registry.yarnpkg.com/pure-rand/-/pure-rand-7.0.1.tgz#6f53a5a9e3e4a47445822af96821ca509ed37566" integrity sha512-oTUZM/NAZS8p7ANR3SHh30kXB+zK2r2BPcEn/awJIbOvq82WoMN4p62AWWp3Hhw50G0xMsw1mhIBLqHw64EcNQ== -qs@^6.14.0: - version "6.14.0" - resolved "https://registry.yarnpkg.com/qs/-/qs-6.14.0.tgz#c63fa40680d2c5c941412a0e899c89af60c0a930" - integrity sha512-YWWTjgABSKcvs/nWBi9PycY/JiPJqOD4JA6o9Sej2AtvSGarXxKC3OQSk4pAarbdQlKAh5D4FCQkJNkW+GAn3w== +qs@^6.14.0, qs@^6.14.1: + version "6.14.1" + resolved "https://registry.yarnpkg.com/qs/-/qs-6.14.1.tgz#a41d85b9d3902f31d27861790506294881871159" + integrity sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ== dependencies: side-channel "^1.1.0" diff --git 
a/App/kernel-memory/extensions/AWS/S3/AWSS3Storage.cs b/App/kernel-memory/extensions/AWS/S3/AWSS3Storage.cs index 1c54d6b9..8ffc579e 100644 --- a/App/kernel-memory/extensions/AWS/S3/AWSS3Storage.cs +++ b/App/kernel-memory/extensions/AWS/S3/AWSS3Storage.cs @@ -67,7 +67,9 @@ public Task CreateIndexDirectoryAsync( /// public async Task DeleteIndexDirectoryAsync(string index, CancellationToken cancellationToken = default) { - this._log.LogTrace("Deleting index '{0}'", index); + this._log.LogTrace( + "Deleting index {Index}", + index?.Replace("\r", string.Empty).Replace("\n", string.Empty)); if (string.IsNullOrWhiteSpace(index)) { throw new DocumentStorageException("The index name is empty, stopping the process to prevent data loss"); @@ -126,11 +128,15 @@ public async Task WriteFileAsync( var objectKey = $"{index}/{documentId}/{fileName}"; var len = streamContent.Length; - this._log.LogTrace("Writing object {0} ...", objectKey); + this._log.LogTrace( + "Writing object {ObjectKey} ...", + objectKey?.Replace("\r", string.Empty).Replace("\n", string.Empty)); if (streamContent.Length == 0) { - this._log.LogWarning("The file {0} is empty", objectKey); + this._log.LogWarning( + "The file {ObjectKey} is empty", + objectKey?.Replace("\r", string.Empty).Replace("\n", string.Empty)); } await this._client.PutObjectAsync(new PutObjectRequest @@ -140,7 +146,10 @@ await this._client.PutObjectAsync(new PutObjectRequest InputStream = streamContent }, cancellationToken: cancellationToken).ConfigureAwait(false); - this._log.LogTrace("Object {0} ready, size {1}", objectKey, len); + this._log.LogTrace( + "Object {ObjectKey} ready, size {Size}", + objectKey?.Replace("\r", string.Empty).Replace("\n", string.Empty), + len); } /// @@ -177,7 +186,9 @@ public async Task ReadFileAsync( { if (logErrIfNotFound) { - this._log.LogInformation("File not found: {0}", objectKey); + this._log.LogInformation( + "File not found: {ObjectKey}", + objectKey?.Replace("\r", string.Empty).Replace("\n", 
string.Empty)); } throw new DocumentStorageFileNotFoundException("File not found", e); @@ -199,7 +210,9 @@ private async Task DeleteObjectsByPrefixAsync(string prefix, CancellationToken c throw new DocumentStorageException("The object prefix is empty, stopping the process to prevent data loss"); } - this._log.LogTrace("Deleting objects with prefix '{0}'", prefix); + this._log.LogTrace( + "Deleting objects with prefix {Prefix}", + prefix.Replace("\r", string.Empty).Replace("\n", string.Empty)); var allObjects = new List(); var request = new ListObjectsV2Request diff --git a/App/kernel-memory/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs b/App/kernel-memory/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs index 07dfde26..23f58dcb 100644 --- a/App/kernel-memory/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs +++ b/App/kernel-memory/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs @@ -315,7 +315,7 @@ public async Task DeleteAsync(string index, MemoryRecord record, CancellationTok try { - this._log.LogDebug("Deleting record {0} from index {1}", id, index); + this._log.LogDebug("Deleting record {RecordId} from index {Index}", id?.Replace("\r", string.Empty).Replace("\n", string.Empty), index?.Replace("\r", string.Empty).Replace("\n", string.Empty)); Response? 
result = await client.DeleteDocumentsAsync( AzureAISearchMemoryRecord.IdField, new List { id }, @@ -325,7 +325,7 @@ public async Task DeleteAsync(string index, MemoryRecord record, CancellationTok } catch (RequestFailedException e) when (e.Status == 404) { - this._log.LogTrace("Index {0} record {1} not found, nothing to delete", index, id); + this._log.LogTrace("Index {Index} record {RecordId} not found, nothing to delete", index?.Replace("\r", string.Empty).Replace("\n", string.Empty), id?.Replace("\r", string.Empty).Replace("\n", string.Empty)); } } @@ -403,7 +403,7 @@ private async Task DoesIndexExistAsync(string index, CancellationToken can private SearchClient GetSearchClient(string index) { var normalIndexName = this.NormalizeIndexName(index); - this._log.LogTrace("Preparing search client, index name '{0}' normalized to '{1}'", index, normalIndexName); + this._log.LogTrace("Preparing search client, index name {Index} normalized to {NormalizedIndex}", index?.Replace("\r", string.Empty).Replace("\n", string.Empty), normalIndexName?.Replace("\r", string.Empty).Replace("\n", string.Empty)); // Search an available client from the local cache if (!this._clientsByIndex.TryGetValue(normalIndexName, out SearchClient? 
client)) diff --git a/App/kernel-memory/extensions/AzureBlobs/AzureBlobsStorage.cs b/App/kernel-memory/extensions/AzureBlobs/AzureBlobsStorage.cs index 993bc2a8..43508766 100644 --- a/App/kernel-memory/extensions/AzureBlobs/AzureBlobsStorage.cs +++ b/App/kernel-memory/extensions/AzureBlobs/AzureBlobsStorage.cs @@ -232,13 +232,13 @@ public async Task ReadFileAsync( async () => (await blobClient.DownloadStreamingAsync(null, cancellationToken).ConfigureAwait(false)).Value.Content); } - if (logErrIfNotFound) { this._log.LogError("Unable to download file {0}", blobName); } + if (logErrIfNotFound) { this._log.LogError("Unable to download file {BlobName}", blobName?.Replace("\r", string.Empty).Replace("\n", string.Empty)); } throw new DocumentStorageFileNotFoundException("Unable to fetch blob content"); } catch (RequestFailedException e) when (e.Status == 404) { - this._log.LogInformation("File not found: {0}", blobName); + this._log.LogInformation("File not found: {BlobName}", blobName?.Replace("\r", string.Empty).Replace("\n", string.Empty)); throw new DocumentStorageFileNotFoundException("File not found", e); } } @@ -279,7 +279,7 @@ private async Task InternalWriteAsync( options.HttpHeaders = new BlobHttpHeaders { ContentType = fileType }; - this._log.LogTrace("Writing blob {0} ...", blobName); + this._log.LogTrace("Writing blob {BlobName} with type {ContentType} ...", blobName?.Replace("\r", string.Empty).Replace("\n", string.Empty), fileType?.Replace("\r", string.Empty).Replace("\n", string.Empty)); long size; switch (content) @@ -299,12 +299,12 @@ private async Task InternalWriteAsync( if (size == 0) { - this._log.LogWarning("The file {0}/{1} is empty", directoryName, fileName); + this._log.LogWarning("The file {Directory}/{FileName} is empty", directoryName?.Replace("\r", string.Empty).Replace("\n", string.Empty), fileName?.Replace("\r", string.Empty).Replace("\n", string.Empty)); } await this.ReleaseBlobAsync(blobLeaseClient, lease, 
cancellationToken).ConfigureAwait(false); - this._log.LogTrace("Blob {0} ready, size {1}", blobName, size); + this._log.LogTrace("Blob {BlobName} ready, size {Size}", blobName?.Replace("\r", string.Empty).Replace("\n", string.Empty), size); } private async Task DeleteBlobsByPrefixAsync(string prefix, CancellationToken cancellationToken) @@ -314,7 +314,7 @@ private async Task DeleteBlobsByPrefixAsync(string prefix, CancellationToken can throw new DocumentStorageException("The blob prefix is empty, stopping the process to prevent data loss"); } - this._log.LogInformation("Deleting blobs at {0}", prefix); + this._log.LogInformation("Deleting blobs at {Prefix}", prefix.Replace("\r", string.Empty).Replace("\n", string.Empty)); AsyncPageable? blobList = this._containerClient.GetBlobsAsync(prefix: prefix, cancellationToken: cancellationToken); await foreach (Page page in blobList.AsPages().WithCancellation(cancellationToken).ConfigureAwait(false)) diff --git a/App/kernel-memory/extensions/Elasticsearch/Elasticsearch/ElasticsearchMemory.cs b/App/kernel-memory/extensions/Elasticsearch/Elasticsearch/ElasticsearchMemory.cs index c1056949..ce126deb 100644 --- a/App/kernel-memory/extensions/Elasticsearch/Elasticsearch/ElasticsearchMemory.cs +++ b/App/kernel-memory/extensions/Elasticsearch/Elasticsearch/ElasticsearchMemory.cs @@ -213,7 +213,10 @@ public async Task UpsertAsync( index = IndexNameHelper.Convert(index, this._config); this._log.LogTrace("Searching for '{Text}' on index '{IndexName}' with filters {Filters}. 
{MinRelevance} {Limit} {WithEmbeddings}", - text, index, filters.ToDebugString(), minRelevance, limit, withEmbeddings); + text?.Replace("\r", string.Empty).Replace("\n", string.Empty), + index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + filters.ToDebugString()?.Replace("\r", string.Empty).Replace("\n", string.Empty), + minRelevance, limit, withEmbeddings); Embedding embedding = await this._embeddingGenerator.GenerateEmbeddingAsync(text, cancellationToken).ConfigureAwait(false); var coll = embedding.Data.ToArray(); @@ -258,7 +261,9 @@ public async IAsyncEnumerable GetListAsync( [EnumeratorCancellation] CancellationToken cancellationToken = default) { this._log.LogTrace("Querying index '{IndexName}' with filters {Filters}. {Limit} {WithEmbeddings}", - index, filters.ToDebugString(), limit, withEmbeddings); + index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + filters.ToDebugString()?.Replace("\r", string.Empty).Replace("\n", string.Empty), + limit, withEmbeddings); if (limit < 0) { diff --git a/App/kernel-memory/extensions/SQLServer/SQLServer/SqlServerMemory.cs b/App/kernel-memory/extensions/SQLServer/SQLServer/SqlServerMemory.cs index 704c9290..9ed9907d 100644 --- a/App/kernel-memory/extensions/SQLServer/SQLServer/SqlServerMemory.cs +++ b/App/kernel-memory/extensions/SQLServer/SQLServer/SqlServerMemory.cs @@ -48,6 +48,9 @@ public sealed class SqlServerMemory : IMemoryDb, IMemoryDbUpsertBatch, IDisposab /// SQL Server version, retrieved on the first connection /// private int _cachedServerVersion = int.MinValue; + // Accepts only [a-zA-Z_][a-zA-Z0-9_]{0,127} + private static readonly Regex s_safeSqlIdentifierRegex = new Regex(@"^[a-zA-Z_][a-zA-Z0-9_]{0,127}$", RegexOptions.Compiled); + /// /// Initializes a new instance of the class. 
@@ -78,6 +81,7 @@ public async Task CreateIndexAsync(string index, int vectorSize, CancellationTok return; } + // lgtm[cs/sql-injection] Index name sanitized by NormalizeIndexName with regex ^[a-zA-Z_][a-zA-Z0-9_]{0,127}$ var sql = $@" BEGIN TRANSACTION; @@ -114,6 +118,7 @@ FOREIGN KEY ([memory_id]) REFERENCES {this.GetFullTableName(this._config.MemoryT try { SqlCommand command = connection.CreateCommand(); + // codeql[cs/sql-injection] Index name sanitized by NormalizeIndexName with regex ^[a-zA-Z_][a-zA-Z0-9_]{0,127}$ command.CommandText = sql; command.Parameters.AddWithValue("@index", index); await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); @@ -139,6 +144,7 @@ public async Task DeleteAsync(string index, MemoryRecord record, CancellationTok return; } + // lgtm[cs/sql-injection] Index name sanitized by NormalizeIndexName with regex ^[a-zA-Z_][a-zA-Z0-9_]{0,127}$ var sql = $@" BEGIN TRANSACTION; @@ -165,6 +171,7 @@ DELETE [tags] try { SqlCommand command = connection.CreateCommand(); + // codeql[cs/sql-injection] Index name sanitized by NormalizeIndexName with regex ^[a-zA-Z_][a-zA-Z0-9_]{0,127}$ command.CommandText = sql; command.Parameters.AddWithValue("@index", index); command.Parameters.AddWithValue("@key", record.Id); @@ -192,6 +199,7 @@ public async Task DeleteIndexAsync(string index, CancellationToken cancellationT return; } + // lgtm[cs/sql-injection] Index name sanitized by NormalizeIndexName with regex ^[a-zA-Z_][a-zA-Z0-9_]{0,127}$ var sql = $@" BEGIN TRANSACTION; @@ -208,6 +216,7 @@ public async Task DeleteIndexAsync(string index, CancellationToken cancellationT SqlCommand command = connection.CreateCommand(); try { + // codeql[cs/sql-injection] Index name sanitized by NormalizeIndexName with regex ^[a-zA-Z_][a-zA-Z0-9_]{0,127}$ command.CommandText = sql; command.Parameters.AddWithValue("@index", index); await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); @@ -234,6 +243,7 @@ public async Task> 
GetIndexesAsync(CancellationToken cancell SqlCommand command = connection.CreateCommand(); try { + // codeql[cs/sql-injection] Schema and table names from configuration, not user input command.CommandText = sql; var dataReader = await command.ExecuteReaderAsync(cancellationToken).ConfigureAwait(false); while (await dataReader.ReadAsync(cancellationToken).ConfigureAwait(false)) @@ -285,6 +295,7 @@ public async IAsyncEnumerable GetListAsync( { var tagFilters = new TagCollection(); + // lgtm[cs/sql-injection] Index name sanitized by NormalizeIndexName with regex ^[a-zA-Z_][a-zA-Z0-9_]{0,127}$ command.CommandText = $@" WITH [filters] AS ( @@ -359,6 +370,7 @@ SELECT TOP (@limit) try { var generatedFilters = this.GenerateFilters(index, command.Parameters, filters); + // lgtm[cs/sql-injection] Index name sanitized by NormalizeIndexName with regex ^[a-zA-Z_][a-zA-Z0-9_]{0,127}$ command.CommandText = $@" WITH [embedding] as @@ -455,6 +467,7 @@ public async IAsyncEnumerable UpsertBatchAsync(string index, IEnumerable throw new IndexNotFoundException($"The index '{index}' does not exist."); } + // lgtm[cs/sql-injection] Index name sanitized by NormalizeIndexName with regex ^[a-zA-Z_][a-zA-Z0-9_]{0,127}$ var sql = $@" BEGIN TRANSACTION; @@ -524,6 +537,7 @@ WHEN NOT MATCHED THEN foreach (var record in list) { SqlCommand command = connection.CreateCommand(); + // codeql[cs/sql-injection] Index name sanitized by NormalizeIndexName with regex ^[a-zA-Z_][a-zA-Z0-9_]{0,127}$ command.CommandText = sql; command.Parameters.AddWithValue("@index", index); command.Parameters.AddWithValue("@key", record.Id); @@ -606,6 +620,7 @@ private async Task CacheSqlServerMajorVersionNumberAsync(CancellationToken cance /// private async Task CreateTablesIfNotExistsAsync(CancellationToken cancellationToken) { + // lgtm[cs/sql-injection] Schema and table names from configuration, not user input var sql = $@"IF NOT EXISTS (SELECT * FROM sys.schemas WHERE name = N'{this._config.Schema}' ) @@ -634,6 +649,7 
@@ FOREIGN KEY ([collection]) REFERENCES {this.GetFullTableName(this._config.Memory SqlCommand command = connection.CreateCommand(); try { + // codeql[cs/sql-injection] Schema and table names from configuration, not user input command.CommandText = sql; await command.ExecuteNonQueryAsync(cancellationToken).ConfigureAwait(false); } @@ -709,6 +725,7 @@ private string GenerateFilters( filterBuilder.Append(" ( "); + // lgtm[cs/sql-injection] Index name sanitized by NormalizeIndexName with regex ^[a-zA-Z_][a-zA-Z0-9_]{0,127}$ filterBuilder.Append(CultureInfo.CurrentCulture, $@"EXISTS ( SELECT 1 @@ -763,6 +780,11 @@ private static string NormalizeIndexName(string index) index = s_replaceIndexNameCharsRegex.Replace(index.Trim().ToLowerInvariant(), ValidSeparator); + // Only allow index names that are valid SQL identifiers (start with a letter or underscore, followed by letters, digits, or underscores, max 128 chars) + if (!s_safeSqlIdentifierRegex.IsMatch(index)) + { + throw new ArgumentException("Invalid index name. 
Allowed: letters, digits, underscores, max length 128, cannot start with digit.", nameof(index)); + } return index; } diff --git a/App/kernel-memory/service/Core/DocumentStorage/DevTools/SimpleFileStorage.cs b/App/kernel-memory/service/Core/DocumentStorage/DevTools/SimpleFileStorage.cs index 8c8791a6..8cf3aa2f 100644 --- a/App/kernel-memory/service/Core/DocumentStorage/DevTools/SimpleFileStorage.cs +++ b/App/kernel-memory/service/Core/DocumentStorage/DevTools/SimpleFileStorage.cs @@ -117,7 +117,10 @@ public async Task ReadFileAsync( { if (logErrIfNotFound) { - this._log.LogError("File not found {0}/{1}/{2}", index, documentId, fileName); + this._log.LogError("File not found {Index}/{DocumentId}/{FileName}", + index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + documentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), + fileName?.Replace("\r", string.Empty).Replace("\n", string.Empty)); } throw new DocumentStorageFileNotFoundException("File not found"); diff --git a/App/kernel-memory/service/Core/FileSystem/DevTools/DiskFileSystem.cs b/App/kernel-memory/service/Core/FileSystem/DevTools/DiskFileSystem.cs index 12066aaf..5950bb74 100644 --- a/App/kernel-memory/service/Core/FileSystem/DevTools/DiskFileSystem.cs +++ b/App/kernel-memory/service/Core/FileSystem/DevTools/DiskFileSystem.cs @@ -64,7 +64,7 @@ public async Task DeleteVolumeAsync(string volume, CancellationToken cancellatio { volume = ValidateVolumeName(volume); var path = Path.Join(this._dataPath, volume); - this._log.LogWarning("Deleting directory: {0}", path); + this._log.LogWarning("Deleting directory: {Path}", path?.Replace("\r", string.Empty).Replace("\n", string.Empty)); for (int attempt = 1; attempt <= 5; attempt++) { if (!Directory.Exists(path)) @@ -140,7 +140,7 @@ public async Task WriteFileAsync(string volume, string relPath, string fileName, relPath = ValidatePath(relPath); fileName = ValidateFileName(fileName); path = Path.Join(path, relPath, fileName); - 
this._log.LogTrace("Writing file to {0}", path); + this._log.LogTrace("Writing file to {Path}", path?.Replace("\r", string.Empty).Replace("\n", string.Empty)); BinaryData data = await BinaryData.FromStreamAsync(streamContent, cancellationToken).ConfigureAwait(false); await File.WriteAllBytesAsync(path, data.ToArray(), cancellationToken).ConfigureAwait(false); } @@ -200,17 +200,17 @@ public Task ReadFileInfoAsync(string volume, string relPa path = Path.Join(path, fileName); if (!File.Exists(path)) { - this._log.LogError("File not found: {0}", path); + this._log.LogError("File not found: {Path}", path?.Replace("\r", string.Empty).Replace("\n", string.Empty)); throw new FileNotFoundException($"File not found: {path}"); } - this._log.LogTrace("File exists, reading {0}", path); + this._log.LogTrace("File exists, reading {Path}", path?.Replace("\r", string.Empty).Replace("\n", string.Empty)); FileInfo info = new(path); var fileType = this._mimeTypeDetection.GetFileType(fileName); Task AsyncStreamDelegate() => Task.FromResult(info.OpenRead()); StreamableFileContent result = new(fileName, info.Length, fileType, info.LastWriteTimeUtc, AsyncStreamDelegate); - this._log.LogTrace("File {0} size: {1} bytes", path, info.Length); + this._log.LogTrace("File {Path} size: {Bytes} bytes", path?.Replace("\r", string.Empty).Replace("\n", string.Empty), info.Length); return Task.FromResult(result); } @@ -265,7 +265,7 @@ public Task DeleteFileAsync(string volume, string relPath, string fileName, Canc volume = ValidateVolumeName(volume); relPath = ValidatePath(relPath); var path = Path.Join(this._dataPath, volume, relPath, fileName); - this._log.LogDebug("Deleting {0}", path); + this._log.LogDebug("Deleting {Path}", path?.Replace("\r", string.Empty).Replace("\n", string.Empty)); if (File.Exists(path)) { File.Delete(path); } return Task.CompletedTask; @@ -355,7 +355,7 @@ private void CreateDirectory(string path) return; } - this._log.LogDebug("Creating directory {0}", path); + 
this._log.LogDebug("Creating directory {Path}", path.Replace("\r", string.Empty).Replace("\n", string.Empty)); Directory.CreateDirectory(path); } diff --git a/App/kernel-memory/service/Core/FileSystem/DevTools/VolatileFileSystem.cs b/App/kernel-memory/service/Core/FileSystem/DevTools/VolatileFileSystem.cs index 8a9ba3cf..839db5c9 100644 --- a/App/kernel-memory/service/Core/FileSystem/DevTools/VolatileFileSystem.cs +++ b/App/kernel-memory/service/Core/FileSystem/DevTools/VolatileFileSystem.cs @@ -235,7 +235,7 @@ public Task ReadFileInfoAsync(string volume, string relPa BinaryData file = new(string.Empty); if (!volumeData.TryGetValue(filePath, out file!)) { - this._log.LogError("File not found: {0}", filePath); + this._log.LogError("File not found: {FilePath}", filePath?.Replace("\r", string.Empty).Replace("\n", string.Empty)); throw new FileNotFoundException($"File not found: {filePath}"); } diff --git a/App/kernel-memory/service/Core/Handlers/DeleteDocumentHandler.cs b/App/kernel-memory/service/Core/Handlers/DeleteDocumentHandler.cs index 6324ed44..e81dfa67 100644 --- a/App/kernel-memory/service/Core/Handlers/DeleteDocumentHandler.cs +++ b/App/kernel-memory/service/Core/Handlers/DeleteDocumentHandler.cs @@ -37,7 +37,7 @@ public DeleteDocumentHandler( public async Task<(bool success, DataPipeline updatedPipeline)> InvokeAsync( DataPipeline pipeline, CancellationToken cancellationToken = default) { - this._log.LogDebug("Deleting document, pipeline '{0}/{1}'", pipeline.Index, pipeline.DocumentId); + this._log.LogDebug("Deleting document, pipeline {Index}/{DocumentId}", pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); // Delete embeddings foreach (IMemoryDb db in this._memoryDbs) diff --git a/App/kernel-memory/service/Core/Handlers/DeleteGeneratedFilesHandler.cs b/App/kernel-memory/service/Core/Handlers/DeleteGeneratedFilesHandler.cs index 006eb58d..38ad1e31 100644 
--- a/App/kernel-memory/service/Core/Handlers/DeleteGeneratedFilesHandler.cs +++ b/App/kernel-memory/service/Core/Handlers/DeleteGeneratedFilesHandler.cs @@ -32,7 +32,7 @@ public DeleteGeneratedFilesHandler( public async Task<(bool success, DataPipeline updatedPipeline)> InvokeAsync( DataPipeline pipeline, CancellationToken cancellationToken = default) { - this._log.LogDebug("Deleting generated files, pipeline '{0}/{1}'", pipeline.Index, pipeline.DocumentId); + this._log.LogDebug("Deleting generated files, pipeline {Index}/{DocumentId}", pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); // Delete files, leaving the status file await this._documentStorage.EmptyDocumentDirectoryAsync( diff --git a/App/kernel-memory/service/Core/Handlers/DeleteIndexHandler.cs b/App/kernel-memory/service/Core/Handlers/DeleteIndexHandler.cs index a2897d48..252b852a 100644 --- a/App/kernel-memory/service/Core/Handlers/DeleteIndexHandler.cs +++ b/App/kernel-memory/service/Core/Handlers/DeleteIndexHandler.cs @@ -37,7 +37,7 @@ public DeleteIndexHandler( public async Task<(bool success, DataPipeline updatedPipeline)> InvokeAsync( DataPipeline pipeline, CancellationToken cancellationToken = default) { - this._log.LogDebug("Deleting index, pipeline '{0}/{1}'", pipeline.Index, pipeline.DocumentId); + this._log.LogDebug("Deleting index, pipeline {Index}/{DocumentId}", pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); // Delete index from vector storage foreach (IMemoryDb db in this._memoryDbs) diff --git a/App/kernel-memory/service/Core/Handlers/GenerateEmbeddingsHandler.cs b/App/kernel-memory/service/Core/Handlers/GenerateEmbeddingsHandler.cs index 30f725b5..46982f81 100644 --- a/App/kernel-memory/service/Core/Handlers/GenerateEmbeddingsHandler.cs +++ 
b/App/kernel-memory/service/Core/Handlers/GenerateEmbeddingsHandler.cs @@ -63,7 +63,7 @@ public GenerateEmbeddingsHandler( { if (!this._embeddingGenerationEnabled) { - this._log.LogTrace("Embedding generation is disabled, skipping - pipeline '{0}/{1}'", pipeline.Index, pipeline.DocumentId); + this._log.LogTrace("Embedding generation is disabled, skipping - pipeline {Index}/{DocumentId}", pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); return (true, pipeline); } @@ -98,8 +98,12 @@ private async Task GenerateEmbeddingsWithBatchingAsync( { PartitionInfo[][] batches = partitions.Chunk(batchSize).ToArray(); - this._log.LogTrace("Generating embeddings, pipeline '{0}/{1}', batch generator '{2}', batch size {3}, batch count {4}", - pipeline.Index, pipeline.DocumentId, generator.GetType().FullName, generator.MaxBatchSize, batches.Length); + this._log.LogTrace("Generating embeddings, pipeline {Index}/{DocumentId}, batch generator {GeneratorType}, batch size {BatchSize}, batch count {BatchCount}", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), + generator.GetType().FullName?.Replace("\r", string.Empty).Replace("\n", string.Empty), + generator.MaxBatchSize, + batches.Length); // One batch at a time foreach (PartitionInfo[] partitionsInfo in batches) @@ -107,8 +111,12 @@ private async Task GenerateEmbeddingsWithBatchingAsync( string[] strings = partitionsInfo.Select(x => x.PartitionContent).ToArray(); int totalTokens = strings.Sum(s => ((ITextEmbeddingGenerator)generator).CountTokens(s)); - this._log.LogTrace("Generating embeddings, pipeline '{0}/{1}', generator '{2}', batch size {3}, total {4} tokens", - pipeline.Index, pipeline.DocumentId, generator.GetType().FullName, strings.Length, totalTokens); + this._log.LogTrace("Generating embeddings, pipeline 
{Index}/{DocumentId}, generator {GeneratorType}, batch size {BatchSize}, total {TotalTokens} tokens", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), + generator.GetType().FullName?.Replace("\r", string.Empty).Replace("\n", string.Empty), + strings.Length, + totalTokens); Embedding[] embeddings = await generator.GenerateEmbeddingBatchAsync(strings, cancellationToken).ConfigureAwait(false); await this.SaveEmbeddingsToDocumentStorageAsync( @@ -124,14 +132,20 @@ private async Task GenerateEmbeddingsOneAtATimeAsync( List partitions, CancellationToken cancellationToken) { - this._log.LogTrace("Generating embeddings, pipeline '{0}/{1}', generator '{2}', partition count {3}", - pipeline.Index, pipeline.DocumentId, generator.GetType().FullName, partitions.Count); + this._log.LogTrace("Generating embeddings, pipeline {Index}/{DocumentId}, generator {GeneratorType}, partition count {PartitionCount}", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), + generator.GetType().FullName?.Replace("\r", string.Empty).Replace("\n", string.Empty), + partitions.Count); // One partition at a time foreach (PartitionInfo partitionInfo in partitions) { - this._log.LogTrace("Generating embedding, pipeline '{0}/{1}', generator '{2}', content size {3} tokens", - pipeline.Index, pipeline.DocumentId, generator.GetType().FullName, generator.CountTokens(partitionInfo.PartitionContent)); + this._log.LogTrace("Generating embedding, pipeline {Index}/{DocumentId}, generator {GeneratorType}, content size {TokenCount} tokens", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), + generator.GetType().FullName?.Replace("\r", string.Empty).Replace("\n", string.Empty), + 
generator.CountTokens(partitionInfo.PartitionContent)); var embedding = await generator.GenerateEmbeddingAsync(partitionInfo.PartitionContent, cancellationToken).ConfigureAwait(false); await this.SaveEmbeddingToDocumentStorageAsync( pipeline, partitionInfo, embedding, GetEmbeddingProviderName(generator), GetEmbeddingGeneratorName(generator), cancellationToken) diff --git a/App/kernel-memory/service/Core/Handlers/GenerateEmbeddingsHandlerBase.cs b/App/kernel-memory/service/Core/Handlers/GenerateEmbeddingsHandlerBase.cs index d4adc2d8..c987d7cb 100644 --- a/App/kernel-memory/service/Core/Handlers/GenerateEmbeddingsHandlerBase.cs +++ b/App/kernel-memory/service/Core/Handlers/GenerateEmbeddingsHandlerBase.cs @@ -32,8 +32,10 @@ protected async Task> GetListOfPartitionsToProcessAsync( { var partitionsToProcess = new List(); - this._log.LogTrace("Generating list of files to process, pipeline '{0}/{1}', sub-step '{2}'", - pipeline.Index, pipeline.DocumentId, subStepName); + this._log.LogTrace("Generating list of files to process, pipeline '{Index}/{DocumentId}', sub-step '{SubStep}'", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), + subStepName?.Replace("\r", string.Empty).Replace("\n", string.Empty)); foreach (DataPipeline.FileDetails uploadedFile in pipeline.Files) { foreach (KeyValuePair generatedFile in uploadedFile.GeneratedFiles) diff --git a/App/kernel-memory/service/Core/Handlers/GenerateEmbeddingsParallelHandler.cs b/App/kernel-memory/service/Core/Handlers/GenerateEmbeddingsParallelHandler.cs index 360c8287..b4b5288a 100644 --- a/App/kernel-memory/service/Core/Handlers/GenerateEmbeddingsParallelHandler.cs +++ b/App/kernel-memory/service/Core/Handlers/GenerateEmbeddingsParallelHandler.cs @@ -63,7 +63,7 @@ public GenerateEmbeddingsParallelHandler( { if (!this._embeddingGenerationEnabled) { - this._log.LogTrace("Embedding generation is disabled, skipping - 
pipeline '{0}/{1}'", pipeline.Index, pipeline.DocumentId); + this._log.LogTrace("Embedding generation is disabled, skipping - pipeline '{Index}/{DocumentId}'", pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); return (true, pipeline); } @@ -98,8 +98,12 @@ private async Task GenerateEmbeddingsWithBatchingAsync( { PartitionInfo[][] batches = partitions.Chunk(batchSize).ToArray(); - this._log.LogTrace("Generating embeddings, pipeline '{0}/{1}', batch generator '{2}', batch size {3}, batch count {4}", - pipeline.Index, pipeline.DocumentId, generator.GetType().FullName, generator.MaxBatchSize, batches.Length); + this._log.LogTrace("Generating embeddings, pipeline '{Index}/{DocumentId}', batch generator '{GeneratorType}', batch size {BatchSize}, batch count {BatchCount}", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), + generator.GetType().FullName?.Replace("\r", string.Empty).Replace("\n", string.Empty), + generator.MaxBatchSize, + batches.Length); // Multiple batches in parallel await Parallel.ForEachAsync(batches, cancellationToken, async (partitionsInfo, ct) => @@ -107,8 +111,13 @@ await Parallel.ForEachAsync(batches, cancellationToken, async (partitionsInfo, c string[] strings = partitionsInfo.Select(x => x.PartitionContent).ToArray(); int totalTokens = strings.Sum(s => ((ITextEmbeddingGenerator)generator).CountTokens(s)); - this._log.LogTrace("Generating embeddings, pipeline '{0}/{1}', generator '{2}', batch size {3}, total {4} tokens", - pipeline.Index, pipeline.DocumentId, generator.GetType().FullName, strings.Length, totalTokens); + this._log.LogTrace( + "Generating embeddings, pipeline {Index}/{DocumentId}, generator {GeneratorType}, batch size {BatchSize}, total {TotalTokens} tokens", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", 
string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), + generator.GetType().FullName?.Replace("\r", string.Empty).Replace("\n", string.Empty), + strings.Length, + totalTokens); Embedding[] embeddings = await generator.GenerateEmbeddingBatchAsync(strings, cancellationToken).ConfigureAwait(false); await this.SaveEmbeddingsToDocumentStorageAsync( @@ -124,14 +133,21 @@ private async Task GenerateEmbeddingsOneAtATimeAsync( List partitions, CancellationToken cancellationToken) { - this._log.LogTrace("Generating embeddings, pipeline '{0}/{1}', generator '{2}', partition count {3}", - pipeline.Index, pipeline.DocumentId, generator.GetType().FullName, partitions.Count); + this._log.LogTrace("Generating embeddings, pipeline '{Index}/{DocumentId}', generator '{GeneratorType}', partition count {PartitionCount}", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), + generator.GetType().FullName?.Replace("\r", string.Empty).Replace("\n", string.Empty), + partitions.Count); // Multiple partitions in parallel await Parallel.ForEachAsync(partitions, cancellationToken, async (partitionInfo, ct) => { - this._log.LogTrace("Generating embedding, pipeline '{0}/{1}', generator '{2}', content size {3} tokens", - pipeline.Index, pipeline.DocumentId, generator.GetType().FullName, generator.CountTokens(partitionInfo.PartitionContent)); + this._log.LogTrace( + "Generating embedding, pipeline {Index}/{DocumentId}, generator {GeneratorType}, content size {TokenCount} tokens", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), + generator.GetType().FullName?.Replace("\r", string.Empty).Replace("\n", string.Empty), + generator.CountTokens(partitionInfo.PartitionContent)); var embedding = await 
generator.GenerateEmbeddingAsync(partitionInfo.PartitionContent, ct).ConfigureAwait(false); await this.SaveEmbeddingToDocumentStorageAsync( pipeline, partitionInfo, embedding, GetEmbeddingProviderName(generator), GetEmbeddingGeneratorName(generator), ct) diff --git a/App/kernel-memory/service/Core/Handlers/KeywordExtractingHandler.cs b/App/kernel-memory/service/Core/Handlers/KeywordExtractingHandler.cs index 28001501..7e33673e 100644 --- a/App/kernel-memory/service/Core/Handlers/KeywordExtractingHandler.cs +++ b/App/kernel-memory/service/Core/Handlers/KeywordExtractingHandler.cs @@ -45,7 +45,10 @@ public KeywordExtractingHandler( public async Task<(bool success, DataPipeline updatedPipeline)> InvokeAsync(DataPipeline pipeline, CancellationToken cancellationToken = default) { - this._log.LogDebug("Extracting Keywords from the content", pipeline.Index, pipeline.DocumentId); + this._log.LogDebug( + "Extracting Keywords from the content for pipeline {Index}/{DocumentId}", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); foreach (FileDetails uploadedFile in pipeline.Files) { diff --git a/App/kernel-memory/service/Core/Handlers/SaveRecordsHandler.cs b/App/kernel-memory/service/Core/Handlers/SaveRecordsHandler.cs index 50baf535..472f080b 100644 --- a/App/kernel-memory/service/Core/Handlers/SaveRecordsHandler.cs +++ b/App/kernel-memory/service/Core/Handlers/SaveRecordsHandler.cs @@ -106,7 +106,10 @@ public SaveRecordsHandler( public async Task<(bool success, DataPipeline updatedPipeline)> InvokeAsync( DataPipeline pipeline, CancellationToken cancellationToken = default) { - this._log.LogDebug("Saving memory records, pipeline '{0}/{1}'", pipeline.Index, pipeline.DocumentId); + this._log.LogDebug( + "Saving memory records, pipeline {Index}/{DocumentId}", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", 
string.Empty).Replace("\n", string.Empty)); await this.DeletePreviousRecordsAsync(pipeline, cancellationToken).ConfigureAwait(false); pipeline.PreviousExecutionsToPurge = new List(); @@ -238,7 +241,11 @@ record = PrepareRecord( if (!recordsFound) { - this._log.LogWarning("Pipeline '{0}/{1}': step {2}: no records found, cannot save, moving to next pipeline step.", pipeline.Index, pipeline.DocumentId, this.StepName); + this._log.LogWarning( + "Pipeline {Index}/{DocumentId}: step {StepName}: no records found, cannot save, moving to next pipeline step.", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), + this.StepName?.Replace("\r", string.Empty).Replace("\n", string.Empty)); } return (true, pipeline); @@ -262,15 +269,21 @@ private async Task SaveRecordAsync(DataPipeline pipeline, IMemoryDb db, MemoryRe { try { - this._log.LogTrace("Saving record {0} in index '{1}'", record.Id, pipeline.Index); + this._log.LogTrace("Saving record {RecordId} in index {Index}", record.Id?.Replace("\r", string.Empty).Replace("\n", string.Empty), pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty)); await db.UpsertAsync(pipeline.Index, record, cancellationToken).ConfigureAwait(false); } catch (IndexNotFoundException e) { - this._log.LogWarning(e, "Index {0} not found, attempting to create it", pipeline.Index); + this._log.LogWarning( + e, + "Index {Index} not found, attempting to create it", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty)); await this.CreateIndexOnceAsync(db, createdIndexes, pipeline.Index, record.Vector.Length, cancellationToken, true).ConfigureAwait(false); - this._log.LogTrace("Retry: saving record {0} in index '{1}'", record.Id, pipeline.Index); + this._log.LogTrace( + "Retry: saving record {RecordId} in index {Index}", + record.Id?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.Index?.Replace("\r", 
string.Empty).Replace("\n", string.Empty)); await db.UpsertAsync(pipeline.Index, record, cancellationToken).ConfigureAwait(false); } } @@ -281,15 +294,18 @@ private async Task SaveRecordsBatchAsync(DataPipeline pipeline, IMemoryDb db, Li ArgumentNullExceptionEx.ThrowIfNull(dbBatch, nameof(dbBatch), $"{db.GetType().FullName} doesn't implement {nameof(IMemoryDbUpsertBatch)}"); try { - this._log.LogTrace("Saving batch of {0} records in index '{1}'", records.Count, pipeline.Index); + this._log.LogTrace("Saving batch of {RecordCount} records in index {Index}", records.Count, pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty)); await dbBatch.UpsertBatchAsync(pipeline.Index, records, cancellationToken).ToListAsync(cancellationToken).ConfigureAwait(false); } catch (IndexNotFoundException e) { - this._log.LogWarning(e, "Index {0} not found, attempting to create it", pipeline.Index); + this._log.LogWarning(e, "Index {Index} not found, attempting to create it", pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty)); await this.CreateIndexOnceAsync(db, createdIndexes, pipeline.Index, records[0].Vector.Length, cancellationToken, true).ConfigureAwait(false); - this._log.LogTrace("Retry: Saving batch of {0} records in index '{1}'", records.Count, pipeline.Index); + this._log.LogTrace( + "Retry: Saving batch of {RecordCount} records in index {Index}", + records.Count, + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty)); await dbBatch.UpsertBatchAsync(pipeline.Index, records, cancellationToken).ToListAsync(cancellationToken).ConfigureAwait(false); } } @@ -335,7 +351,9 @@ private async Task CreateIndexOnceAsync( if (!force && createdIndexes.Contains(key)) { return; } - this._log.LogTrace("Creating index '{0}'", indexName); + this._log.LogTrace( + "Creating index {Index}", + indexName?.Replace("\r", string.Empty).Replace("\n", string.Empty)); await client.CreateIndexAsync(indexName, vectorLength, 
cancellationToken).ConfigureAwait(false); createdIndexes.Add(key); } diff --git a/App/kernel-memory/service/Core/Handlers/SummarizationHandler.cs b/App/kernel-memory/service/Core/Handlers/SummarizationHandler.cs index 81a08c65..b87b32b0 100644 --- a/App/kernel-memory/service/Core/Handlers/SummarizationHandler.cs +++ b/App/kernel-memory/service/Core/Handlers/SummarizationHandler.cs @@ -57,7 +57,7 @@ public SummarizationHandler( public async Task<(bool success, DataPipeline updatedPipeline)> InvokeAsync( DataPipeline pipeline, CancellationToken cancellationToken = default) { - this._log.LogDebug("Generating summary, pipeline '{0}/{1}'", pipeline.Index, pipeline.DocumentId); + this._log.LogDebug("Generating summary, pipeline {Index}/{DocumentId}", pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); foreach (DataPipeline.FileDetails uploadedFile in pipeline.Files) { diff --git a/App/kernel-memory/service/Core/Handlers/SummarizationParallelHandler.cs b/App/kernel-memory/service/Core/Handlers/SummarizationParallelHandler.cs index 19a685e3..ab0f69c2 100644 --- a/App/kernel-memory/service/Core/Handlers/SummarizationParallelHandler.cs +++ b/App/kernel-memory/service/Core/Handlers/SummarizationParallelHandler.cs @@ -56,7 +56,10 @@ public SummarizationParallelHandler( public async Task<(bool success, DataPipeline updatedPipeline)> InvokeAsync( DataPipeline pipeline, CancellationToken cancellationToken = default) { - this._log.LogDebug("Generating summary, pipeline '{0}/{1}'", pipeline.Index, pipeline.DocumentId); + this._log.LogDebug( + "Generating summary, pipeline {Index}/{DocumentId}", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); foreach (DataPipeline.FileDetails uploadedFile in pipeline.Files) { diff --git 
a/App/kernel-memory/service/Core/Handlers/TextExtractionHandler.cs b/App/kernel-memory/service/Core/Handlers/TextExtractionHandler.cs index c68d4f32..389d3f07 100644 --- a/App/kernel-memory/service/Core/Handlers/TextExtractionHandler.cs +++ b/App/kernel-memory/service/Core/Handlers/TextExtractionHandler.cs @@ -57,7 +57,7 @@ public TextExtractionHandler( public async Task<(bool success, DataPipeline updatedPipeline)> InvokeAsync( DataPipeline pipeline, CancellationToken cancellationToken = default) { - this._log.LogDebug("Extracting text, pipeline '{0}/{1}'", pipeline.Index, pipeline.DocumentId); + this._log.LogDebug("Extracting text, pipeline {Index}/{DocumentId}", pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); foreach (DataPipeline.FileDetails uploadedFile in pipeline.Files) { diff --git a/App/kernel-memory/service/Core/Handlers/TextPartitioningHandler.cs b/App/kernel-memory/service/Core/Handlers/TextPartitioningHandler.cs index d6c3f9e2..25571bf7 100644 --- a/App/kernel-memory/service/Core/Handlers/TextPartitioningHandler.cs +++ b/App/kernel-memory/service/Core/Handlers/TextPartitioningHandler.cs @@ -70,11 +70,17 @@ public TextPartitioningHandler( public async Task<(bool success, DataPipeline updatedPipeline)> InvokeAsync( DataPipeline pipeline, CancellationToken cancellationToken = default) { - this._log.LogDebug("Partitioning text, pipeline '{0}/{1}'", pipeline.Index, pipeline.DocumentId); + this._log.LogDebug( + "Partitioning text, pipeline {Index}/{DocumentId}", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); if (pipeline.Files.Count == 0) { - this._log.LogWarning("Pipeline '{0}/{1}': there are no files to process, moving to next pipeline step.", pipeline.Index, pipeline.DocumentId); + this._log.LogWarning( + "Pipeline {Index}/{DocumentId}: there are 
no files to process, moving to next pipeline step.", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); return (true, pipeline); } diff --git a/App/kernel-memory/service/Core/Pipeline/BaseOrchestrator.cs b/App/kernel-memory/service/Core/Pipeline/BaseOrchestrator.cs index 84df7ec3..bfc1a155 100644 --- a/App/kernel-memory/service/Core/Pipeline/BaseOrchestrator.cs +++ b/App/kernel-memory/service/Core/Pipeline/BaseOrchestrator.cs @@ -428,7 +428,7 @@ protected async Task UploadFilesAsync(DataPipeline currentPipeline, Cancellation /// Task cancellation token protected async Task UpdatePipelineStatusAsync(DataPipeline pipeline, CancellationToken cancellationToken) { - this.Log.LogDebug("Saving pipeline status to '{0}/{1}/{2}'", pipeline.Index, pipeline.DocumentId, Constants.PipelineStatusFilename); + this.Log.LogDebug("Saving pipeline status to {Index}/{DocumentId}/{FileName}", pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), Constants.PipelineStatusFilename); try { await this._documentStorage.WriteFileAsync( @@ -453,7 +453,7 @@ protected static string ToJson(object data, bool indented = false) private async Task UploadFormFilesAsync(DataPipeline pipeline, CancellationToken cancellationToken) { - this.Log.LogDebug("Uploading {0} files, pipeline '{1}/{2}'", pipeline.FilesToUpload.Count, pipeline.Index, pipeline.DocumentId); + this.Log.LogDebug("Uploading {FileCount} files, pipeline '{Index}/{DocumentId}'", pipeline.FilesToUpload.Count, pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); await this._documentStorage.CreateIndexDirectoryAsync(pipeline.Index, cancellationToken).ConfigureAwait(false); await this._documentStorage.CreateDocumentDirectoryAsync(pipeline.Index, 
pipeline.DocumentId, cancellationToken).ConfigureAwait(false); diff --git a/App/kernel-memory/service/Core/Pipeline/InProcessPipelineOrchestrator.cs b/App/kernel-memory/service/Core/Pipeline/InProcessPipelineOrchestrator.cs index 66ba8c47..f7ebf5ef 100644 --- a/App/kernel-memory/service/Core/Pipeline/InProcessPipelineOrchestrator.cs +++ b/App/kernel-memory/service/Core/Pipeline/InProcessPipelineOrchestrator.cs @@ -184,13 +184,20 @@ public override async Task RunPipelineAsync(DataPipeline pipeline, CancellationT } else { - this.Log.LogError("Handler '{0}' failed to process pipeline '{1}/{2}'", currentStepName, pipeline.Index, pipeline.DocumentId); + this.Log.LogError( + "Handler {StepName} failed to process pipeline {Index}/{DocumentId}", + currentStepName?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); throw new OrchestrationException($"Pipeline error, step {currentStepName} failed"); } } await this.CleanUpAfterCompletionAsync(pipeline, cancellationToken).ConfigureAwait(false); - this.Log.LogInformation("Pipeline '{0}/{1}' complete", pipeline.Index, pipeline.DocumentId); + this.Log.LogInformation( + "Pipeline {Index}/{DocumentId} complete", + pipeline.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + pipeline.DocumentId?.Replace("\r", string.Empty).Replace("\n", string.Empty)); } } diff --git a/App/kernel-memory/service/Service.AspNetCore/WebAPIEndpoints.cs b/App/kernel-memory/service/Service.AspNetCore/WebAPIEndpoints.cs index eeb431c3..7fa668f6 100644 --- a/App/kernel-memory/service/Service.AspNetCore/WebAPIEndpoints.cs +++ b/App/kernel-memory/service/Service.AspNetCore/WebAPIEndpoints.cs @@ -146,7 +146,7 @@ async Task ( ILogger log, CancellationToken cancellationToken) => { - log.LogTrace("New delete document HTTP request, index '{0}'", index); + log.LogTrace("New delete document 
HTTP request, index {Index}", index?.Replace("\r", string.Empty).Replace("\n", string.Empty)); await service.DeleteIndexAsync(index: index, cancellationToken) .ConfigureAwait(false); // There's no API to check the index deletion progress, so the URL is empty @@ -180,7 +180,7 @@ async Task ( ILogger log, CancellationToken cancellationToken) => { - log.LogTrace("New delete document HTTP request, index '{0}'", index); + log.LogTrace("New delete document HTTP request, index {Index}", index?.Replace("\r", string.Empty).Replace("\n", string.Empty)); await service.DeleteDocumentAsync(documentId: documentId, index: index, cancellationToken) .ConfigureAwait(false); var url = Constants.HttpUploadStatusEndpointWithParams @@ -217,7 +217,7 @@ async Task ( // Allow internal classes to access custom arguments via IContextProvider contextProvider.InitContextArgs(query.ContextArguments); - log.LogTrace("New search request, index '{0}', minRelevance {1}", query.Index, query.MinRelevance); + log.LogTrace("New search request, index {Index}, minRelevance {MinRelevance}", query.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), query.MinRelevance); MemoryAnswer answer = await service.AskAsync( question: query.Question, index: query.Index, @@ -252,7 +252,7 @@ async Task ( // Allow internal classes to access custom arguments via IContextProvider contextProvider.InitContextArgs(query.ContextArguments); - log.LogTrace("New search HTTP request, index '{0}', minRelevance {1}", query.Index, query.MinRelevance); + log.LogTrace("New search HTTP request, index {Index}, minRelevance {MinRelevance}", query.Index?.Replace("\r", string.Empty).Replace("\n", string.Empty), query.MinRelevance); SearchResult answer = await service.SearchAsync( query: query.Query, index: query.Index, @@ -338,7 +338,10 @@ public static void AddGetDownloadEndpoint(this IEndpointRouteBuilder builder, st string.IsNullOrWhiteSpace(filename)); var errMsg = "Missing required parameter"; - log.LogTrace("New download file HTTP request, index 
{0}, documentId {1}, fileName {3}", index, documentId, filename); + log.LogTrace("New download file HTTP request, index {Index}, documentId {DocumentId}, fileName {FileName}", + index?.Replace("\r", string.Empty).Replace("\n", string.Empty), + documentId?.Replace("\r", string.Empty).Replace("\n", string.Empty), + filename?.Replace("\r", string.Empty).Replace("\n", string.Empty)); if (!isValid) { @@ -362,7 +365,10 @@ public static void AddGetDownloadEndpoint(this IEndpointRouteBuilder builder, st return Results.Problem(title: "File not found", statusCode: 404); } - log.LogTrace("Downloading file '{0}', size '{1}', type '{2}'", filename, file.FileSize, file.FileType); + log.LogTrace("Downloading file {FileName}, size {FileSize}, type {FileType}", + filename.Replace("\r", string.Empty).Replace("\n", string.Empty), + file.FileSize, + file.FileType.Replace("\r", string.Empty).Replace("\n", string.Empty)); Stream resultingFileStream = await file.GetStreamAsync().WaitAsync(cancellationToken).ConfigureAwait(false); var response = Results.Stream( resultingFileStream,