From e5c77da2273d63f8b3f006583e5c95dde9ec7b08 Mon Sep 17 00:00:00 2001 From: Drew Davis Date: Thu, 23 Oct 2025 11:15:45 +0200 Subject: [PATCH 1/4] perf: Improve getKeyValues query performance for JSON keys --- .changeset/sweet-vans-pump.md | 5 ++ .../src/__tests__/metadata.test.ts | 77 ++----------------- packages/common-utils/src/metadata.ts | 24 +++--- 3 files changed, 21 insertions(+), 85 deletions(-) create mode 100644 .changeset/sweet-vans-pump.md diff --git a/.changeset/sweet-vans-pump.md b/.changeset/sweet-vans-pump.md new file mode 100644 index 000000000..fbf2d4513 --- /dev/null +++ b/.changeset/sweet-vans-pump.md @@ -0,0 +1,5 @@ +--- +"@hyperdx/common-utils": patch +--- + +perf: Improve getKeyValues query performance for JSON keys diff --git a/packages/common-utils/src/__tests__/metadata.test.ts b/packages/common-utils/src/__tests__/metadata.test.ts index 7667391e6..f0aecdd22 100644 --- a/packages/common-utils/src/__tests__/metadata.test.ts +++ b/packages/common-utils/src/__tests__/metadata.test.ts @@ -251,22 +251,6 @@ describe('Metadata', () => { ], }), }); - - // Mock getColumns to return columns including materialized ones - mockCache.getOrFetch.mockImplementation((key, queryFn) => { - if (key.includes('.columns')) { - return Promise.resolve([ - { name: 'regular_column', type: 'String', default_type: '' }, - { - name: 'materialized_column', - type: 'String', - default_type: 'MATERIALIZED', - }, - { name: 'default_column', type: 'String', default_type: 'DEFAULT' }, - ]); - } - return queryFn(); - }); }); it('should apply row limit when disableRowLimit is false', async () => { @@ -355,71 +339,20 @@ describe('Metadata', () => { expect(result).toEqual([{ key: 'column1', value: ['value1', 'value2'] }]); }); - it('should include materialized fields when selecting all columns', async () => { - const renderChartConfigSpy = jest.spyOn( - renderChartConfigModule, - 'renderChartConfig', - ); - - await metadata.getKeyValues({ - chartConfig: mockChartConfig, - keys: ['column1'], - limit: 10, - }); - - // Verify that renderChartConfig was called with the expanded select list - // that includes all columns by name (including materialized ones) - expect(renderChartConfigSpy).toHaveBeenCalledWith( - expect.objectContaining({ - with: [ - expect.objectContaining({ - name: 'sampledData', - chartConfig: expect.objectContaining({ - // Should expand to all column names instead of using '*' - select: - '`regular_column`, `materialized_column`, `default_column`', - }), - }), - ], - }), - metadata, - ); - }); - - it('should fallback to * when no columns are found', async () => { - // Mock getColumns to return empty array - mockCache.getOrFetch.mockImplementation((key, queryFn) => { - if (key.includes('.columns')) { - return Promise.resolve([]); - } - return queryFn(); - }); - + it('should return an empty list when no keys are provided', async () => { const renderChartConfigSpy = jest.spyOn( renderChartConfigModule, 'renderChartConfig', ); - await metadata.getKeyValues({ + const results = await metadata.getKeyValues({ chartConfig: mockChartConfig, - keys: ['column1'], + keys: [], limit: 10, }); - // Should fallback to '*' when no columns are found - expect(renderChartConfigSpy).toHaveBeenCalledWith( - expect.objectContaining({ - with: [ - expect.objectContaining({ - name: 'sampledData', - chartConfig: expect.objectContaining({ - select: '*', - }), - }), - ], - }), - metadata, - ); + expect(results).toEqual([]); + expect(renderChartConfigSpy).not.toHaveBeenCalled(); }); }); diff --git a/packages/common-utils/src/metadata.ts b/packages/common-utils/src/metadata.ts index 603853607..2494c6993 100644 --- a/packages/common-utils/src/metadata.ts +++ b/packages/common-utils/src/metadata.ts @@ -668,29 +668,22 @@ export class Metadata { return this.cache.getOrFetch( `${chartConfig.connection}.${chartConfig.from.databaseName}.${chartConfig.from.tableName}.${keys.join(',')}.${chartConfig.dateRange.toString()}.${disableRowLimit}.values`, async () => { - const selectClause = keys - .map((k, i) => `groupUniqArray(${limit})(${k}) AS param${i}`) - .join(', '); + if (keys.length === 0) return []; // When disableRowLimit is true, query directly without CTE // Otherwise, use CTE with row limits for sampling const sqlConfig = disableRowLimit ? { ...chartConfig, - select: selectClause, + select: keys + .map((k, i) => `groupUniqArray(${limit})(${k}) AS param${i}`) + .join(', '), } : await (async () => { - // Get all columns including materialized ones - const columns = await this.getColumns({ - databaseName: chartConfig.from.databaseName, - tableName: chartConfig.from.tableName, - connectionId: chartConfig.connection, - }); - // Build select expression that includes all columns by name // This ensures materialized columns are included const selectExpr = - columns.map(col => `\`${col.name}\``).join(', ') || '*'; + keys.map((k, i) => `${k} as param${i}`).join(', ') || '*'; return { with: [ @@ -710,7 +703,12 @@ export class Metadata { isSubquery: true, }, ], - select: selectClause, + select: keys + .map( + (_, i) => + `groupUniqArray(${limit})(param${i}) AS param${i}`, + ) + .join(', '), connection: chartConfig.connection, from: { databaseName: '', tableName: 'sampledData' }, where: '', From 23f37dbee10d1786e3c8047821285abfed0aa232 Mon Sep 17 00:00:00 2001 From: Drew Davis Date: Thu, 23 Oct 2025 11:15:54 +0200 Subject: [PATCH 2/4] test: Add integration tests for common-utils/metadata --- Makefile | 8 +- packages/common-utils/.env.test | 4 + packages/common-utils/jest.config.js | 1 + packages/common-utils/jest.int.config.js | 13 ++ packages/common-utils/package.json | 5 +- .../src/__tests__/metadata.int.test.ts | 191 ++++++++++++++++++ yarn.lock | 8 + 7 files changed, 228 insertions(+), 2 deletions(-) create mode 100644 packages/common-utils/.env.test create mode 100644 packages/common-utils/jest.int.config.js create mode 100644 packages/common-utils/src/__tests__/metadata.int.test.ts diff --git a/Makefile b/Makefile index 5aabc1b8c..a7667b297 100644 --- a/Makefile +++ b/Makefile @@ -45,10 +45,16 @@ dev-int: npx nx run @hyperdx/api:dev:int $(FILE) docker compose -p int -f ./docker-compose.ci.yml down +.PHONY: dev-int-common-utils +dev-int-common-utils: + docker compose -p int -f ./docker-compose.ci.yml up -d + npx nx run @hyperdx/common-utils:dev:int $(FILE) + docker compose -p int -f ./docker-compose.ci.yml down + .PHONY: ci-int ci-int: docker compose -p int -f ./docker-compose.ci.yml up -d - npx nx run @hyperdx/api:ci:int + npx nx run-many -t ci:int --parallel=false docker compose -p int -f ./docker-compose.ci.yml down .PHONY: dev-unit diff --git a/packages/common-utils/.env.test b/packages/common-utils/.env.test new file mode 100644 index 000000000..9d94fa697 --- /dev/null +++ b/packages/common-utils/.env.test @@ -0,0 +1,4 @@ +CLICKHOUSE_HOST=http://localhost:8123 +CLICKHOUSE_PASSWORD= +CLICKHOUSE_USER=default +NODE_ENV=test diff --git a/packages/common-utils/jest.config.js b/packages/common-utils/jest.config.js index 1b04ebf14..67fc4dcfe 100644 --- a/packages/common-utils/jest.config.js +++ b/packages/common-utils/jest.config.js @@ -5,6 +5,7 @@ module.exports = { verbose: true, rootDir: './src', testMatch: ['**/__tests__/*.test.ts?(x)'], + testPathIgnorePatterns: ['.*\\.int\\.test\\.ts$'], testTimeout: 30000, moduleNameMapper: { '@/(.*)$': '/$1', diff --git a/packages/common-utils/jest.int.config.js b/packages/common-utils/jest.int.config.js new file mode 100644 index 000000000..d7450412a --- /dev/null +++ b/packages/common-utils/jest.int.config.js @@ -0,0 +1,13 @@ +/** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */ +module.exports = { + setupFiles: ['dotenv/config'], + preset: 'ts-jest', + testEnvironment: 'node', + verbose: true, + rootDir: './src', + testMatch: ['**/__tests__/*.int.test.ts?(x)'], + testTimeout: 30000, + moduleNameMapper: { + '@/(.*)$': '/$1', + }, +}; diff --git a/packages/common-utils/package.json b/packages/common-utils/package.json index 58fec5782..b9d1367e7 100644 --- a/packages/common-utils/package.json +++ b/packages/common-utils/package.json @@ -36,6 +36,7 @@ "@types/sqlstring": "^2.3.0", "@types/supertest": "^2.0.12", "@types/uuid": "^8.3.4", + "dotenv": "^17.2.3", "jest": "^28.1.1", "nodemon": "^2.0.20", "rimraf": "^4.4.1", @@ -56,6 +57,8 @@ "lint:fix": "npx eslint . --ext .ts --fix", "ci:lint": "yarn lint && yarn tsc --noEmit", "ci:unit": "jest --runInBand --ci --forceExit --coverage", - "dev:unit": "jest --watchAll --runInBand --detectOpenHandles" + "dev:unit": "jest --watchAll --runInBand --detectOpenHandles", + "ci:int": "DOTENV_CONFIG_PATH=.env.test jest --config jest.int.config.js --runInBand --ci --forceExit --coverage", + "dev:int": "DOTENV_CONFIG_PATH=.env.test jest --config jest.int.config.js --watchAll --runInBand --detectOpenHandles" } } diff --git a/packages/common-utils/src/__tests__/metadata.int.test.ts b/packages/common-utils/src/__tests__/metadata.int.test.ts new file mode 100644 index 000000000..633433888 --- /dev/null +++ b/packages/common-utils/src/__tests__/metadata.int.test.ts @@ -0,0 +1,191 @@ +import { createClient } from '@clickhouse/client'; +import { ClickHouseClient } from '@clickhouse/client-common'; + +import { ClickhouseClient as HdxClickhouseClient } from '@/clickhouse/node'; +import { Metadata, MetadataCache } from '@/metadata'; +import { ChartConfigWithDateRange } from '@/types'; + +describe('Metadata Integration Tests', () => { + let client: ClickHouseClient; + let hdxClient: HdxClickhouseClient; + + beforeAll(() => { + const host = process.env.CLICKHOUSE_HOST || 'http://localhost:8123'; + const username = process.env.CLICKHOUSE_USER || 'default'; + const password = process.env.CLICKHOUSE_PASSWORD || ''; + + client = createClient({ + url: host, + username, + password, + }); + + hdxClient = new HdxClickhouseClient({ + host, + username, + password, + }); + }); + + describe('getKeyValues', () => { + let metadata: Metadata; + const chartConfig: ChartConfigWithDateRange = { + connection: 'test_connection', + from: { + databaseName: 'default', + tableName: 'test_table', + }, + dateRange: [new Date('2023-01-01'), new Date('2025-01-01')], + select: 'col1, col2', + timestampValueExpression: 'Timestamp', + where: '', + }; + + beforeAll(async () => { + await client.command({ + query: `CREATE OR REPLACE TABLE default.test_table ( + Timestamp DateTime64(9) CODEC(Delta(8), ZSTD(1)), + SeverityText LowCardinality(String) CODEC(ZSTD(1)), + TraceId String, + LogAttributes JSON CODEC(ZSTD(1)), + ResourceAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)), + \`__hdx_materialized_k8s.pod.name\` String MATERIALIZED ResourceAttributes['k8s.pod.name'] CODEC(ZSTD(1)), + ) + ENGINE = MergeTree() + ORDER BY (Timestamp) + `, + }); + + await client.command({ + query: `INSERT INTO default.test_table (Timestamp, SeverityText, TraceId, ResourceAttributes, LogAttributes) VALUES + ('2023-06-01 12:00:00', 'info', '1o2udn120d8n', { 'k8s.pod.name': 'pod1', 'env': 'prod' },'{"action":"ping"}'), + ('2024-06-01 12:00:00', 'error', '67890-09098', { 'k8s.pod.name': 'pod2', 'env': 'prod' },'{}'), + ('2024-06-01 12:00:00', 'info', '11h9238re1h92', { 'env': 'staging' },'{"user":"john"}'), + ('2024-06-01 12:00:00', 'warning', '1o2udn120d8n', { 'k8s.pod.name': 'pod1', 'env': 'prod' }, '{"user":"jack","action":"login"}'), + ('2024-06-01 12:00:00', '', '1o2udn120d8n', { 'env': 'prod' }, '{"user":"jane","action":"login"}') + `, + }); + }); + + beforeEach(async () => { + metadata = new Metadata(hdxClient, new MetadataCache()); + }); + + afterAll(async () => { + await client.command({ + query: 'DROP TABLE IF EXISTS default.test_table', + }); + }); + + describe.each([true, false])('with disableRowLimit=%s', disableRowLimit => { + it('should return key-value pairs for a given metadata key', async () => { + const resultSeverityText = await metadata.getKeyValues({ + chartConfig, + keys: ['SeverityText'], + disableRowLimit, + }); + + expect(resultSeverityText).toHaveLength(1); + expect(resultSeverityText[0].key).toBe('SeverityText'); + expect(resultSeverityText[0].value).toHaveLength(3); + expect(resultSeverityText[0].value).toEqual( + expect.arrayContaining(['info', 'error', 'warning']), + ); + + const resultTraceId = await metadata.getKeyValues({ + chartConfig, + keys: ['TraceId'], + disableRowLimit, + }); + + expect(resultTraceId).toHaveLength(1); + expect(resultTraceId[0].key).toBe('TraceId'); + expect(resultTraceId[0].value).toHaveLength(3); + expect(resultTraceId[0].value).toEqual( + expect.arrayContaining([ + '1o2udn120d8n', + '67890-09098', + '11h9238re1h92', + ]), + ); + + const resultBoth = await metadata.getKeyValues({ + chartConfig, + keys: ['TraceId', 'SeverityText'], + disableRowLimit, + }); + + expect(resultBoth).toEqual([ + { + key: 'TraceId', + value: expect.arrayContaining([ + '1o2udn120d8n', + '67890-09098', + '11h9238re1h92', + ]), + }, + { + key: 'SeverityText', + value: expect.arrayContaining(['info', 'error', 'warning']), + }, + ]); + }); + + it('should handle materialized columns correctly', async () => { + const resultPodName = await metadata.getKeyValues({ + chartConfig, + keys: ['__hdx_materialized_k8s.pod.name'], + disableRowLimit, + }); + + expect(resultPodName).toHaveLength(1); + expect(resultPodName[0].key).toBe('__hdx_materialized_k8s.pod.name'); + expect(resultPodName[0].value).toHaveLength(2); + expect(resultPodName[0].value).toEqual( + expect.arrayContaining(['pod1', 'pod2']), + ); + }); + + it('should handle JSON columns correctly', async () => { + const resultLogAttributes = await metadata.getKeyValues({ + chartConfig, + keys: ['LogAttributes.user'], + disableRowLimit, + }); + + expect(resultLogAttributes).toHaveLength(1); + expect(resultLogAttributes[0].key).toBe('LogAttributes.user'); + expect(resultLogAttributes[0].value).toHaveLength(3); + expect(resultLogAttributes[0].value).toEqual( + expect.arrayContaining(['john', 'jack', 'jane']), + ); + }); + + it('should return an empty list when no keys are provided', async () => { + const resultEmpty = await metadata.getKeyValues({ + chartConfig, + keys: [], + }); + + expect(resultEmpty).toEqual([]); + }); + + it('should correctly limit the number of returned values', async () => { + const resultLimited = await metadata.getKeyValues({ + chartConfig, + keys: ['SeverityText'], + limit: 2, + }); + + expect(resultLimited).toHaveLength(1); + expect(resultLimited[0].key).toBe('SeverityText'); + expect(resultLimited[0].value).toHaveLength(2); + expect( + resultLimited[0].value.every(v => + ['info', 'error', 'warning'].includes(v), + ), + ).toBeTruthy(); + }); + }); + }); +}); diff --git a/yarn.lock b/yarn.lock index 25eb2878e..325f35eb5 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4536,6 +4536,7 @@ __metadata: "@types/uuid": "npm:^8.3.4" date-fns: "npm:^2.28.0" date-fns-tz: "npm:^2.0.0" + dotenv: "npm:^17.2.3" jest: "npm:^28.1.1" lodash: "npm:^4.17.21" node-sql-parser: "npm:^5.3.5" @@ -14691,6 +14692,13 @@ __metadata: languageName: node linkType: hard +"dotenv@npm:^17.2.3": + version: 17.2.3 + resolution: "dotenv@npm:17.2.3" + checksum: 10c0/c884403209f713214a1b64d4d1defa4934c2aa5b0002f5a670ae298a51e3c3ad3ba79dfee2f8df49f01ae74290fcd9acdb1ab1d09c7bfb42b539036108bb2ba0 + languageName: node + linkType: hard + "dunder-proto@npm:^1.0.1": version: 1.0.1 resolution: "dunder-proto@npm:1.0.1" From d4171595940fc0af94f1bc9107516785322b4eb2 Mon Sep 17 00:00:00 2001 From: Drew Davis Date: Sat, 25 Oct 2025 12:17:25 +0200 Subject: [PATCH 3/4] fix: Close client in metadata integration test --- packages/common-utils/src/__tests__/metadata.int.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/common-utils/src/__tests__/metadata.int.test.ts b/packages/common-utils/src/__tests__/metadata.int.test.ts index 633433888..793411a0e 100644 --- a/packages/common-utils/src/__tests__/metadata.int.test.ts +++ b/packages/common-utils/src/__tests__/metadata.int.test.ts @@ -75,6 +75,8 @@ describe('Metadata Integration Tests', () => { await client.command({ query: 'DROP TABLE IF EXISTS default.test_table', }); + + await client.close(); }); describe.each([true, false])('with disableRowLimit=%s', disableRowLimit => { From 2dc77ff90110b37d702964b6ff3b4ff2f614ada7 Mon Sep 17 00:00:00 2001 From: Drew Davis Date: Mon, 27 Oct 2025 11:28:18 -0400 Subject: [PATCH 4/4] fix: Improve getKeyValues comments --- packages/common-utils/src/metadata.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/common-utils/src/metadata.ts b/packages/common-utils/src/metadata.ts index 2494c6993..47ebd43e1 100644 --- a/packages/common-utils/src/metadata.ts +++ b/packages/common-utils/src/metadata.ts @@ -680,8 +680,10 @@ export class Metadata { .join(', '), } : await (async () => { - // Build select expression that includes all columns by name - // This ensures materialized columns are included + // Build select expression that includes each of the given keys. + // This avoids selecting entire JSON columns, which is significantly slower + // than selecting just the JSON paths corresponding to the given keys. + // paramN aliases are used to avoid issues with special characters or complex expressions in keys. const selectExpr = keys.map((k, i) => `${k} as param${i}`).join(', ') || '*';