Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(decoder): Add support for Structured CLP IR streams but without log-level filtering. #85

Merged
merged 10 commits into from
Nov 15, 2024
9 changes: 4 additions & 5 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"@mui/icons-material": "^6.1.0",
"@mui/joy": "^5.0.0-beta.48",
"axios": "^1.7.2",
"clp-ffi-js": "^0.2.0",
"clp-ffi-js": "^0.3.0",
"dayjs": "^1.11.11",
"monaco-editor": "^0.50.0",
"react": "^18.3.1",
Expand Down
10 changes: 5 additions & 5 deletions src/services/LogFileManager/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* eslint max-lines: ["error", 400] */
import {
Decoder,
DecoderOptionsType,
DecoderOptions,
} from "../../typings/decoders";
import {MAX_V8_STRING_LENGTH} from "../../typings/js";
import {LogLevelFilter} from "../../typings/logs";
Expand Down Expand Up @@ -110,7 +110,7 @@ class LogFileManager {
static async create (
fileSrc: FileSrcType,
pageSize: number,
decoderOptions: DecoderOptionsType,
decoderOptions: DecoderOptions,
onQueryResults: (queryResults: QueryResults) => void,
): Promise<LogFileManager> {
const {fileName, fileData} = await loadFile(fileSrc);
Expand Down Expand Up @@ -138,13 +138,13 @@ class LogFileManager {
static async #initDecoder (
fileName: string,
fileData: Uint8Array,
decoderOptions: DecoderOptionsType
decoderOptions: DecoderOptions
): Promise<Decoder> {
let decoder: Decoder;
if (fileName.endsWith(".jsonl")) {
decoder = new JsonlDecoder(fileData, decoderOptions);
} else if (fileName.endsWith(".clp.zst")) {
decoder = await ClpIrDecoder.create(fileData);
decoder = await ClpIrDecoder.create(fileData, decoderOptions);
} else {
throw new Error(`No decoder supports ${fileName}`);
}
Expand All @@ -161,7 +161,7 @@ class LogFileManager {
/* Sets any formatter options that exist in the decoder's options.
* @param options
*/
setFormatterOptions (options: DecoderOptionsType) {
setFormatterOptions (options: DecoderOptions) {
this.#decoder.setFormatterOptions(options);
}

Expand Down
2 changes: 2 additions & 0 deletions src/services/MainWorker.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import dayjs from "dayjs";
import dayjsBigIntSupport from "dayjs/plugin/bigIntSupport";
import dayjsTimezone from "dayjs/plugin/timezone";
import dayjsUtc from "dayjs/plugin/utc";

Expand All @@ -17,6 +18,7 @@ import LogFileManager from "./LogFileManager";
/* eslint-disable import/no-named-as-default-member */
dayjs.extend(dayjsUtc);
dayjs.extend(dayjsTimezone);
dayjs.extend(dayjsBigIntSupport);
/* eslint-enable import/no-named-as-default-member */

/**
Expand Down
98 changes: 87 additions & 11 deletions src/services/decoders/ClpIrDecoder.ts
Original file line number Diff line number Diff line change
@@ -1,32 +1,68 @@
import clpFfiJsModuleInit, {ClpIrStreamReader} from "clp-ffi-js";
import clpFfiJsModuleInit, {ClpStreamReader} from "clp-ffi-js";
import {Dayjs} from "dayjs";

import {Nullable} from "../../typings/common";
import {
Decoder,
DecodeResultType,
DecodeResult,
DecoderOptions,
FilteredLogEventMap,
LogEventCount,
} from "../../typings/decoders";
import {Formatter} from "../../typings/formatters";
import {JsonObject} from "../../typings/js";
import {LogLevelFilter} from "../../typings/logs";
import LogbackFormatter from "../formatters/LogbackFormatter";
import {
convertToDayjsTimestamp,
isJsonObject,
} from "./JsonlDecoder/utils";


enum CLP_IR_STREAM_TYPE {
STRUCTURED = "structured",
UNSTRUCTURED = "unstructured",
}

class ClpIrDecoder implements Decoder {
#streamReader: ClpIrStreamReader;
#streamReader: ClpStreamReader;

readonly #streamType: CLP_IR_STREAM_TYPE;

constructor (streamReader: ClpIrStreamReader) {
#formatter: Nullable<Formatter>;

constructor (
streamType: CLP_IR_STREAM_TYPE,
streamReader: ClpStreamReader,
decoderOptions: DecoderOptions
) {
this.#streamType = streamType;
this.#streamReader = streamReader;
this.#formatter = (streamType === CLP_IR_STREAM_TYPE.STRUCTURED) ?
new LogbackFormatter({formatString: decoderOptions.formatString}) :
null;
}

/**
* Creates a new ClpIrDecoder instance.
* NOTE: `decoderOptions` only affects decode results if the stream type is
* {@link CLP_IR_STREAM_TYPE.STRUCTURED}.
*
* @param dataArray The input data array to be passed to the decoder.
* @param decoderOptions
* @return The created ClpIrDecoder instance.
*/
static async create (dataArray: Uint8Array): Promise<ClpIrDecoder> {
static async create (
dataArray: Uint8Array,
decoderOptions: DecoderOptions
): Promise<ClpIrDecoder> {
const module = await clpFfiJsModuleInit();
const streamReader = new module.ClpIrStreamReader(dataArray);
return new ClpIrDecoder(streamReader);
const streamReader = new module.ClpStreamReader(dataArray, decoderOptions);
const streamType = streamReader.getIrStreamType() === module.IrStreamType.STRUCTURED ?
CLP_IR_STREAM_TYPE.STRUCTURED :
CLP_IR_STREAM_TYPE.UNSTRUCTURED;

return new ClpIrDecoder(streamType, streamReader, decoderOptions);
}

getEstimatedNumEvents (): number {
Expand All @@ -50,18 +86,58 @@ class ClpIrDecoder implements Decoder {
};
}

// eslint-disable-next-line class-methods-use-this
setFormatterOptions (): boolean {
setFormatterOptions (options: DecoderOptions): boolean {
this.#formatter = new LogbackFormatter({formatString: options.formatString});

return true;
}

decodeRange (
beginIdx: number,
endIdx: number,
useFilter: boolean
): Nullable<DecodeResultType[]> {
return this.#streamReader.decodeRange(beginIdx, endIdx, useFilter);
): Nullable<DecodeResult[]> {
const results: DecodeResult[] =
this.#streamReader.decodeRange(beginIdx, endIdx, useFilter);

if (null === this.#formatter) {
if (this.#streamType === CLP_IR_STREAM_TYPE.STRUCTURED) {
// eslint-disable-next-line no-warning-comments
// TODO: Revisit when we allow displaying structured logs without a formatter.
console.error("Formatter is not set for structured logs.");
}

return results;
}

for (const r of results) {
const [
message,
timestamp,
level,
] = r;
const dayJsTimestamp: Dayjs = convertToDayjsTimestamp(timestamp);
let fields: JsonObject = {};

try {
fields = JSON.parse(message) as JsonObject;
if (false === isJsonObject(fields)) {
throw new Error("Unexpected non-object.");
}
} catch (e) {
console.error(e, message);
}

r[0] = this.#formatter.formatLogEvent({
fields: fields,
level: level,
timestamp: dayJsTimestamp,
});
}

return results;
}
}


export default ClpIrDecoder;
18 changes: 9 additions & 9 deletions src/services/decoders/JsonlDecoder/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import {Dayjs} from "dayjs";
import {Nullable} from "../../../typings/common";
import {
Decoder,
DecodeResultType,
DecodeResult,
DecoderOptions,
FilteredLogEventMap,
JsonlDecoderOptionsType,
LogEventCount,
} from "../../../typings/decoders";
import {Formatter} from "../../../typings/formatters";
Expand All @@ -25,7 +25,7 @@ import {


/**
* A decoder for JSONL (JSON lines) files that contain log events. See `JsonlDecoderOptionsType` for
* A decoder for JSONL (JSON lines) files that contain log events. See `DecoderOptions` for
* properties that are specific to log events (compared to generic JSON records).
*/
class JsonlDecoder implements Decoder {
Expand All @@ -49,7 +49,7 @@ class JsonlDecoder implements Decoder {
* @param dataArray
* @param decoderOptions
*/
constructor (dataArray: Uint8Array, decoderOptions: JsonlDecoderOptionsType) {
constructor (dataArray: Uint8Array, decoderOptions: DecoderOptions) {
this.#dataArray = dataArray;
this.#logLevelKey = decoderOptions.logLevelKey;
this.#timestampKey = decoderOptions.timestampKey;
Expand Down Expand Up @@ -81,7 +81,7 @@ class JsonlDecoder implements Decoder {
};
}

setFormatterOptions (options: JsonlDecoderOptionsType): boolean {
setFormatterOptions (options: DecoderOptions): boolean {
this.#formatter = new LogbackFormatter({formatString: options.formatString});

return true;
Expand All @@ -91,7 +91,7 @@ class JsonlDecoder implements Decoder {
beginIdx: number,
endIdx: number,
useFilter: boolean,
): Nullable<DecodeResultType[]> {
): Nullable<DecodeResult[]> {
if (useFilter && null === this.#filteredLogEventMap) {
return null;
}
Expand All @@ -104,7 +104,7 @@ class JsonlDecoder implements Decoder {
return null;
}

const results: DecodeResultType[] = [];
const results: DecodeResult[] = [];
for (let i = beginIdx; i < endIdx; i++) {
// Explicit cast since typescript thinks `#filteredLogEventMap[i]` can be undefined, but
// it shouldn't be since we performed a bounds check at the beginning of the method.
Expand Down Expand Up @@ -204,12 +204,12 @@ class JsonlDecoder implements Decoder {
}

/**
* Decodes a log event into a `DecodeResultType`.
* Decodes a log event into a `DecodeResult`.
*
* @param logEventIdx
* @return The decoded log event.
*/
#decodeLogEvent = (logEventIdx: number): DecodeResultType => {
#decodeLogEvent = (logEventIdx: number): DecodeResult => {
let timestamp: number;
let message: string;
let logLevel: LOG_LEVEL;
Expand Down
5 changes: 3 additions & 2 deletions src/services/decoders/JsonlDecoder/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,13 @@ const convertToLogLevelValue = (field: JsonValue | undefined): LOG_LEVEL => {
* - the timestamp's value is an unsupported type.
* - the timestamp's value is not a valid dayjs timestamp.
*/
const convertToDayjsTimestamp = (field: JsonValue | undefined): dayjs.Dayjs => {
const convertToDayjsTimestamp = (field: JsonValue | bigint | undefined): dayjs.Dayjs => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fyi i just noticed i dont think json.parse will work with big int? Like it will throw exception since it is not a json value? How do you feel about using this? There is also this hack i saw on github. I feel like in practice we won't see these weird timestamps, but wtv u think is best.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also how do u feel about this file to utils? Since it is shared by both IR and Json now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right that's an issue... Just to understand why we need the mitigation, where are we using json.parse with BigInts?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Potentially also in json decoder if logs have an bigint?

Copy link
Member Author

@junhaoliao junhaoliao Nov 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

omg...

  1. nlohmann/json acknowledges there are implementation whose number types can store up to 2^53-1 : https://json.nlohmann.me/features/types/number_handling/#number-interoperability
  2. yet it does serialize numbers out of that range, imo for good reasons though: https://github.com/nlohmann/json/blob/4a602df34e71afca2242b34c743d4cc2486cd071/tests/src/unit-serialization.cpp#L168-L183

funny finding: https://www.github.com/nlohmann/json/issues/1708 -> https://www.github.com/nlohmann/json/pull/1722

Let's discuss the issue with the clp core people before we make changes.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As discussed offline, calling JSON.parse() with any clp-ffi-js-returned JSON string containing number in +/- 2^53-1 ~ 2^64-1 would encounter loss of precision, but luckily nothing throws.
Let's address the loss of precision issue in another PR.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be tracked at #116

// If the field is an invalid type, then set the timestamp to `INVALID_TIMESTAMP_VALUE`.
// NOTE: dayjs surprisingly thinks `undefined` is a valid date. See
// https://day.js.org/docs/en/parse/now#docsNav
if (("string" !== typeof field &&
"number" !== typeof field) ||
"number" !== typeof field &&
"bigint" !== typeof field) ||
"undefined" === typeof field
) {
// `INVALID_TIMESTAMP_VALUE` is a valid dayjs date. Another potential option is
Expand Down
4 changes: 2 additions & 2 deletions src/typings/config.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {JsonlDecoderOptionsType} from "./decoders";
import {DecoderOptions} from "./decoders";
import {TAB_NAME} from "./tab";


Expand Down Expand Up @@ -27,7 +27,7 @@ enum LOCAL_STORAGE_KEY {
/* eslint-enable @typescript-eslint/prefer-literal-enum-member */

type ConfigMap = {
[CONFIG_KEY.DECODER_OPTIONS]: JsonlDecoderOptionsType,
[CONFIG_KEY.DECODER_OPTIONS]: DecoderOptions,
[CONFIG_KEY.INITIAL_TAB_NAME]: TAB_NAME,
[CONFIG_KEY.THEME]: THEME_NAME,
[CONFIG_KEY.PAGE_SIZE]: number,
Expand Down
17 changes: 6 additions & 11 deletions src/typings/decoders.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,16 @@ interface LogEventCount {
}

/**
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Description intentionally omitted.

* Options for the JSONL decoder.
*
* @property formatString The format string to use to serialize records as plain text.
* @property logLevelKey The key of the kv-pair that contains the log level in every record.
* @property timestampKey The key of the kv-pair that contains the timestamp in every record.
*/
interface JsonlDecoderOptionsType {
interface DecoderOptions {
formatString: string,
logLevelKey: string,
timestampKey: string,
}

type DecoderOptionsType = JsonlDecoderOptionsType;

/**
* Type of the decoded log event. We use an array rather than object so that it's easier to return
* results from WASM-based decoders.
Expand All @@ -31,7 +27,7 @@ type DecoderOptionsType = JsonlDecoderOptionsType;
* @property level
* @property number
*/
type DecodeResultType = [string, number, number, number];
type DecodeResult = [string, bigint, number, number];

/**
* Mapping between an index in the filtered log events collection to an index in the unfiltered log
Expand Down Expand Up @@ -85,7 +81,7 @@ interface Decoder {
* @param options
* @return Whether the options were successfully set.
*/
setFormatterOptions(options: DecoderOptionsType): boolean;
setFormatterOptions(options: DecoderOptions): boolean;

/**
* Decodes log events in the range `[beginIdx, endIdx)` of the filtered or unfiltered
Expand All @@ -101,15 +97,14 @@ interface Decoder {
beginIdx: number,
endIdx: number,
useFilter: boolean
): Nullable<DecodeResultType[]>;
): Nullable<DecodeResult[]>;
}

export type {
ActiveLogCollectionEventIdx,
Decoder,
DecodeResultType,
DecoderOptionsType,
DecodeResult,
DecoderOptions,
FilteredLogEventMap,
JsonlDecoderOptionsType,
LogEventCount,
};
Loading
Loading