Skip to content

Commit 2069779

Browse files
antonisclaudeLms24
authored
fix(core): Sanitize lone surrogates in log body and attributes (#20245)
Lone UTF-16 surrogates (U+D800–U+DFFF not part of a valid pair) in log message bodies or string attribute values/keys cause `serde_json` on the server to reject the **entire queued log batch**. This means one bad log entry silently drops all healthy logs in the same flush. This PR sanitizes unpaired surrogates by replacing them with U+FFFD (replacement character) at log capture time, scoped exclusively to the logs code path (`packages/core/src/logs/internal.ts`). - Sanitizes log `body` (plain string and `fmt` parameterized string messages) - Sanitizes log attribute string values - Sanitizes log attribute keys - Uses native `String.prototype.toWellFormed()` when available (Node 20+, Chrome 111+, Safari 16.4+, Firefox 119+, Hermes). On older runtimes without native support, the string passes through as-is. Server-side fix in Relay: getsentry/relay#5833 Fixes getsentry/sentry-react-native#5186 --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: Lukas Stracke <lukas.stracke@sentry.io>
1 parent 83227af commit 2069779

2 files changed

Lines changed: 203 additions & 4 deletions

File tree

packages/core/src/logs/internal.ts

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import type { Attributes } from '../attributes';
12
import { serializeAttributes } from '../attributes';
23
import { getGlobalSingleton } from '../carrier';
34
import type { Client } from '../client';
@@ -161,14 +162,14 @@ export function _INTERNAL_captureLog(
161162
const serializedLog: SerializedLog = {
162163
timestamp,
163164
level,
164-
body: message,
165+
body: _removeLoneSurrogates(String(message)),
165166
trace_id: traceContext?.trace_id,
166167
severity_number: severityNumber ?? SEVERITY_TEXT_TO_SEVERITY_NUMBER[level],
167-
attributes: {
168+
attributes: sanitizeLogAttributes({
168169
...serializeAttributes(scopeAttributes),
169170
...serializeAttributes(logAttributes, true),
170171
[sequenceAttr.key]: sequenceAttr.value,
171-
},
172+
}),
172173
};
173174

174175
captureSerializedLog(client, serializedLog);
@@ -226,3 +227,45 @@ function _getBufferMap(): WeakMap<Client, Array<SerializedLog>> {
226227
// The reference to the Client <> LogBuffer map is stored on the carrier to ensure it's always the same
227228
return getGlobalSingleton('clientToLogBufferMap', () => new WeakMap<Client, Array<SerializedLog>>());
228229
}
230+
231+
/**
 * Returns a copy of the given serialized log attributes in which every key,
 * and the value of every attribute whose `type` is `'string'`, has been passed
 * through `_removeLoneSurrogates`. Attributes of any other type are carried
 * over by reference, unchanged.
 */
function sanitizeLogAttributes(attributes: Attributes): Attributes {
  const result: Attributes = {};

  Object.entries(attributes).forEach(([rawKey, entry]) => {
    // Only string-typed attributes carry text that can contain lone surrogates.
    result[_removeLoneSurrogates(rawKey)] =
      entry.type === 'string' ? { ...entry, value: _removeLoneSurrogates(entry.value) } : entry;
  });

  return result;
}
247+
248+
/**
 * Replaces unpaired UTF-16 surrogates with U+FFFD (replacement character).
 *
 * Lone surrogates (U+D800–U+DFFF not part of a valid pair) cause `serde_json`
 * on the server to reject the entire log batch when they appear in
 * JSON-escaped form (e.g. `\uD800`). Replacing them at the SDK level ensures
 * only the offending characters are lost instead of the whole payload.
 *
 * Uses the native `String.prototype.isWellFormed()` / `toWellFormed()` when both
 * are available (Node 20+, Chrome 111+, Safari 16.4+, Firefox 119+, Hermes).
 * On older runtimes without native support, a small regex-based fallback
 * performs the same replacement so that those runtimes are protected as well.
 *
 * Exported for testing
 *
 * @param str - The string to sanitize.
 * @returns `str` unchanged when it contains no lone surrogates, otherwise a
 *   copy in which each lone surrogate code unit is replaced by U+FFFD.
 */
export function _removeLoneSurrogates(str: string): string {
  // isWellFormed/toWellFormed are ES2024 (not in our TS lib target), so we feature-detect via Object().
  const nativeMethods: {
    isWellFormed?: (this: string) => boolean;
    toWellFormed?: (this: string) => string;
  } = Object(str);
  const isWellFormed = nativeMethods.isWellFormed;
  const toWellFormed = nativeMethods.toWellFormed;

  if (typeof isWellFormed === 'function' && typeof toWellFormed === 'function') {
    // Skip the copy entirely for the (overwhelmingly common) well-formed case.
    return isWellFormed.call(str) ? str : toWellFormed.call(str);
  }

  // Fallback for runtimes without native support. The first alternative consumes
  // valid high+low pairs so they are kept; any surrogate code unit that is still
  // matched on its own (second alternative) is unpaired and gets replaced.
  // Deliberately avoids lookbehind and the `u` flag, which older engines may lack.
  return str.replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDFFF]/g, match =>
    match.length === 2 ? match : '\uFFFD',
  );
}

packages/core/test/lib/logs/internal.test.ts

Lines changed: 157 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import { beforeEach, describe, expect, it, vi } from 'vitest';
22
import { fmt, Scope } from '../../../src';
3-
import { _INTERNAL_captureLog, _INTERNAL_flushLogsBuffer, _INTERNAL_getLogBuffer } from '../../../src/logs/internal';
3+
import {
4+
_INTERNAL_captureLog,
5+
_INTERNAL_flushLogsBuffer,
6+
_INTERNAL_getLogBuffer,
7+
_removeLoneSurrogates,
8+
} from '../../../src/logs/internal';
49
import type { Log } from '../../../src/types/log';
510
import * as loggerModule from '../../../src/utils/debug-logger';
611
import * as timeModule from '../../../src/utils/time';
@@ -9,6 +14,9 @@ import { getDefaultTestClientOptions, TestClient } from '../../mocks/client';
914

1015
const PUBLIC_DSN = 'https://username@domain/123';
1116

17+
// isWellFormed()/toWellFormed() are only available in Node 20+, Chrome 111+, Safari 16.4+, Firefox 119+, Hermes.
// Probe both methods via Object(), mirroring the feature detection in `_removeLoneSurrogates`,
// so the gate matches the implementation and does not depend on ES2024 lib typings.
const wellFormedProbe: Record<string, unknown> = Object('');
const hasToWellFormed =
  typeof wellFormedProbe['isWellFormed'] === 'function' && typeof wellFormedProbe['toWellFormed'] === 'function';
19+
1220
describe('_INTERNAL_captureLog', () => {
1321
beforeEach(() => {
1422
_INTERNAL_resetSequenceNumber();
@@ -1269,4 +1277,152 @@ describe('_INTERNAL_captureLog', () => {
12691277
expect(buffer2?.[0]?.attributes?.['sentry.timestamp.sequence']).toEqual({ value: 0, type: 'integer' });
12701278
});
12711279
});
1280+
1281+
describe.runIf(hasToWellFormed)('lone surrogate sanitization', () => {
  // Creates a logs-enabled test client attached to a fresh scope; every test
  // below needs exactly this pair.
  function createClientAndScope() {
    const client = new TestClient(getDefaultTestClientOptions({ dsn: PUBLIC_DSN, enableLogs: true }));
    const scope = new Scope();
    scope.setClient(client);
    return { client, scope };
  }

  it('sanitizes lone surrogates in log message body', () => {
    const { client, scope } = createClientAndScope();

    _INTERNAL_captureLog({ level: 'error', message: 'bad surrogate \uD800 here' }, scope);

    const firstLog = _INTERNAL_getLogBuffer(client)?.[0];
    expect(firstLog?.body).toBe('bad surrogate \uFFFD here');
  });

  it('sanitizes lone surrogates in parameterized (fmt) log message body', () => {
    const { client, scope } = createClientAndScope();
    const badValue = 'bad\uD800value';

    _INTERNAL_captureLog({ level: 'error', message: fmt`parameterized ${badValue} message` }, scope);

    const firstLog = _INTERNAL_getLogBuffer(client)?.[0];
    expect(firstLog?.body).toBe('parameterized bad\uFFFDvalue message');
  });

  it('sanitizes lone surrogates in log attribute values', () => {
    const { client, scope } = createClientAndScope();

    _INTERNAL_captureLog({ level: 'error', message: 'test', attributes: { bad: '{"a":"\uD800"}' } }, scope);

    const firstLog = _INTERNAL_getLogBuffer(client)?.[0];
    expect(firstLog?.attributes?.['bad']).toEqual({ value: '{"a":"\uFFFD"}', type: 'string' });
  });

  it('sanitizes lone surrogates in log attribute keys', () => {
    const { client, scope } = createClientAndScope();

    _INTERNAL_captureLog({ level: 'error', message: 'test', attributes: { ['bad\uD800key']: 'value' } }, scope);

    // The attribute must be reachable under the sanitized key.
    const firstLog = _INTERNAL_getLogBuffer(client)?.[0];
    expect(firstLog?.attributes?.['bad\uFFFDkey']).toEqual({ value: 'value', type: 'string' });
  });

  it('preserves valid emoji in log messages and attributes', () => {
    const { client, scope } = createClientAndScope();

    _INTERNAL_captureLog({ level: 'info', message: 'hello 😀 world', attributes: { emoji: '🎉 party' } }, scope);

    const firstLog = _INTERNAL_getLogBuffer(client)?.[0];
    expect(firstLog?.body).toBe('hello 😀 world');
    expect(firstLog?.attributes?.['emoji']).toEqual({ value: '🎉 party', type: 'string' });
  });
});
1374+
});
1375+
1376+
describe('_removeLoneSurrogates', () => {
1377+
it('returns the same string when there are no surrogates', () => {
1378+
expect(_removeLoneSurrogates('hello world')).toBe('hello world');
1379+
});
1380+
1381+
it('returns the same string for empty input', () => {
1382+
expect(_removeLoneSurrogates('')).toBe('');
1383+
});
1384+
1385+
it('preserves valid surrogate pairs (emoji)', () => {
1386+
expect(_removeLoneSurrogates('hello 😀 world')).toBe('hello 😀 world');
1387+
});
1388+
1389+
it.runIf(hasToWellFormed)('replaces a lone high surrogate with U+FFFD', () => {
1390+
expect(_removeLoneSurrogates('before\uD800after')).toBe('before\uFFFDafter');
1391+
});
1392+
1393+
it.runIf(hasToWellFormed)('replaces a lone low surrogate with U+FFFD', () => {
1394+
expect(_removeLoneSurrogates('before\uDC00after')).toBe('before\uFFFDafter');
1395+
});
1396+
1397+
it.runIf(hasToWellFormed)('replaces lone high surrogate at end of string', () => {
1398+
expect(_removeLoneSurrogates('end\uD800')).toBe('end\uFFFD');
1399+
});
1400+
1401+
it.runIf(hasToWellFormed)('replaces lone low surrogate at start of string', () => {
1402+
expect(_removeLoneSurrogates('\uDC00start')).toBe('\uFFFDstart');
1403+
});
1404+
1405+
it.runIf(hasToWellFormed)('replaces multiple lone surrogates', () => {
1406+
expect(_removeLoneSurrogates('\uD800\uD801\uDC00')).toBe('\uFFFD\uD801\uDC00');
1407+
});
1408+
1409+
it.runIf(hasToWellFormed)('handles two consecutive lone high surrogates', () => {
1410+
expect(_removeLoneSurrogates('\uD800\uD800')).toBe('\uFFFD\uFFFD');
1411+
});
1412+
1413+
it.runIf(hasToWellFormed)('handles mixed valid pairs and lone surrogates', () => {
1414+
expect(_removeLoneSurrogates('\uD83D\uDE00\uD800')).toBe('😀\uFFFD');
1415+
});
1416+
1417+
it.runIf(hasToWellFormed)('handles the exact reproduction case from issue #5186', () => {
1418+
const badValue = '{"a":"\uD800"}';
1419+
const result = _removeLoneSurrogates(badValue);
1420+
expect(result).toBe('{"a":"\uFFFD"}');
1421+
expect(() => JSON.parse(result)).not.toThrow();
1422+
});
1423+
1424+
it('returns the string as-is when toWellFormed is not available', () => {
1425+
// Verify the function doesn't throw regardless of runtime support
1426+
expect(_removeLoneSurrogates('normal string')).toBe('normal string');
1427+
});
12721428
});

0 commit comments

Comments
 (0)