mirror of
https://github.com/stack-auth/stack.git
synced 2026-06-04 21:04:37 +08:00
fix clickhouse surrogate pair bug (#1270)
<!-- Make sure you've read the CONTRIBUTING.md guidelines: https://github.com/stack-auth/stack-auth/blob/dev/CONTRIBUTING.md --> <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Bug Fixes** * Enhanced analytics event processing to properly handle edge cases when data contains certain truncated special characters or emoji sequences, ensuring data integrity. * **Tests** * Added coverage for analytics data edge case handling. <!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
parent
1d00ed2c64
commit
d51c303fb0
@ -10,6 +10,28 @@ const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[089ab][0-9a-f]{3}-[0
|
||||
|
||||
const MAX_EVENTS = 500;
|
||||
|
||||
// Lone surrogates (UTF-16 code units in \uD800-\uDFFF that are not part of a
// valid high+low pair) are representable in JS strings but rejected by
// ClickHouse's JSON parser.
// The client-side event tracker can produce these when .substring() truncates
// text in the middle of a surrogate pair (e.g. emoji characters).
//
// The pattern matches either:
//   - a high surrogate that is NOT followed by a low surrogate, or
//   - a low surrogate that is NOT preceded by a high surrogate.
// eslint-disable-next-line no-control-regex
const LONE_SURROGATE_RE = /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;

/**
 * Recursively replaces every lone surrogate in `value` with U+FFFD
 * (REPLACEMENT CHARACTER) and returns the sanitized copy.
 *
 * - Strings are sanitized directly.
 * - Arrays are mapped element by element into a new array.
 * - Any other non-null object is rebuilt from its own enumerable string-keyed
 *   entries; both the keys and the values are sanitized, because a lone
 *   surrogate in a key makes the serialized JSON just as malformed as one in
 *   a value.
 * - Everything else (numbers, booleans, null, undefined, ...) is returned
 *   unchanged.
 *
 * The input is never mutated. If two keys of the same object differ only in
 * their lone surrogates, they collapse to the same sanitized key and the
 * later entry wins.
 *
 * @param value Arbitrary JSON-like data.
 * @returns A structurally identical copy with all lone surrogates replaced.
 */
function stripLoneSurrogates(value: unknown): unknown {
  if (typeof value === "string") {
    return value.replace(LONE_SURROGATE_RE, "\uFFFD");
  }
  if (Array.isArray(value)) {
    return value.map((item) => stripLoneSurrogates(item));
  }
  if (value !== null && typeof value === "object") {
    return Object.fromEntries(
      Object.entries(value).map(([k, v]) => [
        // Keys end up in the serialized JSON too, so they need the same treatment.
        k.replace(LONE_SURROGATE_RE, "\uFFFD"),
        stripLoneSurrogates(v),
      ])
    );
  }
  return value;
}
|
||||
|
||||
export const POST = createSmartRouteHandler({
|
||||
metadata: {
|
||||
summary: "Upload analytics event batch",
|
||||
@ -69,7 +91,7 @@ export const POST = createSmartRouteHandler({
|
||||
const rows = body.events.map((event) => ({
|
||||
event_type: event.event_type,
|
||||
event_at: new Date(event.event_at_ms),
|
||||
data: event.data,
|
||||
data: stripLoneSurrogates(event.data),
|
||||
project_id: projectId,
|
||||
branch_id: branchId,
|
||||
user_id: userId,
|
||||
|
||||
@ -160,6 +160,51 @@ it("accepts valid $click events", async ({ expect }) => {
|
||||
`);
|
||||
});
|
||||
|
||||
it("handles click event data containing a truncated surrogate pair (lone high surrogate)", async ({ expect }) => {
|
||||
await Project.createAndSwitch({ config: { magic_link_enabled: true } });
|
||||
await Project.updateConfig({ apps: { installed: { analytics: { enabled: true } } } });
|
||||
await Auth.Otp.signIn();
|
||||
|
||||
// Simulate what the client-side event tracker does: .substring(0, 200) can
|
||||
// cut a string in the middle of a surrogate pair when emoji characters are
|
||||
// near the boundary. For example, 🍉 is "\uD83C\uDF49" in UTF-16; cutting
|
||||
// after the high surrogate leaves a lone "\uD83C" that ClickHouse cannot parse.
|
||||
const paddedText = "a".repeat(199) + "\uD83C"; // lone high surrogate at position 199
|
||||
|
||||
const now = Date.now();
|
||||
const res = await uploadEventBatch({
|
||||
sessionReplaySegmentId: randomUUID(),
|
||||
batchId: randomUUID(),
|
||||
sentAtMs: now,
|
||||
events: [
|
||||
{
|
||||
event_type: "$click",
|
||||
event_at_ms: now - 50,
|
||||
data: {
|
||||
tag_name: "div",
|
||||
text: paddedText,
|
||||
href: null,
|
||||
selector: "div.container",
|
||||
x: 100,
|
||||
y: 200,
|
||||
page_x: 100,
|
||||
page_y: 500,
|
||||
viewport_width: 375,
|
||||
viewport_height: 647,
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
expect(res).toMatchInlineSnapshot(`
|
||||
NiceResponse {
|
||||
"status": 200,
|
||||
"body": { "inserted": 1 },
|
||||
"headers": Headers { <some fields may have been hidden> },
|
||||
}
|
||||
`);
|
||||
});
|
||||
|
||||
it("rejects empty events array", async ({ expect }) => {
|
||||
await Project.createAndSwitch({ config: { magic_link_enabled: true } });
|
||||
await Project.updateConfig({ apps: { installed: { analytics: { enabled: true } } } });
|
||||
|
||||
Loading…
Reference in New Issue
Block a user