From edd33b1e147f64e5bf23c1cfc07479744d9c7c0c Mon Sep 17 00:00:00 2001 From: Aadesh Kheria Date: Mon, 20 Apr 2026 11:56:26 -0700 Subject: [PATCH] added e2e tests --- apps/e2e/tests/backend/backend-helpers.ts | 24 +++ .../api/latest/internal/mcp-review.test.ts | 72 +++++++++ .../spacetimedb-enroll-reviewer.test.ts | 58 +++++++ apps/e2e/tests/spacetimedb/helpers.ts | 143 +++++++++++++++++ .../tests/spacetimedb/operators-rls.test.ts | 113 +++++++++++++ .../tests/spacetimedb/private-tables.test.ts | 120 ++++++++++++++ .../published-qa-projection.test.ts | 70 ++++++++ .../published-qa-visibility.test.ts | 151 ++++++++++++++++++ .../tests/spacetimedb/reducer-auth.test.ts | 103 ++++++++++++ apps/internal-tool/spacetimedb/src/index.ts | 35 +++- apps/internal-tool/src/app/questions/page.tsx | 4 +- .../internal-tool/src/hooks/useSpacetimeDB.ts | 11 +- .../src/module_bindings/published_qa_table.ts | 29 +--- .../src/module_bindings/types.ts | 7 +- apps/internal-tool/src/types.ts | 2 +- 15 files changed, 903 insertions(+), 39 deletions(-) create mode 100644 apps/e2e/tests/backend/endpoints/api/latest/internal/mcp-review.test.ts create mode 100644 apps/e2e/tests/backend/endpoints/api/latest/internal/spacetimedb-enroll-reviewer.test.ts create mode 100644 apps/e2e/tests/spacetimedb/helpers.ts create mode 100644 apps/e2e/tests/spacetimedb/operators-rls.test.ts create mode 100644 apps/e2e/tests/spacetimedb/private-tables.test.ts create mode 100644 apps/e2e/tests/spacetimedb/published-qa-projection.test.ts create mode 100644 apps/e2e/tests/spacetimedb/published-qa-visibility.test.ts create mode 100644 apps/e2e/tests/spacetimedb/reducer-auth.test.ts diff --git a/apps/e2e/tests/backend/backend-helpers.ts b/apps/e2e/tests/backend/backend-helpers.ts index 7365aefd2..6468984a5 100644 --- a/apps/e2e/tests/backend/backend-helpers.ts +++ b/apps/e2e/tests/backend/backend-helpers.ts @@ -1570,6 +1570,30 @@ export namespace User { } return users; } + + export async function 
setClientReadOnlyMetadata(userId: string, metadata: Record<string, unknown>) { + const response = await niceBackendFetch(`/api/v1/users/${userId}`, { + method: "PATCH", + accessType: "server", + body: { + client_read_only_metadata: metadata, + }, + }); + expect(response).toMatchObject({ status: 200 }); + return response; + } +} + +export namespace AiChatReviewer { + export async function createReviewer() { + const { userId, accessToken, refreshToken } = await Auth.fastSignUp(); + await User.setClientReadOnlyMetadata(userId, { isAiChatReviewer: true }); + return { userId, accessToken, refreshToken }; + } + + export async function createNonReviewer() { + return await Auth.fastSignUp(); + } } diff --git a/apps/e2e/tests/backend/endpoints/api/latest/internal/mcp-review.test.ts b/apps/e2e/tests/backend/endpoints/api/latest/internal/mcp-review.test.ts new file mode 100644 index 000000000..3f4de1fa1 --- /dev/null +++ b/apps/e2e/tests/backend/endpoints/api/latest/internal/mcp-review.test.ts @@ -0,0 +1,72 @@ +import { it } from "../../../../../helpers"; +import { AiChatReviewer, niceBackendFetch } from "../../../../backend-helpers"; + +// Every mcp-review endpoint shares the same auth gate (isAiChatReviewer metadata check) +// and the same short-circuit order: auth → metadata → yup → reducer. Tests here cover +// the first three. 
+const endpoints = [ + { + path: "/api/latest/internal/mcp-review/mark-reviewed", + validBody: { correlationId: "abc123" }, + invalidBody: {}, + }, + { + path: "/api/latest/internal/mcp-review/unmark-reviewed", + validBody: { correlationId: "abc123" }, + invalidBody: {}, + }, + { + path: "/api/latest/internal/mcp-review/update-correction", + validBody: { + correlationId: "abc123", + correctedQuestion: "q", + correctedAnswer: "a", + publish: false, + }, + invalidBody: { correlationId: "abc123", publish: "yes" as unknown as boolean }, + }, + { + path: "/api/latest/internal/mcp-review/add-manual", + validBody: { question: "q", answer: "a", publish: false }, + invalidBody: { question: "q" }, + }, + { + path: "/api/latest/internal/mcp-review/delete", + validBody: { correlationId: "abc123" }, + invalidBody: {}, + }, +] as const; + +for (const { path, validBody, invalidBody } of endpoints) { + it(`${path}: rejects unauthenticated requests`, async ({ expect }) => { + const response = await niceBackendFetch(path, { + method: "POST", + accessType: "client", + body: validBody, + }); + // yup schema on createSmartRouteHandler requires auth.user; missing auth fails + // shape validation as 400 before reaching the handler's auth-specific error path. 
+ expect([400, 401]).toContain(response.status); + }); + + it(`${path}: rejects a signed-in user without isAiChatReviewer metadata`, async ({ expect }) => { + await AiChatReviewer.createNonReviewer(); + const response = await niceBackendFetch(path, { + method: "POST", + accessType: "client", + body: validBody, + }); + expect(response.status).toBe(403); + expect(String(response.body)).toContain("not approved to perform MCP review operations"); + }); + + it(`${path}: rejects a reviewer sending an invalid body`, async ({ expect }) => { + await AiChatReviewer.createReviewer(); + const response = await niceBackendFetch(path, { + method: "POST", + accessType: "client", + body: invalidBody, + }); + expect(response.status).toBe(400); + }); +} diff --git a/apps/e2e/tests/backend/endpoints/api/latest/internal/spacetimedb-enroll-reviewer.test.ts b/apps/e2e/tests/backend/endpoints/api/latest/internal/spacetimedb-enroll-reviewer.test.ts new file mode 100644 index 000000000..053acf407 --- /dev/null +++ b/apps/e2e/tests/backend/endpoints/api/latest/internal/spacetimedb-enroll-reviewer.test.ts @@ -0,0 +1,58 @@ +import { it } from "../../../../../helpers"; +import { AiChatReviewer, niceBackendFetch } from "../../../../backend-helpers"; + +const VALID_HEX = "a".repeat(64); + +it("rejects unauthenticated requests", async ({ expect }) => { + const response = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: VALID_HEX }, + }); + // createSmartRouteHandler's yup schema requires auth.user; missing auth trips + // schema validation before the handler runs, so this comes back as 400, not 401. 
+ expect([400, 401]).toContain(response.status); +}); + +it("rejects a signed-in user without isAiChatReviewer metadata", async ({ expect }) => { + await AiChatReviewer.createNonReviewer(); + const response = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: VALID_HEX }, + }); + expect(response.status).toBe(403); + expect(String(response.body)).toContain("not approved to perform MCP review operations"); +}); + +it("rejects a reviewer sending a non-hex identity", async ({ expect }) => { + await AiChatReviewer.createReviewer(); + const response = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: "not-a-hex-identity" }, + }); + expect(response.status).toBe(400); + expect(String(response.body)).toContain("Invalid identity"); +}); + +it("rejects a reviewer sending a hex identity of the wrong length", async ({ expect }) => { + await AiChatReviewer.createReviewer(); + const response = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: "a".repeat(63) }, + }); + expect(response.status).toBe(400); + expect(String(response.body)).toContain("Invalid identity"); +}); + +it("rejects a reviewer sending a request without an identity field", async ({ expect }) => { + await AiChatReviewer.createReviewer(); + const response = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: {}, + }); + expect(response.status).toBe(400); +}); diff --git a/apps/e2e/tests/spacetimedb/helpers.ts b/apps/e2e/tests/spacetimedb/helpers.ts new file mode 100644 index 000000000..fc8be5e1b --- /dev/null +++ b/apps/e2e/tests/spacetimedb/helpers.ts @@ -0,0 +1,143 @@ +// All traffic uses the +// SpacetimeDB HTTP API (POST /v1/identity, /v1/database/{db}/call/{reducer}, +// 
/v1/database/{db}/sql) — avoids pulling the `spacetimedb` client SDK into +// the e2e package just for a handful of subscriptions. + +export type MintedIdentity = { + token: string, + /** 64-hex identity string, without the "0x" prefix the WS SDK sometimes prints. */ + identity: string, +}; + +type SqlRow = Record<string, unknown>; + +export type SpacetimedbConfig = { + baseUrl: string, + dbName: string, + logToken: string | null, +}; + +export function getSpacetimedbConfig(): SpacetimedbConfig { + return { + baseUrl: process.env.STACK_SPACETIMEDB_URL ?? "", + dbName: process.env.STACK_SPACETIMEDB_DB_NAME ?? "stack-auth-llm", + logToken: process.env.STACK_MCP_LOG_TOKEN ?? null, + }; +} + +export async function isSpacetimedbReachable(): Promise<boolean> { + const { baseUrl } = getSpacetimedbConfig(); + if (!baseUrl) return false; + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), 2000); + try { + const res = await fetch(`${baseUrl}/v1/identity`, { + method: "POST", + signal: controller.signal, + }); + return res.ok; + } catch { + return false; + } finally { + clearTimeout(timeout); + } +} + +export async function mintIdentity(): Promise<MintedIdentity> { + const { baseUrl } = getSpacetimedbConfig(); + const res = await fetch(`${baseUrl}/v1/identity`, { method: "POST" }); + if (!res.ok) throw new Error(`mintIdentity failed: HTTP ${res.status}`); + const body = await res.json() as { token: string, identity: string }; + // SpacetimeDB sometimes returns the identity with a leading "0x"; normalize it off. + const identity = body.identity.startsWith("0x") ? body.identity.slice(2) : body.identity; + return { token: body.token, identity }; +} + +/** + * SpacetimeDB encodes `.optional()` fields as a tagged sum type — clients must + * send `{ some: value }` or `{ none: [] }`, not raw null. Mirrors the `opt()` + * helper in apps/backend/src/lib/ai/mcp-logger.ts:87. 
+ */ +export function opt<T>(value: T | null | undefined): { some: T } | { none: [] } { + return value == null ? { none: [] } : { some: value }; +} + +export type ReducerCallResult = { + status: number, + ok: boolean, + body: string, +}; + +export async function callReducer( + token: string, + reducer: string, + args: unknown[], +): Promise<ReducerCallResult> { + const { baseUrl, dbName } = getSpacetimedbConfig(); + const res = await fetch(`${baseUrl}/v1/database/${dbName}/call/${reducer}`, { + method: "POST", + headers: { + "Content-Type": "application/json", + "Authorization": `Bearer ${token}`, + }, + body: JSON.stringify(args, (_, v) => { + if (typeof v !== "bigint") return v; + const MAX = BigInt(Number.MAX_SAFE_INTEGER); + if (v <= MAX && v >= -MAX) return Number(v); + return v.toString(); + }), + }); + return { status: res.status, ok: res.ok, body: await res.text() }; +} + +export type SqlQueryResult = { + columns: string[], + rows: SqlRow[], +}; + +/** + * Look up the `correlationId` of a freshly-inserted row by its unique `question`. + * Caller must pass a SpacetimeDB token for an enrolled operator — only they can + * read `my_visible_mcp_call_log`. Returns undefined if no match is found. + */ +export async function findCorrelationIdByQuestion( + token: string, + question: string, +): Promise<string | undefined> { + const { rows } = await sqlQuery(token, "SELECT * FROM my_visible_mcp_call_log"); + const match = rows.find(r => r.question === question); + if (!match) return undefined; + const raw = match.correlation_id ?? match.correlationId; + return typeof raw === "string" ? 
raw : undefined; +} + +export async function sqlQuery(token: string, sql: string): Promise<SqlQueryResult> { + const { baseUrl, dbName } = getSpacetimedbConfig(); + const res = await fetch(`${baseUrl}/v1/database/${dbName}/sql`, { + method: "POST", + headers: { + "Content-Type": "text/plain", + "Authorization": `Bearer ${token}`, + }, + body: sql, + }); + if (!res.ok) { + throw new Error(`SQL ${JSON.stringify(sql)} failed: HTTP ${res.status} ${await res.text()}`); + } + const payload = await res.json() as Array<{ + schema: { elements: Array<{ name: { some: string } | { none: null } }> }, + rows: unknown[][], + }>; + // `/sql` returns an array of query results (one per statement). We only send one. + if (payload.length === 0) return { columns: [], rows: [] }; + const first = payload[0]; + const columns = first.schema.elements.map(el => "some" in el.name ? el.name.some : ""); + const rows: SqlRow[] = first.rows.map(tuple => { + const obj: SqlRow = {}; + columns.forEach((c, i) => { + obj[c] = tuple[i]; + }); + return obj; + }); + return { columns, rows }; +} diff --git a/apps/e2e/tests/spacetimedb/operators-rls.test.ts b/apps/e2e/tests/spacetimedb/operators-rls.test.ts new file mode 100644 index 000000000..f84b007b9 --- /dev/null +++ b/apps/e2e/tests/spacetimedb/operators-rls.test.ts @@ -0,0 +1,113 @@ +import { describe } from "vitest"; +import { it } from "../helpers"; +import { AiChatReviewer, niceBackendFetch } from "../backend/backend-helpers"; +import { callReducer, getSpacetimedbConfig, isSpacetimedbReachable, mintIdentity, sqlQuery } from "./helpers"; + +const canRun = await isSpacetimedbReachable(); +const { logToken } = getSpacetimedbConfig(); + +describe.skipIf(!canRun)("operators table RLS", () => { + it("each reviewer sees only their own operators row", async ({ expect }) => { + const a = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enrollA = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + 
accessType: "client", + body: { identity: a.identity }, + }); + expect(enrollA.status).toBe(200); + + const b = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enrollB = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: b.identity }, + }); + expect(enrollB.status).toBe(200); + + const asA = await sqlQuery(a.token, "SELECT * FROM operators"); + const asB = await sqlQuery(b.token, "SELECT * FROM operators"); + + expect(asA.rows.length).toBe(1); + expect(asB.rows.length).toBe(1); + // Different reviewers must see different (own) rows — if RLS broke, both would see two. + expect(JSON.stringify(asA.rows[0])).not.toEqual(JSON.stringify(asB.rows[0])); + }); + + it("a freshly-minted non-operator identity sees zero operators rows", async ({ expect }) => { + // Seed at least one operator so the table isn't empty. + const seeded = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enroll = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: seeded.identity }, + }); + expect(enroll.status).toBe(200); + + const stranger = await mintIdentity(); + const { rows } = await sqlQuery(stranger.token, "SELECT * FROM operators"); + expect(rows.length).toBe(0); + }); + + it("enrolling a second identity as the same reviewer sweeps the first", async ({ expect }) => { + // The add_operator reducer's sweep logic deletes prior rows with the same + // stackUserId before inserting a new identity — a reviewer switching browsers + // should not accumulate stale operator rows. 
+ const x = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enrollX = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: x.identity }, + }); + expect(enrollX.status).toBe(200); + + // Same reviewer (backendContext.userAuth unchanged) enrolls a second identity. + const y = await mintIdentity(); + const enrollY = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: y.identity }, + }); + expect(enrollY.status).toBe(200); + + // X should no longer be in operators — sweep removed its row. + const asX = await sqlQuery(x.token, "SELECT * FROM operators"); + expect(asX.rows.length).toBe(0); + // Y should still be the active operator. + const asY = await sqlQuery(y.token, "SELECT * FROM operators"); + expect(asY.rows.length).toBe(1); + }); + + it.skipIf(!logToken)( + "remove_operator reducer revokes an operator's view access", + async ({ expect }) => { + const target = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enroll = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: target.identity }, + }); + expect(enroll.status).toBe(200); + + // Confirm enrolled. + const before = await sqlQuery(target.token, "SELECT * FROM operators"); + expect(before.rows.length).toBe(1); + + // Directly call remove_operator with the log token. + const caller = await mintIdentity(); + const removed = await callReducer(caller.token, "remove_operator", [ + logToken!, + [`0x${target.identity}`], + ]); + expect(removed.ok).toBe(true); + + // Target is no longer an operator. 
+ const after = await sqlQuery(target.token, "SELECT * FROM operators"); + expect(after.rows.length).toBe(0); + }, + ); +}); diff --git a/apps/e2e/tests/spacetimedb/private-tables.test.ts b/apps/e2e/tests/spacetimedb/private-tables.test.ts new file mode 100644 index 000000000..a7ddcee2e --- /dev/null +++ b/apps/e2e/tests/spacetimedb/private-tables.test.ts @@ -0,0 +1,120 @@ +import { describe } from "vitest"; +import { it } from "../helpers"; +import { AiChatReviewer, niceBackendFetch } from "../backend/backend-helpers"; +import { callReducer, getSpacetimedbConfig, isSpacetimedbReachable, mintIdentity, opt, sqlQuery } from "./helpers"; + +const canRun = await isSpacetimedbReachable(); +const { logToken } = getSpacetimedbConfig(); + +describe.skipIf(!canRun)("private log tables and view gating", () => { + // my_visible_ai_query_log is the counterpart to my_visible_mcp_call_log. Seeding + // requires the log token (no user-facing endpoint writes to ai_query_log), so + // skip when unavailable rather than asserting against an empty table. + it.skipIf(!logToken)( + "a freshly-minted non-operator identity sees zero rows in my_visible_ai_query_log", + async ({ expect }) => { + const seeder = await mintIdentity(); + const seed = await callReducer(seeder.token, "log_ai_query", [ + logToken!, + `corr-${Date.now()}`, + "chat", + "system-prompt-id", + "high", + "fast", + "some-model", + false, + opt(null), + opt(null), + "[]", + "[]", + "[]", + "final text", + opt(null), + opt(null), + opt(null), + opt(null), + 0, + 0n, + opt(null), + opt(null), + opt(null), + ]); + expect(seed.ok).toBe(true); + + const stranger = await mintIdentity(); + const { rows } = await sqlQuery(stranger.token, "SELECT * FROM my_visible_ai_query_log"); + expect(rows.length).toBe(0); + }, + ); + + it("cannot subscribe to the private mcp_call_log table directly", async ({ expect }) => { + // Seed a row so the table isn't empty — we're testing access control, not emptiness. 
+ const seeder = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enroll = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: seeder.identity }, + }); + expect(enroll.status).toBe(200); + const seed = await niceBackendFetch("/api/latest/internal/mcp-review/add-manual", { + method: "POST", + accessType: "client", + body: { question: "seeded", answer: "a", publish: false }, + }); + expect(seed.status).toBe(200); + + // Private table: SpacetimeDB should either reject the query outright or return + // zero rows to non-operators. Either outcome is acceptable — the invariant is + // "the caller does not see any private-table rows." + const stranger = await mintIdentity(); + let rows: unknown[] = []; // stays empty when SpacetimeDB rejects the query + try { + rows = (await sqlQuery(stranger.token, "SELECT * FROM mcp_call_log")).rows; + } catch (err) { + expect(err).toBeInstanceOf(Error); // rejection path: the table isn't publicly readable + } + expect(rows.length).toBe(0); // outside the try, so a leaked row cannot be swallowed by the catch + }); + + it.skipIf(!logToken)( + "cannot subscribe to the private ai_query_log table directly", + async ({ expect }) => { + const seeder = await mintIdentity(); + const seed = await callReducer(seeder.token, "log_ai_query", [ + logToken!, + `corr-${Date.now()}`, + "chat", + "system-prompt-id", + "high", + "fast", + "some-model", + false, + opt(null), + opt(null), + "[]", + "[]", + "[]", + "final text", + opt(null), + opt(null), + opt(null), + opt(null), + 0, + 0n, + opt(null), + opt(null), + opt(null), + ]); + expect(seed.ok).toBe(true); + const stranger = await mintIdentity(); + let rows: unknown[] = []; + try { + rows = (await sqlQuery(stranger.token, "SELECT * FROM ai_query_log")).rows; + } catch (err) { + expect(err).toBeInstanceOf(Error); + } + expect(rows.length).toBe(0); // outside the try, so a leaked row cannot be swallowed by the catch + }, + ); +}); diff --git a/apps/e2e/tests/spacetimedb/published-qa-projection.test.ts b/apps/e2e/tests/spacetimedb/published-qa-projection.test.ts new file mode 100644 index 
000000000..574bbea2e --- /dev/null +++ b/apps/e2e/tests/spacetimedb/published-qa-projection.test.ts @@ -0,0 +1,70 @@ +import { describe } from "vitest"; +import { it } from "../helpers"; +import { AiChatReviewer, niceBackendFetch } from "../backend/backend-helpers"; +import { isSpacetimedbReachable, mintIdentity, sqlQuery } from "./helpers"; + +const canRun = await isSpacetimedbReachable(); + +const EXPECTED_PUBLISHED_QA_COLUMNS = ["id", "question", "answer", "published_at"] as const; + +// Fields from mcp_call_log that MUST NOT appear in the public view. If any of these +// reappear, the projection has regressed and internal metadata is leaking to the +// unauthenticated /questions page. +const FORBIDDEN_COLUMNS = [ + "human_reviewed_by", + "human_reviewed_at", + "user_prompt", + "qa_reviewed_at", + "qa_flags_json", + "qa_improvement_suggestions", + "qa_conversation_json", + "model_id", + "correlation_id", + "conversation_id", + "response", + "reason", + "tool_name", + "inner_tool_calls_json", + "human_corrected_question", + "human_corrected_answer", +]; + +describe.skipIf(!canRun)("published_qa view projection", () => { + it("exposes only {id, question, answer, publishedAt} — no reviewer or QA internals", async ({ expect }) => { + const reviewerIdentity = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enroll = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: reviewerIdentity.identity }, + }); + expect(enroll.status).toBe(200); + + const markerQuestion = `test-projection-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; + const markerAnswer = "answer-for-projection-test"; + + const publish = await niceBackendFetch("/api/latest/internal/mcp-review/add-manual", { + method: "POST", + accessType: "client", + body: { question: markerQuestion, answer: markerAnswer, publish: true }, + }); + expect(publish.status).toBe(200); + + // Query with a fresh 
non-operator token — published_qa is anonymousView so any + // bearer works; using a stranger identity matches the real public-page scenario. + const stranger = await mintIdentity(); + const result = await sqlQuery(stranger.token, "SELECT * FROM published_qa"); + + // Regression: the projected column set must be exactly these four names. + expect([...result.columns].sort()).toEqual([...EXPECTED_PUBLISHED_QA_COLUMNS].sort()); + for (const forbidden of FORBIDDEN_COLUMNS) { + expect(result.columns).not.toContain(forbidden); + } + + // Our marker row must be present and carry the corrected (answer) payload, not + // the raw response (which would be empty for a manually-added row). + const ours = result.rows.find(r => r.question === markerQuestion); + expect(ours).toBeDefined(); + expect(ours).toMatchObject({ question: markerQuestion, answer: markerAnswer }); + }); +}); diff --git a/apps/e2e/tests/spacetimedb/published-qa-visibility.test.ts b/apps/e2e/tests/spacetimedb/published-qa-visibility.test.ts new file mode 100644 index 000000000..481eb6c5b --- /dev/null +++ b/apps/e2e/tests/spacetimedb/published-qa-visibility.test.ts @@ -0,0 +1,151 @@ +import { describe } from "vitest"; +import { it } from "../helpers"; +import { AiChatReviewer, niceBackendFetch } from "../backend/backend-helpers"; +import { findCorrelationIdByQuestion, isSpacetimedbReachable, mintIdentity, sqlQuery } from "./helpers"; + +const canRun = await isSpacetimedbReachable(); + +function uniqueMarker(prefix: string): string { + return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; +} + +async function publishedQaContains(question: string): Promise<boolean> { + const stranger = await mintIdentity(); + const { rows } = await sqlQuery(stranger.token, "SELECT * FROM published_qa"); + return rows.some(r => r.question === question); +} + +describe.skipIf(!canRun)("published_qa visibility", () => { + it("does not expose rows added with publish:false", async ({ expect }) => { + const reviewer = 
await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enroll = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: reviewer.identity }, + }); + expect(enroll.status).toBe(200); + + const marker = uniqueMarker("unpublished"); + const add = await niceBackendFetch("/api/latest/internal/mcp-review/add-manual", { + method: "POST", + accessType: "client", + body: { question: marker, answer: "x", publish: false }, + }); + expect(add.status).toBe(200); + + expect(await publishedQaContains(marker)).toBe(false); + }); + + it("removes a row from published_qa when update-correction sets publish:false", async ({ expect }) => { + const reviewer = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enroll = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: reviewer.identity }, + }); + expect(enroll.status).toBe(200); + + const marker = uniqueMarker("to-unpublish"); + const add = await niceBackendFetch("/api/latest/internal/mcp-review/add-manual", { + method: "POST", + accessType: "client", + body: { question: marker, answer: "x", publish: true }, + }); + expect(add.status).toBe(200); + expect(await publishedQaContains(marker)).toBe(true); + + const correlationId = await findCorrelationIdByQuestion(reviewer.token, marker); + expect(correlationId).toBeDefined(); + + const update = await niceBackendFetch("/api/latest/internal/mcp-review/update-correction", { + method: "POST", + accessType: "client", + body: { + correlationId, + correctedQuestion: marker, + correctedAnswer: "x", + publish: false, + }, + }); + expect(update.status).toBe(200); + + expect(await publishedQaContains(marker)).toBe(false); + }); + + it("removes a row from published_qa when deleted", async ({ expect }) => { + const reviewer = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const 
enroll = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: reviewer.identity }, + }); + expect(enroll.status).toBe(200); + + const marker = uniqueMarker("to-delete"); + const add = await niceBackendFetch("/api/latest/internal/mcp-review/add-manual", { + method: "POST", + accessType: "client", + body: { question: marker, answer: "x", publish: true }, + }); + expect(add.status).toBe(200); + expect(await publishedQaContains(marker)).toBe(true); + + const correlationId = await findCorrelationIdByQuestion(reviewer.token, marker); + expect(correlationId).toBeDefined(); + + const del = await niceBackendFetch("/api/latest/internal/mcp-review/delete", { + method: "POST", + accessType: "client", + body: { correlationId }, + }); + expect(del.status).toBe(200); + + expect(await publishedQaContains(marker)).toBe(false); + }); + + it("lets reviewer B delete a row published by reviewer A (cross-reviewer integrity)", async ({ expect }) => { + // A publishes. + const reviewerA = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enrollA = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: reviewerA.identity }, + }); + expect(enrollA.status).toBe(200); + + const marker = uniqueMarker("cross-reviewer"); + const add = await niceBackendFetch("/api/latest/internal/mcp-review/add-manual", { + method: "POST", + accessType: "client", + body: { question: marker, answer: "x", publish: true }, + }); + expect(add.status).toBe(200); + expect(await publishedQaContains(marker)).toBe(true); + + const correlationId = await findCorrelationIdByQuestion(reviewerA.token, marker); + expect(correlationId).toBeDefined(); + + // B deletes. fastSignUp re-points backendContext.userAuth to B; subsequent calls use B's auth. 
+ const reviewerB = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enrollB = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: reviewerB.identity }, + }); + expect(enrollB.status).toBe(200); + + const del = await niceBackendFetch("/api/latest/internal/mcp-review/delete", { + method: "POST", + accessType: "client", + body: { correlationId }, + }); + expect(del.status).toBe(200); + + expect(await publishedQaContains(marker)).toBe(false); + }); +}); diff --git a/apps/e2e/tests/spacetimedb/reducer-auth.test.ts b/apps/e2e/tests/spacetimedb/reducer-auth.test.ts new file mode 100644 index 000000000..b1a2c6569 --- /dev/null +++ b/apps/e2e/tests/spacetimedb/reducer-auth.test.ts @@ -0,0 +1,103 @@ +import { describe } from "vitest"; +import { it } from "../helpers"; +import { AiChatReviewer, niceBackendFetch } from "../backend/backend-helpers"; +import { callReducer, getSpacetimedbConfig, isSpacetimedbReachable, mintIdentity, opt, sqlQuery } from "./helpers"; + +const canRun = await isSpacetimedbReachable(); +const { logToken } = getSpacetimedbConfig(); + +describe.skipIf(!canRun)("SpacetimeDB reducer auth", () => { + it("a freshly-minted non-operator identity sees zero rows in my_visible_mcp_call_log", async ({ expect }) => { + // Seed a row (unpublished is enough) so the underlying mcp_call_log is definitely non-empty — + // otherwise a 0-row result could be a false positive from an empty table. 
+ const reviewerIdentity = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enroll = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: reviewerIdentity.identity }, + }); + expect(enroll.status).toBe(200); + const seedPublish = await niceBackendFetch("/api/latest/internal/mcp-review/add-manual", { + method: "POST", + accessType: "client", + body: { question: "q", answer: "a", publish: false }, + }); + expect(seedPublish.status).toBe(200); + + const stranger = await mintIdentity(); + const result = await sqlQuery(stranger.token, "SELECT * FROM my_visible_mcp_call_log"); + expect(result.rows.length).toBe(0); + }); + + // Smoke-test every mutating reducer's token gate. The add_operator test + // below catches regressions on that one reducer specifically; this loop checks + // that each listed mutating reducer enforces the token check. If a reducer is added, it + // must be added here too or this smoke covers one less attack surface. 
+ it("every mutating reducer rejects calls with a wrong log token", async ({ expect }) => { + const caller = await mintIdentity(); + const victim = await mintIdentity(); + const wrong = "definitely-not-the-real-token"; + const hexId = `0x${victim.identity}`; + + const cases = [ + { name: "add_operator", args: [wrong, [hexId], "some-user", "Some Name"] }, + { name: "remove_operator", args: [wrong, [hexId]] }, + { name: "enroll_service", args: [wrong, "Some Service"] }, + { name: "mark_human_reviewed", args: [wrong, "corr", "reviewer"] }, + { name: "unmark_human_reviewed", args: [wrong, "corr"] }, + { + name: "update_human_correction", + args: [wrong, "corr", "q", "a", false, "reviewer"], + }, + { name: "add_manual_qa", args: [wrong, "q", "a", false, "reviewer"] }, + { name: "delete_qa_entry", args: [wrong, "corr"] }, + { + name: "log_mcp_call", + args: [wrong, "corr", opt(null), "tool", "reason", "prompt", "q", "r", 0, "[]", 0n, "model", opt(null)], + }, + { + name: "update_mcp_qa_review", + args: [wrong, "corr", false, false, false, "[]", "", 0, "model", opt(null), opt(null)], + }, + { + name: "log_ai_query", + args: [wrong, "corr", "chat", "sys", "q", "s", "model", false, opt(null), opt(null), "[]", "[]", "[]", "text", opt(null), opt(null), opt(null), opt(null), 0, 0n, opt(null), opt(null), opt(null)], + }, + ]; + + for (const { name, args } of cases) { + const result = await callReducer(caller.token, name, args); + expect(result.ok, `reducer ${name} should reject wrong token`).toBe(false); + expect(result.body, `reducer ${name} should report invalid-token error`).toContain("Invalid log token"); + } + }); + + it.skipIf(!logToken)( + "rejects add_operator when an existing identity is claimed under a different stackUserId", + async ({ expect }) => { + // Enroll identity X with stackUserId=A via the backend endpoint (legitimate flow). 
+ const target = await mintIdentity(); + const callerA = await mintIdentity(); + await AiChatReviewer.createReviewer(); + const enrollA = await niceBackendFetch("/api/latest/internal/spacetimedb-enroll-reviewer", { + method: "POST", + accessType: "client", + body: { identity: target.identity }, + }); + expect(enrollA.status).toBe(200); + + // Now directly call add_operator with a DIFFERENT stackUserId for the same identity. + // Simulates an attacker with the log token trying to relabel X's row. + const result = await callReducer(callerA.token, "add_operator", [ + logToken!, + [`0x${target.identity}`], + "attacker-different-stack-user-id", + "Attacker Display Name", + ]); + + expect(result.ok).toBe(false); + expect(result.body).toContain("Identity is bound to a different Stack user"); + }, + ); +}); diff --git a/apps/internal-tool/spacetimedb/src/index.ts b/apps/internal-tool/spacetimedb/src/index.ts index f326f9e59..850dd548d 100644 --- a/apps/internal-tool/spacetimedb/src/index.ts +++ b/apps/internal-tool/spacetimedb/src/index.ts @@ -1,4 +1,5 @@ import { schema, t, table, SenderError } from 'spacetimedb/server'; +import type { Timestamp } from 'spacetimedb'; // Injected at publish time by the spacetime:inject-token pnpm script from STACK_MCP_LOG_TOKEN env var. // Must match STACK_MCP_LOG_TOKEN in the backend .env. @@ -118,11 +119,36 @@ export const myVisibleAiQueryLog = spacetimedb.view( // Public view for the /questions page — returns rows reviewers have explicitly // published. Uses `anonymousView` so SpacetimeDB materializes once and shares -// the result across all subscribers. +// the result across all subscribers. Projected to only fields the public page +// needs; everything else (reviewer attribution, QA internals, raw prompt, +// tool-call metadata) stays private. 
+const publishedQaRow = t.object('PublishedQaRow', { + id: t.u64(), + question: t.string(), + answer: t.string(), + publishedAt: t.timestamp().optional(), +}); + export const publishedQa = spacetimedb.anonymousView( { name: 'published_qa', public: true }, - t.array(mcpCallLog.rowType), - (ctx) => Array.from(ctx.db.mcpCallLog.publishedToQa.filter(true)), + t.array(publishedQaRow), + (ctx) => { + const out: Array<{ + id: bigint, + question: string, + answer: string, + publishedAt: Timestamp | undefined, + }> = []; + for (const row of ctx.db.mcpCallLog.publishedToQa.filter(true)) { + out.push({ + id: row.id, + question: row.humanCorrectedQuestion ?? row.question, + answer: row.humanCorrectedAnswer ?? row.response, + publishedAt: row.publishedAt, + }); + } + return out; + }, ); export const add_operator = spacetimedb.reducer( @@ -138,6 +164,9 @@ export const add_operator = spacetimedb.reducer( } const existing = ctx.db.operators.identity.find(args.identity); if (existing != null) { + if (existing.stackUserId !== args.stackUserId) { + throw new SenderError('Identity is bound to a different Stack user'); + } ctx.db.operators.identity.update({ identity: args.identity, addedAt: existing.addedAt, diff --git a/apps/internal-tool/src/app/questions/page.tsx b/apps/internal-tool/src/app/questions/page.tsx index e5a7a6307..a9b8a38ef 100644 --- a/apps/internal-tool/src/app/questions/page.tsx +++ b/apps/internal-tool/src/app/questions/page.tsx @@ -48,10 +48,10 @@ export default function QuestionsPage() {
{publishedQa.map(row => (
-

{row.humanCorrectedQuestion ?? row.question}

+

{row.question}

- {row.humanCorrectedAnswer ?? row.response} + {row.answer}
{row.publishedAt && ( diff --git a/apps/internal-tool/src/hooks/useSpacetimeDB.ts b/apps/internal-tool/src/hooks/useSpacetimeDB.ts index a25eae496..af5315a51 100644 --- a/apps/internal-tool/src/hooks/useSpacetimeDB.ts +++ b/apps/internal-tool/src/hooks/useSpacetimeDB.ts @@ -2,7 +2,7 @@ import { captureError } from "@stackframe/stack-shared/dist/utils/errors"; import { useEffect, useState, useRef } from "react"; import type { Identity } from "spacetimedb"; import { DbConnection, type ErrorContext, type EventContext, type SubscriptionEventContext } from "../module_bindings"; -import type { AiQueryLogRow, McpCallLogRow } from "../types"; +import type { AiQueryLogRow, McpCallLogRow, PublishedQaRow } from "../types"; export type EnsureEnrolled = (identity: Identity) => Promise; @@ -16,6 +16,9 @@ function resolveEnv(raw: string | undefined, devDefault: string, name: string): throw new Error(`${name} is not configured. Set it in .env.local or hosting platform env.`); } const HOST = resolveEnv(rawHost, "ws://localhost:8139", "NEXT_PUBLIC_SPACETIMEDB_HOST"); +if (!IS_DEV && !HOST.startsWith("wss://")) { + throw new Error("NEXT_PUBLIC_SPACETIMEDB_HOST must use wss:// in production"); +} const DB_NAME = resolveEnv(rawDbName, "stack-auth-llm", "NEXT_PUBLIC_SPACETIMEDB_DB_NAME"); const TOKEN_KEY = `spacetimedb_${HOST}/${DB_NAME}/auth_token`; @@ -186,14 +189,14 @@ const aiQueryBinding: TableBinding = { }, }; -const publishedQaBinding: TableBinding<McpCallLogRow> = { +const publishedQaBinding: TableBinding<PublishedQaRow> = { tableName: "published_qa", iter: (ctx) => ctx.db.publishedQa.iter(), onInsert: (conn, cb) => { - conn.db.publishedQa.onInsert((_ctx: EventContext, row: McpCallLogRow) => cb(row)); + conn.db.publishedQa.onInsert((_ctx: EventContext, row: PublishedQaRow) => cb(row)); }, onDelete: (conn, cb) => { - conn.db.publishedQa.onDelete((_ctx: EventContext, row: McpCallLogRow) => cb(row)); + conn.db.publishedQa.onDelete((_ctx: EventContext, row: PublishedQaRow) => cb(row)); }, }; diff --git 
a/apps/internal-tool/src/module_bindings/published_qa_table.ts b/apps/internal-tool/src/module_bindings/published_qa_table.ts index cd54c54f7..3d5f5bf7e 100644 --- a/apps/internal-tool/src/module_bindings/published_qa_table.ts +++ b/apps/internal-tool/src/module_bindings/published_qa_table.ts @@ -12,34 +12,7 @@ import { export default __t.row({ id: __t.u64(), - shard: __t.u8(), - correlationId: __t.string().name("correlation_id"), - conversationId: __t.option(__t.string()).name("conversation_id"), - createdAt: __t.timestamp().name("created_at"), - toolName: __t.string().name("tool_name"), - reason: __t.string(), - userPrompt: __t.string().name("user_prompt"), question: __t.string(), - response: __t.string(), - stepCount: __t.u32().name("step_count"), - innerToolCallsJson: __t.string().name("inner_tool_calls_json"), - durationMs: __t.u64().name("duration_ms"), - modelId: __t.string().name("model_id"), - errorMessage: __t.option(__t.string()).name("error_message"), - qaReviewedAt: __t.option(__t.timestamp()).name("qa_reviewed_at"), - qaNeedsHumanReview: __t.option(__t.bool()).name("qa_needs_human_review"), - qaAnswerCorrect: __t.option(__t.bool()).name("qa_answer_correct"), - qaAnswerRelevant: __t.option(__t.bool()).name("qa_answer_relevant"), - qaFlagsJson: __t.option(__t.string()).name("qa_flags_json"), - qaImprovementSuggestions: __t.option(__t.string()).name("qa_improvement_suggestions"), - qaOverallScore: __t.option(__t.u32()).name("qa_overall_score"), - qaReviewModelId: __t.option(__t.string()).name("qa_review_model_id"), - qaConversationJson: __t.option(__t.string()).name("qa_conversation_json"), - qaErrorMessage: __t.option(__t.string()).name("qa_error_message"), - humanReviewedAt: __t.option(__t.timestamp()).name("human_reviewed_at"), - humanReviewedBy: __t.option(__t.string()).name("human_reviewed_by"), - humanCorrectedQuestion: __t.option(__t.string()).name("human_corrected_question"), - humanCorrectedAnswer: 
__t.option(__t.string()).name("human_corrected_answer"), - publishedToQa: __t.bool().name("published_to_qa"), + answer: __t.string(), publishedAt: __t.option(__t.timestamp()).name("published_at"), }); diff --git a/apps/internal-tool/src/module_bindings/types.ts b/apps/internal-tool/src/module_bindings/types.ts index b37d5ae23..439f15226 100644 --- a/apps/internal-tool/src/module_bindings/types.ts +++ b/apps/internal-tool/src/module_bindings/types.ts @@ -88,6 +88,11 @@ export const Operators = __t.object("Operators", { }); export type Operators = __Infer<typeof Operators>; -export const PublishedQa = __t.object("PublishedQa", {}); +export const PublishedQa = __t.object("PublishedQa", { + id: __t.u64(), + question: __t.string(), + answer: __t.string(), + publishedAt: __t.option(__t.timestamp()), +}); export type PublishedQa = __Infer<typeof PublishedQa>; diff --git a/apps/internal-tool/src/types.ts b/apps/internal-tool/src/types.ts index 6e37c4864..a22b9a58e 100644 --- a/apps/internal-tool/src/types.ts +++ b/apps/internal-tool/src/types.ts @@ -1 +1 @@ -export type { AiQueryLog as AiQueryLogRow, McpCallLog as McpCallLogRow, PublishedQa as PublishedQaRow } from "./module_bindings/types"; +export type { AiQueryLog as AiQueryLogRow, McpCallLog as McpCallLogRow, PublishedQa as PublishedQaRow } from "./module_bindings/types";