Ask AI Huge Response (#1328)

This PR fixes a bug where the analytics tool could return a large number of
rows, resulting in a huge token count. We now check the number of
characters in the serialized tool result; if it exceeds 50,000 characters,
we return an error message instead of the rows and ask the AI to make more
focused queries.


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

* **New Features**
* AI assistant shows friendlier, categorized error messages and captures
unexpected errors for diagnosis.
  * UI now displays classifier-derived, user-friendly AI error text.

* **Bug Fixes & Improvements**
* Enforced a hard size budget for SQL query results and gracefully
handles oversized responses.
* Centralized safer database error messaging to avoid leaking internal
details.
* Strengthened AI guidance to prefer narrower queries, safer column
selection, and pairing GROUP BY with ORDER BY + LIMIT.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
This commit is contained in:
aadesh18 2026-04-13 15:12:07 -07:00 committed by GitHub
parent c46767f4e8
commit 5573927429
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 155 additions and 67 deletions

View File

@ -1,9 +1,9 @@
import { getClickhouseExternalClient } from "@/lib/clickhouse";
import { getSafeClickhouseErrorMessage } from "@/lib/clickhouse-errors";
import { createSmartRouteHandler } from "@/route-handlers/smart-route-handler";
import { KnownErrors } from "@stackframe/stack-shared";
import { adaptSchema, adminAuthTypeSchema, jsonSchema, yupBoolean, yupMixed, yupNumber, yupObject, yupRecord, yupString } from "@stackframe/stack-shared/dist/schema-fields";
import { getNodeEnvironment } from "@stackframe/stack-shared/dist/utils/env";
import { captureError, StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors";
import { StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors";
import { Result } from "@stackframe/stack-shared/dist/utils/results";
import { randomUUID } from "crypto";
@ -72,45 +72,5 @@ export const POST = createSmartRouteHandler({
},
});
/** ClickHouse error codes whose original message is returned to the caller unchanged. */
const SAFE_CLICKHOUSE_ERROR_CODES: number[] = [
  62, // SYNTAX_ERROR
  159, // TIMEOUT_EXCEEDED
  164, // READONLY
  158, // TOO_MANY_ROWS
  396, // TOO_MANY_ROWS_OR_BYTES
  636, // CANNOT_EXTRACT_TABLE_STRUCTURE
];

/** ClickHouse error codes that are recognized but whose message is withheld outside development/test. */
const UNSAFE_CLICKHOUSE_ERROR_CODES: number[] = [
  36, // BAD_ARGUMENTS
  43, // ILLEGAL_TYPE_OF_ARGUMENT
  47, // UNKNOWN_IDENTIFIER
  60, // UNKNOWN_TABLE
  497, // ACCESS_DENIED
];

/** Generic text returned whenever the real error message is withheld. */
const DEFAULT_CLICKHOUSE_ERROR_MESSAGE = "Error during execution of this query.";

const MAX_RESULT_ROWS = 10000;
const MAX_RESULT_BYTES = 10 * 1024 ** 2;

/**
 * Turns an error thrown while running a ClickHouse query into a message that can be returned to the caller.
 *
 * - Values that do not have a numeric-string `code` and a string `message` are reported via
 *   `captureError` and mapped to the default message.
 * - Codes listed in `SAFE_CLICKHOUSE_ERROR_CODES` return the original message.
 * - Every other code returns the default message; codes that are also missing from
 *   `UNSAFE_CLICKHOUSE_ERROR_CODES` are additionally reported via `captureError`.
 * - In the "development" and "test" node environments the withheld code and message are appended.
 *
 * @param error - The value caught from the failed query.
 * @param query - The query text, attached to captured errors for diagnosis.
 * @returns The message to return to the caller.
 */
function getSafeClickhouseErrorMessage(error: unknown, query: string) {
  if (
    typeof error === "object"
    && error !== null
    && "code" in error
    && typeof error.code === "string"
    && !isNaN(Number(error.code))
    && "message" in error
    && typeof error.message === "string"
  ) {
    const numericCode = Number(error.code);
    const rawMessage = error.message;

    if (SAFE_CLICKHOUSE_ERROR_CODES.includes(numericCode)) {
      return rawMessage;
    }

    const isRecognized = UNSAFE_CLICKHOUSE_ERROR_CODES.includes(numericCode);
    if (!isRecognized) {
      // Neither list contains the code: report it so that it can be classified.
      captureError("unknown-clickhouse-error-for-query", new StackAssertionError(`Unknown Clickhouse error: code ${numericCode} not in safe or unsafe codes`, { cause: error, query }));
    }

    const isVerboseEnvironment = getNodeEnvironment() === "development" || getNodeEnvironment() === "test";
    if (!isVerboseEnvironment) {
      return DEFAULT_CLICKHOUSE_ERROR_MESSAGE;
    }

    const unknownCodeHint = isRecognized
      ? ""
      : "\n\nThis error is not known and you should probably add it to the safe or unsafe codes in analytics/query/route.ts.";
    return `${DEFAULT_CLICKHOUSE_ERROR_MESSAGE}${unknownCodeHint}\n\nAs you are in development mode, you can see the full error: ${numericCode} ${rawMessage}`;
  }

  // The value does not have the expected { code, message } shape.
  captureError("unknown-clickhouse-error-for-query-not-clickhouse-error", new StackAssertionError("Unknown error from Clickhouse is not a Clickhouse error", { cause: error, query }));
  return DEFAULT_CLICKHOUSE_ERROR_MESSAGE;
}

View File

@ -1,3 +1,5 @@
import { SQL_QUERY_RESULT_MAX_CHARS } from "@/lib/ai/tools/sql-query";
/**
* Base prompt for all Stack Auth AI interactions.
* Contains global guidelines and core knowledge about Stack Auth.
@ -102,6 +104,44 @@ SQL QUERY GUIDELINES:
- Recent signups: SELECT * FROM users ORDER BY signed_up_at DESC LIMIT 10
- Events today: SELECT COUNT(*) FROM events WHERE toDate(event_at) = today()
- Event types: SELECT event_type, COUNT(*) as count FROM events GROUP BY event_type ORDER BY count DESC LIMIT 10
TOOL RESULT BUDGET (HARD LIMIT):
- The queryAnalytics tool returns { success: false } if the result JSON exceeds ${SQL_QUERY_RESULT_MAX_CHARS.toLocaleString()} characters.
NO ROWS reach you in that case you get { success: false, error, rowCount, characters, columnsReturned }
and you MUST re-query with a more specific SQL statement.
- The events.data JSON blob typically triples per-row cost. Never SELECT * on events unless you have
a very small LIMIT and truly need every column.
PREFER AGGREGATION OVER RAW ROWS:
For "how many", "top N", "distribution", "unique count", "average", "over time" questions,
push the math into SQL using ClickHouse functions. Examples:
Count: SELECT COUNT(*) FROM events WHERE event_type='$token-refresh' AND event_at >= today()
Distinct count: SELECT uniqExact(user_id) FROM events WHERE event_at >= today() - INTERVAL 7 DAY
Top N: SELECT user_id, COUNT(*) AS c FROM events GROUP BY user_id ORDER BY c DESC LIMIT 10
Quantiles: SELECT quantile(0.5)(c), quantile(0.95)(c) FROM (SELECT user_id, COUNT(*) AS c FROM events GROUP BY user_id)
Time bucketing: SELECT toStartOfHour(event_at) AS bucket, COUNT(*) AS c FROM events
WHERE event_at >= now() - INTERVAL 1 DAY GROUP BY bucket ORDER BY bucket
JSON key discovery: SELECT arrayJoin(JSONExtractKeys(data)) AS k, COUNT(*) AS c FROM events
GROUP BY k ORDER BY c DESC LIMIT 20
Multi-metric: SELECT COUNT(*), uniqExact(user_id), min(event_at), max(event_at)
FROM events WHERE event_type='$token-refresh'
WHEN INDIVIDUAL ROWS MATTER (user explicitly asked to see records):
- ALWAYS use LIMIT <= 50.
- ALWAYS specify the exact columns you need never SELECT * on events.
- Drop the 'data' column unless the user specifically asked about event payloads.
GROUP BY REQUIRES ORDER BY + LIMIT unless you expect <= 50 groups, otherwise the result may
exceed the ${SQL_QUERY_RESULT_MAX_CHARS.toLocaleString()}-character budget and fail.
HANDLING { success: false } ERRORS:
When the tool returns success:false with "Result too large":
1. Read rowCount if it's large (>100), switch to aggregation (COUNT, uniqExact, GROUP BY...).
2. Read columnsReturned if it includes 'data', re-query without it.
3. Re-query with a narrower WHERE clause or a smaller LIMIT.
4. Do NOT present the error to the user fix the query and try again.
5. Do NOT claim you saw rows that you didn't the error response contains no row data.
`,
"docs-ask-ai": `
# Stack Auth AI Assistant System Prompt

View File

@ -1,8 +1,12 @@
import { getClickhouseExternalClient } from "@/lib/clickhouse";
import { getSafeClickhouseErrorMessage } from "@/lib/clickhouse-errors";
import { SmartRequestAuth } from "@/route-handlers/smart-request";
import { ClickHouseError } from "@clickhouse/client";
import { tool } from "ai";
import { z } from "zod";
/**
 * Maximum length, in characters, of the JSON-serialized SQL query tool response.
 * A serialized response longer than this is replaced by a `success: false` result.
 */
export const SQL_QUERY_RESULT_MAX_CHARS = 50_000;
export function createSqlQueryTool(auth: SmartRequestAuth | null, targetProjectId?: string | null) {
if (auth == null) {
// Return null or throw - analytics queries require authentication
@ -21,32 +25,48 @@ export function createSqlQueryTool(auth: SmartRequestAuth | null, targetProjectI
}),
execute: async ({ query }: { query: string }) => {
const client = getClickhouseExternalClient();
return await client.query({
query,
clickhouse_settings: {
SQL_project_id: projectId,
SQL_branch_id: branchId,
max_execution_time: 5,
readonly: "1",
allow_ddl: 0,
max_result_rows: "10000",
max_result_bytes: (10 * 1024 * 1024).toString(),
result_overflow_mode: "throw",
},
format: "JSONEachRow",
})
.then(async (resultSet) => {
const rows = await resultSet.json<Record<string, unknown>[]>();
try {
const resultSet = await client.query({
query,
clickhouse_settings: {
SQL_project_id: projectId,
SQL_branch_id: branchId,
max_execution_time: 5,
readonly: "1",
allow_ddl: 0,
max_result_rows: "10000",
max_result_bytes: (10 * 1024 * 1024).toString(),
result_overflow_mode: "throw",
},
format: "JSONEachRow",
});
const rows = await resultSet.json<Record<string, unknown>[]>();
const response = { success: true as const, rowCount: rows.length, result: rows };
const serialized = JSON.stringify(response);
if (serialized.length > SQL_QUERY_RESULT_MAX_CHARS) {
return {
success: true as const,
success: false as const,
error:
`Result too large: ${rows.length} rows, ${serialized.length} characters (limit ${SQL_QUERY_RESULT_MAX_CHARS}). ` +
`To fix: ` +
`(1) Use aggregation (COUNT, uniqExact, GROUP BY, topK, quantile) instead of fetching rows. ` +
`(2) If you need rows, add a WHERE clause or reduce LIMIT. ` +
`(3) Select only the columns you need — avoid the 'data' column on events unless essential.`,
rowCount: rows.length,
result: rows,
characters: serialized.length,
columnsReturned: rows.length > 0 ? Object.keys(rows[0]) : [],
};
})
.catch((error: unknown) => ({
}
return response;
} catch (error) {
if (!(error instanceof ClickHouseError)) {
throw error;
}
return {
success: false as const,
error: error instanceof Error ? error.message : "Query failed",
}));
error: getSafeClickhouseErrorMessage(error, query),
};
}
},
});
}

View File

@ -0,0 +1,43 @@
import { getNodeEnvironment } from "@stackframe/stack-shared/dist/utils/env";
import { captureError, StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors";
/** ClickHouse error codes whose original message is returned to the caller unchanged. */
const SAFE_CLICKHOUSE_ERROR_CODES: number[] = [
  62, // SYNTAX_ERROR
  159, // TIMEOUT_EXCEEDED
  164, // READONLY
  158, // TOO_MANY_ROWS
  396, // TOO_MANY_ROWS_OR_BYTES
  636, // CANNOT_EXTRACT_TABLE_STRUCTURE
];

/** ClickHouse error codes that are recognized but whose message is withheld outside development/test. */
const UNSAFE_CLICKHOUSE_ERROR_CODES: number[] = [
  36, // BAD_ARGUMENTS
  43, // ILLEGAL_TYPE_OF_ARGUMENT
  47, // UNKNOWN_IDENTIFIER
  60, // UNKNOWN_TABLE
  497, // ACCESS_DENIED
];

/** Generic text returned whenever the real error message is withheld. */
const DEFAULT_CLICKHOUSE_ERROR_MESSAGE = "Error during execution of this query.";

/**
 * Turns an error thrown while running a ClickHouse query into a message that can be returned to the caller.
 *
 * - Values that do not have a numeric-string `code` and a string `message` are reported via
 *   `captureError` and mapped to the default message.
 * - Codes listed in `SAFE_CLICKHOUSE_ERROR_CODES` return the original message.
 * - Every other code returns the default message; codes that are also missing from
 *   `UNSAFE_CLICKHOUSE_ERROR_CODES` are additionally reported via `captureError`.
 * - In the "development" and "test" node environments the withheld code and message are appended.
 *
 * @param error - The value caught from the failed query.
 * @param query - The query text, attached to captured errors for diagnosis.
 * @returns The message to return to the caller.
 */
export function getSafeClickhouseErrorMessage(error: unknown, query: string) {
  if (
    typeof error === "object"
    && error !== null
    && "code" in error
    && typeof error.code === "string"
    && !isNaN(Number(error.code))
    && "message" in error
    && typeof error.message === "string"
  ) {
    const numericCode = Number(error.code);
    const rawMessage = error.message;

    if (SAFE_CLICKHOUSE_ERROR_CODES.includes(numericCode)) {
      return rawMessage;
    }

    const isRecognized = UNSAFE_CLICKHOUSE_ERROR_CODES.includes(numericCode);
    if (!isRecognized) {
      // Neither list contains the code: report it so that it can be classified.
      captureError("unknown-clickhouse-error-for-query", new StackAssertionError(`Unknown Clickhouse error: code ${numericCode} not in safe or unsafe codes`, { cause: error, query }));
    }

    const isVerboseEnvironment = getNodeEnvironment() === "development" || getNodeEnvironment() === "test";
    if (!isVerboseEnvironment) {
      return DEFAULT_CLICKHOUSE_ERROR_MESSAGE;
    }

    const unknownCodeHint = isRecognized
      ? ""
      : "\n\nThis error is not known and you should probably add it to the safe or unsafe codes in clickhouse-errors.ts.";
    return `${DEFAULT_CLICKHOUSE_ERROR_MESSAGE}${unknownCodeHint}\n\nAs you are in development mode, you can see the full error: ${numericCode} ${rawMessage}`;
  }

  // The value does not have the expected { code, message } shape.
  captureError("unknown-clickhouse-error-for-query-not-clickhouse-error", new StackAssertionError("Unknown error from Clickhouse is not a Clickhouse error", { cause: error, query }));
  return DEFAULT_CLICKHOUSE_ERROR_MESSAGE;
}

View File

@ -3,13 +3,14 @@ import { buildStackAuthHeaders, type CurrentUser } from "@/lib/api-headers";
import { getPublicEnvVar } from "@/lib/env";
import type { UIMessage } from "@ai-sdk/react";
import { ArrowSquareOutIcon, CaretDownIcon, CheckIcon, CopyIcon, DatabaseIcon, SparkleIcon, SpinnerGapIcon, UserIcon } from "@phosphor-icons/react";
import { throwErr } from "@stackframe/stack-shared/dist/utils/errors";
import { captureError, throwErr } from "@stackframe/stack-shared/dist/utils/errors";
import { runAsynchronously } from "@stackframe/stack-shared/dist/utils/promises";
import { convertToModelMessages, DefaultChatTransport } from "ai";
import { memo, useCallback, useEffect, useRef, useState } from "react";
import ReactMarkdown from "react-markdown";
import remarkGfm from "remark-gfm";
export function createAskAiTransport({
currentUser,
projectId,
@ -532,3 +533,26 @@ export function useWordStreaming(content: string) {
isRevealing: displayedWordCount < targetWordCount,
};
}
/**
 * Classifies a raw AI provider/transport error into a user-friendly message.
 *
 * The classification pattern-matches `error.message` together with `error.cause.message`
 * (when present). Errors that match no category are reported through
 * `captureError("ask-ai", error)` and mapped to a generic message. The raw error text is
 * never part of the returned string.
 *
 * @param error - The error surfaced by the chat request.
 * @returns A short message suitable for showing to the user.
 */
export function getFriendlyAiErrorMessage(error: Error): string {
  const causeMessage = (error as { cause?: { message?: string } }).cause?.message ?? "";
  const blob = `${error.message} ${causeMessage}`;
  const tooMuchDataMessage = "The query returned too much data. Try narrowing your question or requesting fewer rows.";

  if (/maximum context length|context_length_exceeded|too many tokens|context length/i.test(blob)) {
    return "The conversation got too long. Try starting a new chat or asking a more focused question.";
  }
  // Checked before the rate-limit patterns: a "Result too large" message embeds row and
  // character counts, and those numbers may themselves contain the digits 429.
  if (/result too large/i.test(blob)) {
    return tooMuchDataMessage;
  }
  // Only a standalone 429 counts as a status code; digits inside a longer number do not.
  if (/rate limit|(?<!\d)429(?!\d)|quota|too many requests/i.test(blob)) {
    return "Service is busy. Please try again in a moment.";
  }
  if (/timeout|ECONNRESET|fetch failed|network/i.test(blob)) {
    return "Request timed out. Please try again.";
  }
  // Broader fallback for size-limit messages that do not use the exact "Result too large" wording.
  if (/limit \d+/i.test(blob)) {
    return tooMuchDataMessage;
  }

  // Unclassified — this is unexpected, report it
  captureError("ask-ai", error);
  return "Something went wrong. Please try again.";
}

View File

@ -10,10 +10,11 @@ import { CmdKPreviewProps } from "../cmdk-commands";
import {
AssistantMessage,
createAskAiTransport,
getFriendlyAiErrorMessage,
getMessageContent,
getToolInvocations,
UserMessage,
useWordStreaming,
useWordStreaming
} from "./ai-chat-shared";
@ -216,7 +217,7 @@ const AIChatPreviewInner = memo(function AIChatPreview({
{aiError && (
<div className="flex items-start gap-2 text-[12px] text-red-400/90 px-3 py-2 bg-red-500/[0.08] rounded-lg ring-1 ring-red-500/20">
<span className="shrink-0 mt-0.5"></span>
<span>{aiError.message || "Failed to get response. Please try again."}</span>
<span>{getFriendlyAiErrorMessage(aiError)}</span>
</div>
)}
</div>