Ask AI Huge Response (#1328)

This PR fixes a bug where the analytics tool could return a large number of rows, resulting in a huge token count. The fix checks the number of characters in the serialized tool result; if it exceeds 50,000 characters, we return an error message instead of the rows and ask the AI to make more focused queries.

## Summary by CodeRabbit

* **New Features**
  * AI assistant shows friendlier, categorized error messages and captures unexpected errors for diagnosis.
  * UI now displays classifier-derived, user-friendly AI error text.
* **Bug Fixes & Improvements**
  * Enforced a hard size budget for SQL query results, with graceful handling of oversized responses.
  * Centralized safer database error messaging to avoid leaking internal details.
  * Strengthened AI guidance to prefer narrower queries, safer column selection, and pairing GROUP BY with ORDER BY + LIMIT.
2026-07-20 21:29:36 +08:00 · 2026-04-13 15:12:07 -07:00 · 2026-04-13 15:12:07 -07:00 · 5573927429
commit 5573927429
parent c46767f4e8
6 changed files with 155 additions and 67 deletions
--- a/apps/backend/src/app/api/latest/internal/analytics/query/route.ts
+++ b/apps/backend/src/app/api/latest/internal/analytics/query/route.ts
@ -1,9 +1,9 @@
 import { getClickhouseExternalClient } from "@/lib/clickhouse";
+import { getSafeClickhouseErrorMessage } from "@/lib/clickhouse-errors";
 import { createSmartRouteHandler } from "@/route-handlers/smart-route-handler";
 import { KnownErrors } from "@stackframe/stack-shared";
 import { adaptSchema, adminAuthTypeSchema, jsonSchema, yupBoolean, yupMixed, yupNumber, yupObject, yupRecord, yupString } from "@stackframe/stack-shared/dist/schema-fields";
-import { getNodeEnvironment } from "@stackframe/stack-shared/dist/utils/env";
-import { captureError, StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors";
+import { StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors";
 import { Result } from "@stackframe/stack-shared/dist/utils/results";
 import { randomUUID } from "crypto";

@ -72,45 +72,5 @@ export const POST = createSmartRouteHandler({
  },
 });

-const SAFE_CLICKHOUSE_ERROR_CODES = [
-  62, // SYNTAX_ERROR
-  159, // TIMEOUT_EXCEEDED
-  164, // READONLY
-  158, // TOO_MANY_ROWS
-  396, // TOO_MANY_ROWS_OR_BYTES
-  636, // CANNOT_EXTRACT_TABLE_STRUCTURE
-];
-
-const UNSAFE_CLICKHOUSE_ERROR_CODES = [
-  36, // BAD_ARGUMENTS
-  43, // ILLEGAL_TYPE_OF_ARGUMENT
-  47, // UNKNOWN_IDENTIFIER
-  60, // UNKNOWN_TABLE
-  497, // ACCESS_DENIED
-];
-
-const DEFAULT_CLICKHOUSE_ERROR_MESSAGE = "Error during execution of this query.";
 const MAX_RESULT_ROWS = 10_000;
 const MAX_RESULT_BYTES = 10 * 1024 * 1024;
-
-function getSafeClickhouseErrorMessage(error: unknown, query: string) {
-  if (typeof error !== "object" || error === null || !("code" in error) || typeof error.code !== "string" || isNaN(Number(error.code)) || !("message" in error) || typeof error.message !== "string") {
-    captureError("unknown-clickhouse-error-for-query-not-clickhouse-error", new StackAssertionError("Unknown error from Clickhouse is not a Clickhouse error", { cause: error, query: query }));
-    return DEFAULT_CLICKHOUSE_ERROR_MESSAGE;
-  }
-
-  const errorCode = Number(error.code);
-  const message = error.message;
-  if (SAFE_CLICKHOUSE_ERROR_CODES.includes(errorCode)) {
-    return message;
-  }
-  const isKnown = UNSAFE_CLICKHOUSE_ERROR_CODES.includes(errorCode);
-  if (!isKnown) {
-    captureError("unknown-clickhouse-error-for-query", new StackAssertionError(`Unknown Clickhouse error: code ${errorCode} not in safe or unsafe codes`, { cause: error, query: query }));
-  }
-
-  if (getNodeEnvironment() === "development" || getNodeEnvironment() === "test") {
-    return `${DEFAULT_CLICKHOUSE_ERROR_MESSAGE}${!isKnown ? "\n\nThis error is not known and you should probably add it to the safe or unsafe codes in analytics/query/route.ts." : ""}\n\nAs you are in development mode, you can see the full error: ${errorCode} ${message}`;
-  }
-  return DEFAULT_CLICKHOUSE_ERROR_MESSAGE;
-}
--- a/apps/backend/src/lib/ai/prompts.ts
+++ b/apps/backend/src/lib/ai/prompts.ts
@ -1,3 +1,5 @@
+import { SQL_QUERY_RESULT_MAX_CHARS } from "@/lib/ai/tools/sql-query";
+
 /**
 * Base prompt for all Stack Auth AI interactions.
 * Contains global guidelines and core knowledge about Stack Auth.
@ -102,6 +104,44 @@ SQL QUERY GUIDELINES:
  - Recent signups: SELECT * FROM users ORDER BY signed_up_at DESC LIMIT 10
  - Events today: SELECT COUNT(*) FROM events WHERE toDate(event_at) = today()
  - Event types: SELECT event_type, COUNT(*) as count FROM events GROUP BY event_type ORDER BY count DESC LIMIT 10
+
+TOOL RESULT BUDGET (HARD LIMIT):
+- The queryAnalytics tool returns { success: false } if the result JSON exceeds ${SQL_QUERY_RESULT_MAX_CHARS.toLocaleString()} characters.
+  NO ROWS reach you in that case — you get { success: false, error, rowCount, characters, columnsReturned }
+  and you MUST re-query with a more specific SQL statement.
+- The events.data JSON blob typically triples per-row cost. Never SELECT * on events unless you have
+  a very small LIMIT and truly need every column.
+
+PREFER AGGREGATION OVER RAW ROWS:
+For "how many", "top N", "distribution", "unique count", "average", "over time" questions,
+push the math into SQL using ClickHouse functions. Examples:
+
+  Count:              SELECT COUNT(*) FROM events WHERE event_type='$token-refresh' AND event_at >= today()
+  Distinct count:     SELECT uniqExact(user_id) FROM events WHERE event_at >= today() - INTERVAL 7 DAY
+  Top N:              SELECT user_id, COUNT(*) AS c FROM events GROUP BY user_id ORDER BY c DESC LIMIT 10
+  Quantiles:          SELECT quantile(0.5)(c), quantile(0.95)(c) FROM (SELECT user_id, COUNT(*) AS c FROM events GROUP BY user_id)
+  Time bucketing:     SELECT toStartOfHour(event_at) AS bucket, COUNT(*) AS c FROM events
+                      WHERE event_at >= now() - INTERVAL 1 DAY GROUP BY bucket ORDER BY bucket
+  JSON key discovery: SELECT arrayJoin(JSONExtractKeys(data)) AS k, COUNT(*) AS c FROM events
+                      GROUP BY k ORDER BY c DESC LIMIT 20
+  Multi-metric:       SELECT COUNT(*), uniqExact(user_id), min(event_at), max(event_at)
+                      FROM events WHERE event_type='$token-refresh'
+
+WHEN INDIVIDUAL ROWS MATTER (user explicitly asked to see records):
+- ALWAYS use LIMIT <= 50.
+- ALWAYS specify the exact columns you need — never SELECT * on events.
+- Drop the 'data' column unless the user specifically asked about event payloads.
+
+GROUP BY REQUIRES ORDER BY + LIMIT unless you expect <= 50 groups, otherwise the result may
+exceed the ${SQL_QUERY_RESULT_MAX_CHARS.toLocaleString()}-character budget and fail.
+
+HANDLING { success: false } ERRORS:
+When the tool returns success:false with "Result too large":
+1. Read rowCount — if it's large (>100), switch to aggregation (COUNT, uniqExact, GROUP BY...).
+2. Read columnsReturned — if it includes 'data', re-query without it.
+3. Re-query with a narrower WHERE clause or a smaller LIMIT.
+4. Do NOT present the error to the user — fix the query and try again.
+5. Do NOT claim you saw rows that you didn't — the error response contains no row data.
 `,
  "docs-ask-ai": `
  # Stack Auth AI Assistant System Prompt
--- a/apps/backend/src/lib/ai/tools/sql-query.ts
+++ b/apps/backend/src/lib/ai/tools/sql-query.ts
@ -1,8 +1,12 @@
 import { getClickhouseExternalClient } from "@/lib/clickhouse";
+import { getSafeClickhouseErrorMessage } from "@/lib/clickhouse-errors";
 import { SmartRequestAuth } from "@/route-handlers/smart-request";
+import { ClickHouseError } from "@clickhouse/client";
 import { tool } from "ai";
 import { z } from "zod";

+export const SQL_QUERY_RESULT_MAX_CHARS = 50_000;
+
 export function createSqlQueryTool(auth: SmartRequestAuth | null, targetProjectId?: string | null) {
  if (auth == null) {
    // Return null or throw - analytics queries require authentication
@ -21,32 +25,48 @@ export function createSqlQueryTool(auth: SmartRequestAuth | null, targetProjectI
    }),
    execute: async ({ query }: { query: string }) => {
      const client = getClickhouseExternalClient();
-      return await client.query({
-        query,
-        clickhouse_settings: {
-          SQL_project_id: projectId,
-          SQL_branch_id: branchId,
-          max_execution_time: 5,
-          readonly: "1",
-          allow_ddl: 0,
-          max_result_rows: "10000",
-          max_result_bytes: (10 * 1024 * 1024).toString(),
-          result_overflow_mode: "throw",
-        },
-        format: "JSONEachRow",
-      })
-        .then(async (resultSet) => {
-          const rows = await resultSet.json<Record<string, unknown>[]>();
+      try {
+        const resultSet = await client.query({
+          query,
+          clickhouse_settings: {
+            SQL_project_id: projectId,
+            SQL_branch_id: branchId,
+            max_execution_time: 5,
+            readonly: "1",
+            allow_ddl: 0,
+            max_result_rows: "10000",
+            max_result_bytes: (10 * 1024 * 1024).toString(),
+            result_overflow_mode: "throw",
+          },
+          format: "JSONEachRow",
+        });
+        const rows = await resultSet.json<Record<string, unknown>[]>();
+        const response = { success: true as const, rowCount: rows.length, result: rows };
+        const serialized = JSON.stringify(response);
+        if (serialized.length > SQL_QUERY_RESULT_MAX_CHARS) {
          return {
-            success: true as const,
+            success: false as const,
+            error:
+              `Result too large: ${rows.length} rows, ${serialized.length} characters (limit ${SQL_QUERY_RESULT_MAX_CHARS}). ` +
+              `To fix: ` +
+              `(1) Use aggregation (COUNT, uniqExact, GROUP BY, topK, quantile) instead of fetching rows. ` +
+              `(2) If you need rows, add a WHERE clause or reduce LIMIT. ` +
+              `(3) Select only the columns you need — avoid the 'data' column on events unless essential.`,
            rowCount: rows.length,
-            result: rows,
+            characters: serialized.length,
+            columnsReturned: rows.length > 0 ? Object.keys(rows[0]) : [],
          };
-        })
-        .catch((error: unknown) => ({
+        }
+        return response;
+      } catch (error) {
+        if (!(error instanceof ClickHouseError)) {
+          throw error;
+        }
+        return {
          success: false as const,
-          error: error instanceof Error ? error.message : "Query failed",
-        }));
+          error: getSafeClickhouseErrorMessage(error, query),
+        };
+      }
    },
  });
 }
--- a/apps/backend/src/lib/clickhouse-errors.ts
+++ b/apps/backend/src/lib/clickhouse-errors.ts
@ -0,0 +1,43 @@
+import { getNodeEnvironment } from "@stackframe/stack-shared/dist/utils/env";
+import { captureError, StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors";
+
+const SAFE_CLICKHOUSE_ERROR_CODES = [ // codes whose ClickHouse message is returned to the caller as-is
+  62, // SYNTAX_ERROR
+  159, // TIMEOUT_EXCEEDED
+  164, // READONLY
+  158, // TOO_MANY_ROWS
+  396, // TOO_MANY_ROWS_OR_BYTES
+  636, // CANNOT_EXTRACT_TABLE_STRUCTURE
+];
+
+const UNSAFE_CLICKHOUSE_ERROR_CODES = [ // recognized codes whose message is withheld (full text only in development/test)
+  36, // BAD_ARGUMENTS
+  43, // ILLEGAL_TYPE_OF_ARGUMENT
+  47, // UNKNOWN_IDENTIFIER
+  60, // UNKNOWN_TABLE
+  497, // ACCESS_DENIED
+];
+
+const DEFAULT_CLICKHOUSE_ERROR_MESSAGE = "Error during execution of this query."; // generic fallback returned in place of a withheld message
+
+// Returns a caller-safe message for a failed ClickHouse query: SAFE codes pass their message through, everything else gets the default (plus full details in development/test).
+export function getSafeClickhouseErrorMessage(error: unknown, query: string) {
+  if (typeof error !== "object" || error === null || !("code" in error) || typeof error.code !== "string" || isNaN(Number(error.code)) || !("message" in error) || typeof error.message !== "string") {
+    captureError("unknown-clickhouse-error-for-query-not-clickhouse-error", new StackAssertionError("Unknown error from Clickhouse is not a Clickhouse error", { cause: error, query: query }));
+    return DEFAULT_CLICKHOUSE_ERROR_MESSAGE;
+  }
+  const rawMessage = error.message;
+  const numericCode = Number(error.code);
+  if (SAFE_CLICKHOUSE_ERROR_CODES.includes(numericCode)) {
+    return rawMessage;
+  }
+  const recognized = UNSAFE_CLICKHOUSE_ERROR_CODES.includes(numericCode);
+  if (!recognized) {
+    captureError("unknown-clickhouse-error-for-query", new StackAssertionError(`Unknown Clickhouse error: code ${numericCode} not in safe or unsafe codes`, { cause: error, query: query }));
+  }
+  const unknownCodeHint = recognized ? "" : "\n\nThis error is not known and you should probably add it to the safe or unsafe codes in clickhouse-errors.ts.";
+  if (getNodeEnvironment() !== "development" && getNodeEnvironment() !== "test") {
+    return DEFAULT_CLICKHOUSE_ERROR_MESSAGE;
+  }
+  return `${DEFAULT_CLICKHOUSE_ERROR_MESSAGE}${unknownCodeHint}\n\nAs you are in development mode, you can see the full error: ${numericCode} ${rawMessage}`;
+}
--- a/apps/dashboard/src/components/commands/ai-chat-shared.tsx
+++ b/apps/dashboard/src/components/commands/ai-chat-shared.tsx
@ -3,13 +3,14 @@ import { buildStackAuthHeaders, type CurrentUser } from "@/lib/api-headers";
 import { getPublicEnvVar } from "@/lib/env";
 import type { UIMessage } from "@ai-sdk/react";
 import { ArrowSquareOutIcon, CaretDownIcon, CheckIcon, CopyIcon, DatabaseIcon, SparkleIcon, SpinnerGapIcon, UserIcon } from "@phosphor-icons/react";
-import { throwErr } from "@stackframe/stack-shared/dist/utils/errors";
+import { captureError, throwErr } from "@stackframe/stack-shared/dist/utils/errors";
 import { runAsynchronously } from "@stackframe/stack-shared/dist/utils/promises";
 import { convertToModelMessages, DefaultChatTransport } from "ai";
 import { memo, useCallback, useEffect, useRef, useState } from "react";
 import ReactMarkdown from "react-markdown";
 import remarkGfm from "remark-gfm";

+
 export function createAskAiTransport({
  currentUser,
  projectId,
@ -532,3 +533,26 @@ export function useWordStreaming(content: string) {
    isRevealing: displayedWordCount < targetWordCount,
  };
 }
+
+
+// Classifies raw AI provider errors (error.message plus an optional cause.message) into user-friendly text; checks run in order and the first match wins.
+// Only unclassified errors are captured to Sentry via captureError — the raw error text is never shown to the user.
+export function getFriendlyAiErrorMessage(error: Error): string {
+  const causeMessage = (error as { cause?: { message?: string } }).cause?.message ?? "";
+  const blob = `${error.message} ${causeMessage}`;
+  if (/maximum context length|context_length_exceeded|too many tokens|context length/i.test(blob)) {
+    return "The conversation got too long. Try starting a new chat or asking a more focused question.";
+  }
+  if (/rate limit|\b429\b|quota|too many requests/i.test(blob)) { // \b429\b: only a standalone status code, not digits inside a larger number such as "64291 characters"
+    return "Service is busy. Please try again in a moment.";
+  }
+  if (/timeout|ECONNRESET|fetch failed|network/i.test(blob)) {
+    return "Request timed out. Please try again.";
+  }
+  if (/result too large|limit \d+/i.test(blob)) {
+    return "The query returned too much data. Try narrowing your question or requesting fewer rows.";
+  }
+  // Unclassified — this is unexpected, report it
+  captureError("ask-ai", error);
+  return "Something went wrong. Please try again.";
+}
--- a/apps/dashboard/src/components/commands/ask-ai.tsx
+++ b/apps/dashboard/src/components/commands/ask-ai.tsx
@ -10,10 +10,11 @@ import { CmdKPreviewProps } from "../cmdk-commands";
 import {
  AssistantMessage,
  createAskAiTransport,
+  getFriendlyAiErrorMessage,
  getMessageContent,
  getToolInvocations,
  UserMessage,
-  useWordStreaming,
+  useWordStreaming
 } from "./ai-chat-shared";


@ -216,7 +217,7 @@ const AIChatPreviewInner = memo(function AIChatPreview({
        {aiError && (
          <div className="flex items-start gap-2 text-[12px] text-red-400/90 px-3 py-2 bg-red-500/[0.08] rounded-lg ring-1 ring-red-500/20">
            <span className="shrink-0 mt-0.5">⚠</span>
-            <span>{aiError.message || "Failed to get response. Please try again."}</span>
+            <span>{getFriendlyAiErrorMessage(aiError)}</span>
          </div>
        )}
      </div>