Address Greptile review: escape agent prompt, fix result detection, roll back all agent-written files, fix abort masking

Co-Authored-By: mantra <mantra@stack-auth.com>
2026-06-04 21:04:37 +08:00 · 2026-06-02 20:26:58 +00:00 · 2026-06-02 20:26:58 +00:00 · 14bd4254fc
commit 14bd4254fc
parent e3c7065c4f
3 changed files with 136 additions and 33 deletions
--- a/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts
+++ b/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts
@ -1,15 +1,16 @@
-import { mkdtempSync, readFileSync, rmSync, writeFileSync } from "fs";
+import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "fs";
 import { join } from "path";
 import { afterEach, describe, expect, it, vi } from "vitest";

 // Lets each AI-path test inject what the (otherwise network-backed) agent does
 // to the files on disk, so we can exercise the orchestration and validation
 // around `updateConfigObject` without calling a real model.
-let mockAgentImpl: ((options: { prompt: string, cwd: string }) => void | Promise<void>) | null = null;
+type MockAgentOptions = { prompt: string, cwd: string, onFileWillChange?: (filePath: string) => void };
+let mockAgentImpl: ((options: MockAgentOptions) => void | Promise<void>) | null = null;

 vi.mock("server-only", () => ({}));
 vi.mock("./config-update-agent", () => ({
-  runConfigUpdateAgent: async (options: { prompt: string, cwd: string }) => {
+  runConfigUpdateAgent: async (options: MockAgentOptions) => {
    if (mockAgentImpl == null) {
      throw new Error("runConfigUpdateAgent was called but no mock implementation was set for this test.");
    }
@ -19,9 +20,20 @@ vi.mock("./config-update-agent", () => ({

 let tempDir: string | undefined;

+function getTempDir(): string {
+  if (tempDir == null) {
+    tempDir = mkdtempSync(join(process.cwd(), ".stack-rde-config-test-"));
+    // Give the temp dir its own package.json so SDK-package detection (which
+    // walks up to the nearest package.json) resolves deterministically here
+    // instead of picking up the dashboard's own dependencies, which would make
+    // the rendered `StackConfig` import env-dependent.
+    writeFileSync(join(tempDir, "package.json"), JSON.stringify({ name: "stack-rde-config-test" }), "utf-8");
+  }
+  return tempDir;
+}
+
 function writeTempFile(name: string, content: string): string {
-  tempDir ??= mkdtempSync(join(process.cwd(), ".stack-rde-config-test-"));
-  const filePath = join(tempDir, name);
+  const filePath = join(getTempDir(), name);
  writeFileSync(filePath, content, "utf-8");
  return filePath;
 }
@ -331,6 +343,32 @@ describe("remote development environment config file", () => {
    expect(readFileSync(templatePath, "utf-8")).toBe("export default <div>Old email</div>;\n");
  });

+  it("rolls back a brand-new file the agent creates outside the statically-imported set", async () => {
+    // A wrapped config takes the agent path (it isn't a plain static literal),
+    // so the agent mock actually runs.
+    const configSource = `import { defineStackConfig } from "@hexclave/shared/config";\nexport const config = defineStackConfig({ auth: { allowSignUp: true } });\n`;
+    const configPath = writeTempConfig(configSource);
+    const newFilePath = join(getTempDir(), "generated-extra.ts");
+
+    const { updateConfigObject } = await import("./config-file");
+
+    // The agent creates a file that the config doesn't statically import, so the
+    // only way it can be rolled back is via the `onFileWillChange` hook firing
+    // before the write. After creating it, the agent fails so the run must roll
+    // back — deleting the newly-created file.
+    mockAgentImpl = (options) => {
+      options.onFileWillChange?.(newFilePath);
+      writeFileSync(newFilePath, "export const extra = true;\n", "utf-8");
+      throw new Error("agent blew up");
+    };
+
+    await expect(updateConfigObject(configPath, { "auth.allowSignUp": false }))
+      .rejects.toThrow("agent blew up");
+
+    expect(existsSync(newFilePath)).toBe(false);
+    expect(readFileSync(configPath, "utf-8")).toBe(configSource);
+  });
+
  it("fails a non-evaluable update when the agent leaves every file unchanged", async () => {
    const templatePath = writeTempFile("welcome-email.tsx", "export default <div>Old email</div>;\n");
    const configSource = `import welcomeEmail from "./welcome-email.tsx" with { type: "text" };\n\nexport const config = {\n  emails: { templates: { welcome: welcomeEmail } },\n};\n`;
--- a/apps/dashboard/src/lib/remote-development-environment/config-file.ts
+++ b/apps/dashboard/src/lib/remote-development-environment/config-file.ts
@ -166,14 +166,18 @@ export async function updateConfigObject(configFilePath: string, configUpdate: C
  // full semantic check afterwards), then let the agent apply the change.
  const baselineConfig = await tryReadConfigForValidation(configFilePath);

-  // Snapshot the config file and any externally-referenced files the agent might
-  // edit, so we can roll back to the original state if the agent fails or its
-  // result doesn't validate — never leaving a half-applied update behind.
+  // Snapshot the config file and its statically-referenced imports up front, then
+  // additionally capture any file the agent is about to write *before* it writes
+  // (via `onFileWillChange`). Together these let us roll back to the exact
+  // original state if the agent fails or its result doesn't validate — including
+  // brand-new files and files the agent edits that weren't statically imported —
+  // so we never leave a half-applied update behind.
  const snapshots = snapshotConfigFiles(configFilePath, content);
  try {
    await runConfigUpdateAgent({
      prompt: buildConfigUpdatePrompt(path.basename(configFilePath), configUpdate),
      cwd: path.dirname(configFilePath),
+      onFileWillChange: (filePath) => captureSnapshotIfAbsent(snapshots, filePath),
    });
    await validateAgentUpdate(configFilePath, baselineConfig, configUpdate, snapshots);
  } catch (error) {
@ -186,26 +190,30 @@ export async function updateConfigObject(configFilePath: string, configUpdate: C
 type ConfigFileSnapshot = { path: string, content: string | null };

 /**
- * Captures the config file plus every file it imports via a relative path (e.g.
- * `import x from "./welcome-email.tsx" with { type: "text" }`), which are exactly
- * the files an in-place update is expected to touch. Files are read as UTF-8
- * text, which matches this feature's text-import use case.
+ * Captures a file's current on-disk content into `snapshots` (recording `null`
+ * if it doesn't exist) unless it's already captured. Paths are resolved to
+ * absolute form so the same file is never snapshotted twice under different
+ * spellings. Files are read as UTF-8 text, which matches this feature's
+ * text-import use case.
+ */
+function captureSnapshotIfAbsent(snapshots: ConfigFileSnapshot[], filePath: string): void {
+  const resolved = path.resolve(filePath);
+  if (snapshots.some((snapshot) => snapshot.path === resolved)) return;
+  snapshots.push({ path: resolved, content: existsSync(resolved) ? readFileSync(resolved, "utf-8") : null });
+}
+
+/**
+ * Seeds the rollback set with the config file plus every file it imports via a
+ * relative path (e.g. `import x from "./welcome-email.tsx" with { type: "text" }`),
+ * which are exactly the files an in-place update is expected to touch. Any other
+ * file the agent ends up writing is captured on the fly via `onFileWillChange`,
+ * so this static pass is just a best-effort head start, not the full guarantee.
 */
 function snapshotConfigFiles(configFilePath: string, configContent: string): ConfigFileSnapshot[] {
  const dir = path.dirname(configFilePath);
-  const snapshots: ConfigFileSnapshot[] = [{ path: configFilePath, content: configContent }];
+  const snapshots: ConfigFileSnapshot[] = [{ path: path.resolve(configFilePath), content: configContent }];
  for (const specifier of getRelativeImportSpecifiers(configContent)) {
-    const resolved = path.resolve(dir, specifier);
-    // Skip imports that resolve outside the config directory (e.g. `../shared/x`).
-    // The agent runs with its cwd set to the config directory, so it can only edit
-    // files within it; parent-directory imports are valid project layouts but
-    // simply aren't snapshot/rollback targets here (the agent won't touch them).
-    const relative = path.relative(dir, resolved);
-    if (relative.startsWith("..") || path.isAbsolute(relative)) {
-      continue;
-    }
-    if (snapshots.some((snapshot) => snapshot.path === resolved)) continue;
-    snapshots.push({ path: resolved, content: existsSync(resolved) ? readFileSync(resolved, "utf-8") : null });
+    captureSnapshotIfAbsent(snapshots, path.resolve(dir, specifier));
  }
  return snapshots;
 }
@ -312,7 +320,11 @@ function flattenConfigUpdate(update: Config): ConfigChange[] {
 function buildConfigUpdatePrompt(configFileName: string, configUpdate: Config): string {
  const changes = flattenConfigUpdate(configUpdate);
  const changeLines = changes.map(({ path: configPath, value }) => {
-    return `- \`${configPath}\`: set to ${JSON.stringify(value)}`;
+    // Both the path and value come from the (untrusted) config update, so they're
+    // JSON-encoded rather than interpolated raw — this escapes backticks, quotes,
+    // and newlines that could otherwise break out of the prompt's formatting or
+    // inject extra instructions.
+    return `- ${JSON.stringify(configPath)}: set to ${JSON.stringify(value)}`;
  }).join("\n");

  return `You are editing a Hexclave / Stack Auth configuration file in place. Apply a set of configuration changes WITHOUT changing how the file is written.
--- a/apps/dashboard/src/lib/remote-development-environment/config-update-agent.ts
+++ b/apps/dashboard/src/lib/remote-development-environment/config-update-agent.ts
@ -1,5 +1,6 @@
 import "server-only";

+import path from "path";
 import { query } from "@anthropic-ai/claude-agent-sdk";

 /**
@ -33,6 +34,36 @@ const AGENT_TIMEOUT_MS: number = (() => {
  return parsed;
 })();

+/**
+ * True for the error produced when an `AbortController` aborts an awaited
+ * operation. The SDK surfaces this as either a real `AbortError` or a
+ * `DOMException` whose `name` is `"AbortError"`.
+ */
+function isAbortError(error: unknown): boolean {
+  return error instanceof Error && error.name === "AbortError";
+}
+
+// The file-mutating tools we allow the agent to use, each of which takes the
+// target file in a `file_path` field. We watch these so callers can snapshot a
+// file for rollback *before* the agent overwrites it.
+const FILE_MUTATING_TOOLS = new Set(["Write", "Edit", "MultiEdit", "NotebookEdit"]);
+
+function hasStringFilePath(input: unknown): input is { file_path: string } {
+  return typeof input === "object" && input !== null && "file_path" in input && typeof input.file_path === "string";
+}
+
+/**
+ * Returns the absolute path a file-mutating tool call is about to write, or
+ * `null` if the tool doesn't write a file. Relative paths are resolved against
+ * the agent's `cwd` to match where the tool will actually write.
+ */
+function getToolWriteTargetPath(toolName: string, toolInput: unknown, cwd: string): string | null {
+  if (!FILE_MUTATING_TOOLS.has(toolName) || !hasStringFilePath(toolInput)) {
+    return null;
+  }
+  return path.isAbsolute(toolInput.file_path) ? toolInput.file_path : path.resolve(cwd, toolInput.file_path);
+}
+
 function stripClaudeCodeEnv(): Record<string, string | undefined> {
  const env = { ...process.env };
  // Removing CLAUDECODE prevents the SDK from detecting a nested agent. The
@ -51,6 +82,11 @@ function stripClaudeCodeEnv(): Record<string, string | undefined> {
 export async function runConfigUpdateAgent(options: {
  prompt: string,
  cwd: string,
+  // Called with the absolute path of each file the agent is about to write or
+  // edit, *before* the change happens, so the caller can capture the original
+  // content for rollback even when the file isn't statically referenced by the
+  // config (a new file, or an existing-but-unimported file).
+  onFileWillChange?: (filePath: string) => void,
 }): Promise<void> {
  const abortController = new AbortController();
  const timeout = setTimeout(() => abortController.abort(), AGENT_TIMEOUT_MS);
@ -66,6 +102,17 @@ export async function runConfigUpdateAgent(options: {
        // runs server-side so it must be fully isolated from the host environment.
        settingSources: [],
        strictMcpConfig: true,
+        hooks: options.onFileWillChange == null ? undefined : {
+          PreToolUse: [{
+            hooks: [async (input) => {
+              if (input.hook_event_name === "PreToolUse") {
+                const target = getToolWriteTargetPath(input.tool_name, input.tool_input, options.cwd);
+                if (target != null) options.onFileWillChange?.(target);
+              }
+              return { continue: true };
+            }],
+          }],
+        },
        // Bash is intentionally omitted: applying a config delta only needs file
        // inspection and editing, and withholding shell access reduces the blast
        // radius of running an agent against the user's project.
@ -82,17 +129,23 @@ export async function runConfigUpdateAgent(options: {
        stderr: (data: string) => { console.warn(`${LOG_PREFIX} [agent] ${data}`); },
      },
    })) {
-      // Detect the terminal message via `"result" in message` to match the CLI
-      // (`runClaudeAgent`); only a successful result carries a `result` field,
-      // so any other terminal subtype is reported as a failure.
-      if ("result" in message) {
-        sawResult = true;
-      } else if (message.type === "result") {
-        throw new Error(`Config update agent failed (${message.subtype}). It was unable to apply the config changes to the file.`);
+      // Only the terminal `result` message signals completion; a successful one
+      // carries a `result` field, so any other `result`-type subtype is a
+      // failure. Gating both branches on `type === "result"` avoids treating an
+      // intermediate message that happens to carry a `result` property as done.
+      if (message.type === "result") {
+        if ("result" in message) {
+          sawResult = true;
+        } else {
+          throw new Error(`Config update agent failed (${message.subtype}). It was unable to apply the config changes to the file.`);
+        }
      }
    }
  } catch (error) {
-    if (abortController.signal.aborted) {
+    // Only translate to a timeout error when the failure is the abort we
+    // triggered; otherwise a real agent error that races with the timeout would
+    // be masked by the generic "timed out" message.
+    if (abortController.signal.aborted && isAbortError(error)) {
      throw new Error(`Config update agent timed out after ${AGENT_TIMEOUT_MS}ms. It was unable to apply the config changes to the file.`);
    }
    throw error;