From 14bd4254fc6a9c2cdbaa09c094025d973cec8490 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 2 Jun 2026 20:26:58 +0000 Subject: [PATCH] Address Greptile review: escape agent prompt, fix result detection, roll back all agent-written files, fix abort masking Co-Authored-By: mantra --- .../config-file.test.ts | 48 +++++++++++-- .../config-file.ts | 52 ++++++++------ .../config-update-agent.ts | 69 ++++++++++++++++--- 3 files changed, 136 insertions(+), 33 deletions(-) diff --git a/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts b/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts index 3bedcc2a9..64f9d40da 100644 --- a/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts +++ b/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts @@ -1,15 +1,16 @@ -import { mkdtempSync, readFileSync, rmSync, writeFileSync } from "fs"; +import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "fs"; import { join } from "path"; import { afterEach, describe, expect, it, vi } from "vitest"; // Lets each AI-path test inject what the (otherwise network-backed) agent does // to the files on disk, so we can exercise the orchestration and validation // around `updateConfigObject` without calling a real model. -let mockAgentImpl: ((options: { prompt: string, cwd: string }) => void | Promise) | null = null; +type MockAgentOptions = { prompt: string, cwd: string, onFileWillChange?: (filePath: string) => void }; +let mockAgentImpl: ((options: MockAgentOptions) => void | Promise) | null = null; vi.mock("server-only", () => ({})); vi.mock("./config-update-agent", () => ({ - runConfigUpdateAgent: async (options: { prompt: string, cwd: string }) => { + runConfigUpdateAgent: async (options: MockAgentOptions) => { if (mockAgentImpl == null) { throw new Error("runConfigUpdateAgent was called but no mock implementation was set for this test."); } @@ -19,9 +20,20 @@ vi.mock("./config-update-agent", () => ({ let tempDir: string | undefined; +function getTempDir(): string { + if (tempDir == null) { + tempDir = mkdtempSync(join(process.cwd(), ".stack-rde-config-test-")); + // Give the temp dir its own package.json so SDK-package detection (which + // walks up to the nearest package.json) resolves deterministically here + // instead of picking up the dashboard's own dependencies, which would make + // the rendered `StackConfig` import env-dependent. + writeFileSync(join(tempDir, "package.json"), JSON.stringify({ name: "stack-rde-config-test" }), "utf-8"); + } + return tempDir; +} + function writeTempFile(name: string, content: string): string { - tempDir ??= mkdtempSync(join(process.cwd(), ".stack-rde-config-test-")); - const filePath = join(tempDir, name); + const filePath = join(getTempDir(), name); writeFileSync(filePath, content, "utf-8"); return filePath; } @@ -331,6 +343,32 @@ describe("remote development environment config file", () => { expect(readFileSync(templatePath, "utf-8")).toBe("export default
Old email
;\n"); }); + it("rolls back a brand-new file the agent creates outside the statically-imported set", async () => { + // A wrapped config takes the agent path (it isn't a plain static literal), + // so the agent mock actually runs. + const configSource = `import { defineStackConfig } from "@hexclave/shared/config";\nexport const config = defineStackConfig({ auth: { allowSignUp: true } });\n`; + const configPath = writeTempConfig(configSource); + const newFilePath = join(getTempDir(), "generated-extra.ts"); + + const { updateConfigObject } = await import("./config-file"); + + // The agent creates a file that the config doesn't statically import, so the + // only way it can be rolled back is via the `onFileWillChange` hook firing + // before the write. After creating it, the agent fails so the run must roll + // back — deleting the newly-created file. + mockAgentImpl = (options) => { + options.onFileWillChange?.(newFilePath); + writeFileSync(newFilePath, "export const extra = true;\n", "utf-8"); + throw new Error("agent blew up"); + }; + + await expect(updateConfigObject(configPath, { "auth.allowSignUp": false })) + .rejects.toThrow("agent blew up"); + + expect(existsSync(newFilePath)).toBe(false); + expect(readFileSync(configPath, "utf-8")).toBe(configSource); + }); + it("fails a non-evaluable update when the agent leaves every file unchanged", async () => { const templatePath = writeTempFile("welcome-email.tsx", "export default
Old email
;\n"); const configSource = `import welcomeEmail from "./welcome-email.tsx" with { type: "text" };\n\nexport const config = {\n emails: { templates: { welcome: welcomeEmail } },\n};\n`; diff --git a/apps/dashboard/src/lib/remote-development-environment/config-file.ts b/apps/dashboard/src/lib/remote-development-environment/config-file.ts index e72b06d25..14b1cd19f 100644 --- a/apps/dashboard/src/lib/remote-development-environment/config-file.ts +++ b/apps/dashboard/src/lib/remote-development-environment/config-file.ts @@ -166,14 +166,18 @@ export async function updateConfigObject(configFilePath: string, configUpdate: C // full semantic check afterwards), then let the agent apply the change. const baselineConfig = await tryReadConfigForValidation(configFilePath); - // Snapshot the config file and any externally-referenced files the agent might - // edit, so we can roll back to the original state if the agent fails or its - // result doesn't validate — never leaving a half-applied update behind. + // Snapshot the config file and its statically-referenced imports up front, then + // additionally capture any file the agent is about to write *before* it writes + // (via `onFileWillChange`). Together these let us roll back to the exact + // original state if the agent fails or its result doesn't validate — including + // brand-new files and files the agent edits that weren't statically imported — + // so we never leave a half-applied update behind. const snapshots = snapshotConfigFiles(configFilePath, content); try { await runConfigUpdateAgent({ prompt: buildConfigUpdatePrompt(path.basename(configFilePath), configUpdate), cwd: path.dirname(configFilePath), + onFileWillChange: (filePath) => captureSnapshotIfAbsent(snapshots, filePath), }); await validateAgentUpdate(configFilePath, baselineConfig, configUpdate, snapshots); } catch (error) { @@ -186,26 +190,30 @@ export async function updateConfigObject(configFilePath: string, configUpdate: C type ConfigFileSnapshot = { path: string, content: string | null }; /** - * Captures the config file plus every file it imports via a relative path (e.g. - * `import x from "./welcome-email.tsx" with { type: "text" }`), which are exactly - * the files an in-place update is expected to touch. Files are read as UTF-8 - * text, which matches this feature's text-import use case. + * Captures a file's current on-disk content into `snapshots` (recording `null` + * if it doesn't exist) unless it's already captured. Paths are resolved to + * absolute form so the same file is never snapshotted twice under different + * spellings. Files are read as UTF-8 text, which matches this feature's + * text-import use case. + */ +function captureSnapshotIfAbsent(snapshots: ConfigFileSnapshot[], filePath: string): void { + const resolved = path.resolve(filePath); + if (snapshots.some((snapshot) => snapshot.path === resolved)) return; + snapshots.push({ path: resolved, content: existsSync(resolved) ? readFileSync(resolved, "utf-8") : null }); +} + +/** + * Seeds the rollback set with the config file plus every file it imports via a + * relative path (e.g. `import x from "./welcome-email.tsx" with { type: "text" }`), + * which are exactly the files an in-place update is expected to touch. Any other + * file the agent ends up writing is captured on the fly via `onFileWillChange`, + * so this static pass is just a best-effort head start, not the full guarantee. */ function snapshotConfigFiles(configFilePath: string, configContent: string): ConfigFileSnapshot[] { const dir = path.dirname(configFilePath); - const snapshots: ConfigFileSnapshot[] = [{ path: configFilePath, content: configContent }]; + const snapshots: ConfigFileSnapshot[] = [{ path: path.resolve(configFilePath), content: configContent }]; for (const specifier of getRelativeImportSpecifiers(configContent)) { - const resolved = path.resolve(dir, specifier); - // Skip imports that resolve outside the config directory (e.g. `../shared/x`). - // The agent runs with its cwd set to the config directory, so it can only edit - // files within it; parent-directory imports are valid project layouts but - // simply aren't snapshot/rollback targets here (the agent won't touch them). - const relative = path.relative(dir, resolved); - if (relative.startsWith("..") || path.isAbsolute(relative)) { - continue; - } - if (snapshots.some((snapshot) => snapshot.path === resolved)) continue; - snapshots.push({ path: resolved, content: existsSync(resolved) ? readFileSync(resolved, "utf-8") : null }); + captureSnapshotIfAbsent(snapshots, path.resolve(dir, specifier)); } return snapshots; } @@ -312,7 +320,11 @@ function flattenConfigUpdate(update: Config): ConfigChange[] { function buildConfigUpdatePrompt(configFileName: string, configUpdate: Config): string { const changes = flattenConfigUpdate(configUpdate); const changeLines = changes.map(({ path: configPath, value }) => { - return `- \`${configPath}\`: set to ${JSON.stringify(value)}`; + // Both the path and value come from the (untrusted) config update, so they're + // JSON-encoded rather than interpolated raw — this escapes backticks, quotes, + // and newlines that could otherwise break out of the prompt's formatting or + // inject extra instructions. + return `- ${JSON.stringify(configPath)}: set to ${JSON.stringify(value)}`; }).join("\n"); return `You are editing a Hexclave / Stack Auth configuration file in place. Apply a set of configuration changes WITHOUT changing how the file is written. diff --git a/apps/dashboard/src/lib/remote-development-environment/config-update-agent.ts b/apps/dashboard/src/lib/remote-development-environment/config-update-agent.ts index 23036beee..1f8d24fda 100644 --- a/apps/dashboard/src/lib/remote-development-environment/config-update-agent.ts +++ b/apps/dashboard/src/lib/remote-development-environment/config-update-agent.ts @@ -1,5 +1,6 @@ import "server-only"; +import path from "path"; import { query } from "@anthropic-ai/claude-agent-sdk"; /** @@ -33,6 +34,36 @@ const AGENT_TIMEOUT_MS: number = (() => { return parsed; })(); +/** + * True for the error produced when an `AbortController` aborts an awaited + * operation. The SDK surfaces this as either a real `AbortError` or a + * `DOMException` whose `name` is `"AbortError"`. + */ +function isAbortError(error: unknown): boolean { + return error instanceof Error && error.name === "AbortError"; +} + +// The file-mutating tools we allow the agent to use, each of which takes the +// target file in a `file_path` field. We watch these so callers can snapshot a +// file for rollback *before* the agent overwrites it. +const FILE_MUTATING_TOOLS = new Set(["Write", "Edit", "MultiEdit", "NotebookEdit"]); + +function hasStringFilePath(input: unknown): input is { file_path: string } { + return typeof input === "object" && input !== null && "file_path" in input && typeof input.file_path === "string"; +} + +/** + * Returns the absolute path a file-mutating tool call is about to write, or + * `null` if the tool doesn't write a file. Relative paths are resolved against + * the agent's `cwd` to match where the tool will actually write. + */ +function getToolWriteTargetPath(toolName: string, toolInput: unknown, cwd: string): string | null { + if (!FILE_MUTATING_TOOLS.has(toolName) || !hasStringFilePath(toolInput)) { + return null; + } + return path.isAbsolute(toolInput.file_path) ? toolInput.file_path : path.resolve(cwd, toolInput.file_path); +} + function stripClaudeCodeEnv(): Record { const env = { ...process.env }; // Removing CLAUDECODE prevents the SDK from detecting a nested agent. The @@ -51,6 +82,11 @@ function stripClaudeCodeEnv(): Record { export async function runConfigUpdateAgent(options: { prompt: string, cwd: string, + // Called with the absolute path of each file the agent is about to write or + // edit, *before* the change happens, so the caller can capture the original + // content for rollback even when the file isn't statically referenced by the + // config (a new file, or an existing-but-unimported file). + onFileWillChange?: (filePath: string) => void, }): Promise { const abortController = new AbortController(); const timeout = setTimeout(() => abortController.abort(), AGENT_TIMEOUT_MS); @@ -66,6 +102,17 @@ export async function runConfigUpdateAgent(options: { // runs server-side so it must be fully isolated from the host environment. settingSources: [], strictMcpConfig: true, + hooks: options.onFileWillChange == null ? undefined : { + PreToolUse: [{ + hooks: [async (input) => { + if (input.hook_event_name === "PreToolUse") { + const target = getToolWriteTargetPath(input.tool_name, input.tool_input, options.cwd); + if (target != null) options.onFileWillChange?.(target); + } + return { continue: true }; + }], + }], + }, // Bash is intentionally omitted: applying a config delta only needs file // inspection and editing, and withholding shell access reduces the blast // radius of running an agent against the user's project. @@ -82,17 +129,23 @@ export async function runConfigUpdateAgent(options: { stderr: (data: string) => { console.warn(`${LOG_PREFIX} [agent] ${data}`); }, }, })) { - // Detect the terminal message via `"result" in message` to match the CLI - // (`runClaudeAgent`); only a successful result carries a `result` field, - // so any other terminal subtype is reported as a failure. - if ("result" in message) { - sawResult = true; - } else if (message.type === "result") { - throw new Error(`Config update agent failed (${message.subtype}). It was unable to apply the config changes to the file.`); + // Only the terminal `result` message signals completion; a successful one + // carries a `result` field, so any other `result`-type subtype is a + // failure. Gating both branches on `type === "result"` avoids treating an + // intermediate message that happens to carry a `result` property as done. + if (message.type === "result") { + if ("result" in message) { + sawResult = true; + } else { + throw new Error(`Config update agent failed (${message.subtype}). It was unable to apply the config changes to the file.`); + } } } } catch (error) { - if (abortController.signal.aborted) { + // Only translate to a timeout error when the failure is the abort we + // triggered; otherwise a real agent error that races with the timeout would + // be masked by the generic "timed out" message. + if (abortController.signal.aborted && isAbortError(error)) { throw new Error(`Config update agent timed out after ${AGENT_TIMEOUT_MS}ms. It was unable to apply the config changes to the file.`); } throw error;