mirror of
https://github.com/stack-auth/stack.git
synced 2026-06-04 21:04:37 +08:00
Address Greptile review: escape agent prompt, fix result detection, roll back all agent-written files, fix abort masking
Co-Authored-By: mantra <mantra@stack-auth.com>
This commit is contained in:
parent
e3c7065c4f
commit
14bd4254fc
@ -1,15 +1,16 @@
|
||||
import { mkdtempSync, readFileSync, rmSync, writeFileSync } from "fs";
|
||||
import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "fs";
|
||||
import { join } from "path";
|
||||
import { afterEach, describe, expect, it, vi } from "vitest";
|
||||
|
||||
// Lets each AI-path test inject what the (otherwise network-backed) agent does
|
||||
// to the files on disk, so we can exercise the orchestration and validation
|
||||
// around `updateConfigObject` without calling a real model.
|
||||
let mockAgentImpl: ((options: { prompt: string, cwd: string }) => void | Promise<void>) | null = null;
|
||||
type MockAgentOptions = { prompt: string, cwd: string, onFileWillChange?: (filePath: string) => void };
|
||||
let mockAgentImpl: ((options: MockAgentOptions) => void | Promise<void>) | null = null;
|
||||
|
||||
vi.mock("server-only", () => ({}));
|
||||
vi.mock("./config-update-agent", () => ({
|
||||
runConfigUpdateAgent: async (options: { prompt: string, cwd: string }) => {
|
||||
runConfigUpdateAgent: async (options: MockAgentOptions) => {
|
||||
if (mockAgentImpl == null) {
|
||||
throw new Error("runConfigUpdateAgent was called but no mock implementation was set for this test.");
|
||||
}
|
||||
@ -19,9 +20,20 @@ vi.mock("./config-update-agent", () => ({
|
||||
|
||||
let tempDir: string | undefined;
|
||||
|
||||
function getTempDir(): string {
|
||||
if (tempDir == null) {
|
||||
tempDir = mkdtempSync(join(process.cwd(), ".stack-rde-config-test-"));
|
||||
// Give the temp dir its own package.json so SDK-package detection (which
|
||||
// walks up to the nearest package.json) resolves deterministically here
|
||||
// instead of picking up the dashboard's own dependencies, which would make
|
||||
// the rendered `StackConfig` import env-dependent.
|
||||
writeFileSync(join(tempDir, "package.json"), JSON.stringify({ name: "stack-rde-config-test" }), "utf-8");
|
||||
}
|
||||
return tempDir;
|
||||
}
|
||||
|
||||
function writeTempFile(name: string, content: string): string {
|
||||
tempDir ??= mkdtempSync(join(process.cwd(), ".stack-rde-config-test-"));
|
||||
const filePath = join(tempDir, name);
|
||||
const filePath = join(getTempDir(), name);
|
||||
writeFileSync(filePath, content, "utf-8");
|
||||
return filePath;
|
||||
}
|
||||
@ -331,6 +343,32 @@ describe("remote development environment config file", () => {
|
||||
expect(readFileSync(templatePath, "utf-8")).toBe("export default <div>Old email</div>;\n");
|
||||
});
|
||||
|
||||
it("rolls back a brand-new file the agent creates outside the statically-imported set", async () => {
|
||||
// A wrapped config takes the agent path (it isn't a plain static literal),
|
||||
// so the agent mock actually runs.
|
||||
const configSource = `import { defineStackConfig } from "@hexclave/shared/config";\nexport const config = defineStackConfig({ auth: { allowSignUp: true } });\n`;
|
||||
const configPath = writeTempConfig(configSource);
|
||||
const newFilePath = join(getTempDir(), "generated-extra.ts");
|
||||
|
||||
const { updateConfigObject } = await import("./config-file");
|
||||
|
||||
// The agent creates a file that the config doesn't statically import, so the
|
||||
// only way it can be rolled back is via the `onFileWillChange` hook firing
|
||||
// before the write. After creating it, the agent fails so the run must roll
|
||||
// back — deleting the newly-created file.
|
||||
mockAgentImpl = (options) => {
|
||||
options.onFileWillChange?.(newFilePath);
|
||||
writeFileSync(newFilePath, "export const extra = true;\n", "utf-8");
|
||||
throw new Error("agent blew up");
|
||||
};
|
||||
|
||||
await expect(updateConfigObject(configPath, { "auth.allowSignUp": false }))
|
||||
.rejects.toThrow("agent blew up");
|
||||
|
||||
expect(existsSync(newFilePath)).toBe(false);
|
||||
expect(readFileSync(configPath, "utf-8")).toBe(configSource);
|
||||
});
|
||||
|
||||
it("fails a non-evaluable update when the agent leaves every file unchanged", async () => {
|
||||
const templatePath = writeTempFile("welcome-email.tsx", "export default <div>Old email</div>;\n");
|
||||
const configSource = `import welcomeEmail from "./welcome-email.tsx" with { type: "text" };\n\nexport const config = {\n emails: { templates: { welcome: welcomeEmail } },\n};\n`;
|
||||
|
||||
@ -166,14 +166,18 @@ export async function updateConfigObject(configFilePath: string, configUpdate: C
|
||||
// full semantic check afterwards), then let the agent apply the change.
|
||||
const baselineConfig = await tryReadConfigForValidation(configFilePath);
|
||||
|
||||
// Snapshot the config file and any externally-referenced files the agent might
|
||||
// edit, so we can roll back to the original state if the agent fails or its
|
||||
// result doesn't validate — never leaving a half-applied update behind.
|
||||
// Snapshot the config file and its statically-referenced imports up front, then
|
||||
// additionally capture any file the agent is about to write *before* it writes
|
||||
// (via `onFileWillChange`). Together these let us roll back to the exact
|
||||
// original state if the agent fails or its result doesn't validate — including
|
||||
// brand-new files and files the agent edits that weren't statically imported —
|
||||
// so we never leave a half-applied update behind.
|
||||
const snapshots = snapshotConfigFiles(configFilePath, content);
|
||||
try {
|
||||
await runConfigUpdateAgent({
|
||||
prompt: buildConfigUpdatePrompt(path.basename(configFilePath), configUpdate),
|
||||
cwd: path.dirname(configFilePath),
|
||||
onFileWillChange: (filePath) => captureSnapshotIfAbsent(snapshots, filePath),
|
||||
});
|
||||
await validateAgentUpdate(configFilePath, baselineConfig, configUpdate, snapshots);
|
||||
} catch (error) {
|
||||
@ -186,26 +190,30 @@ export async function updateConfigObject(configFilePath: string, configUpdate: C
|
||||
type ConfigFileSnapshot = { path: string, content: string | null };
|
||||
|
||||
/**
|
||||
* Captures the config file plus every file it imports via a relative path (e.g.
|
||||
* `import x from "./welcome-email.tsx" with { type: "text" }`), which are exactly
|
||||
* the files an in-place update is expected to touch. Files are read as UTF-8
|
||||
* text, which matches this feature's text-import use case.
|
||||
* Captures a file's current on-disk content into `snapshots` (recording `null`
|
||||
* if it doesn't exist) unless it's already captured. Paths are resolved to
|
||||
* absolute form so the same file is never snapshotted twice under different
|
||||
* spellings. Files are read as UTF-8 text, which matches this feature's
|
||||
* text-import use case.
|
||||
*/
|
||||
function captureSnapshotIfAbsent(snapshots: ConfigFileSnapshot[], filePath: string): void {
|
||||
const resolved = path.resolve(filePath);
|
||||
if (snapshots.some((snapshot) => snapshot.path === resolved)) return;
|
||||
snapshots.push({ path: resolved, content: existsSync(resolved) ? readFileSync(resolved, "utf-8") : null });
|
||||
}
|
||||
|
||||
/**
|
||||
* Seeds the rollback set with the config file plus every file it imports via a
|
||||
* relative path (e.g. `import x from "./welcome-email.tsx" with { type: "text" }`),
|
||||
* which are exactly the files an in-place update is expected to touch. Any other
|
||||
* file the agent ends up writing is captured on the fly via `onFileWillChange`,
|
||||
* so this static pass is just a best-effort head start, not the full guarantee.
|
||||
*/
|
||||
function snapshotConfigFiles(configFilePath: string, configContent: string): ConfigFileSnapshot[] {
|
||||
const dir = path.dirname(configFilePath);
|
||||
const snapshots: ConfigFileSnapshot[] = [{ path: configFilePath, content: configContent }];
|
||||
const snapshots: ConfigFileSnapshot[] = [{ path: path.resolve(configFilePath), content: configContent }];
|
||||
for (const specifier of getRelativeImportSpecifiers(configContent)) {
|
||||
const resolved = path.resolve(dir, specifier);
|
||||
// Skip imports that resolve outside the config directory (e.g. `../shared/x`).
|
||||
// The agent runs with its cwd set to the config directory, so it can only edit
|
||||
// files within it; parent-directory imports are valid project layouts but
|
||||
// simply aren't snapshot/rollback targets here (the agent won't touch them).
|
||||
const relative = path.relative(dir, resolved);
|
||||
if (relative.startsWith("..") || path.isAbsolute(relative)) {
|
||||
continue;
|
||||
}
|
||||
if (snapshots.some((snapshot) => snapshot.path === resolved)) continue;
|
||||
snapshots.push({ path: resolved, content: existsSync(resolved) ? readFileSync(resolved, "utf-8") : null });
|
||||
captureSnapshotIfAbsent(snapshots, path.resolve(dir, specifier));
|
||||
}
|
||||
return snapshots;
|
||||
}
|
||||
@ -312,7 +320,11 @@ function flattenConfigUpdate(update: Config): ConfigChange[] {
|
||||
function buildConfigUpdatePrompt(configFileName: string, configUpdate: Config): string {
|
||||
const changes = flattenConfigUpdate(configUpdate);
|
||||
const changeLines = changes.map(({ path: configPath, value }) => {
|
||||
return `- \`${configPath}\`: set to ${JSON.stringify(value)}`;
|
||||
// Both the path and value come from the (untrusted) config update, so they're
|
||||
// JSON-encoded rather than interpolated raw — this escapes backticks, quotes,
|
||||
// and newlines that could otherwise break out of the prompt's formatting or
|
||||
// inject extra instructions.
|
||||
return `- ${JSON.stringify(configPath)}: set to ${JSON.stringify(value)}`;
|
||||
}).join("\n");
|
||||
|
||||
return `You are editing a Hexclave / Stack Auth configuration file in place. Apply a set of configuration changes WITHOUT changing how the file is written.
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import "server-only";
|
||||
|
||||
import path from "path";
|
||||
import { query } from "@anthropic-ai/claude-agent-sdk";
|
||||
|
||||
/**
|
||||
@ -33,6 +34,36 @@ const AGENT_TIMEOUT_MS: number = (() => {
|
||||
return parsed;
|
||||
})();
|
||||
|
||||
/**
|
||||
* True for the error produced when an `AbortController` aborts an awaited
|
||||
* operation. The SDK surfaces this as either a real `AbortError` or a
|
||||
* `DOMException` whose `name` is `"AbortError"`.
|
||||
*/
|
||||
function isAbortError(error: unknown): boolean {
|
||||
return error instanceof Error && error.name === "AbortError";
|
||||
}
|
||||
|
||||
// The file-mutating tools we allow the agent to use, each of which takes the
|
||||
// target file in a `file_path` field. We watch these so callers can snapshot a
|
||||
// file for rollback *before* the agent overwrites it.
|
||||
const FILE_MUTATING_TOOLS = new Set(["Write", "Edit", "MultiEdit", "NotebookEdit"]);
|
||||
|
||||
function hasStringFilePath(input: unknown): input is { file_path: string } {
|
||||
return typeof input === "object" && input !== null && "file_path" in input && typeof input.file_path === "string";
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the absolute path a file-mutating tool call is about to write, or
|
||||
* `null` if the tool doesn't write a file. Relative paths are resolved against
|
||||
* the agent's `cwd` to match where the tool will actually write.
|
||||
*/
|
||||
function getToolWriteTargetPath(toolName: string, toolInput: unknown, cwd: string): string | null {
|
||||
if (!FILE_MUTATING_TOOLS.has(toolName) || !hasStringFilePath(toolInput)) {
|
||||
return null;
|
||||
}
|
||||
return path.isAbsolute(toolInput.file_path) ? toolInput.file_path : path.resolve(cwd, toolInput.file_path);
|
||||
}
|
||||
|
||||
function stripClaudeCodeEnv(): Record<string, string | undefined> {
|
||||
const env = { ...process.env };
|
||||
// Removing CLAUDECODE prevents the SDK from detecting a nested agent. The
|
||||
@ -51,6 +82,11 @@ function stripClaudeCodeEnv(): Record<string, string | undefined> {
|
||||
export async function runConfigUpdateAgent(options: {
|
||||
prompt: string,
|
||||
cwd: string,
|
||||
// Called with the absolute path of each file the agent is about to write or
|
||||
// edit, *before* the change happens, so the caller can capture the original
|
||||
// content for rollback even when the file isn't statically referenced by the
|
||||
// config (a new file, or an existing-but-unimported file).
|
||||
onFileWillChange?: (filePath: string) => void,
|
||||
}): Promise<void> {
|
||||
const abortController = new AbortController();
|
||||
const timeout = setTimeout(() => abortController.abort(), AGENT_TIMEOUT_MS);
|
||||
@ -66,6 +102,17 @@ export async function runConfigUpdateAgent(options: {
|
||||
// runs server-side so it must be fully isolated from the host environment.
|
||||
settingSources: [],
|
||||
strictMcpConfig: true,
|
||||
hooks: options.onFileWillChange == null ? undefined : {
|
||||
PreToolUse: [{
|
||||
hooks: [async (input) => {
|
||||
if (input.hook_event_name === "PreToolUse") {
|
||||
const target = getToolWriteTargetPath(input.tool_name, input.tool_input, options.cwd);
|
||||
if (target != null) options.onFileWillChange?.(target);
|
||||
}
|
||||
return { continue: true };
|
||||
}],
|
||||
}],
|
||||
},
|
||||
// Bash is intentionally omitted: applying a config delta only needs file
|
||||
// inspection and editing, and withholding shell access reduces the blast
|
||||
// radius of running an agent against the user's project.
|
||||
@ -82,17 +129,23 @@ export async function runConfigUpdateAgent(options: {
|
||||
stderr: (data: string) => { console.warn(`${LOG_PREFIX} [agent] ${data}`); },
|
||||
},
|
||||
})) {
|
||||
// Detect the terminal message via `"result" in message` to match the CLI
|
||||
// (`runClaudeAgent`); only a successful result carries a `result` field,
|
||||
// so any other terminal subtype is reported as a failure.
|
||||
if ("result" in message) {
|
||||
sawResult = true;
|
||||
} else if (message.type === "result") {
|
||||
throw new Error(`Config update agent failed (${message.subtype}). It was unable to apply the config changes to the file.`);
|
||||
// Only the terminal `result` message signals completion; a successful one
|
||||
// carries a `result` field, so any other `result`-type subtype is a
|
||||
// failure. Gating both branches on `type === "result"` avoids treating an
|
||||
// intermediate message that happens to carry a `result` property as done.
|
||||
if (message.type === "result") {
|
||||
if ("result" in message) {
|
||||
sawResult = true;
|
||||
} else {
|
||||
throw new Error(`Config update agent failed (${message.subtype}). It was unable to apply the config changes to the file.`);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
if (abortController.signal.aborted) {
|
||||
// Only translate to a timeout error when the failure is the abort we
|
||||
// triggered; otherwise a real agent error that races with the timeout would
|
||||
// be masked by the generic "timed out" message.
|
||||
if (abortController.signal.aborted && isAbortError(error)) {
|
||||
throw new Error(`Config update agent timed out after ${AGENT_TIMEOUT_MS}ms. It was unable to apply the config changes to the file.`);
|
||||
}
|
||||
throw error;
|
||||
|
||||
Loading…
Reference in New Issue
Block a user