mirror of
https://github.com/stack-auth/stack.git
synced 2026-06-13 21:01:21 +08:00
Detect no-op agent runs in non-evaluable config validation
When the config can't be evaluated (e.g. it imports external text files) we can't do a semantic check, so a wrong agent result could previously pass the structural `export config` check. Now a non-empty update that leaves every snapshotted file byte-for-byte unchanged is treated as a failure (and rolled back), so the agent doing nothing is no longer reported as success. Addresses cubic P1: fallback validation too weak for non-evaluable configs. Co-Authored-By: mantra <mantra@stack-auth.com>
This commit is contained in:
parent
fbf46864d9
commit
b9a4f76349
@ -330,4 +330,26 @@ describe("remote development environment config file", () => {
|
||||
expect(readFileSync(configPath, "utf-8")).toBe(configSource);
|
||||
expect(readFileSync(templatePath, "utf-8")).toBe("export default <div>Old email</div>;\n");
|
||||
});
|
||||
|
||||
it("fails a non-evaluable update when the agent leaves every file unchanged", async () => {
|
||||
const templatePath = writeTempFile("welcome-email.tsx", "export default <div>Old email</div>;\n");
|
||||
const configSource = `import welcomeEmail from "./welcome-email.tsx" with { type: "text" };\n\nexport const config = {\n emails: { templates: { welcome: welcomeEmail } },\n};\n`;
|
||||
const configPath = writeTempConfig(configSource);
|
||||
|
||||
const { updateConfigObject } = await import("./config-file");
|
||||
|
||||
// The agent reports success but doesn't actually touch any file. Since this
|
||||
// config isn't evaluable, we can't do a semantic check, but a no-op for a
|
||||
// non-empty update must still be reported as a failure rather than silently
|
||||
// succeeding.
|
||||
mockAgentImpl = () => {};
|
||||
|
||||
await expect(updateConfigObject(configPath, {
|
||||
"emails.templates.welcome": "export default <div>New email</div>;\n",
|
||||
})).rejects.toThrow(/did not modify/);
|
||||
|
||||
// The files are untouched (a no-op restored to its identical original).
|
||||
expect(readFileSync(configPath, "utf-8")).toBe(configSource);
|
||||
expect(readFileSync(templatePath, "utf-8")).toBe("export default <div>Old email</div>;\n");
|
||||
});
|
||||
});
|
||||
|
||||
@ -171,7 +171,7 @@ export async function updateConfigObject(configFilePath: string, configUpdate: C
|
||||
prompt: buildConfigUpdatePrompt(path.basename(configFilePath), configUpdate),
|
||||
cwd: path.dirname(configFilePath),
|
||||
});
|
||||
await validateAgentUpdate(configFilePath, baselineConfig, configUpdate);
|
||||
await validateAgentUpdate(configFilePath, baselineConfig, configUpdate, snapshots);
|
||||
} catch (error) {
|
||||
restoreConfigFiles(snapshots);
|
||||
throw error;
|
||||
@ -226,7 +226,7 @@ async function tryReadConfigForValidation(configFilePath: string): Promise<Confi
|
||||
}
|
||||
}
|
||||
|
||||
async function validateAgentUpdate(configFilePath: string, baselineConfig: Config | null, configUpdate: Config): Promise<void> {
|
||||
async function validateAgentUpdate(configFilePath: string, baselineConfig: Config | null, configUpdate: Config, snapshots: ConfigFileSnapshot[]): Promise<void> {
|
||||
if (baselineConfig != null) {
|
||||
const target = canonicalizeConfig(override(baselineConfig, configUpdate));
|
||||
const result = canonicalizeConfig((await readConfigFile(configFilePath)).config);
|
||||
@ -237,14 +237,35 @@ async function validateAgentUpdate(configFilePath: string, baselineConfig: Confi
|
||||
}
|
||||
|
||||
// The config couldn't be evaluated (e.g. it imports external text files), so a
|
||||
// full semantic comparison isn't possible. Ensure at least that the agent left
|
||||
// a syntactically valid file that still exports `config`.
|
||||
// full semantic comparison isn't possible. We make the weaker checks we can:
|
||||
|
||||
// 1. The agent must have actually written something. If a non-empty update
|
||||
// left every file we snapshotted byte-for-byte unchanged, the agent didn't
|
||||
// apply the change (e.g. it couldn't find the referenced file) — fail loud
|
||||
// rather than report a success that did nothing.
|
||||
if (flattenConfigUpdate(configUpdate).length > 0 && !snapshotsChangedOnDisk(snapshots)) {
|
||||
throw new Error(`Config update validation failed for ${configFilePath}: the agent did not modify the config or any of its referenced files.`);
|
||||
}
|
||||
|
||||
// 2. The file must still be syntactically valid and export `config`.
|
||||
const content = readFileSync(configFilePath, "utf-8");
|
||||
if (!stackConfigFileExportsConfig(content, configFilePath)) {
|
||||
throw new Error(`Config update validation failed for ${configFilePath}: the updated file no longer exports a valid \`config\`.`);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether any snapshotted file's current on-disk content differs from
|
||||
* what it was when captured (including being created or deleted). Used to detect
|
||||
* a no-op agent run when the config isn't evaluable enough for a semantic check.
|
||||
*/
|
||||
function snapshotsChangedOnDisk(snapshots: ConfigFileSnapshot[]): boolean {
|
||||
return snapshots.some(({ path: filePath, content }) => {
|
||||
const current = existsSync(filePath) ? readFileSync(filePath, "utf-8") : null;
|
||||
return current !== content;
|
||||
});
|
||||
}
|
||||
|
||||
type ConfigChange = { path: string, value: ConfigValue };
|
||||
|
||||
/**
|
||||
|
||||
Loading…
Reference in New Issue
Block a user