From b9a4f7634981dba53de25873df70be3cf7a2722c Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 2 Jun 2026 19:34:22 +0000 Subject: [PATCH] Detect no-op agent runs in non-evaluable config validation When the config can't be evaluated (e.g. it imports external text files) we can't do a semantic check, so a wrong agent result could previously pass the structural `export config` check. Now a non-empty update that leaves every snapshotted file byte-for-byte unchanged is treated as a failure (and rolled back), so the agent doing nothing is no longer reported as success. Addresses cubic P1: fallback validation too weak for non-evaluable configs. Co-Authored-By: mantra --- .../config-file.test.ts | 22 ++++++++++++++ .../config-file.ts | 29 ++++++++++++++++--- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts b/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts index 57346ce1c..3bedcc2a9 100644 --- a/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts +++ b/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts @@ -330,4 +330,26 @@ describe("remote development environment config file", () => { expect(readFileSync(configPath, "utf-8")).toBe(configSource); expect(readFileSync(templatePath, "utf-8")).toBe("export default
Old email
;\n"); }); + + it("fails a non-evaluable update when the agent leaves every file unchanged", async () => { + const templatePath = writeTempFile("welcome-email.tsx", "export default
Old email
;\n"); + const configSource = `import welcomeEmail from "./welcome-email.tsx" with { type: "text" };\n\nexport const config = {\n emails: { templates: { welcome: welcomeEmail } },\n};\n`; + const configPath = writeTempConfig(configSource); + + const { updateConfigObject } = await import("./config-file"); + + // The agent reports success but doesn't actually touch any file. Since this + // config isn't evaluable, we can't do a semantic check, but a no-op for a + // non-empty update must still be reported as a failure rather than silently + // succeeding. + mockAgentImpl = () => {}; + + await expect(updateConfigObject(configPath, { + "emails.templates.welcome": "export default
New email
;\n", + })).rejects.toThrow(/did not modify/); + + // The files are untouched (a no-op restored to its identical original). + expect(readFileSync(configPath, "utf-8")).toBe(configSource); + expect(readFileSync(templatePath, "utf-8")).toBe("export default
Old email
;\n"); + }); }); diff --git a/apps/dashboard/src/lib/remote-development-environment/config-file.ts b/apps/dashboard/src/lib/remote-development-environment/config-file.ts index 2b232abd0..452276200 100644 --- a/apps/dashboard/src/lib/remote-development-environment/config-file.ts +++ b/apps/dashboard/src/lib/remote-development-environment/config-file.ts @@ -171,7 +171,7 @@ export async function updateConfigObject(configFilePath: string, configUpdate: C prompt: buildConfigUpdatePrompt(path.basename(configFilePath), configUpdate), cwd: path.dirname(configFilePath), }); - await validateAgentUpdate(configFilePath, baselineConfig, configUpdate); + await validateAgentUpdate(configFilePath, baselineConfig, configUpdate, snapshots); } catch (error) { restoreConfigFiles(snapshots); throw error; @@ -226,7 +226,7 @@ async function tryReadConfigForValidation(configFilePath: string): Promise { +async function validateAgentUpdate(configFilePath: string, baselineConfig: Config | null, configUpdate: Config, snapshots: ConfigFileSnapshot[]): Promise { if (baselineConfig != null) { const target = canonicalizeConfig(override(baselineConfig, configUpdate)); const result = canonicalizeConfig((await readConfigFile(configFilePath)).config); @@ -237,14 +237,35 @@ async function validateAgentUpdate(configFilePath: string, baselineConfig: Confi } // The config couldn't be evaluated (e.g. it imports external text files), so a - // full semantic comparison isn't possible. Ensure at least that the agent left - // a syntactically valid file that still exports `config`. + // full semantic comparison isn't possible. We make the weaker checks we can: + + // 1. The agent must have actually written something. If a non-empty update + // left every file we snapshotted byte-for-byte unchanged, the agent didn't + // apply the change (e.g. it couldn't find the referenced file) — fail loud + // rather than report a success that did nothing. + if (flattenConfigUpdate(configUpdate).length > 0 && !snapshotsChangedOnDisk(snapshots)) { + throw new Error(`Config update validation failed for ${configFilePath}: the agent did not modify the config or any of its referenced files.`); + } + + // 2. The file must still be syntactically valid and export `config`. const content = readFileSync(configFilePath, "utf-8"); if (!stackConfigFileExportsConfig(content, configFilePath)) { throw new Error(`Config update validation failed for ${configFilePath}: the updated file no longer exports a valid \`config\`.`); } } +/** + * Returns whether any snapshotted file's current on-disk content differs from + * what it was when captured (including being created or deleted). Used to detect + * a no-op agent run when the config isn't evaluable enough for a semantic check. + */ +function snapshotsChangedOnDisk(snapshots: ConfigFileSnapshot[]): boolean { + return snapshots.some(({ path: filePath, content }) => { + const current = existsSync(filePath) ? readFileSync(filePath, "utf-8") : null; + return current !== content; + }); +} + type ConfigChange = { path: string, value: ConfigValue }; /**