From b9a4f7634981dba53de25873df70be3cf7a2722c Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Tue, 2 Jun 2026 19:34:22 +0000
Subject: [PATCH] Detect no-op agent runs in non-evaluable config validation

When the config can't be evaluated (e.g. it imports external text files) we
can't do a semantic check, so a wrong agent result could previously pass the
structural `export config` check. Now a non-empty update that leaves every
snapshotted file byte-for-byte unchanged is treated as a failure (and rolled
back), so the agent doing nothing is no longer reported as success.

Addresses cubic P1: fallback validation too weak for non-evaluable configs.

Co-Authored-By: mantra <mantra@stack-auth.com>
---
 .../config-file.test.ts                       | 22 ++++++++++++++
 .../config-file.ts                            | 29 ++++++++++++++++---
 2 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts b/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts
index 57346ce1c..3bedcc2a9 100644
--- a/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts
+++ b/apps/dashboard/src/lib/remote-development-environment/config-file.test.ts
@@ -330,4 +330,26 @@ describe("remote development environment config file", () => {
     expect(readFileSync(configPath, "utf-8")).toBe(configSource);
     expect(readFileSync(templatePath, "utf-8")).toBe("export default <div>Old email</div>;\n");
   });
+
+  it("fails a non-evaluable update when the agent leaves every file unchanged", async () => {
+    const templatePath = writeTempFile("welcome-email.tsx", "export default <div>Old email</div>;\n");
+    const configSource = `import welcomeEmail from "./welcome-email.tsx" with { type: "text" };\n\nexport const config = {\n  emails: { templates: { welcome: welcomeEmail } },\n};\n`;
+    const configPath = writeTempConfig(configSource);
+
+    const { updateConfigObject } = await import("./config-file");
+
+    // The agent reports success but doesn't actually touch any file. Since this
+    // config isn't evaluable, we can't do a semantic check, but a no-op for a
+    // non-empty update must still be reported as a failure rather than silently
+    // succeeding.
+    mockAgentImpl = () => {};
+
+    await expect(updateConfigObject(configPath, {
+      "emails.templates.welcome": "export default <div>New email</div>;\n",
+    })).rejects.toThrow(/did not modify/);
+
+    // The files are untouched (a no-op restored to its identical original).
+    expect(readFileSync(configPath, "utf-8")).toBe(configSource);
+    expect(readFileSync(templatePath, "utf-8")).toBe("export default <div>Old email</div>;\n");
+  });
 });
diff --git a/apps/dashboard/src/lib/remote-development-environment/config-file.ts b/apps/dashboard/src/lib/remote-development-environment/config-file.ts
index 2b232abd0..452276200 100644
--- a/apps/dashboard/src/lib/remote-development-environment/config-file.ts
+++ b/apps/dashboard/src/lib/remote-development-environment/config-file.ts
@@ -171,7 +171,7 @@ export async function updateConfigObject(configFilePath: string, configUpdate: C
       prompt: buildConfigUpdatePrompt(path.basename(configFilePath), configUpdate),
       cwd: path.dirname(configFilePath),
     });
-    await validateAgentUpdate(configFilePath, baselineConfig, configUpdate);
+    await validateAgentUpdate(configFilePath, baselineConfig, configUpdate, snapshots);
   } catch (error) {
     restoreConfigFiles(snapshots);
     throw error;
@@ -226,7 +226,7 @@ async function tryReadConfigForValidation(configFilePath: string): Promise<Confi
   }
 }
 
-async function validateAgentUpdate(configFilePath: string, baselineConfig: Config | null, configUpdate: Config): Promise<void> {
+async function validateAgentUpdate(configFilePath: string, baselineConfig: Config | null, configUpdate: Config, snapshots: ConfigFileSnapshot[]): Promise<void> {
   if (baselineConfig != null) {
     const target = canonicalizeConfig(override(baselineConfig, configUpdate));
     const result = canonicalizeConfig((await readConfigFile(configFilePath)).config);
@@ -237,14 +237,35 @@ async function validateAgentUpdate(configFilePath: string, baselineConfig: Confi
   }
 
   // The config couldn't be evaluated (e.g. it imports external text files), so a
-  // full semantic comparison isn't possible. Ensure at least that the agent left
-  // a syntactically valid file that still exports `config`.
+  // full semantic comparison isn't possible. We make the weaker checks we can:
+
+  // 1. The agent must have actually written something. If a non-empty update
+  //    left every file we snapshotted byte-for-byte unchanged, the agent didn't
+  //    apply the change (e.g. it couldn't find the referenced file) — fail loud
+  //    rather than report a success that did nothing.
+  if (flattenConfigUpdate(configUpdate).length > 0 && !snapshotsChangedOnDisk(snapshots)) {
+    throw new Error(`Config update validation failed for ${configFilePath}: the agent did not modify the config or any of its referenced files.`);
+  }
+
+  // 2. The file must still be syntactically valid and export `config`.
   const content = readFileSync(configFilePath, "utf-8");
   if (!stackConfigFileExportsConfig(content, configFilePath)) {
     throw new Error(`Config update validation failed for ${configFilePath}: the updated file no longer exports a valid \`config\`.`);
   }
 }
 
+/**
+ * Returns whether any snapshotted file's current on-disk content differs from
+ * what it was when captured (including being created or deleted). Used to detect
+ * a no-op agent run when the config isn't evaluable enough for a semantic check.
+ */
+function snapshotsChangedOnDisk(snapshots: ConfigFileSnapshot[]): boolean {
+  return snapshots.some(({ path: filePath, content }) => {
+    const current = existsSync(filePath) ? readFileSync(filePath, "utf-8") : null;
+    return current !== content;
+  });
+}
+
 type ConfigChange = { path: string, value: ConfigValue };
 
 /**