From a2590be68e15147a5e52aaf8d7c031c5d4441e3f Mon Sep 17 00:00:00 2001 From: Bilal Godil Date: Tue, 16 Jun 2026 14:18:26 -0700 Subject: [PATCH] fix(cli): fall back to installed CLI when npx auto-update fails `hexclave dev` re-execs through `npx @latest` to pick up the latest dashboard. If that npx run failed (npm error, blocked download, lock contention, offline) it exited nonzero and `maybeReexecToLatest` propagated the code via process.exit, killing `hexclave dev` even though a perfectly good CLI was already installed. This bites users behind a package firewall (e.g. Socket Firewall on Replit), which blocks the @hexclave/cli download and surfaces as `npm error Lock compromised` (ECOMPROMISED) from npx's own cache lock. Use a startup-marker handshake to tell apart an npx/install failure (our CLI never ran -> fall back to the installed CLI) from a genuine nonzero exit of the wrapped command (our CLI ran -> propagate). The marker only needs file create/exists, so it's robust on sandboxed/networked filesystems. If the marker can't be created we keep the old always-propagate behavior. decidePostReexec + signalReexecStartedIfChild are pure and unit-tested. --- packages/cli/src/lib/self-update.test.ts | 63 +++++++++++++ packages/cli/src/lib/self-update.ts | 113 +++++++++++++++++++++-- 2 files changed, 166 insertions(+), 10 deletions(-) diff --git a/packages/cli/src/lib/self-update.test.ts b/packages/cli/src/lib/self-update.test.ts index 2b35eb5f3..badd5cb20 100644 --- a/packages/cli/src/lib/self-update.test.ts +++ b/packages/cli/src/lib/self-update.test.ts @@ -1,11 +1,17 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { existsSync, mkdtempSync, readFileSync, rmSync } from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; import { buildNpxInvocation, + decidePostReexec, decideReexec, DISABLE_AUTO_UPDATE_ENV, isEnvFlagEnabled, maybeReexecToLatest, + REEXEC_MARKER_ENV, shouldAutoUpdate, + signalReexecStartedIfChild, SKIP_AUTO_UPDATE_ENV, } from "./self-update.js"; import type { OwnPackage } from "./own-package.js"; @@ -179,3 +185,60 @@ describe("maybeReexecToLatest", () => { await expect(maybeReexecToLatest({ forwardArgs: ["dev"] })).resolves.toBeUndefined(); }); }); + +describe("decidePostReexec", () => { + it("propagates the exit code when the CLI ran to completion (code 0)", () => { + expect(decidePostReexec({ result: { exited: true, code: 0 }, started: true })) + .toEqual({ kind: "exit", code: 0 }); + }); + + it("propagates a nonzero exit code when the CLI actually started (real command failure)", () => { + // The re-exec'd CLI ran (marker present) and the wrapped command failed — we + // must surface that failure, not silently re-run it. + expect(decidePostReexec({ result: { exited: true, code: 1 }, started: true })) + .toEqual({ kind: "exit", code: 1 }); + }); + + it("falls back when npx exits nonzero before the CLI starts (e.g. Lock compromised)", () => { + // npm errored during install/lock; our CLI never ran. Don't take down dev — + // run the installed CLI instead. + const action = decidePostReexec({ result: { exited: true, code: 1 }, started: false }); + expect(action.kind).toBe("fallback"); + }); + + it("falls back when npx cannot be spawned at all", () => { + const action = decidePostReexec({ result: { exited: false, error: "spawn npx ENOENT" }, started: false }); + expect(action.kind).toBe("fallback"); + }); +}); + +describe("signalReexecStartedIfChild", () => { + let dir: string; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "hexclave-reexec-test-")); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("writes the marker file when the marker env var is set (we are the child)", () => { + const marker = join(dir, "started"); + signalReexecStartedIfChild({ [REEXEC_MARKER_ENV]: marker }); + expect(existsSync(marker)).toBe(true); + expect(readFileSync(marker, "utf8")).toBe("1"); + }); + + it("does nothing when no marker env var is set (normal top-level run)", () => { + const marker = join(dir, "started"); + signalReexecStartedIfChild({}); + expect(existsSync(marker)).toBe(false); + }); + + it("does not throw when the marker path is unwritable (best-effort)", () => { + const marker = join(dir, "nonexistent-subdir", "started"); + expect(() => signalReexecStartedIfChild({ [REEXEC_MARKER_ENV]: marker })).not.toThrow(); + expect(existsSync(marker)).toBe(false); + }); +}); diff --git a/packages/cli/src/lib/self-update.ts b/packages/cli/src/lib/self-update.ts index 16655014c..4355ae0d0 100644 --- a/packages/cli/src/lib/self-update.ts +++ b/packages/cli/src/lib/self-update.ts @@ -1,4 +1,7 @@ import { spawn } from "child_process"; +import { existsSync, mkdtempSync, rmSync, writeFileSync } from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; import { forwardSignals } from "./child-process.js"; import { getOwnPackage, type OwnPackage } from "./own-package.js"; @@ -7,6 +10,12 @@ import { getOwnPackage, type OwnPackage } from "./own-package.js"; export const SKIP_AUTO_UPDATE_ENV = "STACK_CLI_SKIP_AUTO_UPDATE"; // User-facing opt-out. Set to a truthy value to never auto-update. export const DISABLE_AUTO_UPDATE_ENV = "STACK_CLI_NO_AUTO_UPDATE"; +// Path to a marker file the re-exec'd child touches the instant it starts. The +// parent uses its presence to tell a genuine command failure (our CLI ran and +// exited nonzero) from an npx/install failure where our CLI never ran at all +// (e.g. npm "Lock compromised" on sandboxed/networked filesystems). Set by the +// parent on the child's env; read back by the parent after the child exits. +export const REEXEC_MARKER_ENV = "STACK_CLI_REEXEC_MARKER"; const LOG_PREFIX = "[Hexclave] "; @@ -96,7 +105,7 @@ export function decideReexec(opts: { }; } -type ReexecResult = +export type ReexecResult = | { exited: true, code: number } | { exited: false, error: string }; @@ -110,12 +119,14 @@ function quoteShellArg(arg: string): string { return `"${arg.replace(/"/g, '\\"')}"`; } -function runReexec(invocation: NpxInvocation): Promise { +function runReexec(invocation: NpxInvocation, markerFile: string | null): Promise { return new Promise((resolvePromise) => { const args = invocation.shell ? invocation.args.map(quoteShellArg) : invocation.args; + const env: NodeJS.ProcessEnv = { ...process.env, [SKIP_AUTO_UPDATE_ENV]: "1" }; + if (markerFile != null) env[REEXEC_MARKER_ENV] = markerFile; const child = spawn(invocation.command, args, { stdio: "inherit", - env: { ...process.env, [SKIP_AUTO_UPDATE_ENV]: "1" }, + env, shell: invocation.shell, }); const cleanup = forwardSignals(child); @@ -133,13 +144,83 @@ function runReexec(invocation: NpxInvocation): Promise { }); } +// What the parent should do once the re-exec'd npx process is done. Kept pure so +// the fallback branching can be unit-tested without spawning anything. +// - exit: propagate the child's exit code (it ran our CLI to completion) +// - fallback: the update attempt failed before our CLI ran — run the installed +// CLI inline instead of taking down `hexclave dev` +export type PostReexecAction = + | { kind: "exit", code: number } + | { kind: "fallback", detail: string }; + +// `started` is whether the re-exec'd CLI actually began running (its startup +// marker appeared). A nonzero exit *with* the CLI started is a real command +// failure and must propagate. A nonzero exit *without* it — or npx not being +// spawnable at all — means the auto-update failed before our CLI ran (e.g. npm +// "Lock compromised" on Replit/Docker/WSL/NFS filesystems, ETARGET, registry or +// network errors); auto-update is best-effort, so we fall back. +export function decidePostReexec(opts: { result: ReexecResult, started: boolean }): PostReexecAction { + const { result, started } = opts; + if (!result.exited) { + return { kind: "fallback", detail: `could not run npx (${result.error})` }; + } + if (result.code !== 0 && !started) { + return { kind: "fallback", detail: `npx exited with code ${result.code} before the CLI started` }; + } + return { kind: "exit", code: result.code }; +} + +// Create a unique, empty marker directory; the child writes a file inside it on +// startup. Returns null if the temp dir can't be created, in which case the +// caller treats every exit as "the CLI started" (i.e. preserves the old +// always-propagate behavior rather than risk a spurious fallback). +function createReexecMarker(): { dir: string, file: string } | null { + try { + const dir = mkdtempSync(join(tmpdir(), "hexclave-reexec-")); + return { dir, file: join(dir, "started") }; + } catch { + return null; + } +} + +function cleanupReexecMarker(marker: { dir: string } | null): void { + if (marker == null) return; + try { + rmSync(marker.dir, { recursive: true, force: true }); + } catch { + // best-effort temp cleanup + } +} + +// Called at the very start of the (potential) re-exec. When we are the +// npx-spawned child — identified by the marker path the parent put on our env — +// touch the marker so the parent knows the latest CLI actually started. No-op in +// the normal top-level invocation, where no marker env var is set. +export function signalReexecStartedIfChild(env: NodeJS.ProcessEnv): void { + const markerFile = env[REEXEC_MARKER_ENV]; + if (markerFile == null || markerFile === "") return; + try { + writeFileSync(markerFile, "1"); + } catch { + // best-effort; if we can't write it the parent simply propagates the exit + // code as before — no worse than the pre-fallback behavior. + } +} + // Re-runs the requested command through `npx @latest` so the user always // gets the latest CLI + dashboard without reinstalling, then exits with the // child's code. The re-exec'd child carries SKIP_AUTO_UPDATE_ENV so it runs the -// freshly downloaded CLI directly instead of recursing. Best-effort: if npx -// can't be spawned (or auto-update is off / opted out) we silently fall through -// to the installed CLI. +// freshly downloaded CLI directly instead of recursing, and a marker path so it +// can signal that it started. Best-effort: if the update fails before our CLI +// runs — npx not spawnable, or npx/npm itself erroring (e.g. "Lock compromised" +// on sandboxed filesystems) — we fall back to the installed CLI instead of +// failing `hexclave dev`. export async function maybeReexecToLatest(opts: { forwardArgs: string[] }): Promise { + // If npx already re-exec'd us to the latest CLI, record that we started so the + // parent can tell a real command failure apart from an npx/install failure. + signalReexecStartedIfChild(process.env); + + let marker: { dir: string, file: string } | null = null; try { const decision = decideReexec({ env: process.env, @@ -148,13 +229,25 @@ export async function maybeReexecToLatest(opts: { forwardArgs: string[] }): Prom }); if (!decision.reexec) return; - const result = await runReexec(decision.invocation); - if (result.exited) { - process.exit(result.code); + marker = createReexecMarker(); + const result = await runReexec(decision.invocation, marker?.file ?? null); + // No marker means we couldn't create one; treat that as "started" so we keep + // the old always-propagate behavior rather than fall back spuriously. + const started = marker == null || existsSync(marker.file); + cleanupReexecMarker(marker); + marker = null; + + const action = decidePostReexec({ result, started }); + if (action.kind === "exit") { + process.exit(action.code); } - logUpdate(`Could not run npx (${result.error}); continuing with the installed CLI.`); + logUpdate(`Auto-update skipped: ${action.detail}; continuing with the installed CLI.`); } catch { // Fail open: any unexpected error must not block the installed CLI from // running. + } finally { + // Covers the early-return / throw / opt-out paths; the success path already + // cleaned up before process.exit (which would skip finally). + cleanupReexecMarker(marker); } }