diff --git a/packages/cli/src/commands/dev.ts b/packages/cli/src/commands/dev.ts index 50630b72b..26ac5ee08 100644 --- a/packages/cli/src/commands/dev.ts +++ b/packages/cli/src/commands/dev.ts @@ -9,7 +9,7 @@ import { resolveConfigFilePathOption } from "../lib/config-file-path.js"; import { devEnvStatePath, ensureLocalDashboardSecret, readDevEnvState, recordLocalDashboardProcess } from "../lib/dev-env-state.js"; import { CliError } from "../lib/errors.js"; import { cliVersion } from "../lib/own-package.js"; -import { maybeReexecToLatest } from "../lib/self-update.js"; +import { maybeReexecToLatest, REEXEC_MARKER_ENV } from "../lib/self-update.js"; type ChildCommand = { command: string, @@ -788,14 +788,18 @@ child.on("error", (error) => { `; function runChildProcess(command: ChildCommand, env: NodeJS.ProcessEnv): Promise { + // Scrub the internal re-exec handshake marker so it never leaks into the user's + // command. Done here (not in the wrapper script) to cover both spawn paths. + const childEnv = { ...env }; + delete childEnv[REEXEC_MARKER_ENV]; return new Promise((resolvePromise, reject) => { const child = process.platform === "win32" - ? spawn(command.command, command.args, { stdio: "inherit", env }) + ? spawn(command.command, command.args, { stdio: "inherit", env: childEnv }) : spawn(process.execPath, ["-e", APP_COMMAND_WRAPPER_SCRIPT], { detached: true, stdio: "inherit", env: { - ...env, + ...childEnv, [APP_COMMAND_WRAPPER_PARENT_PID_ENV_VAR]: String(process.pid), [APP_COMMAND_WRAPPER_COMMAND_ENV_VAR]: command.command, [APP_COMMAND_WRAPPER_ARGS_ENV_VAR]: JSON.stringify(command.args), diff --git a/packages/cli/src/lib/self-update.test.ts b/packages/cli/src/lib/self-update.test.ts index 2b35eb5f3..84394a6fc 100644 --- a/packages/cli/src/lib/self-update.test.ts +++ b/packages/cli/src/lib/self-update.test.ts @@ -1,11 +1,28 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "fs"; +import { tmpdir } from "os"; +import { join } from "path"; +import { EventEmitter } from "events"; +import * as childProcess from "child_process"; +import * as ownPackage from "./own-package.js"; + +// `spawn` is a non-configurable built-in export, so it can't be vi.spyOn'd; +// replace it with a vi.fn. Everything else in child_process stays real. +vi.mock("child_process", async (importOriginal) => { + const actual = await importOriginal(); + return { ...actual, spawn: vi.fn() }; +}); + import { buildNpxInvocation, + decidePostReexec, decideReexec, DISABLE_AUTO_UPDATE_ENV, isEnvFlagEnabled, maybeReexecToLatest, + REEXEC_MARKER_ENV, shouldAutoUpdate, + signalReexecStartedIfChild, SKIP_AUTO_UPDATE_ENV, } from "./self-update.js"; import type { OwnPackage } from "./own-package.js"; @@ -179,3 +196,159 @@ describe("maybeReexecToLatest", () => { await expect(maybeReexecToLatest({ forwardArgs: ["dev"] })).resolves.toBeUndefined(); }); }); + +describe("decidePostReexec", () => { + it("propagates the exit code when the CLI ran to completion (code 0)", () => { + expect(decidePostReexec({ result: { exited: true, code: 0, signal: null }, started: true })) + .toEqual({ kind: "exit", code: 0 }); + }); + + it("propagates a nonzero exit code when the CLI actually started (real command failure)", () => { + // The re-exec'd CLI ran (marker present) and the wrapped command failed — we + // must surface that failure, not silently re-run it. + expect(decidePostReexec({ result: { exited: true, code: 1, signal: null }, started: true })) + .toEqual({ kind: "exit", code: 1 }); + }); + + it("falls back when npx exits nonzero before the CLI starts (e.g. Lock compromised)", () => { + // npm errored during install/lock; our CLI never ran. Don't take down dev — + // run the installed CLI instead. + const action = decidePostReexec({ result: { exited: true, code: 1, signal: null }, started: false }); + expect(action.kind).toBe("fallback"); + }); + + it("propagates (does not fall back) when npx was killed by a signal before the CLI started", () => { + // e.g. the user pressed Ctrl-C during the download. They want to abort, not + // get a fresh dev session launched on the installed CLI. + expect(decidePostReexec({ result: { exited: true, code: 130, signal: "SIGINT" }, started: false })) + .toEqual({ kind: "exit", code: 130 }); + }); + + it("falls back when npx cannot be spawned at all", () => { + const action = decidePostReexec({ result: { exited: false, error: "spawn npx ENOENT" }, started: false }); + expect(action.kind).toBe("fallback"); + }); +}); + +describe("signalReexecStartedIfChild", () => { + let dir: string; + + beforeEach(() => { + dir = mkdtempSync(join(tmpdir(), "hexclave-reexec-test-")); + }); + + afterEach(() => { + rmSync(dir, { recursive: true, force: true }); + }); + + it("writes the marker file when the marker env var is set (we are the child)", () => { + const marker = join(dir, "started"); + signalReexecStartedIfChild({ [REEXEC_MARKER_ENV]: marker }); + expect(existsSync(marker)).toBe(true); + expect(readFileSync(marker, "utf8")).toBe("1"); + }); + + it("does nothing when no marker env var is set (normal top-level run)", () => { + const marker = join(dir, "started"); + signalReexecStartedIfChild({}); + expect(existsSync(marker)).toBe(false); + }); + + it("does not throw when the marker path is unwritable (best-effort)", () => { + const marker = join(dir, "nonexistent-subdir", "started"); + expect(() => signalReexecStartedIfChild({ [REEXEC_MARKER_ENV]: marker })).not.toThrow(); + expect(existsSync(marker)).toBe(false); + }); +}); + +// End-to-end wiring of maybeReexecToLatest (marker -> spawn -> existence check -> +// decidePostReexec). The decision functions are pure-tested above; these guard +// against the glue regressing. +describe("maybeReexecToLatest fallback wiring", () => { + const managedKeys = [SKIP_AUTO_UPDATE_ENV, DISABLE_AUTO_UPDATE_ENV, REEXEC_MARKER_ENV]; + const savedEnv: Record = {}; + + beforeEach(() => { + for (const key of managedKeys) { + savedEnv[key] = process.env[key]; + delete process.env[key]; + } + vi.spyOn(ownPackage, "getOwnPackage").mockReturnValue({ + name: "@hexclave/cli", + version: "1.0.0", + binName: "hexclave", + }); + }); + + afterEach(() => { + for (const key of managedKeys) { + if (savedEnv[key] == null) delete process.env[key]; + else process.env[key] = savedEnv[key]; + } + vi.restoreAllMocks(); + }); + + // Fake npx child. `writeMarker` simulates whether the re-exec'd CLI touched the + // marker (as signalReexecStartedIfChild would) before the process closes. + function mockNpxChild(opts: { writeMarker: boolean }): EventEmitter & { pid: number, kill: () => void } { + const child = Object.assign(new EventEmitter(), { pid: 4242, kill: () => {} }); + vi.mocked(childProcess.spawn).mockImplementation((( + _command: string, + _args: readonly string[], + spawnOpts: { env?: NodeJS.ProcessEnv }, + ) => { + if (opts.writeMarker) { + const markerFile = spawnOpts.env?.[REEXEC_MARKER_ENV]; + if (markerFile != null) writeFileSync(markerFile, "1"); + } + return child; + }) as unknown as typeof childProcess.spawn); + return child; + } + + it("falls back (never calls process.exit) when npx exits nonzero without the CLI starting", async () => { + const exitSpy = vi.spyOn(process, "exit").mockImplementation(((() => { + throw new Error("process.exit should not have been called"); + }) as never)); + const child = mockNpxChild({ writeMarker: false }); + + const promise = maybeReexecToLatest({ forwardArgs: ["dev"] }); + child.emit("close", 1, null); + + await expect(promise).resolves.toBeUndefined(); + expect(exitSpy).not.toHaveBeenCalled(); + }); + + it("propagates the exit code (calls process.exit) when the CLI started then failed", async () => { + const exitSpy = vi.spyOn(process, "exit").mockImplementation(((() => { + // Throw so we don't actually exit the test runner; the SUT's catch swallows it. + throw new Error("__exit__"); + }) as never)); + const child = mockNpxChild({ writeMarker: true }); + + const promise = maybeReexecToLatest({ forwardArgs: ["dev"] }); + child.emit("close", 1, null); + + await promise; + expect(exitSpy).toHaveBeenCalledWith(1); + }); + + it("propagates a nonzero, non-NaN code when killed by a signal missing from os.constants", async () => { + const exitSpy = vi.spyOn(process, "exit").mockImplementation(((() => { + throw new Error("__exit__"); + }) as never)); + const child = mockNpxChild({ writeMarker: false }); + + const promise = maybeReexecToLatest({ forwardArgs: ["dev"] }); + // SIGSTKFLT is absent from os.constants.signals on macOS (present on Linux). + // Either way the abort must surface as a real nonzero code, never NaN. + child.emit("close", null, "SIGSTKFLT"); + + await promise; + expect(exitSpy).toHaveBeenCalledTimes(1); + const code = exitSpy.mock.calls[0][0]; + expect(typeof code).toBe("number"); + expect(Number.isNaN(code as number)).toBe(false); + expect(code as number).toBeGreaterThan(0); + }); +}); diff --git a/packages/cli/src/lib/self-update.ts b/packages/cli/src/lib/self-update.ts index 16655014c..c1ad8fb3e 100644 --- a/packages/cli/src/lib/self-update.ts +++ b/packages/cli/src/lib/self-update.ts @@ -1,4 +1,7 @@ import { spawn } from "child_process"; +import { existsSync, mkdtempSync, rmSync, writeFileSync } from "fs"; +import { constants as osConstants, tmpdir } from "os"; +import { join } from "path"; import { forwardSignals } from "./child-process.js"; import { getOwnPackage, type OwnPackage } from "./own-package.js"; @@ -7,6 +10,10 @@ import { getOwnPackage, type OwnPackage } from "./own-package.js"; export const SKIP_AUTO_UPDATE_ENV = "STACK_CLI_SKIP_AUTO_UPDATE"; // User-facing opt-out. Set to a truthy value to never auto-update. export const DISABLE_AUTO_UPDATE_ENV = "STACK_CLI_NO_AUTO_UPDATE"; +// Marker file the re-exec'd child touches on startup; its presence lets the +// parent tell a real command failure from an npx/install failure where our CLI +// never ran. Set by the parent on the child's env. +export const REEXEC_MARKER_ENV = "HEXCLAVE_CLI_REEXEC_MARKER"; const LOG_PREFIX = "[Hexclave] "; @@ -96,8 +103,10 @@ export function decideReexec(opts: { }; } -type ReexecResult = - | { exited: true, code: number } +export type ReexecResult = + // `signal` is set when the child was killed by one (e.g. a forwarded Ctrl-C), + // distinguishing an abort from an npx failure (a bare exit code can't). + | { exited: true, code: number, signal: NodeJS.Signals | null } | { exited: false, error: string }; // Quote an argument for the single cmd.exe command line that Node builds when @@ -110,19 +119,31 @@ function quoteShellArg(arg: string): string { return `"${arg.replace(/"/g, '\\"')}"`; } -function runReexec(invocation: NpxInvocation): Promise { +function runReexec(invocation: NpxInvocation, markerFile: string | null): Promise { return new Promise((resolvePromise) => { const args = invocation.shell ? invocation.args.map(quoteShellArg) : invocation.args; + const env: NodeJS.ProcessEnv = { ...process.env, [SKIP_AUTO_UPDATE_ENV]: "1" }; + if (markerFile != null) env[REEXEC_MARKER_ENV] = markerFile; const child = spawn(invocation.command, args, { stdio: "inherit", - env: { ...process.env, [SKIP_AUTO_UPDATE_ENV]: "1" }, + env, shell: invocation.shell, }); const cleanup = forwardSignals(child); - child.on("close", (code) => { + child.on("close", (code, signal) => { cleanup(); - resolvePromise({ exited: true, code: code ?? 1 }); + if (signal != null) { + // Report the conventional 128 + signal-number exit code. The lookup can + // be undefined at runtime (not every signal is in os.constants.signals on + // every platform); 128 + undefined is NaN and process.exit(NaN) coerces + // to 0, masking the abort — so fall back to a generic nonzero code. + const signalNumber = osConstants.signals[signal] as number | undefined; + const code = signalNumber != null ? 128 + signalNumber : 1; + resolvePromise({ exited: true, code, signal }); + return; + } + resolvePromise({ exited: true, code: code ?? 1, signal: null }); }); // npx missing / not spawnable: report so the caller can fall back to the // installed CLI instead of failing the whole `hexclave dev`. @@ -133,13 +154,76 @@ function runReexec(invocation: NpxInvocation): Promise { }); } -// Re-runs the requested command through `npx @latest` so the user always -// gets the latest CLI + dashboard without reinstalling, then exits with the -// child's code. The re-exec'd child carries SKIP_AUTO_UPDATE_ENV so it runs the -// freshly downloaded CLI directly instead of recursing. Best-effort: if npx -// can't be spawned (or auto-update is off / opted out) we silently fall through -// to the installed CLI. +// What the parent does once the re-exec'd npx process is done: `exit` propagates +// the child's code (our CLI ran), `fallback` runs the installed CLI inline (the +// update failed before our CLI ran). Pure so it can be unit-tested. +export type PostReexecAction = + | { kind: "exit", code: number } + | { kind: "fallback", detail: string }; + +// `started` = whether the re-exec'd CLI's startup marker appeared. A nonzero +// exit with it started is a real failure (propagate); without it — or npx not +// spawnable — the update failed before our CLI ran, so we fall back. +export function decidePostReexec(opts: { result: ReexecResult, started: boolean }): PostReexecAction { + const { result, started } = opts; + if (!result.exited) { + return { kind: "fallback", detail: `could not run npx (${result.error})` }; + } + // Killed by a forwarded signal (e.g. Ctrl-C): the user wants to abort, not + // relaunch dev on the installed CLI. Propagate instead of falling back. + if (result.signal != null) { + return { kind: "exit", code: result.code }; + } + if (result.code !== 0 && !started) { + return { kind: "fallback", detail: `npx exited with code ${result.code} before the CLI started` }; + } + return { kind: "exit", code: result.code }; +} + +// Create a unique marker dir; the child writes a file inside it on startup. +// Returns null if the temp dir can't be created, in which case the caller treats +// every exit as "started" (preserving the old always-propagate behavior). +function createReexecMarker(): { dir: string, file: string } | null { + try { + const dir = mkdtempSync(join(tmpdir(), "hexclave-reexec-")); + return { dir, file: join(dir, "started") }; + } catch { + return null; + } +} + +function cleanupReexecMarker(marker: { dir: string } | null): void { + if (marker == null) return; + try { + rmSync(marker.dir, { recursive: true, force: true }); + } catch { + // best-effort temp cleanup + } +} + +// When we're the npx-spawned child (the parent set the marker env), touch the +// marker so the parent knows the latest CLI started. No-op at top level. +export function signalReexecStartedIfChild(env: NodeJS.ProcessEnv): void { + const markerFile = env[REEXEC_MARKER_ENV]; + if (markerFile == null || markerFile === "") return; + try { + writeFileSync(markerFile, "1"); + } catch { + // best-effort; if the write fails the parent just propagates as before. + } +} + +// Re-runs the command through `npx @latest` so the user always gets the +// latest CLI + dashboard without reinstalling, then exits with the child's code. +// The child carries SKIP_AUTO_UPDATE_ENV (run directly, don't recurse) and a +// marker path to signal it started. Best-effort: if the update fails before our +// CLI runs we fall back to the installed CLI instead of failing `hexclave dev`. export async function maybeReexecToLatest(opts: { forwardArgs: string[] }): Promise { + // If npx re-exec'd us to the latest CLI, record that we started so the parent + // can tell a real command failure from an npx/install failure. + signalReexecStartedIfChild(process.env); + + let marker: { dir: string, file: string } | null = null; try { const decision = decideReexec({ env: process.env, @@ -148,13 +232,25 @@ export async function maybeReexecToLatest(opts: { forwardArgs: string[] }): Prom }); if (!decision.reexec) return; - const result = await runReexec(decision.invocation); - if (result.exited) { - process.exit(result.code); + marker = createReexecMarker(); + const result = await runReexec(decision.invocation, marker?.file ?? null); + // No marker (couldn't create one): treat as "started" to keep the old + // always-propagate behavior rather than fall back spuriously. + const started = marker == null || existsSync(marker.file); + cleanupReexecMarker(marker); + marker = null; + + const action = decidePostReexec({ result, started }); + if (action.kind === "exit") { + process.exit(action.code); } - logUpdate(`Could not run npx (${result.error}); continuing with the installed CLI.`); + logUpdate(`Auto-update skipped: ${action.detail}; continuing with the installed CLI.`); } catch { // Fail open: any unexpected error must not block the installed CLI from // running. + } finally { + // Covers early-return/throw/opt-out paths; the success path already cleaned + // up before process.exit (which skips finally). + cleanupReexecMarker(marker); } }