fix(cli): don't fall back when npx is killed by a signal; harden tests

Review follow-ups:
- A Ctrl-C forwarded to the npx child surfaced as exit code null (-> 1), which
  the fallback logic misread as an npx failure and relaunched dev on the
  installed CLI instead of aborting. Capture the terminating signal in
  runReexec and propagate (128+signum) instead of falling back.
- Scrub STACK_CLI_REEXEC_MARKER from the wrapped user command's env so the
  internal handshake var doesn't leak into arbitrary child processes.
- Add wiring tests (marker present -> propagate; absent -> fall back without
  process.exit) and a signal-abort case, so a regression in the glue between
  createReexecMarker/spawn/decidePostReexec is caught.
This commit is contained in:
Bilal Godil 2026-06-16 14:46:58 -07:00
parent a2590be68e
commit a9c22da3bd
3 changed files with 123 additions and 8 deletions

View File

@ -693,6 +693,8 @@ const childEnv = { ...process.env };
delete childEnv.HEXCLAVE_DEV_APP_COMMAND_PARENT_PID;
delete childEnv.HEXCLAVE_DEV_APP_COMMAND;
delete childEnv.HEXCLAVE_DEV_APP_COMMAND_ARGS_JSON;
// Internal to the npx auto-update handshake; never meant for the user's command.
delete childEnv.STACK_CLI_REEXEC_MARKER;
const child = spawn(command, args, {
env: childEnv,

View File

@ -1,7 +1,19 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { existsSync, mkdtempSync, readFileSync, rmSync } from "fs";
import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "fs";
import { tmpdir } from "os";
import { join } from "path";
import { EventEmitter } from "events";
import * as childProcess from "child_process";
import * as ownPackage from "./own-package.js";
// `spawn` is a non-configurable built-in export, so it can't be vi.spyOn'd;
// replace it with a vi.fn the wiring tests drive per-case. Everything else in
// child_process stays real.
vi.mock("child_process", async (importOriginal) => {
const actual = await importOriginal<typeof import("child_process")>();
return { ...actual, spawn: vi.fn() };
});
import {
buildNpxInvocation,
decidePostReexec,
@ -188,24 +200,31 @@ describe("maybeReexecToLatest", () => {
describe("decidePostReexec", () => {
it("propagates the exit code when the CLI ran to completion (code 0)", () => {
expect(decidePostReexec({ result: { exited: true, code: 0 }, started: true }))
expect(decidePostReexec({ result: { exited: true, code: 0, signal: null }, started: true }))
.toEqual({ kind: "exit", code: 0 });
});
it("propagates a nonzero exit code when the CLI actually started (real command failure)", () => {
// The re-exec'd CLI ran (marker present) and the wrapped command failed — we
// must surface that failure, not silently re-run it.
expect(decidePostReexec({ result: { exited: true, code: 1 }, started: true }))
expect(decidePostReexec({ result: { exited: true, code: 1, signal: null }, started: true }))
.toEqual({ kind: "exit", code: 1 });
});
it("falls back when npx exits nonzero before the CLI starts (e.g. Lock compromised)", () => {
// npm errored during install/lock; our CLI never ran. Don't take down dev —
// run the installed CLI instead.
const action = decidePostReexec({ result: { exited: true, code: 1 }, started: false });
const action = decidePostReexec({ result: { exited: true, code: 1, signal: null }, started: false });
expect(action.kind).toBe("fallback");
});
it("propagates (does not fall back) when npx was killed by a signal before the CLI started", () => {
// e.g. the user pressed Ctrl-C during the download. They want to abort, not
// get a fresh dev session launched on the installed CLI.
expect(decidePostReexec({ result: { exited: true, code: 130, signal: "SIGINT" }, started: false }))
.toEqual({ kind: "exit", code: 130 });
});
it("falls back when npx cannot be spawned at all", () => {
const action = decidePostReexec({ result: { exited: false, error: "spawn npx ENOENT" }, started: false });
expect(action.kind).toBe("fallback");
@ -242,3 +261,79 @@ describe("signalReexecStartedIfChild", () => {
expect(existsSync(marker)).toBe(false);
});
});
// End-to-end wiring of maybeReexecToLatest: createReexecMarker -> spawn -> the
// marker existence check -> decidePostReexec. The decision functions are pure-
// tested above; these guard against the glue regressing (e.g. inverting the
// started default, or propagating instead of falling back).
describe("maybeReexecToLatest fallback wiring", () => {
const managedKeys = [SKIP_AUTO_UPDATE_ENV, DISABLE_AUTO_UPDATE_ENV, REEXEC_MARKER_ENV];
const savedEnv: Record<string, string | undefined> = {};
beforeEach(() => {
for (const key of managedKeys) {
savedEnv[key] = process.env[key];
delete process.env[key];
}
vi.spyOn(ownPackage, "getOwnPackage").mockReturnValue({
name: "@hexclave/cli",
version: "1.0.0",
binName: "hexclave",
});
});
afterEach(() => {
for (const key of managedKeys) {
if (savedEnv[key] == null) delete process.env[key];
else process.env[key] = savedEnv[key];
}
vi.restoreAllMocks();
});
// Fake npx child. `writeMarker` simulates whether the re-exec'd CLI got far
// enough to touch the marker (as signalReexecStartedIfChild would) before the
// process closes.
function mockNpxChild(opts: { writeMarker: boolean }): EventEmitter & { pid: number, kill: () => void } {
const child = Object.assign(new EventEmitter(), { pid: 4242, kill: () => {} });
vi.mocked(childProcess.spawn).mockImplementation(((
_command: string,
_args: readonly string[],
spawnOpts: { env?: NodeJS.ProcessEnv },
) => {
if (opts.writeMarker) {
const markerFile = spawnOpts.env?.[REEXEC_MARKER_ENV];
if (markerFile != null) writeFileSync(markerFile, "1");
}
return child;
}) as unknown as typeof childProcess.spawn);
return child;
}
it("falls back (never calls process.exit) when npx exits nonzero without the CLI starting", async () => {
const exitSpy = vi.spyOn(process, "exit").mockImplementation(((() => {
throw new Error("process.exit should not have been called");
}) as never));
const child = mockNpxChild({ writeMarker: false });
const promise = maybeReexecToLatest({ forwardArgs: ["dev"] });
child.emit("close", 1, null);
await expect(promise).resolves.toBeUndefined();
expect(exitSpy).not.toHaveBeenCalled();
});
it("propagates the exit code (calls process.exit) when the CLI started then failed", async () => {
const exitSpy = vi.spyOn(process, "exit").mockImplementation(((() => {
// Throw so we don't actually exit the test runner; the SUT's catch swallows
// it and the promise resolves, which is fine — we assert on the call.
throw new Error("__exit__");
}) as never));
const child = mockNpxChild({ writeMarker: true });
const promise = maybeReexecToLatest({ forwardArgs: ["dev"] });
child.emit("close", 1, null);
await promise;
expect(exitSpy).toHaveBeenCalledWith(1);
});
});

View File

@ -1,6 +1,6 @@
import { spawn } from "child_process";
import { existsSync, mkdtempSync, rmSync, writeFileSync } from "fs";
import { tmpdir } from "os";
import { constants as osConstants, tmpdir } from "os";
import { join } from "path";
import { forwardSignals } from "./child-process.js";
import { getOwnPackage, type OwnPackage } from "./own-package.js";
@ -106,7 +106,11 @@ export function decideReexec(opts: {
}
export type ReexecResult =
| { exited: true, code: number }
// `signal` is the terminating signal when the child was killed by one (e.g. a
// Ctrl-C we forwarded), else null. It disambiguates "user/system aborted us"
// from "npx failed", which a bare exit code can't (a signal death surfaces as
// code null -> would otherwise look like a generic failure).
| { exited: true, code: number, signal: NodeJS.Signals | null }
| { exited: false, error: string };
// Quote an argument for the single cmd.exe command line that Node builds when
@ -131,9 +135,17 @@ function runReexec(invocation: NpxInvocation, markerFile: string | null): Promis
});
const cleanup = forwardSignals(child);
child.on("close", (code) => {
child.on("close", (code, signal) => {
cleanup();
resolvePromise({ exited: true, code: code ?? 1 });
if (signal != null) {
// Killed by a signal we forwarded (e.g. Ctrl-C). Report it with the
// conventional 128 + signal-number exit code so the caller can both
// recognize the abort and propagate a sensible code.
const signalNumber = osConstants.signals[signal];
resolvePromise({ exited: true, code: 128 + signalNumber, signal });
return;
}
resolvePromise({ exited: true, code: code ?? 1, signal: null });
});
// npx missing / not spawnable: report so the caller can fall back to the
// installed CLI instead of failing the whole `hexclave dev`.
@ -164,6 +176,12 @@ export function decidePostReexec(opts: { result: ReexecResult, started: boolean
if (!result.exited) {
return { kind: "fallback", detail: `could not run npx (${result.error})` };
}
// Killed by a forwarded signal (e.g. the user pressed Ctrl-C, possibly mid
// download before our CLI started): they want to abort, not silently relaunch
// dev on the installed CLI. Propagate the termination instead of falling back.
if (result.signal != null) {
return { kind: "exit", code: result.code };
}
if (result.code !== 0 && !started) {
return { kind: "fallback", detail: `npx exited with code ${result.code} before the CLI started` };
}