Merge branch 'dev' into promptless/changelog-oauth-retry-reliability

This commit is contained in:
promptless[bot] 2026-04-20 21:26:15 +00:00
commit c39a0853c0
20 changed files with 2886 additions and 212 deletions

View File

@ -22,8 +22,16 @@ concurrency:
env:
EMULATOR_IMAGE_NAME: stack-local-emulator
# Shell scripts (build-image.sh, run-emulator.sh) read these directly.
EMULATOR_IMAGE_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/images
EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run
# The stack-cli ignores EMULATOR_IMAGE_DIR/RUN_DIR and derives its own paths
# from STACK_EMULATOR_HOME. Point it at the same workspace so `emulator
# start` finds the freshly-built qcow2 from build-image.sh and cold-boots
# it, instead of auto-pulling from a prior release. CI doesn't capture a
# savevm (EMULATOR_CAPTURE_SAVEVM defaults to 0); users capture locally
# on first `stack emulator pull`.
STACK_EMULATOR_HOME: ${{ github.workspace }}/docker/local-emulator/qemu
jobs:
build:
@ -34,15 +42,16 @@ jobs:
fail-fast: false
matrix:
include:
# amd64 runs natively under KVM on ubicloud's amd64 runner.
# Both arches build on ubicloud's amd64 runner. amd64 uses KVM;
# arm64 runs under cross-arch TCG (slow, but only cloud-init
# provisioning has to complete — the boot/verify smoke test below
# is gated to amd64 because TCG can't boot Next.js in any
# reasonable time). Snapshots are NOT published — `stack emulator
# pull` captures one locally on first run, which is the only way
# to guarantee KVM/HVF/TCG + `-cpu max` compatibility on the
# user's machine.
- arch: amd64
runner: ubicloud-standard-8
# arm64 runs under cross-arch TCG on ubicloud's amd64 runner.
# No KVM for arm64 guests on an amd64 host; cortex-a72 + V8
# --jitless together sidestep the SIGTRAPs that cross-arch TCG
# hits on aggressive arm64 JIT code. Smoke test is still skipped
# because the backend can't come up reliably under cross-arch
# TCG within any sane window.
- arch: arm64
runner: ubicloud-standard-8
@ -55,10 +64,60 @@ jobs:
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Install QEMU dependencies
# Node/pnpm are needed on both arches: arm64 also runs
# generate-env-development.mjs inside build-image.sh. amd64 additionally
# builds and runs the CLI for the verification steps below.
- uses: pnpm/action-setup@v4
with:
version: 10.23.0
- uses: actions/setup-node@v4
with:
node-version: 22
cache: pnpm
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y qemu-system-x86 qemu-system-arm qemu-kvm qemu-utils genisoimage socat qemu-efi-aarch64
# qemu-utils gives us qemu-img; qemu-efi-aarch64 provides the arm64
# UEFI firmware. The actual qemu-system-* binaries come from the
# source build below — Ubuntu 24.04 ships QEMU 8.2 which predates
# the mapped-ram migration capability we rely on.
sudo apt-get install -y qemu-utils qemu-efi-aarch64 socat genisoimage zstd \
ninja-build pkg-config python3-venv \
libglib2.0-dev libpixman-1-dev libslirp-dev libepoxy-dev libgbm-dev
# QEMU 10.2.2 is required for the mapped-ram + multifd migration path
# used by the fast-resume snapshot. Cache the compiled prefix so CI
# only pays the ~5-8 min build cost once per runner image.
- name: Restore QEMU 10.2.2 cache
id: qemu-cache
uses: actions/cache@v4
with:
path: /opt/qemu
key: qemu-10.2.2-${{ runner.os }}-${{ runner.arch }}-v1
# Compile QEMU 10.2.2 into /opt/qemu (x86_64 and aarch64 system emulators
# only). Skipped when the qemu-cache restore step reported a cache hit.
# NOTE(review): the tarball is fetched over HTTPS but no checksum or signature
# is verified before `sudo make install` runs — consider pinning a SHA-256.
- name: Build QEMU 10.2.2 from source
if: steps.qemu-cache.outputs.cache-hit != 'true'
run: |
set -euxo pipefail
curl -fsSL https://download.qemu.org/qemu-10.2.2.tar.xz -o /tmp/qemu.tar.xz
mkdir -p /tmp/qemu-src
tar -xf /tmp/qemu.tar.xz -C /tmp/qemu-src --strip-components=1
cd /tmp/qemu-src
./configure --prefix=/opt/qemu \
--target-list=x86_64-softmmu,aarch64-softmmu \
--enable-kvm --enable-slirp --enable-tcg \
--disable-docs --disable-gtk --disable-sdl --disable-vnc \
--disable-guest-agent --disable-tools
make -j"$(nproc)"
sudo make install
- name: Put QEMU 10.2.2 on PATH
run: |
echo "/opt/qemu/bin" >> "$GITHUB_PATH"
/opt/qemu/bin/qemu-system-x86_64 --version
/opt/qemu/bin/qemu-system-aarch64 --version
- name: Enable KVM access
run: |
@ -82,41 +141,56 @@ jobs:
- name: Generate emulator env
run: node docker/local-emulator/generate-env-development.mjs
# arm64 runs under cross-arch TCG on an amd64 runner; the backend's
# V8 TurboFan JIT re-triggers the SIGTRAPs we dodge in migrations
# with --no-opt, and even if it didn't, boot is too slow under TCG
# to verify in any sane window. amd64 KVM already exercises the
# service stack; real arm64 hosts have KVM for end-users.
- name: Start emulator and verify
# amd64 runs under KVM on the runner so we can boot the newly-built
# image to verify it works end-to-end before publishing. arm64 runs
# under cross-arch TCG on an amd64 host, which can't reliably boot
# Next.js within any sane window — skipped.
- name: Build stack-cli (for emulator CLI)
if: matrix.arch == 'amd64'
run: |
chmod +x docker/local-emulator/qemu/run-emulator.sh
EMULATOR_ARCH=${{ matrix.arch }} \
EMULATOR_READY_TIMEOUT=3200 \
docker/local-emulator/qemu/run-emulator.sh start
pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...'
# Turbo's trailing `...` filter builds stack-cli AND its workspace
# deps (@stackframe/js, @stackframe/stack-shared, etc.) — stack-cli
# imports them at runtime from their dist/ outputs.
pnpm exec turbo run build --filter='@stackframe/stack-cli...'
- name: Start emulator and verify
if: matrix.arch == 'amd64'
env:
EMULATOR_ARCH: ${{ matrix.arch }}
EMULATOR_READY_TIMEOUT: 3200
EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }}
EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }}
run: node packages/stack-cli/dist/index.js emulator start
- name: Verify services are healthy
if: matrix.arch == 'amd64'
run: |
EMULATOR_ARCH=${{ matrix.arch }} \
docker/local-emulator/qemu/run-emulator.sh status
env:
EMULATOR_ARCH: ${{ matrix.arch }}
EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }}
EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }}
run: node packages/stack-cli/dist/index.js emulator status
- name: Stop emulator
if: always() && matrix.arch == 'amd64'
run: |
EMULATOR_ARCH=${{ matrix.arch }} \
docker/local-emulator/qemu/run-emulator.sh stop
env:
EMULATOR_ARCH: ${{ matrix.arch }}
EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }}
EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }}
run: node packages/stack-cli/dist/index.js emulator stop
- name: Package image
run: |
BASE_IMG="docker/local-emulator/qemu/images/stack-emulator-${{ matrix.arch }}.qcow2"
cp "$BASE_IMG" "stack-emulator-${{ matrix.arch }}.qcow2"
ls -lh "stack-emulator-${{ matrix.arch }}.qcow2"
- name: Upload image artifact
uses: actions/upload-artifact@v4
with:
name: qemu-emulator-${{ matrix.arch }}
path: stack-emulator-${{ matrix.arch }}.qcow2
if-no-files-found: error
retention-days: 30
compression-level: 0
@ -134,31 +208,80 @@ jobs:
steps:
- uses: actions/checkout@v6
- name: Install QEMU dependencies
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y qemu-system-x86 qemu-utils genisoimage socat
sudo apt-get install -y qemu-utils socat zstd \
ninja-build pkg-config python3-venv \
libglib2.0-dev libpixman-1-dev libslirp-dev libepoxy-dev libgbm-dev
- name: Restore QEMU 10.2.2 cache
id: qemu-cache
uses: actions/cache@v4
with:
path: /opt/qemu
key: qemu-10.2.2-${{ runner.os }}-${{ runner.arch }}-v1
# Compile QEMU 10.2.2 into /opt/qemu (x86_64 and aarch64 system emulators
# only). Skipped when the qemu-cache restore step reported a cache hit.
# NOTE(review): the tarball is fetched over HTTPS but no checksum or signature
# is verified before `sudo make install` runs — consider pinning a SHA-256.
- name: Build QEMU 10.2.2 from source
if: steps.qemu-cache.outputs.cache-hit != 'true'
run: |
set -euxo pipefail
curl -fsSL https://download.qemu.org/qemu-10.2.2.tar.xz -o /tmp/qemu.tar.xz
mkdir -p /tmp/qemu-src
tar -xf /tmp/qemu.tar.xz -C /tmp/qemu-src --strip-components=1
cd /tmp/qemu-src
./configure --prefix=/opt/qemu \
--target-list=x86_64-softmmu,aarch64-softmmu \
--enable-kvm --enable-slirp --enable-tcg \
--disable-docs --disable-gtk --disable-sdl --disable-vnc \
--disable-guest-agent --disable-tools
make -j"$(nproc)"
sudo make install
- name: Put QEMU 10.2.2 on PATH
run: |
echo "/opt/qemu/bin" >> "$GITHUB_PATH"
/opt/qemu/bin/qemu-system-x86_64 --version
- uses: pnpm/action-setup@v4
with:
version: 10.23.0
- uses: actions/setup-node@v4
with:
node-version: 22
cache: pnpm
- name: Install stack-cli deps + build
run: |
pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...'
# Turbo's trailing `...` filter builds stack-cli AND its workspace
# deps (@stackframe/js, @stackframe/stack-shared, etc.) — stack-cli
# imports them at runtime from their dist/ outputs.
pnpm exec turbo run build --filter='@stackframe/stack-cli...'
- name: Download built image
uses: actions/download-artifact@v4
with:
name: qemu-emulator-${{ matrix.arch }}
path: docker/local-emulator/qemu/images/
path: ${{ github.workspace }}/.stack-emulator-images/
- name: Generate emulator env
run: node docker/local-emulator/generate-env-development.mjs
- name: Start emulator from artifact
- name: Place qcow2 into STACK_EMULATOR_HOME layout
run: |
mkdir -p "$STACK_EMULATOR_HOME/images"
cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.qcow2" "$STACK_EMULATOR_HOME/images/"
ls -lh "$STACK_EMULATOR_HOME/images/"
# No savevm.zst artifact (users capture locally via `emulator pull`),
# so `emulator start` cold-boots the qcow2. Budget accordingly.
- name: Start emulator via CLI
run: |
chmod +x docker/local-emulator/qemu/run-emulator.sh docker/local-emulator/qemu/common.sh
EMULATOR_ARCH=${{ matrix.arch }} \
EMULATOR_READY_TIMEOUT=600 \
docker/local-emulator/qemu/run-emulator.sh start
node packages/stack-cli/dist/index.js emulator start
- name: Verify services are healthy
run: |
EMULATOR_ARCH=${{ matrix.arch }} \
docker/local-emulator/qemu/run-emulator.sh status
run: node packages/stack-cli/dist/index.js emulator status
# Probe the backend health endpoint; `curl -f` turns HTTP error responses
# (>= 400) into a non-zero exit so the step fails. The URL is quoted so the
# shell never treats `?` as a glob character.
- name: Smoke test — backend health
  run: curl -sf 'http://localhost:26701/health?db=1'
@ -174,13 +297,11 @@ jobs:
- name: Stop emulator
if: always()
run: |
EMULATOR_ARCH=${{ matrix.arch }} \
docker/local-emulator/qemu/run-emulator.sh stop
run: node packages/stack-cli/dist/index.js emulator stop
- name: Print serial log on failure
if: failure()
run: tail -100 docker/local-emulator/qemu/run/vm/serial.log 2>/dev/null || true
run: tail -100 "$STACK_EMULATOR_HOME/run/vm/serial.log" 2>/dev/null || true
publish:
name: Publish to GitHub Releases
@ -220,8 +341,14 @@ jobs:
### Images
| File | Description |
|------|-------------|
| \`stack-emulator-arm64.qcow2\` | ARM64 emulator image |
| \`stack-emulator-amd64.qcow2\` | AMD64 emulator image |
| \`stack-emulator-arm64.qcow2\` | ARM64 disk image |
| \`stack-emulator-amd64.qcow2\` | AMD64 disk image |
\`emulator pull\` downloads the qcow2 and captures a local fast-start
snapshot (~1-3 min). Subsequent \`emulator start\`s resume in ~3-8 s.
Snapshots are captured locally because QEMU migration state isn't
portable across accelerators (KVM / HVF / TCG) or \`-cpu max\`
feature sets.
### Usage
\`\`\`bash

3
.gitignore vendored
View File

@ -144,3 +144,6 @@ packages/stack/*
!packages/react/package.json
!packages/next/package.json
!packages/stack/package.json
# claude code
.claude/scheduled_tasks.lock

View File

@ -1485,6 +1485,12 @@ async function seedDummySessionActivityEvents(options: SessionActivityEventSeedO
await tx.event.createMany({
data: events,
});
}, {
// Under cross-arch arm64 TCG in the emulator qcow2 build, this batch
// takes ~10s; Prisma's default is 5s. Production (KVM/native) runs it
// in well under 1s, so the looser bound only kicks in when the DB is
// genuinely slow.
timeout: 30_000,
});
if (clickhouseClient && clickhouseRows.length > 0) {

View File

@ -58,8 +58,22 @@ ENV NEXT_PUBLIC_STACK_STRIPE_PUBLISHABLE_KEY=pk_test_mock_publishable_key_for_lo
# Build the backend NextJS app
RUN pnpm turbo run docker-build --filter=@stackframe/backend... --filter=@stackframe/dashboard...
# Build the self-host seed script
RUN cd apps/backend && pnpm build-self-host-migration-script
# Build the self-host seed script.
# tsdown -> rolldown is multi-threaded Rust; under qemu-user (cross-arch
# arm64-on-amd64) qemu-user's futex emulation occasionally deadlocks and the
# build hangs forever. Bound each attempt and retry to ride out the race.
RUN cd apps/backend && \
attempt=1; \
while :; do \
timeout --kill-after=30s 600s pnpm build-self-host-migration-script && break; \
rc=$?; \
if [ "$attempt" -ge 3 ]; then \
echo "build-self-host-migration-script failed after $attempt attempts (last rc=$rc)" >&2; \
exit "$rc"; \
fi; \
echo "build-self-host-migration-script attempt $attempt failed (rc=$rc); retrying..." >&2; \
attempt=$((attempt + 1)); \
done
# Prune node_modules for runtime: remove dev tools, heavy UI packages,
@ -263,10 +277,11 @@ COPY docker/local-emulator/run-cron-jobs.sh /run-cron-jobs.sh
COPY docker/local-emulator/entrypoint.sh /entrypoint.sh
COPY docker/local-emulator/init-services.sh /init-services.sh
COPY docker/local-emulator/start-app.sh /start-app.sh
COPY docker/local-emulator/rotate-secrets.sh /usr/local/bin/rotate-secrets
COPY docker/local-emulator/clickhouse-config.xml /etc/clickhouse-server/config.xml
COPY docker/local-emulator/clickhouse-users.xml /etc/clickhouse-server/users.xml
COPY docker/server/entrypoint.sh /app-entrypoint.sh
RUN chmod +x /entrypoint.sh /init-services.sh /start-app.sh /app-entrypoint.sh /run-cron-jobs.sh
RUN chmod +x /entrypoint.sh /init-services.sh /start-app.sh /app-entrypoint.sh /run-cron-jobs.sh /usr/local/bin/rotate-secrets
# PostgreSQL: 5432, Redis: 6379, Inbucket: 2500/9001/1100,
# Svix: 8071, ClickHouse: 8123/9009, MinIO: 9090, QStash: 8080

View File

@ -33,6 +33,12 @@ fi
# baked-in mock value from .env.development to be a usable credential against
# a running emulator. Overriding here propagates to both the backend and the
# run-cron-jobs.sh loop via supervisord's inherited environment.
export CRON_SECRET="$(openssl rand -hex 32)"
#
# In snapshot-build mode the VM supplies a deterministic placeholder via the
# --env-file so the baked snapshot doesn't contain a real secret; on resume,
# /usr/local/bin/rotate-secrets swaps in a fresh per-install value.
# Generate a random CRON_SECRET only when the environment did not supply one.
# Assignment and export are separate on purpose: `export VAR="$(cmd)"` returns
# export's own status, which would hide an openssl failure and let the
# services start with an empty secret. Abort instead.
if [ -z "${CRON_SECRET:-}" ]; then
  if ! CRON_SECRET="$(openssl rand -hex 32)" || [ -z "$CRON_SECRET" ]; then
    echo "entrypoint: failed to generate CRON_SECRET" >&2
    exit 1
  fi
  export CRON_SECRET
fi
exec /usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf

View File

@ -12,9 +12,34 @@ REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
DEBIAN_VERSION="${DEBIAN_VERSION:-13}"
DISK_SIZE="${EMULATOR_DISK_SIZE:-12G}"
RAM="${EMULATOR_BUILD_RAM:-4096}"
CPUS="${EMULATOR_BUILD_CPUS:-$(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 4)}"
PROVISION_TIMEOUT="${EMULATOR_PROVISION_TIMEOUT:-3200}"
EMULATOR_IMAGE_NAME="${EMULATOR_IMAGE_NAME:-stack-local-emulator}"
# Snapshot-ready qcow2: bake deterministic placeholder secrets (PCK/SSK/SAK/
# CRON_SECRET) into the image so runtime `rotate-secrets` can swap them for
# fresh per-install values on every `emulator start`. Without this, the image
# would ship with random shared secrets — a security regression. Cheap to
# build (no extra wall-clock cost in CI), so it stays on by default.
EMULATOR_BUILD_SNAPSHOT="${EMULATOR_BUILD_SNAPSHOT:-1}"
# Capture RAM/device state via QMP at build time, producing a
# `stack-emulator-<arch>.savevm.zst` next to the qcow2. Off by default —
# users capture locally on first `stack emulator pull` (run-emulator.sh
# capture) because migration state isn't portable across accelerators
# (KVM/HVF/TCG) or `-cpu max` feature sets, so a CI-captured snapshot
# couldn't resume reliably on arbitrary user hardware. Requires
# EMULATOR_BUILD_SNAPSHOT=1 (the script exits with an error otherwise).
EMULATOR_CAPTURE_SAVEVM="${EMULATOR_CAPTURE_SAVEVM:-0}"
if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ] && [ "$EMULATOR_BUILD_SNAPSHOT" != "1" ]; then
echo "EMULATOR_CAPTURE_SAVEVM=1 requires EMULATOR_BUILD_SNAPSHOT=1" >&2
exit 1
fi
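# Capture mode defaults SMP to 4 instead of the host CPU count so the resume
# QEMU command (which uses EMULATOR_CPUS, default 4) can match the captured
# device topology — RAM migration replay requires identical vCPU count. If
# EMULATOR_BUILD_CPUS is overridden, EMULATOR_CPUS must match it at resume.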
if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
CPUS="${EMULATOR_BUILD_CPUS:-4}"
else
CPUS="${EMULATOR_BUILD_CPUS:-$(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 4)}"
fi
RED='\033[0;31m'
GREEN='\033[0;32m'
@ -47,9 +72,18 @@ check_deps() {
command -v "$qemu_bin" >/dev/null 2>&1 || missing+=("$qemu_bin")
done
for cmd in qemu-img curl docker gzip; do
for cmd in qemu-img curl gzip; do
command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd")
done
if [ "${SKIP_DOCKER_BUILD:-0}" != "1" ]; then
command -v docker >/dev/null 2>&1 || missing+=("docker")
fi
if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
for cmd in socat zstd; do
command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd")
done
fi
if ! command -v mkisofs >/dev/null 2>&1 && ! command -v genisoimage >/dev/null 2>&1 && ! command -v hdiutil >/dev/null 2>&1; then
missing+=("mkisofs/genisoimage/hdiutil")
@ -231,6 +265,9 @@ persist_provision_logs() {
cp "$provision_log" "$IMAGE_DIR/provision-emulator-${arch}.progress.log" 2>/dev/null || true
}
# qmp_session() and capture_vm_state() live in common.sh; both build-image.sh
# (CI) and run-emulator.sh (stack emulator pull local capture) call them.
build_one() {
local arch="$1"
local base_img="$IMAGE_DIR/debian-${DEBIAN_VERSION}-base-${arch}.qcow2"
@ -245,7 +282,9 @@ build_one() {
local tmp_img="$tmp_dir/disk.qcow2"
local seed_iso="$tmp_dir/seed.iso"
local bundle_iso="$tmp_dir/bundle.iso"
local runtime_iso="$tmp_dir/runtime.iso"
local bundle_dir="$tmp_dir/bundle"
local runtime_cfg_dir="$tmp_dir/runtime"
local serial_log="$tmp_dir/serial.log"
local provision_log="$tmp_dir/provision.log"
local pidfile="$tmp_dir/qemu.pid"
@ -269,16 +308,83 @@ build_one() {
mkdir -p "$bundle_dir"
cp "$bundle_tgz" "$bundle_dir/img.tgz"
cp "$BUILD_ENV_FILE" "$bundle_dir/build.env"
# build.env starts as a verbatim copy of $BUILD_ENV_FILE. If that file does
# not end with a newline, a plain append would glue the first flag onto the
# last existing variable and a line-anchored check for the flag would never
# match. Terminate the last line before appending anything.
if [ -s "$bundle_dir/build.env" ] && [ -n "$(tail -c 1 "$bundle_dir/build.env")" ]; then
  printf '\n' >> "$bundle_dir/build.env"
fi
if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then
  # Guest reads this flag to use deterministic placeholder secrets so that
  # runtime rotate-secrets can swap them out per-install.
  printf 'STACK_EMULATOR_BUILD_SNAPSHOT=1\n' >> "$bundle_dir/build.env"
fi
if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
  # Guest reads this flag to start stack.service during provision-build,
  # wait for backend+dashboard health, then block forever waiting for the
  # host to capture VM state via QMP (stop + migrate + quit).
  printf 'STACK_EMULATOR_CAPTURE_SAVEVM=1\n' >> "$bundle_dir/build.env"
fi
# Tell the guest which arch it's being built for so cross-arch (TCG) builds
# can skip the smoke test, which isn't reliable under software emulation.
printf 'STACK_EMULATOR_BUILD_ARCH=%s\n' "$arch" > "$bundle_dir/build-arch.env"
make_iso_from_dir "$bundle_iso" "STACKBUNDLE" "$bundle_dir"
if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
# render-stack-env (inside the guest) mounts a STACKCFG disk containing
# runtime.env + base.env. At runtime the host-side run-emulator.sh builds
# this ISO; in capture mode stack.service also starts during the build,
# so we must provide the same shape here. Values mirror the defaults the
# runtime would supply — port-prefix 81 and matching host-port numbers
# (unused at build time since nothing is port-forwarded, but
# render-stack-env embeds them into /run/stack-auth/local-emulator.env).
mkdir -p "$runtime_cfg_dir"
{
printf 'STACK_EMULATOR_PORT_PREFIX=81\n'
printf 'STACK_EMULATOR_DASHBOARD_HOST_PORT=26700\n'
printf 'STACK_EMULATOR_BACKEND_HOST_PORT=26701\n'
printf 'STACK_EMULATOR_MINIO_HOST_PORT=26702\n'
printf 'STACK_EMULATOR_INBUCKET_HOST_PORT=26703\n'
printf 'STACK_EMULATOR_VM_DIR_HOST=\n'
} > "$runtime_cfg_dir/runtime.env"
cp "$BUILD_ENV_FILE" "$runtime_cfg_dir/base.env"
make_iso_from_dir "$runtime_iso" "STACKCFG" "$runtime_cfg_dir"
fi
: > "$serial_log"
: > "$provision_log"
qemu_base="$(qemu_cmd_prefix_for_arch "$arch")"
log "QEMU command prefix (${arch}): $qemu_base"
local monitor_sock="$tmp_dir/monitor.sock"
local qga_sock="$tmp_dir/qga.sock"
local snapshot_args=()
local runtime_disk_args=()
local virtfs_args=(-virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none")
if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
# STACKCFG runtime ISO lets stack.service start during the build — same
# disk shape render-stack-env expects at runtime. Placed before netdev
# so its virtio-blk PCI slot precedes virtio-net-pci, matching the
# resume argv order in run-emulator.sh (slots must line up or
# migrate-incoming fails the device-tree check).
runtime_disk_args=(
-drive "file=$runtime_iso,format=raw,if=virtio,readonly=on"
)
# QMP for stop/migrate/quit; virtio-serial + QGA channel so we can exec
# inside the guest post-resume (only needed at runtime but harmless here).
snapshot_args=(
-chardev "socket,id=monitor,path=$monitor_sock,server=on,wait=off"
-mon "chardev=monitor,mode=control"
-chardev "socket,path=$qga_sock,server=on,wait=off,id=qga0"
-device virtio-serial
-device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0"
# Empty PCIe root port reserved for runtime hot-plug of virtio-9p.
# The integrated pcie.0 bus on q35 / arm64-virt is static — hotplug
# only works through a root port. Must be present at snapshot capture
# so the resumed device tree matches.
-device "pcie-root-port,id=hostfs-port,bus=pcie.0,chassis=1"
)
# QEMU disallows migration when virtfs is mounted in the guest — virtfs
# has guest-side state (open handles, mount table) that isn't migratable.
# Drop the host fs mount in capture mode; STACK_SERVICES_READY still
# arrives on the serial log so contains_provision_marker can detect it.
virtfs_args=()
fi
# shellcheck disable=SC2086
$qemu_base \
-boot order=c \
@ -287,18 +393,24 @@ build_one() {
-drive "file=$tmp_img,format=qcow2,if=virtio,discard=on,detect-zeroes=unmap" \
-drive "file=$seed_iso,format=raw,if=virtio,readonly=on" \
-drive "file=$bundle_iso,format=raw,if=virtio,readonly=on" \
${runtime_disk_args[@]+"${runtime_disk_args[@]}"} \
-netdev user,id=net0 \
-device virtio-net-pci,netdev=net0 \
-virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none" \
${virtfs_args[@]+"${virtfs_args[@]}"} \
${snapshot_args[@]+"${snapshot_args[@]}"} \
-serial "file:$serial_log" \
-display none \
-daemonize \
-pidfile "$pidfile"
pid="$(cat "$pidfile")"
local ready_marker="STACK_CLOUD_INIT_DONE"
if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
ready_marker="STACK_SERVICES_READY"
fi
elapsed=0
while [ "$elapsed" -lt "$PROVISION_TIMEOUT" ]; do
if contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_DONE"; then
if contains_provision_marker "$provision_log" "$serial_log" "$ready_marker"; then
break
fi
@ -312,7 +424,7 @@ build_one() {
if [ "$total_build_lines" -gt "$last_build_lines" ]; then
echo ""
sed -n "$((last_build_lines + 1)),${total_build_lines}p" "$provision_log" 2>/dev/null | while IFS= read -r msg; do
if [ "$msg" = "STACK_CLOUD_INIT_DONE" ]; then
if [ "$msg" = "STACK_CLOUD_INIT_DONE" ] || [ "$msg" = "STACK_SERVICES_READY" ]; then
continue
fi
printf " [%3ds] %s\n" "$elapsed" "$msg"
@ -332,7 +444,7 @@ build_one() {
done
echo ""
if ! contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_DONE"; then
if ! contains_provision_marker "$provision_log" "$serial_log" "$ready_marker"; then
if [ "$guest_failed" = true ]; then
err "Guest provisioning reported failure for emulator (${arch})"
elif [ "$guest_exited" = true ]; then
@ -358,17 +470,69 @@ build_one() {
exit 1
fi
local shutdown_wait=0
while [ "$shutdown_wait" -lt 90 ] && kill -0 "$pid" 2>/dev/null; do
sleep 1
shutdown_wait=$((shutdown_wait + 1))
done
if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
local savevm_file="$IMAGE_DIR/stack-emulator-${arch}.savevm.zst"
local savevm_raw="$tmp_dir/state.raw"
local savevm_tmp="$tmp_dir/state.zst"
if kill -0 "$pid" 2>/dev/null; then
warn "Guest did not power off cleanly; forcing shutdown."
kill "$pid" 2>/dev/null || true
sleep 2
kill -9 "$pid" 2>/dev/null || true
# Capture raw RAM/device state via QEMU's native file: migration; then
# compress on the host side. Avoids any reliance on QEMU spawning a shell
# that has zstd in PATH.
log "Capturing VM state via QMP (${arch})..."
if ! capture_vm_state "$monitor_sock" "$savevm_raw"; then
err "Failed to capture VM state for ${arch}"
if kill -0 "$pid" 2>/dev/null; then
kill "$pid" 2>/dev/null || true
sleep 1
kill -9 "$pid" 2>/dev/null || true
fi
persist_provision_logs "$arch" "$serial_log" "$provision_log"
rm -rf "$tmp_dir"
exit 1
fi
# QEMU was told to `quit` over QMP; wait up to 30s for the process to exit.
local shutdown_wait=0
while [ "$shutdown_wait" -lt 30 ] && kill -0 "$pid" 2>/dev/null; do
sleep 1
shutdown_wait=$((shutdown_wait + 1))
done
if kill -0 "$pid" 2>/dev/null; then
warn "QEMU did not exit after quit; forcing."
kill "$pid" 2>/dev/null || true
sleep 2
kill -9 "$pid" 2>/dev/null || true
fi
if [ ! -s "$savevm_raw" ]; then
err "VM state file missing or empty at $savevm_raw"
persist_provision_logs "$arch" "$serial_log" "$provision_log"
rm -rf "$tmp_dir"
exit 1
fi
# zstd -1 trades ~30% larger file for ~40% faster decompression at resume.
# For shipping-and-decompress-once-per-start, that's the right balance.
log "Compressing VM state with zstd..."
zstd -1 -T0 --rm -o "$savevm_tmp" "$savevm_raw"
mv "$savevm_tmp" "$savevm_file"
local savevm_size
savevm_size="$(du -h "$savevm_file" | cut -f1)"
log "Saved VM state: $savevm_file (${savevm_size})"
else
local shutdown_wait=0
while [ "$shutdown_wait" -lt 90 ] && kill -0 "$pid" 2>/dev/null; do
sleep 1
shutdown_wait=$((shutdown_wait + 1))
done
if kill -0 "$pid" 2>/dev/null; then
warn "Guest did not power off cleanly; forcing shutdown."
kill "$pid" 2>/dev/null || true
sleep 2
kill -9 "$pid" 2>/dev/null || true
fi
fi
persist_provision_logs "$arch" "$serial_log" "$provision_log"
@ -389,8 +553,16 @@ BUILD_ENV_FILE="$REPO_ROOT/docker/local-emulator/.env.development"
for arch in "${TARGET_ARCHS[@]}"; do
local_base="$IMAGE_DIR/debian-${DEBIAN_VERSION}-base-${arch}.qcow2"
download_cloud_image "$arch" "$local_base"
build_local_emulator_image "$arch"
prepare_bundle_artifacts "$arch"
if [ "${SKIP_DOCKER_BUILD:-0}" = "1" ]; then
log "SKIP_DOCKER_BUILD=1: reusing pre-built Docker bundle"
if [ ! -f "$IMAGE_DIR/emulator-${arch}-docker-images.tar.gz" ]; then
err "Pre-built bundle not found: $IMAGE_DIR/emulator-${arch}-docker-images.tar.gz"
exit 1
fi
else
build_local_emulator_image "$arch"
prepare_bundle_artifacts "$arch"
fi
build_one "$arch"
done

View File

@ -75,12 +75,24 @@ write_files:
# ssk/sak: required by the emulator's own dashboard (StackServerApp
# construction throws without them). Not used by user-app flows; the
# /local-emulator/project route mints separate per-project credentials.
#
# Snapshot-build mode (STACK_EMULATOR_BUILD_SNAPSHOT=1 in /etc/stack-build.env):
# use deterministic placeholder hex strings instead of random values. The
# built image then contains these placeholders; at every `emulator start`
# resume the host generates fresh per-install secrets and
# /usr/local/bin/rotate-secrets (inside the stack container) swaps them in.
umask 077
for key in internal-pck internal-ssk internal-sak; do
if [ ! -s "/var/lib/stack-auth/$key" ]; then
openssl rand -hex 32 > "/var/lib/stack-auth/$key"
fi
done
if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_BUILD_SNAPSHOT=1' /etc/stack-build.env 2>/dev/null; then
printf '%s' '00000000000000000000000000000000ffffffffffffffffffffffffffffffff' > /var/lib/stack-auth/internal-pck
printf '%s' '00000000000000000000000000000000eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee' > /var/lib/stack-auth/internal-ssk
printf '%s' '00000000000000000000000000000000dddddddddddddddddddddddddddddddd' > /var/lib/stack-auth/internal-sak
else
for key in internal-pck internal-ssk internal-sak; do
if [ ! -s "/var/lib/stack-auth/$key" ]; then
openssl rand -hex 32 > "/var/lib/stack-auth/$key"
fi
done
fi
INTERNAL_PCK="$(cat /var/lib/stack-auth/internal-pck)"
INTERNAL_SSK="$(cat /var/lib/stack-auth/internal-ssk)"
INTERNAL_SAK="$(cat /var/lib/stack-auth/internal-sak)"
@ -92,6 +104,15 @@ write_files:
HOST_SERVICES_HOST=10.0.2.2
P="$STACK_EMULATOR_PORT_PREFIX"
# Snapshot-build mode: ship a deterministic placeholder CRON_SECRET so the
# baked VM contains a known-public value that rotate-secrets swaps out on
# every resume. Outside snapshot-build mode, leave CRON_SECRET unset so
# docker/local-emulator/entrypoint.sh generates a fresh random one.
EMULATOR_CRON_SECRET=""
if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_BUILD_SNAPSHOT=1' /etc/stack-build.env 2>/dev/null; then
EMULATOR_CRON_SECRET="00000000000000000000000000000000cccccccccccccccccccccccccccccccc"
fi
{
# Static vars from base config and runtime (e.g. API keys, feature flags)
cat /mnt/stack-runtime/base.env
@ -99,6 +120,9 @@ write_files:
printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$INTERNAL_PCK"
printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$INTERNAL_SSK"
printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$INTERNAL_SAK"
if [ -n "$EMULATOR_CRON_SECRET" ]; then
printf 'CRON_SECRET=%s\n' "$EMULATOR_CRON_SECRET"
fi
# Computed vars — depend on port prefix or deps host
# Host-side ports (for browser URLs — browser runs on host, not in VM)
@ -142,14 +166,45 @@ write_files:
permissions: '0755'
content: |
#!/bin/bash
set -euo pipefail
# Mount the host filesystem at /host. Two modes:
# (no args) — cold-boot: bind /host on itself, make it a shared
# mount point, then mount virtio-9p on top. The
# bind+shared step is what lets the docker bind
# mount (-v /host:/host:rshared) receive later
# propagation events.
# --post-resume — snapshot-resume: /host is already shared (set up
# at build time and preserved across the snapshot,
# plus the docker bind mount has rshared
# propagation). The host has just hot-plugged
# virtio-9p; mount it on /host and the new mount
# propagates into the running container.
set -uo pipefail
mkdir -p /host
# Idempotent: bind /host on itself once so it becomes a mount point
# with its own propagation, then make it shared. mount --make-shared
# requires a mount point, hence the bind first.
if ! mountpoint -q /host; then
if ! mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host; then
echo "Failed to mount host filesystem at /host" >&2
exit 1
fi
mount --bind /host /host
fi
mount --make-shared /host
if [ "${1:-}" = "--post-resume" ]; then
if mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host; then
exit 0
fi
echo "post-resume 9p mount failed" >&2
exit 1
fi
# Cold boot. In snapshot-build mode the host detaches virtfs (QEMU
# disallows migration while it's mounted), so the 9p mount may not be
# available — tolerate that and fall through to an empty /host.
if mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host 2>/dev/null; then
exit 0
fi
echo "host filesystem unavailable; continuing with empty /host" >&2
exit 0
- path: /usr/local/bin/run-stack-container
permissions: '0755'
@ -190,7 +245,7 @@ write_files:
-v stack-clickhouse-data:/data/clickhouse \
-v stack-minio-data:/data/minio \
-v stack-inbucket-data:/data/inbucket \
-v /host:/host \
-v /host:/host:rshared \
stack-local-emulator 2>&1 | tee -a "$host_log"
else
exec docker run \
@ -204,7 +259,7 @@ write_files:
-v stack-clickhouse-data:/data/clickhouse \
-v stack-minio-data:/data/minio \
-v stack-inbucket-data:/data/inbucket \
-v /host:/host \
-v /host:/host:rshared \
stack-local-emulator
fi
@ -522,6 +577,74 @@ write_files:
fstrim -av 2>/dev/null || true
log "slim-docker-image done."
- path: /usr/local/bin/wait-for-stack-ready
permissions: '0755'
content: |
#!/bin/bash
# Block until the stack container answers healthy on the guest's own
# loopback: backend /health?db=1 and dashboard /handler/sign-in must both
# return HTTP 200. Exits 0 once ready; exits 1 after STACK_READY_TIMEOUT
# seconds, dumping container/service diagnostics to the provision log first.
set -uo pipefail

TIMEOUT="${STACK_READY_TIMEOUT:-600}"
BACKEND_PORT="${STACK_READY_BACKEND_PORT:-8102}"
DASHBOARD_PORT="${STACK_READY_DASHBOARD_PORT:-8101}"

log() { /usr/local/bin/log-provision "wait-for-stack-ready: $*"; }

# Print the HTTP status code curl observed for URL $1; never fails.
http_code() {
  curl -s -o /dev/null -w '%{http_code}' --max-time 3 "$1" 2>/dev/null || true
}

# Stream container and service state into the provision log (best effort).
dump_diagnostics() {
  docker ps -a 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: ps" || true
  docker logs --tail 200 stack 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: stack" || true
  systemctl status stack.service --no-pager -l 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: svc" || true
  journalctl -u stack.service --no-pager -n 100 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: jrnl" || true
  docker image ls 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: img" || true
}

start=$SECONDS
next_heartbeat=$((start + 30))
log "waiting for backend:$BACKEND_PORT and dashboard:$DASHBOARD_PORT (timeout=${TIMEOUT}s)"

# Probe both endpoints each round; the loop body only runs while at least
# one of them is not yet returning 200.
until
  backend_code=$(http_code "http://127.0.0.1:${BACKEND_PORT}/health?db=1")
  dashboard_code=$(http_code "http://127.0.0.1:${DASHBOARD_PORT}/handler/sign-in")
  [ "$backend_code" = "200" ] && [ "$dashboard_code" = "200" ]
do
  # Progress line at most every 30 seconds.
  if [ "$SECONDS" -ge "$next_heartbeat" ]; then
    log "still waiting (backend=$backend_code dashboard=$dashboard_code, $((SECONDS - start))s elapsed)"
    next_heartbeat=$((SECONDS + 30))
  fi
  if [ "$((SECONDS - start))" -ge "$TIMEOUT" ]; then
    log "TIMEOUT after $((SECONDS - start))s (backend=$backend_code dashboard=$dashboard_code)"
    dump_diagnostics
    exit 1
  fi
  sleep 2
done

log "ready ($((SECONDS - start))s)"
exit 0
- path: /usr/local/bin/trigger-fast-rotate
permissions: '0755'
content: |
#!/bin/bash
# Called via qemu-guest-agent on every snapshot resume. Reads fresh
# secrets from stdin (key=value lines, written by the host via QGA's
# guest-exec input-data) and execs rotate-secrets inside the stack
# container with those values exported.
set -euo pipefail

tmp="$(mktemp /var/run/stack-fresh-XXXXXX.env)"
# The file holds secrets: make sure it is removed on every exit path,
# including an early abort from `set -e` if the payload fails to parse.
trap 'rm -f "$tmp"' EXIT
# Tighten permissions before any secret is written into the file.
chmod 0600 "$tmp"
cat > "$tmp"

# Export every variable assigned while sourcing the payload.
set -a
# shellcheck disable=SC1090
source "$tmp"
set +a

rm -f "$tmp"
# exec replaces this shell, so nothing is left for the trap to clean up.
trap - EXIT

# `-e NAME` without a value forwards the variable from this environment.
exec docker exec \
  -e STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY \
  -e STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY \
  -e STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY \
  -e CRON_SECRET \
  stack /usr/local/bin/rotate-secrets
- path: /etc/systemd/system/stack.service
content: |
[Unit]
@ -591,6 +714,14 @@ write_files:
systemctl disable --now ssh || true
systemctl mask ssh || true
# qemu-guest-agent: used by the host to inject fresh secrets + trigger
# rotate-secrets after a snapshot resume. Must be running INSIDE the VM
# at snapshot capture time — the virtio-serial port's "open" state is
# part of the migrated device state. If QGA wasn't connected at capture,
# the resumed VM's port stays closed and the host can't reach it.
systemctl enable qemu-guest-agent || true
systemctl start qemu-guest-agent || true
log_provision "installing emulator containers"
bash /usr/local/bin/install-emulator-containers
@ -603,6 +734,53 @@ write_files:
log_provision "starting slim-docker-image"
bash /usr/local/bin/slim-docker-image
# Capture mode: bring the stack container up, wait for full
# readiness, emit STACK_SERVICES_READY, then wait indefinitely for the
# host build script to capture VM state over QMP (stop + migrate + quit).
# The VM never shuts itself down in this path — the host tears it down
# once the savevm file has been written.
#
# CI never sets STACK_EMULATOR_CAPTURE_SAVEVM=1 (snapshots aren't
# portable across accelerators, so they're captured locally on first
# `stack emulator pull`). This branch only fires for opt-in local
# builds run with EMULATOR_CAPTURE_SAVEVM=1.
if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_CAPTURE_SAVEVM=1' /etc/stack-build.env 2>/dev/null; then
log_provision "capture mode: starting stack.service"
systemctl start stack.service || true
log_provision "waiting for backend + dashboard to be ready"
if ! /usr/local/bin/wait-for-stack-ready; then
log_provision "ERROR: stack services did not become ready"
exit 1
fi
# Ensure qemu-guest-agent is running so its virtio-serial port stays
# "open" in the snapshot — the host needs that port at runtime to
# trigger rotate-secrets.
log_provision "ensuring qemu-guest-agent is up"
systemctl restart qemu-guest-agent || true
sleep 2
if ! systemctl is-active --quiet qemu-guest-agent; then
log_provision "ERROR: qemu-guest-agent failed to start"
systemctl status qemu-guest-agent --no-pager -l 2>&1 | /usr/local/bin/log-provision-stream "qga"
exit 1
fi
log_provision "qemu-guest-agent active"
log_provision "services ready; signalling STACK_SERVICES_READY"
if [ -n "${STACK_PROVISION_LOG_FILE:-}" ]; then
printf '%s\n' "STACK_SERVICES_READY" >> "$STACK_PROVISION_LOG_FILE"
fi
write_marker_to_consoles "STACK_SERVICES_READY"
sync || true
# Clear the EXIT trap so the cleanup path doesn't mark this as failed
# when the host powers us off via QMP quit.
trap - EXIT
# Block forever; host will issue qmp quit after migrate completes.
while true; do sleep 3600; done
fi
log_provision "build pipeline complete"
if [ -n "${STACK_PROVISION_LOG_FILE:-}" ]; then
printf '%s\n' "STACK_CLOUD_INIT_DONE" >> "$STACK_PROVISION_LOG_FILE"

View File

@ -68,3 +68,142 @@ make_iso_from_dir() {
exit 1
fi
}
# Pipe a batch of QMP commands to the monitor socket and print QEMU's replies.
#   $1    — path to the QMP UNIX socket.
#   stdin — newline-separated JSON commands. This function forwards them
#           verbatim, so the caller must put qmp_capabilities first to leave
#           negotiation mode.
# The write side is held open for 0.5s after the payload so socat does not
# hang up before QEMU has answered.
#
# Callers: build-image.sh capture flow, run-emulator.sh cmd_capture.
qmp_session() {
  local monitor_sock="$1" commands
  commands="$(cat)"
  {
    printf '%s\n' "$commands"
    sleep 0.5
  } | socat -t30 - "UNIX-CONNECT:${monitor_sock}"
}
# Drive the snapshot capture over QMP:
# 1. qmp_capabilities — exit negotiation mode (sent at the start of every session).
# 2. stop — pause the VM so no more disk writes happen.
# 3. migrate-set-capabilities / migrate-set-parameters — mapped-ram + multifd.
# 4. migrate to file:<path> — streams RAM/device state out.
# 5. Poll query-migrate until status=completed (or failed/cancelled/timeout).
# 6. quit — terminate QEMU cleanly.
#
# Arguments:
#   $1 — QMP monitor UNIX socket.
#   $2 — path handed to QEMU's file: migration URI.
# Returns 0 once the migration completed and quit was sent, 1 on any failure
# (QEMU is left running in that case).
#
# Depends on log/err/warn being defined by the sourcing script.
capture_vm_state() {
  local sock="$1"
  local guest_path="$2"

  if [ ! -S "$sock" ]; then
    err "QMP monitor socket missing: $sock"
    return 1
  fi

  log " QMP: stopping VM..."
  {
    printf '%s\n' '{"execute":"qmp_capabilities"}'
    printf '%s\n' '{"execute":"stop"}'
  } | qmp_session "$sock" >/dev/null || {
    err "QMP stop failed"
    return 1
  }

  log " QMP: enabling mapped-ram + multifd for fast resume..."
  # mapped-ram: writes each RAM page to a fixed offset in the output file
  # (vs the legacy streamed format). This lets the target QEMU mmap the file
  # and fault pages lazily — and combined with multifd, load RAM in parallel.
  # multifd-channels=4 matches our pinned SMP so the channels don't starve
  # each other on the target's 4 vCPUs.
  local caps_cmd params_cmd
  caps_cmd='{"execute":"migrate-set-capabilities","arguments":{"capabilities":[{"capability":"mapped-ram","state":true},{"capability":"multifd","state":true}]}}'
  params_cmd='{"execute":"migrate-set-parameters","arguments":{"multifd-channels":4}}'
  local setup_resp
  setup_resp=$({
    printf '%s\n' '{"execute":"qmp_capabilities"}'
    printf '%s\n' "$caps_cmd"
    printf '%s\n' "$params_cmd"
  } | qmp_session "$sock") || {
    err "QMP capabilities setup failed"
    return 1
  }
  if printf '%s' "$setup_resp" | grep -q '"error"[[:space:]]*:'; then
    err "QMP capabilities returned error: $setup_resp"
    return 1
  fi

  log " QMP: migrating RAM state to ${guest_path}..."
  # Use file: migration (native QEMU) instead of exec: to avoid relying on a
  # spawned shell finding zstd in PATH. Compressed as a separate host step
  # after migrate completes.
  # Escape backslashes and double quotes so an unusual path cannot produce
  # malformed JSON.
  local json_path=${guest_path//\\/\\\\}
  json_path=${json_path//\"/\\\"}
  local migrate_cmd
  migrate_cmd=$(printf '{"execute":"migrate","arguments":{"uri":"file:%s"}}' "$json_path")
  local migrate_resp
  migrate_resp=$({
    printf '%s\n' '{"execute":"qmp_capabilities"}'
    printf '%s\n' "$migrate_cmd"
  } | qmp_session "$sock") || {
    err "QMP migrate failed"
    return 1
  }
  if printf '%s' "$migrate_resp" | grep -q '"error"[[:space:]]*:'; then
    err "QMP migrate returned error: $migrate_resp"
    return 1
  fi

  # Poll migration status. Migration runs in the background after the
  # migrate command returns; we watch for "completed" or "failed".
  # Elapsed time is taken from bash's SECONDS so the timeout and the logged
  # durations reflect wall-clock time, including the time each QMP round
  # trip takes, rather than only the sleeps between polls.
  local migrate_timeout=600
  local started=$SECONDS
  local waited=0
  local last_heartbeat=0
  local status_line status transferred
  while [ "$waited" -lt "$migrate_timeout" ]; do
    status_line=$({
      printf '%s\n' '{"execute":"qmp_capabilities"}'
      printf '%s\n' '{"execute":"query-migrate"}'
    } | qmp_session "$sock" 2>/dev/null || true)
    # `|| true`: a response without a status field must yield an empty
    # string, not abort a caller running with errexit + pipefail.
    status="$(printf '%s\n' "$status_line" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/' || true)"
    case "$status" in
      completed)
        log " QMP: migrate completed (${waited}s)"
        break
        ;;
      failed|cancelled)
        err " QMP: migrate ended with status=$status"
        err " QMP response: $status_line"
        return 1
        ;;
      active|setup|device|"")
        # still running
        if [ "$((waited - last_heartbeat))" -ge 30 ]; then
          transferred=$(printf '%s' "$status_line" | grep -o '"transferred"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/' || true)
          log " QMP: migrate in progress (${waited}s, status=${status:-init}, transferred=${transferred:-0})"
          last_heartbeat=$waited
        fi
        ;;
      *)
        log " QMP: migrate status=$status (${waited}s)"
        ;;
    esac
    sleep 2
    waited=$((SECONDS - started))
  done

  # The loop only breaks early on "completed", so reaching the limit here
  # means the migration never finished.
  if [ "$waited" -ge "$migrate_timeout" ]; then
    err "QMP migrate timed out after ${migrate_timeout}s"
    err "Last query-migrate response: $({
      printf '%s\n' '{"execute":"qmp_capabilities"}'
      printf '%s\n' '{"execute":"query-migrate"}'
    } | qmp_session "$sock" 2>/dev/null || true)"
    return 1
  fi

  log " QMP: quitting VM..."
  # Best effort: the session may fail because QEMU exits while handling quit.
  {
    printf '%s\n' '{"execute":"qmp_capabilities"}'
    printf '%s\n' '{"execute":"quit"}'
  } | qmp_session "$sock" >/dev/null || true
  return 0
}

View File

@ -12,6 +12,22 @@ VM_RAM="${EMULATOR_RAM:-4096}"
VM_CPUS="${EMULATOR_CPUS:-4}"
PORT_PREFIX="${PORT_PREFIX:-${NEXT_PUBLIC_STACK_PORT_PREFIX:-81}}"
READY_TIMEOUT="${EMULATOR_READY_TIMEOUT:-240}"
# Shorter timeout when resuming from a snapshot: services are already running,
# we only need to wait for rotate-secrets + Node restart (~3-10s).
SNAPSHOT_READY_TIMEOUT="${EMULATOR_SNAPSHOT_READY_TIMEOUT:-45}"
# Set to 1 to force a cold boot and ignore any shipped savevm file.
EMULATOR_NO_SNAPSHOT="${EMULATOR_NO_SNAPSHOT:-0}"
# Skip the post-resume secret rotation. Keeps the baked placeholder secrets
# in place — acceptable for tests and CI that don't reach the emulator over
# a shared network. Shaves ~2-3s off `emulator start`.
EMULATOR_NO_ROTATION="${EMULATOR_NO_ROTATION:-0}"
# Internal: set to 1 by cmd_capture to build QEMU with the snapshot-compatible
# device layout (phantom ISOs, no virtfs, pcie-root-port, pinned 4096MB/4CPU)
# without the `-incoming defer` that resume mode adds. The captured snapshot
# must be byte-compatible with what the resume path will later feed to QEMU.
EMULATOR_CAPTURING_SNAPSHOT="${EMULATOR_CAPTURING_SNAPSHOT:-0}"
# Force re-capture even if a .savevm.zst is already present.
EMULATOR_FORCE_CAPTURE="${EMULATOR_FORCE_CAPTURE:-0}"
# Fixed host-side ports for the QEMU emulator (267xx range).
# Only user-facing services are exposed; internal deps stay inside the VM.
@ -62,10 +78,67 @@ image_path() {
echo "$IMAGE_DIR/stack-emulator-$ARCH.qcow2"
}
# Print the location of the compressed snapshot for the current $ARCH
# inside $IMAGE_DIR.
savevm_path() {
  printf '%s\n' "${IMAGE_DIR}/stack-emulator-${ARCH}.savevm.zst"
}
# Print the location of the decompressed mapped-ram cache for the current
# $ARCH. The cache is produced from the .zst on first resume and reused
# afterwards: mapped-ram needs a seekable file, so the snapshot cannot be
# streamed through zstd while multifd is in use.
savevm_raw_path() {
  printf '%s\n' "${IMAGE_DIR}/stack-emulator-${ARCH}.savevm.raw"
}
# Print the location of the runtime config ISO inside $VM_DIR.
runtime_iso_path() {
  printf '%s\n' "${VM_DIR}/runtime-config.iso"
}
# Succeeds when a snapshot should be resumed: snapshots are not disabled
# (EMULATOR_NO_SNAPSHOT), we are not currently capturing one
# (EMULATOR_CAPTURING_SNAPSHOT), and a non-empty .savevm.zst exists.
snapshot_available() {
  if [ "$EMULATOR_NO_SNAPSHOT" = "1" ] || [ "$EMULATOR_CAPTURING_SNAPSHOT" = "1" ]; then
    return 1
  fi
  [ -s "$(savevm_path)" ]
}
# Succeeds when QEMU has to be launched with the snapshot-compatible device
# layout: either a snapshot is about to be resumed, or a new one is being
# captured. The two cases differ only in `-incoming defer` (resume adds it,
# capture does not); phantom ISOs, no virtfs, pcie-root-port and pinned
# RAM/SMP are common to both.
snapshot_layout() {
  if snapshot_available; then
    return 0
  fi
  [ "$EMULATOR_CAPTURING_SNAPSHOT" = "1" ]
}
# Ensure the decompressed mapped-ram cache is up-to-date with the shipped
# .zst. Compares mtime: if .raw is older, empty or missing, re-decompress.
# Returns 0 when the cache is usable, 1 if decompression failed (the partial
# temp file is removed in that case).
ensure_savevm_raw() {
  local zst raw
  zst="$(savevm_path)"
  raw="$(savevm_raw_path)"

  # stat's format flag differs between BSD (macOS) and GNU; a missing file
  # is reported as timestamp 0.
  local zst_ts raw_ts
  case "$HOST_OS" in
    darwin)
      zst_ts="$(stat -f '%m' "$zst" 2>/dev/null || echo 0)"
      raw_ts="$(stat -f '%m' "$raw" 2>/dev/null || echo 0)"
      ;;
    *)
      zst_ts="$(stat -c '%Y' "$zst" 2>/dev/null || echo 0)"
      raw_ts="$(stat -c '%Y' "$raw" 2>/dev/null || echo 0)"
      ;;
  esac

  if [ -s "$raw" ] && [ "$raw_ts" -ge "$zst_ts" ]; then
    return 0
  fi

  log "Decompressing snapshot cache (one-time; ~2-3GB sparse)..."
  # Decompress into a temp file and rename, so an interrupted run never
  # leaves a truncated cache under the final name.
  local tmp="${raw}.tmp"
  rm -f "$tmp"
  # zstd turns sparse-file support off when writing to stdout; --sparse
  # forces it back on so runs of zero pages become holes in the cache file
  # instead of being written out in full.
  if ! zstd -dc --sparse "$zst" > "$tmp"; then
    err "Failed to decompress $zst"
    rm -f "$tmp"
    return 1
  fi
  mv "$tmp" "$raw"
}
# Returns a fast fingerprint (size:mtime) of the base QEMU image.
# Used to detect whether the image has changed since the overlay was created.
base_image_fingerprint() {
@ -77,10 +150,62 @@ base_image_fingerprint() {
esac
}
prepare_runtime_config_iso() {
# Print "<base fingerprint>|<savevm fingerprint>" for stale-overlay
# detection. Both inputs are covered so the overlay is rebuilt when either
# the base qcow2 ($1) or the savevm file ($2) changes; a missing savevm file
# contributes the literal "no-savevm". The overlay disk has to match the
# disk state the snapshot was taken against for -incoming resume to be
# consistent.
runtime_fingerprint() {
  local base_img="$1" savevm_file="$2"
  local base_part savevm_part

  base_part="$(base_image_fingerprint "$base_img")"
  savevm_part="no-savevm"
  if [ -f "$savevm_file" ]; then
    savevm_part="$(base_image_fingerprint "$savevm_file")"
  fi

  printf '%s|%s\n' "$base_part" "$savevm_part"
}
# Make sure a runtime config ISO exists before QEMU is launched.
#
# An ISO already on disk is trusted, and left untouched, in two cases:
#   * STACK_EMULATOR_CLI_WROTE_ISO=1 — stack-cli wrote it natively
#     (packages/stack-cli/src/lib/iso.ts) right before spawning us.
#     Regenerating would fall through to make_iso_from_dir and require
#     hdiutil/mkisofs/genisoimage, the very host dependency the CLI path is
#     designed to remove.
#   * EMULATOR_CAPTURING_SNAPSHOT=1 — cmd_capture wrote a specialized ISO
#     with an empty STACK_EMULATOR_VM_DIR_HOST. virtfs is detached for
#     snapshot compatibility, and run-stack-container would otherwise try to
#     publish internal-pck to /host/... and restart-loop stack.service.
#
# Every other invocation (direct shell use) regenerates unconditionally:
# port env vars (PORT_PREFIX, EMULATOR_*_PORT) may have changed since the
# last run, and a cached ISO would silently override them.
ensure_runtime_config_iso() {
  if [ "${STACK_EMULATOR_CLI_WROTE_ISO:-}" = "1" ] || [ "${EMULATOR_CAPTURING_SNAPSHOT:-}" = "1" ]; then
    if [ -s "$(runtime_iso_path)" ]; then
      return 0
    fi
  fi
  write_runtime_config_iso "$VM_DIR"
}
# Write a STACKCFG runtime-config.iso containing runtime.env + base.env.
# The VM_DIR_HOST arg is the path to publish internal-pck / stack.log to on
# /host; pass empty string to suppress publication (used by capture mode
# where /host isn't mounted — virtfs is detached for snapshot compatibility,
# so any host-side write would fail and restart-loop stack.service).
write_runtime_config_iso() {
local vm_dir_host="$1"
local base_env="$SCRIPT_DIR/../.env.development"
if [ ! -f "$base_env" ]; then
err "Cannot generate runtime config ISO: $base_env is missing."
err "Run 'pnpm run emulator:generate-env' first, or invoke via 'stack emulator start'."
exit 1
fi
local cfg_dir="$VM_DIR/runtime-config"
local cfg_iso
cfg_iso="$(runtime_iso_path)"
rm -rf "$cfg_dir"
mkdir -p "$cfg_dir"
{
@ -89,10 +214,10 @@ prepare_runtime_config_iso() {
printf "STACK_EMULATOR_BACKEND_HOST_PORT=%s\n" "$EMULATOR_BACKEND_PORT"
printf "STACK_EMULATOR_MINIO_HOST_PORT=%s\n" "$EMULATOR_MINIO_PORT"
printf "STACK_EMULATOR_INBUCKET_HOST_PORT=%s\n" "$EMULATOR_INBUCKET_PORT"
printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$VM_DIR"
printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$vm_dir_host"
} > "$cfg_dir/runtime.env"
cp "$SCRIPT_DIR/../.env.development" "$cfg_dir/base.env"
make_iso_from_dir "$cfg_iso" "STACKCFG" "$cfg_dir"
cp "$base_env" "$cfg_dir/base.env"
make_iso_from_dir "$(runtime_iso_path)" "STACKCFG" "$cfg_dir"
}
service_is_up() {
@ -145,7 +270,7 @@ wait_for_condition() {
log "${label} ready in ${elapsed}s"
return 0
fi
sleep 1
sleep 0.2
elapsed=$((SECONDS - started))
printf "\r [%3ds] %s..." "$elapsed" "$label"
done
@ -154,8 +279,9 @@ wait_for_condition() {
}
build_qemu_cmd() {
local base_img
local base_img savevm_file
base_img="$(image_path)"
savevm_file="$(savevm_path)"
if [ ! -f "$base_img" ]; then
err "Missing QEMU image: $base_img"
@ -166,18 +292,36 @@ build_qemu_cmd() {
mkdir -p "$VM_DIR"
local fingerprint_file="$VM_DIR/base-image.fingerprint"
local current_fp
current_fp="$(base_image_fingerprint "$base_img")"
if [ -f "$VM_DIR/disk.qcow2" ]; then
if [ -f "$fingerprint_file" ] && [ "$(cat "$fingerprint_file")" = "$current_fp" ]; then
log "Reusing existing overlay disk (changes persist)"
else
warn "QEMU base image has changed — recreating overlay."
current_fp="$(runtime_fingerprint "$base_img" "$savevm_file")"
if snapshot_layout; then
# The savevm RAM state was captured against the base image's exact disk
# state. An overlay with writes from a previous session diverges from
# that point, so -incoming would resume RAM against inconsistent disk.
# Always start from a fresh overlay in the snapshot path; per-session
# state is not preserved. Users who want persistence can opt out with
# EMULATOR_NO_SNAPSHOT=1. Capture mode also needs a clean overlay so the
# snapshot we write is taken against the base's known disk state.
if [ -f "$VM_DIR/disk.qcow2" ]; then
rm -f "$VM_DIR/disk.qcow2" "$fingerprint_file"
fi
fi
if [ ! -f "$VM_DIR/disk.qcow2" ]; then
qemu-img create -f qcow2 -b "$base_img" -F qcow2 "$VM_DIR/disk.qcow2" >/dev/null
base_image_fingerprint "$base_img" > "$fingerprint_file"
printf '%s' "$current_fp" > "$fingerprint_file"
else
# If the overlay was created against a different base or savevm, it will
# diverge from the snapshot's disk state — force a rebuild.
if [ -f "$VM_DIR/disk.qcow2" ]; then
if [ -f "$fingerprint_file" ] && [ "$(cat "$fingerprint_file")" = "$current_fp" ]; then
log "Reusing existing overlay disk (changes persist)"
else
warn "Base image or snapshot has changed — recreating overlay."
rm -f "$VM_DIR/disk.qcow2" "$fingerprint_file"
fi
fi
if [ ! -f "$VM_DIR/disk.qcow2" ]; then
qemu-img create -f qcow2 -b "$base_img" -F qcow2 "$VM_DIR/disk.qcow2" >/dev/null
printf '%s' "$current_fp" > "$fingerprint_file"
fi
fi
local qemu_bin machine cpu firmware_args=()
@ -213,28 +357,127 @@ build_qemu_cmd() {
# are mutually exclusive.
netdev+=",hostfwd=tcp:127.0.0.1:${PORT_PREFIX}14-:${PORT_PREFIX}14"
QEMU_CMD=(
"$qemu_bin"
-machine "$machine"
-accel "$ACCEL"
-cpu "$cpu"
"${firmware_args[@]}"
-boot order=c
-m "$VM_RAM"
-smp "$VM_CPUS"
-drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio"
-drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on"
-netdev "$netdev"
-device virtio-net-pci,netdev=net0
-device virtio-balloon-pci
-virtfs "local,path=/,mount_tag=hostfs,security_model=none"
-chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off"
-mon "chardev=monitor,mode=control"
-serial "file:$VM_DIR/serial.log"
-display none
-daemonize
-pidfile "$VM_DIR/qemu.pid"
)
# In snapshot-resume mode the QEMU command-line MUST match the device set
# used at snapshot capture time, otherwise migration replay fails (broken
# pipe / device tree mismatch). At capture time the build attaches:
# disk(if=virtio) + seed.iso + bundle.iso + runtime.iso (all if=virtio)
# netdev + virtio-net-pci + monitor + QGA virtio-serial
# SMP=4, RAM=4096 (pinned in build-image.sh snapshot mode)
# We mirror that exactly. The seed/bundle ISOs were used by cloud-init at
# build and are not needed at runtime, but their virtio-blk slots must
# exist so the migration replay matches device IDs. Runtime-only devices
# (virtfs, balloon) live at higher slots — extra at destination is fine.
local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" snapshot_ram="$VM_RAM"
if snapshot_layout; then
if snapshot_available; then
log "Snapshot found at $savevm_file — fast-resume enabled."
# -incoming defer: QEMU starts, waits for a QMP migrate-incoming command.
# We use that to set mapped-ram + multifd capabilities before loading,
# which enables parallel RAM restore (~2-3x faster than streamed decode).
snapshot_args+=(-incoming defer)
else
log "Capture mode: booting with snapshot-compatible layout (no -incoming)."
fi
snapshot_smp="${EMULATOR_SNAPSHOT_CPUS:-4}"
# RAM size is baked into the snapshot; migration replay requires an
# identical -m value. Pin to the build-time RAM (4096) and ignore
# EMULATOR_RAM — override via EMULATOR_SNAPSHOT_RAM if a different
# snapshot was produced.
snapshot_ram="${EMULATOR_SNAPSHOT_RAM:-4096}"
if [ "$snapshot_smp" != "$VM_CPUS" ]; then
log "Pinning SMP to ${snapshot_smp} for snapshot resume (build-time value)."
fi
if [ "$snapshot_ram" != "$VM_RAM" ]; then
log "Pinning RAM to ${snapshot_ram}MB for snapshot resume (ignoring EMULATOR_RAM=${VM_RAM})."
fi
# Tiny placeholder ISOs to match the seed.iso / bundle.iso slots present
# at snapshot time. Their content doesn't matter (cloud-init has already
# run); only the virtio-blk slot count must match.
local seed_phantom="$VM_DIR/seed.phantom"
local bundle_phantom="$VM_DIR/bundle.phantom"
if [ ! -s "$seed_phantom" ]; then
dd if=/dev/zero of="$seed_phantom" bs=1M count=1 status=none
fi
if [ ! -s "$bundle_phantom" ]; then
dd if=/dev/zero of="$bundle_phantom" bs=1M count=1 status=none
fi
runtime_only_args+=(
-drive "file=$seed_phantom,format=raw,if=virtio,readonly=on"
-drive "file=$bundle_phantom,format=raw,if=virtio,readonly=on"
)
else
# Cold-boot: include virtio-balloon and virtfs as before.
runtime_only_args+=(
-device virtio-balloon-pci
-virtfs "local,path=/,mount_tag=hostfs,security_model=none"
)
fi
if snapshot_layout; then
QEMU_CMD=(
"$qemu_bin"
-machine "$machine"
-accel "$ACCEL"
-cpu "$cpu"
"${firmware_args[@]}"
-boot order=c
-m "$snapshot_ram"
-smp "$snapshot_smp"
-drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio"
"${runtime_only_args[@]}"
-drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on"
-netdev "$netdev"
-device virtio-net-pci,netdev=net0
-chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off"
-mon "chardev=monitor,mode=control"
-chardev "socket,path=$VM_DIR/qga.sock,server=on,wait=off,id=qga0"
-device virtio-serial
-device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0"
# Empty PCIe root port reserved for runtime hot-plug of virtio-9p.
# MUST be the last explicit -device entry — slot order has to mirror
# build-image.sh exactly or migration replay stalls in inmigrate.
-device "pcie-root-port,id=hostfs-port,bus=pcie.0,chassis=1"
# Pre-create the host-side fsdev backend so the post-resume QMP
# device_add can attach to it by id. -fsdev is host-only state — not
# part of the migrated device tree — so it's safe to add here even
# though the snapshot was captured without it. Going through -fsdev
# avoids the HMP fsdev_add command, whose error path is invisible
# via human-monitor-command (errors come back as a return string,
# not a QMP error).
-fsdev "local,id=hostfs,path=/,security_model=none"
${snapshot_args[@]+"${snapshot_args[@]}"}
-serial "file:$VM_DIR/serial.log"
-display none
-daemonize
-pidfile "$VM_DIR/qemu.pid"
)
else
QEMU_CMD=(
"$qemu_bin"
-machine "$machine"
-accel "$ACCEL"
-cpu "$cpu"
"${firmware_args[@]}"
-boot order=c
-m "$VM_RAM"
-smp "$snapshot_smp"
-drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio"
-drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on"
-netdev "$netdev"
-device virtio-net-pci,netdev=net0
"${runtime_only_args[@]}"
-chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off"
-mon "chardev=monitor,mode=control"
-chardev "socket,path=$VM_DIR/qga.sock,server=on,wait=off,id=qga0"
-device virtio-serial
-device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0"
-serial "file:$VM_DIR/serial.log"
-display none
-daemonize
-pidfile "$VM_DIR/qemu.pid"
)
fi
}
@ -269,11 +512,225 @@ ensure_ports_free() {
start_vm() {
mkdir -p "$VM_DIR"
: > "$VM_DIR/serial.log"
prepare_runtime_config_iso
ensure_runtime_config_iso
build_qemu_cmd
"${QEMU_CMD[@]}"
}
# Run one QMP session against $VM_DIR/monitor.sock and print the replies.
# stdin carries the JSON commands, one per line; qmp_capabilities is
# prepended automatically. Returns 1 without reading stdin when the monitor
# socket does not exist. The writer lingers 0.3s after the last command so
# socat stays connected long enough for QEMU's answer.
qmp_send() {
  local monitor="$VM_DIR/monitor.sock"
  [ -S "$monitor" ] || return 1

  local commands
  commands="$(cat)"
  (
    printf '%s\n' '{"execute":"qmp_capabilities"}'
    printf '%s\n' "$commands"
    sleep 0.3
  ) | socat -t5 - "UNIX-CONNECT:$monitor" 2>/dev/null
}
# After -incoming defer, QEMU waits for a migrate-incoming command. This sets
# up mapped-ram + multifd capabilities and kicks off the RAM load from the
# decompressed cache file ($1). Returns 0 once the VM is running (or has been
# told to continue), 1 if the monitor is unreachable, QEMU reports an error,
# or no runnable state is reached within 60s.
qmp_incoming_and_cont() {
  local raw_file="$1"

  # Set caps + parameters before migrate-incoming, same as source.
  # Every successful session returns at least the QMP greeting, so an empty
  # reply means the monitor could not be reached and is treated as a failure
  # instead of silently passing the "no error" check.
  local setup_resp
  setup_resp=$( {
    printf '%s\n' '{"execute":"migrate-set-capabilities","arguments":{"capabilities":[{"capability":"mapped-ram","state":true},{"capability":"multifd","state":true}]}}'
    printf '%s\n' '{"execute":"migrate-set-parameters","arguments":{"multifd-channels":4}}'
  } | qmp_send || true)
  if [ -z "$setup_resp" ] || printf '%s' "$setup_resp" | grep -q '"error"'; then
    err "QMP caps setup failed: $setup_resp"
    return 1
  fi

  # Kick off the incoming migration from the mapped-ram file. Backslashes
  # and double quotes in the path are escaped to keep the JSON well-formed.
  local json_path=${raw_file//\\/\\\\}
  json_path=${json_path//\"/\\\"}
  local inc_cmd inc_resp
  inc_cmd=$(printf '{"execute":"migrate-incoming","arguments":{"uri":"file:%s"}}' "$json_path")
  inc_resp=$(printf '%s\n' "$inc_cmd" | qmp_send || true)
  if [ -z "$inc_resp" ] || printf '%s' "$inc_resp" | grep -q '"error"'; then
    err "QMP migrate-incoming failed: $inc_resp"
    return 1
  fi

  # Poll until status reaches a runnable state, then cont.
  local deadline=$((SECONDS + 60))
  local out status cont_resp
  while [ "$SECONDS" -lt "$deadline" ]; do
    out=$(printf '%s\n' '{"execute":"query-status"}' | qmp_send || true)
    status=$(printf '%s' "$out" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/' || true)
    case "$status" in
      running)
        return 0
        ;;
      paused|postmigrate|prelaunch)
        # The state has loaded but the vCPUs are stopped: resume them, and
        # surface a rejected cont instead of reporting success.
        cont_resp=$(printf '%s\n' '{"execute":"cont"}' | qmp_send || true)
        if printf '%s' "$cont_resp" | grep -q '"error"'; then
          err "QMP cont failed: $cont_resp"
          return 1
        fi
        return 0
        ;;
      inmigrate|"")
        # Still loading (or no parsable reply yet) — keep polling.
        ;;
      *)
        log "unexpected QMP status: $status"
        ;;
    esac
    sleep 0.2
  done
  return 1
}
# Placeholder PCK baked into the snapshot. Kept in sync with the value in
# docker/local-emulator/qemu/cloud-init/emulator/user-data.
SNAPSHOT_PLACEHOLDER_PCK="00000000000000000000000000000000ffffffffffffffffffffffffffffffff"
# Store the internal PCK ($1) at $VM_DIR/internal-pck, the host path the CLI
# reads (see readInternalPck() in packages/stack-cli/src/commands/emulator.ts).
# In cold-boot mode the guest publishes this file over virtfs/9p; snapshot
# mode drops virtfs, so the host writes it instead. The write runs in a
# subshell with umask 077 so a newly created file is owner-only and the
# caller's umask is left untouched.
write_internal_pck_for_cli() {
  local key="$1"
  local dest="$VM_DIR/internal-pck"
  (
    umask 077
    printf '%s' "$key" > "$dest"
  )
}
# Send a request to qemu-guest-agent through its virtserialport socket
# ($VM_DIR/qga.sock) and print the reply. QGA speaks the same JSON protocol
# as QMP, on a separate channel. stdin carries the JSON request; returns 1
# without reading stdin when the socket does not exist.
#
# socat hangs up as soon as stdin reaches EOF, which would be before QGA can
# answer, so the writer stays alive for 0.5s after the request. QGA replies
# in milliseconds; the margin only absorbs scheduling jitter on a busy host.
qga_send() {
  local agent_sock="$VM_DIR/qga.sock"
  [ -S "$agent_sock" ] || return 1

  local request
  request="$(cat)"
  {
    printf '%s\n' "$request"
    sleep 0.5
  } | socat -t10 - "UNIX-CONNECT:$agent_sock" 2>/dev/null
}
# Wait up to 30s for the guest agent to answer a guest-sync ping (retrying
# every 0.2s). Returns 0 as soon as the agent echoes the sync id back,
# 1 if it never does.
qga_wait_ready() {
  local give_up_at=$((SECONDS + 30))
  local reply
  until [ "$SECONDS" -ge "$give_up_at" ]; do
    reply=$(printf '%s\n' '{"execute":"guest-sync","arguments":{"id":424242}}' | qga_send || true)
    if grep -q '"return":[[:space:]]*424242' <<<"$reply"; then
      return 0
    fi
    sleep 0.2
  done
  return 1
}
# Attach a virtio-9p device backed by host `/` to a VM that was resumed from
# a snapshot. The snapshot is captured WITHOUT virtfs (QEMU disallows
# migration while 9p is mounted in the guest), so the resumed VM has no host
# filesystem until this runs. build_qemu_cmd already created the fsdev
# backend via -fsdev; only the device_add half remains. Returns 1 when
# QEMU's reply contains an error, 0 otherwise.
qmp_hotplug_9p() {
  local device_add='{"execute":"device_add","arguments":{"driver":"virtio-9p-pci","id":"hostfs-dev","fsdev":"hostfs","mount_tag":"hostfs","bus":"hostfs-port"}}'
  local resp
  resp=$(printf '%s\n' "$device_add" | qmp_send)
  case "$resp" in
    *'"error"'*)
      err "QMP device_add virtio-9p-pci failed: $resp"
      return 1
      ;;
  esac
  return 0
}
# Run /usr/local/bin/mount-host-fs --post-resume in the guest. The script
# mounts the freshly-hot-plugged 9p device on /host, which is a shared
# mount point — so the new mount propagates into the running stack
# container's `-v /host:/host:rshared` bind mount without a container
# restart.
#
# Returns 0 only when the guest process exits with code 0. Returns 1 when
# guest-exec yields no pid, the process exits non-zero or without an exit
# code, or it has not finished within 20s.
qga_mount_host_fs() {
  local cmd resp pid status_resp exited exitcode
  cmd='{"execute":"guest-exec","arguments":{"path":"/usr/local/bin/mount-host-fs","arg":["--post-resume"],"capture-output":true}}'
  resp=$(printf '%s\n' "$cmd" | qga_send || true)
  # Extended regexes (-E) keep the alternation below portable between GNU
  # and BSD grep; `|| true` makes a non-match yield an empty string.
  pid=$(printf '%s' "$resp" | grep -Eo '"pid"[[:space:]]*:[[:space:]]*[0-9]+' | head -1 | grep -Eo '[0-9]+$' || true)
  if [ -z "$pid" ]; then
    err "guest-exec mount-host-fs did not return a pid; response: $resp"
    return 1
  fi

  local deadline=$((SECONDS + 20))
  while [ "$SECONDS" -lt "$deadline" ]; do
    status_resp=$(printf '%s\n' "{\"execute\":\"guest-exec-status\",\"arguments\":{\"pid\":${pid}}}" | qga_send || true)
    exited=$(printf '%s' "$status_resp" | grep -Eo '"exited"[[:space:]]*:[[:space:]]*(true|false)' | head -1 | grep -Eo '(true|false)$' || true)
    if [ "$exited" = "true" ]; then
      exitcode=$(printf '%s' "$status_resp" | grep -Eo '"exitcode"[[:space:]]*:[[:space:]]*-?[0-9]+' | head -1 | grep -Eo -e '-?[0-9]+$' || true)
      # QGA omits "exitcode" when the process was terminated by a signal,
      # so a missing code is a failure, never a success.
      if [ "$exitcode" = "0" ]; then
        log "host fs mounted in guest"
        return 0
      fi
      err "mount-host-fs exited with code ${exitcode:-unknown}; response: $status_resp"
      return 1
    fi
    sleep 0.2
  done
  err "mount-host-fs did not complete within 20s"
  return 1
}
# Generate fresh secrets on the host and have the guest apply them by running
# /usr/local/bin/trigger-fast-rotate through qemu-guest-agent.
#
# guest-exec returns a pid; we then poll guest-exec-status until the process
# exits and act on its exit code. The secrets travel as key=value lines,
# base64-encoded in guest-exec's input-data (the guest script's stdin), so
# no virtfs is needed and nothing secret is written to the host filesystem
# apart from the PCK file the CLI reads.
#
# Returns 0 only when the guest process exits with code 0. Returns 1 when
# secret generation fails, guest-exec yields no pid, the process exits
# non-zero or without an exit code, or it has not finished within 60s.
qga_trigger_fast_rotate() {
  local fresh_pck fresh_ssk fresh_sak fresh_cron payload secrets_b64 resp pid
  fresh_pck="$(openssl rand -hex 32)"
  fresh_ssk="$(openssl rand -hex 32)"
  fresh_sak="$(openssl rand -hex 32)"
  fresh_cron="$(openssl rand -hex 32)"
  # Never push empty secrets into the guest (e.g. openssl missing or failing
  # in a caller that does not run with errexit).
  if [ -z "$fresh_pck" ] || [ -z "$fresh_ssk" ] || [ -z "$fresh_sak" ] || [ -z "$fresh_cron" ]; then
    err "failed to generate fresh secrets with openssl"
    return 1
  fi
  payload=$(
    printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$fresh_pck"
    printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$fresh_ssk"
    printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$fresh_sak"
    printf 'CRON_SECRET=%s\n' "$fresh_cron"
  )
  # Publish the fresh PCK to the host path the CLI reads. Writing before the
  # guest-exec so a --config-file flow that polls from another process can
  # pick it up the moment rotation completes.
  write_internal_pck_for_cli "$fresh_pck"

  secrets_b64=$(printf '%s' "$payload" | base64 | tr -d '\n')
  local cmd
  cmd=$(printf '{"execute":"guest-exec","arguments":{"path":"/usr/local/bin/trigger-fast-rotate","capture-output":true,"input-data":"%s"}}' "$secrets_b64")
  resp=$(printf '%s\n' "$cmd" | qga_send || true)
  # Extended regexes (-E) keep the alternation below portable between GNU
  # and BSD grep; `|| true` makes a non-match yield an empty string.
  pid=$(printf '%s' "$resp" | grep -Eo '"pid"[[:space:]]*:[[:space:]]*[0-9]+' | head -1 | grep -Eo '[0-9]+$' || true)
  if [ -z "$pid" ]; then
    err "guest-exec did not return a pid; response: $resp"
    return 1
  fi

  # Rotation (sed + UPDATE + supervisorctl restart + node startup) fits well
  # inside this window.
  local deadline=$((SECONDS + 60))
  local status_resp exited exitcode
  while [ "$SECONDS" -lt "$deadline" ]; do
    status_resp=$(printf '%s\n' "{\"execute\":\"guest-exec-status\",\"arguments\":{\"pid\":${pid}}}" | qga_send || true)
    exited=$(printf '%s' "$status_resp" | grep -Eo '"exited"[[:space:]]*:[[:space:]]*(true|false)' | head -1 | grep -Eo '(true|false)$' || true)
    if [ "$exited" = "true" ]; then
      exitcode=$(printf '%s' "$status_resp" | grep -Eo '"exitcode"[[:space:]]*:[[:space:]]*-?[0-9]+' | head -1 | grep -Eo -e '-?[0-9]+$' || true)
      # QGA omits "exitcode" when the process was terminated by a signal,
      # so a missing code is a failure, never a success.
      if [ "$exitcode" = "0" ]; then
        log "rotate-secrets completed."
        return 0
      fi
      err "rotate-secrets exited with code ${exitcode:-unknown}"
      err "response: $status_resp"
      return 1
    fi
    sleep 0.2
  done
  err "rotate-secrets did not complete within 60s"
  return 1
}
stop_vm() {
if [ ! -f "$VM_DIR/qemu.pid" ]; then
return 0
@ -292,9 +749,10 @@ stop_vm() {
kill -9 "$pid" 2>/dev/null || true
fi
fi
rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/serial.log"
rm -rf "$VM_DIR/runtime-config"
rm -f "$VM_DIR/runtime-config.iso"
rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/qga.sock" "$VM_DIR/serial.log"
# runtime-config.iso is left in place; ensure_runtime_config_iso regenerates
# it on the next start. `cmd_reset` wipes $RUN_DIR entirely when a full reset
# is wanted.
}
cmd_start() {
@ -305,18 +763,92 @@ cmd_start() {
info "Arch: $ARCH | Accel: $ACCEL"
info "Ports: Dashboard=$EMULATOR_DASHBOARD_PORT Backend=$EMULATOR_BACKEND_PORT MinIO=$EMULATOR_MINIO_PORT Inbucket=$EMULATOR_INBUCKET_PORT"
local using_snapshot=0
if snapshot_available; then
if ! ensure_savevm_raw; then
warn "Snapshot decompression failed — falling back to cold boot."
snapshot_fallback_to_cold_boot
return
fi
using_snapshot=1
fi
start_vm
info "VM: ${VM_RAM}MB / ${VM_CPUS} CPUs"
if ! wait_for_condition "deps services" "$READY_TIMEOUT" deps_ready; then
tail_vm_logs
exit 1
fi
if [ "$using_snapshot" = "1" ]; then
log "Resuming from snapshot (mapped-ram + multifd)..."
if ! qmp_incoming_and_cont "$(savevm_raw_path)"; then
warn "Snapshot resume did not reach a runnable state — falling back to cold boot."
snapshot_fallback_to_cold_boot
return
fi
if ! wait_for_condition "dashboard/backend" "$READY_TIMEOUT" app_ready; then
tail_vm_logs
exit 1
log "VM resumed; waiting for guest agent..."
if ! qga_wait_ready; then
warn "Guest agent did not respond — falling back to cold boot."
snapshot_fallback_to_cold_boot
return
fi
# Hot-plug the host filesystem. The snapshot was captured without
# virtfs, so the running container has an empty /host bind mount until
# we add the 9p device and mount it in the guest. Required for routes
# like /local-emulator/project that read user-supplied paths via /host.
log "Hot-plugging host filesystem..."
if ! qmp_hotplug_9p; then
warn "Failed to hot-plug 9p device — falling back to cold boot."
snapshot_fallback_to_cold_boot
return
fi
if ! qga_mount_host_fs; then
warn "Failed to mount host fs in guest — falling back to cold boot."
snapshot_fallback_to_cold_boot
return
fi
if [ "$EMULATOR_NO_ROTATION" = "1" ]; then
warn "EMULATOR_NO_ROTATION=1: snapshot's placeholder secrets are in effect — do not expose this instance."
# The placeholder PCK is live in the running image; publish it to the
# host path so --config-file flows still work.
write_internal_pck_for_cli "$SNAPSHOT_PLACEHOLDER_PCK"
if ! wait_for_condition "services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then
warn "Services did not respond after resume — falling back to cold boot."
tail_vm_logs
snapshot_fallback_to_cold_boot
return
fi
else
log "Generating fresh secrets + triggering rotation..."
if ! qga_trigger_fast_rotate; then
warn "Failed to trigger rotate-secrets — falling back to cold boot."
snapshot_fallback_to_cold_boot
return
fi
# Wait for the *new* backend (post-supervisor-restart) to actually be
# listening. all_ready may briefly return true against the OLD Node
# processes between when supervisor sends SIGTERM and when the children
# die; sleep a beat so we measure the real readiness.
sleep 1
if ! wait_for_condition "rotated services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then
warn "Services did not recover after rotation — falling back to cold boot."
tail_vm_logs
snapshot_fallback_to_cold_boot
return
fi
fi
else
if ! wait_for_condition "deps services" "$READY_TIMEOUT" deps_ready; then
tail_vm_logs
exit 1
fi
if ! wait_for_condition "dashboard/backend" "$READY_TIMEOUT" app_ready; then
tail_vm_logs
exit 1
fi
fi
log "All services are green."
@ -324,6 +856,20 @@ cmd_start() {
info "Backend: http://localhost:${EMULATOR_BACKEND_PORT}"
}
# Recovery path for a failed snapshot resume: stop the VM, discard the overlay
# state, set EMULATOR_NO_SNAPSHOT=1 and re-enter cmd_start. Keeps the user
# unblocked even when the snapshot is broken (e.g. stale, incompatible
# host-arch/QEMU-version mismatch).
snapshot_fallback_to_cold_boot() {
  warn "Retrying with cold boot (EMULATOR_NO_SNAPSHOT=1)..."
  stop_vm

  # Remove the overlay + fingerprint so build_qemu_cmd re-creates a fresh one.
  # runtime-config.iso is regenerated by ensure_runtime_config_iso on recursion.
  local -a stale_state=(
    "$VM_DIR/disk.qcow2"
    "$VM_DIR/base-image.fingerprint"
    "$VM_DIR/seed.phantom"
    "$VM_DIR/bundle.phantom"
  )
  rm -f "${stale_state[@]}"

  EMULATOR_NO_SNAPSHOT=1
  cmd_start
}
cmd_stop() {
stop_vm
log "QEMU emulator stopped."
@ -335,6 +881,100 @@ cmd_reset() {
log "Emulator state reset. Next start will be a fresh boot."
}
# Cold-boot the VM with the snapshot-compatible device layout, wait for all
# services to be healthy, then capture a snapshot via QMP migrate and compress
# it to .savevm.zst. Called by `stack emulator pull` so first-run users get a
# fast-resume snapshot that's guaranteed compatible with their host's QEMU
# version + accelerator (which CI-built snapshots can't guarantee across
# KVM/HVF/TCG).
#
# Exits non-zero when the qcow2 is missing, the emulator is already running,
# services never come up, the QMP capture fails, or compression fails. Every
# failure after the capture starts removes the partial *.capture.tmp files so
# an aborted run does not leave a large raw dump behind.
cmd_capture() {
  if [ ! -f "$(image_path)" ]; then
    err "Missing qcow2: $(image_path). Run 'stack emulator pull' first."
    exit 1
  fi

  if [ -s "$(savevm_path)" ] && [ "$EMULATOR_FORCE_CAPTURE" != "1" ]; then
    log "Snapshot already present at $(savevm_path); skipping capture."
    log "Pass EMULATOR_FORCE_CAPTURE=1 to rebuild it."
    return 0
  fi

  if is_running; then
    err "Emulator is already running; stop it first (stack emulator stop)."
    exit 1
  fi

  # Start with a clean slate if we're force-recapturing; stale raw/zst would
  # otherwise make snapshot_available() return true and flip QEMU into
  # -incoming defer mode.
  rm -f "$(savevm_path)" "$(savevm_raw_path)"

  ensure_ports_free
  mkdir -p "$RUN_DIR" "$VM_DIR"

  # Regenerate runtime-config.iso with STACK_EMULATOR_VM_DIR_HOST empty —
  # virtfs is detached in capture mode, so run-stack-container's
  # `install internal-pck → /host/$VM_DIR_HOST/...` would fail and restart-loop
  # stack.service. Mirrors build-image.sh's CI runtime.env shape.
  rm -f "$(runtime_iso_path)"
  write_runtime_config_iso ""

  info "Cold-booting VM to capture local snapshot (one-time, ~1-3 min)..."
  EMULATOR_CAPTURING_SNAPSHOT=1
  start_vm
  info "VM: 4096MB / 4 CPUs (pinned for snapshot compatibility)"

  # Cold boot with snapshot-compatible layout drops virtfs, so stack.service
  # starts without /host mounted — fine for capture; hostfs is hot-plugged on
  # resume via qmp_hotplug_9p.
  if ! wait_for_condition "all services" "$READY_TIMEOUT" all_ready; then
    tail_vm_logs
    stop_vm
    err "Services did not come up; capture aborted."
    exit 1
  fi

  local raw tmp_raw zst tmp_zst
  raw="$(savevm_raw_path)"
  tmp_raw="${raw}.capture.tmp"
  zst="$(savevm_path)"
  tmp_zst="${zst}.capture.tmp"
  rm -f "$tmp_raw" "$tmp_zst"

  log "Capturing VM state via QMP (mapped-ram + multifd)..."
  if ! capture_vm_state "$VM_DIR/monitor.sock" "$tmp_raw"; then
    err "QMP capture failed."
    stop_vm
    # Drop whatever partial state was written before the failure.
    rm -f "$tmp_raw"
    exit 1
  fi

  # capture_vm_state sent QMP quit; wait for QEMU to exit, then clean sockets.
  local waited=0
  while [ "$waited" -lt 30 ] && is_running; do
    sleep 1
    waited=$((waited + 1))
  done
  if is_running; then
    warn "QEMU did not exit after QMP quit; forcing."
    stop_vm
  fi
  rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/qga.sock"

  if [ ! -s "$tmp_raw" ]; then
    err "Captured raw file is empty: $tmp_raw"
    rm -f "$tmp_raw"
    exit 1
  fi

  log "Compressing snapshot with zstd..."
  # Check zstd's status explicitly: a failed compression must not fall through
  # to the renames below, and must not leave the temp files behind.
  if ! zstd -1 -T0 -f -o "$tmp_zst" "$tmp_raw"; then
    err "zstd compression failed; snapshot not saved."
    rm -f "$tmp_raw" "$tmp_zst"
    exit 1
  fi
  mv "$tmp_zst" "$zst"
  # Keep the uncompressed file too — resume reads it directly via mapped-ram,
  # and ensure_savevm_raw skips re-decompression when the raw's mtime >= zst's.
  mv "$tmp_raw" "$raw"
  touch -r "$zst" "$raw"

  local size
  size="$(du -h "$zst" | cut -f1)"
  log "Snapshot captured: $zst (${size})"
}
STATUS_FAILED=0
print_service_status() {
@ -382,12 +1022,12 @@ ACTION="start"
while [[ $# -gt 0 ]]; do
case "$1" in
start|stop|reset|status|bench)
start|stop|reset|status|bench|capture)
ACTION="$1"
shift
;;
*)
echo "Usage: $0 [start|stop|reset|status|bench]"
echo "Usage: $0 [start|stop|reset|status|bench|capture]"
exit 1
;;
esac
@ -399,4 +1039,5 @@ case "$ACTION" in
reset) cmd_reset ;;
status) cmd_status ;;
bench) cmd_bench ;;
capture) cmd_capture ;;
esac

View File

@ -0,0 +1,104 @@
#!/bin/bash
# Rotate baked-in placeholder secrets with fresh host-generated values.
#
# Called inside the stack container by the emulator snapshot-resume path.
# Fresh secrets arrive as environment variables (trigger-fast-rotate passes
# them with `docker exec -e`). If STACK_ROTATE_INPUT names an existing env
# file, that file is sourced first for backward compatibility.
#
# Flow:
# 1. Read fresh secrets from the environment, or from the env file named by
# STACK_ROTATE_INPUT when it is set.
# 2. Validate they are 64-char hex (the build placeholders are too).
# 3. Write rotated-secrets.env that app-entrypoint and run-cron-jobs source
# on restart.
# 4. Targeted sed across built files: swap the placeholder PCK for the fresh
# one (this is the only secret baked into JS via sentinel replacement at
# build time — SSK/SAK/CRON_SECRET flow through process.env only).
# 5. UPDATE the internal ApiKeySet row in Postgres.
# 6. supervisorctl restart stack-app + cron-jobs so the new values take
# effect in the running Node processes.
set -euo pipefail
OUTPUT=/run/stack-auth/rotated-secrets.env
WORK_DIR="${STACK_RUNTIME_WORK_DIR:-/app}"
PLACEHOLDER_PCK="00000000000000000000000000000000ffffffffffffffffffffffffffffffff"
log() { printf '[rotate-secrets] %s\n' "$*"; }
# Fresh secrets normally arrive as environment variables (passed by
# trigger-fast-rotate using `docker exec -e`). For backward compatibility, an
# env file is sourced instead when STACK_ROTATE_INPUT points at an existing
# file; `set -a` exports everything that file defines.
rotate_input="${STACK_ROTATE_INPUT:-}"
if [[ -n "$rotate_input" && -f "$rotate_input" ]]; then
  log "reading fresh secrets from $rotate_input"
  set -a
  # shellcheck disable=SC1090
  . "$rotate_input"
  set +a
fi
# Every secret must be present and be exactly 64 hex characters; anything else
# aborts the rotation before a single file or DB row is touched.
for var in STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY \
           STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY \
           STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY \
           CRON_SECRET; do
  val="${!var:-}"
  if [ -z "$val" ]; then
    log "ERROR: $var is missing from environment"
    exit 1
  fi
  # Match the whole value with bash's regex operator instead of piping to grep.
  # grep tests each input line separately, so a multi-line value containing one
  # valid 64-hex line would pass and then be inlined into the env file and the
  # SQL statement below. With [[ =~ ]] the anchors bind to the entire string.
  if ! [[ "$val" =~ ^[0-9a-fA-F]{64}$ ]]; then
    log "ERROR: $var is not a 64-char hex string"
    exit 1
  fi
done
# Persist the rotated values to $OUTPUT as KEY=value lines, readable only by
# the owner (umask 077 plus an explicit chmod 0600).
mkdir -p "$(dirname "$OUTPUT")"
umask 077
# printf re-applies its format to each successive NAME/value pair. The last
# three pairs mirror the seed keys so process.env lookups in Node match env
# after restart.
printf '%s=%s\n' \
  STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY "$STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY" \
  STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY "$STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY" \
  STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY "$STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY" \
  CRON_SECRET "$CRON_SECRET" \
  NEXT_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY "$STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY" \
  STACK_SECRET_SERVER_KEY "$STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY" \
  STACK_SUPER_SECRET_ADMIN_KEY "$STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY" \
  > "$OUTPUT"
chmod 0600 "$OUTPUT"
log "wrote $OUTPUT"
# The PCK is baked into built JS via STACK_ENV_VAR_SENTINEL replacement at
# container start (see /app-entrypoint.sh), so the placeholder hex has to be
# swapped for the fresh value inside the built tree. Nothing to do when the
# fresh key equals the placeholder.
if [ "$STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY" != "$PLACEHOLDER_PCK" ]; then
  log "rewriting PCK placeholder in $WORK_DIR"
  # Collect only the *.js files under apps/ that still contain the placeholder,
  # so sed is handed just those. grep finding nothing is tolerated (|| true).
  readarray -t js_with_placeholder < <(grep -rl --include='*.js' "$PLACEHOLDER_PCK" "$WORK_DIR/apps" 2>/dev/null || true)
  if [ "${#js_with_placeholder[@]}" -eq 0 ]; then
    log "no files contained the placeholder (already rotated?)"
  else
    sed -i "s|${PLACEHOLDER_PCK}|${STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY}|g" "${js_with_placeholder[@]}"
    log "patched ${#js_with_placeholder[@]} file(s)"
  fi
fi
# Update the internal ApiKeySet row so existing dashboard sessions keep
# working with the new keys. The three values were checked against a
# 64-hex-character pattern earlier in this script before being inlined here.
# The whole step is skipped when STACK_DATABASE_CONNECTION_STRING is unset or
# empty.
if [ -n "${STACK_DATABASE_CONNECTION_STRING:-}" ]; then
log "updating internal ApiKeySet"
# The heredoc delimiter is unquoted, so the shell expands the ${...} key
# values into the statement. ON_ERROR_STOP=1 makes psql exit non-zero on a
# SQL error, which aborts this script under `set -e`.
psql "$STACK_DATABASE_CONNECTION_STRING" -v ON_ERROR_STOP=1 <<SQL
UPDATE "ApiKeySet" SET
"publishableClientKey" = '${STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY}',
"secretServerKey" = '${STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY}',
"superSecretAdminKey" = '${STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY}',
"updatedAt" = NOW()
WHERE "projectId" = 'internal' AND id = '3142e763-b230-44b5-8636-aa62f7489c26';
SQL
fi
# Restart both supervisor programs so the new values take effect in the
# running Node processes.
log "restarting stack-app and cron-jobs"
supervisorctl restart stack-app cron-jobs
log "done"

View File

@ -4,6 +4,14 @@
set -e
# Pick up rotated secrets from the emulator snapshot resume path if present.
if [ -f /run/stack-auth/rotated-secrets.env ]; then
set -a
# shellcheck disable=SC1091
source /run/stack-auth/rotated-secrets.env
set +a
fi
BACKEND_URL="http://127.0.0.1:${BACKEND_PORT:-8102}"
if [ -z "${CRON_SECRET:-}" ]; then

View File

@ -4,6 +4,18 @@ logfile=/var/log/supervisor/supervisord.log
pidfile=/var/run/supervisord.pid
loglevel=info
; supervisorctl endpoint — rotate-secrets uses this to restart stack-app and
; cron-jobs after the emulator snapshot-resume path injects fresh secrets.
[unix_http_server]
file=/var/run/supervisor.sock
chmod=0700
[rpcinterface:supervisor]
supervisor.rpcinterface_factory=supervisor.rpcinterface:make_main_rpcinterface
[supervisorctl]
serverurl=unix:///var/run/supervisor.sock
; --- PostgreSQL ---
[program:postgres]
@ -13,7 +25,7 @@ command=/usr/lib/postgresql/16/bin/postgres
-c max_connections=500
-c shared_preload_libraries=pg_stat_statements
-c pg_stat_statements.track=all
-c statement_timeout=30s
-c statement_timeout=120s
user=postgres
autostart=true
autorestart=true
@ -180,6 +192,8 @@ autostart=true
autorestart=true
startsecs=0
priority=70
stopasgroup=true
killasgroup=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
@ -193,6 +207,13 @@ autostart=true
autorestart=unexpected
startsecs=0
priority=60
; The wrapper script spawns Node backends as background children. On
; supervisor restart we MUST kill the whole process group, otherwise the
; old Node servers keep their port bindings and the new ones fail with
; EADDRINUSE — breaking the snapshot-resume rotation flow.
stopasgroup=true
killasgroup=true
stopwaitsecs=10
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr

View File

@ -2,6 +2,17 @@
set -e
# ============= ROTATED SECRETS OVERLAY =============
# On emulator snapshot resume, the host injects freshly-generated secrets into
# /run/stack-auth/rotated-secrets.env before supervisorctl restarts us. Sourcing
# here lets a fast-restart pick up new values without a full container restart.
if [ -f /run/stack-auth/rotated-secrets.env ]; then
set -a
# shellcheck disable=SC1091
source /run/stack-auth/rotated-secrets.env
set +a
fi
# ============= FORWARD MOCK OAUTH SERVER =============
# Start socat to forward port 32202 for mock-oauth-server if enabled
@ -130,39 +141,51 @@ if [ "$WORK_DIR" != "/app" ]; then
cp -r /app/. "$WORK_DIR"/.
fi
# Find all files in the apps directory that contain a STACK_ENV_VAR_SENTINEL and extract the unique sentinel strings.
echo "Finding unhandled sentinels..."
unhandled_sentinels=$(find "$WORK_DIR/apps" -type f -exec grep -l "STACK_ENV_VAR_SENTINEL" {} + | \
xargs grep -h "STACK_ENV_VAR_SENTINEL" | \
grep -o "STACK_ENV_VAR_SENTINEL[A-Z_]*" | \
sort -u | grep -v "^STACK_ENV_VAR_SENTINEL$")
# The full-tree sentinel scan is expensive (several seconds over the whole built
# app tree). On a fast-restart — triggered by the emulator snapshot rotation
# path — the placeholders have already been sed-replaced by rotate-secrets,
# and no new sentinels need substitution. Skip the scan in that case. Marker
# lives in WORK_DIR because the docker/server image runs as the unprivileged
# `node` user and cannot write to /var/run.
SENTINEL_MARKER="$WORK_DIR/.stack-sentinels-replaced"
if [ -f "$SENTINEL_MARKER" ]; then
echo "Sentinels already replaced on a previous start; skipping scan."
else
# Find all files in the apps directory that contain a STACK_ENV_VAR_SENTINEL and extract the unique sentinel strings.
echo "Finding unhandled sentinels..."
unhandled_sentinels=$(find "$WORK_DIR/apps" -type f -exec grep -l "STACK_ENV_VAR_SENTINEL" {} + | \
xargs grep -h "STACK_ENV_VAR_SENTINEL" | \
grep -o "STACK_ENV_VAR_SENTINEL[A-Z_]*" | \
sort -u | grep -v "^STACK_ENV_VAR_SENTINEL$")
# Choose an uncommon delimiter here, we use the ASCII Unit Separator (0x1F)
delimiter=$(printf '\037')
# Choose an uncommon delimiter here, we use the ASCII Unit Separator (0x1F)
delimiter=$(printf '\037')
echo "Replacing sentinels..."
for sentinel in $unhandled_sentinels; do
# The sentinel is like "STACK_ENV_VAR_SENTINEL_MY_VAR", so extract the env var name.
env_var=${sentinel#STACK_ENV_VAR_SENTINEL_}
# Get the corresponding environment variable value.
value="${!env_var}"
# If the env var is not set, skip replacement.
if [ -z "$value" ]; then
continue
fi
echo "Replacing sentinels..."
for sentinel in $unhandled_sentinels; do
# The sentinel is like "STACK_ENV_VAR_SENTINEL_MY_VAR", so extract the env var name.
env_var=${sentinel#STACK_ENV_VAR_SENTINEL_}
# Although the sentinel only contains [A-Z_] we still escape it for any regex meta-characters.
escaped_sentinel=$(printf '%s\n' "$sentinel" | sed -e 's/\\/\\\\/g' -e 's/[][\/.^$*]/\\&/g')
# Get the corresponding environment variable value.
value="${!env_var}"
# For the replacement value, first escape backslashes, then escape any occurrence of
# the chosen delimiter and the '&' (which has special meaning in sed replacements).
escaped_value=$(printf '%s\n' "$value" | sed -e 's/\\/\\\\/g' -e "s/[${delimiter}&]/\\\\&/g")
# If the env var is not set, skip replacement.
if [ -z "$value" ]; then
continue
fi
# Now replace the sentinel with the (properly escaped) value in all files in the working directory.
find $WORK_DIR/apps -type f -exec sed -i "s${delimiter}${escaped_sentinel}${delimiter}${escaped_value}${delimiter}g" {} +
done
# Although the sentinel only contains [A-Z_] we still escape it for any regex meta-characters.
escaped_sentinel=$(printf '%s\n' "$sentinel" | sed -e 's/\\/\\\\/g' -e 's/[][\/.^$*]/\\&/g')
# For the replacement value, first escape backslashes, then escape any occurrence of
# the chosen delimiter and the '&' (which has special meaning in sed replacements).
escaped_value=$(printf '%s\n' "$value" | sed -e 's/\\/\\\\/g' -e "s/[${delimiter}&]/\\\\&/g")
# Now replace the sentinel with the (properly escaped) value in all files in the working directory.
find $WORK_DIR/apps -type f -exec sed -i "s${delimiter}${escaped_sentinel}${delimiter}${escaped_value}${delimiter}g" {} +
done
touch "$SENTINEL_MARKER"
fi
# ============= START BACKEND AND DASHBOARD =============

View File

@ -13,7 +13,8 @@
"build": "tsdown && node scripts/copy-emulator-assets.mjs",
"dev": "tsdown --watch",
"lint": "eslint --ext .tsx,.ts .",
"typecheck": "tsc --noEmit"
"typecheck": "tsc --noEmit",
"test": "vitest run"
},
"files": [
"README.md",
@ -31,6 +32,7 @@
"@stackframe/js": "workspace:*",
"@stackframe/stack-shared": "workspace:*",
"commander": "^13.1.0",
"extract-zip": "^2.0.1",
"jiti": "^2.4.2"
},
"devDependencies": {

View File

@ -0,0 +1,166 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import {
envPort,
formatBytes,
formatDuration,
platformInstallHint,
renderProgressLine,
resolveArch,
} from "./emulator.js";
// formatBytes: unit selection, precision switch, invalid input, and the TB cap.
describe("formatBytes", () => {
  const KIB = 1024;

  it("renders B / KB / MB / GB across unit boundaries", () => {
    const table: Array<[number, string]> = [
      [0, "0 B"],
      [1, "1 B"],
      [1023, "1023 B"],
      [KIB, "1.0 KB"],
      [1536, "1.5 KB"],
      [KIB ** 2, "1.0 MB"],
      [KIB ** 3, "1.0 GB"],
      [KIB ** 4, "1.0 TB"],
    ];
    for (const [bytes, rendered] of table) {
      expect(formatBytes(bytes)).toBe(rendered);
    }
  });

  it("switches precision at v>=10 within a unit", () => {
    const table: Array<[number, string]> = [
      [KIB * 10, "10 KB"],
      [KIB * 9.5, "9.5 KB"],
    ];
    for (const [bytes, rendered] of table) {
      expect(formatBytes(bytes)).toBe(rendered);
    }
  });

  it("returns '?' for non-finite and negative values", () => {
    for (const invalid of [NaN, Infinity, -1]) {
      expect(formatBytes(invalid)).toBe("?");
    }
  });

  it("caps at TB for very large values", () => {
    // Exabyte-scale input must still land on the last unit rather than
    // walking off the end of the units array.
    expect(formatBytes(KIB ** 6)).toMatch(/ TB$/);
  });
});
// formatDuration: s/m/h boundaries, integer rounding, and invalid input.
describe("formatDuration", () => {
  const expectAll = (table: Array<[number, string]>) => {
    for (const [seconds, rendered] of table) {
      expect(formatDuration(seconds)).toBe(rendered);
    }
  };

  it("uses s/m/h units at the right boundaries", () => {
    expectAll([
      [0, "0s"],
      [59, "59s"],
      [60, "1m00s"],
      [61, "1m01s"],
      [3599, "59m59s"],
      [3600, "1h00m"],
      [3660, "1h01m"],
    ]);
  });

  it("rounds seconds to integers", () => {
    expectAll([
      [59.4, "59s"],
      [59.9, "1m00s"],
    ]);
  });

  it("returns '?' for non-finite and negative values", () => {
    expectAll([
      [NaN, "?"],
      [Infinity, "?"],
      [-1, "?"],
    ]);
  });
});
// renderProgressLine: which fields appear for known/unknown totals and a zero rate.
describe("renderProgressLine", () => {
  it("renders a known-size progress bar with percent, size, speed, and ETA", () => {
    const rendered = renderProgressLine(1024, 2048, 512);
    for (const piece of ["50.0%", "/", "/s", "eta"]) {
      expect(rendered).toContain(piece);
    }
  });

  it("hides the percent / ETA fields when total size is unknown (total=0)", () => {
    const rendered = renderProgressLine(1024, 0, 512);
    for (const hidden of ["%", "eta"]) {
      expect(rendered).not.toContain(hidden);
    }
    expect(rendered).toContain("/s");
  });

  it("clamps percent at 100 if downloaded overshoots total (rounding)", () => {
    expect(renderProgressLine(2050, 2048, 100)).toContain("100.0%");
  });

  it("handles bytesPerSec = 0 by suppressing ETA", () => {
    expect(renderProgressLine(512, 2048, 0)).not.toContain("eta");
  });
});
// envPort: fallback, parsing, and rejection rules, exercised through a
// scratch env var that is cleared before and restored after every test.
describe("envPort", () => {
  const VAR = "__TEST_PORT";
  const FALLBACK = 1234;
  const original = process.env[VAR];

  // Sets the scratch variable to `raw` and asserts that envPort rejects it.
  const expectRejected = (raw: string) => {
    process.env[VAR] = raw;
    expect(() => envPort(VAR, FALLBACK)).toThrow(/Invalid __TEST_PORT/);
  };

  beforeEach(() => {
    delete process.env[VAR];
  });

  afterEach(() => {
    if (original === undefined) {
      delete process.env[VAR];
      return;
    }
    process.env[VAR] = original;
  });

  it("returns the fallback when the env var is not set", () => {
    expect(envPort(VAR, FALLBACK)).toBe(FALLBACK);
  });

  it("parses a valid integer value", () => {
    process.env[VAR] = "9876";
    expect(envPort(VAR, FALLBACK)).toBe(9876);
  });

  it("rejects zero and negative values", () => {
    expectRejected("0");
    expectRejected("-5");
  });

  it("rejects non-integer and non-numeric values", () => {
    expectRejected("3.14");
    expectRejected("not-a-port");
  });

  it("treats empty string as not set (returns fallback)", () => {
    // Regression target: earlier versions sometimes parsed "" as 0 and threw.
    process.env[VAR] = "";
    expect(envPort(VAR, FALLBACK)).toBe(FALLBACK);
  });
});
// resolveArch: explicit values, rejected values, and the host-arch default.
describe("resolveArch", () => {
  it("accepts explicit arm64 / amd64", () => {
    for (const arch of ["arm64", "amd64"] as const) {
      expect(resolveArch(arch)).toBe(arch);
    }
  });

  it("throws on unsupported explicit arch", () => {
    for (const unsupported of ["mips", "x86"]) {
      expect(() => resolveArch(unsupported)).toThrow(/Invalid architecture/);
    }
  });

  it("maps the current process arch when raw is undefined", () => {
    // Node's arch names for the two hosts the emulator supports.
    const hostToEmulator = new Map<string, "arm64" | "amd64">([
      ["arm64", "arm64"],
      ["x64", "amd64"],
    ]);
    const expected = hostToEmulator.get(process.arch);
    if (expected !== undefined) {
      expect(resolveArch()).toBe(expected);
    } else {
      expect(() => resolveArch()).toThrow(/Invalid architecture/);
    }
  });
});
// platformInstallHint: the hint text depends on process.platform, which is
// stubbed for the duration of the test and always restored afterwards.
describe("platformInstallHint", () => {
  it("uses brew on darwin and apt on linux", () => {
    const expectations: Array<[NodeJS.Platform, string]> = [
      ["darwin", "brew install foo-mac"],
      ["linux", "apt install foo-linux"],
      ["win32", "install foo-mac"],
    ];
    const platformSpy = vi.spyOn(process, "platform", "get");
    try {
      for (const [platform, snippet] of expectations) {
        platformSpy.mockReturnValue(platform);
        expect(platformInstallHint("foo-linux", "foo-mac")).toContain(snippet);
      }
    } finally {
      platformSpy.mockRestore();
    }
  });
});

View File

@ -1,23 +1,43 @@
import { Command } from "commander";
import { execFileSync, spawn } from "child_process";
import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs";
import extract from "extract-zip";
import { chmodSync, createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs";
import { homedir } from "os";
import { dirname, join, resolve } from "path";
import { Readable } from "stream";
import { pipeline } from "stream/promises";
import { fileURLToPath } from "url";
import { CliError } from "../lib/errors.js";
import { writeIso } from "../lib/iso.js";
const DEFAULT_EMULATOR_BACKEND_PORT = 26701;
const DEFAULT_EMULATOR_DASHBOARD_PORT = 26700;
const DEFAULT_EMULATOR_MINIO_PORT = 26702;
const DEFAULT_EMULATOR_INBUCKET_PORT = 26703;
const DEFAULT_PORT_PREFIX = "81";
const GITHUB_API = "https://api.github.com";
const DEFAULT_REPO = "stack-auth/stack-auth";
const AARCH64_FIRMWARE_PATHS = [
"/opt/homebrew/share/qemu/edk2-aarch64-code.fd",
"/usr/share/qemu/edk2-aarch64-code.fd",
"/usr/share/AAVMF/AAVMF_CODE.fd",
"/usr/share/qemu-efi-aarch64/QEMU_EFI.fd",
];
function emulatorBackendPort(): number {
const raw = process.env.EMULATOR_BACKEND_PORT;
if (!raw) return DEFAULT_EMULATOR_BACKEND_PORT;
export function envPort(name: string, fallback: number): number {
const raw = process.env[name];
if (!raw) return fallback;
const parsed = Number(raw);
if (!Number.isInteger(parsed) || parsed <= 0) {
throw new CliError(`Invalid EMULATOR_BACKEND_PORT: ${raw}`);
throw new CliError(`Invalid ${name}: ${raw}`);
}
return parsed;
}
function emulatorBackendPort(): number {
return envPort("EMULATOR_BACKEND_PORT", DEFAULT_EMULATOR_BACKEND_PORT);
}
function emulatorHome(): string {
return process.env.STACK_EMULATOR_HOME ?? join(homedir(), ".stack", "emulator");
}
@ -37,11 +57,13 @@ function internalPckPath(): string {
async function readInternalPck(timeoutMs = 60_000): Promise<string> {
const path = internalPckPath();
const deadline = Date.now() + timeoutMs;
let delay = 250;
let delay = 50;
while (Date.now() < deadline) {
if (existsSync(path)) {
try {
const contents = readFileSync(path, "utf-8").trim();
if (contents) return contents;
} catch (e) {
if ((e as NodeJS.ErrnoException).code !== "ENOENT") throw e;
}
await new Promise((r) => setTimeout(r, delay));
delay = Math.min(delay * 2, 2000);
@ -82,26 +104,71 @@ async function fetchEmulatorCredentials(pck: string, backendPort: number, config
};
}
function gh(args: string[]): string {
// Resolve a GitHub auth token. We try GITHUB_TOKEN first so users can pin a
// PAT, then fall back to `gh auth token` if the gh CLI is installed and
// signed in. If neither works we return undefined — public release downloads
// still work (anonymous, lower rate limit) but artifact downloads fail with a
// clear error at the call site.
function githubToken(): string | undefined {
if (process.env.GITHUB_TOKEN) return process.env.GITHUB_TOKEN;
try {
return execFileSync("gh", args, { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
} catch (err: unknown) {
if (err instanceof Error && "stderr" in err && typeof err.stderr === "string") {
throw new CliError(`GitHub CLI error: ${err.stderr}`);
}
throw new CliError("GitHub CLI (gh) is required. Install: https://cli.github.com/");
const out = execFileSync("gh", ["auth", "token"], {
encoding: "utf-8",
stdio: ["pipe", "pipe", "pipe"],
}).trim();
return out || undefined;
} catch {
return undefined;
}
}
// Fetches `path` from the GitHub REST API and returns the parsed JSON body.
// An Authorization header is attached only when githubToken() yields a token.
// Any non-2xx response becomes a CliError carrying the status, the path, an
// auth hint for 401/403, and up to 300 characters of the response body.
async function ghApi<T>(path: string): Promise<T> {
  const headers: Record<string, string> = {
    Accept: "application/vnd.github+json",
    "X-GitHub-Api-Version": "2022-11-28",
  };
  const token = githubToken();
  if (token) headers.Authorization = `Bearer ${token}`;

  const res = await fetch(`${GITHUB_API}${path}`, { headers });
  if (res.ok) {
    return (await res.json()) as T;
  }

  // Reading the body is best-effort; fall back to no detail if it fails.
  const body = await res.text().catch(() => "");
  const authProblem = res.status === 401 || res.status === 403;
  const hint = authProblem
    ? " (set GITHUB_TOKEN or run `gh auth login` for higher rate limits / private access)"
    : "";
  const detail = body ? `: ${body.slice(0, 300)}` : "";
  throw new CliError(`GitHub API ${res.status} ${res.statusText} for ${path}${hint}${detail}`);
}
function emulatorScriptsDir(): string {
const here = dirname(fileURLToPath(import.meta.url));
const bundled = join(here, "emulator");
if (existsSync(join(bundled, "run-emulator.sh"))) return bundled;
if (existsSync(join(bundled, "run-emulator.sh"))) return ensureExecutable(bundled);
const repo = resolve(here, "../../../docker/local-emulator/qemu");
if (existsSync(join(repo, "run-emulator.sh"))) return repo;
if (existsSync(join(repo, "run-emulator.sh"))) return ensureExecutable(repo);
throw new CliError("Emulator scripts not found in CLI bundle.");
}
// npm pack strips the execute bit from non-`bin` files, so put it back on
// run-emulator.sh. Best-effort: a failed chmod is ignored, and the directory
// that was passed in is returned unchanged either way.
function ensureExecutable(scriptsDir: string): string {
  const launcher = join(scriptsDir, "run-emulator.sh");
  try {
    chmodSync(launcher, 0o755);
  } catch {
    // best-effort
  }
  return scriptsDir;
}
// Returns the path of .env.development, which lives one directory up from
// the scripts dir in both bundled and repo layouts
// (dist/.env.development vs docker/local-emulator/.env.development).
// Throws a CliError when the file is not there.
function baseEnvPath(): string {
  const candidate = resolve(emulatorScriptsDir(), "..", ".env.development");
  if (existsSync(candidate)) return candidate;
  throw new CliError(`Emulator base.env not found at ${candidate}`);
}
function emulatorSpawnEnv(extra?: Record<string, string>): NodeJS.ProcessEnv {
return {
...process.env,
@ -111,6 +178,33 @@ function emulatorSpawnEnv(extra?: Record<string, string>): NodeJS.ProcessEnv {
};
}
// Writes <run dir>/vm/runtime-config.iso, labelled STACKCFG, for the VM to
// mount. It carries runtime.env (port settings plus the host-side VM
// directory) and base.env (the contents of .env.development). Replaces the
// hdiutil/mkisofs/genisoimage host dep — see ../lib/iso.ts.
function prepareRuntimeConfigIso(): void {
  const vmDir = join(emulatorRunDir(), "vm");
  mkdirSync(vmDir, { recursive: true });

  const portPrefix = process.env.PORT_PREFIX ?? process.env.NEXT_PUBLIC_STACK_PORT_PREFIX ?? DEFAULT_PORT_PREFIX;
  // Each pair becomes one KEY=value line. The envPort calls throw on an
  // invalid override before anything is written.
  const settings: Array<[string, string | number]> = [
    ["STACK_EMULATOR_PORT_PREFIX", portPrefix],
    ["STACK_EMULATOR_DASHBOARD_HOST_PORT", envPort("EMULATOR_DASHBOARD_PORT", DEFAULT_EMULATOR_DASHBOARD_PORT)],
    ["STACK_EMULATOR_BACKEND_HOST_PORT", envPort("EMULATOR_BACKEND_PORT", DEFAULT_EMULATOR_BACKEND_PORT)],
    ["STACK_EMULATOR_MINIO_HOST_PORT", envPort("EMULATOR_MINIO_PORT", DEFAULT_EMULATOR_MINIO_PORT)],
    ["STACK_EMULATOR_INBUCKET_HOST_PORT", envPort("EMULATOR_INBUCKET_PORT", DEFAULT_EMULATOR_INBUCKET_PORT)],
    ["STACK_EMULATOR_VM_DIR_HOST", vmDir],
  ];
  const runtimeEnv = settings.map(([key, value]) => `${key}=${value}\n`).join("");

  const baseEnv = readFileSync(baseEnvPath());
  const isoPath = join(vmDir, "runtime-config.iso");
  writeIso(isoPath, "STACKCFG", [
    { name: "runtime.env", data: Buffer.from(runtimeEnv, "utf-8") },
    { name: "base.env", data: baseEnv },
  ]);
}
function runEmulator(action: string, env?: Record<string, string>): Promise<void> {
const scriptsDir = emulatorScriptsDir();
mkdirSync(emulatorRunDir(), { recursive: true });
@ -141,82 +235,327 @@ function isEmulatorRunning(): boolean {
}
// Starts the emulator VM for `arch`. Ensures the image directory exists; when no
// `stack-emulator-<arch>.qcow2` is present it pulls the release image and captures
// a local snapshot; then it writes the runtime-config ISO and runs the "start" action.
// NOTE(review): this span contains both `pullRelease(arch);` and
// `await pullRelease(arch);`, plus two `runEmulator("start", ...)` calls. That looks
// like removed and added diff lines shown together — confirm which lines are live.
async function startEmulator(arch: "arm64" | "amd64"): Promise<void> {
mkdirSync(emulatorImageDir(), { recursive: true });
const img = join(emulatorImageDir(), `stack-emulator-${arch}.qcow2`);
if (!existsSync(img)) {
console.log("No emulator image found. Pulling latest...");
pullRelease(arch);
await pullRelease(arch);
// Capture now so this and all subsequent starts resume fast. Skipping it
// would cold-boot today plus every future start (we never auto-capture).
await captureLocalSnapshot(arch);
}
await runEmulator("start", { EMULATOR_ARCH: arch });
prepareRuntimeConfigIso();
// Signal to run-emulator.sh that runtime-config.iso was written by the CLI
// via lib/iso.ts; the shell's ensure_runtime_config_iso should trust it and
// skip its own regeneration (which would otherwise require the
// hdiutil/mkisofs/genisoimage host dep the TS writer replaces).
await runEmulator("start", { EMULATOR_ARCH: arch, STACK_EMULATOR_CLI_WROTE_ISO: "1" });
}
// Resolves the target architecture. An explicit `raw` value is used as-is;
// otherwise `process.arch` is mapped ("arm64" -> "arm64", "x64" -> "amd64").
// Anything that is not "arm64" or "amd64" raises a CliError naming the bad value.
// NOTE(review): both a non-exported and an exported signature line appear below;
// this looks like diff residue — confirm that the exported form is the live one.
function resolveArch(raw?: string): "arm64" | "amd64" {
export function resolveArch(raw?: string): "arm64" | "amd64" {
const arch = raw ?? (process.arch === "arm64" ? "arm64" : process.arch === "x64" ? "amd64" : null);
if (arch === "arm64" || arch === "amd64") return arch;
throw new CliError(`Invalid architecture: ${raw ?? process.arch}. Expected arm64 or amd64.`);
}
// Downloads `stack-emulator-<arch>.qcow2` from a GitHub release into the image
// directory. The release tag defaults to `emulator-<branch>-latest` (branch
// defaults to "dev"). Throws a CliError when the release has no matching asset.
// NOTE(review): two signatures, two `repo` declarations and both `asset` and
// `diskAsset` appear in this span; it looks like removed and added diff lines
// shown together — confirm which lines are live.
function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string; branch?: string; tag?: string } = {}) {
const repo = opts.repo ?? "stack-auth/stack-auth";
// Shape of the fields read from the release-by-tag API response.
type ReleaseAsset = { name: string, url: string, size: number };
type ReleaseResponse = { assets: ReleaseAsset[] };
async function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string, branch?: string, tag?: string } = {}) {
const repo = opts.repo ?? DEFAULT_REPO;
const branch = opts.branch ?? "dev";
const tag = opts.tag ?? `emulator-${branch}-latest`;
const asset = `stack-emulator-${arch}.qcow2`;
const imageDir = emulatorImageDir();
mkdirSync(imageDir, { recursive: true });
const diskAsset = `stack-emulator-${arch}.qcow2`;
// Look the release up by tag and pick the asset whose name matches exactly.
const release = await ghApi<ReleaseResponse>(`/repos/${repo}/releases/tags/${tag}`);
const diskMatch = release.assets.find((a) => a.name === diskAsset);
if (!diskMatch) {
throw new CliError(`Asset ${diskAsset} not found in release ${tag}. Run 'stack emulator list-releases' to see available releases.`);
}
const token = githubToken();
await downloadReleaseAsset(diskMatch, imageDir, diskAsset, token, tag);
}
// Cold-boot the VM, wait for services, capture a snapshot via QMP, compress,
// stop. Runs once per qcow2 download so subsequent `stack emulator start`s
// resume in ~3-8s. Snapshots are always captured on the user's own machine
// because QEMU migration state isn't portable across accelerators
// (KVM/HVF/TCG) or `-cpu max` feature sets.
async function captureLocalSnapshot(arch: "arm64" | "amd64"): Promise<void> {
  // Check host tooling first; errors are reported under the "pull" command name.
  preflightForVmStart("pull", arch);

  // Write runtime-config.iso before handing over to the "capture" action.
  prepareRuntimeConfigIso();

  console.log("Capturing local snapshot (first-time, ~1-3 min cold boot + capture)...");
  const captureEnv: Record<string, string> = { EMULATOR_ARCH: arch };
  await runEmulator("capture", captureEnv);
}
// Downloads one release asset to `<imageDir>/<asset>`. The bytes are written to
// `<dest>.download` first and renamed into place only after the download
// succeeds; on failure the temporary file is removed and a CliError is thrown.
// NOTE(review): the `execFileSync("gh", ...)` call below references `repo`, which
// is not a parameter here, and two `throw new CliError(...)` statements follow one
// another. This looks like removed and added diff lines shown together — confirm
// which lines are live.
async function downloadReleaseAsset(
match: ReleaseAsset,
imageDir: string,
asset: string,
token: string | undefined,
tag: string,
): Promise<void> {
const dest = join(imageDir, asset);
const tmpDest = `${dest}.download`;
console.log(`Pulling ${asset} from release ${tag}...`);
// Request the raw asset bytes; the Authorization header is sent only when a token is available.
const headers: Record<string, string> = { Accept: "application/octet-stream" };
if (token) headers.Authorization = `Bearer ${token}`;
try {
execFileSync("gh", ["release", "download", tag, "--repo", repo, "--pattern", asset, "--output", tmpDest, "--clobber"], { stdio: "inherit" });
await downloadWithProgress(match.url, headers, tmpDest, match.size);
} catch (err) {
// Never leave a partial download behind.
if (existsSync(tmpDest)) unlinkSync(tmpDest);
throw new CliError(`Failed to download ${asset} from release ${tag}: ${err instanceof Error ? err.message : err}\nRun 'stack emulator list-releases' to see available releases.`);
// CliErrors are rethrown unchanged; anything else is wrapped.
if (err instanceof CliError) throw err;
throw new CliError(`Failed to download ${asset} from release ${tag}: ${err instanceof Error ? err.message : err}`);
}
renameSync(tmpDest, dest);
console.log(`Downloaded: ${dest}`);
}
// Streams `url` into the file `dest` while reporting progress on stderr.
// On a TTY the progress line is redrawn in place at most every 100 ms; on a
// non-TTY only the final line is printed. `totalBytes`, when given, takes
// precedence over the response's content-length header. Throws a CliError when
// the response is not OK or has no body.
async function downloadWithProgress(url: string, headers: Record<string, string>, dest: string, totalBytes?: number): Promise<void> {
  const res = await fetch(url, { headers, redirect: "follow" });
  if (!res.ok || !res.body) {
    throw new CliError(`Download failed (${res.status} ${res.statusText}): ${url}`);
  }

  const expectedBytes = totalBytes ?? (Number(res.headers.get("content-length")) || 0);
  const interactive = Boolean(process.stderr.isTTY);
  const t0 = Date.now();
  let received = 0;
  let lastPaintAt = 0;

  function paint(isFinal: boolean): void {
    const now = Date.now();
    const throttled = !isFinal && now - lastPaintAt < 100;
    if (throttled) return;
    lastPaintAt = now;
    // Floor the elapsed time so the speed never divides by zero.
    const seconds = Math.max(0.001, (now - t0) / 1000);
    const text = renderProgressLine(received, expectedBytes, received / seconds);
    if (interactive) {
      // Carriage return + erase-line, then redraw in place.
      process.stderr.write(`\r\x1b[2K${text}`);
      return;
    }
    if (isFinal) process.stderr.write(`${text}\n`);
  }

  const source = Readable.fromWeb(res.body as Parameters<typeof Readable.fromWeb>[0]);
  source.on("data", (chunk: Buffer) => {
    received += chunk.byteLength;
    paint(false);
  });
  await pipeline(source, createWriteStream(dest));

  paint(true);
  if (interactive) process.stderr.write("\n");
}
// Renders one line of download progress: a 30-cell bar, percentage, transferred
// size (with the total when known), speed, and an ETA when it can be computed.
//   downloaded  - bytes received so far
//   total       - expected total in bytes; 0 or less means "unknown"
//   bytesPerSec - current throughput, used for the speed and ETA fields
export function renderProgressLine(downloaded: number, total: number, bytesPerSec: number): string {
  const barWidth = 30;
  const pct = total > 0 ? Math.min(100, (downloaded / total) * 100) : 0;
  // Clamp the filled-cell count into [0, barWidth]. Without this, a download
  // that exceeds the advertised total draws a bar wider than barWidth, and a
  // negative count makes String.prototype.repeat throw a RangeError.
  const rawFilled = total > 0 ? Math.round((downloaded / total) * barWidth) : 0;
  const filled = Number.isFinite(rawFilled) ? Math.min(barWidth, Math.max(0, rawFilled)) : 0;
  const bar = "█".repeat(filled) + "░".repeat(barWidth - filled);
  const pctStr = total > 0 ? `${pct.toFixed(1).padStart(5)}%` : " ? ";
  const sizeStr = total > 0 ? `${formatBytes(downloaded)}/${formatBytes(total)}` : formatBytes(downloaded);
  const speedStr = `${formatBytes(bytesPerSec)}/s`;
  // The ETA needs both a known total and a positive speed.
  const etaStr = total > 0 && bytesPerSec > 0 ? ` eta ${formatDuration((total - downloaded) / bytesPerSec)}` : "";
  return ` [${bar}] ${pctStr} ${sizeStr} ${speedStr}${etaStr}`;
}
// Formats a byte count with binary (1024-based) units from B up to TB.
// Values below 10 in any unit above B get one decimal place; everything else
// is shown without decimals. Non-finite or negative input yields "?".
export function formatBytes(bytes: number): string {
  const invalid = !Number.isFinite(bytes) || bytes < 0;
  if (invalid) return "?";

  const units = ["B", "KB", "MB", "GB", "TB"];
  const lastUnit = units.length - 1;
  let scaled = bytes;
  let unitIndex = 0;
  // Step up a unit while the value is still >= 1024 and a larger unit exists.
  for (; unitIndex < lastUnit && scaled >= 1024; unitIndex += 1) {
    scaled /= 1024;
  }

  const decimals = unitIndex > 0 && scaled < 10 ? 1 : 0;
  return `${scaled.toFixed(decimals)} ${units[unitIndex]}`;
}
// Formats a duration in seconds as "Ns", "MmSSs" or "HhMMm", after rounding to
// whole seconds. Non-finite or negative input yields "?".
export function formatDuration(seconds: number): string {
  if (!(Number.isFinite(seconds) && seconds >= 0)) return "?";

  const twoDigits = (n: number): string => n.toString().padStart(2, "0");
  const wholeSeconds = Math.round(seconds);
  if (wholeSeconds < 60) return `${wholeSeconds}s`;

  const minutes = Math.floor(wholeSeconds / 60);
  if (minutes < 60) return `${minutes}m${twoDigits(wholeSeconds % 60)}s`;

  // From one hour up the seconds are dropped.
  return `${Math.floor(minutes / 60)}h${twoDigits(minutes % 60)}m`;
}
// --- Dependency preflight ---------------------------------------------------
type BinarySpec = { name: string, install: string };
// Reports whether `bin` can be located by the platform's lookup tool
// (`where` on win32, `which` elsewhere). Any failure of the lookup counts as
// "not found".
function commandExists(bin: string): boolean {
  const locator = process.platform === "win32" ? "where" : "which";
  try {
    execFileSync(locator, [bin], { stdio: "pipe" });
  } catch {
    return false;
  }
  return true;
}
// Builds a short install hint for the current platform: Homebrew on darwin,
// apt on linux, and a generic "install <macPkg>" everywhere else.
export function platformInstallHint(linuxPkg: string, macPkg: string): string {
  const platform = process.platform;
  if (platform === "darwin") return `brew install ${macPkg}`;
  if (platform === "linux") return `apt install ${linuxPkg} (or your distro's equivalent)`;
  return `install ${macPkg}`;
}
// Pairs a binary name with the install hint for the current platform.
function bin(name: string, linuxPkg: string, macPkg: string): BinarySpec {
  const install = platformInstallHint(linuxPkg, macPkg);
  return { name, install };
}
// Throws a CliError listing every binary in `bins` that cannot be found on the
// host, each with its install hint. Returns normally when all are present.
//   commandName - the `stack emulator <commandName>` sub-command, used in the message
//   bins        - binaries that must exist for that sub-command to work
function requireBinaries(commandName: string, bins: BinarySpec[]): void {
  const missing = bins.filter((b) => !commandExists(b.name));
  if (missing.length === 0) return;
  // Separate the name from the hint; previously the two were concatenated
  // directly and produced unreadable entries such as "qemu-imgapt install ...".
  const lines = missing.map((b) => ` - ${b.name} — install: ${b.install}`);
  throw new CliError(
    `\`stack emulator ${commandName}\` requires the following missing binaries:\n${lines.join("\n")}`,
  );
}
// Emits one console warning per binary in `bins` that cannot be found on the
// host. Never throws for a missing binary.
function warnIfMissing(commandName: string, bins: BinarySpec[]): void {
  const absent: BinarySpec[] = [];
  for (const spec of bins) {
    if (!commandExists(spec.name)) absent.push(spec);
  }
  absent.forEach((b) => {
    console.warn(`[stack emulator ${commandName}] optional dep '${b.name}' missing — feature degraded. Install: ${b.install}`);
  });
}
// True when at least one of the known aarch64 firmware paths exists on disk.
function aarch64FirmwareAvailable(): boolean {
  for (const candidate of AARCH64_FIRMWARE_PATHS) {
    if (existsSync(candidate)) return true;
  }
  return false;
}
// Binaries required for a VM start regardless of the guest architecture.
function commonVmBins(): BinarySpec[] {
  // [binary name, linux package, mac package]
  const table: [string, string, string][] = [
    ["qemu-img", "qemu-utils", "qemu"],
    ["socat", "socat", "socat"],
    ["curl", "curl", "curl"],
    ["nc", "ncat", "netcat"],
    ["lsof", "lsof", "lsof"],
    ["openssl", "openssl", "openssl"],
  ];
  return table.map(([name, linuxPkg, macPkg]) => bin(name, linuxPkg, macPkg));
}
// Selects the qemu-system binary matching the guest architecture.
function archSpecificQemuBin(arch: "arm64" | "amd64"): BinarySpec {
  return arch === "arm64"
    ? bin("qemu-system-aarch64", "qemu-system-arm", "qemu")
    : bin("qemu-system-x86_64", "qemu-system-x86", "qemu");
}
// Host dependency check run before anything that boots the VM: throws when a
// required binary is missing, warns when zstd is missing, and for arm64 guests
// throws when none of the known aarch64 firmware paths exists.
function preflightForVmStart(commandName: string, arch: "arm64" | "amd64"): void {
  const required = [archSpecificQemuBin(arch)].concat(commonVmBins());
  requireBinaries(commandName, required);
  warnIfMissing(commandName, [bin("zstd", "zstd", "zstd")]);

  // Firmware is only looked up for arm64 guests.
  const firmwareNeeded = arch === "arm64";
  if (!firmwareNeeded || aarch64FirmwareAvailable()) return;

  const searched = AARCH64_FIRMWARE_PATHS.map((p) => ` - ${p}`).join("\n");
  throw new CliError(
    `aarch64 UEFI firmware not found. Looked in:\n${searched}\n` +
    `Install: ${platformInstallHint("qemu-efi-aarch64", "qemu")}`,
  );
}
// --- Workflow run / artifact downloads (replaces `gh run download`) ---------
type WorkflowRunsResponse = { workflow_runs: { id: number }[] };
type ArtifactsResponse = { artifacts: { id: number, name: string, size_in_bytes: number }[] };
type PullResponse = { head: { ref: string } };
// Downloads the workflow-run artifact called `name` from run `runId` of `repo`
// and extracts it into `destDir`.
// Returns false when the run's artifact list (first 100 entries) has no
// artifact with that name, true after a successful download and extraction.
// Throws a CliError when no GitHub token is available.
async function downloadArtifactByName(repo: string, runId: string, name: string, destDir: string): Promise<boolean> {
  const token = githubToken();
  if (!token) {
    throw new CliError(
      "Downloading workflow run artifacts requires authentication. Set GITHUB_TOKEN or run `gh auth login`.",
    );
  }
  const list = await ghApi<ArtifactsResponse>(`/repos/${repo}/actions/runs/${runId}/artifacts?per_page=100`);
  const match = list.artifacts.find((a) => a.name === name);
  if (!match) return false;
  const zipPath = join(destDir, `${name}.zip`);
  console.log(`Downloading artifact '${name}' from run ${runId}...`);
  try {
    await downloadWithProgress(
      `${GITHUB_API}/repos/${repo}/actions/artifacts/${match.id}/zip`,
      { Accept: "application/vnd.github+json", Authorization: `Bearer ${token}` },
      zipPath,
      match.size_in_bytes,
    );
    await extract(zipPath, { dir: destDir });
  } finally {
    // Remove the archive on failure as well as on success, so an interrupted
    // download or a failed extraction does not leave a partial zip in destDir.
    if (existsSync(zipPath)) unlinkSync(zipPath);
  }
  return true;
}
export function registerEmulatorCommand(program: Command) {
const emulator = program.command("emulator").description("Manage the QEMU local emulator");
emulator
.command("pull")
.description("Download an emulator image from GitHub Releases or a PR build")
.description("Download an emulator image from GitHub Releases or a PR build, then capture a local fast-start snapshot")
.option("--arch <arch>", "Target architecture (default: current system arch)")
.option("--branch <branch>", "Release branch (default: dev)")
.option("--tag <tag>", "Specific release tag (default: latest)")
.option("--repo <repo>", "GitHub repository (default: stack-auth/stack-auth)")
.option("--pr <number>", "Pull from a PR's CI artifacts")
.option("--run <id>", "Pull from a specific workflow run's artifacts")
.action(async (opts) => {
.option("--skip-snapshot", "Download only the qcow2; skip the one-time local snapshot capture")
.action(async (opts: { arch?: string, repo?: string, branch?: string, tag?: string, pr?: string, run?: string, skipSnapshot?: boolean }) => {
const arch = resolveArch(opts.arch);
const repo = opts.repo ?? "stack-auth/stack-auth";
const repo = opts.repo ?? DEFAULT_REPO;
if (opts.run || opts.pr) {
let runId = opts.run as string | undefined;
let runId = opts.run;
if (!runId) {
console.log(`Finding latest successful build for PR #${opts.pr}...`);
const { headRefName } = JSON.parse(gh(["pr", "view", opts.pr, "--repo", repo, "--json", "headRefName"]));
const runs = JSON.parse(gh(["run", "list", "--repo", repo, "--workflow", "qemu-emulator-build.yaml", "--branch", headRefName, "--status", "success", "--limit", "1", "--json", "databaseId"]));
if (runs.length === 0) throw new CliError(`No successful build found for PR #${opts.pr} (branch: ${headRefName}).`);
runId = String(runs[0].databaseId);
const pr = await ghApi<PullResponse>(`/repos/${repo}/pulls/${opts.pr}`);
const headRefName = pr.head.ref;
const runs = await ghApi<WorkflowRunsResponse>(
`/repos/${repo}/actions/workflows/qemu-emulator-build.yaml/runs?branch=${encodeURIComponent(headRefName)}&status=success&per_page=1`,
);
if (runs.workflow_runs.length === 0) {
throw new CliError(`No successful build found for PR #${opts.pr} (branch: ${headRefName}).`);
}
runId = String(runs.workflow_runs[0].id);
}
const imageDir = emulatorImageDir();
mkdirSync(imageDir, { recursive: true });
const dest = join(imageDir, `stack-emulator-${arch}.qcow2`);
const snapshotDest = join(imageDir, `stack-emulator-${arch}.savevm.zst`);
const snapshotRawDest = join(imageDir, `stack-emulator-${arch}.savevm.raw`);
if (existsSync(dest)) unlinkSync(dest);
console.log(`Downloading qemu-emulator-${arch} from workflow run ${runId}...`);
try {
execFileSync("gh", ["run", "download", runId, "--repo", repo, "--name", `qemu-emulator-${arch}`, "--dir", imageDir], { stdio: "inherit" });
} catch (err) {
throw new CliError(`Failed to download artifact from run ${runId}: ${err instanceof Error ? err.message : err}`);
// Stale snapshots from a previous pull would resume against the new
// qcow2 and crash; wipe them so capture rebuilds cleanly.
if (existsSync(snapshotDest)) unlinkSync(snapshotDest);
if (existsSync(snapshotRawDest)) unlinkSync(snapshotRawDest);
const downloaded = await downloadArtifactByName(repo, runId, `qemu-emulator-${arch}`, imageDir);
if (!downloaded) {
throw new CliError(`Artifact qemu-emulator-${arch} not found in workflow run ${runId}.`);
}
if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`);
console.log(`Downloaded: ${dest}`);
} else {
pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag });
// Same stale-snapshot concern as the PR branch above.
const imageDir = emulatorImageDir();
const snapshotDest = join(imageDir, `stack-emulator-${arch}.savevm.zst`);
const snapshotRawDest = join(imageDir, `stack-emulator-${arch}.savevm.raw`);
if (existsSync(snapshotDest)) unlinkSync(snapshotDest);
if (existsSync(snapshotRawDest)) unlinkSync(snapshotRawDest);
await pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag });
}
if (opts.skipSnapshot) {
console.log("--skip-snapshot: not capturing a local snapshot. First `stack emulator start` will cold-boot.");
} else {
await captureLocalSnapshot(arch);
}
});
@ -227,6 +566,7 @@ export function registerEmulatorCommand(program: Command) {
.option("--config-file <path>", "Path to a config file; when set, credentials for this project are printed to stdout as JSON")
.action(async (opts: { arch?: string, configFile?: string }) => {
const arch = resolveArch(opts.arch);
preflightForVmStart("start", arch);
let resolvedConfigFile: string | undefined;
if (opts.configFile) {
@ -257,6 +597,7 @@ export function registerEmulatorCommand(program: Command) {
.option("--config-file <path>", "Path to a config file; fetches credentials and injects STACK_PROJECT_ID / STACK_PUBLISHABLE_CLIENT_KEY / STACK_SECRET_SERVER_KEY into the child")
.action(async (cmd: string, opts: { arch?: string, configFile?: string }) => {
const arch = resolveArch(opts.arch);
preflightForVmStart("run", arch);
let resolvedConfigFile: string | undefined;
if (opts.configFile) {
@ -281,11 +622,17 @@ export function registerEmulatorCommand(program: Command) {
const apiUrl = `http://127.0.0.1:${backendPort}`;
childEnv.STACK_PROJECT_ID = creds.project_id;
childEnv.NEXT_PUBLIC_STACK_PROJECT_ID = creds.project_id;
childEnv.VITE_STACK_PROJECT_ID = creds.project_id;
childEnv.EXPO_PUBLIC_STACK_PROJECT_ID = creds.project_id;
childEnv.STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key;
childEnv.NEXT_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key;
childEnv.VITE_STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key;
childEnv.EXPO_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key;
childEnv.STACK_SECRET_SERVER_KEY = creds.secret_server_key;
childEnv.STACK_API_URL = apiUrl;
childEnv.NEXT_PUBLIC_STACK_API_URL = apiUrl;
childEnv.VITE_STACK_API_URL = apiUrl;
childEnv.EXPO_PUBLIC_STACK_API_URL = apiUrl;
}
const child = spawn(cmd, { shell: true, stdio: "inherit", env: childEnv });
@ -304,25 +651,61 @@ export function registerEmulatorCommand(program: Command) {
process.exit(exitCode);
} else {
console.log("\nStopping emulator...");
const warnStopFailed = (e: unknown) => {
const msg = e instanceof Error ? e.message : String(e);
process.stderr.write(`Failed to stop emulator cleanly: ${msg}\n`);
};
runEmulator("stop")
.catch(() => { /* best-effort stop */ })
.catch(warnStopFailed)
.finally(() => process.exit(exitCode));
}
});
});
emulator.command("stop").description("Stop the emulator (data preserved; use 'reset' to clear)").action(() => runEmulator("stop"));
emulator.command("reset").description("Reset emulator state for a fresh boot").action(() => runEmulator("reset"));
emulator.command("status").description("Show emulator and service health").action(() => runEmulator("status"));
emulator
.command("stop")
.description("Stop the emulator (data preserved; use 'reset' to clear)")
.action(() => {
requireBinaries("stop", [bin("socat", "socat", "socat")]);
return runEmulator("stop");
});
emulator
.command("reset")
.description("Reset emulator state for a fresh boot")
.action(() => {
requireBinaries("reset", [bin("socat", "socat", "socat")]);
return runEmulator("reset");
});
emulator
.command("status")
.description("Show emulator and service health")
.action(() => {
requireBinaries("status", [
bin("curl", "curl", "curl"),
bin("nc", "ncat", "netcat"),
]);
return runEmulator("status");
});
emulator
.command("list-releases")
.description("List available emulator releases")
.option("--repo <repo>", "GitHub repository (default: stack-auth/stack-auth)")
.action((opts) => {
const repo = opts.repo ?? "stack-auth/stack-auth";
.action(async (opts) => {
const repo = opts.repo ?? DEFAULT_REPO;
console.log(`Available emulator releases from ${repo}:\n`);
const lines = gh(["release", "list", "--repo", repo, "--limit", "20"]).split("\n").filter((l) => l.toLowerCase().includes("emulator"));
type Release = { tag_name: string, name: string | null, published_at: string | null, draft: boolean, prerelease: boolean };
const releases = await ghApi<Release[]>(`/repos/${repo}/releases?per_page=50`);
const lines = releases
.filter((r) => (r.tag_name + " " + (r.name ?? "")).toLowerCase().includes("emulator"))
.slice(0, 20)
.map((r) => {
const status = r.draft ? "Draft" : r.prerelease ? "Pre-release" : "Latest";
const date = r.published_at ? r.published_at.slice(0, 10) : "";
return `${r.tag_name}\t${status}\t${date}`;
});
if (lines.length === 0) console.log("No emulator releases found.");
else for (const line of lines) console.log(line);
});

View File

@ -0,0 +1,259 @@
import { describe, expect, it } from "vitest";
import { buildIso, type IsoFile } from "./iso.js";
const SECTOR = 2048;
// --- Test helpers: a minimal ISO 9660 parser, just enough to walk the
// directory records we produce so tests can assert the bytes we emitted really
// are addressable at the offsets claimed in the directory records.
// Returns the SECTOR-byte view of `iso` for the given sector number.
function readSector(iso: Buffer, sector: number): Buffer {
  const begin = sector * SECTOR;
  return iso.subarray(begin, begin + SECTOR);
}
// Reads a volume descriptor header: the type byte at offset 0 and the 5-byte
// ASCII identifier at offsets 1..5.
function readVolumeDescriptor(iso: Buffer, sector: number): { type: number, id: string } {
  const descriptor = readSector(iso, sector);
  const type = descriptor[0];
  const id = descriptor.toString("ascii", 1, 6);
  return { type, id };
}
// Fields of one parsed directory record, as read by parseDirRecords.
type DirRecord = {
  lenDr: number,
  extentSector: number,
  dataLength: number,
  isDir: boolean,
  fileId: Buffer,
};

// Walks the directory records packed into one sector buffer, stopping at the
// first zero length byte or at the end of the buffer. Only the little-endian
// halves of the extent and data-length fields are read.
function parseDirRecords(sector: Buffer): DirRecord[] {
  const parsed: DirRecord[] = [];
  for (let pos = 0; pos < sector.length; ) {
    const recordLength = sector[pos];
    if (recordLength === 0) break;

    const idLength = sector[pos + 32];
    const idStart = pos + 33;
    parsed.push({
      lenDr: recordLength,
      extentSector: sector.readUInt32LE(pos + 2),
      dataLength: sector.readUInt32LE(pos + 10),
      // Bit 0x02 of the flags byte marks a directory.
      isDir: (sector[pos + 25] & 0x02) !== 0,
      // Copy the identifier so it does not alias the sector buffer.
      fileId: Buffer.from(sector.subarray(idStart, idStart + idLength)),
    });
    pos += recordLength;
  }
  return parsed;
}
// Follow PVD → root dir → pull file bytes by ISO-9660 name ("NAME.EXT;1").
function readIsoFile(iso: Buffer, isoName: string): Buffer | null {
  // Sector 16 is read as the PVD; the root record's extent is at offset 156 + 2.
  const rootSector = readSector(iso, 16).readUInt32LE(156 + 2);
  for (const record of parseDirRecords(readSector(iso, rootSector))) {
    if (record.fileId.toString("ascii") !== isoName) continue;
    const offset = record.extentSector * SECTOR;
    return iso.subarray(offset, offset + record.dataLength);
  }
  return null;
}
// Same, but follow the Joliet SVD (so names are UCS-2 BE).
function readJolietFile(iso: Buffer, name: string): Buffer | null {
  const svd = readSector(iso, 17);
  // Sector 17 must be a type-2 (supplementary) descriptor.
  if (svd[0] !== 2) return null;

  const rootSector = svd.readUInt32LE(156 + 2);
  const records = parseDirRecords(readSector(iso, rootSector));

  // Encode the wanted name as big-endian UTF-16 code units for comparison.
  const wanted = Buffer.alloc(name.length * 2);
  let cursor = 0;
  while (cursor < name.length) {
    wanted.writeUInt16BE(name.charCodeAt(cursor), cursor * 2);
    cursor += 1;
  }

  for (const record of records) {
    if (!record.fileId.equals(wanted)) continue;
    const offset = record.extentSector * SECTOR;
    return iso.subarray(offset, offset + record.dataLength);
  }
  return null;
}
// Builds an IsoFile of `size` bytes, every byte set to `byte` (default 0x41).
function sampleFile(name: string, size: number, byte = 0x41): IsoFile {
  const data = Buffer.alloc(size, byte);
  return { name, data };
}
// Checks on the volume descriptor area of the image: descriptor types at
// sectors 16-18, the volume identifier in both descriptors, the Joliet escape
// sequence, and the declared volume size.
describe("buildIso — structural invariants", () => {
it("emits the ISO 9660 standard identifiers at sectors 16, 17, 18", () => {
const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("hi") }]);
expect(readVolumeDescriptor(iso, 16)).toEqual({ type: 1, id: "CD001" });
expect(readVolumeDescriptor(iso, 17)).toEqual({ type: 2, id: "CD001" });
expect(readVolumeDescriptor(iso, 18)).toEqual({ type: 0xff, id: "CD001" });
});
it("stores the volume identifier verbatim in the PVD for blkid discovery", () => {
const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("x") }]);
const pvd = readSector(iso, 16);
expect(pvd.toString("ascii", 40, 40 + 8)).toBe("STACKCFG");
});
it("stores the volume identifier in the Joliet SVD as UCS-2 BE", () => {
const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("x") }]);
const svd = readSector(iso, 17);
// 8 characters at 2 bytes each, starting at the same offset (40) as in the PVD.
const ucs = svd.subarray(40, 40 + 16);
let decoded = "";
for (let i = 0; i < ucs.length; i += 2) decoded += String.fromCharCode(ucs.readUInt16BE(i));
expect(decoded).toBe("STACKCFG");
});
it("sets the Joliet escape sequence %/E", () => {
const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("x") }]);
const svd = readSector(iso, 17);
// 0x25 0x2f 0x45 is ASCII "%/E".
expect(svd[88]).toBe(0x25);
expect(svd[89]).toBe(0x2f);
expect(svd[90]).toBe(0x45);
});
it("declares a volume space size equal to the emitted sector count", () => {
const iso = buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from("hello world") }]);
const pvd = readSector(iso, 16);
// Little-endian half of the volume space size field.
const declared = pvd.readUInt32LE(80);
expect(iso.length).toBe(declared * SECTOR);
});
});
// Round-trip checks: files written by buildIso must be readable back, byte for
// byte, through both the ISO 9660 and the Joliet directory trees.
describe("buildIso — file round-trip", () => {
it("makes files readable by ISO 9660 name", () => {
const iso = buildIso("STACKCFG", [
{ name: "runtime.env", data: Buffer.from("KEY=value\n") },
{ name: "base.env", data: Buffer.from("FOO=bar\n") },
]);
expect(readIsoFile(iso, "RUNTIME.ENV;1")?.toString()).toBe("KEY=value\n");
expect(readIsoFile(iso, "BASE.ENV;1")?.toString()).toBe("FOO=bar\n");
});
it("makes files readable by Joliet (lowercase) name", () => {
const iso = buildIso("STACKCFG", [
{ name: "runtime.env", data: Buffer.from("KEY=value\n") },
{ name: "base.env", data: Buffer.from("FOO=bar\n") },
]);
expect(readJolietFile(iso, "runtime.env")?.toString()).toBe("KEY=value\n");
expect(readJolietFile(iso, "base.env")?.toString()).toBe("FOO=bar\n");
});
it("preserves exact file contents byte-for-byte", () => {
// Includes NUL, 0xff and both sides of the 7-bit boundary.
const content = Buffer.from([0x00, 0xff, 0x7f, 0x80, 0x41, 0x42, 0x43]);
const iso = buildIso("STACKCFG", [{ name: "bin.dat", data: content }]);
expect(readJolietFile(iso, "bin.dat")?.equals(content)).toBe(true);
});
it("handles files whose length is exactly one sector", () => {
const content = Buffer.alloc(SECTOR, 0x37);
const iso = buildIso("STACKCFG", [{ name: "one.bin", data: content }]);
expect(readJolietFile(iso, "one.bin")?.equals(content)).toBe(true);
});
it("handles files that span multiple sectors", () => {
const content = Buffer.alloc(SECTOR * 3 + 17, 0x55);
const iso = buildIso("STACKCFG", [{ name: "big.bin", data: content }]);
expect(readJolietFile(iso, "big.bin")?.equals(content)).toBe(true);
});
it("keeps files byte-exact at the claimed extent sector across multi-file layouts", () => {
// Fingerprint each file so we can tell them apart even if extents shift.
const files: IsoFile[] = [
{ name: "alpha.bin", data: Buffer.alloc(SECTOR + 5, 0xaa) },
{ name: "beta.bin", data: Buffer.alloc(SECTOR * 2, 0xbb) },
{ name: "gamma.bin", data: Buffer.alloc(42, 0xcc) },
];
const iso = buildIso("STACKCFG", files);
for (const f of files) {
expect(readJolietFile(iso, f.name)?.equals(f.data)).toBe(true);
}
});
});
// Edge cases: empty files, exact lengths in directory records, the "." / ".."
// entries, over-long volume identifiers, root-directory overflow, and sector
// alignment of the final image.
describe("buildIso — edge cases", () => {
it("handles empty files without misaligning subsequent file extents", () => {
// Regression: `padToSector(Buffer.alloc(0))` used to return a 0-byte
// buffer, but the layout reserved 1 sector for the empty file — the next
// file was then read from the empty file's reserved slot.
const files: IsoFile[] = [
{ name: "empty.txt", data: Buffer.alloc(0) },
{ name: "after.txt", data: Buffer.from("marker\n") },
];
const iso = buildIso("STACKCFG", files);
expect(readJolietFile(iso, "empty.txt")?.length).toBe(0);
expect(readJolietFile(iso, "after.txt")?.toString()).toBe("marker\n");
// And: the declared volume space size must cover every emitted byte.
const pvd = readSector(iso, 16);
expect(iso.length).toBe(pvd.readUInt32LE(80) * SECTOR);
});
it("writes the exact file length in the directory record (not padded to sector)", () => {
const content = Buffer.from("abc");
const iso = buildIso("STACKCFG", [{ name: "tiny.txt", data: content }]);
const svd = readSector(iso, 17);
const rootSector = svd.readUInt32LE(156 + 2);
const records = parseDirRecords(readSector(iso, rootSector));
// The only non-directory record is the file itself.
const file = records.find((r) => !r.isDir);
expect(file?.dataLength).toBe(3);
});
it("places the root directory records for . and .. pointing at the root extent", () => {
const iso = buildIso("STACKCFG", [{ name: "x.txt", data: Buffer.from("1") }]);
const svd = readSector(iso, 17);
const rootSector = svd.readUInt32LE(156 + 2);
const records = parseDirRecords(readSector(iso, rootSector));
expect(records.length).toBeGreaterThanOrEqual(2);
// "." is encoded as the single byte 0x00 and ".." as 0x01.
expect(records[0].fileId.equals(Buffer.from([0x00]))).toBe(true);
expect(records[1].fileId.equals(Buffer.from([0x01]))).toBe(true);
expect(records[0].isDir).toBe(true);
expect(records[0].extentSector).toBe(rootSector);
expect(records[1].extentSector).toBe(rootSector);
});
it("truncates volume identifiers longer than 32 bytes rather than corrupting the PVD", () => {
const longId = "A".repeat(64);
const iso = buildIso(longId, [{ name: "x.txt", data: Buffer.from("1") }]);
const pvd = readSector(iso, 16);
expect(pvd.toString("ascii", 40, 40 + 32)).toBe("A".repeat(32));
// The over-long identifier must not spill into later fields: PVD offset 881
// (the file-structure-version byte in ECMA-119) must still read 1, and
// sector 17 must still be the Joliet SVD.
expect(pvd[881]).toBe(1);
expect(readVolumeDescriptor(iso, 17).type).toBe(2);
});
it("rejects an input set whose root directory record overflows one sector", () => {
// Each Joliet dir record for an N-char name is 33 + 2N + 1 bytes (2N is
// always even, so the pad byte is always present) = 2N + 34. The names below
// are 28 chars, so 90 bytes each; eighty of them is 7200 bytes, plus 68 for
// "." and "..", which is far more than one 2048-byte sector.
const many: IsoFile[] = Array.from({ length: 80 }, (_, i) => ({
name: `file-${String(i).padStart(3, "0")}-padding-padding.bin`,
data: Buffer.from("x"),
}));
expect(() => buildIso("STACKCFG", many)).toThrow(/Root directory exceeds/);
});
it("produces a sector-aligned buffer regardless of file sizes", () => {
// Sizes straddle the empty, sub-sector, exact-sector and multi-sector cases.
for (const size of [0, 1, SECTOR - 1, SECTOR, SECTOR + 1, SECTOR * 5 - 1]) {
const iso = buildIso("STACKCFG", [sampleFile("a.bin", size)]);
expect(iso.length % SECTOR).toBe(0);
}
});
});
// Layout check across several files of different sizes: no file's extent may
// start inside the sectors occupied by the file before it.
describe("buildIso — multiple file sector layout", () => {
it("assigns non-overlapping extents to all files", () => {
const files: IsoFile[] = [
sampleFile("a.bin", 10, 0x01),
sampleFile("b.bin", SECTOR, 0x02),
sampleFile("c.bin", SECTOR * 2 + 500, 0x03),
sampleFile("d.bin", 1, 0x04),
];
const iso = buildIso("STACKCFG", files);
const svd = readSector(iso, 17);
const rootSector = svd.readUInt32LE(156 + 2);
const records = parseDirRecords(readSector(iso, rootSector)).filter((r) => !r.isDir);
// Extents must be strictly ordered and non-overlapping.
const sorted = [...records].sort((a, b) => a.extentSector - b.extentSector);
for (let i = 1; i < sorted.length; i++) {
const prev = sorted[i - 1];
// Every file is counted as occupying at least one sector, even if empty.
const prevEndSector = prev.extentSector + Math.max(1, Math.ceil(prev.dataLength / SECTOR));
expect(sorted[i].extentSector).toBeGreaterThanOrEqual(prevEndSector);
}
});
});

View File

@ -0,0 +1,399 @@
// Minimal ISO 9660 + Joliet writer used to package the runtime config blob
// that the emulator VM mounts at boot via /dev/disk/by-label/STACKCFG.
//
// Replaces the host-side dependency on hdiutil/mkisofs/genisoimage. Only the
// subset of ECMA-119 needed for a single-level root directory of small UTF-8
// text files is implemented: PVD + Joliet SVD + path tables + root dir + file
// data. Names are emitted in both ISO 9660 ("BASE.ENV;1") and Joliet
// (lower-case UCS-2) form so Linux mounts the Joliet view by default and the
// guest's `source /mnt/stack-runtime/runtime.env` works unchanged.
import { writeFileSync } from "fs";
const SECTOR = 2048;
// Encodes `n` as an 8-byte both-endian field: 32-bit little-endian followed by
// 32-bit big-endian.
function bothEndian32(n: number): Buffer {
  const little = Buffer.alloc(4);
  little.writeUInt32LE(n, 0);
  const big = Buffer.alloc(4);
  big.writeUInt32BE(n, 0);
  return Buffer.concat([little, big]);
}
// Encodes `n` as a 4-byte both-endian field: 16-bit little-endian followed by
// 16-bit big-endian.
function bothEndian16(n: number): Buffer {
  const little = Buffer.alloc(2);
  little.writeUInt16LE(n, 0);
  const big = Buffer.alloc(2);
  big.writeUInt16BE(n, 0);
  return Buffer.concat([little, big]);
}
// Returns a `len`-byte buffer holding `s` (truncated to `len` characters) in
// ASCII, with the remainder filled with the first character of `fill`.
function padString(s: string, len: number, fill = " "): Buffer {
  const field = Buffer.alloc(len, fill.charCodeAt(0));
  const clipped = s.slice(0, len);
  Buffer.from(clipped, "ascii").copy(field, 0);
  return field;
}
// Encodes each UTF-16 code unit of `s` as two big-endian bytes.
function ucs2BE(s: string): Buffer {
  const out = Buffer.alloc(s.length * 2);
  const units = Array.from({ length: s.length }, (_, i) => s.charCodeAt(i));
  let offset = 0;
  for (const unit of units) {
    // writeUInt16BE returns the offset just past the bytes it wrote.
    offset = out.writeUInt16BE(unit, offset);
  }
  return out;
}
// Returns a `byteLen`-byte field holding `s` as big-endian UTF-16 code units,
// truncated or space-padded (0x0020) to fill every whole 2-byte slot.
function padUcs2BE(s: string, byteLen: number): Buffer {
  const field = Buffer.alloc(byteLen);
  const slots = Math.floor(byteLen / 2);
  const padded = s.slice(0, slots).padEnd(slots, " ");
  for (let slot = 0; slot < slots; slot += 1) {
    field.writeUInt16BE(padded.charCodeAt(slot), slot * 2);
  }
  // Odd-length fields (e.g. 37-byte Copyright/Abstract/Bibliographic IDs) end
  // with one leftover byte, which is set to a space; the spec allows either NUL
  // or 0x20 padding.
  const hasLeftoverByte = byteLen % 2 === 1;
  if (hasLeftoverByte) {
    field[byteLen - 1] = 0x20;
  }
  return field;
}
// Encodes `d` (UTC) as the 7-byte directory-record timestamp: years since 1900,
// month (1-12), day, hour, minute, second, and a final byte written as 0.
function dirRecordingDate(d: Date): Buffer {
  return Buffer.from([
    d.getUTCFullYear() - 1900,
    d.getUTCMonth() + 1,
    d.getUTCDate(),
    d.getUTCHours(),
    d.getUTCMinutes(),
    d.getUTCSeconds(),
    0,
  ]);
}
// Encodes `d` (UTC) as the 17-byte volume-descriptor timestamp: 16 ASCII digits
// "YYYYMMDDHHMMSS" plus "00", followed by one byte written as 0.
function volumeDate(d: Date): Buffer {
  // [value, digit width] for each component, in output order.
  const components: [number, number][] = [
    [d.getUTCFullYear(), 4],
    [d.getUTCMonth() + 1, 2],
    [d.getUTCDate(), 2],
    [d.getUTCHours(), 2],
    [d.getUTCMinutes(), 2],
    [d.getUTCSeconds(), 2],
  ];
  const digits = components.map(([value, width]) => String(value).padStart(width, "0")).join("") + "00";
  const field = Buffer.alloc(17);
  field.write(digits, 0, 16, "ascii");
  field[16] = 0;
  return field;
}
// 17-byte "no date" volume timestamp: sixteen ASCII "0" digits, then one 0 byte.
const UNUSED_VOLUME_DATE = Buffer.concat([
  Buffer.alloc(16, "0".charCodeAt(0)),
  Buffer.alloc(1),
]);
// Encodes an ISO 9660 file identifier ("FILENAME.EXT;1"). The name is
// uppercased here and the ";1" version suffix is appended, so callers pass the
// bare name without a version suffix. The name is not validated against 8.3.
function isoFileIdentifier(name: string): Buffer {
  const versioned = name.toUpperCase() + ";1";
  return Buffer.from(versioned, "ascii");
}
// Builds a single directory record. `idBytes` is the file identifier bytes
// (ASCII for ISO, UCS-2 BE for Joliet); for the "." / ".." entries the caller
// passes a single 0x00 / 0x01 byte as `idBytes`.
function buildDirRecord(
  extentSector: number,
  dataLength: number,
  isDir: boolean,
  recDate: Buffer,
  idBytes: Buffer,
): Buffer {
  const idLength = idBytes.length;
  // 33 fixed bytes plus the identifier; an even-length identifier is followed
  // by one pad byte.
  const recordLength = 33 + idLength + (idLength % 2 === 0 ? 1 : 0);
  const record = Buffer.alloc(recordLength);

  record[0] = recordLength;
  record[1] = 0;
  // Extent location and data length are both-endian: LE copy, then BE copy.
  record.writeUInt32LE(extentSector, 2);
  record.writeUInt32BE(extentSector, 6);
  record.writeUInt32LE(dataLength, 10);
  record.writeUInt32BE(dataLength, 14);
  recDate.copy(record, 18);
  // Flags: 0x02 marks a directory.
  record[25] = isDir ? 0x02 : 0x00;
  record[26] = 0;
  record[27] = 0;
  // Both-endian 16-bit field holding 1.
  record.writeUInt16LE(1, 28);
  record.writeUInt16BE(1, 30);
  record[32] = idLength;
  idBytes.copy(record, 33);
  return record;
}
// Serialises the contents of a root directory: the "." and ".." entries (both
// pointing at the root itself) followed by one record per file, in the order
// given. The result is a whole number of SECTOR-sized chunks.
function buildRootDirEntries(
  rootSector: number,
  rootSize: number,
  recDate: Buffer,
  files: { idBytes: Buffer, sector: number, size: number }[],
): Buffer {
  const selfAndParent = [0x00, 0x01].map((id) =>
    buildDirRecord(rootSector, rootSize, true, recDate, Buffer.from([id])),
  );
  const fileRecords = files.map((entry) =>
    buildDirRecord(entry.sector, entry.size, false, recDate, entry.idBytes),
  );

  // A record may not straddle a sector boundary, so whenever the next record
  // would not fit, the current sector is zero-filled and a new one is started.
  const sectors: Buffer[] = [];
  let pending: Buffer[] = [];
  let used = 0;
  const closeSector = () => {
    sectors.push(Buffer.concat([...pending, Buffer.alloc(SECTOR - used)]));
    pending = [];
    used = 0;
  };
  for (const record of [...selfAndParent, ...fileRecords]) {
    if (used + record.length > SECTOR) {
      closeSector();
    }
    pending.push(record);
    used += record.length;
  }
  if (used > 0) {
    closeSector();
  }
  return Buffer.concat(sectors);
}
// Builds the 10-byte, single-entry path table describing the root directory.
// The same layout serves the L table (`byteOrder` "LE") and the M table
// (`byteOrder` "BE"); only the endianness of the two numeric fields differs.
function buildPathTable(rootSector: number, byteOrder: "LE" | "BE"): Buffer {
  const table = Buffer.alloc(10); // zero-filled
  const littleEndian = byteOrder === "LE";
  table[0] = 1; // LEN_DI: the root identifier is one byte long
  // table[1] (EAR length) stays 0.
  // Bytes 2-5: root extent sector; bytes 6-7: parent directory number (1).
  if (littleEndian) {
    table.writeUInt32LE(rootSector, 2);
    table.writeUInt16LE(1, 6);
  } else {
    table.writeUInt32BE(rootSector, 2);
    table.writeUInt16BE(1, 6);
  }
  // table[8] (root identifier) and table[9] (pad) stay 0.
  return table;
}
// Zero-pads `buf` up to the next multiple of SECTOR bytes. A buffer whose
// length is already a multiple (including an empty one) is returned as-is.
function padToSector(buf: Buffer): Buffer {
  const overhang = buf.length % SECTOR;
  return overhang === 0
    ? buf
    : Buffer.concat([buf, Buffer.alloc(SECTOR - overhang)]);
}
// Builds one SECTOR-sized Volume Descriptor: the Primary Volume Descriptor
// when `joliet` is false, or the Joliet Supplementary Volume Descriptor when
// it is true. In Joliet mode the text fields are encoded as UCS-2 BE and the
// Joliet escape sequence is written.
function buildVolumeDescriptor(opts: {
  joliet: boolean,
  volumeId: string,
  volumeSpaceSize: number,
  pathTableSize: number,
  lPathSector: number,
  mPathSector: number,
  rootDirRecord: Buffer,
  date: Buffer,
}): Buffer {
  const { joliet } = opts;
  // Text-field encoder for this descriptor flavour.
  const textField = joliet
    ? (s: string, n: number) => padUcs2BE(s, n)
    : (s: string, n: number) => padString(s, n);

  const vd = Buffer.alloc(SECTOR);
  vd[0] = joliet ? 2 : 1; // descriptor type
  vd.write("CD001", 1, 5, "ascii");
  vd[6] = 1;
  vd[7] = 0;

  // System Identifier (32 bytes)
  textField("", 32).copy(vd, 8);
  // Volume Identifier (32 bytes) — must be "STACKCFG" so udev exposes it as
  // /dev/disk/by-label/STACKCFG. blkid reads from PVD by default but Joliet
  // takes precedence when both are present.
  textField(opts.volumeId, 32).copy(vd, 40);
  bothEndian32(opts.volumeSpaceSize).copy(vd, 80);
  if (joliet) {
    // Escape sequence for UCS-2 Level 3 ("%/E") at the start of the 32-byte
    // field at offset 88; the rest of the field stays zero.
    Buffer.from([0x25, 0x2f, 0x45]).copy(vd, 88);
  }

  bothEndian16(1).copy(vd, 120); // Volume Set Size
  bothEndian16(1).copy(vd, 124); // Volume Sequence Number
  bothEndian16(SECTOR).copy(vd, 128); // Logical Block Size
  bothEndian32(opts.pathTableSize).copy(vd, 132);
  vd.writeUInt32LE(opts.lPathSector, 140);
  vd.writeUInt32LE(0, 144); // optional L
  vd.writeUInt32BE(opts.mPathSector, 148);
  vd.writeUInt32BE(0, 152); // optional M
  opts.rootDirRecord.copy(vd, 156);

  // Blank identifier fields as [offset, length], written in ascending order:
  // Volume Set, Publisher, Data Preparer, Application, then the Copyright,
  // Abstract and Bibliographic file identifiers.
  const blankFields: [number, number][] = [
    [190, 128],
    [318, 128],
    [446, 128],
    [574, 128],
    [702, 37],
    [739, 37],
    [776, 37],
  ];
  for (const [offset, length] of blankFields) {
    textField("", length).copy(vd, offset);
  }

  opts.date.copy(vd, 813); // Creation
  opts.date.copy(vd, 830); // Modification
  UNUSED_VOLUME_DATE.copy(vd, 847); // Expiration
  UNUSED_VOLUME_DATE.copy(vd, 864); // Effective
  vd[881] = 1; // File Structure Version
  return vd;
}
// Builds the SECTOR-sized descriptor that ends the volume descriptor list:
// type byte 0xff, the "CD001" signature, version 1, and zeros elsewhere.
function buildVolumeDescriptorTerminator(): Buffer {
  const terminator = Buffer.alloc(SECTOR);
  terminator.writeUInt8(0xff, 0);
  terminator.write("CD001", 1, 5, "ascii");
  terminator.writeUInt8(1, 6);
  return terminator;
}
// A file to place in the image's root directory: `name` becomes its identifier
// in both the ISO 9660 and Joliet directory trees, `data` is its content.
export type IsoFile = { name: string, data: Buffer };
// Assembles a complete in-memory ISO image with two parallel views of the same
// files: a plain ISO 9660 tree and a Joliet tree. All files live in the root
// directory, which must fit in a single sector for each view.
//
// Sector layout (sector numbers):
//   0-15   system area (zeros)
//   16     Primary Volume Descriptor
//   17     Joliet Supplementary Volume Descriptor
//   18     Volume Descriptor Set Terminator
//   19/20  ISO L / M path tables
//   21/22  Joliet L / M path tables
//   23     ISO root directory
//   24     Joliet root directory
//   25+    file data, each file starting on a sector boundary
//
// Throws if either root directory would exceed one sector.
export function buildIso(volumeId: string, files: IsoFile[]): Buffer {
  // One timestamp is shared by every record and descriptor in the image.
  const now = new Date();
  const recDate = dirRecordingDate(now);
  const volDateBuf = volumeDate(now);

  // Identifier bytes for each file in each view.
  const isoIds = files.map((f) => isoFileIdentifier(f.name));
  const jolietIds = files.map((f) => ucs2BE(f.name));

  // Root directory size = "." + ".." (34 bytes each) + one record per file.
  // It depends only on identifier lengths, so it is known before any extent
  // is assigned.
  const rootDirBytes = (ids: Buffer[]): number => {
    let total = 34 + 34;
    for (const id of ids) {
      total += 33 + id.length + (id.length % 2 === 0 ? 1 : 0);
    }
    return total;
  };
  const isoRootSize = rootDirBytes(isoIds);
  const jolietRootSize = rootDirBytes(jolietIds);
  if (isoRootSize > SECTOR || jolietRootSize > SECTOR) {
    throw new Error(`Root directory exceeds ${SECTOR} bytes; multi-sector root not supported.`);
  }

  // Fixed part of the sector layout: the three descriptors occupy the first
  // three sectors after the system area.
  const sysAreaSectors = 16;
  const isoLPathSector = sysAreaSectors + 3;
  const isoMPathSector = isoLPathSector + 1;
  const jolietLPathSector = isoMPathSector + 1;
  const jolietMPathSector = jolietLPathSector + 1;
  const isoRootSector = jolietMPathSector + 1;
  const jolietRootSector = isoRootSector + 1;

  // Variable part: every file gets at least one sector, even when empty.
  const extents: { data: Buffer, sector: number, size: number, reservedSectors: number }[] = [];
  let cursor = jolietRootSector + 1;
  for (const f of files) {
    const reservedSectors = Math.max(1, Math.ceil(f.data.length / SECTOR));
    extents.push({ data: f.data, sector: cursor, size: f.data.length, reservedSectors });
    cursor += reservedSectors;
  }
  const totalSectors = cursor;
  const pathTableSize = 10;

  // Root directory record embedded in each volume descriptor: same layout as
  // a regular directory record, with the single byte 0x00 as identifier.
  const rootIdent = Buffer.from([0x00]);
  const isoRootDirRecordVD = buildDirRecord(isoRootSector, SECTOR, true, recDate, rootIdent);
  const jolietRootDirRecordVD = buildDirRecord(jolietRootSector, SECTOR, true, recDate, rootIdent);

  const sharedDescriptorFields = {
    volumeId,
    volumeSpaceSize: totalSectors,
    pathTableSize,
    date: volDateBuf,
  };
  const pvd = buildVolumeDescriptor({
    ...sharedDescriptorFields,
    joliet: false,
    lPathSector: isoLPathSector,
    mPathSector: isoMPathSector,
    rootDirRecord: isoRootDirRecordVD,
  });
  const svd = buildVolumeDescriptor({
    ...sharedDescriptorFields,
    joliet: true,
    lPathSector: jolietLPathSector,
    mPathSector: jolietMPathSector,
    rootDirRecord: jolietRootDirRecordVD,
  });
  const term = buildVolumeDescriptorTerminator();

  const isoLPath = padToSector(buildPathTable(isoRootSector, "LE"));
  const isoMPath = padToSector(buildPathTable(isoRootSector, "BE"));
  const jolietLPath = padToSector(buildPathTable(jolietRootSector, "LE"));
  const jolietMPath = padToSector(buildPathTable(jolietRootSector, "BE"));

  // Both views point at the same file extents; only the identifiers differ.
  const rootEntriesFor = (ids: Buffer[]) =>
    ids.map((idBytes, i) => ({
      idBytes,
      sector: extents[i].sector,
      size: extents[i].size,
    }));
  const isoRoot = buildRootDirEntries(isoRootSector, SECTOR, recDate, rootEntriesFor(isoIds));
  const jolietRoot = buildRootDirEntries(jolietRootSector, SECTOR, recDate, rootEntriesFor(jolietIds));

  // Each file is emitted at exactly the size reserved for it above. An empty
  // file still reserves one sector, so it becomes a full sector of zeros here;
  // padding a zero-length buffer "to the next sector boundary" would emit
  // nothing and shift every later file off its recorded extent.
  const fileBuffers: Buffer[] = [];
  for (const extent of extents) {
    const reservedBytes = extent.reservedSectors * SECTOR;
    if (extent.data.length === reservedBytes) {
      fileBuffers.push(extent.data);
      continue;
    }
    const padded = Buffer.alloc(reservedBytes);
    extent.data.copy(padded, 0);
    fileBuffers.push(padded);
  }

  const systemArea = Buffer.alloc(sysAreaSectors * SECTOR);
  return Buffer.concat([
    systemArea,
    pvd,
    svd,
    term,
    isoLPath,
    isoMPath,
    jolietLPath,
    jolietMPath,
    isoRoot,
    jolietRoot,
    ...fileBuffers,
  ]);
}
// Builds the ISO image for `files` with volume label `volumeId` and writes it
// synchronously to `path`.
export function writeIso(path: string, volumeId: string, files: IsoFile[]): void {
  writeFileSync(path, buildIso(volumeId, files));
}

View File

@ -0,0 +1,19 @@
import { defineConfig, mergeConfig } from 'vitest/config';
import sharedConfig from '../../vitest.shared';
// Package-level overrides layered on top of the shared vitest config.
const threadPoolOverrides = defineConfig({
  test: {
    poolOptions: {
      // Pin the thread pool bounds explicitly. With only the shared
      // `maxWorkers: 8` set, tinypool defaults minThreads to the host's
      // available parallelism, producing "minThreads/maxThreads must not
      // conflict" on machines with >8 cores.
      threads: {
        minThreads: 1,
        maxThreads: 4,
      },
    },
  },
});

export default mergeConfig(sharedConfig, threadPoolOverrides);

View File

@ -746,7 +746,7 @@ importers:
version: 1.166.6(crossws@0.4.4(srvx@0.8.16))
nitro:
specifier: ^3.0.0
version: 3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(rolldown@1.0.0-rc.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2)
version: 3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2)
react:
specifier: 19.2.1
version: 19.2.1
@ -2097,6 +2097,9 @@ importers:
commander:
specifier: ^13.1.0
version: 13.1.0
extract-zip:
specifier: ^2.0.1
version: 2.0.1
jiti:
specifier: ^2.4.2
version: 2.6.1
@ -11395,6 +11398,7 @@ packages:
basic-ftp@5.2.0:
resolution: {integrity: sha512-VoMINM2rqJwJgfdHq6RiUudKt2BV+FY5ZFezP/ypmwayk68+NzzAQy4XXLlqsGD4MCzq3DrmNFD/uUmBJuGoXw==}
engines: {node: '>=10.0.0'}
deprecated: Security vulnerability fixed in 5.2.1, please upgrade
bcrypt@6.0.0:
resolution: {integrity: sha512-cU8v/EGSrnH+HnxV2z0J7/blxH8gq7Xh2JFT6Aroax7UohdmiJJlxApMxtKfuI7z68NvvVcmR78k2LbT6efhRg==}
@ -33362,7 +33366,7 @@ snapshots:
debug: 4.4.3
enhanced-resolve: 5.17.1
eslint: 8.57.1
eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1)
eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.56.1(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1)
fast-glob: 3.3.3
get-tsconfig: 4.8.1
is-bun-module: 1.2.1
@ -33405,7 +33409,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
eslint-module-utils@2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1):
eslint-module-utils@2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.56.1(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1):
dependencies:
debug: 3.2.7
optionalDependencies:
@ -33483,7 +33487,7 @@ snapshots:
doctrine: 2.1.0
eslint: 8.57.1
eslint-import-resolver-node: 0.3.9
eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1)
eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.56.1(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1)
hasown: 2.0.2
is-core-module: 2.15.1
is-glob: 4.0.3
@ -37347,7 +37351,7 @@ snapshots:
jsonpath-plus: 10.4.0
lodash.topath: 4.5.2
nitro@3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(rolldown@1.0.0-rc.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2):
nitro@3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2):
dependencies:
consola: 3.4.2
cookie-es: 2.0.0
@ -37367,7 +37371,6 @@ snapshots:
unenv: 2.0.0-rc.21
unstorage: 2.0.0-alpha.3(chokidar@4.0.3)(db0@0.3.4(@electric-sql/pglite@0.3.2)(mysql2@3.15.3))(lru-cache@11.2.2)(ofetch@1.5.1)
optionalDependencies:
rolldown: 1.0.0-rc.3
vite: 7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0)
xml2js: 0.6.2
transitivePeerDependencies: