Merge branch 'dev' into promptless/changelog-oauth-retry-reliability

2026-06-19 21:00:40 +08:00 · 2026-04-20 21:26:15 +00:00 · 2026-04-20 21:26:15 +00:00 · c39a0853c0
commit c39a0853c0
parent 16ddca5f29 37ee5ec320
20 changed files with 2886 additions and 212 deletions
--- a/.github/workflows/qemu-emulator-build.yaml
+++ b/.github/workflows/qemu-emulator-build.yaml
@ -22,8 +22,16 @@ concurrency:

 env:
  EMULATOR_IMAGE_NAME: stack-local-emulator
+  # Shell scripts (build-image.sh, run-emulator.sh) read these directly.
  EMULATOR_IMAGE_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/images
  EMULATOR_RUN_DIR: ${{ github.workspace }}/docker/local-emulator/qemu/run
+  # The stack-cli ignores EMULATOR_IMAGE_DIR/RUN_DIR and derives its own paths
+  # from STACK_EMULATOR_HOME. Point it at the same workspace so `emulator
+  # start` finds the freshly-built qcow2 from build-image.sh and cold-boots
+  # it, instead of auto-pulling from a prior release. CI doesn't capture a
+  # savevm (EMULATOR_CAPTURE_SAVEVM defaults to 0); users capture locally
+  # on first `stack emulator pull`.
+  STACK_EMULATOR_HOME: ${{ github.workspace }}/docker/local-emulator/qemu

 jobs:
  build:
@ -34,15 +42,16 @@ jobs:
      fail-fast: false
      matrix:
        include:
-          # amd64 runs natively under KVM on ubicloud's amd64 runner.
+          # Both arches build on ubicloud's amd64 runner. amd64 uses KVM;
+          # arm64 runs under cross-arch TCG (slow, but only cloud-init
+          # provisioning has to complete — the boot/verify smoke test below
+          # is gated to amd64 because TCG can't boot Next.js in any
+          # reasonable time). Snapshots are NOT published — `stack emulator
+          # pull` captures one locally on first run, which is the only way
+          # to guarantee KVM/HVF/TCG + `-cpu max` compatibility on the
+          # user's machine.
          - arch: amd64
            runner: ubicloud-standard-8
-          # arm64 runs under cross-arch TCG on ubicloud's amd64 runner.
-          # No KVM for arm64 guests on an amd64 host; cortex-a72 + V8
-          # --jitless together sidestep the SIGTRAPs that cross-arch TCG
-          # hits on aggressive arm64 JIT code. Smoke test is still skipped
-          # because the backend can't come up reliably under cross-arch
-          # TCG within any sane window.
          - arch: arm64
            runner: ubicloud-standard-8

@ -55,10 +64,60 @@ jobs:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

-      - name: Install QEMU dependencies
+      # Node/pnpm are needed on both arches: arm64 also runs
+      # generate-env-development.mjs inside build-image.sh. amd64 additionally
+      # builds and runs the CLI for the verification steps below.
+      - uses: pnpm/action-setup@v4
+        with:
+          version: 10.23.0
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: pnpm
+
+      - name: Install system dependencies
        run: |
          sudo apt-get update
-          sudo apt-get install -y qemu-system-x86 qemu-system-arm qemu-kvm qemu-utils genisoimage socat qemu-efi-aarch64
+          # qemu-utils gives us qemu-img; qemu-efi-aarch64 provides the arm64
+          # UEFI firmware. The actual qemu-system-* binaries come from the
+          # source build below — Ubuntu 24.04 ships QEMU 8.2 which predates
+          # the mapped-ram migration capability we rely on.
+          sudo apt-get install -y qemu-utils qemu-efi-aarch64 socat genisoimage zstd \
+            ninja-build pkg-config python3-venv \
+            libglib2.0-dev libpixman-1-dev libslirp-dev libepoxy-dev libgbm-dev
+
+      # QEMU 10.2.2 is required for the mapped-ram + multifd migration path
+      # used by the fast-resume snapshot. Cache the compiled prefix (keyed on
+      # runner OS/arch; bump -v1 on image changes) to skip the ~5-8 min build.
+      - name: Restore QEMU 10.2.2 cache
+        id: qemu-cache
+        uses: actions/cache@v4
+        with:
+          path: /opt/qemu
+          key: qemu-10.2.2-${{ runner.os }}-${{ runner.arch }}-v1
+
+      - name: Build QEMU 10.2.2 from source
+        if: steps.qemu-cache.outputs.cache-hit != 'true'
+        run: |
+          set -euxo pipefail
+          curl -fsSL https://download.qemu.org/qemu-10.2.2.tar.xz -o /tmp/qemu.tar.xz
+          mkdir -p /tmp/qemu-src
+          tar -xf /tmp/qemu.tar.xz -C /tmp/qemu-src --strip-components=1
+          cd /tmp/qemu-src
+          ./configure --prefix=/opt/qemu \
+            --target-list=x86_64-softmmu,aarch64-softmmu \
+            --enable-kvm --enable-slirp --enable-tcg \
+            --disable-docs --disable-gtk --disable-sdl --disable-vnc \
+            --disable-guest-agent --disable-tools
+          make -j"$(nproc)"
+          sudo make install
+
+      - name: Put QEMU 10.2.2 on PATH
+        run: |
+          echo "/opt/qemu/bin" >> "$GITHUB_PATH"
+          /opt/qemu/bin/qemu-system-x86_64 --version
+          /opt/qemu/bin/qemu-system-aarch64 --version

      - name: Enable KVM access
        run: |
@ -82,41 +141,56 @@ jobs:
      - name: Generate emulator env
        run: node docker/local-emulator/generate-env-development.mjs

-      # arm64 runs under cross-arch TCG on an amd64 runner; the backend's
-      # V8 TurboFan JIT re-triggers the SIGTRAPs we dodge in migrations
-      # with --no-opt, and even if it didn't, boot is too slow under TCG
-      # to verify in any sane window. amd64 KVM already exercises the
-      # service stack; real arm64 hosts have KVM for end-users.
-      - name: Start emulator and verify
+      # amd64 runs under KVM on the runner so we can boot the newly-built
+      # image to verify it works end-to-end before publishing. arm64 runs
+      # under cross-arch TCG on an amd64 host, which can't reliably boot
+      # Next.js within any sane window — skipped.
+      - name: Build stack-cli (for emulator CLI)
        if: matrix.arch == 'amd64'
        run: |
-          chmod +x docker/local-emulator/qemu/run-emulator.sh
-          EMULATOR_ARCH=${{ matrix.arch }} \
-          EMULATOR_READY_TIMEOUT=3200 \
-            docker/local-emulator/qemu/run-emulator.sh start
+          pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...'
+          # Turbo's trailing `...` filter builds stack-cli AND its workspace
+          # deps (@stackframe/js, @stackframe/stack-shared, etc.) — stack-cli
+          # imports them at runtime from their dist/ outputs.
+          pnpm exec turbo run build --filter='@stackframe/stack-cli...'
+
+      - name: Start emulator and verify
+        if: matrix.arch == 'amd64'
+        env:
+          EMULATOR_ARCH: ${{ matrix.arch }}
+          EMULATOR_READY_TIMEOUT: 3200
+          EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }}
+          EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }}
+        run: node packages/stack-cli/dist/index.js emulator start

      - name: Verify services are healthy
        if: matrix.arch == 'amd64'
-        run: |
-          EMULATOR_ARCH=${{ matrix.arch }} \
-            docker/local-emulator/qemu/run-emulator.sh status
+        env:
+          EMULATOR_ARCH: ${{ matrix.arch }}
+          EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }}
+          EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }}
+        run: node packages/stack-cli/dist/index.js emulator status

      - name: Stop emulator
        if: always() && matrix.arch == 'amd64'
-        run: |
-          EMULATOR_ARCH=${{ matrix.arch }} \
-            docker/local-emulator/qemu/run-emulator.sh stop
+        env:
+          EMULATOR_ARCH: ${{ matrix.arch }}
+          EMULATOR_IMAGE_DIR: ${{ env.EMULATOR_IMAGE_DIR }}
+          EMULATOR_RUN_DIR: ${{ env.EMULATOR_RUN_DIR }}
+        run: node packages/stack-cli/dist/index.js emulator stop

      - name: Package image
        run: |
          BASE_IMG="docker/local-emulator/qemu/images/stack-emulator-${{ matrix.arch }}.qcow2"
          cp "$BASE_IMG" "stack-emulator-${{ matrix.arch }}.qcow2"
+          ls -lh "stack-emulator-${{ matrix.arch }}.qcow2"

      - name: Upload image artifact
        uses: actions/upload-artifact@v4
        with:
          name: qemu-emulator-${{ matrix.arch }}
          path: stack-emulator-${{ matrix.arch }}.qcow2
+          if-no-files-found: error
          retention-days: 30
          compression-level: 0

@ -134,31 +208,80 @@ jobs:
    steps:
      - uses: actions/checkout@v6

-      - name: Install QEMU dependencies
+      - name: Install system dependencies
        run: |
          sudo apt-get update
-          sudo apt-get install -y qemu-system-x86 qemu-utils genisoimage socat
+          sudo apt-get install -y qemu-utils socat zstd \
+            ninja-build pkg-config python3-venv \
+            libglib2.0-dev libpixman-1-dev libslirp-dev libepoxy-dev libgbm-dev
+
+      - name: Restore QEMU 10.2.2 cache
+        id: qemu-cache
+        uses: actions/cache@v4
+        with:
+          path: /opt/qemu
+          key: qemu-10.2.2-${{ runner.os }}-${{ runner.arch }}-v1
+
+      - name: Build QEMU 10.2.2 from source
+        if: steps.qemu-cache.outputs.cache-hit != 'true'
+        run: |
+          set -euxo pipefail
+          curl -fsSL https://download.qemu.org/qemu-10.2.2.tar.xz -o /tmp/qemu.tar.xz
+          mkdir -p /tmp/qemu-src
+          tar -xf /tmp/qemu.tar.xz -C /tmp/qemu-src --strip-components=1
+          cd /tmp/qemu-src
+          ./configure --prefix=/opt/qemu \
+            --target-list=x86_64-softmmu,aarch64-softmmu \
+            --enable-kvm --enable-slirp --enable-tcg \
+            --disable-docs --disable-gtk --disable-sdl --disable-vnc \
+            --disable-guest-agent --disable-tools
+          make -j"$(nproc)"
+          sudo make install
+
+      - name: Put QEMU 10.2.2 on PATH
+        run: |
+          echo "/opt/qemu/bin" >> "$GITHUB_PATH"
+          /opt/qemu/bin/qemu-system-x86_64 --version
+
+      - uses: pnpm/action-setup@v4
+        with:
+          version: 10.23.0
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          cache: pnpm
+
+      - name: Install stack-cli deps + build
+        run: |
+          pnpm install --frozen-lockfile --filter '@stackframe/stack-cli...'
+          # Turbo's trailing `...` filter builds stack-cli AND its workspace
+          # deps (@stackframe/js, @stackframe/stack-shared, etc.) — stack-cli
+          # imports them at runtime from their dist/ outputs.
+          pnpm exec turbo run build --filter='@stackframe/stack-cli...'

      - name: Download built image
        uses: actions/download-artifact@v4
        with:
          name: qemu-emulator-${{ matrix.arch }}
-          path: docker/local-emulator/qemu/images/
+          path: ${{ github.workspace }}/.stack-emulator-images/

-      - name: Generate emulator env
-        run: node docker/local-emulator/generate-env-development.mjs
-
-      - name: Start emulator from artifact
+      - name: Place qcow2 into STACK_EMULATOR_HOME layout
+        run: |
+          mkdir -p "$STACK_EMULATOR_HOME/images"
+          cp "${{ github.workspace }}/.stack-emulator-images/stack-emulator-${{ matrix.arch }}.qcow2" "$STACK_EMULATOR_HOME/images/"
+          ls -lh "$STACK_EMULATOR_HOME/images/"
+
+      # No savevm.zst artifact (users capture locally via `emulator pull`),
+      # so `emulator start` cold-boots the qcow2. Budget accordingly.
+      - name: Start emulator via CLI
        run: |
-          chmod +x docker/local-emulator/qemu/run-emulator.sh docker/local-emulator/qemu/common.sh
          EMULATOR_ARCH=${{ matrix.arch }} \
          EMULATOR_READY_TIMEOUT=600 \
-            docker/local-emulator/qemu/run-emulator.sh start
+            node packages/stack-cli/dist/index.js emulator start

      - name: Verify services are healthy
-        run: |
-          EMULATOR_ARCH=${{ matrix.arch }} \
-            docker/local-emulator/qemu/run-emulator.sh status
+        run: node packages/stack-cli/dist/index.js emulator status

      - name: Smoke test — backend health
        run: curl -sf http://localhost:26701/health?db=1
@ -174,13 +297,11 @@ jobs:

      - name: Stop emulator
        if: always()
-        run: |
-          EMULATOR_ARCH=${{ matrix.arch }} \
-            docker/local-emulator/qemu/run-emulator.sh stop
+        run: node packages/stack-cli/dist/index.js emulator stop

      - name: Print serial log on failure
        if: failure()
-        run: tail -100 docker/local-emulator/qemu/run/vm/serial.log 2>/dev/null || true
+        run: tail -100 "$STACK_EMULATOR_HOME/run/vm/serial.log" 2>/dev/null || true

  publish:
    name: Publish to GitHub Releases
@ -220,8 +341,14 @@ jobs:
          ### Images
          | File | Description |
          |------|-------------|
-          | \`stack-emulator-arm64.qcow2\` | ARM64 emulator image |
-          | \`stack-emulator-amd64.qcow2\` | AMD64 emulator image |
+          | \`stack-emulator-arm64.qcow2\` | ARM64 disk image |
+          | \`stack-emulator-amd64.qcow2\` | AMD64 disk image |
+
+          \`emulator pull\` downloads the qcow2 and captures a local fast-start
+          snapshot (~1-3 min). Subsequent \`emulator start\`s resume in ~3-8 s.
+          Snapshots are captured locally because QEMU migration state isn't
+          portable across accelerators (KVM / HVF / TCG) or \`-cpu max\`
+          feature sets.

          ### Usage
          \`\`\`bash
--- a/.gitignore
+++ b/.gitignore
@ -144,3 +144,6 @@ packages/stack/*
 !packages/react/package.json
 !packages/next/package.json
 !packages/stack/package.json
+
+# claude code
+.claude/scheduled_tasks.lock
--- a/apps/backend/src/lib/seed-dummy-data.ts
+++ b/apps/backend/src/lib/seed-dummy-data.ts
@ -1485,6 +1485,12 @@ async function seedDummySessionActivityEvents(options: SessionActivityEventSeedO
    await tx.event.createMany({
      data: events,
    });
+  }, {
+    // Under cross-arch arm64 TCG in the emulator qcow2 build, this batch
+    // takes ~10s; Prisma's default is 5s. Production (KVM/native) runs it
+    // in well under 1s, so the looser bound only kicks in when the DB is
+    // genuinely slow.
+    timeout: 30_000,
  });

  if (clickhouseClient && clickhouseRows.length > 0) {
--- a/docker/local-emulator/Dockerfile
+++ b/docker/local-emulator/Dockerfile
@ -58,8 +58,22 @@ ENV NEXT_PUBLIC_STACK_STRIPE_PUBLISHABLE_KEY=pk_test_mock_publishable_key_for_lo
 # Build the backend NextJS app
 RUN pnpm turbo run docker-build --filter=@stackframe/backend... --filter=@stackframe/dashboard...

-# Build the self-host seed script
-RUN cd apps/backend && pnpm build-self-host-migration-script
+# Build the self-host seed script.
+# tsdown -> rolldown is multi-threaded Rust; under qemu-user (cross-arch
+# arm64-on-amd64) its futex emulation occasionally deadlocks and the build
+# hangs forever. Bound each attempt and retry to ride out the race.
+RUN cd apps/backend && \
+    attempt=1; \
+    while :; do \
+      timeout --kill-after=30s 600s pnpm build-self-host-migration-script && break; \
+      rc=$?; \
+      if [ "$attempt" -ge 3 ]; then \
+        echo "build-self-host-migration-script failed after $attempt attempts (last rc=$rc)" >&2; \
+        exit "$rc"; \
+      fi; \
+      echo "build-self-host-migration-script attempt $attempt failed (rc=$rc); retrying..." >&2; \
+      attempt=$((attempt + 1)); \
+    done


 # Prune node_modules for runtime: remove dev tools, heavy UI packages,
@ -263,10 +277,11 @@ COPY docker/local-emulator/run-cron-jobs.sh /run-cron-jobs.sh
 COPY docker/local-emulator/entrypoint.sh /entrypoint.sh
 COPY docker/local-emulator/init-services.sh /init-services.sh
 COPY docker/local-emulator/start-app.sh /start-app.sh
+COPY docker/local-emulator/rotate-secrets.sh /usr/local/bin/rotate-secrets
 COPY docker/local-emulator/clickhouse-config.xml /etc/clickhouse-server/config.xml
 COPY docker/local-emulator/clickhouse-users.xml /etc/clickhouse-server/users.xml
 COPY docker/server/entrypoint.sh /app-entrypoint.sh
-RUN chmod +x /entrypoint.sh /init-services.sh /start-app.sh /app-entrypoint.sh /run-cron-jobs.sh
+RUN chmod +x /entrypoint.sh /init-services.sh /start-app.sh /app-entrypoint.sh /run-cron-jobs.sh /usr/local/bin/rotate-secrets

 # PostgreSQL: 5432, Redis: 6379, Inbucket: 2500/9001/1100,
 # Svix: 8071, ClickHouse: 8123/9009, MinIO: 9090, QStash: 8080
--- a/docker/local-emulator/entrypoint.sh
+++ b/docker/local-emulator/entrypoint.sh
@ -33,6 +33,12 @@ fi
 # baked-in mock value from .env.development to be a usable credential against
 # a running emulator. Overriding here propagates to both the backend and the
 # run-cron-jobs.sh loop via supervisord's inherited environment.
-export CRON_SECRET="$(openssl rand -hex 32)"
+#
+# In snapshot-build mode the VM supplies a deterministic placeholder via the
+# --env-file so the baked snapshot doesn't contain a real secret; on resume,
+# /usr/local/bin/rotate-secrets swaps in a fresh per-install value.
+if [ -z "${CRON_SECRET:-}" ]; then
+  export CRON_SECRET="$(openssl rand -hex 32)"
+fi

 exec /usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf
--- a/docker/local-emulator/qemu/build-image.sh
+++ b/docker/local-emulator/qemu/build-image.sh
@ -12,9 +12,34 @@ REPO_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"
 DEBIAN_VERSION="${DEBIAN_VERSION:-13}"
 DISK_SIZE="${EMULATOR_DISK_SIZE:-12G}"
 RAM="${EMULATOR_BUILD_RAM:-4096}"
-CPUS="${EMULATOR_BUILD_CPUS:-$(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 4)}"
 PROVISION_TIMEOUT="${EMULATOR_PROVISION_TIMEOUT:-3200}"
 EMULATOR_IMAGE_NAME="${EMULATOR_IMAGE_NAME:-stack-local-emulator}"
+# Snapshot-ready qcow2: bake deterministic placeholder secrets (PCK/SSK/SAK/
+# CRON_SECRET) into the image so runtime `rotate-secrets` can swap them for
+# fresh per-install values on every `emulator start`. Without this, the image
+# would ship with random shared secrets — a security regression. Cheap to
+# build (no extra wall-clock cost in CI), so it stays on by default.
+EMULATOR_BUILD_SNAPSHOT="${EMULATOR_BUILD_SNAPSHOT:-1}"
+# Capture RAM/device state via QMP at build time, producing a
+# `stack-emulator-<arch>.savevm.zst` next to the qcow2. Off by default —
+# users capture locally on first `stack emulator pull` (run-emulator.sh
+# capture) because migration state isn't portable across accelerators
+# (KVM/HVF/TCG) or `-cpu max` feature sets, so a CI-captured snapshot
+# couldn't resume reliably on arbitrary user hardware. Requires
+# EMULATOR_BUILD_SNAPSHOT=1 (the default); the check below exits otherwise.
+EMULATOR_CAPTURE_SAVEVM="${EMULATOR_CAPTURE_SAVEVM:-0}"
+if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ] && [ "$EMULATOR_BUILD_SNAPSHOT" != "1" ]; then
+  echo "EMULATOR_CAPTURE_SAVEVM=1 requires EMULATOR_BUILD_SNAPSHOT=1" >&2
+  exit 1
+fi
+# Capture mode pins SMP to a fixed value so the resume QEMU command (which
+# uses EMULATOR_CPUS, default 4) can match the captured device topology —
+# RAM migration replay requires identical vCPU count.
+if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
+  CPUS="${EMULATOR_BUILD_CPUS:-4}"
+else
+  CPUS="${EMULATOR_BUILD_CPUS:-$(sysctl -n hw.ncpu 2>/dev/null || nproc 2>/dev/null || echo 4)}"
+fi

 RED='\033[0;31m'
 GREEN='\033[0;32m'
@ -47,9 +72,18 @@ check_deps() {
    command -v "$qemu_bin" >/dev/null 2>&1 || missing+=("$qemu_bin")
  done

-  for cmd in qemu-img curl docker gzip; do
+  for cmd in qemu-img curl gzip; do
    command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd")
  done
+  if [ "${SKIP_DOCKER_BUILD:-0}" != "1" ]; then
+    command -v docker >/dev/null 2>&1 || missing+=("docker")
+  fi
+
+  if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
+    for cmd in socat zstd; do
+      command -v "$cmd" >/dev/null 2>&1 || missing+=("$cmd")
+    done
+  fi

  if ! command -v mkisofs >/dev/null 2>&1 && ! command -v genisoimage >/dev/null 2>&1 && ! command -v hdiutil >/dev/null 2>&1; then
    missing+=("mkisofs/genisoimage/hdiutil")
@ -231,6 +265,9 @@ persist_provision_logs() {
  cp "$provision_log" "$IMAGE_DIR/provision-emulator-${arch}.progress.log" 2>/dev/null || true
 }

+# qmp_session() and capture_vm_state() live in common.sh. build-image.sh (only
+# when EMULATOR_CAPTURE_SAVEVM=1) and run-emulator.sh (`emulator pull`) use them.
+
 build_one() {
  local arch="$1"
  local base_img="$IMAGE_DIR/debian-${DEBIAN_VERSION}-base-${arch}.qcow2"
@ -245,7 +282,9 @@ build_one() {
  local tmp_img="$tmp_dir/disk.qcow2"
  local seed_iso="$tmp_dir/seed.iso"
  local bundle_iso="$tmp_dir/bundle.iso"
+  local runtime_iso="$tmp_dir/runtime.iso"
  local bundle_dir="$tmp_dir/bundle"
+  local runtime_cfg_dir="$tmp_dir/runtime"
  local serial_log="$tmp_dir/serial.log"
  local provision_log="$tmp_dir/provision.log"
  local pidfile="$tmp_dir/qemu.pid"
@ -269,16 +308,83 @@ build_one() {
  mkdir -p "$bundle_dir"
  cp "$bundle_tgz" "$bundle_dir/img.tgz"
  cp "$BUILD_ENV_FILE" "$bundle_dir/build.env"
+  if [ "$EMULATOR_BUILD_SNAPSHOT" = "1" ]; then
+    # Guest flag: use placeholder secrets that rotate-secrets swaps per-install.
+    # Leading \n keeps it on its own line if build.env lacks a final newline.
+    printf '\nSTACK_EMULATOR_BUILD_SNAPSHOT=1\n' >> "$bundle_dir/build.env"
+  fi
+  if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
+    # Guest flag: start stack.service during provision-build, wait for
+    # backend+dashboard health, then block until the host captures VM state via
+    # QMP (stop + migrate + quit). Leading \n guards a missing final newline.
+    printf '\nSTACK_EMULATOR_CAPTURE_SAVEVM=1\n' >> "$bundle_dir/build.env"
+  fi
  # Tell the guest which arch it's being built for so cross-arch (TCG) builds
  # can skip the smoke test, which isn't reliable under software emulation.
  printf 'STACK_EMULATOR_BUILD_ARCH=%s\n' "$arch" > "$bundle_dir/build-arch.env"
  make_iso_from_dir "$bundle_iso" "STACKBUNDLE" "$bundle_dir"

+  if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
+    # render-stack-env (inside the guest) mounts a STACKCFG disk containing
+    # runtime.env + base.env. At runtime the host-side run-emulator.sh builds
+    # this ISO; in capture mode stack.service also starts during the build,
+    # so we must provide the same shape here. Values mirror the defaults the
+    # runtime would supply — port-prefix 81 and matching host-port numbers
+    # (unused at build time since nothing is port-forwarded, but
+    # render-stack-env embeds them into /run/stack-auth/local-emulator.env).
+    mkdir -p "$runtime_cfg_dir"
+    {
+      printf 'STACK_EMULATOR_PORT_PREFIX=81\n'
+      printf 'STACK_EMULATOR_DASHBOARD_HOST_PORT=26700\n'
+      printf 'STACK_EMULATOR_BACKEND_HOST_PORT=26701\n'
+      printf 'STACK_EMULATOR_MINIO_HOST_PORT=26702\n'
+      printf 'STACK_EMULATOR_INBUCKET_HOST_PORT=26703\n'
+      printf 'STACK_EMULATOR_VM_DIR_HOST=\n'
+    } > "$runtime_cfg_dir/runtime.env"
+    cp "$BUILD_ENV_FILE" "$runtime_cfg_dir/base.env"
+    make_iso_from_dir "$runtime_iso" "STACKCFG" "$runtime_cfg_dir"
+  fi
+
  : > "$serial_log"
  : > "$provision_log"
  qemu_base="$(qemu_cmd_prefix_for_arch "$arch")"
  log "QEMU command prefix (${arch}): $qemu_base"

+  local monitor_sock="$tmp_dir/monitor.sock"
+  local qga_sock="$tmp_dir/qga.sock"
+  local snapshot_args=()
+  local runtime_disk_args=()
+  local virtfs_args=(-virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none")
+  if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
+    # STACKCFG runtime ISO lets stack.service start during the build — same
+    # disk shape render-stack-env expects at runtime. Placed before netdev
+    # so its virtio-blk PCI slot precedes virtio-net-pci, matching the
+    # resume argv order in run-emulator.sh (slots must line up or
+    # migrate-incoming fails the device-tree check).
+    runtime_disk_args=(
+      -drive "file=$runtime_iso,format=raw,if=virtio,readonly=on"
+    )
+    # QMP for stop/migrate/quit; virtio-serial + QGA channel so we can exec
+    # inside the guest post-resume (only needed at runtime but harmless here).
+    snapshot_args=(
+      -chardev "socket,id=monitor,path=$monitor_sock,server=on,wait=off"
+      -mon "chardev=monitor,mode=control"
+      -chardev "socket,path=$qga_sock,server=on,wait=off,id=qga0"
+      -device virtio-serial
+      -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0"
+      # Empty PCIe root port reserved for runtime hot-plug of virtio-9p.
+      # The integrated pcie.0 bus on q35 / arm64-virt is static — hotplug
+      # only works through a root port. Must be present at snapshot capture
+      # so the resumed device tree matches.
+      -device "pcie-root-port,id=hostfs-port,bus=pcie.0,chassis=1"
+    )
+    # QEMU disallows migration when virtfs is mounted in the guest — virtfs
+    # has guest-side state (open handles, mount table) that isn't migratable.
+    # Drop the host fs mount in capture mode; STACK_SERVICES_READY still
+    # arrives on the serial log so contains_provision_marker can detect it.
+    virtfs_args=()
+  fi
+
  # shellcheck disable=SC2086
  $qemu_base \
    -boot order=c \
@ -287,18 +393,24 @@ build_one() {
    -drive "file=$tmp_img,format=qcow2,if=virtio,discard=on,detect-zeroes=unmap" \
    -drive "file=$seed_iso,format=raw,if=virtio,readonly=on" \
    -drive "file=$bundle_iso,format=raw,if=virtio,readonly=on" \
+    ${runtime_disk_args[@]+"${runtime_disk_args[@]}"} \
    -netdev user,id=net0 \
    -device virtio-net-pci,netdev=net0 \
-    -virtfs "local,path=$tmp_dir,mount_tag=hostfs,security_model=none" \
+    ${virtfs_args[@]+"${virtfs_args[@]}"} \
+    ${snapshot_args[@]+"${snapshot_args[@]}"} \
    -serial "file:$serial_log" \
    -display none \
    -daemonize \
    -pidfile "$pidfile"

  pid="$(cat "$pidfile")"
+  local ready_marker="STACK_CLOUD_INIT_DONE"
+  if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
+    ready_marker="STACK_SERVICES_READY"
+  fi
  elapsed=0
  while [ "$elapsed" -lt "$PROVISION_TIMEOUT" ]; do
-    if contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_DONE"; then
+    if contains_provision_marker "$provision_log" "$serial_log" "$ready_marker"; then
      break
    fi

@ -312,7 +424,7 @@ build_one() {
      if [ "$total_build_lines" -gt "$last_build_lines" ]; then
        echo ""
        sed -n "$((last_build_lines + 1)),${total_build_lines}p" "$provision_log" 2>/dev/null | while IFS= read -r msg; do
-          if [ "$msg" = "STACK_CLOUD_INIT_DONE" ]; then
+          if [ "$msg" = "STACK_CLOUD_INIT_DONE" ] || [ "$msg" = "STACK_SERVICES_READY" ]; then
            continue
          fi
          printf "  [%3ds] %s\n" "$elapsed" "$msg"
@ -332,7 +444,7 @@ build_one() {
  done
  echo ""

-  if ! contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_DONE"; then
+  if ! contains_provision_marker "$provision_log" "$serial_log" "$ready_marker"; then
    if [ "$guest_failed" = true ]; then
      err "Guest provisioning reported failure for emulator (${arch})"
    elif [ "$guest_exited" = true ]; then
@ -358,17 +470,69 @@ build_one() {
    exit 1
  fi

-  local shutdown_wait=0
-  while [ "$shutdown_wait" -lt 90 ] && kill -0 "$pid" 2>/dev/null; do
-    sleep 1
-    shutdown_wait=$((shutdown_wait + 1))
-  done
+  if [ "$EMULATOR_CAPTURE_SAVEVM" = "1" ]; then
+    local savevm_file="$IMAGE_DIR/stack-emulator-${arch}.savevm.zst"
+    local savevm_raw="$tmp_dir/state.raw"
+    local savevm_tmp="$tmp_dir/state.zst"

-  if kill -0 "$pid" 2>/dev/null; then
-    warn "Guest did not power off cleanly; forcing shutdown."
-    kill "$pid" 2>/dev/null || true
-    sleep 2
-    kill -9 "$pid" 2>/dev/null || true
+    # Capture raw RAM/device state via QEMU's native file: migration; then
+    # compress on the host side. Avoids any reliance on QEMU spawning a shell
+    # that has zstd in PATH.
+    log "Capturing VM state via QMP (${arch})..."
+    if ! capture_vm_state "$monitor_sock" "$savevm_raw"; then
+      err "Failed to capture VM state for ${arch}"
+      if kill -0 "$pid" 2>/dev/null; then
+        kill "$pid" 2>/dev/null || true
+        sleep 1
+        kill -9 "$pid" 2>/dev/null || true
+      fi
+      persist_provision_logs "$arch" "$serial_log" "$provision_log"
+      rm -rf "$tmp_dir"
+      exit 1
+    fi
+
+    # QEMU was told to `quit` during capture; wait up to 30s for it to exit.
+    local shutdown_wait=0
+    while [ "$shutdown_wait" -lt 30 ] && kill -0 "$pid" 2>/dev/null; do
+      sleep 1
+      shutdown_wait=$((shutdown_wait + 1))
+    done
+    if kill -0 "$pid" 2>/dev/null; then
+      warn "QEMU did not exit after quit; forcing."
+      kill "$pid" 2>/dev/null || true
+      sleep 2
+      kill -9 "$pid" 2>/dev/null || true
+    fi
+
+    if [ ! -s "$savevm_raw" ]; then
+      err "VM state file missing or empty at $savevm_raw"
+      persist_provision_logs "$arch" "$serial_log" "$provision_log"
+      rm -rf "$tmp_dir"
+      exit 1
+    fi
+
+    # zstd -1 trades ~30% larger file for ~40% faster decompression at resume.
+    # For shipping-and-decompress-once-per-start, that's the right balance.
+    log "Compressing VM state with zstd..."
+    zstd -1 -T0 --rm -o "$savevm_tmp" "$savevm_raw"
+
+    mv "$savevm_tmp" "$savevm_file"
+    local savevm_size
+    savevm_size="$(du -h "$savevm_file" | cut -f1)"
+    log "Saved VM state: $savevm_file (${savevm_size})"
+  else
+    local shutdown_wait=0
+    while [ "$shutdown_wait" -lt 90 ] && kill -0 "$pid" 2>/dev/null; do
+      sleep 1
+      shutdown_wait=$((shutdown_wait + 1))
+    done
+
+    if kill -0 "$pid" 2>/dev/null; then
+      warn "Guest did not power off cleanly; forcing shutdown."
+      kill "$pid" 2>/dev/null || true
+      sleep 2
+      kill -9 "$pid" 2>/dev/null || true
+    fi
  fi

  persist_provision_logs "$arch" "$serial_log" "$provision_log"
@ -389,8 +553,16 @@ BUILD_ENV_FILE="$REPO_ROOT/docker/local-emulator/.env.development"
 for arch in "${TARGET_ARCHS[@]}"; do
  local_base="$IMAGE_DIR/debian-${DEBIAN_VERSION}-base-${arch}.qcow2"
  download_cloud_image "$arch" "$local_base"
-  build_local_emulator_image "$arch"
-  prepare_bundle_artifacts "$arch"
+  if [ "${SKIP_DOCKER_BUILD:-0}" = "1" ]; then
+    log "SKIP_DOCKER_BUILD=1: reusing pre-built Docker bundle"
+    if [ ! -f "$IMAGE_DIR/emulator-${arch}-docker-images.tar.gz" ]; then
+      err "Pre-built bundle not found: $IMAGE_DIR/emulator-${arch}-docker-images.tar.gz"
+      exit 1
+    fi
+  else
+    build_local_emulator_image "$arch"
+    prepare_bundle_artifacts "$arch"
+  fi
  build_one "$arch"
 done

--- a/docker/local-emulator/qemu/cloud-init/emulator/user-data
+++ b/docker/local-emulator/qemu/cloud-init/emulator/user-data
@ -75,12 +75,24 @@ write_files:
      # ssk/sak: required by the emulator's own dashboard (StackServerApp
      #   construction throws without them). Not used by user-app flows; the
      #   /local-emulator/project route mints separate per-project credentials.
+      #
+      # Snapshot-build mode (STACK_EMULATOR_BUILD_SNAPSHOT=1 in /etc/stack-build.env):
+      # use deterministic placeholder hex strings instead of random values. The
+      # built image then contains these placeholders; at every `emulator start`
+      # resume the host generates fresh per-install secrets and
+      # /usr/local/bin/rotate-secrets (inside the stack container) swaps them in.
      umask 077
-      for key in internal-pck internal-ssk internal-sak; do
-        if [ ! -s "/var/lib/stack-auth/$key" ]; then
-          openssl rand -hex 32 > "/var/lib/stack-auth/$key"
-        fi
-      done
+      if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_BUILD_SNAPSHOT=1' /etc/stack-build.env 2>/dev/null; then
+        printf '%s' '00000000000000000000000000000000ffffffffffffffffffffffffffffffff' > /var/lib/stack-auth/internal-pck
+        printf '%s' '00000000000000000000000000000000eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee' > /var/lib/stack-auth/internal-ssk
+        printf '%s' '00000000000000000000000000000000dddddddddddddddddddddddddddddddd' > /var/lib/stack-auth/internal-sak
+      else
+        for key in internal-pck internal-ssk internal-sak; do
+          if [ ! -s "/var/lib/stack-auth/$key" ]; then
+            openssl rand -hex 32 > "/var/lib/stack-auth/$key"
+          fi
+        done
+      fi
      INTERNAL_PCK="$(cat /var/lib/stack-auth/internal-pck)"
      INTERNAL_SSK="$(cat /var/lib/stack-auth/internal-ssk)"
      INTERNAL_SAK="$(cat /var/lib/stack-auth/internal-sak)"
@ -92,6 +104,15 @@ write_files:
      HOST_SERVICES_HOST=10.0.2.2
      P="$STACK_EMULATOR_PORT_PREFIX"

+      # Snapshot-build mode: ship a deterministic placeholder CRON_SECRET so the
+      # baked VM contains a known-public value that rotate-secrets swaps out on
+      # every resume. Outside snapshot-build mode, leave CRON_SECRET unset so
+      # docker/local-emulator/entrypoint.sh generates a fresh random one.
+      EMULATOR_CRON_SECRET=""
+      if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_BUILD_SNAPSHOT=1' /etc/stack-build.env 2>/dev/null; then
+        EMULATOR_CRON_SECRET="00000000000000000000000000000000cccccccccccccccccccccccccccccccc"
+      fi
+
      {
        # Static vars from base config and runtime (e.g. API keys, feature flags)
        cat /mnt/stack-runtime/base.env
@ -99,6 +120,9 @@ write_files:
        printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$INTERNAL_PCK"
        printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$INTERNAL_SSK"
        printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$INTERNAL_SAK"
+        if [ -n "$EMULATOR_CRON_SECRET" ]; then
+          printf 'CRON_SECRET=%s\n' "$EMULATOR_CRON_SECRET"
+        fi

        # Computed vars — depend on port prefix or deps host
        # Host-side ports (for browser URLs — browser runs on host, not in VM)
@ -142,14 +166,45 @@ write_files:
    permissions: '0755'
    content: |
      #!/bin/bash
-      set -euo pipefail
+      # Mount the host filesystem at /host. Two modes:
+      #   (no args)       — cold-boot: bind /host on itself, make it a shared
+      #                     mount point, then mount virtio-9p on top. The
+      #                     bind+shared step is what lets the docker bind
+      #                     mount (-v /host:/host:rshared) receive later
+      #                     propagation events.
+      #   --post-resume   — snapshot-resume: /host is already shared (set up
+      #                     at build time and preserved across the snapshot,
+      #                     plus the docker bind mount has rshared
+      #                     propagation). The host has just hot-plugged
+      #                     virtio-9p; mount it on /host and the new mount
+      #                     propagates into the running container.
+      set -uo pipefail
      mkdir -p /host
+
+      # Idempotent: bind /host on itself once so it becomes a mount point
+      # with its own propagation, then make it shared. mount --make-shared
+      # requires a mount point, hence the bind first.
      if ! mountpoint -q /host; then
-        if ! mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host; then
-          echo "Failed to mount host filesystem at /host" >&2
-          exit 1
-        fi
+        mount --bind /host /host
      fi
+      mount --make-shared /host
+
+      if [ "${1:-}" = "--post-resume" ]; then
+        if mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host; then
+          exit 0
+        fi
+        echo "post-resume 9p mount failed" >&2
+        exit 1
+      fi
+
+      # Cold boot. In snapshot-build mode the host detaches virtfs (QEMU
+      # disallows migration while it's mounted), so the 9p mount may not be
+      # available — tolerate that and fall through to an empty /host.
+      if mount -t 9p -o trans=virtio,version=9p2000.L hostfs /host 2>/dev/null; then
+        exit 0
+      fi
+      echo "host filesystem unavailable; continuing with empty /host" >&2
+      exit 0

  - path: /usr/local/bin/run-stack-container
    permissions: '0755'
@ -190,7 +245,7 @@ write_files:
          -v stack-clickhouse-data:/data/clickhouse \
          -v stack-minio-data:/data/minio \
          -v stack-inbucket-data:/data/inbucket \
-          -v /host:/host \
+          -v /host:/host:rshared \
          stack-local-emulator 2>&1 | tee -a "$host_log"
      else
        exec docker run \
@ -204,7 +259,7 @@ write_files:
          -v stack-clickhouse-data:/data/clickhouse \
          -v stack-minio-data:/data/minio \
          -v stack-inbucket-data:/data/inbucket \
-          -v /host:/host \
+          -v /host:/host:rshared \
          stack-local-emulator
      fi

@ -522,6 +577,74 @@ write_files:
      fstrim -av 2>/dev/null || true
      log "slim-docker-image done."

+  - path: /usr/local/bin/wait-for-stack-ready
+    permissions: '0755'
+    content: |
+      #!/bin/bash
+      # Poll the stack container's backend + dashboard on the guest's own
+      # localhost until both respond healthy. Used at snapshot-build time to
+      # gate "emit STACK_SERVICES_READY" on the app actually being warm.
+      set -uo pipefail
+
+      TIMEOUT="${STACK_READY_TIMEOUT:-600}"
+      BACKEND_PORT="${STACK_READY_BACKEND_PORT:-8102}"
+      DASHBOARD_PORT="${STACK_READY_DASHBOARD_PORT:-8101}"
+
+      log() { /usr/local/bin/log-provision "wait-for-stack-ready: $*"; }
+
+      start=$SECONDS
+      next_heartbeat=$((start + 30))
+      log "waiting for backend:$BACKEND_PORT and dashboard:$DASHBOARD_PORT (timeout=${TIMEOUT}s)"
+      while true; do
+        backend_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 3 "http://127.0.0.1:${BACKEND_PORT}/health?db=1" 2>/dev/null || true)
+        dashboard_code=$(curl -s -o /dev/null -w '%{http_code}' --max-time 3 "http://127.0.0.1:${DASHBOARD_PORT}/handler/sign-in" 2>/dev/null || true)
+        if [ "$backend_code" = "200" ] && [ "$dashboard_code" = "200" ]; then
+          log "ready ($((SECONDS - start))s)"
+          exit 0
+        fi
+        if [ "$SECONDS" -ge "$next_heartbeat" ]; then
+          log "still waiting (backend=$backend_code dashboard=$dashboard_code, $((SECONDS - start))s elapsed)"
+          next_heartbeat=$((SECONDS + 30))
+        fi
+        if [ "$((SECONDS - start))" -ge "$TIMEOUT" ]; then
+          log "TIMEOUT after $((SECONDS - start))s (backend=$backend_code dashboard=$dashboard_code)"
+          docker ps -a 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: ps" || true
+          docker logs --tail 200 stack 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: stack" || true
+          systemctl status stack.service --no-pager -l 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: svc" || true
+          journalctl -u stack.service --no-pager -n 100 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: jrnl" || true
+          docker image ls 2>&1 | /usr/local/bin/log-provision-stream "wait-for-stack-ready: img" || true
+          exit 1
+        fi
+        sleep 2
+      done
+
+  - path: /usr/local/bin/trigger-fast-rotate
+    permissions: '0755'
+    content: |
+      #!/bin/bash
+      # Called via qemu-guest-agent on every snapshot resume. Reads fresh
+      # secrets from stdin (key=value lines, written by the host via QGA's
+      # guest-exec input-data) and execs rotate-secrets inside the stack
+      # container with those values exported.
+      set -euo pipefail
+
+      # mktemp creates the file 0600; the trap removes it if a later step fails.
+      tmp="$(mktemp /var/run/stack-fresh-XXXXXX.env)"
+      trap 'rm -f "$tmp"' EXIT
+      cat > "$tmp"
+      set -a
+      # shellcheck disable=SC1090
+      source "$tmp"
+      set +a
+      rm -f "$tmp"
+
+      exec docker exec \
+        -e STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY \
+        -e STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY \
+        -e STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY \
+        -e CRON_SECRET \
+        stack /usr/local/bin/rotate-secrets
+
  - path: /etc/systemd/system/stack.service
    content: |
      [Unit]
@ -591,6 +714,14 @@ write_files:
      systemctl disable --now ssh || true
      systemctl mask ssh || true

+      # qemu-guest-agent: used by the host to inject fresh secrets + trigger
+      # rotate-secrets after a snapshot resume. Must be running INSIDE the VM
+      # at snapshot capture time — the virtio-serial port's "open" state is
+      # part of the migrated device state. If QGA wasn't connected at capture,
+      # the resumed VM's port stays closed and the host can't reach it.
+      systemctl enable qemu-guest-agent || true
+      systemctl start qemu-guest-agent || true
+
      log_provision "installing emulator containers"
      bash /usr/local/bin/install-emulator-containers

@ -603,6 +734,53 @@ write_files:
      log_provision "starting slim-docker-image"
      bash /usr/local/bin/slim-docker-image

+      # Capture mode: bring the stack container up, wait for full
+      # readiness, emit STACK_SERVICES_READY, then wait indefinitely for the
+      # host build script to capture VM state over QMP (stop + migrate + quit).
+      # The VM never shuts itself down in this path — the host tears it down
+      # once the savevm file has been written.
+      #
+      # CI never sets STACK_EMULATOR_CAPTURE_SAVEVM=1 (snapshots aren't
+      # portable across accelerators, so they're captured locally on first
+      # `stack emulator pull`). This branch only fires for opt-in local
+      # builds run with EMULATOR_CAPTURE_SAVEVM=1.
+      if [ -f /etc/stack-build.env ] && grep -q '^STACK_EMULATOR_CAPTURE_SAVEVM=1' /etc/stack-build.env 2>/dev/null; then
+        log_provision "capture mode: starting stack.service"
+        systemctl start stack.service || true
+
+        log_provision "waiting for backend + dashboard to be ready"
+        if ! /usr/local/bin/wait-for-stack-ready; then
+          log_provision "ERROR: stack services did not become ready"
+          exit 1
+        fi
+
+        # Ensure qemu-guest-agent is running so its virtio-serial port stays
+        # "open" in the snapshot — the host needs that port at runtime to
+        # trigger rotate-secrets.
+        log_provision "ensuring qemu-guest-agent is up"
+        systemctl restart qemu-guest-agent || true
+        sleep 2
+        if ! systemctl is-active --quiet qemu-guest-agent; then
+          log_provision "ERROR: qemu-guest-agent failed to start"
+          systemctl status qemu-guest-agent --no-pager -l 2>&1 | /usr/local/bin/log-provision-stream "qga"
+          exit 1
+        fi
+        log_provision "qemu-guest-agent active"
+
+        log_provision "services ready; signalling STACK_SERVICES_READY"
+        if [ -n "${STACK_PROVISION_LOG_FILE:-}" ]; then
+          printf '%s\n' "STACK_SERVICES_READY" >> "$STACK_PROVISION_LOG_FILE"
+        fi
+        write_marker_to_consoles "STACK_SERVICES_READY"
+        sync || true
+
+        # Clear the EXIT trap so the cleanup path doesn't mark this as failed
+        # when the host powers us off via QMP quit.
+        trap - EXIT
+        # Block forever; host will issue qmp quit after migrate completes.
+        while true; do sleep 3600; done
+      fi
+
      log_provision "build pipeline complete"
      if [ -n "${STACK_PROVISION_LOG_FILE:-}" ]; then
        printf '%s\n' "STACK_CLOUD_INIT_DONE" >> "$STACK_PROVISION_LOG_FILE"
--- a/docker/local-emulator/qemu/common.sh
+++ b/docker/local-emulator/qemu/common.sh
@ -68,3 +68,142 @@ make_iso_from_dir() {
    exit 1
  fi
 }
+
+# Send one or more QMP commands over the monitor socket. Stdin is a stream of
+# JSON objects; qmp_capabilities is always sent first to exit negotiation mode.
+# Keep stdin open briefly after writing so socat doesn't close before QEMU
+# responds — QMP replies in milliseconds so 0.5s is plenty.
+#
+# Callers: build-image.sh capture flow, run-emulator.sh cmd_capture.
+qmp_session() {
+  local sock="$1"
+  local payload
+  payload="$(cat)"
+  ( printf '%s\n' "$payload"; sleep 0.5 ) | socat -t30 - "UNIX-CONNECT:${sock}"
+}
+
+# Drive the snapshot capture over QMP:
+#   1. qmp_capabilities — exit negotiation mode.
+#   2. stop — pause the VM so no more disk writes happen.
+#   3. migrate-set-capabilities / -parameters — mapped-ram + 4 multifd channels.
+#   4. migrate to file:<path> — streams RAM/device state out.
+#   5. Poll query-migrate until status=completed (or failed).
+#   6. quit — terminate QEMU cleanly.
+#
+# Depends on log/err/warn being defined by the sourcing script.
+capture_vm_state() {
+  local sock="$1"
+  local guest_path="$2"
+
+  if [ ! -S "$sock" ]; then
+    err "QMP monitor socket missing: $sock"
+    return 1
+  fi
+
+  log "  QMP: stopping VM..."
+  {
+    printf '%s\n' '{"execute":"qmp_capabilities"}'
+    printf '%s\n' '{"execute":"stop"}'
+  } | qmp_session "$sock" >/dev/null || {
+    err "QMP stop failed"
+    return 1
+  }
+
+  log "  QMP: enabling mapped-ram + multifd for fast resume..."
+  # mapped-ram: writes each RAM page to a fixed offset in the output file
+  # (vs the legacy streamed format). This lets the target QEMU mmap the file
+  # and fault pages lazily — and combined with multifd, load RAM in parallel.
+  # multifd-channels=4 matches our pinned SMP so the channels don't starve
+  # each other on the target's 4 vCPUs.
+  local caps_cmd params_cmd
+  caps_cmd='{"execute":"migrate-set-capabilities","arguments":{"capabilities":[{"capability":"mapped-ram","state":true},{"capability":"multifd","state":true}]}}'
+  params_cmd='{"execute":"migrate-set-parameters","arguments":{"multifd-channels":4}}'
+  local setup_resp
+  setup_resp=$({
+    printf '%s\n' '{"execute":"qmp_capabilities"}'
+    printf '%s\n' "$caps_cmd"
+    printf '%s\n' "$params_cmd"
+  } | qmp_session "$sock") || {
+    err "QMP capabilities setup failed"
+    return 1
+  }
+  if printf '%s' "$setup_resp" | grep -q '"error"[[:space:]]*:'; then
+    err "QMP capabilities returned error: $setup_resp"
+    return 1
+  fi
+
+  log "  QMP: migrating RAM state to ${guest_path}..."
+  # Use file: migration (native QEMU) instead of exec: to avoid relying on a
+  # spawned shell finding zstd in PATH. Compressed as a separate host step
+  # after migrate completes.
+  local migrate_cmd
+  migrate_cmd=$(printf '{"execute":"migrate","arguments":{"uri":"file:%s"}}' "$guest_path")
+  local migrate_resp
+  migrate_resp=$({
+    printf '%s\n' '{"execute":"qmp_capabilities"}'
+    printf '%s\n' "$migrate_cmd"
+  } | qmp_session "$sock") || {
+    err "QMP migrate failed"
+    return 1
+  }
+  if printf '%s' "$migrate_resp" | grep -q '"error"[[:space:]]*:'; then
+    err "QMP migrate returned error: $migrate_resp"
+    return 1
+  fi
+
+  # Poll query-migrate until "completed" or "failed"; migrate itself returns
+  # immediately. Elapsed time uses $SECONDS so each poll's round-trip counts.
+  local migrate_timeout=600
+  local started=$SECONDS waited=0
+  local last_heartbeat=0
+  while [ "$waited" -lt "$migrate_timeout" ]; do
+    local status_line status
+    status_line=$({
+      printf '%s\n' '{"execute":"qmp_capabilities"}'
+      printf '%s\n' '{"execute":"query-migrate"}'
+    } | qmp_session "$sock" 2>/dev/null || true)
+    status="$(printf '%s\n' "$status_line" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/')"
+    case "$status" in
+      completed)
+        log "  QMP: migrate completed (${waited}s)"
+        break
+        ;;
+      failed|cancelled)
+        err "  QMP: migrate ended with status=$status"
+        err "  QMP response: $status_line"
+        return 1
+        ;;
+      active|setup|device|"")
+        # still running
+        if [ "$((waited - last_heartbeat))" -ge 30 ]; then
+          local transferred
+          transferred=$(printf '%s' "$status_line" | grep -o '"transferred"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/')
+          log "  QMP: migrate in progress (${waited}s, status=${status:-init}, transferred=${transferred:-0})"
+          last_heartbeat=$waited
+        fi
+        ;;
+      *)
+        log "  QMP: migrate status=$status (${waited}s)"
+        ;;
+    esac
+    sleep 2
+    waited=$((SECONDS - started))
+  done
+
+  if [ "$waited" -ge "$migrate_timeout" ]; then
+    err "QMP migrate timed out after ${migrate_timeout}s"
+    err "Last query-migrate response: $({
+      printf '%s\n' '{"execute":"qmp_capabilities"}'
+      printf '%s\n' '{"execute":"query-migrate"}'
+    } | qmp_session "$sock" 2>/dev/null || true)"
+    return 1
+  fi
+
+  log "  QMP: quitting VM..."
+  {
+    printf '%s\n' '{"execute":"qmp_capabilities"}'
+    printf '%s\n' '{"execute":"quit"}'
+  } | qmp_session "$sock" >/dev/null || true
+
+  return 0
+}
--- a/docker/local-emulator/qemu/run-emulator.sh
+++ b/docker/local-emulator/qemu/run-emulator.sh
@ -12,6 +12,22 @@ VM_RAM="${EMULATOR_RAM:-4096}"
 VM_CPUS="${EMULATOR_CPUS:-4}"
 PORT_PREFIX="${PORT_PREFIX:-${NEXT_PUBLIC_STACK_PORT_PREFIX:-81}}"
 READY_TIMEOUT="${EMULATOR_READY_TIMEOUT:-240}"
+# Shorter timeout when resuming from a snapshot: services are already running,
+# we only need to wait for rotate-secrets + Node restart (~3-10s).
+SNAPSHOT_READY_TIMEOUT="${EMULATOR_SNAPSHOT_READY_TIMEOUT:-45}"
+# Set to 1 to force a cold boot and ignore any shipped savevm file.
+EMULATOR_NO_SNAPSHOT="${EMULATOR_NO_SNAPSHOT:-0}"
+# Skip the post-resume secret rotation. Keeps the baked placeholder secrets
+# in place — acceptable for tests and CI that don't reach the emulator over
+# a shared network. Shaves ~2-3s off `emulator start`.
+EMULATOR_NO_ROTATION="${EMULATOR_NO_ROTATION:-0}"
+# Internal: set to 1 by cmd_capture to launch QEMU with the snapshot-compatible
+# device layout (phantom ISOs, no virtfs, pcie-root-port, pinned 4096MB/4CPU)
+# without the `-incoming defer` that resume mode adds. The captured snapshot
+# must be byte-compatible with what the resume path will later feed to QEMU.
+EMULATOR_CAPTURING_SNAPSHOT="${EMULATOR_CAPTURING_SNAPSHOT:-0}"
+# Force re-capture even if a .savevm.zst is already present.
+EMULATOR_FORCE_CAPTURE="${EMULATOR_FORCE_CAPTURE:-0}"

 # Fixed host-side ports for the QEMU emulator (267xx range).
 # Only user-facing services are exposed; internal deps stay inside the VM.
@ -62,10 +78,67 @@ image_path() {
  echo "$IMAGE_DIR/stack-emulator-$ARCH.qcow2"
 }

+savevm_path() {
+  echo "$IMAGE_DIR/stack-emulator-$ARCH.savevm.zst"
+}
+
+# Cached, decompressed mapped-ram file. Created on first resume from the .zst
+# and reused on subsequent resumes — mapped-ram format requires a seekable
+# file, so we can't stream through zstd and use multifd at the same time.
+savevm_raw_path() {
+  echo "$IMAGE_DIR/stack-emulator-$ARCH.savevm.raw"
+}
+
 runtime_iso_path() {
  echo "$VM_DIR/runtime-config.iso"
 }

+snapshot_available() {
+  [ "$EMULATOR_NO_SNAPSHOT" != "1" ] && [ "$EMULATOR_CAPTURING_SNAPSHOT" != "1" ] && [ -s "$(savevm_path)" ]
+}
+
+# True when QEMU must use the snapshot-compatible device layout — either to
+# resume from an existing snapshot or to capture a new one. Resume adds
+# `-incoming defer`; capture does not. Everything else (phantom ISOs, no
+# virtfs, pcie-root-port, pinned RAM/SMP) matches.
+snapshot_layout() {
+  snapshot_available || [ "$EMULATOR_CAPTURING_SNAPSHOT" = "1" ]
+}
+
+# Ensure the decompressed mapped-ram cache is at least as new (by mtime) as
+# the shipped .zst; otherwise re-decompress it. Returns 1 on failure.
+ensure_savevm_raw() {
+  local zst raw zst_ts raw_ts
+  zst="$(savevm_path)"
+  raw="$(savevm_raw_path)"
+
+  case "$HOST_OS" in
+    darwin)
+      zst_ts="$(stat -f '%m' "$zst" 2>/dev/null || echo 0)"
+      raw_ts="$(stat -f '%m' "$raw" 2>/dev/null || echo 0)"
+      ;;
+    *)
+      zst_ts="$(stat -c '%Y' "$zst" 2>/dev/null || echo 0)"
+      raw_ts="$(stat -c '%Y' "$raw" 2>/dev/null || echo 0)"
+      ;;
+  esac
+
+  if [ -s "$raw" ] && [ "$raw_ts" -ge "$zst_ts" ]; then
+    return 0
+  fi
+
+  log "Decompressing snapshot cache (one-time; ~2-3GB sparse)..."
+  local tmp="${raw}.tmp"
+  rm -f "$tmp"
+  # --sparse: zstd disables sparse writes on stdout by default; force them on.
+  if ! zstd -dc --sparse "$zst" > "$tmp"; then
+    err "Failed to decompress $zst"
+    rm -f "$tmp"
+    return 1
+  fi
+  mv "$tmp" "$raw"
+}
+
 # Returns a fast fingerprint (size:mtime) of the base QEMU image.
 # Used to detect whether the image has changed since the overlay was created.
 base_image_fingerprint() {
@ -77,10 +150,62 @@ base_image_fingerprint() {
  esac
 }

-prepare_runtime_config_iso() {
+# Fingerprint used to detect stale overlays. Includes both the base qcow2 and
+# the savevm file so the overlay is rebuilt whenever either input changes. The
+# overlay disk must match the disk state the snapshot was taken against for
+# -incoming resume to be consistent.
+runtime_fingerprint() {
+  local base="$1"
+  local savevm="$2"
+  local base_fp savevm_fp
+  base_fp="$(base_image_fingerprint "$base")"
+  if [ -f "$savevm" ]; then
+    savevm_fp="$(base_image_fingerprint "$savevm")"
+  else
+    savevm_fp="no-savevm"
+  fi
+  printf '%s|%s\n' "$base_fp" "$savevm_fp"
+}
+
+ensure_runtime_config_iso() {
+  # When invoked via stack-cli, the CLI writes the runtime ISO natively
+  # (packages/stack-cli/src/lib/iso.ts) immediately before spawning us and
+  # sets STACK_EMULATOR_CLI_WROTE_ISO=1. Trust it and skip regeneration —
+  # otherwise we'd fall through to make_iso_from_dir and require
+  # hdiutil/mkisofs/genisoimage, which is exactly the host dep the CLI path
+  # is designed to remove.
+  if [ "${STACK_EMULATOR_CLI_WROTE_ISO:-}" = "1" ] && [ -s "$(runtime_iso_path)" ]; then
+    return 0
+  fi
+  # In capture mode, cmd_capture already wrote a specialized ISO with an
+  # empty STACK_EMULATOR_VM_DIR_HOST — required because virtfs is detached
+  # for snapshot compatibility, and run-stack-container would otherwise
+  # try to publish internal-pck to /host/... and restart-loop
+  # stack.service. Trust that write and don't overwrite it.
+  if [ "${EMULATOR_CAPTURING_SNAPSHOT:-}" = "1" ] && [ -s "$(runtime_iso_path)" ]; then
+    return 0
+  fi
+  # Direct-shell invocation path: regenerate unconditionally. Port env vars
+  # (PORT_PREFIX, EMULATOR_*_PORT) may have changed since the last run, and
+  # an ISO cached from a prior invocation would silently override them.
+  write_runtime_config_iso "$VM_DIR"
+}
+
+# Write a STACKCFG runtime-config.iso containing runtime.env + base.env.
+# The VM_DIR_HOST arg is the path to publish internal-pck / stack.log to on
+# /host; pass empty string to suppress publication (used by capture mode
+# where /host isn't mounted — virtfs is detached for snapshot compatibility,
+# so any host-side write would fail and restart-loop stack.service).
+write_runtime_config_iso() {
+  local vm_dir_host="$1"
+  local base_env="$SCRIPT_DIR/../.env.development"
+  if [ ! -f "$base_env" ]; then
+    err "Cannot generate runtime config ISO: $base_env is missing."
+    err "Run 'pnpm run emulator:generate-env' first, or invoke via 'stack emulator start'."
+    exit 1
+  fi
+
  local cfg_dir="$VM_DIR/runtime-config"
-  local cfg_iso
-  cfg_iso="$(runtime_iso_path)"
  rm -rf "$cfg_dir"
  mkdir -p "$cfg_dir"
  {
@ -89,10 +214,10 @@ prepare_runtime_config_iso() {
    printf "STACK_EMULATOR_BACKEND_HOST_PORT=%s\n" "$EMULATOR_BACKEND_PORT"
    printf "STACK_EMULATOR_MINIO_HOST_PORT=%s\n" "$EMULATOR_MINIO_PORT"
    printf "STACK_EMULATOR_INBUCKET_HOST_PORT=%s\n" "$EMULATOR_INBUCKET_PORT"
-    printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$VM_DIR"
+    printf "STACK_EMULATOR_VM_DIR_HOST=%s\n" "$vm_dir_host"
  } > "$cfg_dir/runtime.env"
-  cp "$SCRIPT_DIR/../.env.development" "$cfg_dir/base.env"
-  make_iso_from_dir "$cfg_iso" "STACKCFG" "$cfg_dir"
+  cp "$base_env" "$cfg_dir/base.env"
+  make_iso_from_dir "$(runtime_iso_path)" "STACKCFG" "$cfg_dir"
 }

 service_is_up() {
@ -145,7 +270,7 @@ wait_for_condition() {
      log "${label} ready in ${elapsed}s"
      return 0
    fi
-    sleep 1
+    sleep 0.2
    elapsed=$((SECONDS - started))
    printf "\r  [%3ds] %s..." "$elapsed" "$label"
  done
@ -154,8 +279,9 @@ wait_for_condition() {
 }

 build_qemu_cmd() {
-  local base_img
+  local base_img savevm_file
  base_img="$(image_path)"
+  savevm_file="$(savevm_path)"

  if [ ! -f "$base_img" ]; then
    err "Missing QEMU image: $base_img"
@ -166,18 +292,36 @@ build_qemu_cmd() {
  mkdir -p "$VM_DIR"
  local fingerprint_file="$VM_DIR/base-image.fingerprint"
  local current_fp
-  current_fp="$(base_image_fingerprint "$base_img")"
-  if [ -f "$VM_DIR/disk.qcow2" ]; then
-    if [ -f "$fingerprint_file" ] && [ "$(cat "$fingerprint_file")" = "$current_fp" ]; then
-      log "Reusing existing overlay disk (changes persist)"
-    else
-      warn "QEMU base image has changed — recreating overlay."
+  current_fp="$(runtime_fingerprint "$base_img" "$savevm_file")"
+
+  if snapshot_layout; then
+    # The savevm RAM state was captured against the base image's exact disk
+    # state. An overlay with writes from a previous session diverges from
+    # that point, so -incoming would resume RAM against inconsistent disk.
+    # Always start from a fresh overlay in the snapshot path; per-session
+    # state is not preserved. Users who want persistence can opt out with
+    # EMULATOR_NO_SNAPSHOT=1. Capture mode also needs a clean overlay so the
+    # snapshot we write is taken against the base's known disk state.
+    if [ -f "$VM_DIR/disk.qcow2" ]; then
      rm -f "$VM_DIR/disk.qcow2" "$fingerprint_file"
    fi
-  fi
-  if [ ! -f "$VM_DIR/disk.qcow2" ]; then
    qemu-img create -f qcow2 -b "$base_img" -F qcow2 "$VM_DIR/disk.qcow2" >/dev/null
-    base_image_fingerprint "$base_img" > "$fingerprint_file"
+    printf '%s' "$current_fp" > "$fingerprint_file"
+  else
+    # If the overlay was created against a different base or savevm, it will
+    # diverge from the snapshot's disk state — force a rebuild.
+    if [ -f "$VM_DIR/disk.qcow2" ]; then
+      if [ -f "$fingerprint_file" ] && [ "$(cat "$fingerprint_file")" = "$current_fp" ]; then
+        log "Reusing existing overlay disk (changes persist)"
+      else
+        warn "Base image or snapshot has changed — recreating overlay."
+        rm -f "$VM_DIR/disk.qcow2" "$fingerprint_file"
+      fi
+    fi
+    if [ ! -f "$VM_DIR/disk.qcow2" ]; then
+      qemu-img create -f qcow2 -b "$base_img" -F qcow2 "$VM_DIR/disk.qcow2" >/dev/null
+      printf '%s' "$current_fp" > "$fingerprint_file"
+    fi
  fi

  local qemu_bin machine cpu firmware_args=()
@ -213,28 +357,127 @@ build_qemu_cmd() {
  # are mutually exclusive.
  netdev+=",hostfwd=tcp:127.0.0.1:${PORT_PREFIX}14-:${PORT_PREFIX}14"

-  QEMU_CMD=(
-    "$qemu_bin"
-    -machine "$machine"
-    -accel "$ACCEL"
-    -cpu "$cpu"
-    "${firmware_args[@]}"
-    -boot order=c
-    -m "$VM_RAM"
-    -smp "$VM_CPUS"
-    -drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio"
-    -drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on"
-    -netdev "$netdev"
-    -device virtio-net-pci,netdev=net0
-    -device virtio-balloon-pci
-    -virtfs "local,path=/,mount_tag=hostfs,security_model=none"
-    -chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off"
-    -mon "chardev=monitor,mode=control"
-    -serial "file:$VM_DIR/serial.log"
-    -display none
-    -daemonize
-    -pidfile "$VM_DIR/qemu.pid"
-  )
+  # In snapshot-resume mode the QEMU command-line MUST match the device set
+  # used at snapshot capture time, otherwise migration replay fails (broken
+  # pipe / device tree mismatch). At capture time the build attaches:
+  #   disk(if=virtio) + seed.iso + bundle.iso + runtime.iso (all if=virtio)
+  #   netdev + virtio-net-pci + monitor + QGA virtio-serial
+  #   SMP=4, RAM=4096 (pinned in build-image.sh snapshot mode)
+  # We mirror that exactly. The seed/bundle ISOs were used by cloud-init at
+  # build and are not needed at runtime, but their virtio-blk slots must
+  # exist so the migration replay matches device IDs. virtfs and balloon are
+  # cold-boot only; after resume, virtio-9p is hot-plugged on hostfs-port.
+  local snapshot_args=() runtime_only_args=() snapshot_smp="$VM_CPUS" snapshot_ram="$VM_RAM"
+  if snapshot_layout; then
+    if snapshot_available; then
+      log "Snapshot found at $savevm_file — fast-resume enabled."
+      # -incoming defer: QEMU starts, waits for a QMP migrate-incoming command.
+      # We use that to set mapped-ram + multifd capabilities before loading,
+      # which enables parallel RAM restore (~2-3x faster than streamed decode).
+      snapshot_args+=(-incoming defer)
+    else
+      log "Capture mode: booting with snapshot-compatible layout (no -incoming)."
+    fi
+    snapshot_smp="${EMULATOR_SNAPSHOT_CPUS:-4}"
+    # RAM size is baked into the snapshot; migration replay requires an
+    # identical -m value. Pin to the build-time RAM (4096) and ignore
+    # EMULATOR_RAM — override via EMULATOR_SNAPSHOT_RAM if a different
+    # snapshot was produced.
+    snapshot_ram="${EMULATOR_SNAPSHOT_RAM:-4096}"
+    if [ "$snapshot_smp" != "$VM_CPUS" ]; then
+      log "Pinning SMP to ${snapshot_smp} for snapshot resume (build-time value)."
+    fi
+    if [ "$snapshot_ram" != "$VM_RAM" ]; then
+      log "Pinning RAM to ${snapshot_ram}MB for snapshot resume (ignoring EMULATOR_RAM=${VM_RAM})."
+    fi
+
+    # Tiny placeholder ISOs to match the seed.iso / bundle.iso slots present
+    # at snapshot time. Their content doesn't matter (cloud-init has already
+    # run); only the virtio-blk slot count must match.
+    local seed_phantom="$VM_DIR/seed.phantom"
+    local bundle_phantom="$VM_DIR/bundle.phantom"
+    if [ ! -s "$seed_phantom" ]; then
+      dd if=/dev/zero of="$seed_phantom" bs=1M count=1 status=none
+    fi
+    if [ ! -s "$bundle_phantom" ]; then
+      dd if=/dev/zero of="$bundle_phantom" bs=1M count=1 status=none
+    fi
+    runtime_only_args+=(
+      -drive "file=$seed_phantom,format=raw,if=virtio,readonly=on"
+      -drive "file=$bundle_phantom,format=raw,if=virtio,readonly=on"
+    )
+  else
+    # Cold-boot: include virtio-balloon and virtfs as before.
+    runtime_only_args+=(
+      -device virtio-balloon-pci
+      -virtfs "local,path=/,mount_tag=hostfs,security_model=none"
+    )
+  fi
+
+  if snapshot_layout; then
+    QEMU_CMD=(
+      "$qemu_bin"
+      -machine "$machine"
+      -accel "$ACCEL"
+      -cpu "$cpu"
+      "${firmware_args[@]}"
+      -boot order=c
+      -m "$snapshot_ram"
+      -smp "$snapshot_smp"
+      -drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio"
+      "${runtime_only_args[@]}"
+      -drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on"
+      -netdev "$netdev"
+      -device virtio-net-pci,netdev=net0
+      -chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off"
+      -mon "chardev=monitor,mode=control"
+      -chardev "socket,path=$VM_DIR/qga.sock,server=on,wait=off,id=qga0"
+      -device virtio-serial
+      -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0"
+      # Empty PCIe root port reserved for runtime hot-plug of virtio-9p.
+      # MUST be the last explicit -device entry — slot order has to mirror
+      # build-image.sh exactly or migration replay stalls in inmigrate.
+      -device "pcie-root-port,id=hostfs-port,bus=pcie.0,chassis=1"
+      # Pre-create the host-side fsdev backend so the post-resume QMP
+      # device_add can attach to it by id. -fsdev is host-only state — not
+      # part of the migrated device tree — so it's safe to add here even
+      # though the snapshot was captured without it. Going through -fsdev
+      # avoids the HMP fsdev_add command, whose error path is invisible
+      # via human-monitor-command (errors come back as a return string,
+      # not a QMP error).
+      -fsdev "local,id=hostfs,path=/,security_model=none"
+      ${snapshot_args[@]+"${snapshot_args[@]}"}
+      -serial "file:$VM_DIR/serial.log"
+      -display none
+      -daemonize
+      -pidfile "$VM_DIR/qemu.pid"
+    )
+  else
+    QEMU_CMD=(
+      "$qemu_bin"
+      -machine "$machine"
+      -accel "$ACCEL"
+      -cpu "$cpu"
+      "${firmware_args[@]}"
+      -boot order=c
+      -m "$VM_RAM"
+      -smp "$snapshot_smp"
+      -drive "file=$VM_DIR/disk.qcow2,format=qcow2,if=virtio"
+      -drive "file=$(runtime_iso_path),format=raw,if=virtio,readonly=on"
+      -netdev "$netdev"
+      -device virtio-net-pci,netdev=net0
+      "${runtime_only_args[@]}"
+      -chardev "socket,id=monitor,path=$VM_DIR/monitor.sock,server=on,wait=off"
+      -mon "chardev=monitor,mode=control"
+      -chardev "socket,path=$VM_DIR/qga.sock,server=on,wait=off,id=qga0"
+      -device virtio-serial
+      -device "virtserialport,chardev=qga0,name=org.qemu.guest_agent.0"
+      -serial "file:$VM_DIR/serial.log"
+      -display none
+      -daemonize
+      -pidfile "$VM_DIR/qemu.pid"
+    )
+  fi

 }

@ -269,11 +512,225 @@ ensure_ports_free() {
 start_vm() {
  mkdir -p "$VM_DIR"
  : > "$VM_DIR/serial.log"
-  prepare_runtime_config_iso
+  ensure_runtime_config_iso
  build_qemu_cmd
  "${QEMU_CMD[@]}"
 }

+# Deliver the QMP commands read from stdin (one JSON object per line) to the
+# VM's monitor socket and print whatever QEMU answers. A qmp_capabilities
+# handshake is always written ahead of the caller's commands. Returns 1,
+# without consuming stdin, when the monitor socket does not exist. The writer
+# lingers 0.3s after the last command so socat does not hang up before QEMU
+# has replied (QMP normally answers within milliseconds).
+qmp_send() {
+  local sock="$VM_DIR/monitor.sock"
+  [ -S "$sock" ] || return 1
+  local commands
+  commands="$(cat)"
+  (
+    printf '%s\n%s\n' '{"execute":"qmp_capabilities"}' "$commands"
+    sleep 0.3
+  ) | socat -t5 - "UNIX-CONNECT:$sock" 2>/dev/null
+}
+
+# Drive a QEMU that was started with -incoming defer through a snapshot
+# restore. $1 is the decompressed (raw) state file. Steps:
+#   1. enable the mapped-ram + multifd capabilities and set 4 multifd channels,
+#   2. issue migrate-incoming for file:$1,
+#   3. poll query-status for up to 60s.
+# Returns 0 once the VM reports "running", or right after sending `cont` when
+# it reports paused/postmigrate/prelaunch. Returns 1 if QEMU answers step 1 or
+# 2 with an error, or if the poll times out.
+qmp_incoming_and_cont() {
+  local raw_file="$1"
+  local caps_reply incoming_cmd incoming_reply
+
+  # Capabilities and parameters must be in place before migrate-incoming,
+  # same as on the source side.
+  caps_reply=$(
+    printf '%s\n%s\n' \
+      '{"execute":"migrate-set-capabilities","arguments":{"capabilities":[{"capability":"mapped-ram","state":true},{"capability":"multifd","state":true}]}}' \
+      '{"execute":"migrate-set-parameters","arguments":{"multifd-channels":4}}' \
+      | qmp_send
+  )
+  if printf '%s' "$caps_reply" | grep -q '"error"'; then
+    err "QMP caps setup failed: $caps_reply"
+    return 1
+  fi
+
+  # Start loading RAM from the mapped-ram file.
+  incoming_cmd=$(printf '{"execute":"migrate-incoming","arguments":{"uri":"file:%s"}}' "$raw_file")
+  incoming_reply=$(printf '%s\n' "$incoming_cmd" | qmp_send)
+  if printf '%s' "$incoming_reply" | grep -q '"error"'; then
+    err "QMP migrate-incoming failed: $incoming_reply"
+    return 1
+  fi
+
+  # Poll the run state. An empty state (no parsable reply) and "inmigrate"
+  # both mean "keep waiting".
+  local give_up_at=$((SECONDS + 60))
+  local reply vm_state
+  while [ "$SECONDS" -lt "$give_up_at" ]; do
+    reply=$(printf '%s\n' '{"execute":"query-status"}' | qmp_send || true)
+    vm_state=$(printf '%s' "$reply" | grep -o '"status"[[:space:]]*:[[:space:]]*"[a-z-]*"' | head -1 | sed -E 's/.*"([a-z-]+)".*/\1/')
+    if [ "$vm_state" = "running" ]; then
+      return 0
+    elif [ "$vm_state" = "paused" ] || [ "$vm_state" = "postmigrate" ] || [ "$vm_state" = "prelaunch" ]; then
+      # Loaded but not executing yet: resume it (best effort) and report success.
+      printf '%s\n' '{"execute":"cont"}' | qmp_send >/dev/null || true
+      return 0
+    elif [ -n "$vm_state" ] && [ "$vm_state" != "inmigrate" ]; then
+      log "unexpected QMP status: $vm_state"
+    fi
+    sleep 0.2
+  done
+  return 1
+}
+
+# Placeholder PCK baked into the snapshot. Kept in sync with the value in
+# docker/local-emulator/qemu/cloud-init/emulator/user-data. The same literal
+# is PLACEHOLDER_PCK in docker/local-emulator/rotate-secrets.sh, which is the
+# string that script searches for when patching the built JS — change all
+# copies together. 64 hex characters, the same shape as the fresh keys
+# produced with `openssl rand -hex 32`.
+SNAPSHOT_PLACEHOLDER_PCK="00000000000000000000000000000000ffffffffffffffffffffffffffffffff"
+
+# Publish the internal PCK ($1) at $VM_DIR/internal-pck, the host path the CLI
+# reads (see readInternalPck() in packages/stack-cli/src/commands/emulator.ts).
+# On a cold boot the guest publishes this file over virtfs/9p; snapshot mode
+# has no virtfs at that point, so the host writes the file itself. The write
+# happens in a subshell under umask 077 so a newly created file is private to
+# the owner and the caller's umask is left untouched. No trailing newline is
+# written.
+write_internal_pck_for_cli() {
+  local key="$1"
+  (
+    umask 077
+    printf '%s' "$key" > "$VM_DIR/internal-pck"
+  )
+}
+
+# Talk to qemu-guest-agent through its virtserialport socket. The agent uses
+# the same JSON protocol as QMP but on its own channel. Callers use it for
+# guest-sync (is the agent responsive?) and for guest-exec /
+# guest-exec-status (e.g. to fire trigger-fast-rotate). The request is read
+# from stdin and the agent's reply is printed on stdout. Returns 1, without
+# consuming stdin, when the socket does not exist.
+qga_send() {
+  local sock="$VM_DIR/qga.sock"
+  [ -S "$sock" ] || return 1
+  local request
+  request="$(cat)"
+  # socat hangs up as soon as stdin reaches EOF, which would beat the agent's
+  # reply, so the writer stays alive for a short window after the request.
+  # The agent answers in milliseconds; 0.5s rather than 0.1s only absorbs
+  # scheduling jitter on a busy host.
+  {
+    printf '%s\n' "$request"
+    sleep 0.5
+  } | socat -t10 - "UNIX-CONNECT:$sock" 2>/dev/null
+}
+
+# Wait up to 30s for the guest agent to answer. Sends guest-sync with a fixed
+# id every 0.2s and returns 0 as soon as a reply echoes that id back; returns
+# 1 if no such reply arrives before the deadline.
+qga_wait_ready() {
+  local give_up_at=$((SECONDS + 30))
+  local reply
+  while (( SECONDS < give_up_at )); do
+    reply=$(printf '%s\n' '{"execute":"guest-sync","arguments":{"id":424242}}' | qga_send || true)
+    if printf '%s' "$reply" | grep -q '"return":[[:space:]]*424242'; then
+      return 0
+    fi
+    sleep 0.2
+  done
+  return 1
+}
+
+# Attach the host filesystem to a VM that was resumed from a snapshot.
+# Snapshots are captured WITHOUT virtfs (QEMU disallows migration while 9p is
+# mounted in the guest), so a resumed VM has no host filesystem until a
+# virtio-9p device is hot-plugged here. Only the device_add half is needed:
+# the "hostfs" fsdev backend already exists (the -fsdev option in
+# build_qemu_cmd) and the device goes onto the reserved "hostfs-port" root
+# port. Returns 1 if QEMU's reply contains an error, 0 otherwise.
+qmp_hotplug_9p() {
+  local device_add_cmd reply
+  device_add_cmd='{"execute":"device_add","arguments":{"driver":"virtio-9p-pci","id":"hostfs-dev","fsdev":"hostfs","mount_tag":"hostfs","bus":"hostfs-port"}}'
+  reply=$(printf '%s\n' "$device_add_cmd" | qmp_send)
+  if ! printf '%s' "$reply" | grep -q '"error"'; then
+    return 0
+  fi
+  err "QMP device_add virtio-9p-pci failed: $reply"
+  return 1
+}
+
+# Run /usr/local/bin/mount-host-fs --post-resume in the guest. The script
+# mounts the freshly-hot-plugged 9p device on /host, which is a shared
+# mount point — so the new mount propagates into the running stack
+# container's `-v /host:/host:rshared` bind mount without a container
+# restart.
+#
+# Starts the script with guest-exec, then polls guest-exec-status every 0.2s
+# for up to 20s. Returns 0 only when the agent reports the process exited
+# with exit code 0; returns 1 if no pid came back, the script failed or was
+# killed, or the deadline passed.
+qga_mount_host_fs() {
+  local cmd resp pid status_resp exited exitcode
+  cmd='{"execute":"guest-exec","arguments":{"path":"/usr/local/bin/mount-host-fs","arg":["--post-resume"],"capture-output":true}}'
+  resp=$(printf '%s\n' "$cmd" | qga_send || true)
+  pid=$(printf '%s' "$resp" | grep -o '"pid"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/')
+  if [ -z "$pid" ]; then
+    err "guest-exec mount-host-fs did not return a pid; response: $resp"
+    return 1
+  fi
+  local deadline=$((SECONDS + 20))
+  while [ "$SECONDS" -lt "$deadline" ]; do
+    status_resp=$(printf '%s\n' "{\"execute\":\"guest-exec-status\",\"arguments\":{\"pid\":${pid}}}" | qga_send || true)
+    exited=$(printf '%s' "$status_resp" | grep -o '"exited"[[:space:]]*:[[:space:]]*\(true\|false\)' | head -1 | sed -E 's/.*:[[:space:]]*(true|false).*/\1/')
+    if [ "$exited" = "true" ]; then
+      exitcode=$(printf '%s' "$status_resp" | grep -o '"exitcode"[[:space:]]*:[[:space:]]*-\{0,1\}[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*(-?[0-9]+).*/\1/')
+      # guest-exec-status only carries "exitcode" when the process exited
+      # normally; a process terminated by a signal reports "signal" instead.
+      # A missing exit code must therefore count as a failure, never as 0.
+      if [ "$exitcode" = "0" ]; then
+        log "host fs mounted in guest"
+        return 0
+      fi
+      err "mount-host-fs exited with code ${exitcode:-unknown}; response: $status_resp"
+      return 1
+    fi
+    sleep 0.2
+  done
+  err "mount-host-fs did not complete within 20s"
+  return 1
+}
+
+# Generate four fresh 64-hex-char secrets on the host and have the guest
+# apply them by running /usr/local/bin/trigger-fast-rotate through
+# qemu-guest-agent. Returns 0 only when the agent reports the script exited
+# with exit code 0; returns 1 if guest-exec returned no pid, the script
+# failed or was killed, or it did not finish within 60s.
+qga_trigger_fast_rotate() {
+  # guest-exec returns a pid; we then poll guest-exec-status until the
+  # process exits, and surface its exit code. Output is captured so that a
+  # failure's guest-exec-status response (logged below) includes it. We pipe
+  # the fresh-secrets env file (as base64) to the script via input-data —
+  # keeps secrets off the filesystem and avoids needing virtfs.
+  local fresh_pck fresh_ssk fresh_sak fresh_cron payload secrets_b64 resp pid
+  fresh_pck="$(openssl rand -hex 32)"
+  fresh_ssk="$(openssl rand -hex 32)"
+  fresh_sak="$(openssl rand -hex 32)"
+  fresh_cron="$(openssl rand -hex 32)"
+  payload=$(
+    printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$fresh_pck"
+    printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$fresh_ssk"
+    printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$fresh_sak"
+    printf 'CRON_SECRET=%s\n' "$fresh_cron"
+  )
+  # Publish the fresh PCK to the host path the CLI reads. Writing before the
+  # guest-exec so a --config-file flow that polls from another process can
+  # pick it up the moment rotation completes.
+  write_internal_pck_for_cli "$fresh_pck"
+  secrets_b64=$(printf '%s' "$payload" | base64 | tr -d '\n')
+  local cmd
+  cmd=$(printf '{"execute":"guest-exec","arguments":{"path":"/usr/local/bin/trigger-fast-rotate","capture-output":true,"input-data":"%s"}}' "$secrets_b64")
+  resp=$(printf '%s\n' "$cmd" | qga_send || true)
+  pid=$(printf '%s' "$resp" | grep -o '"pid"[[:space:]]*:[[:space:]]*[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*([0-9]+).*/\1/')
+  if [ -z "$pid" ]; then
+    err "guest-exec did not return a pid; response: $resp"
+    return 1
+  fi
+
+  # Rotation (sed + UPDATE + supervisorctl restart + node startup) fits well
+  # inside this window.
+  local deadline=$((SECONDS + 60))
+  while [ "$SECONDS" -lt "$deadline" ]; do
+    local status_resp exited exitcode
+    status_resp=$(printf '%s\n' "{\"execute\":\"guest-exec-status\",\"arguments\":{\"pid\":${pid}}}" | qga_send || true)
+    exited=$(printf '%s' "$status_resp" | grep -o '"exited"[[:space:]]*:[[:space:]]*\(true\|false\)' | head -1 | sed -E 's/.*:[[:space:]]*(true|false).*/\1/')
+    if [ "$exited" = "true" ]; then
+      exitcode=$(printf '%s' "$status_resp" | grep -o '"exitcode"[[:space:]]*:[[:space:]]*-\{0,1\}[0-9]*' | head -1 | sed -E 's/.*:[[:space:]]*(-?[0-9]+).*/\1/')
+      # guest-exec-status only carries "exitcode" when the process exited
+      # normally; a process terminated by a signal reports "signal" instead.
+      # A missing exit code must therefore count as a failure, never as 0 —
+      # otherwise a killed rotation would be reported as completed.
+      if [ "$exitcode" = "0" ]; then
+        log "rotate-secrets completed."
+        return 0
+      fi
+      err "rotate-secrets exited with code ${exitcode:-unknown}"
+      err "response: $status_resp"
+      return 1
+    fi
+    sleep 0.2
+  done
+  err "rotate-secrets did not complete within 60s"
+  return 1
+}
+
 stop_vm() {
  if [ ! -f "$VM_DIR/qemu.pid" ]; then
    return 0
@ -292,9 +749,10 @@ stop_vm() {
      kill -9 "$pid" 2>/dev/null || true
    fi
  fi
-  rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/serial.log"
-  rm -rf "$VM_DIR/runtime-config"
-  rm -f "$VM_DIR/runtime-config.iso"
+  rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/qga.sock" "$VM_DIR/serial.log"
+  # runtime-config.iso is left in place; ensure_runtime_config_iso regenerates
+  # it on the next start. `cmd_reset` wipes $RUN_DIR entirely when a full reset
+  # is wanted.
 }

 cmd_start() {
@ -305,18 +763,92 @@ cmd_start() {
  info "Arch: $ARCH | Accel: $ACCEL"
  info "Ports: Dashboard=$EMULATOR_DASHBOARD_PORT Backend=$EMULATOR_BACKEND_PORT MinIO=$EMULATOR_MINIO_PORT Inbucket=$EMULATOR_INBUCKET_PORT"

+  local using_snapshot=0
+  if snapshot_available; then
+    if ! ensure_savevm_raw; then
+      warn "Snapshot decompression failed — falling back to cold boot."
+      snapshot_fallback_to_cold_boot
+      return
+    fi
+    using_snapshot=1
+  fi
+
  start_vm

  info "VM: ${VM_RAM}MB / ${VM_CPUS} CPUs"

-  if ! wait_for_condition "deps services" "$READY_TIMEOUT" deps_ready; then
-    tail_vm_logs
-    exit 1
-  fi
+  if [ "$using_snapshot" = "1" ]; then
+    log "Resuming from snapshot (mapped-ram + multifd)..."
+    if ! qmp_incoming_and_cont "$(savevm_raw_path)"; then
+      warn "Snapshot resume did not reach a runnable state — falling back to cold boot."
+      snapshot_fallback_to_cold_boot
+      return
+    fi

-  if ! wait_for_condition "dashboard/backend" "$READY_TIMEOUT" app_ready; then
-    tail_vm_logs
-    exit 1
+    log "VM resumed; waiting for guest agent..."
+    if ! qga_wait_ready; then
+      warn "Guest agent did not respond — falling back to cold boot."
+      snapshot_fallback_to_cold_boot
+      return
+    fi
+
+    # Hot-plug the host filesystem. The snapshot was captured without
+    # virtfs, so the running container has an empty /host bind mount until
+    # we add the 9p device and mount it in the guest. Required for routes
+    # like /local-emulator/project that read user-supplied paths via /host.
+    log "Hot-plugging host filesystem..."
+    if ! qmp_hotplug_9p; then
+      warn "Failed to hot-plug 9p device — falling back to cold boot."
+      snapshot_fallback_to_cold_boot
+      return
+    fi
+    if ! qga_mount_host_fs; then
+      warn "Failed to mount host fs in guest — falling back to cold boot."
+      snapshot_fallback_to_cold_boot
+      return
+    fi
+
+    if [ "$EMULATOR_NO_ROTATION" = "1" ]; then
+      warn "EMULATOR_NO_ROTATION=1: snapshot's placeholder secrets are in effect — do not expose this instance."
+      # The placeholder PCK is live in the running image; publish it to the
+      # host path so --config-file flows still work.
+      write_internal_pck_for_cli "$SNAPSHOT_PLACEHOLDER_PCK"
+      if ! wait_for_condition "services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then
+        warn "Services did not respond after resume — falling back to cold boot."
+        tail_vm_logs
+        snapshot_fallback_to_cold_boot
+        return
+      fi
+    else
+      log "Generating fresh secrets + triggering rotation..."
+      if ! qga_trigger_fast_rotate; then
+        warn "Failed to trigger rotate-secrets — falling back to cold boot."
+        snapshot_fallback_to_cold_boot
+        return
+      fi
+
+      # Wait for the *new* backend (post-supervisor-restart) to actually be
+      # listening. all_ready may briefly return true against the OLD Node
+      # processes between when supervisor sends SIGTERM and when the children
+      # die; sleep a beat so we measure the real readiness.
+      sleep 1
+      if ! wait_for_condition "rotated services" "$SNAPSHOT_READY_TIMEOUT" all_ready; then
+        warn "Services did not recover after rotation — falling back to cold boot."
+        tail_vm_logs
+        snapshot_fallback_to_cold_boot
+        return
+      fi
+    fi
+  else
+    if ! wait_for_condition "deps services" "$READY_TIMEOUT" deps_ready; then
+      tail_vm_logs
+      exit 1
+    fi
+
+    if ! wait_for_condition "dashboard/backend" "$READY_TIMEOUT" app_ready; then
+      tail_vm_logs
+      exit 1
+    fi
  fi

  log "All services are green."
@ -324,6 +856,20 @@ cmd_start() {
  info "Backend:   http://localhost:${EMULATOR_BACKEND_PORT}"
 }

+# Recovery path for any failure during snapshot resume: stop the VM, discard
+# the per-run overlay state, and start again as a cold boot. This keeps the
+# user unblocked when the snapshot itself is unusable (e.g. stale, or captured
+# on an incompatible host arch / QEMU version).
+snapshot_fallback_to_cold_boot() {
+  warn "Retrying with cold boot (EMULATOR_NO_SNAPSHOT=1)..."
+  stop_vm
+  # Drop the overlay and its fingerprint (plus the phantom marker files) so
+  # build_qemu_cmd creates a fresh overlay. runtime-config.iso needs no
+  # handling here: ensure_runtime_config_iso regenerates it when cmd_start
+  # runs again below.
+  rm -f "$VM_DIR"/{disk.qcow2,base-image.fingerprint,seed.phantom,bundle.phantom}
+  EMULATOR_NO_SNAPSHOT=1
+  cmd_start
+}
+
 cmd_stop() {
  stop_vm
  log "QEMU emulator stopped."
@ -335,6 +881,100 @@ cmd_reset() {
  log "Emulator state reset. Next start will be a fresh boot."
 }

+# Cold-boot the VM with the snapshot-compatible device layout, wait for all
+# services to be healthy, then capture a snapshot via QMP migrate and compress
+# it to .savevm.zst. Called by `stack emulator pull` so first-run users get a
+# fast-resume snapshot that's guaranteed compatible with their host's QEMU
+# version + accelerator (which CI-built snapshots can't guarantee across
+# KVM/HVF/TCG).
+#
+# Exits non-zero when the qcow2 is missing, the emulator is already running,
+# services never come up, the QMP capture fails, the captured file is empty,
+# or compression fails; temporary capture files are removed on those late
+# failures. Returns 0 without doing anything when a snapshot already exists
+# and EMULATOR_FORCE_CAPTURE is not 1.
+cmd_capture() {
+  if [ ! -f "$(image_path)" ]; then
+    err "Missing qcow2: $(image_path). Run 'stack emulator pull' first."
+    exit 1
+  fi
+  if [ -s "$(savevm_path)" ] && [ "$EMULATOR_FORCE_CAPTURE" != "1" ]; then
+    log "Snapshot already present at $(savevm_path); skipping capture."
+    log "Pass EMULATOR_FORCE_CAPTURE=1 to rebuild it."
+    return 0
+  fi
+  if is_running; then
+    err "Emulator is already running; stop it first (stack emulator stop)."
+    exit 1
+  fi
+
+  # Start with a clean slate if we're force-recapturing; stale raw/zst would
+  # otherwise make snapshot_available() return true and flip QEMU into
+  # -incoming defer mode.
+  rm -f "$(savevm_path)" "$(savevm_raw_path)"
+
+  ensure_ports_free
+  mkdir -p "$RUN_DIR" "$VM_DIR"
+  # Regenerate runtime-config.iso with STACK_EMULATOR_VM_DIR_HOST empty —
+  # virtfs is detached in capture mode, so run-stack-container's
+  # `install internal-pck → /host/$VM_DIR_HOST/...` would fail and restart-loop
+  # stack.service. Mirrors build-image.sh's CI runtime.env shape.
+  rm -f "$(runtime_iso_path)"
+  write_runtime_config_iso ""
+
+  info "Cold-booting VM to capture local snapshot (one-time, ~1-3 min)..."
+  EMULATOR_CAPTURING_SNAPSHOT=1
+  start_vm
+  info "VM: 4096MB / 4 CPUs (pinned for snapshot compatibility)"
+
+  # Cold boot with snapshot-compatible layout drops virtfs, so stack.service
+  # starts without /host mounted — fine for capture; hostfs is hot-plugged on
+  # resume via qmp_hotplug_9p.
+  if ! wait_for_condition "all services" "$READY_TIMEOUT" all_ready; then
+    tail_vm_logs
+    stop_vm
+    err "Services did not come up; capture aborted."
+    exit 1
+  fi
+
+  local raw tmp_raw zst tmp_zst
+  raw="$(savevm_raw_path)"
+  tmp_raw="${raw}.capture.tmp"
+  zst="$(savevm_path)"
+  tmp_zst="${zst}.capture.tmp"
+  rm -f "$tmp_raw" "$tmp_zst"
+
+  log "Capturing VM state via QMP (mapped-ram + multifd)..."
+  if ! capture_vm_state "$VM_DIR/monitor.sock" "$tmp_raw"; then
+    err "QMP capture failed."
+    stop_vm
+    # Don't leave a partial state file behind once QEMU is gone.
+    rm -f "$tmp_raw"
+    exit 1
+  fi
+
+  # capture_vm_state sent QMP quit; wait for QEMU to exit, then clean sockets.
+  local waited=0
+  while [ "$waited" -lt 30 ] && is_running; do
+    sleep 1
+    waited=$((waited + 1))
+  done
+  if is_running; then
+    warn "QEMU did not exit after QMP quit; forcing."
+    stop_vm
+  fi
+  rm -f "$VM_DIR/qemu.pid" "$VM_DIR/monitor.sock" "$VM_DIR/qga.sock"
+
+  if [ ! -s "$tmp_raw" ]; then
+    err "Captured raw file is empty: $tmp_raw"
+    rm -f "$tmp_raw"
+    exit 1
+  fi
+
+  log "Compressing snapshot with zstd..."
+  # Check zstd explicitly. Unchecked, a failed compression would either abort
+  # with no message (under errexit) or fall through and publish a missing or
+  # truncated archive as the snapshot.
+  if ! zstd -1 -T0 -f -o "$tmp_zst" "$tmp_raw"; then
+    err "zstd compression failed; snapshot not saved."
+    rm -f "$tmp_zst" "$tmp_raw"
+    exit 1
+  fi
+  mv "$tmp_zst" "$zst"
+  # Keep the uncompressed file too — resume reads it directly via mapped-ram,
+  # and ensure_savevm_raw skips re-decompression when the raw's mtime >= zst's.
+  mv "$tmp_raw" "$raw"
+  touch -r "$zst" "$raw"
+
+  local size
+  size="$(du -h "$zst" | cut -f1)"
+  log "Snapshot captured: $zst (${size})"
+}
+
 STATUS_FAILED=0

 print_service_status() {
@ -382,12 +1022,12 @@ ACTION="start"

 while [[ $# -gt 0 ]]; do
  case "$1" in
-    start|stop|reset|status|bench)
+    start|stop|reset|status|bench|capture)
      ACTION="$1"
      shift
      ;;
    *)
-      echo "Usage: $0 [start|stop|reset|status|bench]"
+      echo "Usage: $0 [start|stop|reset|status|bench|capture]"
      exit 1
      ;;
  esac
@ -399,4 +1039,5 @@ case "$ACTION" in
  reset) cmd_reset ;;
  status) cmd_status ;;
  bench) cmd_bench ;;
+  capture) cmd_capture ;;
 esac
--- a/docker/local-emulator/rotate-secrets.sh
+++ b/docker/local-emulator/rotate-secrets.sh
@ -0,0 +1,104 @@
+#!/bin/bash
+# Rotate baked-in placeholder secrets with fresh host-generated values.
+#
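+# Called inside the stack container by the emulator snapshot-resume path
+# (e.g. via `docker exec stack /usr/local/bin/rotate-secrets`, with the fresh
+# secrets passed as environment variables using `docker exec -e`). If
+# STACK_ROTATE_INPUT names an existing env file, that file is sourced first
+# and its values take precedence over the inherited environment.
+#
+# Flow:
+#   1. Read fresh secrets from the environment (or from $STACK_ROTATE_INPUT).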
+#   2. Validate they are 64-char hex (the build placeholders are too).
+#   3. Write rotated-secrets.env that app-entrypoint and run-cron-jobs source
+#      on restart.
+#   4. Targeted sed across built files: swap the placeholder PCK for the fresh
+#      one (this is the only secret baked into JS via sentinel replacement at
+#      build time — SSK/SAK/CRON_SECRET flow through process.env only).
+#   5. UPDATE the internal ApiKeySet row in Postgres.
+#   6. supervisorctl restart stack-app + cron-jobs so the new values take
+#      effect in the running Node processes.
+
+set -euo pipefail
+
+OUTPUT=/run/stack-auth/rotated-secrets.env
+WORK_DIR="${STACK_RUNTIME_WORK_DIR:-/app}"
+
+PLACEHOLDER_PCK="00000000000000000000000000000000ffffffffffffffffffffffffffffffff"
+
+log() { printf '[rotate-secrets] %s\n' "$*"; }
+
+# Fresh secrets normally arrive via env vars (passed by trigger-fast-rotate
+# using `docker exec -e`). For backward compatibility, an env file is also
+# supported: when STACK_ROTATE_INPUT names an existing file it is sourced
+# here, and any values it defines override the inherited environment.
+if [ -n "${STACK_ROTATE_INPUT:-}" ] && [ -f "$STACK_ROTATE_INPUT" ]; then
+  log "reading fresh secrets from $STACK_ROTATE_INPUT"
+  set -a
+  # shellcheck disable=SC1090
+  source "$STACK_ROTATE_INPUT"
+  set +a
+fi
+
+# Every secret must be present and be exactly 64 hex characters. The values
+# are later inlined into a sed expression and a SQL statement, so this check
+# is what makes that inlining safe.
+for var in STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY \
+           STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY \
+           STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY \
+           CRON_SECRET; do
+  val="${!var:-}"
+  if [ -z "$val" ]; then
+    log "ERROR: $var is missing from environment"
+    exit 1
+  fi
+  # Match against the whole string with [[ =~ ]]. Piping the value through
+  # `grep -E '^...$'` would test each LINE separately, so a multi-line value
+  # containing one valid hex line would slip through validation.
+  if ! [[ "$val" =~ ^[0-9a-fA-F]{64}$ ]]; then
+    log "ERROR: $var is not a 64-char hex string"
+    exit 1
+  fi
+done
+
+mkdir -p "$(dirname "$OUTPUT")"
+umask 077
+{
+  printf 'STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY"
+  printf 'STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY"
+  printf 'STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY"
+  printf 'CRON_SECRET=%s\n' "$CRON_SECRET"
+  # Mirror these so process.env lookups in Node match env after restart.
+  printf 'NEXT_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY"
+  printf 'STACK_SECRET_SERVER_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY"
+  printf 'STACK_SUPER_SECRET_ADMIN_KEY=%s\n' "$STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY"
+} > "$OUTPUT"
+chmod 0600 "$OUTPUT"
+log "wrote $OUTPUT"
+
+# The PCK is baked into built JS via STACK_ENV_VAR_SENTINEL replacement at
+# container start (see /app-entrypoint.sh). Swap the placeholder hex for the
+# fresh value across the built tree. Only *.js files need patching; this
+# runs in ~1s on the standalone Next.js bundles.
+if [ "$STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY" != "$PLACEHOLDER_PCK" ]; then
+  log "rewriting PCK placeholder in $WORK_DIR"
+  # grep -rl narrows the find to only files that contain the placeholder, so
+  # the follow-up sed doesn't walk the whole tree.
+  mapfile -t files < <(grep -rl --include='*.js' "$PLACEHOLDER_PCK" "$WORK_DIR/apps" 2>/dev/null || true)
+  if [ "${#files[@]}" -gt 0 ]; then
+    sed -i "s|${PLACEHOLDER_PCK}|${STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY}|g" "${files[@]}"
+    log "patched ${#files[@]} file(s)"
+  else
+    log "no files contained the placeholder (already rotated?)"
+  fi
+fi
+
+# Update the internal ApiKeySet row so existing dashboard sessions keep
+# working with the new keys. Values are already validated as hex above, so
+# inlining is safe.
+if [ -n "${STACK_DATABASE_CONNECTION_STRING:-}" ]; then
+  log "updating internal ApiKeySet"
+  psql "$STACK_DATABASE_CONNECTION_STRING" -v ON_ERROR_STOP=1 <<SQL
+UPDATE "ApiKeySet" SET
+  "publishableClientKey" = '${STACK_SEED_INTERNAL_PROJECT_PUBLISHABLE_CLIENT_KEY}',
+  "secretServerKey"      = '${STACK_SEED_INTERNAL_PROJECT_SECRET_SERVER_KEY}',
+  "superSecretAdminKey"  = '${STACK_SEED_INTERNAL_PROJECT_SUPER_SECRET_ADMIN_KEY}',
+  "updatedAt"            = NOW()
+WHERE "projectId" = 'internal' AND id = '3142e763-b230-44b5-8636-aa62f7489c26';
+SQL
+fi
+
+log "restarting stack-app and cron-jobs"
+supervisorctl restart stack-app cron-jobs
+log "done"
--- a/docker/local-emulator/run-cron-jobs.sh
+++ b/docker/local-emulator/run-cron-jobs.sh
@ -4,6 +4,14 @@

 set -e

+# Pick up rotated secrets from the emulator snapshot resume path if present.
+if [ -f /run/stack-auth/rotated-secrets.env ]; then
+  set -a
+  # shellcheck disable=SC1091
+  source /run/stack-auth/rotated-secrets.env
+  set +a
+fi
+
 BACKEND_URL="http://127.0.0.1:${BACKEND_PORT:-8102}"

 if [ -z "${CRON_SECRET:-}" ]; then
--- a/docker/local-emulator/supervisord.conf
+++ b/docker/local-emulator/supervisord.conf
@ -4,6 +4,18 @@ logfile=/var/log/supervisor/supervisord.log
 pidfile=/var/run/supervisord.pid
 loglevel=info

+; supervisorctl endpoint — rotate-secrets uses this to restart stack-app and
+; cron-jobs after the emulator snapshot-resume path injects fresh secrets.
+[unix_http_server]
+file=/var/run/supervisor.sock
+chmod=0700
+
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory=supervisor.rpcinterface:make_main_rpcinterface
+
+[supervisorctl]
+serverurl=unix:///var/run/supervisor.sock
+
 ; --- PostgreSQL ---

 [program:postgres]
@ -13,7 +25,7 @@ command=/usr/lib/postgresql/16/bin/postgres
    -c max_connections=500
    -c shared_preload_libraries=pg_stat_statements
    -c pg_stat_statements.track=all
-    -c statement_timeout=30s
+    -c statement_timeout=120s
 user=postgres
 autostart=true
 autorestart=true
@ -180,6 +192,8 @@ autostart=true
 autorestart=true
 startsecs=0
 priority=70
+stopasgroup=true
+killasgroup=true
 stdout_logfile=/dev/stdout
 stdout_logfile_maxbytes=0
 stderr_logfile=/dev/stderr
@ -193,6 +207,13 @@ autostart=true
 autorestart=unexpected
 startsecs=0
 priority=60
+; The wrapper script spawns Node backends as background children. On
+; supervisor restart we MUST kill the whole process group, otherwise the
+; old Node servers keep their port bindings and the new ones fail with
+; EADDRINUSE — breaking the snapshot-resume rotation flow.
+stopasgroup=true
+killasgroup=true
+stopwaitsecs=10
 stdout_logfile=/dev/stdout
 stdout_logfile_maxbytes=0
 stderr_logfile=/dev/stderr
--- a/docker/server/entrypoint.sh
+++ b/docker/server/entrypoint.sh
@ -2,6 +2,17 @@

 set -e

+# ============= ROTATED SECRETS OVERLAY =============
+# On emulator snapshot resume, the host injects freshly-generated secrets into
+# /run/stack-auth/rotated-secrets.env before supervisorctl restarts us. Sourcing
+# here lets a fast-restart pick up new values without a full container restart.
+if [ -f /run/stack-auth/rotated-secrets.env ]; then
+  set -a
+  # shellcheck disable=SC1091
+  source /run/stack-auth/rotated-secrets.env
+  set +a
+fi
+
 # ============= FORWARD MOCK OAUTH SERVER =============

 # Start socat to forward port 32202 for mock-oauth-server if enabled
@ -130,39 +141,51 @@ if [ "$WORK_DIR" != "/app" ]; then
  cp -r /app/. "$WORK_DIR"/.
 fi

-# Find all files in the apps directory that contain a STACK_ENV_VAR_SENTINEL and extract the unique sentinel strings.
-echo "Finding unhandled sentinels..."
-unhandled_sentinels=$(find "$WORK_DIR/apps" -type f -exec grep -l "STACK_ENV_VAR_SENTINEL" {} + | \
-  xargs grep -h "STACK_ENV_VAR_SENTINEL" | \
-  grep -o "STACK_ENV_VAR_SENTINEL[A-Z_]*" | \
-  sort -u | grep -v "^STACK_ENV_VAR_SENTINEL$")
+# The full-tree sentinel scan is expensive (several seconds over the whole built
+# app tree). On a fast-restart — triggered by the emulator snapshot rotation
+# path — the placeholders have already been sed-replaced by rotate-secrets,
+# and no new sentinels need substitution. Skip the scan in that case. Marker
+# lives in WORK_DIR because the docker/server image runs as the unprivileged
+# `node` user and cannot write to /var/run.
+SENTINEL_MARKER="$WORK_DIR/.stack-sentinels-replaced"
+if [ -f "$SENTINEL_MARKER" ]; then
+  echo "Sentinels already replaced on a previous start; skipping scan."
+else
+  # Find all files in the apps directory that contain a STACK_ENV_VAR_SENTINEL and extract the unique sentinel strings.
+  echo "Finding unhandled sentinels..."
+  unhandled_sentinels=$(find "$WORK_DIR/apps" -type f -exec grep -l "STACK_ENV_VAR_SENTINEL" {} + | \
+    xargs grep -h "STACK_ENV_VAR_SENTINEL" | \
+    grep -o "STACK_ENV_VAR_SENTINEL[A-Z_]*" | \
+    sort -u | grep -v "^STACK_ENV_VAR_SENTINEL$")

-# Choose an uncommon delimiter – here, we use the ASCII Unit Separator (0x1F)
-delimiter=$(printf '\037')
+  # Choose an uncommon delimiter – here, we use the ASCII Unit Separator (0x1F)
+  delimiter=$(printf '\037')

-echo "Replacing sentinels..."
-for sentinel in $unhandled_sentinels; do
-  # The sentinel is like "STACK_ENV_VAR_SENTINEL_MY_VAR", so extract the env var name.
-  env_var=${sentinel#STACK_ENV_VAR_SENTINEL_}
-  
-  # Get the corresponding environment variable value.
-  value="${!env_var}"
-  
-  # If the env var is not set, skip replacement.
-  if [ -z "$value" ]; then
-    continue
-  fi
+  echo "Replacing sentinels..."
+  for sentinel in $unhandled_sentinels; do
+    # The sentinel is like "STACK_ENV_VAR_SENTINEL_MY_VAR", so extract the env var name.
+    env_var=${sentinel#STACK_ENV_VAR_SENTINEL_}

-  # Although the sentinel only contains [A-Z_] we still escape it for any regex meta-characters.
-  escaped_sentinel=$(printf '%s\n' "$sentinel" | sed -e 's/\\/\\\\/g' -e 's/[][\/.^$*]/\\&/g')
+    # Get the corresponding environment variable value.
+    value="${!env_var}"

-  # For the replacement value, first escape backslashes, then escape any occurrence of
-  # the chosen delimiter and the '&' (which has special meaning in sed replacements).
-  escaped_value=$(printf '%s\n' "$value" | sed -e 's/\\/\\\\/g' -e "s/[${delimiter}&]/\\\\&/g")
+    # If the env var is not set, skip replacement.
+    if [ -z "$value" ]; then
+      continue
+    fi

-  # Now replace the sentinel with the (properly escaped) value in all files in the working directory.
-  find $WORK_DIR/apps -type f -exec sed -i "s${delimiter}${escaped_sentinel}${delimiter}${escaped_value}${delimiter}g" {} +
-done
+    # Although the sentinel only contains [A-Z_] we still escape it for any regex meta-characters.
+    escaped_sentinel=$(printf '%s\n' "$sentinel" | sed -e 's/\\/\\\\/g' -e 's/[][\/.^$*]/\\&/g')
+
+    # For the replacement value, first escape backslashes, then escape any occurrence of
+    # the chosen delimiter and the '&' (which has special meaning in sed replacements).
+    escaped_value=$(printf '%s\n' "$value" | sed -e 's/\\/\\\\/g' -e "s/[${delimiter}&]/\\\\&/g")
+
+    # Now replace the sentinel with the (properly escaped) value in all files in the working directory.
+    find $WORK_DIR/apps -type f -exec sed -i "s${delimiter}${escaped_sentinel}${delimiter}${escaped_value}${delimiter}g" {} +
+  done
+  touch "$SENTINEL_MARKER"
+fi

 # ============= START BACKEND AND DASHBOARD =============

--- a/packages/stack-cli/package.json
+++ b/packages/stack-cli/package.json
@ -13,7 +13,8 @@
    "build": "tsdown && node scripts/copy-emulator-assets.mjs",
    "dev": "tsdown --watch",
    "lint": "eslint --ext .tsx,.ts .",
-    "typecheck": "tsc --noEmit"
+    "typecheck": "tsc --noEmit",
+    "test": "vitest run"
  },
  "files": [
    "README.md",
@ -31,6 +32,7 @@
    "@stackframe/js": "workspace:*",
    "@stackframe/stack-shared": "workspace:*",
    "commander": "^13.1.0",
+    "extract-zip": "^2.0.1",
    "jiti": "^2.4.2"
  },
  "devDependencies": {
--- a/packages/stack-cli/src/commands/emulator.test.ts
+++ b/packages/stack-cli/src/commands/emulator.test.ts
@ -0,0 +1,166 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import {
+  envPort,
+  formatBytes,
+  formatDuration,
+  platformInstallHint,
+  renderProgressLine,
+  resolveArch,
+} from "./emulator.js";
+
+describe("formatBytes", () => {
+  it("renders B / KB / MB / GB across unit boundaries", () => {
+    // Each row is [input byte count, expected rendering].
+    const KIB = 1024;
+    const cases: Array<[number, string]> = [
+      [0, "0 B"],
+      [1, "1 B"],
+      [1023, "1023 B"],
+      [KIB, "1.0 KB"],
+      [1536, "1.5 KB"],
+      [KIB * KIB, "1.0 MB"],
+      [KIB * KIB * KIB, "1.0 GB"],
+      [KIB * KIB * KIB * KIB, "1.0 TB"],
+    ];
+    for (const [input, rendered] of cases) {
+      expect(formatBytes(input)).toBe(rendered);
+    }
+  });
+
+  it("switches precision at v>=10 within a unit", () => {
+    const tenKib = 1024 * 10;
+    const nineAndAHalfKib = 1024 * 9.5;
+    expect(formatBytes(tenKib)).toBe("10 KB");
+    expect(formatBytes(nineAndAHalfKib)).toBe("9.5 KB");
+  });
+
+  it("returns '?' for non-finite and negative values", () => {
+    for (const bad of [NaN, Infinity, -1]) {
+      expect(formatBytes(bad)).toBe("?");
+    }
+  });
+
+  it("caps at TB for very large values", () => {
+    // 1024^6 is exabyte-scale; the result must still end in the largest unit, TB.
+    const exabyteScale = 1024 ** 6;
+    expect(formatBytes(exabyteScale)).toMatch(/ TB$/);
+  });
+});
+
+describe("formatDuration", () => {
+  it("uses s/m/h units at the right boundaries", () => {
+    // Each row is [seconds, expected rendering].
+    const cases: Array<[number, string]> = [
+      [0, "0s"],
+      [59, "59s"],
+      [60, "1m00s"],
+      [61, "1m01s"],
+      [3599, "59m59s"],
+      [3600, "1h00m"],
+      [3660, "1h01m"],
+    ];
+    for (const [seconds, rendered] of cases) {
+      expect(formatDuration(seconds)).toBe(rendered);
+    }
+  });
+
+  it("rounds seconds to integers", () => {
+    const justUnder = formatDuration(59.4);
+    const roundsUp = formatDuration(59.9);
+    expect(justUnder).toBe("59s");
+    expect(roundsUp).toBe("1m00s");
+  });
+
+  it("returns '?' for non-finite and negative values", () => {
+    for (const bad of [NaN, Infinity, -1]) {
+      expect(formatDuration(bad)).toBe("?");
+    }
+  });
+});
+
+describe("renderProgressLine", () => {
+  it("renders a known-size progress bar with percent, size, speed, and ETA", () => {
+    const rendered = renderProgressLine(1024, 2048, 512);
+    for (const fragment of ["50.0%", "/", "/s", "eta"]) {
+      expect(rendered).toContain(fragment);
+    }
+  });
+
+  it("hides the percent / ETA fields when total size is unknown (total=0)", () => {
+    const rendered = renderProgressLine(1024, 0, 512);
+    for (const hidden of ["%", "eta"]) {
+      expect(rendered).not.toContain(hidden);
+    }
+    expect(rendered).toContain("/s");
+  });
+
+  it("clamps percent at 100 if downloaded overshoots total (rounding)", () => {
+    // Two bytes past the advertised total.
+    expect(renderProgressLine(2050, 2048, 100)).toContain("100.0%");
+  });
+
+  it("handles bytesPerSec = 0 by suppressing ETA", () => {
+    expect(renderProgressLine(512, 2048, 0)).not.toContain("eta");
+  });
+});
+
+describe("envPort", () => {
+  const VAR = "__TEST_PORT";
+  const FALLBACK = 1234;
+  // Snapshot whatever the variable held before the suite so it can be put back.
+  const previous = process.env[VAR];
+
+  beforeEach(() => {
+    delete process.env[VAR];
+  });
+  afterEach(() => {
+    if (previous !== undefined) {
+      process.env[VAR] = previous;
+    } else {
+      delete process.env[VAR];
+    }
+  });
+
+  // Set the variable, then assert that reading it is rejected.
+  const expectRejected = (raw: string) => {
+    process.env[VAR] = raw;
+    expect(() => envPort(VAR, FALLBACK)).toThrow(/Invalid __TEST_PORT/);
+  };
+
+  it("returns the fallback when the env var is not set", () => {
+    expect(envPort(VAR, FALLBACK)).toBe(1234);
+  });
+
+  it("parses a valid integer value", () => {
+    process.env[VAR] = "9876";
+    expect(envPort(VAR, FALLBACK)).toBe(9876);
+  });
+
+  it("rejects zero and negative values", () => {
+    expectRejected("0");
+    expectRejected("-5");
+  });
+
+  it("rejects non-integer and non-numeric values", () => {
+    expectRejected("3.14");
+    expectRejected("not-a-port");
+  });
+
+  it("treats empty string as not set (returns fallback)", () => {
+    // Regression target: earlier versions sometimes parsed "" as 0 and threw.
+    process.env[VAR] = "";
+    expect(envPort(VAR, FALLBACK)).toBe(1234);
+  });
+});
+
+describe("resolveArch", () => {
+  it("accepts explicit arm64 / amd64", () => {
+    for (const arch of ["arm64", "amd64"]) {
+      expect(resolveArch(arch)).toBe(arch);
+    }
+  });
+
+  it("throws on unsupported explicit arch", () => {
+    for (const unsupported of ["mips", "x86"]) {
+      expect(() => resolveArch(unsupported)).toThrow(/Invalid architecture/);
+    }
+  });
+
+  it("maps the current process arch when raw is undefined", () => {
+    // Mirror the mapping under test: arm64 stays arm64, x64 becomes amd64,
+    // and any other host architecture is expected to be rejected.
+    let expected: "arm64" | "amd64" | null = null;
+    if (process.arch === "arm64") {
+      expected = "arm64";
+    } else if (process.arch === "x64") {
+      expected = "amd64";
+    }
+
+    if (expected !== null) {
+      expect(resolveArch()).toBe(expected);
+    } else {
+      expect(() => resolveArch()).toThrow(/Invalid architecture/);
+    }
+  });
+});
+
+describe("platformInstallHint", () => {
+  it("uses brew on darwin and apt on linux", () => {
+    // process.platform is stubbed per case and always restored in `finally`.
+    const spy = vi.spyOn(process, "platform", "get");
+    try {
+      spy.mockReturnValue("darwin");
+      expect(platformInstallHint("foo-linux", "foo-mac")).toContain("brew install foo-mac");
+      spy.mockReturnValue("linux");
+      expect(platformInstallHint("foo-linux", "foo-mac")).toContain("apt install foo-linux");
+      spy.mockReturnValue("win32");
+      // Exact match: "brew install foo-mac" also contains "install foo-mac",
+      // so a substring check could not tell the fallback apart from darwin.
+      expect(platformInstallHint("foo-linux", "foo-mac")).toBe("install foo-mac");
+    } finally {
+      spy.mockRestore();
+    }
+  });
+});
--- a/packages/stack-cli/src/commands/emulator.ts
+++ b/packages/stack-cli/src/commands/emulator.ts
@ -1,23 +1,43 @@
 import { Command } from "commander";
 import { execFileSync, spawn } from "child_process";
-import { existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs";
+import extract from "extract-zip";
+import { chmodSync, createWriteStream, existsSync, mkdirSync, readFileSync, renameSync, unlinkSync } from "fs";
 import { homedir } from "os";
 import { dirname, join, resolve } from "path";
+import { Readable } from "stream";
+import { pipeline } from "stream/promises";
 import { fileURLToPath } from "url";
 import { CliError } from "../lib/errors.js";
+import { writeIso } from "../lib/iso.js";

+// Default ports, used when the matching EMULATOR_*_PORT env var is not set.
 const DEFAULT_EMULATOR_BACKEND_PORT = 26701;
+const DEFAULT_EMULATOR_DASHBOARD_PORT = 26700;
+const DEFAULT_EMULATOR_MINIO_PORT = 26702;
+const DEFAULT_EMULATOR_INBUCKET_PORT = 26703;
+// Fallback when neither PORT_PREFIX nor NEXT_PUBLIC_STACK_PORT_PREFIX is set.
+const DEFAULT_PORT_PREFIX = "81";
+const GITHUB_API = "https://api.github.com";
+const DEFAULT_REPO = "stack-auth/stack-auth";
+// Locations probed for the aarch64 UEFI firmware; the arm64 preflight fails
+// when none of them exists.
+const AARCH64_FIRMWARE_PATHS = [
+  "/opt/homebrew/share/qemu/edk2-aarch64-code.fd",
+  "/usr/share/qemu/edk2-aarch64-code.fd",
+  "/usr/share/AAVMF/AAVMF_CODE.fd",
+  "/usr/share/qemu-efi-aarch64/QEMU_EFI.fd",
+];

-function emulatorBackendPort(): number {
-  const raw = process.env.EMULATOR_BACKEND_PORT;
-  if (!raw) return DEFAULT_EMULATOR_BACKEND_PORT;
+/**
+ * Read a TCP port number from the environment variable `name`.
+ *
+ * Returns `fallback` when the variable is unset or empty. Throws a CliError
+ * when the value is not an integer in the range 1-65535.
+ */
+export function envPort(name: string, fallback: number): number {
+  const raw = process.env[name];
+  if (!raw) return fallback;
   const parsed = Number(raw);
-  if (!Number.isInteger(parsed) || parsed <= 0) {
-    throw new CliError(`Invalid EMULATOR_BACKEND_PORT: ${raw}`);
+  // Ports are 16-bit: reject anything above 65535 here rather than letting it
+  // fail later when the port is actually used.
+  if (!Number.isInteger(parsed) || parsed <= 0 || parsed > 65535) {
+    throw new CliError(`Invalid ${name}: ${raw}`);
   }
   return parsed;
 }

+/** Backend port: EMULATOR_BACKEND_PORT when set, otherwise the built-in default. */
+function emulatorBackendPort(): number {
+  return envPort("EMULATOR_BACKEND_PORT", DEFAULT_EMULATOR_BACKEND_PORT);
+}
+
+// Emulator home directory: STACK_EMULATOR_HOME when defined, otherwise
+// ~/.stack/emulator.
 function emulatorHome(): string {
   return process.env.STACK_EMULATOR_HOME ?? join(homedir(), ".stack", "emulator");
 }
@ -37,11 +57,13 @@ function internalPckPath(): string {
 async function readInternalPck(timeoutMs = 60_000): Promise<string> {
  const path = internalPckPath();
  const deadline = Date.now() + timeoutMs;
-  let delay = 250;
+  let delay = 50;
  while (Date.now() < deadline) {
-    if (existsSync(path)) {
+    try {
      const contents = readFileSync(path, "utf-8").trim();
      if (contents) return contents;
+    } catch (e) {
+      if ((e as NodeJS.ErrnoException).code !== "ENOENT") throw e;
    }
    await new Promise((r) => setTimeout(r, delay));
    delay = Math.min(delay * 2, 2000);
@ -82,26 +104,71 @@ async function fetchEmulatorCredentials(pck: string, backendPort: number, config
  };
 }

-function gh(args: string[]): string {
+// Resolve a GitHub auth token. We try GITHUB_TOKEN first so users can pin a
+// PAT, then fall back to `gh auth token` if the gh CLI is installed and
+// signed in. If neither works we return undefined — public release downloads
+// still work (anonymous, lower rate limit) but artifact downloads fail with a
+// clear error at the call site.
+function githubToken(): string | undefined {
+  if (process.env.GITHUB_TOKEN) return process.env.GITHUB_TOKEN;
   try {
-    return execFileSync("gh", args, { encoding: "utf-8", stdio: ["pipe", "pipe", "pipe"] }).trim();
-  } catch (err: unknown) {
-    if (err instanceof Error && "stderr" in err && typeof err.stderr === "string") {
-      throw new CliError(`GitHub CLI error: ${err.stderr}`);
-    }
-    throw new CliError("GitHub CLI (gh) is required. Install: https://cli.github.com/");
+    // All stdio is piped, so gh's own output is captured rather than printed.
+    const out = execFileSync("gh", ["auth", "token"], {
+      encoding: "utf-8",
+      stdio: ["pipe", "pipe", "pipe"],
+    }).trim();
+    // Empty output is treated the same as having no token.
+    return out || undefined;
+  } catch {
+    return undefined;
   }
 }

+/**
+ * GET `path` from the GitHub REST API and return the parsed JSON body.
+ *
+ * Sends a bearer token when githubToken() yields one. Any non-2xx response
+ * becomes a CliError carrying the status, an auth hint for 401/403, and up to
+ * 300 characters of the response body.
+ */
+async function ghApi<T>(path: string): Promise<T> {
+  const headers: Record<string, string> = {
+    Accept: "application/vnd.github+json",
+    "X-GitHub-Api-Version": "2022-11-28",
+  };
+  const authToken = githubToken();
+  if (authToken) {
+    headers.Authorization = `Bearer ${authToken}`;
+  }
+
+  const response = await fetch(`${GITHUB_API}${path}`, { headers });
+  if (response.ok) {
+    return await (response.json() as Promise<T>);
+  }
+
+  // The body is only used for diagnostics; if it cannot be read, report without it.
+  const errorBody = await response.text().catch(() => "");
+  const isAuthFailure = response.status === 401 || response.status === 403;
+  const hint = isAuthFailure
+    ? " (set GITHUB_TOKEN or run `gh auth login` for higher rate limits / private access)"
+    : "";
+  const detail = errorBody ? `: ${errorBody.slice(0, 300)}` : "";
+  throw new CliError(`GitHub API ${response.status} ${response.statusText} for ${path}${hint}${detail}`);
+}
+
+// Locate the directory that contains run-emulator.sh. The `emulator/` folder
+// next to this module is tried first, then the repo's
+// docker/local-emulator/qemu; a CliError is thrown when neither has the script.
 function emulatorScriptsDir(): string {
   const here = dirname(fileURLToPath(import.meta.url));
   const bundled = join(here, "emulator");
-  if (existsSync(join(bundled, "run-emulator.sh"))) return bundled;
+  if (existsSync(join(bundled, "run-emulator.sh"))) return ensureExecutable(bundled);
   const repo = resolve(here, "../../../docker/local-emulator/qemu");
-  if (existsSync(join(repo, "run-emulator.sh"))) return repo;
+  if (existsSync(join(repo, "run-emulator.sh"))) return ensureExecutable(repo);
   throw new CliError("Emulator scripts not found in CLI bundle.");
 }

+// npm pack drops the execute bit on files outside `bin`, so put it back on
+// run-emulator.sh. A failed chmod is ignored, and the directory is returned
+// unchanged either way so callers can pass it straight through.
+function ensureExecutable(scriptsDir: string): string {
+  const runScript = join(scriptsDir, "run-emulator.sh");
+  try {
+    chmodSync(runScript, 0o755);
+  } catch {
+    // best-effort
+  }
+  return scriptsDir;
+}
+
+function baseEnvPath(): string {
+  // In both layouts the file sits one level above the scripts dir:
+  // dist/.env.development when bundled, docker/local-emulator/.env.development
+  // in the repo.
+  const candidate = resolve(emulatorScriptsDir(), "..", ".env.development");
+  if (existsSync(candidate)) {
+    return candidate;
+  }
+  throw new CliError(`Emulator base.env not found at ${candidate}`);
+}
+
 function emulatorSpawnEnv(extra?: Record<string, string>): NodeJS.ProcessEnv {
  return {
    ...process.env,
@ -111,6 +178,33 @@ function emulatorSpawnEnv(extra?: Record<string, string>): NodeJS.ProcessEnv {
  };
 }

+// Build runtime-config.iso (volume label STACKCFG) under <run dir>/vm for the
+// VM to mount. It carries two files: runtime.env with the port settings and VM
+// dir resolved here, and base.env copied from baseEnvPath(). Replaces the
+// hdiutil/mkisofs/genisoimage host dep — see ../lib/iso.ts.
+function prepareRuntimeConfigIso(): void {
+  const vmDir = join(emulatorRunDir(), "vm");
+  mkdirSync(vmDir, { recursive: true });
+
+  const env = process.env;
+  const portPrefix = env.PORT_PREFIX ?? env.NEXT_PUBLIC_STACK_PORT_PREFIX ?? DEFAULT_PORT_PREFIX;
+
+  // KEY=value pairs in the order they are written to runtime.env.
+  const settings: Array<[string, string | number]> = [
+    ["STACK_EMULATOR_PORT_PREFIX", portPrefix],
+    ["STACK_EMULATOR_DASHBOARD_HOST_PORT", envPort("EMULATOR_DASHBOARD_PORT", DEFAULT_EMULATOR_DASHBOARD_PORT)],
+    ["STACK_EMULATOR_BACKEND_HOST_PORT", envPort("EMULATOR_BACKEND_PORT", DEFAULT_EMULATOR_BACKEND_PORT)],
+    ["STACK_EMULATOR_MINIO_HOST_PORT", envPort("EMULATOR_MINIO_PORT", DEFAULT_EMULATOR_MINIO_PORT)],
+    ["STACK_EMULATOR_INBUCKET_HOST_PORT", envPort("EMULATOR_INBUCKET_PORT", DEFAULT_EMULATOR_INBUCKET_PORT)],
+    ["STACK_EMULATOR_VM_DIR_HOST", vmDir],
+  ];
+  // One line per setting, each terminated by a newline.
+  const runtimeEnv = settings.map(([key, value]) => `${key}=${value}\n`).join("");
+
+  const baseEnv = readFileSync(baseEnvPath());
+  const isoPath = join(vmDir, "runtime-config.iso");
+  writeIso(isoPath, "STACKCFG", [
+    { name: "runtime.env", data: Buffer.from(runtimeEnv, "utf-8") },
+    { name: "base.env", data: baseEnv },
+  ]);
+}
+
 function runEmulator(action: string, env?: Record<string, string>): Promise<void> {
  const scriptsDir = emulatorScriptsDir();
  mkdirSync(emulatorRunDir(), { recursive: true });
@ -141,82 +235,327 @@ function isEmulatorRunning(): boolean {
 }

+// Start the VM for `arch`. When the qcow2 image is not on disk yet, the
+// latest release is pulled and a local snapshot captured first. The runtime
+// config ISO is rewritten before every start.
 async function startEmulator(arch: "arm64" | "amd64"): Promise<void> {
-  mkdirSync(emulatorImageDir(), { recursive: true });
   const img = join(emulatorImageDir(), `stack-emulator-${arch}.qcow2`);
   if (!existsSync(img)) {
     console.log("No emulator image found. Pulling latest...");
-    pullRelease(arch);
+    await pullRelease(arch);
+    // Capture now so this and all subsequent starts resume fast. Skipping it
+    // would cold-boot today plus every future start (we never auto-capture).
+    await captureLocalSnapshot(arch);
   }
-  await runEmulator("start", { EMULATOR_ARCH: arch });
+  prepareRuntimeConfigIso();
+  // Signal to run-emulator.sh that runtime-config.iso was written by the CLI
+  // via lib/iso.ts; the shell's ensure_runtime_config_iso should trust it and
+  // skip its own regeneration (which would otherwise require the
+  // hdiutil/mkisofs/genisoimage host dep the TS writer replaces).
+  await runEmulator("start", { EMULATOR_ARCH: arch, STACK_EMULATOR_CLI_WROTE_ISO: "1" });
 }

-function resolveArch(raw?: string): "arm64" | "amd64" {
+// Resolve the target architecture. An explicit `raw` must be "arm64" or
+// "amd64"; when omitted, process.arch is mapped (arm64 → arm64, x64 → amd64).
+// Anything else throws a CliError.
+export function resolveArch(raw?: string): "arm64" | "amd64" {
   const arch = raw ?? (process.arch === "arm64" ? "arm64" : process.arch === "x64" ? "amd64" : null);
   if (arch === "arm64" || arch === "amd64") return arch;
   throw new CliError(`Invalid architecture: ${raw ?? process.arch}. Expected arm64 or amd64.`);
 }

-function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string; branch?: string; tag?: string } = {}) {
-  const repo = opts.repo ?? "stack-auth/stack-auth";
+type ReleaseAsset = { name: string, url: string, size: number };
+type ReleaseResponse = { assets: ReleaseAsset[] };
+
+// Download stack-emulator-<arch>.qcow2 from a GitHub release into the image
+// dir. Defaults: repo DEFAULT_REPO, branch "dev", tag
+// `emulator-<branch>-latest`. Throws a CliError when the release has no asset
+// with that name.
+async function pullRelease(arch: "arm64" | "amd64", opts: { repo?: string, branch?: string, tag?: string } = {}) {
+  const repo = opts.repo ?? DEFAULT_REPO;
   const branch = opts.branch ?? "dev";
   const tag = opts.tag ?? `emulator-${branch}-latest`;
-  const asset = `stack-emulator-${arch}.qcow2`;
   const imageDir = emulatorImageDir();
   mkdirSync(imageDir, { recursive: true });
+
+  const diskAsset = `stack-emulator-${arch}.qcow2`;
+
+  const release = await ghApi<ReleaseResponse>(`/repos/${repo}/releases/tags/${tag}`);
+  const diskMatch = release.assets.find((a) => a.name === diskAsset);
+  if (!diskMatch) {
+    throw new CliError(`Asset ${diskAsset} not found in release ${tag}. Run 'stack emulator list-releases' to see available releases.`);
+  }
+  const token = githubToken();
+  await downloadReleaseAsset(diskMatch, imageDir, diskAsset, token, tag);
+}
+
+// Cold-boot the VM, wait for services, capture a snapshot via QMP, compress,
+// stop. Runs once per qcow2 download so subsequent `stack emulator start`s
+// resume in ~3-8s. Snapshots are always captured on the user's own machine
+// because QEMU migration state isn't portable across accelerators
+// (KVM/HVF/TCG) or `-cpu max` feature sets.
+async function captureLocalSnapshot(arch: "arm64" | "amd64"): Promise<void> {
+  // "pull" is the command name shown in preflight error and warning messages.
+  preflightForVmStart("pull", arch);
+  // Write the runtime config ISO before handing off to the "capture" action.
+  prepareRuntimeConfigIso();
+  console.log("Capturing local snapshot (first-time, ~1-3 min cold boot + capture)...");
+  await runEmulator("capture", { EMULATOR_ARCH: arch });
+}
+
+// Download one release asset to <imageDir>/<asset>. The bytes go to
+// `<dest>.download` first and are renamed into place only on success; a failed
+// download removes the temp file. CliErrors are rethrown as-is and any other
+// error is wrapped in one.
+async function downloadReleaseAsset(
+  match: ReleaseAsset,
+  imageDir: string,
+  asset: string,
+  token: string | undefined,
+  tag: string,
+): Promise<void> {
   const dest = join(imageDir, asset);
   const tmpDest = `${dest}.download`;
-
   console.log(`Pulling ${asset} from release ${tag}...`);
+  const headers: Record<string, string> = { Accept: "application/octet-stream" };
+  if (token) headers.Authorization = `Bearer ${token}`;
   try {
-    execFileSync("gh", ["release", "download", tag, "--repo", repo, "--pattern", asset, "--output", tmpDest, "--clobber"], { stdio: "inherit" });
+    await downloadWithProgress(match.url, headers, tmpDest, match.size);
   } catch (err) {
     if (existsSync(tmpDest)) unlinkSync(tmpDest);
-    throw new CliError(`Failed to download ${asset} from release ${tag}: ${err instanceof Error ? err.message : err}\nRun 'stack emulator list-releases' to see available releases.`);
+    if (err instanceof CliError) throw err;
+    throw new CliError(`Failed to download ${asset} from release ${tag}: ${err instanceof Error ? err.message : err}`);
   }
   renameSync(tmpDest, dest);
   console.log(`Downloaded: ${dest}`);
 }

+/**
+ * Stream `url` into the file `dest`, reporting progress on stderr.
+ *
+ * On a TTY the progress line is redrawn in place, at most once every 100 ms.
+ * Otherwise only the final line is printed. `totalBytes`, when given, takes
+ * precedence over the Content-Length header; a total of 0 means "unknown".
+ *
+ * Throws a CliError for a non-2xx response or a missing body. Stream errors
+ * from the transfer propagate to the caller.
+ */
+async function downloadWithProgress(url: string, headers: Record<string, string>, dest: string, totalBytes?: number): Promise<void> {
+  const res = await fetch(url, { headers, redirect: "follow" });
+  if (!res.ok || !res.body) {
+    throw new CliError(`Download failed (${res.status} ${res.statusText}): ${url}`);
+  }
+  const total = totalBytes ?? (Number(res.headers.get("content-length")) || 0);
+  const isTty = Boolean(process.stderr.isTTY);
+  const startedAt = Date.now();
+  let downloaded = 0;
+  let lastRender = 0;
+  // True once an in-place progress line has been drawn on the terminal.
+  let drewInPlace = false;
+
+  const render = (final: boolean) => {
+    const now = Date.now();
+    if (!final && now - lastRender < 100) return;
+    lastRender = now;
+    // Floor the elapsed time so the very first chunk cannot divide by zero.
+    const elapsed = Math.max(0.001, (now - startedAt) / 1000);
+    const speed = downloaded / elapsed;
+    const line = renderProgressLine(downloaded, total, speed);
+    if (isTty) {
+      // Return to column 0 and clear the line before redrawing.
+      process.stderr.write(`\r\x1b[2K${line}`);
+      drewInPlace = true;
+    } else if (final) {
+      process.stderr.write(`${line}\n`);
+    }
+  };
+
+  const body = Readable.fromWeb(res.body as Parameters<typeof Readable.fromWeb>[0]);
+  body.on("data", (chunk: Buffer) => {
+    downloaded += chunk.byteLength;
+    render(false);
+  });
+  try {
+    await pipeline(body, createWriteStream(dest));
+    render(true);
+  } finally {
+    // Terminate the in-place progress line on failure as well, so a following
+    // error message starts on its own line instead of being glued to the bar.
+    if (drewInPlace) process.stderr.write("\n");
+  }
+}
+
+/**
+ * Render one line of download progress: bar, percent, size, speed and ETA.
+ *
+ * `total <= 0` means the size is unknown: the percent is shown as "?" and the
+ * ETA is omitted. The ETA is also omitted when `bytesPerSec` is not positive.
+ */
+export function renderProgressLine(downloaded: number, total: number, bytesPerSec: number): string {
+  const barWidth = 30;
+  const sizeKnown = total > 0;
+  // Clamp to [0, 1] so an overshoot cannot widen the bar past barWidth and a
+  // negative count cannot hand String.repeat a negative length (it throws).
+  const fraction = sizeKnown ? Math.min(1, Math.max(0, downloaded / total)) : 0;
+  const filled = Math.round(fraction * barWidth);
+  const bar = "█".repeat(filled) + "░".repeat(barWidth - filled);
+  const pctStr = sizeKnown ? `${(fraction * 100).toFixed(1).padStart(5)}%` : "  ?  ";
+  const sizeStr = sizeKnown ? `${formatBytes(downloaded)}/${formatBytes(total)}` : formatBytes(downloaded);
+  const speedStr = `${formatBytes(bytesPerSec)}/s`;
+  // Never report a negative remainder when more bytes arrived than advertised.
+  const remaining = Math.max(0, total - downloaded);
+  const etaStr = sizeKnown && bytesPerSec > 0 ? `  eta ${formatDuration(remaining / bytesPerSec)}` : "";
+  return `  [${bar}] ${pctStr}  ${sizeStr}  ${speedStr}${etaStr}`;
+}
+
+/**
+ * Format a byte count with binary (1024-based) units from B up to TB.
+ *
+ * Bytes are shown as integers. Larger units get one decimal while the value
+ * displays below 10 and none from 10 upwards. Values past the TB range stay in
+ * TB. Non-finite or negative input yields "?".
+ */
+export function formatBytes(bytes: number): string {
+  if (!Number.isFinite(bytes) || bytes < 0) return "?";
+  const units = ["B", "KB", "MB", "GB", "TB"];
+  let v = bytes;
+  let i = 0;
+  while (v >= 1024 && i < units.length - 1) {
+    v /= 1024;
+    i++;
+  }
+  // Decide on the rounded value: 9.96 would otherwise print as "10.0" while an
+  // exact 10 prints as "10".
+  const oneDecimal = i > 0 && Number(v.toFixed(1)) < 10;
+  return `${v.toFixed(oneDecimal ? 1 : 0)} ${units[i]}`;
+}
+
+/**
+ * Format a duration in seconds as "Ns", "MmSSs" or "HhMMm".
+ *
+ * The input is rounded to whole seconds first. Non-finite or negative input
+ * yields "?".
+ */
+export function formatDuration(seconds: number): string {
+  if (seconds < 0 || !Number.isFinite(seconds)) return "?";
+  const twoDigits = (n: number) => n.toString().padStart(2, "0");
+  const totalSeconds = Math.round(seconds);
+  const totalMinutes = Math.floor(totalSeconds / 60);
+  if (totalMinutes === 0) return `${totalSeconds}s`;
+  if (totalMinutes < 60) return `${totalMinutes}m${twoDigits(totalSeconds % 60)}s`;
+  const hours = Math.floor(totalMinutes / 60);
+  return `${hours}h${twoDigits(totalMinutes % 60)}m`;
+}
+
+// --- Dependency preflight ---------------------------------------------------
+
+type BinarySpec = { name: string, install: string };
+
+// True when `bin` can be located with `where` (win32) or `which` (everywhere
+// else). Any failure of the lookup command counts as "not found".
+function commandExists(bin: string): boolean {
+  const locator = process.platform === "win32" ? "where" : "which";
+  try {
+    execFileSync(locator, [bin], { stdio: "pipe" });
+  } catch {
+    return false;
+  }
+  return true;
+}
+
+/**
+ * Build the install command suggested to the user for the current platform:
+ * brew with `macPkg` on darwin, apt with `linuxPkg` on linux, and a bare
+ * "install <macPkg>" on any other platform.
+ */
+export function platformInstallHint(linuxPkg: string, macPkg: string): string {
+  const platform = process.platform;
+  if (platform === "darwin") {
+    return `brew install ${macPkg}`;
+  }
+  if (platform === "linux") {
+    return `apt install ${linuxPkg} (or your distro's equivalent)`;
+  }
+  return `install ${macPkg}`;
+}
+
+// Pair a binary name with the install hint for the current platform.
+function bin(name: string, linuxPkg: string, macPkg: string): BinarySpec {
+  return { name, install: platformInstallHint(linuxPkg, macPkg) };
+}
+
+// Throw a CliError listing every binary in `bins` that cannot be found,
+// together with its install hint. Does nothing when all of them are present.
+function requireBinaries(commandName: string, bins: BinarySpec[]): void {
+  const bullets: string[] = [];
+  for (const spec of bins) {
+    if (!commandExists(spec.name)) {
+      bullets.push(`  - ${spec.name}  →  ${spec.install}`);
+    }
+  }
+  if (bullets.length > 0) {
+    throw new CliError(
+      `\`stack emulator ${commandName}\` requires the following missing binaries:\n${bullets.join("\n")}`,
+    );
+  }
+}
+
+// Print one console warning per binary in `bins` that cannot be found. Unlike
+// requireBinaries this never throws.
+function warnIfMissing(commandName: string, bins: BinarySpec[]): void {
+  bins
+    .filter((spec) => !commandExists(spec.name))
+    .forEach((spec) => {
+      console.warn(`[stack emulator ${commandName}] optional dep '${spec.name}' missing — feature degraded. Install: ${spec.install}`);
+    });
+}
+
+// True as soon as one of the known aarch64 UEFI firmware paths exists.
+function aarch64FirmwareAvailable(): boolean {
+  for (const candidate of AARCH64_FIRMWARE_PATHS) {
+    if (existsSync(candidate)) return true;
+  }
+  return false;
+}
+
+// Binaries required to start a VM regardless of architecture.
+function commonVmBins(): BinarySpec[] {
+  // Each row is [binary, linux package, mac package].
+  const table: Array<[string, string, string]> = [
+    ["qemu-img", "qemu-utils", "qemu"],
+    ["socat", "socat", "socat"],
+    ["curl", "curl", "curl"],
+    ["nc", "ncat", "netcat"],
+    ["lsof", "lsof", "lsof"],
+    ["openssl", "openssl", "openssl"],
+  ];
+  return table.map(([name, linuxPkg, macPkg]) => bin(name, linuxPkg, macPkg));
+}
+
+// The qemu-system binary matching the guest architecture.
+function archSpecificQemuBin(arch: "arm64" | "amd64"): BinarySpec {
+  return arch === "arm64"
+    ? bin("qemu-system-aarch64", "qemu-system-arm", "qemu")
+    : bin("qemu-system-x86_64", "qemu-system-x86", "qemu");
+}
+
+// Check host dependencies before booting a VM: required binaries must exist
+// (throws otherwise), a missing zstd only produces a warning, and arm64
+// additionally needs one of the known UEFI firmware files.
+function preflightForVmStart(commandName: string, arch: "arm64" | "amd64"): void {
+  const required = [archSpecificQemuBin(arch), ...commonVmBins()];
+  requireBinaries(commandName, required);
+
+  const optional = [bin("zstd", "zstd", "zstd")];
+  warnIfMissing(commandName, optional);
+
+  if (arch !== "arm64" || aarch64FirmwareAvailable()) return;
+
+  const searched = AARCH64_FIRMWARE_PATHS.map((p) => `  - ${p}`).join("\n");
+  throw new CliError(
+    `aarch64 UEFI firmware not found. Looked in:\n${searched}\n` +
+    `Install: ${platformInstallHint("qemu-efi-aarch64", "qemu")}`,
+  );
+}
+
+// --- Workflow run / artifact downloads (replaces `gh run download`) ---------
+
+type WorkflowRunsResponse = { workflow_runs: { id: number }[] };
+type ArtifactsResponse = { artifacts: { id: number, name: string, size_in_bytes: number }[] };
+type PullResponse = { head: { ref: string } };
+
+/**
+ * Download the workflow-run artifact called `name` and unpack it into `destDir`.
+ *
+ * Returns false when the run has no artifact with that name (only the first
+ * 100 artifacts of the run are listed), true after a successful extraction.
+ * Throws a CliError when no GitHub token is available. The intermediate
+ * `<name>.zip` is removed whether or not the download and extraction succeed.
+ */
+async function downloadArtifactByName(repo: string, runId: string, name: string, destDir: string): Promise<boolean> {
+  const token = githubToken();
+  if (!token) {
+    throw new CliError(
+      "Downloading workflow run artifacts requires authentication. Set GITHUB_TOKEN or run `gh auth login`.",
+    );
+  }
+  const list = await ghApi<ArtifactsResponse>(`/repos/${repo}/actions/runs/${runId}/artifacts?per_page=100`);
+  const match = list.artifacts.find((a) => a.name === name);
+  if (!match) return false;
+  const zipPath = join(destDir, `${name}.zip`);
+  console.log(`Downloading artifact '${name}' from run ${runId}...`);
+  try {
+    await downloadWithProgress(
+      `${GITHUB_API}/repos/${repo}/actions/artifacts/${match.id}/zip`,
+      { Accept: "application/vnd.github+json", Authorization: `Bearer ${token}` },
+      zipPath,
+      match.size_in_bytes,
+    );
+    await extract(zipPath, { dir: destDir });
+  } finally {
+    // Clean up on failure too, so a partial or corrupt archive is not left
+    // behind in the destination directory.
+    if (existsSync(zipPath)) unlinkSync(zipPath);
+  }
+  return true;
+}
+
 export function registerEmulatorCommand(program: Command) {
  const emulator = program.command("emulator").description("Manage the QEMU local emulator");

  emulator
    .command("pull")
-    .description("Download an emulator image from GitHub Releases or a PR build")
+    .description("Download an emulator image from GitHub Releases or a PR build, then capture a local fast-start snapshot")
    .option("--arch <arch>", "Target architecture (default: current system arch)")
    .option("--branch <branch>", "Release branch (default: dev)")
    .option("--tag <tag>", "Specific release tag (default: latest)")
    .option("--repo <repo>", "GitHub repository (default: stack-auth/stack-auth)")
    .option("--pr <number>", "Pull from a PR's CI artifacts")
    .option("--run <id>", "Pull from a specific workflow run's artifacts")
-    .action(async (opts) => {
+    .option("--skip-snapshot", "Download only the qcow2; skip the one-time local snapshot capture")
+    .action(async (opts: { arch?: string, repo?: string, branch?: string, tag?: string, pr?: string, run?: string, skipSnapshot?: boolean }) => {
      const arch = resolveArch(opts.arch);
-      const repo = opts.repo ?? "stack-auth/stack-auth";
+      const repo = opts.repo ?? DEFAULT_REPO;

      if (opts.run || opts.pr) {
-        let runId = opts.run as string | undefined;
+        let runId = opts.run;
        if (!runId) {
          console.log(`Finding latest successful build for PR #${opts.pr}...`);
-          const { headRefName } = JSON.parse(gh(["pr", "view", opts.pr, "--repo", repo, "--json", "headRefName"]));
-          const runs = JSON.parse(gh(["run", "list", "--repo", repo, "--workflow", "qemu-emulator-build.yaml", "--branch", headRefName, "--status", "success", "--limit", "1", "--json", "databaseId"]));
-          if (runs.length === 0) throw new CliError(`No successful build found for PR #${opts.pr} (branch: ${headRefName}).`);
-          runId = String(runs[0].databaseId);
+          const pr = await ghApi<PullResponse>(`/repos/${repo}/pulls/${opts.pr}`);
+          const headRefName = pr.head.ref;
+          const runs = await ghApi<WorkflowRunsResponse>(
+            `/repos/${repo}/actions/workflows/qemu-emulator-build.yaml/runs?branch=${encodeURIComponent(headRefName)}&status=success&per_page=1`,
+          );
+          if (runs.workflow_runs.length === 0) {
+            throw new CliError(`No successful build found for PR #${opts.pr} (branch: ${headRefName}).`);
+          }
+          runId = String(runs.workflow_runs[0].id);
        }

        const imageDir = emulatorImageDir();
        mkdirSync(imageDir, { recursive: true });
        const dest = join(imageDir, `stack-emulator-${arch}.qcow2`);
+        const snapshotDest = join(imageDir, `stack-emulator-${arch}.savevm.zst`);
+        const snapshotRawDest = join(imageDir, `stack-emulator-${arch}.savevm.raw`);
        if (existsSync(dest)) unlinkSync(dest);
-        console.log(`Downloading qemu-emulator-${arch} from workflow run ${runId}...`);
-        try {
-          execFileSync("gh", ["run", "download", runId, "--repo", repo, "--name", `qemu-emulator-${arch}`, "--dir", imageDir], { stdio: "inherit" });
-        } catch (err) {
-          throw new CliError(`Failed to download artifact from run ${runId}: ${err instanceof Error ? err.message : err}`);
+        // Stale snapshots from a previous pull would resume against the new
+        // qcow2 and crash; wipe them so capture rebuilds cleanly.
+        if (existsSync(snapshotDest)) unlinkSync(snapshotDest);
+        if (existsSync(snapshotRawDest)) unlinkSync(snapshotRawDest);
+        const downloaded = await downloadArtifactByName(repo, runId, `qemu-emulator-${arch}`, imageDir);
+        if (!downloaded) {
+          throw new CliError(`Artifact qemu-emulator-${arch} not found in workflow run ${runId}.`);
        }
        if (!existsSync(dest)) throw new CliError(`Expected image not found at ${dest} after download.`);
        console.log(`Downloaded: ${dest}`);
      } else {
-        pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag });
+        // Same stale-snapshot concern as the PR branch above.
+        const imageDir = emulatorImageDir();
+        const snapshotDest = join(imageDir, `stack-emulator-${arch}.savevm.zst`);
+        const snapshotRawDest = join(imageDir, `stack-emulator-${arch}.savevm.raw`);
+        if (existsSync(snapshotDest)) unlinkSync(snapshotDest);
+        if (existsSync(snapshotRawDest)) unlinkSync(snapshotRawDest);
+        await pullRelease(arch, { repo, branch: opts.branch, tag: opts.tag });
+      }
+
+      if (opts.skipSnapshot) {
+        console.log("--skip-snapshot: not capturing a local snapshot. First `stack emulator start` will cold-boot.");
+      } else {
+        await captureLocalSnapshot(arch);
      }
    });

@ -227,6 +566,7 @@ export function registerEmulatorCommand(program: Command) {
    .option("--config-file <path>", "Path to a config file; when set, credentials for this project are printed to stdout as JSON")
    .action(async (opts: { arch?: string, configFile?: string }) => {
      const arch = resolveArch(opts.arch);
+      preflightForVmStart("start", arch);

      let resolvedConfigFile: string | undefined;
      if (opts.configFile) {
@ -257,6 +597,7 @@ export function registerEmulatorCommand(program: Command) {
    .option("--config-file <path>", "Path to a config file; fetches credentials and injects STACK_PROJECT_ID / STACK_PUBLISHABLE_CLIENT_KEY / STACK_SECRET_SERVER_KEY into the child")
    .action(async (cmd: string, opts: { arch?: string, configFile?: string }) => {
      const arch = resolveArch(opts.arch);
+      preflightForVmStart("run", arch);

      let resolvedConfigFile: string | undefined;
      if (opts.configFile) {
@ -281,11 +622,17 @@ export function registerEmulatorCommand(program: Command) {
        const apiUrl = `http://127.0.0.1:${backendPort}`;
        childEnv.STACK_PROJECT_ID = creds.project_id;
        childEnv.NEXT_PUBLIC_STACK_PROJECT_ID = creds.project_id;
+        childEnv.VITE_STACK_PROJECT_ID = creds.project_id;
+        childEnv.EXPO_PUBLIC_STACK_PROJECT_ID = creds.project_id;
        childEnv.STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key;
        childEnv.NEXT_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key;
+        childEnv.VITE_STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key;
+        childEnv.EXPO_PUBLIC_STACK_PUBLISHABLE_CLIENT_KEY = creds.publishable_client_key;
        childEnv.STACK_SECRET_SERVER_KEY = creds.secret_server_key;
        childEnv.STACK_API_URL = apiUrl;
        childEnv.NEXT_PUBLIC_STACK_API_URL = apiUrl;
+        childEnv.VITE_STACK_API_URL = apiUrl;
+        childEnv.EXPO_PUBLIC_STACK_API_URL = apiUrl;
      }

      const child = spawn(cmd, { shell: true, stdio: "inherit", env: childEnv });
@ -304,25 +651,61 @@ export function registerEmulatorCommand(program: Command) {
          process.exit(exitCode);
        } else {
          console.log("\nStopping emulator...");
+          const warnStopFailed = (e: unknown) => {
+            const msg = e instanceof Error ? e.message : String(e);
+            process.stderr.write(`Failed to stop emulator cleanly: ${msg}\n`);
+          };
          runEmulator("stop")
-            .catch(() => { /* best-effort stop */ })
+            .catch(warnStopFailed)
            .finally(() => process.exit(exitCode));
        }
      });
    });

-  emulator.command("stop").description("Stop the emulator (data preserved; use 'reset' to clear)").action(() => runEmulator("stop"));
-  emulator.command("reset").description("Reset emulator state for a fresh boot").action(() => runEmulator("reset"));
-  emulator.command("status").description("Show emulator and service health").action(() => runEmulator("status"));
+  emulator
+    .command("stop")
+    .description("Stop the emulator (data preserved; use 'reset' to clear)")
+    .action(() => {
+      requireBinaries("stop", [bin("socat", "socat", "socat")]);
+      return runEmulator("stop");
+    });
+
+  emulator
+    .command("reset")
+    .description("Reset emulator state for a fresh boot")
+    .action(() => {
+      requireBinaries("reset", [bin("socat", "socat", "socat")]);
+      return runEmulator("reset");
+    });
+
+  emulator
+    .command("status")
+    .description("Show emulator and service health")
+    .action(() => {
+      requireBinaries("status", [
+        bin("curl", "curl", "curl"),
+        bin("nc", "ncat", "netcat"),
+      ]);
+      return runEmulator("status");
+    });

  emulator
    .command("list-releases")
    .description("List available emulator releases")
    .option("--repo <repo>", "GitHub repository (default: stack-auth/stack-auth)")
-    .action((opts) => {
-      const repo = opts.repo ?? "stack-auth/stack-auth";
+    .action(async (opts) => {
+      const repo = opts.repo ?? DEFAULT_REPO;
      console.log(`Available emulator releases from ${repo}:\n`);
-      const lines = gh(["release", "list", "--repo", repo, "--limit", "20"]).split("\n").filter((l) => l.toLowerCase().includes("emulator"));
+      type Release = { tag_name: string, name: string | null, published_at: string | null, draft: boolean, prerelease: boolean };
+      const releases = await ghApi<Release[]>(`/repos/${repo}/releases?per_page=50`);
+      const lines = releases
+        .filter((r) => (r.tag_name + " " + (r.name ?? "")).toLowerCase().includes("emulator"))
+        .slice(0, 20)
+        .map((r) => {
+          const status = r.draft ? "Draft" : r.prerelease ? "Pre-release" : "Latest";
+          const date = r.published_at ? r.published_at.slice(0, 10) : "";
+          return `${r.tag_name}\t${status}\t${date}`;
+        });
      if (lines.length === 0) console.log("No emulator releases found.");
      else for (const line of lines) console.log(line);
    });
--- a/packages/stack-cli/src/lib/iso.test.ts
+++ b/packages/stack-cli/src/lib/iso.test.ts
@ -0,0 +1,259 @@
+import { describe, expect, it } from "vitest";
+import { buildIso, type IsoFile } from "./iso.js";
+
+const SECTOR = 2048;
+
+// --- Test helpers: a minimal ISO 9660 parser, just enough to walk the
+// directory records we produce so tests can assert the bytes we emitted really
+// are addressable at the offsets claimed in the directory records.
+
+// Returns a view of the `sector`-th 2048-byte logical sector of the image.
+function readSector(iso: Buffer, sector: number): Buffer {
+  const start = sector * SECTOR;
+  return iso.subarray(start, start + SECTOR);
+}
+
+// Reads the type byte and the 5-character standard identifier that open a
+// volume descriptor sector.
+function readVolumeDescriptor(iso: Buffer, sector: number): { type: number, id: string } {
+  const descriptor = readSector(iso, sector);
+  const type = descriptor[0];
+  const id = descriptor.toString("ascii", 1, 6);
+  return { type, id };
+}
+
+type DirRecord = { // the subset of one directory record that parseDirRecords decodes
+  lenDr: number, // byte 0 of the record: its total length in bytes
+  extentSector: number, // little-endian uint32 at record offset 2
+  dataLength: number, // little-endian uint32 at record offset 10
+  isDir: boolean, // bit 0x02 of the flags byte at record offset 25
+  fileId: Buffer, // raw identifier bytes starting at record offset 33
+};
+
+// Walks the directory records packed into one sector, stopping at the first
+// zero length byte (the sector's padding) or at the end of the buffer.
+function parseDirRecords(sector: Buffer): DirRecord[] {
+  const parsed: DirRecord[] = [];
+  let pos = 0;
+  while (pos < sector.length && sector[pos] !== 0) {
+    const recordLength = sector[pos];
+    const extentSector = sector.readUInt32LE(pos + 2);
+    const dataLength = sector.readUInt32LE(pos + 10);
+    const isDir = (sector[pos + 25] & 0x02) !== 0;
+    const idStart = pos + 33;
+    const idLength = sector[pos + 32];
+    parsed.push({
+      lenDr: recordLength,
+      extentSector,
+      dataLength,
+      isDir,
+      // Copy the identifier so the record does not alias the sector buffer.
+      fileId: Buffer.from(sector.subarray(idStart, idStart + idLength)),
+    });
+    pos += recordLength;
+  }
+  return parsed;
+}
+
+// Resolves a file through the primary (ISO 9660) view: PVD at sector 16 →
+// root directory extent → record whose identifier is `isoName`
+// ("NAME.EXT;1"). Returns null when no record matches.
+function readIsoFile(iso: Buffer, isoName: string): Buffer | null {
+  // The root directory record sits at PVD offset 156; its extent is 2 bytes in.
+  const rootSector = readSector(iso, 16).readUInt32LE(156 + 2);
+  for (const record of parseDirRecords(readSector(iso, rootSector))) {
+    if (record.fileId.toString("ascii") !== isoName) continue;
+    const begin = record.extentSector * SECTOR;
+    return iso.subarray(begin, begin + record.dataLength);
+  }
+  return null;
+}
+
+// Resolves a file through the Joliet view: SVD at sector 17 → Joliet root
+// directory → record whose identifier equals `name` encoded as UCS-2 BE.
+// Returns null when sector 17 is not a type-2 descriptor or nothing matches.
+function readJolietFile(iso: Buffer, name: string): Buffer | null {
+  const svd = readSector(iso, 17);
+  if (svd[0] !== 2) return null;
+
+  const wanted = Buffer.alloc(name.length * 2);
+  Array.from({ length: name.length }, (_, i) => name.charCodeAt(i))
+    .forEach((unit, i) => wanted.writeUInt16BE(unit, i * 2));
+
+  const rootSector = svd.readUInt32LE(156 + 2);
+  const hit = parseDirRecords(readSector(iso, rootSector)).find((r) => r.fileId.equals(wanted));
+  if (!hit) return null;
+  const begin = hit.extentSector * SECTOR;
+  return iso.subarray(begin, begin + hit.dataLength);
+}
+
+// Fixture helper: a file of `size` bytes, every byte set to `byte` ('A' by default).
+function sampleFile(name: string, size: number, byte = 0x41): IsoFile {
+  const data = Buffer.alloc(size, byte);
+  return { name, data };
+}
+
+describe("buildIso — structural invariants", () => {
+  // Every case builds the same one-file image; only the payload text varies.
+  const isoWith = (payload: string): Buffer =>
+    buildIso("STACKCFG", [{ name: "a.txt", data: Buffer.from(payload) }]);
+
+  it("emits the ISO 9660 standard identifiers at sectors 16, 17, 18", () => {
+    const iso = isoWith("hi");
+    const expectedTypes: [number, number][] = [[16, 1], [17, 2], [18, 0xff]];
+    for (const [sector, type] of expectedTypes) {
+      expect(readVolumeDescriptor(iso, sector)).toEqual({ type, id: "CD001" });
+    }
+  });
+
+  it("stores the volume identifier verbatim in the PVD for blkid discovery", () => {
+    const pvd = readSector(isoWith("x"), 16);
+    // Volume Identifier field starts at offset 40; "STACKCFG" is 8 bytes.
+    const label = pvd.toString("ascii", 40, 48);
+    expect(label).toBe("STACKCFG");
+  });
+
+  it("stores the volume identifier in the Joliet SVD as UCS-2 BE", () => {
+    const svd = readSector(isoWith("x"), 17);
+    // 8 characters × 2 bytes, big-endian, starting at offset 40.
+    const decoded = Array.from({ length: 8 }, (_, i) => String.fromCharCode(svd.readUInt16BE(40 + i * 2))).join("");
+    expect(decoded).toBe("STACKCFG");
+  });
+
+  it("sets the Joliet escape sequence %/E", () => {
+    const svd = readSector(isoWith("x"), 17);
+    expect([svd[88], svd[89], svd[90]]).toEqual([0x25, 0x2f, 0x45]);
+  });
+
+  it("declares a volume space size equal to the emitted sector count", () => {
+    const iso = isoWith("hello world");
+    const declaredSectors = readSector(iso, 16).readUInt32LE(80);
+    expect(iso.length).toBe(declaredSectors * SECTOR);
+  });
+});
+
+describe("buildIso — file round-trip", () => {
+  // Two small env files, rebuilt per call so tests never share buffers.
+  const envFixtures = (): IsoFile[] => [
+    { name: "runtime.env", data: Buffer.from("KEY=value\n") },
+    { name: "base.env", data: Buffer.from("FOO=bar\n") },
+  ];
+
+  // Builds a one-file image and reads that file back through the Joliet view.
+  const jolietRoundTrip = (name: string, data: Buffer): Buffer | null =>
+    readJolietFile(buildIso("STACKCFG", [{ name, data }]), name);
+
+  it("makes files readable by ISO 9660 name", () => {
+    const iso = buildIso("STACKCFG", envFixtures());
+    const runtime = readIsoFile(iso, "RUNTIME.ENV;1");
+    const base = readIsoFile(iso, "BASE.ENV;1");
+    expect(runtime?.toString()).toBe("KEY=value\n");
+    expect(base?.toString()).toBe("FOO=bar\n");
+  });
+
+  it("makes files readable by Joliet (lowercase) name", () => {
+    const iso = buildIso("STACKCFG", envFixtures());
+    const runtime = readJolietFile(iso, "runtime.env");
+    const base = readJolietFile(iso, "base.env");
+    expect(runtime?.toString()).toBe("KEY=value\n");
+    expect(base?.toString()).toBe("FOO=bar\n");
+  });
+
+  it("preserves exact file contents byte-for-byte", () => {
+    const payload = Buffer.from([0x00, 0xff, 0x7f, 0x80, 0x41, 0x42, 0x43]);
+    expect(jolietRoundTrip("bin.dat", payload)?.equals(payload)).toBe(true);
+  });
+
+  it("handles files whose length is exactly one sector", () => {
+    const payload = Buffer.alloc(SECTOR, 0x37);
+    expect(jolietRoundTrip("one.bin", payload)?.equals(payload)).toBe(true);
+  });
+
+  it("handles files that span multiple sectors", () => {
+    const payload = Buffer.alloc(SECTOR * 3 + 17, 0x55);
+    expect(jolietRoundTrip("big.bin", payload)?.equals(payload)).toBe(true);
+  });
+
+  it("keeps files byte-exact at the claimed extent sector across multi-file layouts", () => {
+    // Each file has its own fill byte, so a swapped or shifted extent is visible.
+    const fingerprinted: IsoFile[] = [
+      { name: "alpha.bin", data: Buffer.alloc(SECTOR + 5, 0xaa) },
+      { name: "beta.bin", data: Buffer.alloc(SECTOR * 2, 0xbb) },
+      { name: "gamma.bin", data: Buffer.alloc(42, 0xcc) },
+    ];
+    const iso = buildIso("STACKCFG", fingerprinted);
+    fingerprinted.forEach(({ name, data }) => {
+      expect(readJolietFile(iso, name)?.equals(data)).toBe(true);
+    });
+  });
+});
+
+describe("buildIso — edge cases", () => {
+  it("handles empty files without misaligning subsequent file extents", () => {
+    // Regression: `padToSector(Buffer.alloc(0))` used to return a 0-byte
+    // buffer, but the layout reserved 1 sector for the empty file — the next
+    // file was then read from the empty file's reserved slot.
+    const files: IsoFile[] = [
+      { name: "empty.txt", data: Buffer.alloc(0) },
+      { name: "after.txt", data: Buffer.from("marker\n") },
+    ];
+    const iso = buildIso("STACKCFG", files);
+    expect(readJolietFile(iso, "empty.txt")?.length).toBe(0);
+    expect(readJolietFile(iso, "after.txt")?.toString()).toBe("marker\n");
+    // And: the declared volume space size must cover every emitted byte.
+    const pvd = readSector(iso, 16);
+    expect(iso.length).toBe(pvd.readUInt32LE(80) * SECTOR); // offset 80: Volume Space Size (LE half)
+  });
+
+  it("writes the exact file length in the directory record (not padded to sector)", () => {
+    const content = Buffer.from("abc");
+    const iso = buildIso("STACKCFG", [{ name: "tiny.txt", data: content }]);
+    const svd = readSector(iso, 17);
+    const rootSector = svd.readUInt32LE(156 + 2); // root dir record at 156, extent 2 bytes in
+    const records = parseDirRecords(readSector(iso, rootSector));
+    const file = records.find((r) => !r.isDir); // skips the "." and ".." directory entries
+    expect(file?.dataLength).toBe(3);
+  });
+
+  it("places the root directory records for . and .. pointing at the root extent", () => {
+    const iso = buildIso("STACKCFG", [{ name: "x.txt", data: Buffer.from("1") }]);
+    const svd = readSector(iso, 17);
+    const rootSector = svd.readUInt32LE(156 + 2);
+    const records = parseDirRecords(readSector(iso, rootSector));
+    expect(records.length).toBeGreaterThanOrEqual(2);
+    expect(records[0].fileId.equals(Buffer.from([0x00]))).toBe(true); // "." is the single byte 0x00
+    expect(records[1].fileId.equals(Buffer.from([0x01]))).toBe(true); // ".." is the single byte 0x01
+    expect(records[0].isDir).toBe(true);
+    expect(records[0].extentSector).toBe(rootSector);
+    expect(records[1].extentSector).toBe(rootSector);
+  });
+
+  it("truncates volume identifiers longer than 32 bytes rather than corrupting the PVD", () => {
+    const longId = "A".repeat(64);
+    const iso = buildIso(longId, [{ name: "x.txt", data: Buffer.from("1") }]);
+    const pvd = readSector(iso, 16);
+    expect(pvd.toString("ascii", 40, 40 + 32)).toBe("A".repeat(32));
+    // The rest of the PVD and the Joliet SVD in sector 17 must not be clobbered.
+    expect(pvd[881]).toBe(1); // offset 881: File Structure Version, written after the id fields
+    expect(readVolumeDescriptor(iso, 17).type).toBe(2);
+  });
+
+  it("rejects an input set whose root directory record overflows one sector", () => {
+    // Each Joliet dir record for an N-char name is 33 + 2N + 1 pad = 2N + 34
+    // bytes (2N is always even). A sector is 2048. These 28-char names take
+    // 90 bytes each, so eighty of them (7200) plus "." + ".." (68) overflow.
+    const many: IsoFile[] = Array.from({ length: 80 }, (_, i) => ({
+      name: `file-${String(i).padStart(3, "0")}-padding-padding.bin`,
+      data: Buffer.from("x"),
+    }));
+    expect(() => buildIso("STACKCFG", many)).toThrow(/Root directory exceeds/);
+  });
+
+  it("produces a sector-aligned buffer regardless of file sizes", () => {
+    for (const size of [0, 1, SECTOR - 1, SECTOR, SECTOR + 1, SECTOR * 5 - 1]) { // sizes around sector boundaries
+      const iso = buildIso("STACKCFG", [sampleFile("a.bin", size)]);
+      expect(iso.length % SECTOR).toBe(0);
+    }
+  });
+});
+
+describe("buildIso — multiple file sector layout", () => {
+  it("assigns non-overlapping extents to all files", () => {
+    const iso = buildIso("STACKCFG", [
+      sampleFile("a.bin", 10, 0x01),
+      sampleFile("b.bin", SECTOR, 0x02),
+      sampleFile("c.bin", SECTOR * 2 + 500, 0x03),
+      sampleFile("d.bin", 1, 0x04),
+    ]);
+    const jolietRoot = readSector(iso, 17).readUInt32LE(156 + 2);
+    const fileRecords = parseDirRecords(readSector(iso, jolietRoot)).filter((r) => !r.isDir);
+
+    // Sorted by start sector, each extent must begin at or after the end of
+    // the one before it; even an empty file is counted as one sector.
+    const byExtent = fileRecords.slice().sort((a, b) => a.extentSector - b.extentSector);
+    byExtent.slice(1).forEach((record, i) => {
+      const before = byExtent[i];
+      const sectorsUsed = Math.max(1, Math.ceil(before.dataLength / SECTOR));
+      expect(record.extentSector).toBeGreaterThanOrEqual(before.extentSector + sectorsUsed);
+    });
+  });
+});
--- a/packages/stack-cli/src/lib/iso.ts
+++ b/packages/stack-cli/src/lib/iso.ts
@ -0,0 +1,399 @@
+// Minimal ISO 9660 + Joliet writer used to package the runtime config blob
+// that the emulator VM mounts at boot via /dev/disk/by-label/STACKCFG.
+//
+// Replaces the host-side dependency on hdiutil/mkisofs/genisoimage. Only the
+// subset of ECMA-119 needed for a single-level root directory of small UTF-8
+// text files is implemented: PVD + Joliet SVD + path tables + root dir + file
+// data. Names are emitted in both ISO 9660 ("BASE.ENV;1") and Joliet
+// (lower-case UCS-2) form so Linux mounts the Joliet view by default and the
+// guest's `source /mnt/stack-runtime/runtime.env` works unchanged.
+
+import { writeFileSync } from "fs";
+
+const SECTOR = 2048;
+
+// Encodes `n` as a "both-byte-order" 32-bit field: the value little-endian in
+// bytes 0-3, then the same value big-endian in bytes 4-7.
+function bothEndian32(n: number): Buffer {
+  const field = Buffer.alloc(8);
+  field.writeUInt32BE(n, 4);
+  field.writeUInt32LE(n, 0);
+  return field;
+}
+
+// Encodes `n` as a "both-byte-order" 16-bit field: the value little-endian in
+// bytes 0-1, then the same value big-endian in bytes 2-3.
+function bothEndian16(n: number): Buffer {
+  const field = Buffer.alloc(4);
+  field.writeUInt16BE(n, 2);
+  field.writeUInt16LE(n, 0);
+  return field;
+}
+
+// Returns a fixed-width ASCII field of `len` bytes: `s` truncated to `len`
+// characters, with the remainder filled with the first character of `fill`
+// (a space by default).
+function padString(s: string, len: number, fill = " "): Buffer {
+  const fillByte = fill.charCodeAt(0);
+  const field = Buffer.alloc(len, fillByte);
+  const clipped = s.slice(0, len);
+  field.write(clipped, 0, "ascii");
+  return field;
+}
+
+// Encodes every UTF-16 code unit of `s` as two big-endian bytes (the
+// identifier encoding used by the Joliet view). No padding is added.
+function ucs2BE(s: string): Buffer {
+  const encoded = Buffer.alloc(s.length * 2);
+  let byteOffset = 0;
+  while (byteOffset < encoded.length) {
+    encoded.writeUInt16BE(s.charCodeAt(byteOffset / 2), byteOffset);
+    byteOffset += 2;
+  }
+  return encoded;
+}
+
+// Returns a fixed-width UCS-2 BE field of `byteLen` bytes: the code units of
+// `s` (truncated to the whole 16-bit slots available), then U+0020 in every
+// remaining slot.
+function padUcs2BE(s: string, byteLen: number): Buffer {
+  const SPACE = 0x0020;
+  const field = Buffer.alloc(byteLen);
+  const slots = Math.floor(byteLen / 2);
+  for (let slot = 0; slot < slots; slot++) {
+    const unit = slot < s.length ? s.charCodeAt(slot) : SPACE;
+    field.writeUInt16BE(unit, slot * 2);
+  }
+  // Odd-width fields (e.g. the 37-byte Copyright/Abstract/Bibliographic IDs)
+  // have one byte left over after the last whole slot; it is set to a single
+  // space byte (the spec allows either NUL or 0x20 padding).
+  if (byteLen % 2 === 1) {
+    field[byteLen - 1] = SPACE;
+  }
+  return field;
+}
+
+// Encodes `d` (read in UTC) as the 7-byte directory-record timestamp: years
+// since 1900, month (1-12), day, hour, minute, second, and a final byte of 0.
+function dirRecordingDate(d: Date): Buffer {
+  return Buffer.from([
+    d.getUTCFullYear() - 1900,
+    d.getUTCMonth() + 1,
+    d.getUTCDate(),
+    d.getUTCHours(),
+    d.getUTCMinutes(),
+    d.getUTCSeconds(),
+    0,
+  ]);
+}
+
+// Encodes `d` (read in UTC) as the 17-byte volume-descriptor timestamp:
+// 16 ASCII digits "YYYYMMDDHHMMSS" + "00", followed by a final byte of 0.
+function volumeDate(d: Date): Buffer {
+  const digits = (value: number, width: number) => value.toString().padStart(width, "0");
+  const stamp = [
+    digits(d.getUTCFullYear(), 4),
+    digits(d.getUTCMonth() + 1, 2),
+    digits(d.getUTCDate(), 2),
+    digits(d.getUTCHours(), 2),
+    digits(d.getUTCMinutes(), 2),
+    digits(d.getUTCSeconds(), 2),
+    "00",
+  ].join("");
+  const field = Buffer.alloc(17);
+  field.write(stamp, 0, 16, "ascii");
+  field[16] = 0;
+  return field;
+}
+
+// Volume timestamp used for the Expiration and Effective fields: sixteen
+// ASCII "0" digits followed by a final byte of 0.
+const UNUSED_VOLUME_DATE = Buffer.concat([
+  Buffer.alloc(16, "0".charCodeAt(0)),
+  Buffer.alloc(1),
+]);
+
+// Encodes the ISO 9660 (primary view) identifier for `name`: the name is
+// upper-cased here and the ";1" version suffix appended, e.g. "base.env" →
+// "BASE.ENV;1". The name is not otherwise validated or shortened.
+function isoFileIdentifier(name: string): Buffer {
+  return Buffer.from(name.toUpperCase() + ";1", "ascii");
+}
+
+// Builds a single directory record. `idBytes` is the file identifier (ASCII
+// for the ISO 9660 view, UCS-2 BE for Joliet); pass a single 0x00 or 0x01
+// byte for the "." / ".." entries.
+//
+// Byte layout: [0] record length, [1] extended attribute record length,
+// [2..9] extent sector (both-endian), [10..17] data length (both-endian),
+// [18..24] recording date, [25] flags (0x02 = directory), [26] file unit
+// size, [27] interleave gap, [28..31] volume sequence number (both-endian),
+// [32] identifier length, [33..] identifier, then one zero pad byte when the
+// identifier length is even.
+//
+// Throws when the record would be longer than 255 bytes (identifier longer
+// than 221 bytes): the record length is stored in a single byte, and a
+// wrapped value would corrupt every record that follows in the directory.
+function buildDirRecord(
+  extentSector: number,
+  dataLength: number,
+  isDir: boolean,
+  recDate: Buffer,
+  idBytes: Buffer,
+): Buffer {
+  const lenFi = idBytes.length;
+  const pad = lenFi % 2 === 0 ? 1 : 0;
+  const lenDr = 33 + lenFi + pad;
+  if (lenDr > 0xff) {
+    throw new Error(
+      `Directory record for a ${lenFi}-byte file identifier would be ${lenDr} bytes; the maximum is 255.`,
+    );
+  }
+  const buf = Buffer.alloc(lenDr);
+  buf[0] = lenDr;
+  buf[1] = 0;
+  bothEndian32(extentSector).copy(buf, 2);
+  bothEndian32(dataLength).copy(buf, 10);
+  recDate.copy(buf, 18);
+  buf[25] = isDir ? 0x02 : 0x00;
+  buf[26] = 0;
+  buf[27] = 0;
+  bothEndian16(1).copy(buf, 28);
+  buf[32] = lenFi;
+  idBytes.copy(buf, 33);
+  return buf;
+}
+
+// Builds the root directory extent: the "." and ".." records (both pointing
+// at the root itself) followed by one record per file, packed into
+// zero-padded 2048-byte sectors.
+function buildRootDirEntries(
+  rootSector: number,
+  rootSize: number,
+  recDate: Buffer,
+  files: { idBytes: Buffer, sector: number, size: number }[],
+): Buffer {
+  const selfAndParent = [0x00, 0x01].map((id) =>
+    buildDirRecord(rootSector, rootSize, true, recDate, Buffer.from([id])));
+  const fileRecords = files.map((f) =>
+    buildDirRecord(f.sector, f.size, false, recDate, f.idBytes));
+
+  // A record may not straddle a sector boundary: when the next record does
+  // not fit in the space left, close the current sector with zero padding
+  // and start a new one.
+  const sectors: Buffer[] = [];
+  let pending: Buffer[] = [];
+  let used = 0;
+  const closeSector = () => {
+    sectors.push(Buffer.concat([...pending, Buffer.alloc(SECTOR - used)]));
+    pending = [];
+    used = 0;
+  };
+  for (const record of [...selfAndParent, ...fileRecords]) {
+    if (used + record.length > SECTOR) closeSector();
+    pending.push(record);
+    used += record.length;
+  }
+  if (used > 0) closeSector();
+  return Buffer.concat(sectors);
+}
+
+// Single-entry (root only) path table, 10 bytes. `byteOrder` selects the
+// encoding of the two numeric fields: "LE" for the L table, "BE" for the M
+// table.
+function buildPathTable(rootSector: number, byteOrder: "LE" | "BE"): Buffer {
+  const table = Buffer.alloc(10);
+  table[0] = 1; // LEN_DI
+  table[1] = 0; // EAR length
+  switch (byteOrder) {
+    case "LE":
+      table.writeUInt32LE(rootSector, 2);
+      table.writeUInt16LE(1, 6);
+      break;
+    default:
+      table.writeUInt32BE(rootSector, 2);
+      table.writeUInt16BE(1, 6);
+      break;
+  }
+  table[8] = 0; // root identifier
+  table[9] = 0; // pad
+  return table;
+}
+
+// Zero-pads `buf` up to the next multiple of the sector size. A buffer that
+// is already aligned (including an empty one) is returned as-is.
+function padToSector(buf: Buffer): Buffer {
+  const overhang = buf.length % SECTOR;
+  return overhang === 0
+    ? buf
+    : Buffer.concat([buf, Buffer.alloc(SECTOR - overhang)]);
+}
+
+// Builds one 2048-byte volume descriptor sector: the Primary Volume
+// Descriptor (type 1) when `joliet` is false, or the Joliet Supplementary
+// Volume Descriptor (type 2) when it is true. In the Joliet form every text
+// field is UCS-2 BE and the "%/E" escape sequence is written at offset 88.
+function buildVolumeDescriptor(opts: {
+  joliet: boolean,
+  volumeId: string,
+  volumeSpaceSize: number,
+  pathTableSize: number,
+  lPathSector: number,
+  mPathSector: number,
+  rootDirRecord: Buffer,
+  date: Buffer,
+}): Buffer {
+  const { joliet } = opts;
+  // Fixed-width text field in this descriptor's character set.
+  const textField = (s: string, n: number): Buffer =>
+    (joliet ? padUcs2BE(s, n) : padString(s, n));
+
+  const vd = Buffer.alloc(SECTOR);
+  vd[0] = joliet ? 2 : 1; // descriptor type
+  vd.write("CD001", 1, 5, "ascii"); // standard identifier
+  vd[6] = 1; // descriptor version
+  vd[7] = 0;
+
+  textField("", 32).copy(vd, 8); // System Identifier (32 bytes)
+
+  // Volume Identifier (32 bytes) — must be "STACKCFG" so udev exposes it as
+  // /dev/disk/by-label/STACKCFG. blkid reads from PVD by default but Joliet
+  // takes precedence when both are present.
+  textField(opts.volumeId, 32).copy(vd, 40);
+
+  bothEndian32(opts.volumeSpaceSize).copy(vd, 80);
+
+  if (joliet) {
+    // Escape sequence for UCS-2 Level 3 ("%/E") at offset 88 (32-byte field).
+    Buffer.from([0x25, 0x2f, 0x45]).copy(vd, 88);
+  }
+
+  bothEndian16(1).copy(vd, 120); // Volume Set Size
+  bothEndian16(1).copy(vd, 124); // Volume Sequence Number
+  bothEndian16(SECTOR).copy(vd, 128); // Logical Block Size
+  bothEndian32(opts.pathTableSize).copy(vd, 132);
+  vd.writeUInt32LE(opts.lPathSector, 140);
+  vd.writeUInt32LE(0, 144); // optional L
+  vd.writeUInt32BE(opts.mPathSector, 148);
+  vd.writeUInt32BE(0, 152); // optional M
+
+  opts.rootDirRecord.copy(vd, 156);
+
+  // Identifier fields that are all left blank: [offset, byte length].
+  const blankFields: [number, number][] = [
+    [190, 128], // Volume Set Identifier
+    [318, 128], // Publisher Identifier
+    [446, 128], // Data Preparer Identifier
+    [574, 128], // Application Identifier
+    [702, 37], // Copyright File Identifier
+    [739, 37], // Abstract File Identifier
+    [776, 37], // Bibliographic File Identifier
+  ];
+  for (const [offset, length] of blankFields) {
+    textField("", length).copy(vd, offset);
+  }
+
+  opts.date.copy(vd, 813); // Creation
+  opts.date.copy(vd, 830); // Modification
+  UNUSED_VOLUME_DATE.copy(vd, 847); // Expiration
+  UNUSED_VOLUME_DATE.copy(vd, 864); // Effective
+
+  vd[881] = 1; // File Structure Version
+  return vd;
+}
+
+// Builds the Volume Descriptor Set Terminator sector: type byte 0xff, the
+// "CD001" standard identifier, version 1, and zeros for the rest.
+function buildVolumeDescriptorTerminator(): Buffer {
+  const header = Buffer.from("\xffCD001\x01", "latin1");
+  return Buffer.concat([header, Buffer.alloc(SECTOR - header.length)]);
+}
+
+export type IsoFile = { name: string, data: Buffer }; // one root-directory file: its name and exact contents
+
+// Builds a complete ISO 9660 + Joliet image in memory and returns its bytes.
+//
+// Sector layout: 0-15 system area (zeros), 16 PVD, 17 Joliet SVD,
+// 18 terminator, 19/20 ISO L/M path tables, 21/22 Joliet L/M path tables,
+// 23 ISO root directory, 24 Joliet root directory, 25+ file data (each file
+// starts on a sector boundary and takes at least one sector). Both root
+// directories point at the same file extents.
+//
+// Throws if either root directory would need more than one sector.
+export function buildIso(volumeId: string, files: IsoFile[]): Buffer {
+  const now = new Date();
+  const recDate = dirRecordingDate(now);
+  const volDate = volumeDate(now);
+
+  // Identifier bytes per file for each view, index-aligned with `files`.
+  const isoIds = files.map((f) => isoFileIdentifier(f.name));
+  const jolietIds = files.map((f) => ucs2BE(f.name));
+
+  // The root directory size depends only on the identifier lengths, so it
+  // can be checked before any file extents are assigned.
+  const DOT_RECORDS_BYTES = 34 + 34; // "." and ".."
+  const recordLength = (lenFi: number) => 33 + lenFi + (lenFi % 2 === 0 ? 1 : 0);
+  const rootBytes = (ids: Buffer[]) =>
+    ids.reduce((total, id) => total + recordLength(id.length), DOT_RECORDS_BYTES);
+  const isoRootSize = rootBytes(isoIds);
+  const jolietRootSize = rootBytes(jolietIds);
+  if (isoRootSize > SECTOR || jolietRootSize > SECTOR) {
+    throw new Error(`Root directory exceeds ${SECTOR} bytes; multi-sector root not supported.`);
+  }
+
+  // Sector layout. Sectors 16, 17 and 18 hold the PVD, the Joliet SVD and
+  // the terminator; everything after is handed out from a running cursor.
+  const SYSTEM_AREA_SECTORS = 16;
+  let cursor = SYSTEM_AREA_SECTORS + 3;
+  const isoLPathSector = cursor++;
+  const isoMPathSector = cursor++;
+  const jolietLPathSector = cursor++;
+  const jolietMPathSector = cursor++;
+  const isoRootSector = cursor++;
+  const jolietRootSector = cursor++;
+
+  // Every file, including an empty one, reserves at least one whole sector.
+  const placed = files.map((f) => {
+    const sector = cursor;
+    const sectorCount = Math.max(1, Math.ceil(f.data.length / SECTOR));
+    cursor += sectorCount;
+    return { data: f.data, sector, size: f.data.length, reservedBytes: sectorCount * SECTOR };
+  });
+
+  const totalSectors = cursor;
+  const PATH_TABLE_BYTES = 10;
+
+  // The root directory record embedded in each volume descriptor has the
+  // same layout as a regular record, with the single byte 0x00 as identifier.
+  const volumeDescriptorFor = (
+    joliet: boolean,
+    lPathSector: number,
+    mPathSector: number,
+    rootSector: number,
+  ) => buildVolumeDescriptor({
+    joliet,
+    volumeId,
+    volumeSpaceSize: totalSectors,
+    pathTableSize: PATH_TABLE_BYTES,
+    lPathSector,
+    mPathSector,
+    rootDirRecord: buildDirRecord(rootSector, SECTOR, true, recDate, Buffer.from([0x00])),
+    date: volDate,
+  });
+
+  // One root directory per view; only the identifier bytes differ.
+  const rootDirFor = (rootSector: number, ids: Buffer[]) => buildRootDirEntries(
+    rootSector,
+    SECTOR,
+    recDate,
+    ids.map((idBytes, i) => ({ idBytes, sector: placed[i].sector, size: placed[i].size })),
+  );
+
+  // Each file must occupy exactly the bytes the layout reserved for it. An
+  // empty file still reserves one sector, so pad to the reserved size rather
+  // than to the next multiple of the data length — otherwise every later
+  // file's extent would be off.
+  const fileBuffers = placed.map(({ data, reservedBytes }) => {
+    if (data.length === reservedBytes) return data;
+    const padded = Buffer.alloc(reservedBytes);
+    data.copy(padded, 0);
+    return padded;
+  });
+
+  return Buffer.concat([
+    Buffer.alloc(SYSTEM_AREA_SECTORS * SECTOR),
+    volumeDescriptorFor(false, isoLPathSector, isoMPathSector, isoRootSector),
+    volumeDescriptorFor(true, jolietLPathSector, jolietMPathSector, jolietRootSector),
+    buildVolumeDescriptorTerminator(),
+    padToSector(buildPathTable(isoRootSector, "LE")),
+    padToSector(buildPathTable(isoRootSector, "BE")),
+    padToSector(buildPathTable(jolietRootSector, "LE")),
+    padToSector(buildPathTable(jolietRootSector, "BE")),
+    rootDirFor(isoRootSector, isoIds),
+    rootDirFor(jolietRootSector, jolietIds),
+    ...fileBuffers,
+  ]);
+}
+
+// Builds the image with buildIso and writes it to `path` synchronously.
+export function writeIso(path: string, volumeId: string, files: IsoFile[]): void {
+  writeFileSync(path, buildIso(volumeId, files));
+}
--- a/packages/stack-cli/vitest.config.ts
+++ b/packages/stack-cli/vitest.config.ts
@ -0,0 +1,19 @@
+import { defineConfig, mergeConfig } from 'vitest/config';
+import sharedConfig from '../../vitest.shared';
+
+// stack-cli overrides layered on top of the shared vitest config.
+const stackCliConfig = defineConfig({
+  test: {
+    // Override the shared `maxWorkers: 8` — with it set, tinypool defaults
+    // minThreads to the host's available parallelism, producing
+    // "minThreads/maxThreads must not conflict" on machines with >8 cores.
+    poolOptions: {
+      threads: { minThreads: 1, maxThreads: 4 },
+    },
+  },
+});
+
+export default mergeConfig(sharedConfig, stackCliConfig);
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@ -746,7 +746,7 @@ importers:
        version: 1.166.6(crossws@0.4.4(srvx@0.8.16))
      nitro:
        specifier: ^3.0.0
-        version: 3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(rolldown@1.0.0-rc.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2)
+        version: 3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2)
      react:
        specifier: 19.2.1
        version: 19.2.1
@ -2097,6 +2097,9 @@ importers:
      commander:
        specifier: ^13.1.0
        version: 13.1.0
+      extract-zip:
+        specifier: ^2.0.1
+        version: 2.0.1
      jiti:
        specifier: ^2.4.2
        version: 2.6.1
@ -11395,6 +11398,7 @@ packages:
  basic-ftp@5.2.0:
    resolution: {integrity: sha512-VoMINM2rqJwJgfdHq6RiUudKt2BV+FY5ZFezP/ypmwayk68+NzzAQy4XXLlqsGD4MCzq3DrmNFD/uUmBJuGoXw==}
    engines: {node: '>=10.0.0'}
+    deprecated: Security vulnerability fixed in 5.2.1, please upgrade

  bcrypt@6.0.0:
    resolution: {integrity: sha512-cU8v/EGSrnH+HnxV2z0J7/blxH8gq7Xh2JFT6Aroax7UohdmiJJlxApMxtKfuI7z68NvvVcmR78k2LbT6efhRg==}
@ -33362,7 +33366,7 @@ snapshots:
      debug: 4.4.3
      enhanced-resolve: 5.17.1
      eslint: 8.57.1
-      eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1)
+      eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.56.1(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1)
      fast-glob: 3.3.3
      get-tsconfig: 4.8.1
      is-bun-module: 1.2.1
@ -33405,7 +33409,7 @@ snapshots:
    transitivePeerDependencies:
      - supports-color

-  eslint-module-utils@2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1):
+  eslint-module-utils@2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.56.1(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1):
    dependencies:
      debug: 3.2.7
    optionalDependencies:
@ -33483,7 +33487,7 @@ snapshots:
      doctrine: 2.1.0
      eslint: 8.57.1
      eslint-import-resolver-node: 0.3.9
-      eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1)
+      eslint-module-utils: 2.12.0(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@6.21.0(eslint@8.57.1)(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0(@typescript-eslint/parser@8.56.1(eslint@8.57.1)(typescript@5.9.3))(eslint@8.57.1))(eslint@8.57.1))(eslint@8.57.1)
      hasown: 2.0.2
      is-core-module: 2.15.1
      is-glob: 4.0.3
@ -37347,7 +37351,7 @@ snapshots:
      jsonpath-plus: 10.4.0
      lodash.topath: 4.5.2

-  nitro@3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(rolldown@1.0.0-rc.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2):
+  nitro@3.0.0(@electric-sql/pglite@0.3.2)(chokidar@4.0.3)(lru-cache@11.2.2)(mysql2@3.15.3)(vite@7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0))(xml2js@0.6.2):
    dependencies:
      consola: 3.4.2
      cookie-es: 2.0.0
@ -37367,7 +37371,6 @@ snapshots:
      unenv: 2.0.0-rc.21
      unstorage: 2.0.0-alpha.3(chokidar@4.0.3)(db0@0.3.4(@electric-sql/pglite@0.3.2)(mysql2@3.15.3))(lru-cache@11.2.2)(ofetch@1.5.1)
    optionalDependencies:
-      rolldown: 1.0.0-rc.3
      vite: 7.3.1(@types/node@22.19.0)(jiti@2.6.1)(lightningcss@1.30.1)(terser@5.44.0)(tsx@4.21.0)(yaml@2.8.0)
      xml2js: 0.6.2
    transitivePeerDependencies: