diff --git a/.github/workflows/qemu-emulator-build.yaml b/.github/workflows/qemu-emulator-build.yaml index e4a42207c..7b5833aab 100644 --- a/.github/workflows/qemu-emulator-build.yaml +++ b/.github/workflows/qemu-emulator-build.yaml @@ -47,7 +47,20 @@ jobs: - name: Install QEMU dependencies run: | sudo apt-get update - sudo apt-get install -y qemu-system-x86 qemu-system-arm qemu-utils genisoimage socat qemu-efi-aarch64 + sudo apt-get install -y qemu-system-x86 qemu-system-arm qemu-kvm qemu-utils genisoimage socat qemu-efi-aarch64 + + - name: Enable KVM access + run: | + echo 'KERNEL=="kvm", GROUP="kvm", MODE="0666", OPTIONS+="static_node=kvm"' \ + | sudo tee /etc/udev/rules.d/99-kvm4all.rules + sudo udevadm control --reload-rules + sudo udevadm trigger --name-match=kvm || true + ls -la /dev/kvm || echo "no /dev/kvm present" + if [ -w /dev/kvm ]; then + echo "KVM is writable — hardware acceleration will be used" + else + echo "WARNING: /dev/kvm is not writable — will fall back to TCG (very slow)" + fi - name: Build QEMU image run: | diff --git a/docker/local-emulator/qemu/build-image.sh b/docker/local-emulator/qemu/build-image.sh index 2f773a793..7d73c0ead 100755 --- a/docker/local-emulator/qemu/build-image.sh +++ b/docker/local-emulator/qemu/build-image.sh @@ -237,6 +237,7 @@ build_one() { local qemu_base pid elapsed total_build_lines local last_build_lines=0 local guest_exited=false + local guest_failed=false local start_time=$SECONDS cp "$base_img" "$tmp_img" @@ -258,6 +259,7 @@ build_one() { : > "$serial_log" : > "$provision_log" qemu_base="$(qemu_cmd_prefix_for_arch "$arch")" + log "QEMU command prefix (${arch}): $qemu_base" # shellcheck disable=SC2086 $qemu_base \ @@ -282,6 +284,11 @@ build_one() { break fi + if contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_FAILED"; then + guest_failed=true + break + fi + if [ -f "$provision_log" ]; then total_build_lines="$(line_count "$provision_log")" if [ "$total_build_lines" -gt "$last_build_lines" ]; then @@ -308,7 +315,9 @@ build_one() { echo "" if ! contains_provision_marker "$provision_log" "$serial_log" "STACK_CLOUD_INIT_DONE"; then - if [ "$guest_exited" = true ]; then + if [ "$guest_failed" = true ]; then + err "Guest provisioning reported failure for emulator (${arch})" + elif [ "$guest_exited" = true ]; then err "Provisioning exited before completion for emulator (${arch})" else err "Provisioning timed out for emulator (${arch})" diff --git a/docker/local-emulator/qemu/cloud-init/emulator/user-data b/docker/local-emulator/qemu/cloud-init/emulator/user-data index 5b92e3e35..4dcf7bda0 100644 --- a/docker/local-emulator/qemu/cloud-init/emulator/user-data +++ b/docker/local-emulator/qemu/cloud-init/emulator/user-data @@ -273,7 +273,7 @@ write_files: -v stack-inbucket-data:/data/inbucket \ -d stack-local-emulator-slim - smoke_timeout=120 + smoke_timeout=300 smoke_elapsed=0 smoke_passed=false while [ "$smoke_elapsed" -lt "$smoke_timeout" ]; do @@ -286,13 +286,22 @@ write_files: smoke_elapsed=$((smoke_elapsed + 2)) done - docker stop smoke-test 2>/dev/null || true - sleep 2 - if [ "$smoke_passed" = "false" ]; then log "SMOKE TEST FAILED: backend /health?db=1 did not return 200 within ${smoke_timeout}s" + log "--- docker ps -a ---" + docker ps -a 2>&1 | while IFS= read -r line; do log "ps: $line"; done || true + log "--- smoke-test container logs (last 200 lines) ---" + docker logs --tail 200 smoke-test 2>&1 | while IFS= read -r line; do log "smoke-test: $line"; done || true + log "--- free -m ---" + free -m 2>&1 | while IFS= read -r line; do log "mem: $line"; done || true + log "--- curl -v /health?db=1 ---" + curl -v --max-time 5 http://127.0.0.1:8102/health?db=1 2>&1 | while IFS= read -r line; do log "curl: $line"; done || true + docker stop smoke-test 2>/dev/null || true exit 1 fi + + docker stop smoke-test 2>/dev/null || true + sleep 2 log "Smoke test passed (${smoke_elapsed}s)." log "Flattening image (docker export/import)..." @@ -363,8 +372,17 @@ write_files: cleanup() { local status=$? - if [ "$status" -ne 0 ] && [ -n "${STACK_PROVISION_LOG_FILE:-}" ]; then - printf 'ERROR: provision-build exited with code %s\n' "$status" >> "$STACK_PROVISION_LOG_FILE" + if [ "$status" -ne 0 ]; then + if [ -n "${STACK_PROVISION_LOG_FILE:-}" ]; then + printf 'ERROR: provision-build exited with code %s\n' "$status" >> "$STACK_PROVISION_LOG_FILE" + printf '%s\n' "STACK_CLOUD_INIT_FAILED" >> "$STACK_PROVISION_LOG_FILE" + fi + for dev in /dev/console /dev/ttyAMA0 /dev/ttyS0; do + echo "STACK_CLOUD_INIT_FAILED" > "$dev" 2>/dev/null || true + done + sync || true + (sleep 2 && shutdown -P now) & + (sleep 15 && poweroff -f) & fi } trap cleanup EXIT