CI fix
Some checks failed
Unit tests / build (t4_gpu) (push) Has been cancelled
Unit tests / build (ubuntu-latest) (push) Has been cancelled
Unit tests / build (windows-latest) (push) Has been cancelled
Test CLI scripts / build (push) Has been cancelled

This commit is contained in:
Vik Paruchuri 2026-05-27 10:59:19 -04:00
parent c421e742b4
commit e966a20990
5 changed files with 48 additions and 5 deletions

View File

@ -9,6 +9,12 @@ jobs:
matrix:
os: [t4_gpu, ubuntu-latest, windows-latest]
fail-fast: false
env:
# The T4 can't run bf16 in vllm, so use float16 and size vllm for the 16GB
# card. (No-op on the CPU runners, which skip the VLM-backed tests.)
VLLM_DTYPE: float16
VLLM_GPU_TYPE: t4
SURYA_INFERENCE_STARTUP_TIMEOUT: "1200"
steps:
- uses: actions/checkout@v4
- name: Install uv

View File

@ -5,6 +5,12 @@ on: [push]
jobs:
build:
runs-on: t4_gpu
env:
# T4 (Turing, compute 7.5) can't run bf16 in vllm; size vllm for the 16GB
# card and give the cold start (image pull + model download) headroom.
VLLM_DTYPE: float16
VLLM_GPU_TYPE: t4
SURYA_INFERENCE_STARTUP_TIMEOUT: "1200"
steps:
- uses: actions/checkout@v4
- name: Install uv
@ -22,11 +28,13 @@ jobs:
unzip -o benchmark_data.zip
- name: Test detection
run: uv run surya_detect benchmark_data/pdfs/switch_trans.pdf --page_range 0
# Spawn the vllm server once and reuse it across the OCR/layout/table
# steps (--keep_server) instead of paying a cold start three times.
- name: Test OCR
run: uv run surya_ocr benchmark_data/pdfs/switch_trans.pdf --page_range 0
run: uv run surya_ocr benchmark_data/pdfs/switch_trans.pdf --page_range 0 --keep_server
- name: Test layout
run: uv run surya_layout benchmark_data/pdfs/switch_trans.pdf --page_range 0
run: uv run surya_layout benchmark_data/pdfs/switch_trans.pdf --page_range 0 --keep_server
- name: Test table
run: uv run surya_table benchmark_data/pdfs/switch_trans.pdf --page_range 0
run: uv run surya_table benchmark_data/pdfs/switch_trans.pdf --page_range 0 --keep_server
- name: Test detection folder
run: uv run surya_detect benchmark_data/pdfs --page_range 0

View File

@ -138,6 +138,27 @@ def _stop_process(pid: int, name: str) -> None:
logger.warning(f"Failed to stop {name} (pid {pid}): {e}")
def _capture_server_logs(handle: "SpawnHandle", tail: int = 100) -> str:
    """Best-effort tail of a server's logs, for surfacing startup failures.

    Args:
        handle: Spawn handle of the server. ``cleanup_kind`` selects the log
            source; for ``"docker"`` the ``cleanup_id`` is passed to
            ``docker logs``.
        tail: Maximum number of trailing log lines to return. For the
            file-based log a non-positive value yields no lines.

    Returns:
        The captured log text, or a parenthesised placeholder such as
        ``"(no logs available)"`` when nothing could be captured. Any
        ``Exception`` raised while capturing is reported in the returned
        string instead of propagating, so this is safe to call on a
        failure path.
    """
    try:
        if handle.cleanup_kind == "docker":
            result = subprocess.run(
                ["docker", "logs", "--tail", str(tail), handle.cleanup_id],
                capture_output=True,
                text=True,
                timeout=15,
            )
            # The exit code is deliberately not checked: stderr is included
            # in the returned text either way.
            return (result.stdout or "") + (result.stderr or "") or "(no docker logs)"
        # llama.cpp process backend logs to this file (see llamacpp.py)
        # NOTE(review): every non-docker handle falls through to this file;
        # confirm no other process backend logs elsewhere.
        log_path = Path("~/.cache/datalab/surya/llamacpp_server.log").expanduser()
        if log_path.exists():
            lines = log_path.read_text(errors="replace").splitlines()
            # ``lines[-0:]`` is the whole list, so a zero (or negative) tail
            # must be handled explicitly to mean "no lines".
            wanted = lines[-tail:] if tail > 0 else []
            return "\n".join(wanted) or "(empty log)"
    except Exception as e:
        return f"(could not capture logs: {e})"
    return "(no logs available)"
def _stop_docker_container(name: str) -> None:
try:
subprocess.run(
@ -290,10 +311,15 @@ def attach_or_spawn(
# 6. Wait for health
health_url = health_url_for(port)
if not wait_for_health(health_url, total_timeout=startup_timeout):
# Grab the server's own logs *before* cleanup tears the (--rm)
# container down, otherwise the actual failure reason is lost and
# all the caller sees is this timeout.
logs = _capture_server_logs(spawn_handle)
_cleanup()
raise SpawnError(
f"{backend} server failed to become healthy at {health_url} "
f"within {startup_timeout}s"
f"within {startup_timeout}s.\n"
f"--- last {backend} server logs ---\n{logs}"
)
# 7. Verify model name

View File

@ -140,7 +140,7 @@ class VllmBackend(Backend):
"--max-num-seqs",
str(max_num_seqs),
"--dtype",
"bfloat16",
settings.VLLM_DTYPE,
"--max-model-len",
str(settings.VLLM_MAX_MODEL_LEN),
"--max-num-batched-tokens",

View File

@ -89,6 +89,9 @@ class Settings(BaseSettings):
VLLM_API_KEY: str = "EMPTY"
VLLM_GPUS: str = "0"
VLLM_GPU_TYPE: str = "4090"
# bfloat16 needs an Ampere+ GPU (compute capability >= 8.0). On older cards
# (e.g. T4 / Turing) vllm refuses to start with bf16 — set VLLM_DTYPE=float16 there.
VLLM_DTYPE: str = "bfloat16"
VLLM_MAX_MODEL_LEN: int = 18000
VLLM_GPU_MEMORY_UTILIZATION: float = 0.85
VLLM_ENABLE_MTP: bool = True