mirror of
https://github.com/VikParuchuri/surya.git
synced 2026-06-04 21:03:53 +08:00
CI fix
This commit is contained in:
parent
c421e742b4
commit
e966a20990
6
.github/workflows/ci.yml
vendored
6
.github/workflows/ci.yml
vendored
@ -9,6 +9,12 @@ jobs:
|
||||
matrix:
|
||||
os: [t4_gpu, ubuntu-latest, windows-latest]
|
||||
fail-fast: false
|
||||
env:
|
||||
# T4 can't run bf16 in vllm; size for the 16GB card. (No-op on the
|
||||
# CPU runners, which skip the VLM-backed tests.)
|
||||
VLLM_DTYPE: float16
|
||||
VLLM_GPU_TYPE: t4
|
||||
SURYA_INFERENCE_STARTUP_TIMEOUT: "1200"
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install uv
|
||||
|
||||
14
.github/workflows/scripts.yml
vendored
14
.github/workflows/scripts.yml
vendored
@ -5,6 +5,12 @@ on: [push]
|
||||
jobs:
|
||||
build:
|
||||
runs-on: t4_gpu
|
||||
env:
|
||||
# T4 (Turing, compute 7.5) can't run bf16 in vllm; size vllm for the 16GB
|
||||
# card and give the cold start (image pull + model download) headroom.
|
||||
VLLM_DTYPE: float16
|
||||
VLLM_GPU_TYPE: t4
|
||||
SURYA_INFERENCE_STARTUP_TIMEOUT: "1200"
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install uv
|
||||
@ -22,11 +28,13 @@ jobs:
|
||||
unzip -o benchmark_data.zip
|
||||
- name: Test detection
|
||||
run: uv run surya_detect benchmark_data/pdfs/switch_trans.pdf --page_range 0
|
||||
# Spawn the vllm server once and reuse it across the OCR/layout/table
|
||||
# steps (--keep_server) instead of paying a cold start three times.
|
||||
- name: Test OCR
|
||||
run: uv run surya_ocr benchmark_data/pdfs/switch_trans.pdf --page_range 0
|
||||
run: uv run surya_ocr benchmark_data/pdfs/switch_trans.pdf --page_range 0 --keep_server
|
||||
- name: Test layout
|
||||
run: uv run surya_layout benchmark_data/pdfs/switch_trans.pdf --page_range 0
|
||||
run: uv run surya_layout benchmark_data/pdfs/switch_trans.pdf --page_range 0 --keep_server
|
||||
- name: Test table
|
||||
run: uv run surya_table benchmark_data/pdfs/switch_trans.pdf --page_range 0
|
||||
run: uv run surya_table benchmark_data/pdfs/switch_trans.pdf --page_range 0 --keep_server
|
||||
- name: Test detection folder
|
||||
run: uv run surya_detect benchmark_data/pdfs --page_range 0
|
||||
|
||||
@ -138,6 +138,27 @@ def _stop_process(pid: int, name: str) -> None:
|
||||
logger.warning(f"Failed to stop {name} (pid {pid}): {e}")
|
||||
|
||||
|
||||
def _capture_server_logs(handle: "SpawnHandle", tail: int = 100) -> str:
    """Best-effort tail of a server's logs, for surfacing startup failures.

    Args:
        handle: Spawn handle of the server. When ``handle.cleanup_kind`` is
            ``"docker"`` the logs come from ``docker logs`` on
            ``handle.cleanup_id``; any other kind falls back to the
            llama.cpp server log file.
        tail: Maximum number of trailing log lines to return. Values <= 0
            yield no log lines (the placeholder text is returned instead).

    Returns:
        The captured log text, or a parenthesised placeholder when nothing
        could be captured. Exceptions raised while capturing are caught and
        reported inside the returned string rather than propagated.
    """
    # ``lines[-0:]`` is the *whole* list and a negative tail slices from the
    # front, so clamp here to keep "tail" meaning "at most this many trailing
    # lines" for both the docker and the file branch.
    tail = max(tail, 0)
    try:
        if handle.cleanup_kind == "docker":
            result = subprocess.run(
                ["docker", "logs", "--tail", str(tail), handle.cleanup_id],
                capture_output=True,
                text=True,
                timeout=15,
            )
            # Startup errors may land on either stream, so report both.
            return (result.stdout or "") + (result.stderr or "") or "(no docker logs)"
        # llama.cpp process backend logs to this file (see llamacpp.py)
        log_path = Path("~/.cache/datalab/surya/llamacpp_server.log").expanduser()
        if log_path.exists():
            lines = log_path.read_text(errors="replace").splitlines()
            recent = lines[-tail:] if tail else []
            return "\n".join(recent) or "(empty log)"
    except Exception as e:
        # Deliberately broad: this runs on an error path and must never mask
        # the original startup failure with a secondary exception.
        return f"(could not capture logs: {e})"
    return "(no logs available)"
|
||||
|
||||
|
||||
def _stop_docker_container(name: str) -> None:
|
||||
try:
|
||||
subprocess.run(
|
||||
@ -290,10 +311,15 @@ def attach_or_spawn(
|
||||
# 6. Wait for health
|
||||
health_url = health_url_for(port)
|
||||
if not wait_for_health(health_url, total_timeout=startup_timeout):
|
||||
# Grab the server's own logs *before* cleanup tears the (--rm)
|
||||
# container down, otherwise the actual failure reason is lost and
|
||||
# all the caller sees is this timeout.
|
||||
logs = _capture_server_logs(spawn_handle)
|
||||
_cleanup()
|
||||
raise SpawnError(
|
||||
f"{backend} server failed to become healthy at {health_url} "
|
||||
f"within {startup_timeout}s"
|
||||
f"within {startup_timeout}s.\n"
|
||||
f"--- last {backend} server logs ---\n{logs}"
|
||||
)
|
||||
|
||||
# 7. Verify model name
|
||||
|
||||
@ -140,7 +140,7 @@ class VllmBackend(Backend):
|
||||
"--max-num-seqs",
|
||||
str(max_num_seqs),
|
||||
"--dtype",
|
||||
"bfloat16",
|
||||
settings.VLLM_DTYPE,
|
||||
"--max-model-len",
|
||||
str(settings.VLLM_MAX_MODEL_LEN),
|
||||
"--max-num-batched-tokens",
|
||||
|
||||
@ -89,6 +89,9 @@ class Settings(BaseSettings):
|
||||
VLLM_API_KEY: str = "EMPTY"
|
||||
VLLM_GPUS: str = "0"
|
||||
VLLM_GPU_TYPE: str = "4090"
|
||||
# bfloat16 needs an Ampere+ GPU (compute capability >= 8.0). On older cards
|
||||
# (e.g. T4 / Turing) vllm refuses to start with bf16 — set float16 there.
|
||||
VLLM_DTYPE: str = "bfloat16"
|
||||
VLLM_MAX_MODEL_LEN: int = 18000
|
||||
VLLM_GPU_MEMORY_UTILIZATION: float = 0.85
|
||||
VLLM_ENABLE_MTP: bool = True
|
||||
|
||||
Loading…
Reference in New Issue
Block a user