mirror of
https://github.com/VikParuchuri/surya.git
synced 2026-06-04 21:03:53 +08:00
CI fix
This commit is contained in:
parent
c421e742b4
commit
e966a20990
6
.github/workflows/ci.yml
vendored
6
.github/workflows/ci.yml
vendored
@ -9,6 +9,12 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
os: [t4_gpu, ubuntu-latest, windows-latest]
|
os: [t4_gpu, ubuntu-latest, windows-latest]
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
|
env:
|
||||||
|
# T4 can't run bf16 in vllm; size for the 16GB card. (No-op on the
|
||||||
|
# CPU runners, which skip the VLM-backed tests.)
|
||||||
|
VLLM_DTYPE: float16
|
||||||
|
VLLM_GPU_TYPE: t4
|
||||||
|
SURYA_INFERENCE_STARTUP_TIMEOUT: "1200"
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- name: Install uv
|
- name: Install uv
|
||||||
|
|||||||
14
.github/workflows/scripts.yml
vendored
14
.github/workflows/scripts.yml
vendored
@ -5,6 +5,12 @@ on: [push]
|
|||||||
jobs:
|
jobs:
|
||||||
build:
|
build:
|
||||||
runs-on: t4_gpu
|
runs-on: t4_gpu
|
||||||
|
env:
|
||||||
|
# T4 (Turing, compute 7.5) can't run bf16 in vllm; size vllm for the 16GB
|
||||||
|
# card and give the cold start (image pull + model download) headroom.
|
||||||
|
VLLM_DTYPE: float16
|
||||||
|
VLLM_GPU_TYPE: t4
|
||||||
|
SURYA_INFERENCE_STARTUP_TIMEOUT: "1200"
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- name: Install uv
|
- name: Install uv
|
||||||
@ -22,11 +28,13 @@ jobs:
|
|||||||
unzip -o benchmark_data.zip
|
unzip -o benchmark_data.zip
|
||||||
- name: Test detection
|
- name: Test detection
|
||||||
run: uv run surya_detect benchmark_data/pdfs/switch_trans.pdf --page_range 0
|
run: uv run surya_detect benchmark_data/pdfs/switch_trans.pdf --page_range 0
|
||||||
|
# Spawn the vllm server once and reuse it across the OCR/layout/table
|
||||||
|
# steps (--keep_server) instead of paying a cold start three times.
|
||||||
- name: Test OCR
|
- name: Test OCR
|
||||||
run: uv run surya_ocr benchmark_data/pdfs/switch_trans.pdf --page_range 0
|
run: uv run surya_ocr benchmark_data/pdfs/switch_trans.pdf --page_range 0 --keep_server
|
||||||
- name: Test layout
|
- name: Test layout
|
||||||
run: uv run surya_layout benchmark_data/pdfs/switch_trans.pdf --page_range 0
|
run: uv run surya_layout benchmark_data/pdfs/switch_trans.pdf --page_range 0 --keep_server
|
||||||
- name: Test table
|
- name: Test table
|
||||||
run: uv run surya_table benchmark_data/pdfs/switch_trans.pdf --page_range 0
|
run: uv run surya_table benchmark_data/pdfs/switch_trans.pdf --page_range 0 --keep_server
|
||||||
- name: Test detection folder
|
- name: Test detection folder
|
||||||
run: uv run surya_detect benchmark_data/pdfs --page_range 0
|
run: uv run surya_detect benchmark_data/pdfs --page_range 0
|
||||||
|
|||||||
@ -138,6 +138,27 @@ def _stop_process(pid: int, name: str) -> None:
|
|||||||
logger.warning(f"Failed to stop {name} (pid {pid}): {e}")
|
logger.warning(f"Failed to stop {name} (pid {pid}): {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def _capture_server_logs(handle: "SpawnHandle", tail: int = 100) -> str:
    """Return a best-effort tail of a spawned server's logs.

    Intended for surfacing the real reason behind a startup failure. This
    function never raises: any error while collecting logs is reported in the
    returned string instead.

    Args:
        handle: Spawn handle; ``cleanup_kind`` selects the log source and
            ``cleanup_id`` is passed to ``docker logs`` for docker handles.
        tail: Maximum number of trailing log lines to return. For docker
            handles the value is forwarded to ``docker logs --tail`` as-is;
            for the log-file path, values <= 0 yield no lines.

    Returns:
        The captured log text, or a parenthesised placeholder describing why
        no logs could be returned.
    """
    try:
        if handle.cleanup_kind == "docker":
            r = subprocess.run(
                ["docker", "logs", "--tail", str(tail), handle.cleanup_id],
                capture_output=True,
                text=True,
                timeout=15,
            )
            # Concatenate both streams; fall back to a placeholder when
            # neither produced any output.
            return (r.stdout or "") + (r.stderr or "") or "(no docker logs)"
        # llama.cpp process backend logs to this file (see llamacpp.py)
        log_path = Path("~/.cache/datalab/surya/llamacpp_server.log").expanduser()
        if log_path.exists():
            lines = log_path.read_text(errors="replace").splitlines()
            # Guard the slice: ``lines[-0:]`` is the *whole* list, and a
            # negative ``tail`` would drop leading lines instead of tailing.
            kept = lines[-tail:] if tail > 0 else []
            return "\n".join(kept) or "(empty log)"
    except Exception as e:
        # Deliberately broad: log capture must never mask the startup
        # failure the caller is about to report.
        return f"(could not capture logs: {e})"
    return "(no logs available)"
|
||||||
|
|
||||||
|
|
||||||
def _stop_docker_container(name: str) -> None:
|
def _stop_docker_container(name: str) -> None:
|
||||||
try:
|
try:
|
||||||
subprocess.run(
|
subprocess.run(
|
||||||
@ -290,10 +311,15 @@ def attach_or_spawn(
|
|||||||
# 6. Wait for health
|
# 6. Wait for health
|
||||||
health_url = health_url_for(port)
|
health_url = health_url_for(port)
|
||||||
if not wait_for_health(health_url, total_timeout=startup_timeout):
|
if not wait_for_health(health_url, total_timeout=startup_timeout):
|
||||||
|
# Grab the server's own logs *before* cleanup tears the (--rm)
|
||||||
|
# container down, otherwise the actual failure reason is lost and
|
||||||
|
# all the caller sees is this timeout.
|
||||||
|
logs = _capture_server_logs(spawn_handle)
|
||||||
_cleanup()
|
_cleanup()
|
||||||
raise SpawnError(
|
raise SpawnError(
|
||||||
f"{backend} server failed to become healthy at {health_url} "
|
f"{backend} server failed to become healthy at {health_url} "
|
||||||
f"within {startup_timeout}s"
|
f"within {startup_timeout}s.\n"
|
||||||
|
f"--- last {backend} server logs ---\n{logs}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# 7. Verify model name
|
# 7. Verify model name
|
||||||
|
|||||||
@ -140,7 +140,7 @@ class VllmBackend(Backend):
|
|||||||
"--max-num-seqs",
|
"--max-num-seqs",
|
||||||
str(max_num_seqs),
|
str(max_num_seqs),
|
||||||
"--dtype",
|
"--dtype",
|
||||||
"bfloat16",
|
settings.VLLM_DTYPE,
|
||||||
"--max-model-len",
|
"--max-model-len",
|
||||||
str(settings.VLLM_MAX_MODEL_LEN),
|
str(settings.VLLM_MAX_MODEL_LEN),
|
||||||
"--max-num-batched-tokens",
|
"--max-num-batched-tokens",
|
||||||
|
|||||||
@ -89,6 +89,9 @@ class Settings(BaseSettings):
|
|||||||
VLLM_API_KEY: str = "EMPTY"
|
VLLM_API_KEY: str = "EMPTY"
|
||||||
VLLM_GPUS: str = "0"
|
VLLM_GPUS: str = "0"
|
||||||
VLLM_GPU_TYPE: str = "4090"
|
VLLM_GPU_TYPE: str = "4090"
|
||||||
|
# bfloat16 needs an Ampere+ GPU (compute capability >= 8.0). On older cards
|
||||||
|
# (e.g. T4 / Turing) vllm refuses to start with bf16 — set float16 there.
|
||||||
|
VLLM_DTYPE: str = "bfloat16"
|
||||||
VLLM_MAX_MODEL_LEN: int = 18000
|
VLLM_MAX_MODEL_LEN: int = 18000
|
||||||
VLLM_GPU_MEMORY_UTILIZATION: float = 0.85
|
VLLM_GPU_MEMORY_UTILIZATION: float = 0.85
|
||||||
VLLM_ENABLE_MTP: bool = True
|
VLLM_ENABLE_MTP: bool = True
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user