# surya/pyproject.toml

# Core distribution metadata (PEP 621).
[project]
name = "surya-ocr"
version = "2.0.0a0"
description = "OCR, layout, reading order, and table recognition via VLM (vllm + llama.cpp)."
readme = "README.md"
license = { text = "Apache-2.0" }
authors = [{ name = "Vik Paruchuri", email = "vik.paruchuri@gmail.com" }]
keywords = ["ocr", "pdf", "text detection", "text recognition", "tables"]
requires-python = ">=3.10,<4"
# Runtime requirements as PEP 508 strings, kept in alphabetical order.
# Most entries are bounded ranges; opencv-python-headless and pypdfium2 are
# pinned to exact versions.
# NOTE(review): transformers is the only requirement without an upper
# bound — confirm that is intentional.
dependencies = [
    "beautifulsoup4>=4.12.0,<5",
    "click>=8.1.8,<9",
    "einops>=0.8.1,<0.9",
    "filelock>=3.16.0,<4",
    "filetype>=1.2.0,<2",
    "httpx>=0.27.0,<0.28",
    "huggingface-hub>=0.26.0,<1",
    "openai>=1.55.0,<2",
    "opencv-python-headless==4.11.0.86",
    "pillow>=10.2.0,<11",
    "platformdirs>=4.3.6,<5",
    "pydantic>=2.5.3,<3",
    "pydantic-settings>=2.1.0,<3",
    "pypdfium2==4.30.0",
    "python-dotenv>=1.0.0,<2",
    "torch>=2.7.0,<3",
    "transformers>=4.56.1",
]

# Project links published with the package metadata.
[project.urls]
Repository = "https://github.com/VikParuchuri/surya"

# Console commands installed with the package. Each entry maps a command
# name to a "module:function" entry point; entries are sorted by command name.
# NOTE(review): surya_gui appears to launch a Streamlit app, but streamlit is
# only listed in the `dev` dependency group — confirm the command copes with
# streamlit not being installed.
[project.scripts]
surya_detect = "surya.scripts.detect_text:detect_text_cli"
surya_gui = "surya.scripts.run_streamlit_app:streamlit_app_cli"
surya_layout = "surya.scripts.detect_layout:detect_layout_cli"
surya_ocr = "surya.scripts.ocr_text:ocr_text_cli"
surya_table = "surya.scripts.table_recognition:table_recognition_cli"

# Dependency groups (PEP 735). These are for local development only and are
# not part of the published runtime requirements.
[dependency-groups]
# Development tooling, kept in alphabetical order. All entries are
# lower-bounded only.
dev = [
    "datasets>=2.16.1",
    "jupyter>=1.0.0",
    "pdftext>=0.5.1",
    "pre-commit>=4.2.0",
    "pymupdf>=1.23.8",
    "pytesseract>=0.3.10",
    "pytest>=8.3.4",
    "rapidfuzz>=3.6.1",
    "streamlit>=1.31.0",
    "tabulate>=0.9.0",
]

# PEP 517 build configuration: sdists and wheels are built with hatchling.
# The hatchling requirement carries no version constraint.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# Hatch wheel target: include the `surya` package directory in the wheel.
# NOTE(review): presumably listed explicitly because the import package name
# differs from the distribution name — confirm.
[tool.hatch.build.targets.wheel]
packages = ["surya"]