mirror of
https://github.com/VikParuchuri/surya.git
synced 2026-06-04 21:03:53 +08:00
Add screenshot app
This commit is contained in:
parent
80c2903ea2
commit
79246df837
@ -37,6 +37,7 @@ surya_ocr = "surya.scripts.ocr_text:ocr_text_cli"
|
||||
surya_layout = "surya.scripts.detect_layout:detect_layout_cli"
|
||||
surya_gui = "surya.scripts.run_streamlit_app:streamlit_app_cli"
|
||||
surya_table = "surya.scripts.table_recognition:table_recognition_cli"
|
||||
surya_screenshot = "surya.scripts.screenshot_app:main"
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
@ -48,6 +49,7 @@ dev = [
|
||||
"pytest>=8.3.4",
|
||||
"pdftext>=0.5.1",
|
||||
"tabulate>=0.9.0",
|
||||
"flask>=3.0.0",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
|
||||
@ -265,7 +265,10 @@ def attach_or_spawn(
|
||||
},
|
||||
)
|
||||
|
||||
# 5. Register atexit cleanup (only spawner)
|
||||
# 5. Register atexit cleanup (only spawner). Skipped when keep-alive is
|
||||
# set so the server outlives this process and later commands attach to
|
||||
# it via the sentinel. (_cleanup is still callable below on startup
|
||||
# failure, where we always tear a half-started server down.)
|
||||
def _cleanup():
|
||||
try:
|
||||
if spawn_handle.cleanup_kind == "docker":
|
||||
@ -276,7 +279,13 @@ def attach_or_spawn(
|
||||
finally:
|
||||
_delete_sentinel(backend)
|
||||
|
||||
atexit.register(_cleanup)
|
||||
if settings.SURYA_INFERENCE_KEEP_ALIVE:
|
||||
logger.info(
|
||||
f"keep-alive: {backend} server on port {port} will stay up "
|
||||
f"after exit (cleanup_id={spawn_handle.cleanup_id!r})"
|
||||
)
|
||||
else:
|
||||
atexit.register(_cleanup)
|
||||
|
||||
# 6. Wait for health
|
||||
health_url = health_url_for(port)
|
||||
|
||||
@ -17,15 +17,45 @@ class CLILoader:
|
||||
self.debug = cli_options.get("debug", False)
|
||||
self.output_dir = cli_options.get("output_dir")
|
||||
|
||||
# Opt in to leaving the inference server up so later commands reuse it.
|
||||
if cli_options.get("keep_server"):
|
||||
settings.SURYA_INFERENCE_KEEP_ALIVE = True
|
||||
|
||||
self.load(highres)
|
||||
|
||||
@staticmethod
|
||||
def common_options(fn):
|
||||
fn = click.argument("input_path", type=click.Path(exists=True), required=True)(fn)
|
||||
fn = click.option("--output_dir", type=click.Path(exists=False), required=False, default=os.path.join(settings.RESULT_DIR, "surya"), help="Directory to save output.")(fn)
|
||||
fn = click.option("--page_range", type=str, default=None, help="Page range to convert, specify comma separated page numbers or ranges. Example: 0,5-10,20")(fn)
|
||||
fn = click.option("--images", is_flag=True, help="Save images of detected bboxes.", default=False)(fn)
|
||||
fn = click.option('--debug', '-d', is_flag=True, help='Enable debug mode.', default=False)(fn)
|
||||
fn = click.argument("input_path", type=click.Path(exists=True), required=True)(
|
||||
fn
|
||||
)
|
||||
fn = click.option(
|
||||
"--output_dir",
|
||||
type=click.Path(exists=False),
|
||||
required=False,
|
||||
default=os.path.join(settings.RESULT_DIR, "surya"),
|
||||
help="Directory to save output.",
|
||||
)(fn)
|
||||
fn = click.option(
|
||||
"--page_range",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Page range to convert, specify comma separated page numbers or ranges. Example: 0,5-10,20",
|
||||
)(fn)
|
||||
fn = click.option(
|
||||
"--images",
|
||||
is_flag=True,
|
||||
help="Save images of detected bboxes.",
|
||||
default=False,
|
||||
)(fn)
|
||||
fn = click.option(
|
||||
"--debug", "-d", is_flag=True, help="Enable debug mode.", default=False
|
||||
)(fn)
|
||||
fn = click.option(
|
||||
"--keep_server",
|
||||
is_flag=True,
|
||||
default=False,
|
||||
help="Keep the inference server (vllm/llama.cpp) running after this command exits so later commands reuse it instead of re-spawning.",
|
||||
)(fn)
|
||||
return fn
|
||||
|
||||
def load(self, highres: bool = False):
|
||||
@ -34,13 +64,16 @@ class CLILoader:
|
||||
images, names = load_from_folder(self.filepath, self.page_range)
|
||||
folder_name = os.path.basename(self.filepath)
|
||||
if highres:
|
||||
highres_images, _ = load_from_folder(self.filepath, self.page_range, settings.IMAGE_DPI_HIGHRES)
|
||||
highres_images, _ = load_from_folder(
|
||||
self.filepath, self.page_range, settings.IMAGE_DPI_HIGHRES
|
||||
)
|
||||
else:
|
||||
images, names = load_from_file(self.filepath, self.page_range)
|
||||
folder_name = os.path.basename(self.filepath).split(".")[0]
|
||||
if highres:
|
||||
highres_images, _ = load_from_file(self.filepath, self.page_range, settings.IMAGE_DPI_HIGHRES)
|
||||
|
||||
highres_images, _ = load_from_file(
|
||||
self.filepath, self.page_range, settings.IMAGE_DPI_HIGHRES
|
||||
)
|
||||
|
||||
self.images = images
|
||||
self.highres_images = highres_images
|
||||
@ -59,5 +92,7 @@ class CLILoader:
|
||||
page_lst += list(range(int(start), int(end) + 1))
|
||||
else:
|
||||
page_lst.append(int(i))
|
||||
page_lst = sorted(list(set(page_lst))) # Deduplicate page numbers and sort in order
|
||||
return page_lst
|
||||
page_lst = sorted(
|
||||
list(set(page_lst))
|
||||
) # Deduplicate page numbers and sort in order
|
||||
return page_lst
|
||||
|
||||
226
surya/scripts/screenshot_app.py
Normal file
226
surya/scripts/screenshot_app.py
Normal file
@ -0,0 +1,226 @@
|
||||
"""Screenshot-friendly Surya viewer.
|
||||
|
||||
Shows a PDF/image page on the left and full-page OCR output on the right, side
|
||||
by side, for clean screenshots. You can scroll through pages and preview them
|
||||
before running OCR, then export the side-by-side view as a PNG.
|
||||
|
||||
Run with `surya_screenshot`, then open http://localhost:8504.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
import os
|
||||
import tempfile
|
||||
import uuid
|
||||
from typing import List, Optional
|
||||
|
||||
import pypdfium2
|
||||
from flask import Flask, jsonify, render_template, request
|
||||
from PIL import Image
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
from surya.inference import SuryaInferenceManager
|
||||
from surya.logging import configure_logging, get_logger
|
||||
from surya.recognition import RecognitionPredictor
|
||||
from surya.recognition.schema import PageOCRResult
|
||||
from surya.settings import settings
|
||||
|
||||
configure_logging()
|
||||
logger = get_logger()
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
ALLOWED_EXT = {".pdf", ".png", ".jpg", ".jpeg", ".gif", ".webp"}
|
||||
UPLOAD_DIR = os.path.join(tempfile.gettempdir(), "surya_screenshot")
|
||||
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
||||
|
||||
_rec: Optional[RecognitionPredictor] = None
|
||||
|
||||
|
||||
def get_rec() -> RecognitionPredictor:
    """Return the process-wide recognition predictor, creating it on first use.

    The instance is cached in the module-level ``_rec`` so that later calls
    reuse the predictor (and the inference manager it was built with) instead
    of constructing a new one.
    """
    global _rec
    if _rec is not None:
        return _rec
    # First call: build the predictor on top of a fresh inference manager.
    _rec = RecognitionPredictor(SuryaInferenceManager())
    return _rec
|
||||
|
||||
|
||||
# Datalab-flavored palette for layout block overlays, keyed by canonical label.
|
||||
LABEL_COLORS = {
|
||||
"Text": "#2563eb",
|
||||
"SectionHeader": "#0ea5e9",
|
||||
"PageHeader": "#7c3aed",
|
||||
"PageFooter": "#7c3aed",
|
||||
"Caption": "#c026d3",
|
||||
"Footnote": "#64748b",
|
||||
"Equation": "#9333ea",
|
||||
"Table": "#f59e0b",
|
||||
"TableOfContents": "#f59e0b",
|
||||
"Form": "#ea580c",
|
||||
"ListGroup": "#10b981",
|
||||
"Picture": "#db2777",
|
||||
"Figure": "#db2777",
|
||||
"Diagram": "#db2777",
|
||||
"Code": "#0d9488",
|
||||
"default": "#ef4444",
|
||||
}
|
||||
|
||||
|
||||
def _logo_data_url() -> str:
    """Return the bundled Datalab logo as a PNG ``data:`` URL.

    The logo is read from ``static/datalab-logo.png`` under
    ``settings.BASE_DIR``. It is purely decorative, so any failure to read or
    encode it yields an empty string rather than an error.
    """
    logo_path = os.path.join(settings.BASE_DIR, "static", "datalab-logo.png")
    try:
        with open(logo_path, "rb") as fh:
            encoded = base64.b64encode(fh.read()).decode()
    except Exception:
        # Best effort: the template omits the <img> when the value is empty.
        return ""
    return "data:image/png;base64," + encoded
|
||||
|
||||
|
||||
def _pil_to_data_url(img: Image.Image, fmt: str = "PNG") -> str:
    """Encode *img* in memory and wrap the bytes as a base64 ``data:`` URL.

    Args:
        img: Image to serialize; it is saved with ``img.save(..., format=fmt)``.
        fmt: Format name passed to ``save``. Its lower-cased form is used as
            the ``image/<fmt>`` media type of the URL.

    Returns:
        A string of the form ``data:image/<fmt>;base64,<payload>``.
    """
    stream = io.BytesIO()
    img.save(stream, format=fmt)
    payload = base64.b64encode(stream.getvalue()).decode()
    media_type = f"image/{fmt.lower()}"
    return f"data:{media_type};base64," + payload
|
||||
|
||||
|
||||
def _is_pdf(path: str) -> bool:
    """Report whether *path* ends in ``.pdf``, compared case-insensitively."""
    lowered = path.lower()
    return lowered.endswith(".pdf")
|
||||
|
||||
|
||||
def _page_count(path: str) -> int:
    """Return the number of pages available at *path*.

    PDFs are opened with pypdfium2 and report their real page count; every
    other file is treated as a single-page image.

    Raises:
        Whatever pypdfium2 raises when the PDF cannot be opened or read.
    """
    if not _is_pdf(path):
        return 1
    doc = pypdfium2.PdfDocument(path)
    try:
        return len(doc)
    finally:
        # Always release the document, even if reading the length fails
        # (same pattern as _render_page).
        doc.close()
|
||||
|
||||
|
||||
def _render_page(path: str, page: int, dpi: int) -> Image.Image:
    """Render a 0-indexed page of a PDF (or load an image file) as RGB.

    Args:
        path: File to read. PDFs are rasterized; anything else is opened as
            an image.
        page: 0-indexed PDF page. Ignored for image files.
        dpi: Target resolution for PDF rendering, passed to pypdfium2 as
            ``scale=dpi / 72``. Ignored for image files.

    Returns:
        A new RGB image that does not depend on the source file staying open.
    """
    if _is_pdf(path):
        doc = pypdfium2.PdfDocument(path)
        try:
            pil = doc[page].render(scale=dpi / 72).to_pil().convert("RGB")
        finally:
            doc.close()
        return pil
    # Image.open keeps the file handle open until the image is closed;
    # convert() returns an independent copy, so the source can be released
    # right away instead of waiting for garbage collection.
    with Image.open(path) as source:
        return source.convert("RGB")
|
||||
|
||||
|
||||
def _assemble_page_html(page: PageOCRResult) -> str:
    """Whole-page HTML from a PageOCRResult (math stays in <math> tags).

    Each non-skipped block becomes one ``<div>`` carrying its integer bbox in
    ``data-bbox`` and its label in ``data-label``, wrapping the block's own
    HTML (empty when the block has none). Divs are joined with newlines.

    The label is attribute-escaped so an unexpected character in it cannot
    break out of the ``data-label`` attribute. The block HTML is inserted
    verbatim because it is already markup.
    """
    from html import escape

    parts: List[str] = []
    for blk in page.blocks:
        if blk.skipped:
            continue
        x0, y0, x1, y1 = (int(c) for c in blk.bbox)
        label = escape(str(blk.label), quote=True)
        parts.append(
            f'<div data-bbox="{x0} {y0} {x1} {y1}" '
            f'data-label="{label}">{blk.html or ""}</div>'
        )
    return "\n".join(parts)
|
||||
|
||||
|
||||
@app.route("/")
def index():
    """Serve the single-page viewer, passing the logo as an inline data URL."""
    logo_url = _logo_data_url()
    return render_template("surya_screenshot.html", logo=logo_url)
|
||||
|
||||
|
||||
@app.route("/info", methods=["POST"])
def info():
    """Return the page count for a file path that already exists on the server.

    Expects a JSON body ``{"file_path": "<path>"}``.

    Returns:
        ``{"page_count": n}`` on success; ``{"error": ...}`` with status 400
        when the path is missing or not found, or 500 when counting fails.
    """
    # silent=True: a missing or malformed JSON body falls through to the JSON
    # 400 below instead of Flask aborting with its own error page.
    payload = request.get_json(silent=True)
    raw_path = payload.get("file_path", "") if isinstance(payload, dict) else ""
    path = raw_path.strip() if isinstance(raw_path, str) else ""
    if not path:
        return jsonify({"error": "file_path is required"}), 400
    if not os.path.exists(path):
        return jsonify({"error": f"File not found: {path}"}), 400
    try:
        return jsonify({"page_count": _page_count(path)})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
||||
|
||||
|
||||
@app.route("/upload", methods=["POST"])
def upload():
    """Accept a drag/drop (or browsed) file, save to a temp path, return it.

    The file arrives as multipart field ``file``. Its extension must be in
    ``ALLOWED_EXT``. It is stored in ``UPLOAD_DIR`` under a unique,
    sanitized name.

    Returns:
        ``{"file_path", "page_count", "name"}`` on success; ``{"error": ...}``
        with status 400 for a missing or unsupported file, or 500 when the
        page count cannot be read.
    """
    f = request.files.get("file")
    if f is None or not f.filename:
        return jsonify({"error": "no file uploaded"}), 400
    ext = os.path.splitext(f.filename)[1].lower()
    if ext not in ALLOWED_EXT:
        return jsonify({"error": f"unsupported file type: {ext or '(none)'}"}), 400
    safe = secure_filename(f.filename) or f"upload{ext}"
    # secure_filename can strip the extension (a non-ASCII basename such as
    # "файл.pdf" collapses to "pdf"). Later steps pick the PDF or image path
    # by suffix, so put the validated extension back.
    if not safe.lower().endswith(ext):
        safe = f"{safe}{ext}"
    dest = os.path.join(UPLOAD_DIR, f"{uuid.uuid4().hex}_{safe}")
    f.save(dest)
    try:
        count = _page_count(dest)
    except Exception as e:
        # The upload is unusable; do not leave it behind in the temp dir.
        try:
            os.remove(dest)
        except OSError:
            logger.exception("Could not remove unusable upload")
        return jsonify({"error": str(e)}), 500
    return jsonify({"file_path": dest, "page_count": count, "name": f.filename})
|
||||
|
||||
|
||||
@app.route("/page", methods=["POST"])
def page():
    """Render a single page for preview (no OCR).

    Expects a JSON body ``{"file_path": "<path>", "page": <0-indexed int>}``;
    ``page`` defaults to 0.

    Returns:
        ``{"image_base64", "width", "height"}`` on success; ``{"error": ...}``
        with status 400 for bad input, or 500 when rendering fails.
    """
    # silent=True: a missing or malformed JSON body becomes a JSON 400 below
    # instead of Flask aborting with its own error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw_path = data.get("file_path", "")
    path = raw_path.strip() if isinstance(raw_path, str) else ""
    try:
        page_num = int(data.get("page", 0))
    except (TypeError, ValueError):
        return jsonify({"error": "page must be an integer"}), 400
    if not path or not os.path.exists(path):
        return jsonify({"error": "valid file_path is required"}), 400
    try:
        img = _render_page(path, page_num, settings.IMAGE_DPI_HIGHRES)
        return jsonify(
            {
                "image_base64": _pil_to_data_url(img),
                "width": img.size[0],
                "height": img.size[1],
            }
        )
    except Exception as e:
        return jsonify({"error": str(e)}), 500
|
||||
|
||||
|
||||
@app.route("/process", methods=["POST"])
def process():
    """Run full-page OCR on one page; return the page image + OCR HTML + blocks.

    Expects a JSON body ``{"file_path": "<path>", "page": <0-indexed int>}``;
    ``page`` defaults to 0.

    Returns:
        On success a JSON object with ``image_base64``, ``width``, ``height``,
        ``html`` (the assembled page HTML), ``blocks`` (bbox, label and
        overlay color for each non-skipped block) and ``n_blocks``. Otherwise
        ``{"error": ...}`` with status 400 for bad input or 500 when
        rendering or OCR fails.
    """
    # silent=True: a missing or malformed JSON body becomes a JSON 400 below
    # instead of Flask aborting with its own error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    raw_path = data.get("file_path", "")
    path = raw_path.strip() if isinstance(raw_path, str) else ""
    try:
        page_num = int(data.get("page", 0))
    except (TypeError, ValueError):
        return jsonify({"error": "page must be an integer"}), 400
    if not path or not os.path.exists(path):
        return jsonify({"error": "valid file_path is required"}), 400
    try:
        img = _render_page(path, page_num, settings.IMAGE_DPI_HIGHRES)
        page_result = get_rec()([img], full_page=True)[0]
        blocks = [
            {
                "bbox": [int(c) for c in blk.bbox],
                "label": blk.label,
                # Unknown labels fall back to the "default" overlay color.
                "color": LABEL_COLORS.get(blk.label, LABEL_COLORS["default"]),
            }
            for blk in page_result.blocks
            if not blk.skipped
        ]
        return jsonify(
            {
                "image_base64": _pil_to_data_url(img),
                "width": img.size[0],
                "height": img.size[1],
                "html": _assemble_page_html(page_result),
                "blocks": blocks,
                # Counts every block, including skipped ones that are left
                # out of "blocks" above.
                "n_blocks": len(page_result.blocks),
            }
        )
    except Exception as e:
        logger.exception("Full-page OCR failed")
        return jsonify({"error": str(e)}), 500
|
||||
|
||||
|
||||
def main():
    """Start the viewer's Flask server (console-script entry point).

    The endpoints read any file path the client sends and have no
    authentication, so the server binds to loopback by default. Set
    ``SURYA_SCREENSHOT_HOST`` (e.g. ``0.0.0.0``) to expose it deliberately,
    and ``SURYA_SCREENSHOT_PORT`` to change the port from 8504.
    """
    host = os.environ.get("SURYA_SCREENSHOT_HOST", "127.0.0.1")
    port = int(os.environ.get("SURYA_SCREENSHOT_PORT", "8504"))
    app.run(host=host, port=port)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@ -4,12 +4,14 @@ inference manager. Detection + OCR-error stay in their own torch paths."""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import re
|
||||
import tempfile
|
||||
import time
|
||||
from typing import List
|
||||
|
||||
import pypdfium2
|
||||
import streamlit as st
|
||||
import streamlit.components.v1 as components
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
from surya.debug.draw import draw_polys_on_image, draw_bboxes_on_image
|
||||
@ -24,6 +26,61 @@ from surya.table_rec import TableRecPredictor
|
||||
from surya.table_rec.schema import TableResult
|
||||
|
||||
|
||||
# KaTeX-enabled HTML wrapper. The OCR HTML wraps math in <math>...</math>
|
||||
# (KaTeX-compatible LaTeX inside), which a browser would otherwise show as
|
||||
# raw text. We convert those tags to \( \) / \[ \] delimiters and let KaTeX
|
||||
# auto-render typeset them inside an iframe component.
|
||||
_KATEX_HEAD = r"""<!doctype html><html><head>
|
||||
<meta charset="utf-8">
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.css">
|
||||
<script src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/contrib/auto-render.min.js"></script>
|
||||
<style>
|
||||
/* White "paper" card so the text stays readable in both light and dark
|
||||
Streamlit themes (the iframe is otherwise transparent and our text is dark). */
|
||||
html,body{background:#ffffff;}
|
||||
body{font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif;font-size:15px;line-height:1.55;color:#111111;margin:0;padding:14px;}
|
||||
table{border-collapse:collapse;margin:6px 0;} td,th{border:1px solid #bbb;padding:3px 6px;color:#111111;}
|
||||
[data-label="SectionHeader"],[data-label="PageHeader"]{font-weight:600;}
|
||||
</style></head><body>
|
||||
"""
|
||||
|
||||
_KATEX_TAIL = r"""
|
||||
<script>
|
||||
renderMathInElement(document.body, {
|
||||
delimiters: [
|
||||
{left: "\\[", right: "\\]", display: true},
|
||||
{left: "\\(", right: "\\)", display: false}
|
||||
],
|
||||
throwOnError: false
|
||||
});
|
||||
</script></body></html>
|
||||
"""
|
||||
|
||||
_MATH_RE = re.compile(r"<math\b([^>]*)>(.*?)</math>", re.DOTALL | re.IGNORECASE)
|
||||
|
||||
|
||||
def _math_to_katex(html_str: str) -> str:
    """Replace each <math>...</math> element with KaTeX-delimited LaTeX.

    An element whose attributes contain ``display="block"`` (either quote
    style) is wrapped in ``\\[ ... \\]``; every other one is wrapped in
    ``\\( ... \\)``. A falsy *html_str* is treated as the empty string.
    """

    def _delimit(match: "re.Match") -> str:
        attributes, latex = match.groups()
        is_block = (
            re.search(r"""display\s*=\s*["']block["']""", attributes) is not None
        )
        opener, closer = ("\\[", "\\]") if is_block else ("\\(", "\\)")
        return opener + latex + closer

    source = html_str or ""
    return _MATH_RE.sub(_delimit, source)
|
||||
|
||||
|
||||
def render_ocr_html(html_str: str, height: int = 400) -> None:
    """Show OCR HTML in a scrollable iframe component with KaTeX-typeset math.

    The <math> tags in *html_str* are rewritten to KaTeX delimiters, and the
    result is placed between the KaTeX page head and tail before being handed
    to Streamlit's HTML component.

    Args:
        html_str: OCR HTML fragment to display.
        height: Height passed to the component.
    """
    document = _KATEX_HEAD + _math_to_katex(html_str) + _KATEX_TAIL
    components.html(document, height=height, scrolling=True)
|
||||
|
||||
|
||||
def _assemble_page_html(page: PageOCRResult) -> str:
|
||||
"""Reconstruct a div-block whole-page HTML from a PageOCRResult."""
|
||||
parts: List[str] = []
|
||||
@ -334,7 +391,7 @@ if run_block_ocr:
|
||||
)
|
||||
full_html = _assemble_page_html(page)
|
||||
with st.expander("Full page HTML (rendered)", expanded=False):
|
||||
st.markdown(full_html, unsafe_allow_html=True)
|
||||
render_ocr_html(full_html, height=600)
|
||||
with st.expander("Full page HTML (source)", expanded=False):
|
||||
st.code(full_html, language="html")
|
||||
for blk in page.blocks:
|
||||
@ -366,6 +423,7 @@ if run_block_ocr:
|
||||
elif blk.error:
|
||||
st.error("Block OCR errored")
|
||||
else:
|
||||
render_ocr_html(blk.html, height=160)
|
||||
st.code(blk.html, language="html")
|
||||
|
||||
|
||||
@ -382,7 +440,7 @@ if run_full_page_ocr:
|
||||
)
|
||||
full_html = _assemble_page_html(page)
|
||||
with st.expander("Full page HTML (rendered)", expanded=False):
|
||||
st.markdown(full_html, unsafe_allow_html=True)
|
||||
render_ocr_html(full_html, height=600)
|
||||
with st.expander("Full page HTML (source)", expanded=False):
|
||||
st.code(full_html, language="html")
|
||||
for blk in page.blocks:
|
||||
@ -394,7 +452,7 @@ if run_full_page_ocr:
|
||||
elif blk.error:
|
||||
st.error("Block OCR errored")
|
||||
else:
|
||||
st.markdown(blk.html, unsafe_allow_html=True)
|
||||
render_ocr_html(blk.html, height=160)
|
||||
st.code(blk.html, language="html")
|
||||
|
||||
|
||||
@ -412,7 +470,7 @@ if run_table_rec:
|
||||
for pred in preds:
|
||||
if pred.mode == "full" and pred.html:
|
||||
with st.expander("Table HTML"):
|
||||
st.markdown(pred.html, unsafe_allow_html=True)
|
||||
render_ocr_html(pred.html, height=400)
|
||||
st.code(pred.html, language="html")
|
||||
else:
|
||||
st.json(pred.model_dump(), expanded=False)
|
||||
|
||||
331
surya/scripts/templates/surya_screenshot.html
Normal file
331
surya/scripts/templates/surya_screenshot.html
Normal file
@ -0,0 +1,331 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Surya · Full-Page OCR</title>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.css">
|
||||
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.js"></script>
|
||||
<script src="https://cdn.jsdelivr.net/npm/html2canvas@1.4.1/dist/html2canvas.min.js"></script>
|
||||
<style>
|
||||
:root {
|
||||
--ink: #0f1115;
|
||||
--bg: #f4f6f9;
|
||||
--panel: #ffffff;
|
||||
--border: #e4e7ec;
|
||||
--accent: #2563eb;
|
||||
--accent-hover: #1d4ed8;
|
||||
--muted: #667085;
|
||||
}
|
||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
||||
background: var(--bg);
|
||||
color: var(--ink);
|
||||
height: 100vh;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
overflow: hidden;
|
||||
}
|
||||
header {
|
||||
display: flex; align-items: center; gap: 16px;
|
||||
padding: 12px 20px;
|
||||
background: var(--panel);
|
||||
border-bottom: 1px solid var(--border);
|
||||
box-shadow: 0 1px 3px rgba(16,24,40,.04);
|
||||
z-index: 10;
|
||||
}
|
||||
.brand { display: flex; align-items: center; gap: 10px; }
|
||||
.brand img { height: 30px; width: auto; }
|
||||
.brand .title { font-size: 18px; font-weight: 700; letter-spacing: -.01em; }
|
||||
.brand .sub { font-size: 13px; color: var(--muted); font-weight: 500;
|
||||
padding-left: 10px; margin-left: 4px; border-left: 1px solid var(--border); }
|
||||
.controls { display: flex; align-items: center; gap: 10px; flex: 1; flex-wrap: wrap; }
|
||||
input[type=text] {
|
||||
flex: 1; min-width: 260px; max-width: 560px;
|
||||
padding: 9px 12px; font-size: 14px;
|
||||
border: 1px solid var(--border); border-radius: 8px; background: #fff;
|
||||
}
|
||||
input[type=text]:focus { outline: none; border-color: var(--accent); box-shadow: 0 0 0 3px rgba(37,99,235,.12); }
|
||||
button {
|
||||
padding: 9px 14px; font-size: 14px; font-weight: 600;
|
||||
border: none; border-radius: 8px; cursor: pointer;
|
||||
background: var(--accent); color: #fff; transition: background .15s;
|
||||
}
|
||||
button:hover:not(:disabled) { background: var(--accent-hover); }
|
||||
button.secondary { background: #fff; color: var(--ink); border: 1px solid var(--border); }
|
||||
button.secondary:hover:not(:disabled) { background: #f2f4f7; }
|
||||
button:disabled { opacity: .45; cursor: not-allowed; }
|
||||
.pager { display: flex; align-items: center; gap: 6px; }
|
||||
.pager .pageind { font-size: 13px; color: var(--muted); min-width: 96px; text-align: center; }
|
||||
.pager button { padding: 7px 11px; }
|
||||
.toggle { display: flex; align-items: center; gap: 7px; font-size: 13px; color: var(--muted); cursor: pointer; user-select: none; }
|
||||
.status { font-size: 13px; font-weight: 600; min-width: 80px; }
|
||||
.status.loading { color: #b45309; }
|
||||
.status.error { color: #d92d20; }
|
||||
.status.ok { color: #079455; }
|
||||
|
||||
.stage {
|
||||
flex: 1; display: flex; gap: 16px; padding: 16px; overflow: hidden;
|
||||
}
|
||||
.panel {
|
||||
flex: 1; min-width: 0; display: flex; flex-direction: column;
|
||||
background: var(--panel); border: 1px solid var(--border);
|
||||
border-radius: 12px; overflow: hidden;
|
||||
box-shadow: 0 1px 2px rgba(16,24,40,.05);
|
||||
}
|
||||
.panel-head {
|
||||
padding: 11px 16px; font-size: 13px; font-weight: 700;
|
||||
letter-spacing: .02em; text-transform: uppercase; color: var(--muted);
|
||||
border-bottom: 1px solid var(--border); background: #fcfcfd;
|
||||
}
|
||||
.panel-body { flex: 1; overflow: auto; }
|
||||
.img-wrap {
|
||||
display: flex; align-items: flex-start; justify-content: center;
|
||||
padding: 16px; background: #f0f2f5; min-height: 100%;
|
||||
}
|
||||
#pageCanvas { max-width: 100%; height: auto; border-radius: 6px;
|
||||
box-shadow: 0 2px 10px rgba(16,24,40,.12); background: #fff; }
|
||||
.ocr {
|
||||
padding: 28px 32px; line-height: 1.6; font-size: 16px; color: #1d2433;
|
||||
}
|
||||
.ocr [data-label="SectionHeader"], .ocr [data-label="Title"] { font-weight: 700; font-size: 1.15em; margin: .5em 0 .3em; }
|
||||
.ocr [data-label="PageHeader"], .ocr [data-label="PageFooter"] { color: var(--muted); font-size: .9em; }
|
||||
.ocr table { border-collapse: collapse; margin: 14px 0; width: 100%; }
|
||||
.ocr th, .ocr td { border: 1px solid #d0d5dd; padding: 6px 10px; text-align: left; }
|
||||
.ocr th { background: #f2f4f7; font-weight: 600; }
|
||||
.ocr img { max-width: 100%; height: auto; }
|
||||
.placeholder { padding: 48px 32px; color: var(--muted); font-size: 15px; text-align: center; }
|
||||
#dropOverlay { position: fixed; inset: 0; z-index: 100; display: none;
|
||||
align-items: center; justify-content: center; background: rgba(15,17,21,.55); }
|
||||
#dropOverlay.active { display: flex; }
|
||||
#dropOverlay .drop-card { padding: 40px 64px; border: 3px dashed #fff;
|
||||
border-radius: 16px; color: #fff; font-size: 22px; font-weight: 700;
|
||||
background: rgba(37,99,235,.30); }
|
||||
/* While screenshotting, expand panels to full content height so
|
||||
html2canvas captures everything, not just the visible scroll area. */
|
||||
body.capturing { height: auto !important; overflow: visible !important; }
|
||||
body.capturing .stage { height: auto !important; overflow: visible !important; }
|
||||
body.capturing .panel-body { height: auto !important; overflow: visible !important; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<div class="brand">
|
||||
{% if logo %}<img src="{{ logo }}" alt="Datalab">{% endif %}
|
||||
<span class="title">Surya</span>
|
||||
<span class="sub">Full-Page OCR</span>
|
||||
</div>
|
||||
<div class="controls">
|
||||
<input type="text" id="filePath" placeholder="Drop a file, Browse, or type a server path…">
|
||||
<input type="file" id="fileInput" accept=".pdf,.png,.jpg,.jpeg,.gif,.webp" style="display:none" onchange="onPick(this)">
|
||||
<button class="secondary" onclick="document.getElementById('fileInput').click()">Browse</button>
|
||||
<button class="secondary" id="loadBtn" onclick="loadFile()">Load</button>
|
||||
<div class="pager">
|
||||
<button class="secondary" id="prevBtn" onclick="changePage(-1)" disabled>◀</button>
|
||||
<span class="pageind" id="pageInd">—</span>
|
||||
<button class="secondary" id="nextBtn" onclick="changePage(1)" disabled>▶</button>
|
||||
</div>
|
||||
<button id="runBtn" onclick="runOCR()" disabled>Run Full-Page OCR</button>
|
||||
<label class="toggle"><input type="checkbox" id="showBoxes" checked onchange="drawLeft()"> Layout boxes</label>
|
||||
<button class="secondary" id="copyBtn" onclick="copyHtml()" disabled>Copy HTML</button>
|
||||
<button class="secondary" id="shotBtn" onclick="saveScreenshot()" disabled>Save Screenshot</button>
|
||||
<span class="status" id="status"></span>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<div class="stage">
|
||||
<div class="panel">
|
||||
<div class="panel-head">PDF Page</div>
|
||||
<div class="panel-body"><div class="img-wrap"><canvas id="pageCanvas"></canvas></div></div>
|
||||
</div>
|
||||
<div class="panel">
|
||||
<div class="panel-head">Full-Page OCR</div>
|
||||
<div class="panel-body"><div class="ocr" id="ocr"><div class="placeholder">Load a file, scroll to a page, then run full-page OCR.</div></div></div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="dropOverlay"><div class="drop-card">Drop a PDF or image to load</div></div>
|
||||
|
||||
<script>
|
||||
const S = { path: "", name: "", page: 0, count: 0, img: null, blocks: null, html: null, ocrPage: null };
|
||||
|
||||
const $ = (id) => document.getElementById(id);
|
||||
function setStatus(msg, kind) { const s = $("status"); s.textContent = msg || ""; s.className = "status " + (kind || ""); }
|
||||
|
||||
// POST `body` as JSON to `url` and resolve with the parsed JSON reply.
// Rejects with the server's `error` message when there is one. If the reply
// is not JSON at all (e.g. a framework HTML error page), it rejects with a
// readable message instead of a raw JSON parse error.
async function post(url, body) {
  const r = await fetch(url, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body) });
  let data = null, parsed = false;
  try { data = await r.json(); parsed = true; } catch (e) { /* non-JSON body */ }
  if (!r.ok) throw new Error((data && data.error) || `Request failed (${r.status})`);
  if (!parsed) throw new Error("Server returned an invalid response");
  return data;
}
|
||||
|
||||
async function loadFile() {
|
||||
const val = $("filePath").value.trim();
|
||||
if (!val) { setStatus("Drop a file or enter a path", "error"); return; }
|
||||
// If the box still shows the name of an uploaded file, just reload it.
|
||||
if (S.path && val === S.name) { S.page = 0; await showPage(); return; }
|
||||
setStatus("Loading…", "loading");
|
||||
try {
|
||||
const info = await post("/info", { file_path: val });
|
||||
S.path = val; S.name = val; S.count = info.page_count; S.page = 0;
|
||||
await showPage();
|
||||
setStatus("");
|
||||
} catch (e) { setStatus(e.message, "error"); }
|
||||
}
|
||||
|
||||
function onPick(input) { if (input.files && input.files[0]) uploadFile(input.files[0]); }
|
||||
|
||||
async function uploadFile(file) {
|
||||
setStatus("Uploading…", "loading");
|
||||
const fd = new FormData(); fd.append("file", file);
|
||||
try {
|
||||
const r = await fetch("/upload", { method: "POST", body: fd });
|
||||
const data = await r.json();
|
||||
if (!r.ok) throw new Error(data.error || "Upload failed");
|
||||
S.path = data.file_path; S.name = data.name; S.count = data.page_count; S.page = 0;
|
||||
$("filePath").value = data.name;
|
||||
await showPage();
|
||||
setStatus("");
|
||||
} catch (e) { setStatus(e.message, "error"); }
|
||||
}
|
||||
|
||||
async function showPage() {
|
||||
setStatus("Rendering…", "loading");
|
||||
try {
|
||||
const data = await post("/page", { file_path: S.path, page: S.page });
|
||||
// New page → clear any previous OCR output.
|
||||
S.blocks = null; S.html = null; S.ocrPage = null;
|
||||
$("ocr").innerHTML = '<div class="placeholder">Run full-page OCR to see the extracted content.</div>';
|
||||
$("shotBtn").disabled = true;
|
||||
$("copyBtn").disabled = true;
|
||||
loadImage(data.image_base64, () => drawLeft());
|
||||
updatePager();
|
||||
$("runBtn").disabled = false;
|
||||
setStatus("");
|
||||
} catch (e) { setStatus(e.message, "error"); }
|
||||
}
|
||||
|
||||
function loadImage(src, cb) {
|
||||
const im = new Image();
|
||||
im.onload = () => { S.img = im; cb && cb(); };
|
||||
im.src = src;
|
||||
}
|
||||
|
||||
function updatePager() {
|
||||
$("pageInd").textContent = S.count ? `Page ${S.page + 1} of ${S.count}` : "—";
|
||||
$("prevBtn").disabled = S.page <= 0;
|
||||
$("nextBtn").disabled = S.page >= S.count - 1;
|
||||
}
|
||||
|
||||
function changePage(delta) {
|
||||
const next = S.page + delta;
|
||||
if (next < 0 || next >= S.count) return;
|
||||
S.page = next;
|
||||
showPage();
|
||||
}
|
||||
|
||||
function drawLeft() {
|
||||
if (!S.img) return;
|
||||
const c = $("pageCanvas"), ctx = c.getContext("2d");
|
||||
c.width = S.img.naturalWidth; c.height = S.img.naturalHeight;
|
||||
ctx.drawImage(S.img, 0, 0);
|
||||
if (!S.blocks || !$("showBoxes").checked) return;
|
||||
ctx.lineWidth = 3;
|
||||
ctx.font = 'bold 15px -apple-system, "Segoe UI", sans-serif';
|
||||
ctx.textBaseline = "top";
|
||||
S.blocks.forEach((b) => {
|
||||
const [x1, y1, x2, y2] = b.bbox;
|
||||
ctx.strokeStyle = b.color; ctx.fillStyle = b.color + "26";
|
||||
ctx.fillRect(x1, y1, x2 - x1, y2 - y1);
|
||||
ctx.strokeRect(x1, y1, x2 - x1, y2 - y1);
|
||||
const tw = ctx.measureText(b.label).width, ly = Math.max(y1 - 22, 0);
|
||||
ctx.fillStyle = b.color; ctx.fillRect(x1, ly, tw + 12, 21);
|
||||
ctx.fillStyle = "#fff"; ctx.fillText(b.label, x1 + 6, ly + 3);
|
||||
});
|
||||
}
|
||||
|
||||
async function runOCR() {
|
||||
if (!S.path) return;
|
||||
setStatus("Running OCR…", "loading");
|
||||
$("runBtn").disabled = true;
|
||||
try {
|
||||
const data = await post("/process", { file_path: S.path, page: S.page });
|
||||
S.blocks = data.blocks; S.html = data.html || ""; S.ocrPage = S.page;
|
||||
loadImage(data.image_base64, () => drawLeft());
|
||||
const ocr = $("ocr");
|
||||
ocr.innerHTML = data.html || '<div class="placeholder">No content detected on this page.</div>';
|
||||
renderMath(ocr);
|
||||
$("shotBtn").disabled = false;
|
||||
$("copyBtn").disabled = !S.html;
|
||||
setStatus(`${data.n_blocks} blocks`, "ok");
|
||||
} catch (e) { setStatus(e.message, "error"); }
|
||||
finally { $("runBtn").disabled = false; }
|
||||
}
|
||||
|
||||
function renderMath(root) {
|
||||
root.querySelectorAll("math").forEach((el) => {
|
||||
const block = el.getAttribute("display") === "block";
|
||||
try {
|
||||
const span = document.createElement(block ? "div" : "span");
|
||||
span.innerHTML = katex.renderToString(el.textContent, { displayMode: block, throwOnError: false });
|
||||
el.replaceWith(span);
|
||||
} catch (e) { /* leave raw on failure */ }
|
||||
});
|
||||
}
|
||||
|
||||
function copyHtml() {
|
||||
if (!S.html) return;
|
||||
const done = () => setStatus("HTML copied", "ok");
|
||||
if (navigator.clipboard && navigator.clipboard.writeText) {
|
||||
navigator.clipboard.writeText(S.html).then(done).catch(() => fallbackCopy(S.html, done));
|
||||
} else {
|
||||
fallbackCopy(S.html, done);
|
||||
}
|
||||
}
|
||||
|
||||
// Clipboard fallback for when navigator.clipboard is unavailable or rejects:
// copy `text` via a temporary off-screen <textarea> and execCommand("copy").
// Calls `done` only when the copy actually succeeded.
function fallbackCopy(text, done) {
  const ta = document.createElement("textarea");
  ta.value = text; ta.style.position = "fixed"; ta.style.opacity = "0";
  document.body.appendChild(ta); ta.select();
  let ok = false;
  // execCommand reports failure by returning false, not only by throwing.
  try { ok = document.execCommand("copy"); }
  catch (e) { ok = false; }
  finally { document.body.removeChild(ta); }
  if (ok) done();
  else setStatus("Copy failed", "error");
}
|
||||
|
||||
// Render the `.stage` element to a PNG with html2canvas and trigger a
// browser download named after the current page. While the capture runs,
// <body> carries the "capturing" class (presumably the stylesheet uses it to
// expand the layout to full size — defined outside this block).
async function saveScreenshot() {
  setStatus("Capturing…", "loading");
  const stageEl = document.querySelector(".stage");
  const bodyClasses = document.body.classList;
  bodyClasses.add("capturing");
  try {
    // Wait two animation frames so the expanded layout settles before the
    // full size is measured.
    await new Promise((resolve) => {
      requestAnimationFrame(() => requestAnimationFrame(resolve));
    });

    const fullWidth = stageEl.scrollWidth;
    const fullHeight = stageEl.scrollHeight;
    const captureOptions = {
      backgroundColor: "#f4f6f9",
      scale: 2,
      useCORS: true,
      logging: false,
      width: fullWidth,
      height: fullHeight,
      windowWidth: fullWidth,
      windowHeight: fullHeight,
      scrollX: 0,
      scrollY: 0,
    };
    const canvas = await html2canvas(stageEl, captureOptions);

    // Download via a throwaway anchor carrying the PNG as a data URL.
    const link = document.createElement("a");
    link.href = canvas.toDataURL("image/png");
    // Prefer the page the OCR ran on; fall back to the displayed page.
    const pageNumber = (S.ocrPage ?? S.page) + 1;
    link.download = `surya_ocr_page_${pageNumber}.png`;
    link.click();

    setStatus("Saved", "ok");
  } catch (err) {
    setStatus("Screenshot failed: " + err.message, "error");
  } finally {
    bodyClasses.remove("capturing");
  }
}
|
||||
|
||||
// Pressing Enter in the path field loads the file.
$("filePath").addEventListener("keypress", (evt) => {
  if (evt.key === "Enter") loadFile();
});

// Drag & drop anywhere in the window.
// dragenter/dragleave fire for every element the pointer crosses, so a
// depth counter tracks whether the drag is still inside the window.
let dragDepth = 0;

window.addEventListener("dragenter", (evt) => {
  evt.preventDefault();
  dragDepth += 1;
  $("dropOverlay").classList.add("active");
});

window.addEventListener("dragover", (evt) => {
  evt.preventDefault();
});

window.addEventListener("dragleave", (evt) => {
  evt.preventDefault();
  dragDepth -= 1;
  if (dragDepth <= 0) {
    // Clamp at zero and hide the overlay once the drag has left entirely.
    dragDepth = 0;
    $("dropOverlay").classList.remove("active");
  }
});

window.addEventListener("drop", (evt) => {
  evt.preventDefault();
  dragDepth = 0;
  $("dropOverlay").classList.remove("active");
  // Only the first dropped file is uploaded.
  const dropped = evt.dataTransfer.files;
  const firstFile = dropped && dropped[0];
  if (firstFile) uploadFile(firstFile);
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@ -49,6 +49,10 @@ class Settings(BaseSettings):
|
||||
SURYA_INFERENCE_BACKEND: Optional[str] = None # "vllm" | "llamacpp" | None (auto)
|
||||
SURYA_INFERENCE_URL: Optional[str] = None # external server, skip spawn
|
||||
SURYA_INFERENCE_AUTOSTART: bool = True
|
||||
# Leave an auto-spawned server running after the process exits so later
|
||||
# commands attach to it instead of re-spawning (avoids repeated startup /
|
||||
# model-load cost). Stop it manually when done — see `surya/inference`.
|
||||
SURYA_INFERENCE_KEEP_ALIVE: bool = False
|
||||
SURYA_INFERENCE_HOST: str = "127.0.0.1"
|
||||
SURYA_INFERENCE_PORT: Optional[int] = None # None = pick a free port
|
||||
SURYA_INFERENCE_PARALLEL: int = 8
|
||||
|
||||
31
uv.lock
31
uv.lock
@ -823,6 +823,23 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/18/79/1b8fa1bb3568781e84c9200f951c735f3f157429f44be0495da55894d620/filetype-1.2.0-py2.py3-none-any.whl", hash = "sha256:7ce71b6880181241cf7ac8697a2f1eb6a8bd9b429f7ad6d27b8db9ba5f1c2d25", size = 19970 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flask"
|
||||
version = "3.1.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "blinker" },
|
||||
{ name = "click" },
|
||||
{ name = "itsdangerous" },
|
||||
{ name = "jinja2" },
|
||||
{ name = "markupsafe" },
|
||||
{ name = "werkzeug" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/26/00/35d85dcce6c57fdc871f3867d465d780f302a175ea360f62533f12b27e2b/flask-3.1.3.tar.gz", hash = "sha256:0ef0e52b8a9cd932855379197dd8f94047b359ca0a78695144304cb45f87c9eb", size = 759004 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/7f/9c/34f6962f9b9e9c71f6e5ed806e0d0ff03c9d1b0b2340088a0cf4bce09b18/flask-3.1.3-py3-none-any.whl", hash = "sha256:f4bcbefc124291925f1a26446da31a5178f9483862233b23c0c96a20701f670c", size = 103424 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fqdn"
|
||||
version = "1.5.1"
|
||||
@ -3944,6 +3961,7 @@ dependencies = [
|
||||
[package.dev-dependencies]
|
||||
dev = [
|
||||
{ name = "datasets" },
|
||||
{ name = "flask" },
|
||||
{ name = "jupyter" },
|
||||
{ name = "pdftext" },
|
||||
{ name = "pre-commit" },
|
||||
@ -3976,6 +3994,7 @@ requires-dist = [
|
||||
[package.metadata.requires-dev]
|
||||
dev = [
|
||||
{ name = "datasets", specifier = ">=2.16.1" },
|
||||
{ name = "flask", specifier = ">=3.0.0" },
|
||||
{ name = "jupyter", specifier = ">=1.0.0" },
|
||||
{ name = "pdftext", specifier = ">=0.5.1" },
|
||||
{ name = "pre-commit", specifier = ">=4.2.0" },
|
||||
@ -4468,6 +4487,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/6f/28/258ebab549c2bf3e64d2b0217b973467394a9cea8c42f70418ca2c5d0d2e/websockets-16.0-py3-none-any.whl", hash = "sha256:1637db62fad1dc833276dded54215f2c7fa46912301a24bd94d45d46a011ceec", size = 171598 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "werkzeug"
|
||||
version = "3.1.8"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "markupsafe" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/dd/b2/381be8cfdee792dd117872481b6e378f85c957dd7c5bca38897b08f765fd/werkzeug-3.1.8.tar.gz", hash = "sha256:9bad61a4268dac112f1c5cd4630a56ede601b6ed420300677a869083d70a4c44", size = 875852 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/93/8c/2e650f2afeb7ee576912636c23ddb621c91ac6a98e66dc8d29c3c69446e1/werkzeug-3.1.8-py3-none-any.whl", hash = "sha256:63a77fb8892bf28ebc3178683445222aa500e48ebad5ec77b0ad80f8726b1f50", size = 226459 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "widgetsnbextension"
|
||||
version = "4.0.15"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user