# Scrapegraph-ai/pyproject.toml

# Core package metadata (PEP 621), read by the build backend declared in
# [build-system].
[project]
name = "scrapegraphai"

version = "1.19.0b1"

description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
authors = [
    { name = "Marco Vinciguerra", email = "mvincig11@gmail.com" },
    { name = "Marco Perini", email = "perinim.98@gmail.com" },
    { name = "Lorenzo Padoan", email = "lorenzo.padoan977@gmail.com" },
]

# Runtime requirements (PEP 508 strings). Only lower bounds are given, so the
# resolver is free to pick any newer release.
dependencies = [
    "langchain>=0.2.14",
    "langchain-google-genai>=1.0.7",
    "langchain-openai>=0.1.22",
    "langchain-mistralai>=0.1.12",
    "langchain-community>=0.2.9",
    "langchain-aws>=0.1.3",
    "html2text>=2024.2.26",
    "faiss-cpu>=1.8.0",
    "beautifulsoup4>=4.12.3",
    "pandas>=2.2.2",
    "python-dotenv>=1.0.1",
    "tiktoken>=0.7",
    "tqdm>=4.66.4",
    "minify-html>=0.15.0",
    "free-proxy>=1.1.1",
    "playwright>=1.43.0",
    "undetected-playwright>=0.3.0",
    "google>=3.0.0",
    "semchunk>=1.0.1",
    "langchain-ollama>=0.1.3",
]

license = "MIT"
readme = "README.md"
keywords = [
    "scrapegraph",
    "scrapegraphai",
    "langchain",
    "ai",
    "artificial intelligence",
    "gpt",
    "machine learning",
    "rag",
    "nlp",
    "natural language processing",
    "openai",
    "scraping",
    "web scraping",
    "web scraping library",
    "web scraping tool",
    "webscraping",
    "graph",
]
classifiers = [
    "Intended Audience :: Developers",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Programming Language :: Python :: 3",
    "Operating System :: OS Independent",
]
requires-python = ">=3.9,<4.0"

# Project links. PEP 621 defines no top-level `homepage`, `repository` or
# `documentation` keys under [project]; links must be listed in this table to
# be included in the package metadata.
[project.urls]
Homepage = "https://scrapegraphai.com/"
Repository = "https://github.com/VinciGit00/Scrapegraph-ai"
Documentation = "https://scrapegraph-ai.readthedocs.io/en/latest/"

# Optional extras; each key below is an extra name that can be requested on
# top of the core dependencies.
[project.optional-dependencies]
# Burr integration, pinned to an exact release.
burr = [
    "burr[start]==0.22.1",
]

# Documentation toolchain, pinned to exact releases.
docs = [
    "sphinx==6.0",
    "furo==2024.5.6",
]

# Group 1: additional LangChain language-model provider packages.
other-language-models = [
    "langchain-google-vertexai>=1.0.7",
    "langchain-fireworks>=0.1.3",
    "langchain-groq>=0.1.3",
    "langchain-anthropic>=0.1.11",
    "langchain-huggingface>=0.0.3",
    "langchain-nvidia-ai-endpoints>=0.1.6",
    "langchain_together>=1.2.9",
]

# Group 2: more semantic options.
more-semantic-options = [
    "graphviz>=0.20.3",
]

# Group 3: more browser options.
more-browser-options = [
    "browserbase>=0.3.0",
]

# Group 4: Surya OCR stack. The environment markers restrict the first three
# packages to Python 3.10 or newer; pillow carries no marker and is therefore
# requested on every supported interpreter.
screenshot_scraper = [
    "surya-ocr>=0.5.0; python_version >= '3.10'",
    "matplotlib>=3.7.2; python_version >= '3.10'",
    "ipywidgets>=8.1.0; python_version >= '3.10'",
    "pillow>=10.4.0",
]

# Build backend declaration: the package is built with hatchling.
[build-system]
build-backend = "hatchling.build"
requires = [
    "hatchling",
]

# Rye project configuration.
[tool.rye]
# Flags this project as managed by Rye.
managed = true
# Development-only requirements. The two `-e file:.[...]` entries refer to
# this project itself (path `.`) as an editable install with the `burr` and
# `docs` extras enabled.
dev-dependencies = [
    "pytest==8.0.0",
    "pytest-mock==3.14.0",
    "-e file:.[burr]",
    "-e file:.[docs]",
    "pylint>=3.2.5",
]

# Named commands runnable through Rye (`rye run <name>`).
[tool.rye.scripts]
# Lint every Python file under scrapegraphai/ with the default pylint checks.
# NOTE(review): the `**` glob is not expanded by a shell here; presumably
# pylint expands it itself — confirm it matches nested packages as intended.
pylint-local = "pylint scrapegraphai/**/*.py"
# CI variant: disables C0114/C0115/C0116 (the missing-docstring messages) and
# passes --exit-zero so lint findings do not produce a failing exit status.
pylint-ci = "pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py"
# Runs the requirements helper script; the path contains a space, hence the
# inner single quotes.
update-requirements = "python 'manual deployment/autorequirements.py'"