chore(package manager)!: move from poetry to rye

closes #198
This commit is contained in:
Federico Aguzzi 2024-05-15 13:42:58 +02:00
parent 78d1940235
commit 8fc2510b37
7 changed files with 664 additions and 3400 deletions

View File

@ -14,11 +14,8 @@ jobs:
run: |
sudo apt update
sudo apt install -y git
- name: Install Python Env and Poetry
uses: actions/setup-python@v5
with:
python-version: '3.9'
- run: pip install poetry
- name: Install the latest version of rye
uses: eifinger/setup-rye@v3
- name: Install Node Env
uses: actions/setup-node@v4
with:
@ -30,8 +27,8 @@ jobs:
persist-credentials: false
- name: Build app
run: |
poetry install
poetry build
rye sync --no-lock
rye build
id: build_cache
if: success()
- name: Cache build

1
.python-version Normal file
View File

@ -0,0 +1 @@
3.9.19

View File

@ -1,15 +1,12 @@
#!/bin/bash
cd ..
poetry update
rye self update
# Install dependencies using rye
poetry install
# Check for any potential issues in the project
poetry check
rye sync
# Build the project
poetry build
rye build
# Publish the project to PyPI
poetry publish
rye publish

3347
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,14 +1,37 @@
[tool.poetry]
[project]
name = "scrapegraphai"
version = "0.11.1"
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
authors = [
"Marco Vinciguerra <mvincig11@gmail.com>",
"Marco Perini <perinim.98@gmail.com>",
"Lorenzo Padoan <lorenzo.padoan977@gmail.com>",
{ name = "Marco Vinciguerra", email = "mvincig11@gmail.com" },
{ name = "Marco Perini", email = "perinim.98@gmail.com" },
{ name = "Lorenzo Padoan", email = "lorenzo.padoan977@gmail.com" }
]
# Runtime dependencies, each pinned to an exact version (PEP 508 strings).
dependencies = [
# The supported Python range is not a dependency entry here; it is declared
# through the `requires-python` key of this table.
"langchain==0.1.15",
"langchain-openai==0.1.6",
"langchain-google-genai==1.0.3",
"langchain-groq==0.1.3",
"langchain-aws==0.1.3",
"langchain-anthropic==0.1.11",
"html2text==2024.2.26",
"faiss-cpu==1.8.0",
"beautifulsoup4==4.12.3",
"pandas==2.2.2",
"python-dotenv==1.0.1",
"tiktoken==0.6.0",
"tqdm==4.66.4",
"graphviz==0.20.3",
"minify-html==0.15.0",
"free-proxy==1.1.1",
"playwright==1.43.0",
"google==3.0.0",
"yahoo-search-py==0.3",
]
license = "MIT"
readme = "README.md"
homepage = "https://scrapegraph-ai.readthedocs.io/"
@ -39,40 +62,22 @@ classifiers = [
"Programming Language :: Python :: 3",
"Operating System :: OS Independent",
]
[tool.poetry.dependencies]
python = ">=3.9, <3.12"
langchain = "0.1.15"
langchain-openai = "0.1.6"
langchain-google-genai = "1.0.3"
langchain-groq = "0.1.3"
langchain-aws = "0.1.3"
langchain-anthropic = "0.1.11"
html2text = "2024.2.26"
faiss-cpu = "1.8.0"
beautifulsoup4 = "4.12.3"
pandas = "2.2.2"
python-dotenv = "1.0.1"
tiktoken = "0.6.0"
tqdm = "4.66.4"
graphviz = "0.20.3"
minify-html = "0.15.0"
free-proxy = "1.1.1"
playwright = "1.43.0"
google = "3.0.0"
yahoo-search-py = "0.3"
[tool.poetry.dev-dependencies]
pytest = "8.0.0"
pytest-mock = "3.14.0"
[tool.poetry.group.docs]
optional = true
[tool.poetry.group.docs.dependencies]
sphinx = "7.1.2"
sphinx-rtd-theme = "2.0.0"
# Keep the same supported interpreter range the Poetry configuration declared
# (`python = ">=3.9, <3.12"`); the package-manager migration should not
# silently widen it.
requires-python = ">=3.9,<3.12"
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
requires = ["hatchling"]
build-backend = "hatchling.build"
# Rye-specific project settings.
[tool.rye]
managed = true
# Test tooling only: these pins are resolved into requirements-dev.lock and
# are absent from requirements.lock.
dev-dependencies = [
"pytest==8.0.0",
"pytest-mock==3.14.0"
]
# Optional documentation toolchain. Rye has no Poetry-style dependency groups
# (a `[tool.rye.group.*]` table is not a recognised setting and would be
# ignored), so the docs requirements are exposed as a standard extra instead.
# Install with `pip install "scrapegraphai[docs]"` or `rye sync --features docs`.
[project.optional-dependencies]
docs = [
    "sphinx==7.1.2",
    "sphinx-rtd-theme==2.0.0",
]

311
requirements-dev.lock Normal file
View File

@ -0,0 +1,311 @@
# generated by rye
# use `rye lock` or `rye sync` to update this lockfile
#
# last locked with the following flags:
# pre: false
# features: []
# all-features: false
# with-sources: false
-e file:.
aiohttp==3.9.5
# via langchain
# via langchain-community
aiosignal==1.3.1
# via aiohttp
annotated-types==0.6.0
# via pydantic
anthropic==0.25.9
# via langchain-anthropic
anyio==4.3.0
# via anthropic
# via groq
# via httpx
# via openai
async-timeout==4.0.3
# via aiohttp
# via langchain
attrs==23.2.0
# via aiohttp
beautifulsoup4==4.12.3
# via google
# via scrapegraphai
boto3==1.34.105
# via langchain-aws
botocore==1.34.105
# via boto3
# via s3transfer
cachetools==5.3.3
# via google-auth
certifi==2024.2.2
# via httpcore
# via httpx
# via requests
charset-normalizer==3.3.2
# via requests
dataclasses-json==0.6.6
# via langchain
# via langchain-community
defusedxml==0.7.1
# via langchain-anthropic
distro==1.9.0
# via anthropic
# via groq
# via openai
exceptiongroup==1.2.1
# via anyio
# via pytest
faiss-cpu==1.8.0
# via scrapegraphai
filelock==3.14.0
# via huggingface-hub
free-proxy==1.1.1
# via scrapegraphai
frozenlist==1.4.1
# via aiohttp
# via aiosignal
fsspec==2024.3.1
# via huggingface-hub
google==3.0.0
# via scrapegraphai
google-ai-generativelanguage==0.6.3
# via google-generativeai
google-api-core==2.19.0
# via google-ai-generativelanguage
# via google-api-python-client
# via google-generativeai
google-api-python-client==2.129.0
# via google-generativeai
google-auth==2.29.0
# via google-ai-generativelanguage
# via google-api-core
# via google-api-python-client
# via google-auth-httplib2
# via google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-generativeai==0.5.3
# via langchain-google-genai
googleapis-common-protos==1.63.0
# via google-api-core
# via grpcio-status
graphviz==0.20.3
# via scrapegraphai
greenlet==3.0.3
# via playwright
# via sqlalchemy
groq==0.5.0
# via langchain-groq
grpcio==1.63.0
# via google-api-core
# via grpcio-status
grpcio-status==1.62.2
# via google-api-core
h11==0.14.0
# via httpcore
html2text==2024.2.26
# via scrapegraphai
httpcore==1.0.5
# via httpx
httplib2==0.22.0
# via google-api-python-client
# via google-auth-httplib2
httpx==0.27.0
# via anthropic
# via groq
# via openai
# via yahoo-search-py
huggingface-hub==0.23.0
# via tokenizers
idna==3.7
# via anyio
# via httpx
# via requests
# via yarl
iniconfig==2.0.0
# via pytest
jmespath==1.0.1
# via boto3
# via botocore
jsonpatch==1.33
# via langchain
# via langchain-core
jsonpointer==2.4
# via jsonpatch
langchain==0.1.15
# via scrapegraphai
langchain-anthropic==0.1.11
# via scrapegraphai
langchain-aws==0.1.3
# via scrapegraphai
langchain-community==0.0.38
# via langchain
langchain-core==0.1.52
# via langchain
# via langchain-anthropic
# via langchain-aws
# via langchain-community
# via langchain-google-genai
# via langchain-groq
# via langchain-openai
# via langchain-text-splitters
langchain-google-genai==1.0.3
# via scrapegraphai
langchain-groq==0.1.3
# via scrapegraphai
langchain-openai==0.1.6
# via scrapegraphai
langchain-text-splitters==0.0.1
# via langchain
langsmith==0.1.57
# via langchain
# via langchain-community
# via langchain-core
lxml==5.2.2
# via free-proxy
marshmallow==3.21.2
# via dataclasses-json
minify-html==0.15.0
# via scrapegraphai
multidict==6.0.5
# via aiohttp
# via yarl
mypy-extensions==1.0.0
# via typing-inspect
numpy==1.26.4
# via faiss-cpu
# via langchain
# via langchain-aws
# via langchain-community
# via pandas
openai==1.30.1
# via langchain-openai
orjson==3.10.3
# via langsmith
packaging==23.2
# via huggingface-hub
# via langchain-core
# via marshmallow
# via pytest
pandas==2.2.2
# via scrapegraphai
playwright==1.43.0
# via scrapegraphai
pluggy==1.5.0
# via pytest
proto-plus==1.23.0
# via google-ai-generativelanguage
# via google-api-core
protobuf==4.25.3
# via google-ai-generativelanguage
# via google-api-core
# via google-generativeai
# via googleapis-common-protos
# via grpcio-status
# via proto-plus
pyasn1==0.6.0
# via pyasn1-modules
# via rsa
pyasn1-modules==0.4.0
# via google-auth
pydantic==2.7.1
# via anthropic
# via google-generativeai
# via groq
# via langchain
# via langchain-core
# via langsmith
# via openai
# via yahoo-search-py
pydantic-core==2.18.2
# via pydantic
pyee==11.1.0
# via playwright
pyparsing==3.1.2
# via httplib2
pytest==8.0.0
# via pytest-mock
pytest-mock==3.14.0
python-dateutil==2.9.0.post0
# via botocore
# via pandas
python-dotenv==1.0.1
# via scrapegraphai
pytz==2024.1
# via pandas
pyyaml==6.0.1
# via huggingface-hub
# via langchain
# via langchain-community
# via langchain-core
regex==2024.5.10
# via tiktoken
requests==2.31.0
# via free-proxy
# via google-api-core
# via huggingface-hub
# via langchain
# via langchain-community
# via langsmith
# via tiktoken
rsa==4.9
# via google-auth
s3transfer==0.10.1
# via boto3
selectolax==0.3.21
# via yahoo-search-py
six==1.16.0
# via python-dateutil
sniffio==1.3.1
# via anthropic
# via anyio
# via groq
# via httpx
# via openai
soupsieve==2.5
# via beautifulsoup4
sqlalchemy==2.0.30
# via langchain
# via langchain-community
tenacity==8.3.0
# via langchain
# via langchain-community
# via langchain-core
tiktoken==0.6.0
# via langchain-openai
# via scrapegraphai
tokenizers==0.19.1
# via anthropic
tomli==2.0.1
# via pytest
tqdm==4.66.4
# via google-generativeai
# via huggingface-hub
# via openai
# via scrapegraphai
typing-extensions==4.11.0
# via anthropic
# via anyio
# via google-generativeai
# via groq
# via huggingface-hub
# via openai
# via pydantic
# via pydantic-core
# via pyee
# via sqlalchemy
# via typing-inspect
typing-inspect==0.9.0
# via dataclasses-json
tzdata==2024.1
# via pandas
uritemplate==4.1.1
# via google-api-python-client
urllib3==1.26.18
# via botocore
# via requests
# via yahoo-search-py
yahoo-search-py==0.3
# via scrapegraphai
yarl==1.9.4
# via aiohttp

300
requirements.lock Normal file
View File

@ -0,0 +1,300 @@
# generated by rye
# use `rye lock` or `rye sync` to update this lockfile
#
# last locked with the following flags:
# pre: false
# features: []
# all-features: false
# with-sources: false
-e file:.
aiohttp==3.9.5
# via langchain
# via langchain-community
aiosignal==1.3.1
# via aiohttp
annotated-types==0.6.0
# via pydantic
anthropic==0.25.9
# via langchain-anthropic
anyio==4.3.0
# via anthropic
# via groq
# via httpx
# via openai
async-timeout==4.0.3
# via aiohttp
# via langchain
attrs==23.2.0
# via aiohttp
beautifulsoup4==4.12.3
# via google
# via scrapegraphai
boto3==1.34.105
# via langchain-aws
botocore==1.34.105
# via boto3
# via s3transfer
cachetools==5.3.3
# via google-auth
certifi==2024.2.2
# via httpcore
# via httpx
# via requests
charset-normalizer==3.3.2
# via requests
dataclasses-json==0.6.6
# via langchain
# via langchain-community
defusedxml==0.7.1
# via langchain-anthropic
distro==1.9.0
# via anthropic
# via groq
# via openai
exceptiongroup==1.2.1
# via anyio
faiss-cpu==1.8.0
# via scrapegraphai
filelock==3.14.0
# via huggingface-hub
free-proxy==1.1.1
# via scrapegraphai
frozenlist==1.4.1
# via aiohttp
# via aiosignal
fsspec==2024.3.1
# via huggingface-hub
google==3.0.0
# via scrapegraphai
google-ai-generativelanguage==0.6.3
# via google-generativeai
google-api-core==2.19.0
# via google-ai-generativelanguage
# via google-api-python-client
# via google-generativeai
google-api-python-client==2.129.0
# via google-generativeai
google-auth==2.29.0
# via google-ai-generativelanguage
# via google-api-core
# via google-api-python-client
# via google-auth-httplib2
# via google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-generativeai==0.5.3
# via langchain-google-genai
googleapis-common-protos==1.63.0
# via google-api-core
# via grpcio-status
graphviz==0.20.3
# via scrapegraphai
greenlet==3.0.3
# via playwright
# via sqlalchemy
groq==0.5.0
# via langchain-groq
grpcio==1.63.0
# via google-api-core
# via grpcio-status
grpcio-status==1.62.2
# via google-api-core
h11==0.14.0
# via httpcore
html2text==2024.2.26
# via scrapegraphai
httpcore==1.0.5
# via httpx
httplib2==0.22.0
# via google-api-python-client
# via google-auth-httplib2
httpx==0.27.0
# via anthropic
# via groq
# via openai
# via yahoo-search-py
huggingface-hub==0.23.0
# via tokenizers
idna==3.7
# via anyio
# via httpx
# via requests
# via yarl
jmespath==1.0.1
# via boto3
# via botocore
jsonpatch==1.33
# via langchain
# via langchain-core
jsonpointer==2.4
# via jsonpatch
langchain==0.1.15
# via scrapegraphai
langchain-anthropic==0.1.11
# via scrapegraphai
langchain-aws==0.1.3
# via scrapegraphai
langchain-community==0.0.38
# via langchain
langchain-core==0.1.52
# via langchain
# via langchain-anthropic
# via langchain-aws
# via langchain-community
# via langchain-google-genai
# via langchain-groq
# via langchain-openai
# via langchain-text-splitters
langchain-google-genai==1.0.3
# via scrapegraphai
langchain-groq==0.1.3
# via scrapegraphai
langchain-openai==0.1.6
# via scrapegraphai
langchain-text-splitters==0.0.1
# via langchain
langsmith==0.1.57
# via langchain
# via langchain-community
# via langchain-core
lxml==5.2.2
# via free-proxy
marshmallow==3.21.2
# via dataclasses-json
minify-html==0.15.0
# via scrapegraphai
multidict==6.0.5
# via aiohttp
# via yarl
mypy-extensions==1.0.0
# via typing-inspect
numpy==1.26.4
# via faiss-cpu
# via langchain
# via langchain-aws
# via langchain-community
# via pandas
openai==1.30.1
# via langchain-openai
orjson==3.10.3
# via langsmith
packaging==23.2
# via huggingface-hub
# via langchain-core
# via marshmallow
pandas==2.2.2
# via scrapegraphai
playwright==1.43.0
# via scrapegraphai
proto-plus==1.23.0
# via google-ai-generativelanguage
# via google-api-core
protobuf==4.25.3
# via google-ai-generativelanguage
# via google-api-core
# via google-generativeai
# via googleapis-common-protos
# via grpcio-status
# via proto-plus
pyasn1==0.6.0
# via pyasn1-modules
# via rsa
pyasn1-modules==0.4.0
# via google-auth
pydantic==2.7.1
# via anthropic
# via google-generativeai
# via groq
# via langchain
# via langchain-core
# via langsmith
# via openai
# via yahoo-search-py
pydantic-core==2.18.2
# via pydantic
pyee==11.1.0
# via playwright
pyparsing==3.1.2
# via httplib2
python-dateutil==2.9.0.post0
# via botocore
# via pandas
python-dotenv==1.0.1
# via scrapegraphai
pytz==2024.1
# via pandas
pyyaml==6.0.1
# via huggingface-hub
# via langchain
# via langchain-community
# via langchain-core
regex==2024.5.10
# via tiktoken
requests==2.31.0
# via free-proxy
# via google-api-core
# via huggingface-hub
# via langchain
# via langchain-community
# via langsmith
# via tiktoken
rsa==4.9
# via google-auth
s3transfer==0.10.1
# via boto3
selectolax==0.3.21
# via yahoo-search-py
six==1.16.0
# via python-dateutil
sniffio==1.3.1
# via anthropic
# via anyio
# via groq
# via httpx
# via openai
soupsieve==2.5
# via beautifulsoup4
sqlalchemy==2.0.30
# via langchain
# via langchain-community
tenacity==8.3.0
# via langchain
# via langchain-community
# via langchain-core
tiktoken==0.6.0
# via langchain-openai
# via scrapegraphai
tokenizers==0.19.1
# via anthropic
tqdm==4.66.4
# via google-generativeai
# via huggingface-hub
# via openai
# via scrapegraphai
typing-extensions==4.11.0
# via anthropic
# via anyio
# via google-generativeai
# via groq
# via huggingface-hub
# via openai
# via pydantic
# via pydantic-core
# via pyee
# via sqlalchemy
# via typing-inspect
typing-inspect==0.9.0
# via dataclasses-json
tzdata==2024.1
# via pandas
uritemplate==4.1.1
# via google-api-python-client
urllib3==1.26.18
# via botocore
# via requests
# via yahoo-search-py
yahoo-search-py==0.3
# via scrapegraphai
yarl==1.9.4
# via aiohttp