From 0425124c570f765b98fcf67ba6649f4f9fe76b15 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Mon, 22 Jul 2024 12:58:18 +0200 Subject: [PATCH 1/5] chore(pyproject): upgrade dependencies --- pyproject.toml | 44 +++++++++++++++++------------------ requirements-dev.lock | 49 +++++++++++++-------------------------- requirements-dev.txt | 4 ---- requirements.lock | 53 +++++++++++++------------------------------ requirements.txt | 25 -------------------- 5 files changed, 54 insertions(+), 121 deletions(-) delete mode 100644 requirements-dev.txt delete mode 100644 requirements.txt diff --git a/pyproject.toml b/pyproject.toml index c42bf33b..3922ee6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,29 +13,29 @@ authors = [ { name = "Lorenzo Padoan", email = "lorenzo.padoan977@gmail.com" } ] dependencies = [ - "langchain", - "langchain-google-genai", + "langchain>=0.2.10", + "langchain-google-genai>=1.0.7", "langchain-google-vertexai", - "langchain-openai", - "langchain-groq==0.1.3", - "langchain-aws==0.1.3", - "langchain-anthropic==0.1.11", - "html2text==2024.2.26", - "faiss-cpu==1.8.0", - "beautifulsoup4==4.12.3", - "pandas==2.2.2", - "python-dotenv==1.0.1", - "tiktoken==0.7", - "tqdm==4.66.4", - "graphviz==0.20.3", - "minify-html==0.15.0", - "free-proxy==1.1.1", - "playwright==1.43.0", - "google==3.0.0", - "undetected-playwright==0.3.0", - "semchunk==1.0.1", - "html2text==2024.2.26", - "langchain-fireworks==0.1.3" + "langchain-openai>=0.1.17", + "langchain-groq>=0.1.3", + "langchain-aws>=0.1.3", + "langchain-anthropic>=0.1.11", + "html2text>=2024.2.26", + "faiss-cpu>=1.8.0", + "beautifulsoup4>=4.12.3", + "pandas>=2.2.2", + "python-dotenv>=1.0.1", + "tiktoken>=0.7", + "tqdm>=4.66.4", + "graphviz>=0.20.3", + "minify-html>=0.15.0", + "free-proxy>=1.1.1", + "playwright>=1.43.0", + "google>=3.0.0", + "undetected-playwright>=0.3.0", + "semchunk>=1.0.1", + "html2text>=2024.2.26", + "langchain-fireworks>=0.1.3", ] license 
= "MIT" diff --git a/requirements-dev.lock b/requirements-dev.lock index b0bcaaa0..9f298273 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -12,7 +12,6 @@ aiofiles==23.2.1 # via burr aiohttp==3.9.5 # via langchain - # via langchain-community # via langchain-fireworks aiosignal==1.3.1 # via aiohttp @@ -22,7 +21,7 @@ altair==5.3.0 # via streamlit annotated-types==0.7.0 # via pydantic -anthropic==0.26.1 +anthropic==0.31.2 # via langchain-anthropic anyio==4.3.0 # via anthropic @@ -48,9 +47,9 @@ beautifulsoup4==4.12.3 # via scrapegraphai blinker==1.8.2 # via streamlit -boto3==1.34.113 +boto3==1.34.145 # via langchain-aws -botocore==1.34.113 +botocore==1.34.145 # via boto3 # via s3transfer burr==0.22.1 @@ -73,9 +72,6 @@ contourpy==1.2.1 # via matplotlib cycler==0.12.1 # via matplotlib -dataclasses-json==0.6.6 - # via langchain - # via langchain-community defusedxml==0.7.1 # via langchain-anthropic dill==0.3.8 @@ -125,7 +121,7 @@ gitpython==3.1.43 # via streamlit google==3.0.0 # via scrapegraphai -google-ai-generativelanguage==0.6.4 +google-ai-generativelanguage==0.6.6 # via google-generativeai google-api-core==2.19.0 # via google-ai-generativelanguage @@ -166,7 +162,7 @@ google-cloud-storage==2.17.0 google-crc32c==1.5.0 # via google-cloud-storage # via google-resumable-media -google-generativeai==0.5.4 +google-generativeai==0.7.2 # via langchain-google-genai google-resumable-media==2.7.1 # via google-cloud-bigquery @@ -180,6 +176,7 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright + # via sqlalchemy groq==0.8.0 # via langchain-groq grpc-google-iam-v1==0.13.1 @@ -241,7 +238,6 @@ jmespath==1.0.1 # via boto3 # via botocore jsonpatch==1.33 - # via langchain # via langchain-core jsonpointer==2.4 # via jsonpatch @@ -251,19 +247,16 @@ jsonschema-specifications==2023.12.1 # via jsonschema kiwisolver==1.4.5 # via matplotlib -langchain==0.1.15 +langchain==0.2.10 # via scrapegraphai -langchain-anthropic==0.1.11 +langchain-anthropic==0.1.20 # 
via scrapegraphai -langchain-aws==0.1.3 +langchain-aws==0.1.11 # via scrapegraphai -langchain-community==0.0.38 - # via langchain -langchain-core==0.1.52 +langchain-core==0.2.22 # via langchain # via langchain-anthropic # via langchain-aws - # via langchain-community # via langchain-fireworks # via langchain-google-genai # via langchain-google-vertexai @@ -272,19 +265,18 @@ langchain-core==0.1.52 # via langchain-text-splitters langchain-fireworks==0.1.3 # via scrapegraphai -langchain-google-genai==1.0.3 +langchain-google-genai==1.0.7 # via scrapegraphai langchain-google-vertexai==1.0.4 # via scrapegraphai -langchain-groq==0.1.3 +langchain-groq==0.1.6 # via scrapegraphai -langchain-openai==0.1.6 +langchain-openai==0.1.17 # via scrapegraphai -langchain-text-splitters==0.0.2 +langchain-text-splitters==0.2.2 # via langchain -langsmith==0.1.63 +langsmith==0.1.93 # via langchain - # via langchain-community # via langchain-core loguru==0.7.2 # via burr @@ -294,8 +286,6 @@ markdown-it-py==3.0.0 # via rich markupsafe==2.1.5 # via jinja2 -marshmallow==3.21.2 - # via dataclasses-json matplotlib==3.9.0 # via burr mccabe==0.7.0 @@ -315,7 +305,6 @@ numpy==1.26.4 # via faiss-cpu # via langchain # via langchain-aws - # via langchain-community # via matplotlib # via pandas # via pyarrow @@ -323,7 +312,7 @@ numpy==1.26.4 # via sf-hamilton # via shapely # via streamlit -openai==1.30.3 +openai==1.36.1 # via burr # via langchain-fireworks # via langchain-openai @@ -336,7 +325,6 @@ packaging==23.2 # via google-cloud-bigquery # via huggingface-hub # via langchain-core - # via marshmallow # via matplotlib # via pytest # via sphinx @@ -425,7 +413,6 @@ pytz==2024.1 pyyaml==6.0.1 # via huggingface-hub # via langchain - # via langchain-community # via langchain-core # via uvicorn referencing==0.35.1 @@ -441,7 +428,6 @@ requests==2.32.2 # via google-cloud-storage # via huggingface-hub # via langchain - # via langchain-community # via langchain-fireworks # via langsmith # via sphinx @@ -499,14 
+485,12 @@ sphinxcontrib-serializinghtml==1.1.10 # via sphinx sqlalchemy==2.0.30 # via langchain - # via langchain-community starlette==0.37.2 # via fastapi streamlit==1.35.0 # via burr tenacity==8.3.0 # via langchain - # via langchain-community # via langchain-core # via streamlit tiktoken==0.7.0 @@ -556,7 +540,6 @@ typing-extensions==4.12.0 # via typing-inspect # via uvicorn typing-inspect==0.9.0 - # via dataclasses-json # via sf-hamilton tzdata==2024.1 # via pandas diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index d33296d5..00000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,4 +0,0 @@ -sphinx==7.1.2 -furo==2024.5.6 -pytest==8.0.0 -burr[start]==0.22.1 \ No newline at end of file diff --git a/requirements.lock b/requirements.lock index 7a8bb455..881432d6 100644 --- a/requirements.lock +++ b/requirements.lock @@ -10,13 +10,12 @@ -e file:. aiohttp==3.9.5 # via langchain - # via langchain-community # via langchain-fireworks aiosignal==1.3.1 # via aiohttp annotated-types==0.7.0 # via pydantic -anthropic==0.26.1 +anthropic==0.31.2 # via langchain-anthropic anyio==4.3.0 # via anthropic @@ -31,9 +30,9 @@ attrs==23.2.0 beautifulsoup4==4.12.3 # via google # via scrapegraphai -boto3==1.34.113 +boto3==1.34.145 # via langchain-aws -botocore==1.34.113 +botocore==1.34.145 # via boto3 # via s3transfer cachetools==5.3.3 @@ -44,9 +43,6 @@ certifi==2024.2.2 # via requests charset-normalizer==3.3.2 # via requests -dataclasses-json==0.6.6 - # via langchain - # via langchain-community defusedxml==0.7.1 # via langchain-anthropic distro==1.9.0 @@ -72,7 +68,7 @@ fsspec==2024.5.0 # via huggingface-hub google==3.0.0 # via scrapegraphai -google-ai-generativelanguage==0.6.4 +google-ai-generativelanguage==0.6.6 # via google-generativeai google-api-core==2.19.0 # via google-ai-generativelanguage @@ -113,7 +109,7 @@ google-cloud-storage==2.17.0 google-crc32c==1.5.0 # via google-cloud-storage # via google-resumable-media -google-generativeai==0.5.4 
+google-generativeai==0.7.2 # via langchain-google-genai google-resumable-media==2.7.1 # via google-cloud-bigquery @@ -126,6 +122,7 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright + # via sqlalchemy groq==0.8.0 # via langchain-groq grpc-google-iam-v1==0.13.1 @@ -166,23 +163,19 @@ jmespath==1.0.1 # via boto3 # via botocore jsonpatch==1.33 - # via langchain # via langchain-core jsonpointer==2.4 # via jsonpatch -langchain==0.1.15 +langchain==0.2.10 # via scrapegraphai -langchain-anthropic==0.1.11 +langchain-anthropic==0.1.20 # via scrapegraphai -langchain-aws==0.1.3 +langchain-aws==0.1.11 # via scrapegraphai -langchain-community==0.0.38 - # via langchain -langchain-core==0.1.52 +langchain-core==0.2.22 # via langchain # via langchain-anthropic # via langchain-aws - # via langchain-community # via langchain-fireworks # via langchain-google-genai # via langchain-google-vertexai @@ -191,39 +184,33 @@ langchain-core==0.1.52 # via langchain-text-splitters langchain-fireworks==0.1.3 # via scrapegraphai -langchain-google-genai==1.0.3 +langchain-google-genai==1.0.7 # via scrapegraphai langchain-google-vertexai==1.0.4 # via scrapegraphai -langchain-groq==0.1.3 +langchain-groq==0.1.6 # via scrapegraphai -langchain-openai==0.1.6 +langchain-openai==0.1.17 # via scrapegraphai -langchain-text-splitters==0.0.2 +langchain-text-splitters==0.2.2 # via langchain -langsmith==0.1.63 +langsmith==0.1.93 # via langchain - # via langchain-community # via langchain-core lxml==5.2.2 # via free-proxy -marshmallow==3.21.2 - # via dataclasses-json minify-html==0.15.0 # via scrapegraphai multidict==6.0.5 # via aiohttp # via yarl -mypy-extensions==1.0.0 - # via typing-inspect numpy==1.26.4 # via faiss-cpu # via langchain # via langchain-aws - # via langchain-community # via pandas # via shapely -openai==1.30.3 +openai==1.36.1 # via langchain-fireworks # via langchain-openai orjson==3.10.3 @@ -233,7 +220,6 @@ packaging==23.2 # via google-cloud-bigquery # via huggingface-hub # 
via langchain-core - # via marshmallow pandas==2.2.2 # via scrapegraphai pillow==10.3.0 @@ -288,7 +274,6 @@ pytz==2024.1 pyyaml==6.0.1 # via huggingface-hub # via langchain - # via langchain-community # via langchain-core regex==2024.5.15 # via tiktoken @@ -299,7 +284,6 @@ requests==2.32.2 # via google-cloud-storage # via huggingface-hub # via langchain - # via langchain-community # via langchain-fireworks # via langsmith # via tiktoken @@ -323,10 +307,8 @@ soupsieve==2.5 # via beautifulsoup4 sqlalchemy==2.0.30 # via langchain - # via langchain-community tenacity==8.3.0 # via langchain - # via langchain-community # via langchain-core tiktoken==0.7.0 # via langchain-openai @@ -350,9 +332,6 @@ typing-extensions==4.12.0 # via pydantic-core # via pyee # via sqlalchemy - # via typing-inspect -typing-inspect==0.9.0 - # via dataclasses-json tzdata==2024.1 # via pandas undetected-playwright==0.3.0 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 502e9df5..00000000 --- a/requirements.txt +++ /dev/null @@ -1,25 +0,0 @@ -beautifulsoup4==4.12.3 -burr==0.24.0 -free_proxy==1.1.1 -google==3.0.0 -graphviz==0.20.3 -html2text==2024.2.26 -langchain==0.2.10 -langchain_anthropic==0.1.20 -langchain_aws==0.1.11 -langchain_community==0.2.9 -langchain_core==0.2.22 -langchain_fireworks==0.1.5 -langchain_google_genai==1.0.7 -langchain_google_vertexai==1.0.6 -langchain_groq==0.1.6 -langchain_openai==0.1.17 -minify_html==0.15.0 -pandas==2.2.2 -playwright==1.43.0 -pydantic==2.8.2 -Requests==2.32.3 -semchunk==2.2.0 -tiktoken==0.7.0 -tqdm==4.66.4 -undetected_playwright==0.3.0 From fcb3220868e7ef1127a7a47f40d0379be282e6eb Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 22 Jul 2024 13:06:32 +0200 Subject: [PATCH 2/5] feat: add new toml From 254bde7008b41ffa434925e3ae84340c53a565bd Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 22 Jul 2024 11:07:52 +0000 Subject: [PATCH 3/5] ci(release): 1.10.0-beta.6 [skip ci] ## 
[1.10.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.10.0-beta.5...v1.10.0-beta.6) (2024-07-22) ### Features * add new toml ([fcb3220](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/fcb3220868e7ef1127a7a47f40d0379be282e6eb)) ### chore * **pyproject:** upgrade dependencies ([0425124](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/0425124c570f765b98fcf67ba6649f4f9fe76b15)) ### Docs * **gpt-4o-mini:** added new gpt, fixed chromium lazy loading, ([99dc849](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/99dc8497d85289759286a973e4aecc3f924d3ada)) --- CHANGELOG.md | 17 +++++++++++++++++ pyproject.toml | 2 +- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 01fb0c3a..e8b2ceb3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,20 @@ +## [1.10.0-beta.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.10.0-beta.5...v1.10.0-beta.6) (2024-07-22) + + +### Features + +* add new toml ([fcb3220](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/fcb3220868e7ef1127a7a47f40d0379be282e6eb)) + + +### chore + +* **pyproject:** upgrade dependencies ([0425124](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/0425124c570f765b98fcf67ba6649f4f9fe76b15)) + + +### Docs + +* **gpt-4o-mini:** added new gpt, fixed chromium lazy loading, ([99dc849](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/99dc8497d85289759286a973e4aecc3f924d3ada)) + ## [1.10.0-beta.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.10.0-beta.4...v1.10.0-beta.5) (2024-07-20) diff --git a/pyproject.toml b/pyproject.toml index 3922ee6e..92b9a9af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "scrapegraphai" -version = "1.10.0b5" +version = "1.10.0b6" From 3289c7bf5ec58ac3d04e9e5e8e654af9abcee228 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Mon, 22 Jul 2024 13:18:17 +0200 Subject: [PATCH 4/5] chore(dependencies): add 
script to auto-update requirements --- manual deployment/autorequirements.py | 30 +++++++++++++++++++++++++++ pyproject.toml | 1 + requirements-dev.txt | 6 ++++++ requirements.txt | 23 ++++++++++++++++++++ 4 files changed, 60 insertions(+) create mode 100644 manual deployment/autorequirements.py create mode 100644 requirements-dev.txt create mode 100644 requirements.txt diff --git a/manual deployment/autorequirements.py b/manual deployment/autorequirements.py new file mode 100644 index 00000000..2bb7e1e8 --- /dev/null +++ b/manual deployment/autorequirements.py @@ -0,0 +1,30 @@ +import toml + +# Load the TOML file +data = toml.load('pyproject.toml') + +# Get the dependencies +dependencies = data['project']['dependencies'] + +# Write the dependencies to a requirements.txt file +with open('requirements.txt', 'w') as f: + for dependency in dependencies: + f.write(dependency + '\n') + +# Get the dev dependencies +dev_dependencies = data['tool']['rye']['dev-dependencies'] + +# Expand the optional dependencies +optional_dependencies = data['project']['optional-dependencies'] +expanded_dev_dependencies = [] +for dependency in dev_dependencies: + if dependency.startswith('-e file:.'): + optional_dependency_name = dependency.split('.')[1][1:-1] + expanded_dev_dependencies.extend(optional_dependencies[optional_dependency_name]) + else: + expanded_dev_dependencies.append(dependency) + +# Write the expanded dev dependencies to a requirements-dev.txt file +with open('requirements-dev.txt', 'w') as f: + for dependency in expanded_dev_dependencies: + f.write(dependency + '\n') \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 92b9a9af..b06411a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,3 +91,4 @@ dev-dependencies = [ [tool.rye.scripts] pylint-local = "pylint scrapegraphai/**/*.py" pylint-ci = "pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py" +update-requirements = "python 'manual deployment/autorequirements.py'" \ No 
newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..e04399e9 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,6 @@ +pytest==8.0.0 +pytest-mock==3.14.0 +burr[start]==0.22.1 +sphinx==6.0 +furo==2024.5.6 +pylint>=3.2.5 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..4a3d525c --- /dev/null +++ b/requirements.txt @@ -0,0 +1,23 @@ +langchain>=0.2.10 +langchain-google-genai>=1.0.7 +langchain-google-vertexai +langchain-openai>=0.1.17 +langchain-groq>=0.1.3 +langchain-aws>=0.1.3 +langchain-anthropic>=0.1.11 +html2text>=2024.2.26 +faiss-cpu>=1.8.0 +beautifulsoup4>=4.12.3 +pandas>=2.2.2 +python-dotenv>=1.0.1 +tiktoken>=0.7 +tqdm>=4.66.4 +graphviz>=0.20.3 +minify-html>=0.15.0 +free-proxy>=1.1.1 +playwright>=1.43.0 +google>=3.0.0 +undetected-playwright>=0.3.0 +semchunk>=1.0.1 +html2text>=2024.2.26 +langchain-fireworks>=0.1.3 From 295fc28ceb02c78198f7fbe678352503b3259b6b Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Mon, 22 Jul 2024 14:33:35 +0200 Subject: [PATCH 5/5] chore(ci): set up workflow for requirements auto-update --- .github/update-requirements.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 .github/update-requirements.yml diff --git a/.github/update-requirements.yml b/.github/update-requirements.yml new file mode 100644 index 00000000..31d0abe6 --- /dev/null +++ b/.github/update-requirements.yml @@ -0,0 +1,26 @@ +name: Update requirements +on: + push: + paths: + - 'pyproject.toml' + - '.github/workflows/update-requirements.yml' + +jobs: + update: + name: Update requirements + runs-on: ubuntu-latest + steps: + - name: Install the latest version of rye + uses: eifinger/setup-rye@v3 + - name: Build app + run: rye run update-requirements + commit: + name: Commit changes + run: | + git config --global user.name 'github-actions' + git config --global user.email 
'github-actions[bot]@users.noreply.github.com' + git add . + git commit -m "ci: update requirements.txt [skip ci]" + git push + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}