mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-07-04 21:00:36 +08:00
update function
This commit is contained in:
parent
09f983b14f
commit
2ae19aee56
@ -35,7 +35,8 @@ dependencies = [
|
||||
"undetected-playwright==0.3.0",
|
||||
"semchunk==1.0.1",
|
||||
"html2text==2024.2.26",
|
||||
"langchain-fireworks==0.1.3"
|
||||
"langchain-fireworks==0.1.3",
|
||||
"langchain-community==0.2.9"
|
||||
]
|
||||
|
||||
license = "MIT"
|
||||
|
||||
@ -1,585 +0,0 @@
|
||||
# generated by rye
|
||||
# use `rye lock` or `rye sync` to update this lockfile
|
||||
#
|
||||
# last locked with the following flags:
|
||||
# pre: false
|
||||
# features: []
|
||||
# all-features: false
|
||||
# with-sources: false
|
||||
|
||||
-e file:.
|
||||
aiofiles==23.2.1
|
||||
# via burr
|
||||
aiohttp==3.9.5
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-fireworks
|
||||
aiosignal==1.3.1
|
||||
# via aiohttp
|
||||
alabaster==0.7.16
|
||||
# via sphinx
|
||||
altair==5.3.0
|
||||
# via streamlit
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
anthropic==0.26.1
|
||||
# via langchain-anthropic
|
||||
anyio==4.3.0
|
||||
# via anthropic
|
||||
# via groq
|
||||
# via httpx
|
||||
# via openai
|
||||
# via starlette
|
||||
# via watchfiles
|
||||
astroid==3.2.2
|
||||
# via pylint
|
||||
async-timeout==4.0.3
|
||||
# via aiohttp
|
||||
# via langchain
|
||||
attrs==23.2.0
|
||||
# via aiohttp
|
||||
# via jsonschema
|
||||
# via referencing
|
||||
babel==2.15.0
|
||||
# via sphinx
|
||||
beautifulsoup4==4.12.3
|
||||
# via furo
|
||||
# via google
|
||||
# via scrapegraphai
|
||||
blinker==1.8.2
|
||||
# via streamlit
|
||||
boto3==1.34.113
|
||||
# via langchain-aws
|
||||
botocore==1.34.113
|
||||
# via boto3
|
||||
# via s3transfer
|
||||
burr==0.22.1
|
||||
# via scrapegraphai
|
||||
cachetools==5.3.3
|
||||
# via google-auth
|
||||
# via streamlit
|
||||
certifi==2024.2.2
|
||||
# via httpcore
|
||||
# via httpx
|
||||
# via requests
|
||||
charset-normalizer==3.3.2
|
||||
# via requests
|
||||
click==8.1.7
|
||||
# via burr
|
||||
# via streamlit
|
||||
# via typer
|
||||
# via uvicorn
|
||||
contourpy==1.2.1
|
||||
# via matplotlib
|
||||
cycler==0.12.1
|
||||
# via matplotlib
|
||||
dataclasses-json==0.6.6
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
defusedxml==0.7.1
|
||||
# via langchain-anthropic
|
||||
dill==0.3.8
|
||||
# via pylint
|
||||
distro==1.9.0
|
||||
# via anthropic
|
||||
# via groq
|
||||
# via openai
|
||||
dnspython==2.6.1
|
||||
# via email-validator
|
||||
docstring-parser==0.16
|
||||
# via google-cloud-aiplatform
|
||||
docutils==0.19
|
||||
# via sphinx
|
||||
email-validator==2.1.1
|
||||
# via fastapi
|
||||
exceptiongroup==1.2.1
|
||||
# via anyio
|
||||
# via pytest
|
||||
faiss-cpu==1.8.0
|
||||
# via scrapegraphai
|
||||
fastapi==0.111.0
|
||||
# via burr
|
||||
# via fastapi-pagination
|
||||
fastapi-cli==0.0.4
|
||||
# via fastapi
|
||||
fastapi-pagination==0.12.24
|
||||
# via burr
|
||||
filelock==3.14.0
|
||||
# via huggingface-hub
|
||||
fireworks-ai==0.14.0
|
||||
# via langchain-fireworks
|
||||
fonttools==4.52.1
|
||||
# via matplotlib
|
||||
free-proxy==1.1.1
|
||||
# via scrapegraphai
|
||||
frozenlist==1.4.1
|
||||
# via aiohttp
|
||||
# via aiosignal
|
||||
fsspec==2024.5.0
|
||||
# via huggingface-hub
|
||||
furo==2024.5.6
|
||||
# via scrapegraphai
|
||||
gitdb==4.0.11
|
||||
# via gitpython
|
||||
gitpython==3.1.43
|
||||
# via streamlit
|
||||
google==3.0.0
|
||||
# via scrapegraphai
|
||||
google-ai-generativelanguage==0.6.4
|
||||
# via google-generativeai
|
||||
google-api-core==2.19.0
|
||||
# via google-ai-generativelanguage
|
||||
# via google-api-python-client
|
||||
# via google-cloud-aiplatform
|
||||
# via google-cloud-bigquery
|
||||
# via google-cloud-core
|
||||
# via google-cloud-resource-manager
|
||||
# via google-cloud-storage
|
||||
# via google-generativeai
|
||||
google-api-python-client==2.130.0
|
||||
# via google-generativeai
|
||||
google-auth==2.29.0
|
||||
# via google-ai-generativelanguage
|
||||
# via google-api-core
|
||||
# via google-api-python-client
|
||||
# via google-auth-httplib2
|
||||
# via google-cloud-aiplatform
|
||||
# via google-cloud-bigquery
|
||||
# via google-cloud-core
|
||||
# via google-cloud-resource-manager
|
||||
# via google-cloud-storage
|
||||
# via google-generativeai
|
||||
google-auth-httplib2==0.2.0
|
||||
# via google-api-python-client
|
||||
google-cloud-aiplatform==1.58.0
|
||||
# via langchain-google-vertexai
|
||||
google-cloud-bigquery==3.25.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-core==2.4.1
|
||||
# via google-cloud-bigquery
|
||||
# via google-cloud-storage
|
||||
google-cloud-resource-manager==1.12.3
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-storage==2.17.0
|
||||
# via google-cloud-aiplatform
|
||||
# via langchain-google-vertexai
|
||||
google-crc32c==1.5.0
|
||||
# via google-cloud-storage
|
||||
# via google-resumable-media
|
||||
google-generativeai==0.5.4
|
||||
# via langchain-google-genai
|
||||
google-resumable-media==2.7.1
|
||||
# via google-cloud-bigquery
|
||||
# via google-cloud-storage
|
||||
googleapis-common-protos==1.63.0
|
||||
# via google-api-core
|
||||
# via grpc-google-iam-v1
|
||||
# via grpcio-status
|
||||
graphviz==0.20.3
|
||||
# via burr
|
||||
# via scrapegraphai
|
||||
greenlet==3.0.3
|
||||
# via playwright
|
||||
groq==0.8.0
|
||||
# via langchain-groq
|
||||
grpc-google-iam-v1==0.13.1
|
||||
# via google-cloud-resource-manager
|
||||
grpcio==1.64.0
|
||||
# via google-api-core
|
||||
# via googleapis-common-protos
|
||||
# via grpc-google-iam-v1
|
||||
# via grpcio-status
|
||||
grpcio-status==1.62.2
|
||||
# via google-api-core
|
||||
h11==0.14.0
|
||||
# via httpcore
|
||||
# via uvicorn
|
||||
html2text==2024.2.26
|
||||
# via scrapegraphai
|
||||
httpcore==1.0.5
|
||||
# via httpx
|
||||
httplib2==0.22.0
|
||||
# via google-api-python-client
|
||||
# via google-auth-httplib2
|
||||
httptools==0.6.1
|
||||
# via uvicorn
|
||||
httpx==0.27.0
|
||||
# via anthropic
|
||||
# via fastapi
|
||||
# via fireworks-ai
|
||||
# via groq
|
||||
# via openai
|
||||
httpx-sse==0.4.0
|
||||
# via fireworks-ai
|
||||
huggingface-hub==0.23.1
|
||||
# via tokenizers
|
||||
idna==3.7
|
||||
# via anyio
|
||||
# via email-validator
|
||||
# via httpx
|
||||
# via requests
|
||||
# via yarl
|
||||
imagesize==1.4.1
|
||||
# via sphinx
|
||||
importlib-metadata==8.0.0
|
||||
# via sphinx
|
||||
importlib-resources==6.4.0
|
||||
# via matplotlib
|
||||
iniconfig==2.0.0
|
||||
# via pytest
|
||||
isort==5.13.2
|
||||
# via pylint
|
||||
jinja2==3.1.4
|
||||
# via altair
|
||||
# via burr
|
||||
# via fastapi
|
||||
# via pydeck
|
||||
# via sphinx
|
||||
jiter==0.4.0
|
||||
# via anthropic
|
||||
jmespath==1.0.1
|
||||
# via boto3
|
||||
# via botocore
|
||||
jsonpatch==1.33
|
||||
# via langchain
|
||||
# via langchain-core
|
||||
jsonpointer==2.4
|
||||
# via jsonpatch
|
||||
jsonschema==4.22.0
|
||||
# via altair
|
||||
jsonschema-specifications==2023.12.1
|
||||
# via jsonschema
|
||||
kiwisolver==1.4.5
|
||||
# via matplotlib
|
||||
langchain==0.1.15
|
||||
# via scrapegraphai
|
||||
langchain-anthropic==0.1.11
|
||||
# via scrapegraphai
|
||||
langchain-aws==0.1.3
|
||||
# via scrapegraphai
|
||||
langchain-community==0.0.38
|
||||
# via langchain
|
||||
langchain-core==0.1.52
|
||||
# via langchain
|
||||
# via langchain-anthropic
|
||||
# via langchain-aws
|
||||
# via langchain-community
|
||||
# via langchain-fireworks
|
||||
# via langchain-google-genai
|
||||
# via langchain-google-vertexai
|
||||
# via langchain-groq
|
||||
# via langchain-openai
|
||||
# via langchain-text-splitters
|
||||
langchain-fireworks==0.1.3
|
||||
# via scrapegraphai
|
||||
langchain-google-genai==1.0.3
|
||||
# via scrapegraphai
|
||||
langchain-google-vertexai==1.0.4
|
||||
# via scrapegraphai
|
||||
langchain-groq==0.1.3
|
||||
# via scrapegraphai
|
||||
langchain-openai==0.1.6
|
||||
# via scrapegraphai
|
||||
langchain-text-splitters==0.0.2
|
||||
# via langchain
|
||||
langsmith==0.1.63
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-core
|
||||
loguru==0.7.2
|
||||
# via burr
|
||||
lxml==5.2.2
|
||||
# via free-proxy
|
||||
markdown-it-py==3.0.0
|
||||
# via rich
|
||||
markupsafe==2.1.5
|
||||
# via jinja2
|
||||
marshmallow==3.21.2
|
||||
# via dataclasses-json
|
||||
matplotlib==3.9.0
|
||||
# via burr
|
||||
mccabe==0.7.0
|
||||
# via pylint
|
||||
mdurl==0.1.2
|
||||
# via markdown-it-py
|
||||
minify-html==0.15.0
|
||||
# via scrapegraphai
|
||||
multidict==6.0.5
|
||||
# via aiohttp
|
||||
# via yarl
|
||||
mypy-extensions==1.0.0
|
||||
# via typing-inspect
|
||||
numpy==1.26.4
|
||||
# via altair
|
||||
# via contourpy
|
||||
# via faiss-cpu
|
||||
# via langchain
|
||||
# via langchain-aws
|
||||
# via langchain-community
|
||||
# via matplotlib
|
||||
# via pandas
|
||||
# via pyarrow
|
||||
# via pydeck
|
||||
# via sf-hamilton
|
||||
# via shapely
|
||||
# via streamlit
|
||||
openai==1.30.3
|
||||
# via burr
|
||||
# via langchain-fireworks
|
||||
# via langchain-openai
|
||||
orjson==3.10.3
|
||||
# via fastapi
|
||||
# via langsmith
|
||||
packaging==23.2
|
||||
# via altair
|
||||
# via google-cloud-aiplatform
|
||||
# via google-cloud-bigquery
|
||||
# via huggingface-hub
|
||||
# via langchain-core
|
||||
# via marshmallow
|
||||
# via matplotlib
|
||||
# via pytest
|
||||
# via sphinx
|
||||
# via streamlit
|
||||
pandas==2.2.2
|
||||
# via altair
|
||||
# via scrapegraphai
|
||||
# via sf-hamilton
|
||||
# via streamlit
|
||||
pillow==10.3.0
|
||||
# via fireworks-ai
|
||||
# via matplotlib
|
||||
# via streamlit
|
||||
platformdirs==4.2.2
|
||||
# via pylint
|
||||
playwright==1.43.0
|
||||
# via scrapegraphai
|
||||
# via undetected-playwright
|
||||
pluggy==1.5.0
|
||||
# via pytest
|
||||
proto-plus==1.23.0
|
||||
# via google-ai-generativelanguage
|
||||
# via google-api-core
|
||||
# via google-cloud-aiplatform
|
||||
# via google-cloud-resource-manager
|
||||
protobuf==4.25.3
|
||||
# via google-ai-generativelanguage
|
||||
# via google-api-core
|
||||
# via google-cloud-aiplatform
|
||||
# via google-cloud-resource-manager
|
||||
# via google-generativeai
|
||||
# via googleapis-common-protos
|
||||
# via grpc-google-iam-v1
|
||||
# via grpcio-status
|
||||
# via proto-plus
|
||||
# via streamlit
|
||||
pyarrow==16.1.0
|
||||
# via streamlit
|
||||
pyasn1==0.6.0
|
||||
# via pyasn1-modules
|
||||
# via rsa
|
||||
pyasn1-modules==0.4.0
|
||||
# via google-auth
|
||||
pydantic==2.7.1
|
||||
# via anthropic
|
||||
# via burr
|
||||
# via fastapi
|
||||
# via fastapi-pagination
|
||||
# via fireworks-ai
|
||||
# via google-cloud-aiplatform
|
||||
# via google-generativeai
|
||||
# via groq
|
||||
# via langchain
|
||||
# via langchain-core
|
||||
# via langsmith
|
||||
# via openai
|
||||
pydantic-core==2.18.2
|
||||
# via pydantic
|
||||
pydeck==0.9.1
|
||||
# via streamlit
|
||||
pyee==11.1.0
|
||||
# via playwright
|
||||
pygments==2.18.0
|
||||
# via furo
|
||||
# via rich
|
||||
# via sphinx
|
||||
pylint==3.2.5
|
||||
pyparsing==3.1.2
|
||||
# via httplib2
|
||||
# via matplotlib
|
||||
pytest==8.0.0
|
||||
# via pytest-mock
|
||||
pytest-mock==3.14.0
|
||||
python-dateutil==2.9.0.post0
|
||||
# via botocore
|
||||
# via google-cloud-bigquery
|
||||
# via matplotlib
|
||||
# via pandas
|
||||
python-dotenv==1.0.1
|
||||
# via scrapegraphai
|
||||
# via uvicorn
|
||||
python-multipart==0.0.9
|
||||
# via fastapi
|
||||
pytz==2024.1
|
||||
# via pandas
|
||||
pyyaml==6.0.1
|
||||
# via huggingface-hub
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-core
|
||||
# via uvicorn
|
||||
referencing==0.35.1
|
||||
# via jsonschema
|
||||
# via jsonschema-specifications
|
||||
regex==2024.5.15
|
||||
# via tiktoken
|
||||
requests==2.32.2
|
||||
# via burr
|
||||
# via free-proxy
|
||||
# via google-api-core
|
||||
# via google-cloud-bigquery
|
||||
# via google-cloud-storage
|
||||
# via huggingface-hub
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-fireworks
|
||||
# via langsmith
|
||||
# via sphinx
|
||||
# via streamlit
|
||||
# via tiktoken
|
||||
rich==13.7.1
|
||||
# via streamlit
|
||||
# via typer
|
||||
rpds-py==0.18.1
|
||||
# via jsonschema
|
||||
# via referencing
|
||||
rsa==4.9
|
||||
# via google-auth
|
||||
s3transfer==0.10.1
|
||||
# via boto3
|
||||
semchunk==1.0.1
|
||||
# via scrapegraphai
|
||||
sf-hamilton==1.63.0
|
||||
# via burr
|
||||
shapely==2.0.4
|
||||
# via google-cloud-aiplatform
|
||||
shellingham==1.5.4
|
||||
# via typer
|
||||
six==1.16.0
|
||||
# via python-dateutil
|
||||
smmap==5.0.1
|
||||
# via gitdb
|
||||
sniffio==1.3.1
|
||||
# via anthropic
|
||||
# via anyio
|
||||
# via groq
|
||||
# via httpx
|
||||
# via openai
|
||||
snowballstemmer==2.2.0
|
||||
# via sphinx
|
||||
soupsieve==2.5
|
||||
# via beautifulsoup4
|
||||
sphinx==6.0.0
|
||||
# via furo
|
||||
# via scrapegraphai
|
||||
# via sphinx-basic-ng
|
||||
sphinx-basic-ng==1.0.0b2
|
||||
# via furo
|
||||
sphinxcontrib-applehelp==1.0.8
|
||||
# via sphinx
|
||||
sphinxcontrib-devhelp==1.0.6
|
||||
# via sphinx
|
||||
sphinxcontrib-htmlhelp==2.0.5
|
||||
# via sphinx
|
||||
sphinxcontrib-jsmath==1.0.1
|
||||
# via sphinx
|
||||
sphinxcontrib-qthelp==1.0.7
|
||||
# via sphinx
|
||||
sphinxcontrib-serializinghtml==1.1.10
|
||||
# via sphinx
|
||||
sqlalchemy==2.0.30
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
starlette==0.37.2
|
||||
# via fastapi
|
||||
streamlit==1.35.0
|
||||
# via burr
|
||||
tenacity==8.3.0
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-core
|
||||
# via streamlit
|
||||
tiktoken==0.7.0
|
||||
# via langchain-openai
|
||||
# via scrapegraphai
|
||||
tokenizers==0.19.1
|
||||
# via anthropic
|
||||
toml==0.10.2
|
||||
# via streamlit
|
||||
tomli==2.0.1
|
||||
# via pylint
|
||||
# via pytest
|
||||
tomlkit==0.12.5
|
||||
# via pylint
|
||||
toolz==0.12.1
|
||||
# via altair
|
||||
tornado==6.4
|
||||
# via streamlit
|
||||
tqdm==4.66.4
|
||||
# via google-generativeai
|
||||
# via huggingface-hub
|
||||
# via openai
|
||||
# via scrapegraphai
|
||||
# via semchunk
|
||||
typer==0.12.3
|
||||
# via fastapi-cli
|
||||
typing-extensions==4.12.0
|
||||
# via altair
|
||||
# via anthropic
|
||||
# via anyio
|
||||
# via astroid
|
||||
# via fastapi
|
||||
# via fastapi-pagination
|
||||
# via google-generativeai
|
||||
# via groq
|
||||
# via huggingface-hub
|
||||
# via openai
|
||||
# via pydantic
|
||||
# via pydantic-core
|
||||
# via pyee
|
||||
# via pylint
|
||||
# via sf-hamilton
|
||||
# via sqlalchemy
|
||||
# via starlette
|
||||
# via streamlit
|
||||
# via typer
|
||||
# via typing-inspect
|
||||
# via uvicorn
|
||||
typing-inspect==0.9.0
|
||||
# via dataclasses-json
|
||||
# via sf-hamilton
|
||||
tzdata==2024.1
|
||||
# via pandas
|
||||
ujson==5.10.0
|
||||
# via fastapi
|
||||
undetected-playwright==0.3.0
|
||||
# via scrapegraphai
|
||||
uritemplate==4.1.1
|
||||
# via google-api-python-client
|
||||
urllib3==1.26.18
|
||||
# via botocore
|
||||
# via requests
|
||||
uvicorn==0.29.0
|
||||
# via burr
|
||||
# via fastapi
|
||||
uvloop==0.19.0
|
||||
# via uvicorn
|
||||
watchfiles==0.21.0
|
||||
# via uvicorn
|
||||
websockets==12.0
|
||||
# via uvicorn
|
||||
yarl==1.9.4
|
||||
# via aiohttp
|
||||
zipp==3.19.2
|
||||
# via importlib-metadata
|
||||
# via importlib-resources
|
||||
@ -1,4 +0,0 @@
|
||||
sphinx==7.1.2
|
||||
furo==2024.5.6
|
||||
pytest==8.0.0
|
||||
burr[start]==0.22.1
|
||||
@ -1,366 +0,0 @@
|
||||
# generated by rye
|
||||
# use `rye lock` or `rye sync` to update this lockfile
|
||||
#
|
||||
# last locked with the following flags:
|
||||
# pre: false
|
||||
# features: []
|
||||
# all-features: false
|
||||
# with-sources: false
|
||||
|
||||
-e file:.
|
||||
aiohttp==3.9.5
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-fireworks
|
||||
aiosignal==1.3.1
|
||||
# via aiohttp
|
||||
annotated-types==0.7.0
|
||||
# via pydantic
|
||||
anthropic==0.26.1
|
||||
# via langchain-anthropic
|
||||
anyio==4.3.0
|
||||
# via anthropic
|
||||
# via groq
|
||||
# via httpx
|
||||
# via openai
|
||||
async-timeout==4.0.3
|
||||
# via aiohttp
|
||||
# via langchain
|
||||
attrs==23.2.0
|
||||
# via aiohttp
|
||||
beautifulsoup4==4.12.3
|
||||
# via google
|
||||
# via scrapegraphai
|
||||
boto3==1.34.113
|
||||
# via langchain-aws
|
||||
botocore==1.34.113
|
||||
# via boto3
|
||||
# via s3transfer
|
||||
cachetools==5.3.3
|
||||
# via google-auth
|
||||
certifi==2024.2.2
|
||||
# via httpcore
|
||||
# via httpx
|
||||
# via requests
|
||||
charset-normalizer==3.3.2
|
||||
# via requests
|
||||
dataclasses-json==0.6.6
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
defusedxml==0.7.1
|
||||
# via langchain-anthropic
|
||||
distro==1.9.0
|
||||
# via anthropic
|
||||
# via groq
|
||||
# via openai
|
||||
docstring-parser==0.16
|
||||
# via google-cloud-aiplatform
|
||||
exceptiongroup==1.2.1
|
||||
# via anyio
|
||||
faiss-cpu==1.8.0
|
||||
# via scrapegraphai
|
||||
filelock==3.14.0
|
||||
# via huggingface-hub
|
||||
fireworks-ai==0.14.0
|
||||
# via langchain-fireworks
|
||||
free-proxy==1.1.1
|
||||
# via scrapegraphai
|
||||
frozenlist==1.4.1
|
||||
# via aiohttp
|
||||
# via aiosignal
|
||||
fsspec==2024.5.0
|
||||
# via huggingface-hub
|
||||
google==3.0.0
|
||||
# via scrapegraphai
|
||||
google-ai-generativelanguage==0.6.4
|
||||
# via google-generativeai
|
||||
google-api-core==2.19.0
|
||||
# via google-ai-generativelanguage
|
||||
# via google-api-python-client
|
||||
# via google-cloud-aiplatform
|
||||
# via google-cloud-bigquery
|
||||
# via google-cloud-core
|
||||
# via google-cloud-resource-manager
|
||||
# via google-cloud-storage
|
||||
# via google-generativeai
|
||||
google-api-python-client==2.130.0
|
||||
# via google-generativeai
|
||||
google-auth==2.29.0
|
||||
# via google-ai-generativelanguage
|
||||
# via google-api-core
|
||||
# via google-api-python-client
|
||||
# via google-auth-httplib2
|
||||
# via google-cloud-aiplatform
|
||||
# via google-cloud-bigquery
|
||||
# via google-cloud-core
|
||||
# via google-cloud-resource-manager
|
||||
# via google-cloud-storage
|
||||
# via google-generativeai
|
||||
google-auth-httplib2==0.2.0
|
||||
# via google-api-python-client
|
||||
google-cloud-aiplatform==1.58.0
|
||||
# via langchain-google-vertexai
|
||||
google-cloud-bigquery==3.25.0
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-core==2.4.1
|
||||
# via google-cloud-bigquery
|
||||
# via google-cloud-storage
|
||||
google-cloud-resource-manager==1.12.3
|
||||
# via google-cloud-aiplatform
|
||||
google-cloud-storage==2.17.0
|
||||
# via google-cloud-aiplatform
|
||||
# via langchain-google-vertexai
|
||||
google-crc32c==1.5.0
|
||||
# via google-cloud-storage
|
||||
# via google-resumable-media
|
||||
google-generativeai==0.5.4
|
||||
# via langchain-google-genai
|
||||
google-resumable-media==2.7.1
|
||||
# via google-cloud-bigquery
|
||||
# via google-cloud-storage
|
||||
googleapis-common-protos==1.63.0
|
||||
# via google-api-core
|
||||
# via grpc-google-iam-v1
|
||||
# via grpcio-status
|
||||
graphviz==0.20.3
|
||||
# via scrapegraphai
|
||||
greenlet==3.0.3
|
||||
# via playwright
|
||||
groq==0.8.0
|
||||
# via langchain-groq
|
||||
grpc-google-iam-v1==0.13.1
|
||||
# via google-cloud-resource-manager
|
||||
grpcio==1.64.0
|
||||
# via google-api-core
|
||||
# via googleapis-common-protos
|
||||
# via grpc-google-iam-v1
|
||||
# via grpcio-status
|
||||
grpcio-status==1.62.2
|
||||
# via google-api-core
|
||||
h11==0.14.0
|
||||
# via httpcore
|
||||
html2text==2024.2.26
|
||||
# via scrapegraphai
|
||||
httpcore==1.0.5
|
||||
# via httpx
|
||||
httplib2==0.22.0
|
||||
# via google-api-python-client
|
||||
# via google-auth-httplib2
|
||||
httpx==0.27.0
|
||||
# via anthropic
|
||||
# via fireworks-ai
|
||||
# via groq
|
||||
# via openai
|
||||
httpx-sse==0.4.0
|
||||
# via fireworks-ai
|
||||
huggingface-hub==0.23.1
|
||||
# via tokenizers
|
||||
idna==3.7
|
||||
# via anyio
|
||||
# via httpx
|
||||
# via requests
|
||||
# via yarl
|
||||
jiter==0.4.0
|
||||
# via anthropic
|
||||
jmespath==1.0.1
|
||||
# via boto3
|
||||
# via botocore
|
||||
jsonpatch==1.33
|
||||
# via langchain
|
||||
# via langchain-core
|
||||
jsonpointer==2.4
|
||||
# via jsonpatch
|
||||
langchain==0.1.15
|
||||
# via scrapegraphai
|
||||
langchain-anthropic==0.1.11
|
||||
# via scrapegraphai
|
||||
langchain-aws==0.1.3
|
||||
# via scrapegraphai
|
||||
langchain-community==0.0.38
|
||||
# via langchain
|
||||
langchain-core==0.1.52
|
||||
# via langchain
|
||||
# via langchain-anthropic
|
||||
# via langchain-aws
|
||||
# via langchain-community
|
||||
# via langchain-fireworks
|
||||
# via langchain-google-genai
|
||||
# via langchain-google-vertexai
|
||||
# via langchain-groq
|
||||
# via langchain-openai
|
||||
# via langchain-text-splitters
|
||||
langchain-fireworks==0.1.3
|
||||
# via scrapegraphai
|
||||
langchain-google-genai==1.0.3
|
||||
# via scrapegraphai
|
||||
langchain-google-vertexai==1.0.4
|
||||
# via scrapegraphai
|
||||
langchain-groq==0.1.3
|
||||
# via scrapegraphai
|
||||
langchain-openai==0.1.6
|
||||
# via scrapegraphai
|
||||
langchain-text-splitters==0.0.2
|
||||
# via langchain
|
||||
langsmith==0.1.63
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-core
|
||||
lxml==5.2.2
|
||||
# via free-proxy
|
||||
marshmallow==3.21.2
|
||||
# via dataclasses-json
|
||||
minify-html==0.15.0
|
||||
# via scrapegraphai
|
||||
multidict==6.0.5
|
||||
# via aiohttp
|
||||
# via yarl
|
||||
mypy-extensions==1.0.0
|
||||
# via typing-inspect
|
||||
numpy==1.26.4
|
||||
# via faiss-cpu
|
||||
# via langchain
|
||||
# via langchain-aws
|
||||
# via langchain-community
|
||||
# via pandas
|
||||
# via shapely
|
||||
openai==1.30.3
|
||||
# via langchain-fireworks
|
||||
# via langchain-openai
|
||||
orjson==3.10.3
|
||||
# via langsmith
|
||||
packaging==23.2
|
||||
# via google-cloud-aiplatform
|
||||
# via google-cloud-bigquery
|
||||
# via huggingface-hub
|
||||
# via langchain-core
|
||||
# via marshmallow
|
||||
pandas==2.2.2
|
||||
# via scrapegraphai
|
||||
pillow==10.3.0
|
||||
# via fireworks-ai
|
||||
playwright==1.43.0
|
||||
# via scrapegraphai
|
||||
# via undetected-playwright
|
||||
proto-plus==1.23.0
|
||||
# via google-ai-generativelanguage
|
||||
# via google-api-core
|
||||
# via google-cloud-aiplatform
|
||||
# via google-cloud-resource-manager
|
||||
protobuf==4.25.3
|
||||
# via google-ai-generativelanguage
|
||||
# via google-api-core
|
||||
# via google-cloud-aiplatform
|
||||
# via google-cloud-resource-manager
|
||||
# via google-generativeai
|
||||
# via googleapis-common-protos
|
||||
# via grpc-google-iam-v1
|
||||
# via grpcio-status
|
||||
# via proto-plus
|
||||
pyasn1==0.6.0
|
||||
# via pyasn1-modules
|
||||
# via rsa
|
||||
pyasn1-modules==0.4.0
|
||||
# via google-auth
|
||||
pydantic==2.7.1
|
||||
# via anthropic
|
||||
# via fireworks-ai
|
||||
# via google-cloud-aiplatform
|
||||
# via google-generativeai
|
||||
# via groq
|
||||
# via langchain
|
||||
# via langchain-core
|
||||
# via langsmith
|
||||
# via openai
|
||||
pydantic-core==2.18.2
|
||||
# via pydantic
|
||||
pyee==11.1.0
|
||||
# via playwright
|
||||
pyparsing==3.1.2
|
||||
# via httplib2
|
||||
python-dateutil==2.9.0.post0
|
||||
# via botocore
|
||||
# via google-cloud-bigquery
|
||||
# via pandas
|
||||
python-dotenv==1.0.1
|
||||
# via scrapegraphai
|
||||
pytz==2024.1
|
||||
# via pandas
|
||||
pyyaml==6.0.1
|
||||
# via huggingface-hub
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-core
|
||||
regex==2024.5.15
|
||||
# via tiktoken
|
||||
requests==2.32.2
|
||||
# via free-proxy
|
||||
# via google-api-core
|
||||
# via google-cloud-bigquery
|
||||
# via google-cloud-storage
|
||||
# via huggingface-hub
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-fireworks
|
||||
# via langsmith
|
||||
# via tiktoken
|
||||
rsa==4.9
|
||||
# via google-auth
|
||||
s3transfer==0.10.1
|
||||
# via boto3
|
||||
semchunk==1.0.1
|
||||
# via scrapegraphai
|
||||
shapely==2.0.4
|
||||
# via google-cloud-aiplatform
|
||||
six==1.16.0
|
||||
# via python-dateutil
|
||||
sniffio==1.3.1
|
||||
# via anthropic
|
||||
# via anyio
|
||||
# via groq
|
||||
# via httpx
|
||||
# via openai
|
||||
soupsieve==2.5
|
||||
# via beautifulsoup4
|
||||
sqlalchemy==2.0.30
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
tenacity==8.3.0
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
# via langchain-core
|
||||
tiktoken==0.7.0
|
||||
# via langchain-openai
|
||||
# via scrapegraphai
|
||||
tokenizers==0.19.1
|
||||
# via anthropic
|
||||
tqdm==4.66.4
|
||||
# via google-generativeai
|
||||
# via huggingface-hub
|
||||
# via openai
|
||||
# via scrapegraphai
|
||||
# via semchunk
|
||||
typing-extensions==4.12.0
|
||||
# via anthropic
|
||||
# via anyio
|
||||
# via google-generativeai
|
||||
# via groq
|
||||
# via huggingface-hub
|
||||
# via openai
|
||||
# via pydantic
|
||||
# via pydantic-core
|
||||
# via pyee
|
||||
# via sqlalchemy
|
||||
# via typing-inspect
|
||||
typing-inspect==0.9.0
|
||||
# via dataclasses-json
|
||||
tzdata==2024.1
|
||||
# via pandas
|
||||
undetected-playwright==0.3.0
|
||||
# via scrapegraphai
|
||||
uritemplate==4.1.1
|
||||
# via google-api-python-client
|
||||
urllib3==1.26.18
|
||||
# via botocore
|
||||
# via requests
|
||||
yarl==1.9.4
|
||||
# via aiohttp
|
||||
@ -7,6 +7,8 @@ from langchain.prompts import PromptTemplate
|
||||
from langchain_core.output_parsers import JsonOutputParser
|
||||
from langchain_core.runnables import RunnableParallel
|
||||
from tqdm import tqdm
|
||||
import asyncio
|
||||
from ..utils.merge_results import merge_results
|
||||
from ..utils.logging import get_logger
|
||||
from ..models import Ollama, OpenAI
|
||||
from .base_node import BaseNode
|
||||
@ -109,42 +111,46 @@ class GenerateAnswerNode(BaseNode):
|
||||
|
||||
chains_dict = {}
|
||||
|
||||
if len(doc) == 1:
|
||||
prompt = PromptTemplate(
|
||||
template=template_no_chunks_prompt,
|
||||
input_variables=["question"],
|
||||
partial_variables={"context": doc,
|
||||
"format_instructions": format_instructions})
|
||||
chain = prompt | self.llm_model | output_parser
|
||||
answer = chain.invoke({"question": user_prompt})
|
||||
|
||||
state.update({self.output[0]: answer})
|
||||
return state
|
||||
|
||||
# Use tqdm to add progress bar
|
||||
for i, chunk in enumerate(tqdm(doc, desc="Processing chunks", disable=not self.verbose)):
|
||||
if len(doc) == 1:
|
||||
prompt = PromptTemplate(
|
||||
template=template_no_chunks_prompt,
|
||||
input_variables=["question"],
|
||||
partial_variables={"context": chunk,
|
||||
"format_instructions": format_instructions})
|
||||
chain = prompt | self.llm_model | output_parser
|
||||
answer = chain.invoke({"question": user_prompt})
|
||||
break
|
||||
|
||||
prompt = PromptTemplate(
|
||||
template=template_chunks_prompt,
|
||||
input_variables=["question"],
|
||||
partial_variables={"context": chunk,
|
||||
"chunk_id": i + 1,
|
||||
"format_instructions": format_instructions})
|
||||
# Dynamically name the chains based on their index
|
||||
template=template_chunks,
|
||||
input_variables=["question"],
|
||||
partial_variables={"context": chunk,
|
||||
"chunk_id": i + 1,
|
||||
"format_instructions": format_instructions})
|
||||
# Add chain to dictionary with dynamic name
|
||||
chain_name = f"chunk{i+1}"
|
||||
chains_dict[chain_name] = prompt | self.llm_model | output_parser
|
||||
|
||||
if len(chains_dict) > 1:
|
||||
# Use dictionary unpacking to pass the dynamically named chains to RunnableParallel
|
||||
map_chain = RunnableParallel(**chains_dict)
|
||||
# Chain
|
||||
answer = map_chain.invoke({"question": user_prompt})
|
||||
# Merge the answers from the chunks
|
||||
merge_prompt = PromptTemplate(
|
||||
template = template_merge_prompt,
|
||||
input_variables=["context", "question"],
|
||||
partial_variables={"format_instructions": format_instructions},
|
||||
)
|
||||
merge_chain = merge_prompt | self.llm_model | output_parser
|
||||
answer = merge_chain.invoke({"context": answer, "question": user_prompt})
|
||||
|
||||
# Update the state with the generated answer
|
||||
state.update({self.output[0]: answer})
|
||||
async def process_chains():
|
||||
async_runner = RunnableParallel()
|
||||
for chain_name, chain in chains_dict.items():
|
||||
async_runner.add(chain.ainvoke([{"question": user_prompt}] * len(doc)))
|
||||
|
||||
batch_results = await async_runner.run()
|
||||
return batch_results
|
||||
|
||||
loop = asyncio.get_event_loop()
|
||||
batch_answers = loop.run_until_complete(process_chains())
|
||||
|
||||
# Merge batch results (assuming same structure)
|
||||
merged_answer = merge_results(batch_answers)
|
||||
answers = merged_answer
|
||||
|
||||
state.update({self.output[0]: answers})
|
||||
return state
|
||||
|
||||
@ -11,3 +11,4 @@ from .sys_dynamic_import import dynamic_import, srcfile_import
|
||||
from .cleanup_html import cleanup_html
|
||||
from .logging import *
|
||||
from .convert_to_md import convert_to_md
|
||||
from .merge_results import merge_results
|
||||
|
||||
30
scrapegraphai/utils/merge_results.py
Normal file
30
scrapegraphai/utils/merge_results.py
Normal file
@ -0,0 +1,30 @@
|
||||
def merge_results(batch_answers):
    r"""
    Merges the results of batch processing into a single string, with one result per line.

    Parameters:
    -----------
    batch_answers : list of dict
        A list of dictionaries, where each dictionary contains a key "text" with the
        batch processing result as a string.

    Returns:
    --------
    str
        A single string containing all merged results, with each result separated by a
        newline character. An empty input yields an empty string.

    Raises:
    -------
    KeyError
        If one of the dictionaries has no "text" key.

    Example:
    --------
    >>> batch_answers = [{"text": "Result from batch 1"}, {"text": "Result from batch 2"}]
    >>> merge_results(batch_answers)
    'Result from batch 1\nResult from batch 2'
    """
    # Pull the "text" payload out of every batch result, preserving input order,
    # and put a newline between consecutive chunks.
    return "\n".join(answer["text"] for answer in batch_answers)
|
||||
Loading…
Reference in New Issue
Block a user