feat(burr): added burr integration in graphs and optional burr installation

This commit is contained in:
Marco Perini 2024-05-22 18:30:49 +02:00
parent 654a042396
commit ac10128ff3
9 changed files with 155 additions and 202 deletions

View File

@ -0,0 +1,50 @@
"""
Example of Search Graph
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import SearchGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
# Pull variables from a local .env file into the process environment.
load_dotenv()

# ************************************************
# Graph configuration
# ************************************************

# The key is read from the environment; it is None when OPENAI_APIKEY is unset.
openai_key = os.getenv("OPENAI_APIKEY")

# Settings for the language model used by the graph.
llm_settings = {
    "api_key": openai_key,
    "model": "gpt-3.5-turbo",
}

# Options forwarded under "burr_kwargs".
# NOTE(review): presumably this is what switches on the Burr integration
# for this run -- confirm against the graph implementation.
burr_settings = {
    "project_name": "search-graph-openai",
}

graph_config = {
    "llm": llm_settings,
    "max_results": 2,
    "verbose": True,
    "burr_kwargs": burr_settings,
}

# ************************************************
# Build the SearchGraph and execute it
# ************************************************

search_graph = SearchGraph(
    prompt="List me Chioggia's attractions.",
    config=graph_config,
)

result = search_graph.run()
print(result)

# ************************************************
# Report execution info
# ************************************************

graph_exec_info = search_graph.get_execution_info()
exec_report = prettify_exec_info(graph_exec_info)
print(exec_report)

# Persist the result, first as CSV and then as JSON, using "result" as the name.
convert_to_csv(result, "result")
convert_to_json(result, "result")

View File

@ -29,14 +29,13 @@ dependencies = [
"playwright==1.43.0",
"google==3.0.0",
"yahoo-search-py==0.3",
"burr[start]==0.17.1"
]
license = "MIT"
readme = "README.md"
homepage = "https://scrapegraph-ai.readthedocs.io/"
homepage = "https://scrapegraphai.com/"
repository = "https://github.com/VinciGit00/Scrapegraph-ai"
documentation = "https://scrapegraph-doc.onrender.com/"
documentation = "https://scrapegraph-ai.readthedocs.io/en/latest/"
keywords = [
"scrapegraph",
"scrapegraphai",
@ -64,6 +63,10 @@ classifiers = [
]
requires-python = ">= 3.9, < 3.12"
[project.optional-dependencies]
burr = ["burr[start]==0.18.0"]
docs = ["sphinx==4.3.0", "sphinx-rtd-theme==1.0.0"]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
@ -72,12 +75,7 @@ build-backend = "hatchling.build"
managed = true
dev-dependencies = [
"pytest==8.0.0",
"pytest-mock==3.14.0"
]
[tool.rye.group.docs]
optional = true
[tool.rye.group.docs.dependencies]
sphinx = "7.1.2"
sphinx-rtd-theme = "2.0.0"
"pytest-mock==3.14.0",
"-e file:.[burr]",
"-e file:.[docs]",
]

View File

@ -6,7 +6,6 @@
# features: []
# all-features: false
# with-sources: false
# generate-hashes: false
-e file:.
aiofiles==23.2.1
@ -16,11 +15,13 @@ aiohttp==3.9.5
# via langchain-community
aiosignal==1.3.1
# via aiohttp
alabaster==0.7.16
# via sphinx
altair==5.3.0
# via streamlit
annotated-types==0.6.0
annotated-types==0.7.0
# via pydantic
anthropic==0.25.9
anthropic==0.26.1
# via langchain-anthropic
anyio==4.3.0
# via anthropic
@ -36,17 +37,20 @@ attrs==23.2.0
# via aiohttp
# via jsonschema
# via referencing
babel==2.15.0
# via sphinx
beautifulsoup4==4.12.3
# via google
# via scrapegraphai
blinker==1.8.2
# via streamlit
boto3==1.34.105
boto3==1.34.110
# via langchain-aws
botocore==1.34.105
botocore==1.34.110
# via boto3
# via s3transfer
burr==0.17.1
burr==0.18.0
# via burr
# via scrapegraphai
cachetools==5.3.3
# via google-auth
@ -66,6 +70,7 @@ colorama==0.4.6
# via click
# via loguru
# via pytest
# via sphinx
# via tqdm
# via uvicorn
contourpy==1.2.1
@ -83,6 +88,9 @@ distro==1.9.0
# via openai
dnspython==2.6.1
# via email-validator
docutils==0.17.1
# via sphinx
# via sphinx-rtd-theme
email-validator==2.1.1
# via fastapi
exceptiongroup==1.2.1
@ -106,7 +114,7 @@ free-proxy==1.1.1
frozenlist==1.4.1
# via aiohttp
# via aiosignal
fsspec==2024.3.1
fsspec==2024.5.0
# via huggingface-hub
gitdb==4.0.11
# via gitpython
@ -114,7 +122,7 @@ gitpython==3.1.43
# via streamlit
google==3.0.0
# via scrapegraphai
google-ai-generativelanguage==0.6.3
google-ai-generativelanguage==0.6.4
# via google-generativeai
google-api-core==2.19.0
# via google-ai-generativelanguage
@ -130,7 +138,7 @@ google-auth==2.29.0
# via google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-generativeai==0.5.3
google-generativeai==0.5.4
# via langchain-google-genai
googleapis-common-protos==1.63.0
# via google-api-core
@ -141,9 +149,9 @@ graphviz==0.20.3
greenlet==3.0.3
# via playwright
# via sqlalchemy
groq==0.5.0
groq==0.7.0
# via langchain-groq
grpcio==1.63.0
grpcio==1.64.0
# via google-api-core
# via grpcio-status
grpcio-status==1.62.2
@ -166,7 +174,7 @@ httpx==0.27.0
# via groq
# via openai
# via yahoo-search-py
huggingface-hub==0.23.0
huggingface-hub==0.23.1
# via tokenizers
idna==3.7
# via anyio
@ -174,6 +182,8 @@ idna==3.7
# via httpx
# via requests
# via yarl
imagesize==1.4.1
# via sphinx
iniconfig==2.0.0
# via pytest
jinja2==3.1.4
@ -181,6 +191,9 @@ jinja2==3.1.4
# via burr
# via fastapi
# via pydeck
# via sphinx
jiter==0.1.0
# via anthropic
jmespath==1.0.1
# via boto3
# via botocore
@ -218,9 +231,9 @@ langchain-groq==0.1.3
# via scrapegraphai
langchain-openai==0.1.6
# via scrapegraphai
langchain-text-splitters==0.0.1
langchain-text-splitters==0.0.2
# via langchain
langsmith==0.1.57
langsmith==0.1.60
# via langchain
# via langchain-community
# via langchain-core
@ -271,6 +284,7 @@ packaging==23.2
# via marshmallow
# via matplotlib
# via pytest
# via sphinx
# via streamlit
pandas==2.2.2
# via altair
@ -322,6 +336,7 @@ pyee==11.1.0
# via playwright
pygments==2.18.0
# via rich
# via sphinx
pyparsing==3.1.2
# via httplib2
# via matplotlib
@ -348,9 +363,9 @@ pyyaml==6.0.1
referencing==0.35.1
# via jsonschema
# via jsonschema-specifications
regex==2024.5.10
regex==2024.5.15
# via tiktoken
requests==2.31.0
requests==2.32.2
# via burr
# via free-proxy
# via google-api-core
@ -358,6 +373,7 @@ requests==2.31.0
# via langchain
# via langchain-community
# via langsmith
# via sphinx
# via streamlit
# via tiktoken
rich==13.7.1
@ -372,7 +388,7 @@ s3transfer==0.10.1
# via boto3
selectolax==0.3.21
# via yahoo-search-py
sf-hamilton==1.62.0
sf-hamilton==1.63.0
# via burr
shellingham==1.5.4
# via typer
@ -386,8 +402,27 @@ sniffio==1.3.1
# via groq
# via httpx
# via openai
snowballstemmer==2.2.0
# via sphinx
soupsieve==2.5
# via beautifulsoup4
sphinx==4.3.0
# via scrapegraphai
# via sphinx-rtd-theme
sphinx-rtd-theme==1.0.0
# via scrapegraphai
sphinxcontrib-applehelp==1.0.8
# via sphinx
sphinxcontrib-devhelp==1.0.6
# via sphinx
sphinxcontrib-htmlhelp==2.0.5
# via sphinx
sphinxcontrib-jsmath==1.0.1
# via sphinx
sphinxcontrib-qthelp==1.0.7
# via sphinx
sphinxcontrib-serializinghtml==1.1.10
# via sphinx
sqlalchemy==2.0.30
# via langchain
# via langchain-community
@ -448,7 +483,7 @@ ujson==5.10.0
# via fastapi
uritemplate==4.1.1
# via google-api-python-client
urllib3==1.26.18
urllib3==2.2.1
# via botocore
# via requests
# via yahoo-search-py
@ -467,3 +502,5 @@ yahoo-search-py==0.3
# via scrapegraphai
yarl==1.9.4
# via aiohttp
setuptools==70.0.0
# via sphinx

View File

@ -6,71 +6,45 @@
# features: []
# all-features: false
# with-sources: false
# generate-hashes: false
-e file:.
aiofiles==23.2.1
# via burr
aiohttp==3.9.5
# via langchain
# via langchain-community
aiosignal==1.3.1
# via aiohttp
altair==5.3.0
# via streamlit
annotated-types==0.6.0
annotated-types==0.7.0
# via pydantic
anthropic==0.25.9
anthropic==0.26.1
# via langchain-anthropic
anyio==4.3.0
# via anthropic
# via groq
# via httpx
# via openai
# via starlette
# via watchfiles
async-timeout==4.0.3
# via aiohttp
# via langchain
attrs==23.2.0
# via aiohttp
# via jsonschema
# via referencing
beautifulsoup4==4.12.3
# via google
# via scrapegraphai
blinker==1.8.2
# via streamlit
boto3==1.34.105
boto3==1.34.110
# via langchain-aws
botocore==1.34.105
botocore==1.34.110
# via boto3
# via s3transfer
burr==0.17.1
# via scrapegraphai
cachetools==5.3.3
# via google-auth
# via streamlit
certifi==2024.2.2
# via httpcore
# via httpx
# via requests
charset-normalizer==3.3.2
# via requests
click==8.1.7
# via burr
# via streamlit
# via typer
# via uvicorn
colorama==0.4.6
# via click
# via loguru
# via tqdm
# via uvicorn
contourpy==1.2.1
# via matplotlib
cycler==0.12.1
# via matplotlib
dataclasses-json==0.6.6
# via langchain
# via langchain-community
@ -80,39 +54,22 @@ distro==1.9.0
# via anthropic
# via groq
# via openai
dnspython==2.6.1
# via email-validator
email-validator==2.1.1
# via fastapi
exceptiongroup==1.2.1
# via anyio
faiss-cpu==1.8.0
# via scrapegraphai
fastapi==0.111.0
# via burr
# via fastapi-pagination
fastapi-cli==0.0.4
# via fastapi
fastapi-pagination==0.12.24
# via burr
filelock==3.14.0
# via huggingface-hub
fonttools==4.51.0
# via matplotlib
free-proxy==1.1.1
# via scrapegraphai
frozenlist==1.4.1
# via aiohttp
# via aiosignal
fsspec==2024.3.1
fsspec==2024.5.0
# via huggingface-hub
gitdb==4.0.11
# via gitpython
gitpython==3.1.43
# via streamlit
google==3.0.0
# via scrapegraphai
google-ai-generativelanguage==0.6.3
google-ai-generativelanguage==0.6.4
# via google-generativeai
google-api-core==2.19.0
# via google-ai-generativelanguage
@ -128,27 +85,25 @@ google-auth==2.29.0
# via google-generativeai
google-auth-httplib2==0.2.0
# via google-api-python-client
google-generativeai==0.5.3
google-generativeai==0.5.4
# via langchain-google-genai
googleapis-common-protos==1.63.0
# via google-api-core
# via grpcio-status
graphviz==0.20.3
# via burr
# via scrapegraphai
greenlet==3.0.3
# via playwright
# via sqlalchemy
groq==0.5.0
groq==0.7.0
# via langchain-groq
grpcio==1.63.0
grpcio==1.64.0
# via google-api-core
# via grpcio-status
grpcio-status==1.62.2
# via google-api-core
h11==0.14.0
# via httpcore
# via uvicorn
html2text==2024.2.26
# via scrapegraphai
httpcore==1.0.5
@ -156,27 +111,20 @@ httpcore==1.0.5
httplib2==0.22.0
# via google-api-python-client
# via google-auth-httplib2
httptools==0.6.1
# via uvicorn
httpx==0.27.0
# via anthropic
# via fastapi
# via groq
# via openai
# via yahoo-search-py
huggingface-hub==0.23.0
huggingface-hub==0.23.1
# via tokenizers
idna==3.7
# via anyio
# via email-validator
# via httpx
# via requests
# via yarl
jinja2==3.1.4
# via altair
# via burr
# via fastapi
# via pydeck
jiter==0.1.0
# via anthropic
jmespath==1.0.1
# via boto3
# via botocore
@ -185,12 +133,6 @@ jsonpatch==1.33
# via langchain-core
jsonpointer==2.4
# via jsonpatch
jsonschema==4.22.0
# via altair
jsonschema-specifications==2023.12.1
# via jsonschema
kiwisolver==1.4.5
# via matplotlib
langchain==0.1.15
# via scrapegraphai
langchain-anthropic==0.1.11
@ -214,26 +156,16 @@ langchain-groq==0.1.3
# via scrapegraphai
langchain-openai==0.1.6
# via scrapegraphai
langchain-text-splitters==0.0.1
langchain-text-splitters==0.0.2
# via langchain
langsmith==0.1.57
langsmith==0.1.60
# via langchain
# via langchain-community
# via langchain-core
loguru==0.7.2
# via burr
lxml==5.2.2
# via free-proxy
markdown-it-py==3.0.0
# via rich
markupsafe==2.1.5
# via jinja2
marshmallow==3.21.2
# via dataclasses-json
matplotlib==3.9.0
# via burr
mdurl==0.1.2
# via markdown-it-py
minify-html==0.15.0
# via scrapegraphai
multidict==6.0.5
@ -242,39 +174,21 @@ multidict==6.0.5
mypy-extensions==1.0.0
# via typing-inspect
numpy==1.26.4
# via altair
# via contourpy
# via faiss-cpu
# via langchain
# via langchain-aws
# via langchain-community
# via matplotlib
# via pandas
# via pyarrow
# via pydeck
# via sf-hamilton
# via streamlit
openai==1.30.1
# via burr
# via langchain-openai
orjson==3.10.3
# via fastapi
# via langsmith
packaging==23.2
# via altair
# via huggingface-hub
# via langchain-core
# via marshmallow
# via matplotlib
# via streamlit
pandas==2.2.2
# via altair
# via scrapegraphai
# via sf-hamilton
# via streamlit
pillow==10.3.0
# via matplotlib
# via streamlit
playwright==1.43.0
# via scrapegraphai
proto-plus==1.23.0
@ -287,9 +201,6 @@ protobuf==4.25.3
# via googleapis-common-protos
# via grpcio-status
# via proto-plus
# via streamlit
pyarrow==16.1.0
# via streamlit
pyasn1==0.6.0
# via pyasn1-modules
# via rsa
@ -297,9 +208,6 @@ pyasn1-modules==0.4.0
# via google-auth
pydantic==2.7.1
# via anthropic
# via burr
# via fastapi
# via fastapi-pagination
# via google-generativeai
# via groq
# via langchain
@ -309,24 +217,15 @@ pydantic==2.7.1
# via yahoo-search-py
pydantic-core==2.18.2
# via pydantic
pydeck==0.9.1
# via streamlit
pyee==11.1.0
# via playwright
pygments==2.18.0
# via rich
pyparsing==3.1.2
# via httplib2
# via matplotlib
python-dateutil==2.9.0.post0
# via botocore
# via matplotlib
# via pandas
python-dotenv==1.0.1
# via scrapegraphai
# via uvicorn
python-multipart==0.0.9
# via fastapi
pytz==2024.1
# via pandas
pyyaml==6.0.1
@ -334,42 +233,24 @@ pyyaml==6.0.1
# via langchain
# via langchain-community
# via langchain-core
# via uvicorn
referencing==0.35.1
# via jsonschema
# via jsonschema-specifications
regex==2024.5.10
regex==2024.5.15
# via tiktoken
requests==2.31.0
# via burr
requests==2.32.2
# via free-proxy
# via google-api-core
# via huggingface-hub
# via langchain
# via langchain-community
# via langsmith
# via streamlit
# via tiktoken
rich==13.7.1
# via streamlit
# via typer
rpds-py==0.18.1
# via jsonschema
# via referencing
rsa==4.9
# via google-auth
s3transfer==0.10.1
# via boto3
selectolax==0.3.21
# via yahoo-search-py
sf-hamilton==1.62.0
# via burr
shellingham==1.5.4
# via typer
six==1.16.0
# via python-dateutil
smmap==5.0.1
# via gitdb
sniffio==1.3.1
# via anthropic
# via anyio
@ -381,39 +262,23 @@ soupsieve==2.5
sqlalchemy==2.0.30
# via langchain
# via langchain-community
starlette==0.37.2
# via fastapi
streamlit==1.34.0
# via burr
tenacity==8.3.0
# via langchain
# via langchain-community
# via langchain-core
# via streamlit
tiktoken==0.6.0
# via langchain-openai
# via scrapegraphai
tokenizers==0.19.1
# via anthropic
toml==0.10.2
# via streamlit
toolz==0.12.1
# via altair
tornado==6.4
# via streamlit
tqdm==4.66.4
# via google-generativeai
# via huggingface-hub
# via openai
# via scrapegraphai
typer==0.12.3
# via fastapi-cli
typing-extensions==4.11.0
# via altair
# via anthropic
# via anyio
# via fastapi
# via fastapi-pagination
# via google-generativeai
# via groq
# via huggingface-hub
@ -421,36 +286,18 @@ typing-extensions==4.11.0
# via pydantic
# via pydantic-core
# via pyee
# via sf-hamilton
# via sqlalchemy
# via streamlit
# via typer
# via typing-inspect
# via uvicorn
typing-inspect==0.9.0
# via dataclasses-json
# via sf-hamilton
tzdata==2024.1
# via pandas
ujson==5.10.0
# via fastapi
uritemplate==4.1.1
# via google-api-python-client
urllib3==1.26.18
urllib3==2.2.1
# via botocore
# via requests
# via yahoo-search-py
uvicorn==0.29.0
# via burr
# via fastapi
watchdog==4.0.0
# via streamlit
watchfiles==0.21.0
# via uvicorn
websockets==12.0
# via uvicorn
win32-setctime==1.1.0
# via loguru
yahoo-search-py==0.3
# via scrapegraphai
yarl==1.9.4

View File

@ -3,6 +3,7 @@ AbstractGraph Module
"""
from abc import ABC, abstractmethod
from typing import Optional
import uuid
from langchain_aws import BedrockEmbeddings
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings
@ -69,6 +70,16 @@ class AbstractGraph(ABC):
"embedder_model": self.embedder_model}
self.set_common_params(common_params, overwrite=False)
# set burr config
self.burr_kwargs = config.get("burr_kwargs", None)
if self.burr_kwargs is not None:
self.graph.use_burr = True
if "app_instance_id" not in self.burr_kwargs:
# set a random uuid for the app_instance_id to avoid conflicts
self.burr_kwargs["app_instance_id"] = str(uuid.uuid4())
self.graph.burr_config = self.burr_kwargs
def set_common_params(self, params: dict, overwrite=False):
"""
Pass parameters to every node in the graph unless otherwise defined in the graph.

View File

@ -7,8 +7,6 @@ import warnings
from langchain_community.callbacks import get_openai_callback
from typing import Tuple
from ..integrations import BurrBridge
class BaseGraph:
"""
@ -163,6 +161,9 @@ class BaseGraph:
self.initial_state = initial_state
if self.use_burr:
from ..integrations import BurrBridge
bridge = BurrBridge(self, self.burr_config)
result = bridge.execute(initial_state)
return (result["_state"], [])

View File

@ -1 +1,5 @@
"""
Init file for integrations module
"""
from .burr_bridge import BurrBridge

View File

@ -6,6 +6,11 @@ Bridge class to integrate Burr into ScrapeGraphAI graphs
import re
from typing import Any, Dict, List, Tuple
# burr is an optional dependency: fail fast at import time with an actionable
# installation hint instead of a bare "No module named 'burr'".
try:
    import burr
except ImportError as exc:
    # Chain explicitly so the original import failure is kept as __cause__
    # and the traceback shows it as the direct cause of this error.
    raise ImportError("burr package is not installed. Please install it with 'pip install scrapegraphai[burr]'") from exc
from burr import tracking
from burr.core import Application, ApplicationBuilder, State, Action, default
from burr.lifecycle import PostRunStepHook, PreRunStepHook