mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-25 21:11:11 +08:00
feat(smart-scraper-multi): add schema to graphs and created SmartScraperMultiGraph
This commit is contained in:
parent
5701afe927
commit
fc58e2d3a6
134
examples/knowledge_graph/kg_custom_graph.py
Normal file
134
examples/knowledge_graph/kg_custom_graph.py
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
"""
|
||||||
|
Example of custom graph for creating a knowledge graph
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os, json
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
from langchain_openai import OpenAIEmbeddings
|
||||||
|
from scrapegraphai.models import OpenAI
|
||||||
|
from scrapegraphai.graphs import BaseGraph, SmartScraperGraph
|
||||||
|
from scrapegraphai.nodes import GraphIteratorNode, MergeAnswersNode, KnowledgeGraphNode
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the output schema
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
schema= """{
|
||||||
|
"Job Postings": {
|
||||||
|
"Company x": [
|
||||||
|
{
|
||||||
|
"title": "...",
|
||||||
|
"description": "...",
|
||||||
|
"location": "...",
|
||||||
|
"date_posted": "..",
|
||||||
|
"requirements": ["...", "...", "..."]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"title": "...",
|
||||||
|
"description": "...",
|
||||||
|
"location": "...",
|
||||||
|
"date_posted": "..",
|
||||||
|
"requirements": ["...", "...", "..."]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"Company y": [
|
||||||
|
{
|
||||||
|
"title": "...",
|
||||||
|
"description": "...",
|
||||||
|
"location": "...",
|
||||||
|
"date_posted": "..",
|
||||||
|
"requirements": ["...", "...", "..."]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}"""
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
openai_key = os.getenv("OPENAI_APIKEY")
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key": openai_key,
|
||||||
|
"model": "gpt-4o",
|
||||||
|
},
|
||||||
|
"verbose": True,
|
||||||
|
"headless": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the LLM, the embedder and the SmartScraperGraph instance
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
llm_model = OpenAI(graph_config["llm"])
|
||||||
|
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
|
||||||
|
|
||||||
|
smart_scraper_instance = SmartScraperGraph(
|
||||||
|
prompt="",
|
||||||
|
source="",
|
||||||
|
config=graph_config,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the graph nodes
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
graph_iterator_node = GraphIteratorNode(
|
||||||
|
input="user_prompt & urls",
|
||||||
|
output=["results"],
|
||||||
|
node_config={
|
||||||
|
"graph_instance": smart_scraper_instance,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
merge_answers_node = MergeAnswersNode(
|
||||||
|
input="user_prompt & results",
|
||||||
|
output=["answer"],
|
||||||
|
node_config={
|
||||||
|
"llm_model": llm_model,
|
||||||
|
"schema": schema
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
knowledge_graph_node = KnowledgeGraphNode(
|
||||||
|
input="user_prompt & answer",
|
||||||
|
output=["kg"],
|
||||||
|
node_config={
|
||||||
|
"llm_model": llm_model,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
graph = BaseGraph(
|
||||||
|
nodes=[
|
||||||
|
graph_iterator_node,
|
||||||
|
merge_answers_node,
|
||||||
|
knowledge_graph_node
|
||||||
|
],
|
||||||
|
edges=[
|
||||||
|
(graph_iterator_node, merge_answers_node),
|
||||||
|
(merge_answers_node, knowledge_graph_node)
|
||||||
|
],
|
||||||
|
entry_point=graph_iterator_node
|
||||||
|
)
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Execute the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
result, execution_info = graph.execute({
|
||||||
|
"user_prompt": "List me all the Machine Learning Engineer job postings",
|
||||||
|
"urls": [
|
||||||
|
"https://www.linkedin.com/jobs/machine-learning-engineer-offerte-di-lavoro/?currentJobId=3889037104&originalSubdomain=it",
|
||||||
|
"https://www.glassdoor.com/Job/italy-machine-learning-engineer-jobs-SRCH_IL.0,5_IN120_KO6,31.html",
|
||||||
|
"https://it.indeed.com/jobs?q=ML+engineer&vjk=3c2e6d27601ffaaa"
|
||||||
|
],
|
||||||
|
})
|
||||||
|
|
||||||
|
# get the answer from the result
|
||||||
|
result = result.get("answer", "No answer found.")
|
||||||
|
print(json.dumps(result, indent=4))
|
||||||
@ -46,7 +46,7 @@ robot_node = RobotsNode(
|
|||||||
|
|
||||||
fetch_node = FetchNode(
|
fetch_node = FetchNode(
|
||||||
input="url | local_dir",
|
input="url | local_dir",
|
||||||
output=["doc"],
|
output=["doc", "link_urls", "img_urls"],
|
||||||
node_config={
|
node_config={
|
||||||
"verbose": True,
|
"verbose": True,
|
||||||
"headless": True,
|
"headless": True,
|
||||||
|
|||||||
@ -1,79 +0,0 @@
|
|||||||
"""
|
|
||||||
Basic example of scraping pipeline using SmartScraper
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
from scrapegraphai.graphs import MultipleSearchGraph
|
|
||||||
from scrapegraphai.utils import prettify_exec_info
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
|
|
||||||
schema= """{
|
|
||||||
"Job Postings": {
|
|
||||||
"Company x": [
|
|
||||||
{
|
|
||||||
"title": "...",
|
|
||||||
"description": "...",
|
|
||||||
"location": "...",
|
|
||||||
"date_posted": "..",
|
|
||||||
"requirements": ["...", "...", "..."]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"title": "...",
|
|
||||||
"description": "...",
|
|
||||||
"location": "...",
|
|
||||||
"date_posted": "..",
|
|
||||||
"requirements": ["...", "...", "..."]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"Company y": [
|
|
||||||
{
|
|
||||||
"title": "...",
|
|
||||||
"description": "...",
|
|
||||||
"location": "...",
|
|
||||||
"date_posted": "..",
|
|
||||||
"requirements": ["...", "...", "..."]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}"""
|
|
||||||
|
|
||||||
# ************************************************
|
|
||||||
# Define the configuration for the graph
|
|
||||||
# ************************************************
|
|
||||||
|
|
||||||
openai_key = os.getenv("OPENAI_APIKEY")
|
|
||||||
|
|
||||||
graph_config = {
|
|
||||||
"llm": {
|
|
||||||
"api_key": openai_key,
|
|
||||||
"model": "gpt-4o",
|
|
||||||
},
|
|
||||||
"verbose": True,
|
|
||||||
"headless": False,
|
|
||||||
"schema": schema,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
multiple_search_graph = MultipleSearchGraph(
|
|
||||||
prompt="List me all the projects with their description",
|
|
||||||
source= [
|
|
||||||
"https://www.linkedin.com/jobs/machine-learning-engineer-offerte-di-lavoro/?currentJobId=3889037104&originalSubdomain=it",
|
|
||||||
"https://www.glassdoor.com/Job/italy-machine-learning-engineer-jobs-SRCH_IL.0,5_IN120_KO6,31.html",
|
|
||||||
"https://it.indeed.com/jobs?q=ML+engineer&vjk=3c2e6d27601ffaaa"
|
|
||||||
],
|
|
||||||
config=graph_config,
|
|
||||||
)
|
|
||||||
|
|
||||||
result = multiple_search_graph.run()
|
|
||||||
print(result)
|
|
||||||
|
|
||||||
# ************************************************
|
|
||||||
# Get graph execution info
|
|
||||||
# ************************************************
|
|
||||||
|
|
||||||
graph_exec_info = multiple_search_graph.get_execution_info()
|
|
||||||
print(prettify_exec_info(graph_exec_info))
|
|
||||||
@ -19,7 +19,7 @@ openai_key = os.getenv("OPENAI_APIKEY")
|
|||||||
graph_config = {
|
graph_config = {
|
||||||
"llm": {
|
"llm": {
|
||||||
"api_key": openai_key,
|
"api_key": openai_key,
|
||||||
"model": "gpt-4-turbo",
|
"model": "gpt-4o",
|
||||||
},
|
},
|
||||||
"verbose": True,
|
"verbose": True,
|
||||||
"headless": True,
|
"headless": True,
|
||||||
|
|||||||
@ -20,7 +20,7 @@ graph_config = {
|
|||||||
"model": "gpt-4o",
|
"model": "gpt-4o",
|
||||||
},
|
},
|
||||||
"max_results": 2,
|
"max_results": 2,
|
||||||
"max_images": 5,
|
"max_images": 1,
|
||||||
"verbose": True,
|
"verbose": True,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
41
examples/openai/smart_scraper_multi_openai.py
Normal file
41
examples/openai/smart_scraper_multi_openai.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
"""
|
||||||
|
Basic example of a multi-source scraping pipeline using SmartScraperMultiGraph
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os, json
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.graphs import SmartScraperMultiGraph
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
openai_key = os.getenv("OPENAI_APIKEY")
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key": openai_key,
|
||||||
|
"model": "gpt-4o",
|
||||||
|
},
|
||||||
|
"verbose": True,
|
||||||
|
"headless": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
# *******************************************************
|
||||||
|
# Create the SmartScraperMultiGraph instance and run it
|
||||||
|
# *******************************************************
|
||||||
|
|
||||||
|
multiple_search_graph = SmartScraperMultiGraph(
|
||||||
|
prompt="Who is Marco Perini?",
|
||||||
|
source= [
|
||||||
|
"https://perinim.github.io/",
|
||||||
|
"https://perinim.github.io/cv/"
|
||||||
|
],
|
||||||
|
schema=None,
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = multiple_search_graph.run()
|
||||||
|
print(json.dumps(result, indent=4))
|
||||||
59
examples/openai/smart_scraper_schema_openai.py
Normal file
59
examples/openai/smart_scraper_schema_openai.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
"""
|
||||||
|
Basic example of scraping pipeline using SmartScraperGraph with an output schema
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os, json
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from scrapegraphai.graphs import SmartScraperGraph
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the output schema for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
schema= """
|
||||||
|
{
|
||||||
|
"Projects": [
|
||||||
|
"Project #":
|
||||||
|
{
|
||||||
|
"title": "...",
|
||||||
|
"description": "...",
|
||||||
|
},
|
||||||
|
"Project #":
|
||||||
|
{
|
||||||
|
"title": "...",
|
||||||
|
"description": "...",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Define the configuration for the graph
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
openai_key = os.getenv("OPENAI_APIKEY")
|
||||||
|
|
||||||
|
graph_config = {
|
||||||
|
"llm": {
|
||||||
|
"api_key":openai_key,
|
||||||
|
"model": "gpt-3.5-turbo",
|
||||||
|
},
|
||||||
|
"verbose": True,
|
||||||
|
"headless": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ************************************************
|
||||||
|
# Create the SmartScraperGraph instance and run it
|
||||||
|
# ************************************************
|
||||||
|
|
||||||
|
smart_scraper_graph = SmartScraperGraph(
|
||||||
|
prompt="List me all the projects with their description",
|
||||||
|
source="https://perinim.github.io/projects/",
|
||||||
|
schema=schema,
|
||||||
|
config=graph_config
|
||||||
|
)
|
||||||
|
|
||||||
|
result = smart_scraper_graph.run()
|
||||||
|
print(json.dumps(result, indent=4))
|
||||||
@ -45,6 +45,10 @@ certifi==2024.2.2
|
|||||||
# via requests
|
# via requests
|
||||||
charset-normalizer==3.3.2
|
charset-normalizer==3.3.2
|
||||||
# via requests
|
# via requests
|
||||||
|
colorama==0.4.6
|
||||||
|
# via ipython
|
||||||
|
# via pytest
|
||||||
|
# via tqdm
|
||||||
dataclasses-json==0.6.6
|
dataclasses-json==0.6.6
|
||||||
# via langchain
|
# via langchain
|
||||||
# via langchain-community
|
# via langchain-community
|
||||||
@ -100,6 +104,7 @@ graphviz==0.20.3
|
|||||||
# via scrapegraphai
|
# via scrapegraphai
|
||||||
greenlet==3.0.3
|
greenlet==3.0.3
|
||||||
# via playwright
|
# via playwright
|
||||||
|
# via sqlalchemy
|
||||||
groq==0.5.0
|
groq==0.5.0
|
||||||
# via langchain-groq
|
# via langchain-groq
|
||||||
grpcio==1.63.0
|
grpcio==1.63.0
|
||||||
@ -212,8 +217,6 @@ pandas==2.2.2
|
|||||||
# via scrapegraphai
|
# via scrapegraphai
|
||||||
parso==0.8.4
|
parso==0.8.4
|
||||||
# via jedi
|
# via jedi
|
||||||
pexpect==4.9.0
|
|
||||||
# via ipython
|
|
||||||
playwright==1.43.0
|
playwright==1.43.0
|
||||||
# via scrapegraphai
|
# via scrapegraphai
|
||||||
pluggy==1.5.0
|
pluggy==1.5.0
|
||||||
@ -230,8 +233,6 @@ protobuf==4.25.3
|
|||||||
# via googleapis-common-protos
|
# via googleapis-common-protos
|
||||||
# via grpcio-status
|
# via grpcio-status
|
||||||
# via proto-plus
|
# via proto-plus
|
||||||
ptyprocess==0.7.0
|
|
||||||
# via pexpect
|
|
||||||
pure-eval==0.2.2
|
pure-eval==0.2.2
|
||||||
# via stack-data
|
# via stack-data
|
||||||
pyasn1==0.6.0
|
pyasn1==0.6.0
|
||||||
|
|||||||
@ -45,6 +45,9 @@ certifi==2024.2.2
|
|||||||
# via requests
|
# via requests
|
||||||
charset-normalizer==3.3.2
|
charset-normalizer==3.3.2
|
||||||
# via requests
|
# via requests
|
||||||
|
colorama==0.4.6
|
||||||
|
# via ipython
|
||||||
|
# via tqdm
|
||||||
dataclasses-json==0.6.6
|
dataclasses-json==0.6.6
|
||||||
# via langchain
|
# via langchain
|
||||||
# via langchain-community
|
# via langchain-community
|
||||||
@ -99,6 +102,7 @@ graphviz==0.20.3
|
|||||||
# via scrapegraphai
|
# via scrapegraphai
|
||||||
greenlet==3.0.3
|
greenlet==3.0.3
|
||||||
# via playwright
|
# via playwright
|
||||||
|
# via sqlalchemy
|
||||||
groq==0.5.0
|
groq==0.5.0
|
||||||
# via langchain-groq
|
# via langchain-groq
|
||||||
grpcio==1.63.0
|
grpcio==1.63.0
|
||||||
@ -208,8 +212,6 @@ pandas==2.2.2
|
|||||||
# via scrapegraphai
|
# via scrapegraphai
|
||||||
parso==0.8.4
|
parso==0.8.4
|
||||||
# via jedi
|
# via jedi
|
||||||
pexpect==4.9.0
|
|
||||||
# via ipython
|
|
||||||
playwright==1.43.0
|
playwright==1.43.0
|
||||||
# via scrapegraphai
|
# via scrapegraphai
|
||||||
prompt-toolkit==3.0.43
|
prompt-toolkit==3.0.43
|
||||||
@ -224,8 +226,6 @@ protobuf==4.25.3
|
|||||||
# via googleapis-common-protos
|
# via googleapis-common-protos
|
||||||
# via grpcio-status
|
# via grpcio-status
|
||||||
# via proto-plus
|
# via proto-plus
|
||||||
ptyprocess==0.7.0
|
|
||||||
# via pexpect
|
|
||||||
pure-eval==0.2.2
|
pure-eval==0.2.2
|
||||||
# via stack-data
|
# via stack-data
|
||||||
pyasn1==0.6.0
|
pyasn1==0.6.0
|
||||||
|
|||||||
@ -15,4 +15,4 @@ from .csv_scraper_graph import CSVScraperGraph
|
|||||||
from .pdf_scraper_graph import PDFScraperGraph
|
from .pdf_scraper_graph import PDFScraperGraph
|
||||||
from .omni_scraper_graph import OmniScraperGraph
|
from .omni_scraper_graph import OmniScraperGraph
|
||||||
from .omni_search_graph import OmniSearchGraph
|
from .omni_search_graph import OmniSearchGraph
|
||||||
from .multiple_search_graph import MultipleSearchGraph
|
from .smart_scraper_multi_graph import SmartScraperMultiGraph
|
||||||
|
|||||||
@ -7,10 +7,11 @@ from langchain_aws import BedrockEmbeddings
|
|||||||
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
|
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
|
||||||
from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings
|
from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings
|
||||||
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
||||||
from ..helpers import models_tokens
|
|
||||||
from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic
|
|
||||||
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
|
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
|
||||||
|
|
||||||
|
from ..helpers import models_tokens
|
||||||
|
from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic, DeepSeek
|
||||||
|
|
||||||
|
|
||||||
class AbstractGraph(ABC):
|
class AbstractGraph(ABC):
|
||||||
"""
|
"""
|
||||||
@ -19,6 +20,7 @@ class AbstractGraph(ABC):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
llm_model: An instance of a language model client, configured for generating answers.
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
embedder_model: An instance of an embedding model client,
|
embedder_model: An instance of an embedding model client,
|
||||||
configured for generating embeddings.
|
configured for generating embeddings.
|
||||||
@ -29,6 +31,7 @@ class AbstractGraph(ABC):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
source (str, optional): The source of the graph.
|
source (str, optional): The source of the graph.
|
||||||
|
schema (str, optional): The schema for the graph output.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> class MyGraph(AbstractGraph):
|
>>> class MyGraph(AbstractGraph):
|
||||||
@ -40,11 +43,12 @@ class AbstractGraph(ABC):
|
|||||||
>>> result = my_graph.run()
|
>>> result = my_graph.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
|
def __init__(self, prompt: str, config: dict, source: Optional[str] = None, schema: Optional[str] = None):
|
||||||
|
|
||||||
self.prompt = prompt
|
self.prompt = prompt
|
||||||
self.source = source
|
self.source = source
|
||||||
self.config = config
|
self.config = config
|
||||||
|
self.schema = schema
|
||||||
self.llm_model = self._create_llm(config["llm"], chat=True)
|
self.llm_model = self._create_llm(config["llm"], chat=True)
|
||||||
self.embedder_model = self._create_default_embedder(llm_config=config["llm"]
|
self.embedder_model = self._create_default_embedder(llm_config=config["llm"]
|
||||||
) if "embeddings" not in config else self._create_embedder(
|
) if "embeddings" not in config else self._create_embedder(
|
||||||
@ -61,14 +65,14 @@ class AbstractGraph(ABC):
|
|||||||
self.headless = True if config is None else config.get(
|
self.headless = True if config is None else config.get(
|
||||||
"headless", True)
|
"headless", True)
|
||||||
self.loader_kwargs = config.get("loader_kwargs", {})
|
self.loader_kwargs = config.get("loader_kwargs", {})
|
||||||
self.schema = config.get("schema", None)
|
|
||||||
|
|
||||||
common_params = {"headless": self.headless,
|
common_params = {
|
||||||
"verbose": self.verbose,
|
"headless": self.headless,
|
||||||
"loader_kwargs": self.loader_kwargs,
|
"verbose": self.verbose,
|
||||||
"llm_model": self.llm_model,
|
"loader_kwargs": self.loader_kwargs,
|
||||||
"embedder_model": self.embedder_model,
|
"llm_model": self.llm_model,
|
||||||
"schema": self.schema}
|
"embedder_model": self.embedder_model
|
||||||
|
}
|
||||||
|
|
||||||
self.set_common_params(common_params, overwrite=False)
|
self.set_common_params(common_params, overwrite=False)
|
||||||
|
|
||||||
|
|||||||
@ -1,14 +1,18 @@
|
|||||||
"""
|
"""
|
||||||
Module for creating the smart scraper
|
Module for creating the smart scraper
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
|
from .abstract_graph import AbstractGraph
|
||||||
|
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
ParseNode,
|
ParseNode,
|
||||||
RAGNode,
|
RAGNode,
|
||||||
GenerateAnswerCSVNode
|
GenerateAnswerCSVNode
|
||||||
)
|
)
|
||||||
from .abstract_graph import AbstractGraph
|
|
||||||
|
|
||||||
|
|
||||||
class CSVScraperGraph(AbstractGraph):
|
class CSVScraperGraph(AbstractGraph):
|
||||||
@ -17,11 +21,11 @@ class CSVScraperGraph(AbstractGraph):
|
|||||||
information from web pages using a natural language model to interpret and answer prompts.
|
information from web pages using a natural language model to interpret and answer prompts.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
|
||||||
"""
|
"""
|
||||||
Initializes the CSVScraperGraph with a prompt, source, and configuration.
|
Initializes the CSVScraperGraph with a prompt, source, and configuration.
|
||||||
"""
|
"""
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source, schema)
|
||||||
|
|
||||||
self.input_key = "csv" if source.endswith("csv") else "csv_dir"
|
self.input_key = "csv" if source.endswith("csv") else "csv_dir"
|
||||||
|
|
||||||
@ -53,6 +57,7 @@ class CSVScraperGraph(AbstractGraph):
|
|||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={
|
node_config={
|
||||||
"llm_model": self.llm_model,
|
"llm_model": self.llm_model,
|
||||||
|
"schema": self.schema,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -2,7 +2,11 @@
|
|||||||
DeepScraperGraph Module
|
DeepScraperGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
|
from .abstract_graph import AbstractGraph
|
||||||
|
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
SearchLinkNode,
|
SearchLinkNode,
|
||||||
@ -12,7 +16,6 @@ from ..nodes import (
|
|||||||
GraphIteratorNode,
|
GraphIteratorNode,
|
||||||
MergeAnswersNode
|
MergeAnswersNode
|
||||||
)
|
)
|
||||||
from .abstract_graph import AbstractGraph
|
|
||||||
|
|
||||||
|
|
||||||
class DeepScraperGraph(AbstractGraph):
|
class DeepScraperGraph(AbstractGraph):
|
||||||
@ -30,15 +33,19 @@ class DeepScraperGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
llm_model: An instance of a language model client, configured for generating answers.
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
embedder_model: An instance of an embedding model client,
|
embedder_model: An instance of an embedding model client,
|
||||||
configured for generating embeddings.
|
configured for generating embeddings.
|
||||||
verbose (bool): A flag indicating whether to show print statements during execution.
|
verbose (bool): A flag indicating whether to show print statements during execution.
|
||||||
headless (bool): A flag indicating whether to run the graph in headless mode.
|
headless (bool): A flag indicating whether to run the graph in headless mode.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> deep_scraper = DeepScraperGraph(
|
>>> deep_scraper = DeepScraperGraph(
|
||||||
... "List me all the job titles and detailed job description.",
|
... "List me all the job titles and detailed job description.",
|
||||||
@ -49,8 +56,10 @@ class DeepScraperGraph(AbstractGraph):
|
|||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
|
||||||
super().__init__(prompt, config, source)
|
|
||||||
|
super().__init__(prompt, config, source, schema)
|
||||||
|
|
||||||
self.input_key = "url" if source.startswith("http") else "local_dir"
|
self.input_key = "url" if source.startswith("http") else "local_dir"
|
||||||
|
|
||||||
def _create_repeated_graph(self) -> BaseGraph:
|
def _create_repeated_graph(self) -> BaseGraph:
|
||||||
@ -84,7 +93,8 @@ class DeepScraperGraph(AbstractGraph):
|
|||||||
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={
|
node_config={
|
||||||
"llm_model": self.llm_model
|
"llm_model": self.llm_model,
|
||||||
|
"schema": self.schema
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
search_node = SearchLinkNode(
|
search_node = SearchLinkNode(
|
||||||
@ -108,6 +118,7 @@ class DeepScraperGraph(AbstractGraph):
|
|||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={
|
node_config={
|
||||||
"llm_model": self.llm_model,
|
"llm_model": self.llm_model,
|
||||||
|
"schema": self.schema
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -2,14 +2,17 @@
|
|||||||
JSONScraperGraph Module
|
JSONScraperGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
|
from .abstract_graph import AbstractGraph
|
||||||
|
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
ParseNode,
|
ParseNode,
|
||||||
RAGNode,
|
RAGNode,
|
||||||
GenerateAnswerNode
|
GenerateAnswerNode
|
||||||
)
|
)
|
||||||
from .abstract_graph import AbstractGraph
|
|
||||||
|
|
||||||
|
|
||||||
class JSONScraperGraph(AbstractGraph):
|
class JSONScraperGraph(AbstractGraph):
|
||||||
@ -20,6 +23,7 @@ class JSONScraperGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
llm_model: An instance of a language model client, configured for generating answers.
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
embedder_model: An instance of an embedding model client,
|
embedder_model: An instance of an embedding model client,
|
||||||
configured for generating embeddings.
|
configured for generating embeddings.
|
||||||
@ -30,6 +34,7 @@ class JSONScraperGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> json_scraper = JSONScraperGraph(
|
>>> json_scraper = JSONScraperGraph(
|
||||||
@ -40,8 +45,8 @@ class JSONScraperGraph(AbstractGraph):
|
|||||||
>>> result = json_scraper.run()
|
>>> result = json_scraper.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source, schema)
|
||||||
|
|
||||||
self.input_key = "json" if source.endswith("json") else "json_dir"
|
self.input_key = "json" if source.endswith("json") else "json_dir"
|
||||||
|
|
||||||
@ -76,7 +81,8 @@ class JSONScraperGraph(AbstractGraph):
|
|||||||
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={
|
node_config={
|
||||||
"llm_model": self.llm_model
|
"llm_model": self.llm_model,
|
||||||
|
"schema": self.schema
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -2,7 +2,11 @@
|
|||||||
OmniScraperGraph Module
|
OmniScraperGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
|
from .abstract_graph import AbstractGraph
|
||||||
|
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
ParseNode,
|
ParseNode,
|
||||||
@ -10,8 +14,8 @@ from ..nodes import (
|
|||||||
RAGNode,
|
RAGNode,
|
||||||
GenerateAnswerOmniNode
|
GenerateAnswerOmniNode
|
||||||
)
|
)
|
||||||
from scrapegraphai.models import OpenAIImageToText
|
|
||||||
from .abstract_graph import AbstractGraph
|
from ..models import OpenAIImageToText
|
||||||
|
|
||||||
|
|
||||||
class OmniScraperGraph(AbstractGraph):
|
class OmniScraperGraph(AbstractGraph):
|
||||||
@ -24,6 +28,7 @@ class OmniScraperGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
llm_model: An instance of a language model client, configured for generating answers.
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
embedder_model: An instance of an embedding model client,
|
embedder_model: An instance of an embedding model client,
|
||||||
configured for generating embeddings.
|
configured for generating embeddings.
|
||||||
@ -35,6 +40,7 @@ class OmniScraperGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> omni_scraper = OmniScraperGraph(
|
>>> omni_scraper = OmniScraperGraph(
|
||||||
@ -46,11 +52,11 @@ class OmniScraperGraph(AbstractGraph):
|
|||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
|
||||||
|
|
||||||
self.max_images = 5 if config is None else config.get("max_images", 5)
|
self.max_images = 5 if config is None else config.get("max_images", 5)
|
||||||
|
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source, schema)
|
||||||
|
|
||||||
self.input_key = "url" if source.startswith("http") else "local_dir"
|
self.input_key = "url" if source.startswith("http") else "local_dir"
|
||||||
|
|
||||||
@ -96,7 +102,8 @@ class OmniScraperGraph(AbstractGraph):
|
|||||||
input="user_prompt & (relevant_chunks | parsed_doc | doc) & img_desc",
|
input="user_prompt & (relevant_chunks | parsed_doc | doc) & img_desc",
|
||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={
|
node_config={
|
||||||
"llm_model": self.llm_model
|
"llm_model": self.llm_model,
|
||||||
|
"schema": self.schema
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -3,15 +3,17 @@ OmniSearchGraph Module
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from copy import copy, deepcopy
|
from copy import copy, deepcopy
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
|
from .abstract_graph import AbstractGraph
|
||||||
|
from .omni_scraper_graph import OmniScraperGraph
|
||||||
|
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
SearchInternetNode,
|
SearchInternetNode,
|
||||||
GraphIteratorNode,
|
GraphIteratorNode,
|
||||||
MergeAnswersNode
|
MergeAnswersNode
|
||||||
)
|
)
|
||||||
from .abstract_graph import AbstractGraph
|
|
||||||
from .omni_scraper_graph import OmniScraperGraph
|
|
||||||
|
|
||||||
|
|
||||||
class OmniSearchGraph(AbstractGraph):
|
class OmniSearchGraph(AbstractGraph):
|
||||||
@ -31,6 +33,7 @@ class OmniSearchGraph(AbstractGraph):
|
|||||||
Args:
|
Args:
|
||||||
prompt (str): The user prompt to search the internet.
|
prompt (str): The user prompt to search the internet.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (Optional[str]): The schema for the graph output.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> omni_search_graph = OmniSearchGraph(
|
>>> omni_search_graph = OmniSearchGraph(
|
||||||
@ -40,7 +43,7 @@ class OmniSearchGraph(AbstractGraph):
|
|||||||
>>> result = search_graph.run()
|
>>> result = search_graph.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, config: dict):
|
def __init__(self, prompt: str, config: dict, schema: Optional[str] = None):
|
||||||
|
|
||||||
self.max_results = config.get("max_results", 3)
|
self.max_results = config.get("max_results", 3)
|
||||||
|
|
||||||
@ -49,7 +52,7 @@ class OmniSearchGraph(AbstractGraph):
|
|||||||
else:
|
else:
|
||||||
self.copy_config = deepcopy(config)
|
self.copy_config = deepcopy(config)
|
||||||
|
|
||||||
super().__init__(prompt, config)
|
super().__init__(prompt, config, schema)
|
||||||
|
|
||||||
def _create_graph(self) -> BaseGraph:
|
def _create_graph(self) -> BaseGraph:
|
||||||
"""
|
"""
|
||||||
@ -94,6 +97,7 @@ class OmniSearchGraph(AbstractGraph):
|
|||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={
|
node_config={
|
||||||
"llm_model": self.llm_model,
|
"llm_model": self.llm_model,
|
||||||
|
"schema": self.schema
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -2,14 +2,17 @@
|
|||||||
PDFScraperGraph Module
|
PDFScraperGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
|
from .abstract_graph import AbstractGraph
|
||||||
|
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
ParseNode,
|
ParseNode,
|
||||||
RAGNode,
|
RAGNode,
|
||||||
GenerateAnswerNode
|
GenerateAnswerNode
|
||||||
)
|
)
|
||||||
from .abstract_graph import AbstractGraph
|
|
||||||
|
|
||||||
|
|
||||||
class PDFScraperGraph(AbstractGraph):
|
class PDFScraperGraph(AbstractGraph):
|
||||||
@ -21,6 +24,7 @@ class PDFScraperGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
llm_model: An instance of a language model client, configured for generating answers.
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
embedder_model: An instance of an embedding model client,
|
embedder_model: An instance of an embedding model client,
|
||||||
configured for generating embeddings.
|
configured for generating embeddings.
|
||||||
@ -32,6 +36,7 @@ class PDFScraperGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> pdf_scraper = PDFScraperGraph(
|
>>> pdf_scraper = PDFScraperGraph(
|
||||||
@ -42,8 +47,8 @@ class PDFScraperGraph(AbstractGraph):
|
|||||||
>>> result = pdf_scraper.run()
|
>>> result = pdf_scraper.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source, schema)
|
||||||
|
|
||||||
self.input_key = "pdf" if source.endswith("pdf") else "pdf_dir"
|
self.input_key = "pdf" if source.endswith("pdf") else "pdf_dir"
|
||||||
|
|
||||||
@ -79,6 +84,7 @@ class PDFScraperGraph(AbstractGraph):
|
|||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={
|
node_config={
|
||||||
"llm_model": self.llm_model,
|
"llm_model": self.llm_model,
|
||||||
|
"schema": self.schema,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -2,13 +2,16 @@
|
|||||||
ScriptCreatorGraph Module
|
ScriptCreatorGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
|
from .abstract_graph import AbstractGraph
|
||||||
|
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
ParseNode,
|
ParseNode,
|
||||||
GenerateScraperNode
|
GenerateScraperNode
|
||||||
)
|
)
|
||||||
from .abstract_graph import AbstractGraph
|
|
||||||
|
|
||||||
|
|
||||||
class ScriptCreatorGraph(AbstractGraph):
|
class ScriptCreatorGraph(AbstractGraph):
|
||||||
@ -19,6 +22,7 @@ class ScriptCreatorGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
llm_model: An instance of a language model client, configured for generating answers.
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
embedder_model: An instance of an embedding model client,
|
embedder_model: An instance of an embedding model client,
|
||||||
configured for generating embeddings.
|
configured for generating embeddings.
|
||||||
@ -31,6 +35,7 @@ class ScriptCreatorGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> script_creator = ScriptCreatorGraph(
|
>>> script_creator = ScriptCreatorGraph(
|
||||||
@ -41,11 +46,11 @@ class ScriptCreatorGraph(AbstractGraph):
|
|||||||
>>> result = script_creator.run()
|
>>> result = script_creator.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
|
||||||
|
|
||||||
self.library = config['library']
|
self.library = config['library']
|
||||||
|
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source, schema)
|
||||||
|
|
||||||
self.input_key = "url" if source.startswith("http") else "local_dir"
|
self.input_key = "url" if source.startswith("http") else "local_dir"
|
||||||
|
|
||||||
@ -65,14 +70,16 @@ class ScriptCreatorGraph(AbstractGraph):
|
|||||||
input="doc",
|
input="doc",
|
||||||
output=["parsed_doc"],
|
output=["parsed_doc"],
|
||||||
node_config={"chunk_size": self.model_token,
|
node_config={"chunk_size": self.model_token,
|
||||||
"verbose": self.verbose,
|
|
||||||
"parse_html": False
|
"parse_html": False
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
generate_scraper_node = GenerateScraperNode(
|
generate_scraper_node = GenerateScraperNode(
|
||||||
input="user_prompt & (doc)",
|
input="user_prompt & (doc)",
|
||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={"llm_model": self.llm_model},
|
node_config={
|
||||||
|
"llm_model": self.llm_model,
|
||||||
|
"schema": self.schema,
|
||||||
|
},
|
||||||
library=self.library,
|
library=self.library,
|
||||||
website=self.source
|
website=self.source
|
||||||
)
|
)
|
||||||
|
|||||||
@ -3,15 +3,17 @@ SearchGraph Module
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from copy import copy, deepcopy
|
from copy import copy, deepcopy
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
|
from .abstract_graph import AbstractGraph
|
||||||
|
from .smart_scraper_graph import SmartScraperGraph
|
||||||
|
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
SearchInternetNode,
|
SearchInternetNode,
|
||||||
GraphIteratorNode,
|
GraphIteratorNode,
|
||||||
MergeAnswersNode
|
MergeAnswersNode
|
||||||
)
|
)
|
||||||
from .abstract_graph import AbstractGraph
|
|
||||||
from .smart_scraper_graph import SmartScraperGraph
|
|
||||||
|
|
||||||
|
|
||||||
class SearchGraph(AbstractGraph):
|
class SearchGraph(AbstractGraph):
|
||||||
@ -30,6 +32,7 @@ class SearchGraph(AbstractGraph):
|
|||||||
Args:
|
Args:
|
||||||
prompt (str): The user prompt to search the internet.
|
prompt (str): The user prompt to search the internet.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (Optional[str]): The schema for the graph output.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> search_graph = SearchGraph(
|
>>> search_graph = SearchGraph(
|
||||||
@ -39,7 +42,7 @@ class SearchGraph(AbstractGraph):
|
|||||||
>>> result = search_graph.run()
|
>>> result = search_graph.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, config: dict):
|
def __init__(self, prompt: str, config: dict, schema: Optional[str] = None):
|
||||||
|
|
||||||
self.max_results = config.get("max_results", 3)
|
self.max_results = config.get("max_results", 3)
|
||||||
|
|
||||||
@ -48,7 +51,7 @@ class SearchGraph(AbstractGraph):
|
|||||||
else:
|
else:
|
||||||
self.copy_config = deepcopy(config)
|
self.copy_config = deepcopy(config)
|
||||||
|
|
||||||
super().__init__(prompt, config)
|
super().__init__(prompt, config, schema)
|
||||||
|
|
||||||
def _create_graph(self) -> BaseGraph:
|
def _create_graph(self) -> BaseGraph:
|
||||||
"""
|
"""
|
||||||
@ -93,6 +96,7 @@ class SearchGraph(AbstractGraph):
|
|||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={
|
node_config={
|
||||||
"llm_model": self.llm_model,
|
"llm_model": self.llm_model,
|
||||||
|
"schema": self.schema
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -2,14 +2,17 @@
|
|||||||
SmartScraperGraph Module
|
SmartScraperGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
|
from .abstract_graph import AbstractGraph
|
||||||
|
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
ParseNode,
|
ParseNode,
|
||||||
RAGNode,
|
RAGNode,
|
||||||
GenerateAnswerNode
|
GenerateAnswerNode
|
||||||
)
|
)
|
||||||
from .abstract_graph import AbstractGraph
|
|
||||||
|
|
||||||
|
|
||||||
class SmartScraperGraph(AbstractGraph):
|
class SmartScraperGraph(AbstractGraph):
|
||||||
@ -22,6 +25,7 @@ class SmartScraperGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
llm_model: An instance of a language model client, configured for generating answers.
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
embedder_model: An instance of an embedding model client,
|
embedder_model: An instance of an embedding model client,
|
||||||
configured for generating embeddings.
|
configured for generating embeddings.
|
||||||
@ -32,6 +36,7 @@ class SmartScraperGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> smart_scraper = SmartScraperGraph(
|
>>> smart_scraper = SmartScraperGraph(
|
||||||
@ -43,8 +48,8 @@ class SmartScraperGraph(AbstractGraph):
|
|||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source, schema)
|
||||||
|
|
||||||
self.input_key = "url" if source.startswith("http") else "local_dir"
|
self.input_key = "url" if source.startswith("http") else "local_dir"
|
||||||
|
|
||||||
@ -82,7 +87,7 @@ class SmartScraperGraph(AbstractGraph):
|
|||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={
|
node_config={
|
||||||
"llm_model": self.llm_model,
|
"llm_model": self.llm_model,
|
||||||
"schema": self.config.get("schema", None),
|
"schema": self.schema,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -1,25 +1,25 @@
|
|||||||
"""
|
"""
|
||||||
MultipleSearchGraph Module
|
SmartScraperMultiGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from copy import copy, deepcopy
|
from copy import copy, deepcopy
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
|
from .abstract_graph import AbstractGraph
|
||||||
|
from .smart_scraper_graph import SmartScraperGraph
|
||||||
|
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
GraphIteratorNode,
|
GraphIteratorNode,
|
||||||
MergeAnswersNode,
|
MergeAnswersNode,
|
||||||
KnowledgeGraphNode
|
KnowledgeGraphNode
|
||||||
)
|
)
|
||||||
from .abstract_graph import AbstractGraph
|
|
||||||
from .smart_scraper_graph import SmartScraperGraph
|
|
||||||
|
|
||||||
from typing import List, Optional
|
|
||||||
|
|
||||||
|
|
||||||
class MultipleSearchGraph(AbstractGraph):
|
class SmartScraperMultiGraph(AbstractGraph):
|
||||||
"""
|
"""
|
||||||
MultipleSearchGraph is a scraping pipeline that searches the internet for answers to a given prompt.
|
SmartScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt.
|
||||||
It only requires a user prompt to search the internet and generate an answer.
|
It only requires a user prompt and a list of URLs.
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
prompt (str): The user prompt to search the internet.
|
prompt (str): The user prompt to search the internet.
|
||||||
@ -31,7 +31,9 @@ class MultipleSearchGraph(AbstractGraph):
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
prompt (str): The user prompt to search the internet.
|
prompt (str): The user prompt to search the internet.
|
||||||
|
source (List[str]): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (Optional[str]): The schema for the graph output.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> search_graph = MultipleSearchGraph(
|
>>> search_graph = MultipleSearchGraph(
|
||||||
@ -41,7 +43,7 @@ class MultipleSearchGraph(AbstractGraph):
|
|||||||
>>> result = search_graph.run()
|
>>> result = search_graph.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: List[str], config: dict):
|
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
|
||||||
|
|
||||||
self.max_results = config.get("max_results", 3)
|
self.max_results = config.get("max_results", 3)
|
||||||
|
|
||||||
@ -50,7 +52,7 @@ class MultipleSearchGraph(AbstractGraph):
|
|||||||
else:
|
else:
|
||||||
self.copy_config = deepcopy(config)
|
self.copy_config = deepcopy(config)
|
||||||
|
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source, schema)
|
||||||
|
|
||||||
def _create_graph(self) -> BaseGraph:
|
def _create_graph(self) -> BaseGraph:
|
||||||
"""
|
"""
|
||||||
@ -87,15 +89,7 @@ class MultipleSearchGraph(AbstractGraph):
|
|||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={
|
node_config={
|
||||||
"llm_model": self.llm_model,
|
"llm_model": self.llm_model,
|
||||||
"schema": self.config.get("schema", None),
|
"schema": self.schema
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
knowledge_graph_node = KnowledgeGraphNode(
|
|
||||||
input="user_prompt & answer",
|
|
||||||
output=["kg"],
|
|
||||||
node_config={
|
|
||||||
"llm_model": self.llm_model,
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -103,11 +97,9 @@ class MultipleSearchGraph(AbstractGraph):
|
|||||||
nodes=[
|
nodes=[
|
||||||
graph_iterator_node,
|
graph_iterator_node,
|
||||||
merge_answers_node,
|
merge_answers_node,
|
||||||
knowledge_graph_node
|
|
||||||
],
|
],
|
||||||
edges=[
|
edges=[
|
||||||
(graph_iterator_node, merge_answers_node),
|
(graph_iterator_node, merge_answers_node),
|
||||||
(merge_answers_node, knowledge_graph_node)
|
|
||||||
],
|
],
|
||||||
entry_point=graph_iterator_node
|
entry_point=graph_iterator_node
|
||||||
)
|
)
|
||||||
@ -2,9 +2,11 @@
|
|||||||
SpeechGraph Module
|
SpeechGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes
|
from typing import Optional
|
||||||
from ..models import OpenAITextToSpeech
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
|
from .abstract_graph import AbstractGraph
|
||||||
|
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
ParseNode,
|
ParseNode,
|
||||||
@ -12,7 +14,9 @@ from ..nodes import (
|
|||||||
GenerateAnswerNode,
|
GenerateAnswerNode,
|
||||||
TextToSpeechNode,
|
TextToSpeechNode,
|
||||||
)
|
)
|
||||||
from .abstract_graph import AbstractGraph
|
|
||||||
|
from ..utils.save_audio_from_bytes import save_audio_from_bytes
|
||||||
|
from ..models import OpenAITextToSpeech
|
||||||
|
|
||||||
|
|
||||||
class SpeechGraph(AbstractGraph):
|
class SpeechGraph(AbstractGraph):
|
||||||
@ -23,6 +27,7 @@ class SpeechGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
llm_model: An instance of a language model client, configured for generating answers.
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
embedder_model: An instance of an embedding model client, configured for generating embeddings.
|
embedder_model: An instance of an embedding model client, configured for generating embeddings.
|
||||||
verbose (bool): A flag indicating whether to show print statements during execution.
|
verbose (bool): A flag indicating whether to show print statements during execution.
|
||||||
@ -33,6 +38,7 @@ class SpeechGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> speech_graph = SpeechGraph(
|
>>> speech_graph = SpeechGraph(
|
||||||
@ -41,8 +47,8 @@ class SpeechGraph(AbstractGraph):
|
|||||||
... {"llm": {"model": "gpt-3.5-turbo"}}
|
... {"llm": {"model": "gpt-3.5-turbo"}}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source, schema)
|
||||||
|
|
||||||
self.input_key = "url" if source.startswith("http") else "local_dir"
|
self.input_key = "url" if source.startswith("http") else "local_dir"
|
||||||
|
|
||||||
@ -76,7 +82,8 @@ class SpeechGraph(AbstractGraph):
|
|||||||
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={
|
node_config={
|
||||||
"llm_model": self.llm_model
|
"llm_model": self.llm_model,
|
||||||
|
"schema": self.schema
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
text_to_speech_node = TextToSpeechNode(
|
text_to_speech_node = TextToSpeechNode(
|
||||||
|
|||||||
@ -2,14 +2,17 @@
|
|||||||
XMLScraperGraph Module
|
XMLScraperGraph Module
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from .base_graph import BaseGraph
|
from .base_graph import BaseGraph
|
||||||
|
from .abstract_graph import AbstractGraph
|
||||||
|
|
||||||
from ..nodes import (
|
from ..nodes import (
|
||||||
FetchNode,
|
FetchNode,
|
||||||
ParseNode,
|
ParseNode,
|
||||||
RAGNode,
|
RAGNode,
|
||||||
GenerateAnswerNode
|
GenerateAnswerNode
|
||||||
)
|
)
|
||||||
from .abstract_graph import AbstractGraph
|
|
||||||
|
|
||||||
|
|
||||||
class XMLScraperGraph(AbstractGraph):
|
class XMLScraperGraph(AbstractGraph):
|
||||||
@ -21,6 +24,7 @@ class XMLScraperGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
llm_model: An instance of a language model client, configured for generating answers.
|
llm_model: An instance of a language model client, configured for generating answers.
|
||||||
embedder_model: An instance of an embedding model client,
|
embedder_model: An instance of an embedding model client,
|
||||||
configured for generating embeddings.
|
configured for generating embeddings.
|
||||||
@ -32,6 +36,7 @@ class XMLScraperGraph(AbstractGraph):
|
|||||||
prompt (str): The prompt for the graph.
|
prompt (str): The prompt for the graph.
|
||||||
source (str): The source of the graph.
|
source (str): The source of the graph.
|
||||||
config (dict): Configuration parameters for the graph.
|
config (dict): Configuration parameters for the graph.
|
||||||
|
schema (str): The schema for the graph output.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
>>> xml_scraper = XMLScraperGraph(
|
>>> xml_scraper = XMLScraperGraph(
|
||||||
@ -42,8 +47,8 @@ class XMLScraperGraph(AbstractGraph):
|
|||||||
>>> result = xml_scraper.run()
|
>>> result = xml_scraper.run()
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, prompt: str, source: str, config: dict):
|
def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
|
||||||
super().__init__(prompt, config, source)
|
super().__init__(prompt, config, source, schema)
|
||||||
|
|
||||||
self.input_key = "xml" if source.endswith("xml") else "xml_dir"
|
self.input_key = "xml" if source.endswith("xml") else "xml_dir"
|
||||||
|
|
||||||
@ -78,7 +83,8 @@ class XMLScraperGraph(AbstractGraph):
|
|||||||
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
|
||||||
output=["answer"],
|
output=["answer"],
|
||||||
node_config={
|
node_config={
|
||||||
"llm_model": self.llm_model
|
"llm_model": self.llm_model,
|
||||||
|
"schema": self.schema
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Module for implementing the conditional node
|
Module for implementing the conditional node
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .base_node import BaseNode
|
from .base_node import BaseNode
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -10,10 +10,9 @@ from langchain.prompts import PromptTemplate
|
|||||||
from langchain_core.output_parsers import JsonOutputParser
|
from langchain_core.output_parsers import JsonOutputParser
|
||||||
from langchain_core.runnables import RunnableParallel
|
from langchain_core.runnables import RunnableParallel
|
||||||
|
|
||||||
from ..helpers.generate_answer_node_csv_prompts import template_chunks_csv, template_no_chunks_csv, template_merge_csv
|
|
||||||
|
|
||||||
# Imports from the library
|
# Imports from the library
|
||||||
from .base_node import BaseNode
|
from .base_node import BaseNode
|
||||||
|
from ..helpers.generate_answer_node_csv_prompts import template_chunks_csv, template_no_chunks_csv, template_merge_csv
|
||||||
|
|
||||||
|
|
||||||
class GenerateAnswerCSVNode(BaseNode):
|
class GenerateAnswerCSVNode(BaseNode):
|
||||||
|
|||||||
@ -15,6 +15,7 @@ from langchain_core.runnables import RunnableParallel
|
|||||||
from .base_node import BaseNode
|
from .base_node import BaseNode
|
||||||
from ..helpers import template_chunks, template_no_chunks, template_merge, template_chunks_with_schema, template_no_chunks_with_schema
|
from ..helpers import template_chunks, template_no_chunks, template_merge, template_chunks_with_schema, template_no_chunks_with_schema
|
||||||
|
|
||||||
|
|
||||||
class GenerateAnswerNode(BaseNode):
|
class GenerateAnswerNode(BaseNode):
|
||||||
"""
|
"""
|
||||||
A node that generates an answer using a large language model (LLM) based on the user's input
|
A node that generates an answer using a large language model (LLM) based on the user's input
|
||||||
|
|||||||
@ -15,6 +15,7 @@ from langchain_core.runnables import RunnableParallel
|
|||||||
from .base_node import BaseNode
|
from .base_node import BaseNode
|
||||||
from ..helpers.generate_answer_node_omni_prompts import template_no_chunk_omni, template_chunks_omni, template_merge_omni
|
from ..helpers.generate_answer_node_omni_prompts import template_no_chunk_omni, template_chunks_omni, template_merge_omni
|
||||||
|
|
||||||
|
|
||||||
class GenerateAnswerOmniNode(BaseNode):
|
class GenerateAnswerOmniNode(BaseNode):
|
||||||
"""
|
"""
|
||||||
A node that generates an answer using a large language model (LLM) based on the user's input
|
A node that generates an answer using a large language model (LLM) based on the user's input
|
||||||
|
|||||||
@ -14,6 +14,7 @@ from langchain_core.runnables import RunnableParallel
|
|||||||
from .base_node import BaseNode
|
from .base_node import BaseNode
|
||||||
from ..helpers.generate_answer_node_pdf_prompts import template_chunks_pdf, template_no_chunks_pdf, template_merge_pdf
|
from ..helpers.generate_answer_node_pdf_prompts import template_chunks_pdf, template_no_chunks_pdf, template_merge_pdf
|
||||||
|
|
||||||
|
|
||||||
class GenerateAnswerPDFNode(BaseNode):
|
class GenerateAnswerPDFNode(BaseNode):
|
||||||
"""
|
"""
|
||||||
A node that generates an answer using a language model (LLM) based on the user's input
|
A node that generates an answer using a language model (LLM) based on the user's input
|
||||||
|
|||||||
@ -10,7 +10,6 @@ from tqdm.asyncio import tqdm
|
|||||||
|
|
||||||
from .base_node import BaseNode
|
from .base_node import BaseNode
|
||||||
|
|
||||||
|
|
||||||
_default_batchsize = 16
|
_default_batchsize = 16
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -14,6 +14,7 @@ from langchain_core.output_parsers import JsonOutputParser
|
|||||||
from .base_node import BaseNode
|
from .base_node import BaseNode
|
||||||
from ..utils import create_graph, create_interactive_graph
|
from ..utils import create_graph, create_interactive_graph
|
||||||
|
|
||||||
|
|
||||||
class KnowledgeGraphNode(BaseNode):
|
class KnowledgeGraphNode(BaseNode):
|
||||||
"""
|
"""
|
||||||
A node responsible for generating a knowledge graph from a dictionary.
|
A node responsible for generating a knowledge graph from a dictionary.
|
||||||
|
|||||||
@ -3,8 +3,10 @@ ParseNode Module
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
from langchain_community.document_transformers import Html2TextTransformer
|
from langchain_community.document_transformers import Html2TextTransformer
|
||||||
|
|
||||||
from .base_node import BaseNode
|
from .base_node import BaseNode
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -3,6 +3,7 @@ RAGNode Module
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
from langchain.retrievers import ContextualCompressionRetriever
|
from langchain.retrievers import ContextualCompressionRetriever
|
||||||
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
|
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
|
||||||
|
|||||||
@ -4,9 +4,11 @@ RobotsNode Module
|
|||||||
|
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
from langchain_community.document_loaders import AsyncChromiumLoader
|
from langchain_community.document_loaders import AsyncChromiumLoader
|
||||||
from langchain.prompts import PromptTemplate
|
from langchain.prompts import PromptTemplate
|
||||||
from langchain.output_parsers import CommaSeparatedListOutputParser
|
from langchain.output_parsers import CommaSeparatedListOutputParser
|
||||||
|
|
||||||
from .base_node import BaseNode
|
from .base_node import BaseNode
|
||||||
from ..helpers import robots_dictionary
|
from ..helpers import robots_dictionary
|
||||||
|
|
||||||
|
|||||||
@ -3,8 +3,10 @@ SearchInternetNode Module
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
from langchain.output_parsers import CommaSeparatedListOutputParser
|
from langchain.output_parsers import CommaSeparatedListOutputParser
|
||||||
from langchain.prompts import PromptTemplate
|
from langchain.prompts import PromptTemplate
|
||||||
|
|
||||||
from ..utils.research_web import search_on_web
|
from ..utils.research_web import search_on_web
|
||||||
from .base_node import BaseNode
|
from .base_node import BaseNode
|
||||||
|
|
||||||
|
|||||||
@ -6,7 +6,6 @@ SearchLinkNode Module
|
|||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
# Imports from Langchain
|
# Imports from Langchain
|
||||||
from langchain.prompts import PromptTemplate
|
from langchain.prompts import PromptTemplate
|
||||||
from langchain_core.output_parsers import JsonOutputParser
|
from langchain_core.output_parsers import JsonOutputParser
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user