feat(smart-scraper-multi): add schema to graphs and created SmartScraperMultiGraph

This commit is contained in:
Marco Perini 2024-05-21 13:13:27 +02:00
parent 5701afe927
commit fc58e2d3a6
35 changed files with 401 additions and 172 deletions

View File

@ -0,0 +1,134 @@
"""
Example of custom graph for creating a knowledge graph
"""
import os, json
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings
from scrapegraphai.models import OpenAI
from scrapegraphai.graphs import BaseGraph, SmartScraperGraph
from scrapegraphai.nodes import GraphIteratorNode, MergeAnswersNode, KnowledgeGraphNode
# Load environment variables (e.g. OPENAI_APIKEY) from a local .env file.
load_dotenv()

# ************************************************
# Define the output schema
# ************************************************

# Template describing the desired shape of the merged answer; it is handed
# to MergeAnswersNode through its node_config below.
schema = """{
"Job Postings": {
"Company x": [
{
"title": "...",
"description": "...",
"location": "...",
"date_posted": "..",
"requirements": ["...", "...", "..."]
},
{
"title": "...",
"description": "...",
"location": "...",
"date_posted": "..",
"requirements": ["...", "...", "..."]
}
],
"Company y": [
{
"title": "...",
"description": "...",
"location": "...",
"date_posted": "..",
"requirements": ["...", "...", "..."]
}
]
}
}"""

# ************************************************
# Define the configuration for the graph
# ************************************************

openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-4o",
    },
    "verbose": True,
    "headless": False,
}

# ************************************************
# Define the LLM client and the per-URL scraper graph
# ************************************************

llm_model = OpenAI(graph_config["llm"])

# Graph instance given to GraphIteratorNode as "graph_instance"; prompt and
# source are left empty here and the real values are supplied at execution.
smart_scraper_instance = SmartScraperGraph(
    prompt="",
    source="",
    config=graph_config,
)

# ************************************************
# Define the graph nodes
# ************************************************

graph_iterator_node = GraphIteratorNode(
    input="user_prompt & urls",
    output=["results"],
    node_config={
        "graph_instance": smart_scraper_instance,
    }
)

merge_answers_node = MergeAnswersNode(
    input="user_prompt & results",
    output=["answer"],
    node_config={
        "llm_model": llm_model,
        "schema": schema
    }
)

knowledge_graph_node = KnowledgeGraphNode(
    input="user_prompt & answer",
    output=["kg"],
    node_config={
        "llm_model": llm_model,
    }
)

# Linear pipeline: iterate over the URLs -> merge the answers -> build the KG.
graph = BaseGraph(
    nodes=[
        graph_iterator_node,
        merge_answers_node,
        knowledge_graph_node
    ],
    edges=[
        (graph_iterator_node, merge_answers_node),
        (merge_answers_node, knowledge_graph_node)
    ],
    entry_point=graph_iterator_node
)

# ************************************************
# Execute the graph
# ************************************************

result, execution_info = graph.execute({
    "user_prompt": "List me all the Machine Learning Engineer job postings",
    "urls": [
        "https://www.linkedin.com/jobs/machine-learning-engineer-offerte-di-lavoro/?currentJobId=3889037104&originalSubdomain=it",
        "https://www.glassdoor.com/Job/italy-machine-learning-engineer-jobs-SRCH_IL.0,5_IN120_KO6,31.html",
        "https://it.indeed.com/jobs?q=ML+engineer&vjk=3c2e6d27601ffaaa"
    ],
})

# get the answer from the result
result = result.get("answer", "No answer found.")
print(json.dumps(result, indent=4))

View File

@ -46,7 +46,7 @@ robot_node = RobotsNode(
fetch_node = FetchNode( fetch_node = FetchNode(
input="url | local_dir", input="url | local_dir",
output=["doc"], output=["doc", "link_urls", "img_urls"],
node_config={ node_config={
"verbose": True, "verbose": True,
"headless": True, "headless": True,

View File

@ -1,79 +0,0 @@
"""
Basic example of a multi-source scraping pipeline using MultipleSearchGraph
"""
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import MultipleSearchGraph
from scrapegraphai.utils import prettify_exec_info
# Load environment variables (e.g. OPENAI_APIKEY) from a local .env file.
load_dotenv()

# ************************************************
# Define the output schema
# ************************************************

# Template describing the desired answer shape; passed to the graph through
# the "schema" entry of graph_config below.
schema = """{
"Job Postings": {
"Company x": [
{
"title": "...",
"description": "...",
"location": "...",
"date_posted": "..",
"requirements": ["...", "...", "..."]
},
{
"title": "...",
"description": "...",
"location": "...",
"date_posted": "..",
"requirements": ["...", "...", "..."]
}
],
"Company y": [
{
"title": "...",
"description": "...",
"location": "...",
"date_posted": "..",
"requirements": ["...", "...", "..."]
}
]
}
}"""

# ************************************************
# Define the configuration for the graph
# ************************************************

openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-4o",
    },
    "verbose": True,
    "headless": False,
    "schema": schema,
}

# ************************************************
# Create the MultipleSearchGraph instance and run it
# ************************************************

multiple_search_graph = MultipleSearchGraph(
    # The prompt must match the sources and the schema: these are job boards
    # and the schema describes job postings, not projects.
    prompt="List me all the Machine Learning Engineer job postings",
    source=[
        "https://www.linkedin.com/jobs/machine-learning-engineer-offerte-di-lavoro/?currentJobId=3889037104&originalSubdomain=it",
        "https://www.glassdoor.com/Job/italy-machine-learning-engineer-jobs-SRCH_IL.0,5_IN120_KO6,31.html",
        "https://it.indeed.com/jobs?q=ML+engineer&vjk=3c2e6d27601ffaaa"
    ],
    config=graph_config,
)

result = multiple_search_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = multiple_search_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

View File

@ -19,7 +19,7 @@ openai_key = os.getenv("OPENAI_APIKEY")
graph_config = { graph_config = {
"llm": { "llm": {
"api_key": openai_key, "api_key": openai_key,
"model": "gpt-4-turbo", "model": "gpt-4o",
}, },
"verbose": True, "verbose": True,
"headless": True, "headless": True,

View File

@ -20,7 +20,7 @@ graph_config = {
"model": "gpt-4o", "model": "gpt-4o",
}, },
"max_results": 2, "max_results": 2,
"max_images": 5, "max_images": 1,
"verbose": True, "verbose": True,
} }

View File

@ -0,0 +1,41 @@
"""
Basic example of a multi-page scraping pipeline using SmartScraperMultiGraph
"""
import os, json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperMultiGraph
# Pull OPENAI_APIKEY (and any other settings) from a local .env file.
load_dotenv()

# ************************************************
# Graph configuration
# ************************************************

llm_settings = {
    "api_key": os.getenv("OPENAI_APIKEY"),
    "model": "gpt-4o",
}
graph_config = {
    "llm": llm_settings,
    "verbose": True,
    "headless": False,
}

# *******************************************************
# Build the SmartScraperMultiGraph, run it, print the answer
# *******************************************************

# Pages that are scraped and whose answers are combined.
pages = [
    "https://perinim.github.io/",
    "https://perinim.github.io/cv/"
]

multi_scraper_graph = SmartScraperMultiGraph(
    prompt="Who is Marco Perini?",
    source=pages,
    schema=None,  # no output schema is imposed in this example
    config=graph_config
)

answer = multi_scraper_graph.run()
print(json.dumps(answer, indent=4))

View File

@ -0,0 +1,59 @@
"""
Basic example of scraping pipeline using SmartScraper
"""
import os, json
from dotenv import load_dotenv
from scrapegraphai.graphs import SmartScraperGraph
# Load environment variables (e.g. OPENAI_APIKEY) from a local .env file.
load_dotenv()

# ************************************************
# Define the output schema for the graph
# ************************************************

# Template for the desired answer shape. It is kept well-formed JSON (a list
# of project objects, no trailing commas) so the model is not nudged towards
# emitting output that a JSON parser would reject.
schema = """
{
    "Projects": [
        {
            "title": "...",
            "description": "..."
        },
        {
            "title": "...",
            "description": "..."
        }
    ]
}
"""

# ************************************************
# Define the configuration for the graph
# ************************************************

openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
    "llm": {
        "api_key": openai_key,
        "model": "gpt-3.5-turbo",
    },
    "verbose": True,
    "headless": False,
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

smart_scraper_graph = SmartScraperGraph(
    prompt="List me all the projects with their description",
    source="https://perinim.github.io/projects/",
    schema=schema,
    config=graph_config
)

result = smart_scraper_graph.run()
print(json.dumps(result, indent=4))

View File

@ -45,6 +45,10 @@ certifi==2024.2.2
# via requests # via requests
charset-normalizer==3.3.2 charset-normalizer==3.3.2
# via requests # via requests
colorama==0.4.6
# via ipython
# via pytest
# via tqdm
dataclasses-json==0.6.6 dataclasses-json==0.6.6
# via langchain # via langchain
# via langchain-community # via langchain-community
@ -100,6 +104,7 @@ graphviz==0.20.3
# via scrapegraphai # via scrapegraphai
greenlet==3.0.3 greenlet==3.0.3
# via playwright # via playwright
# via sqlalchemy
groq==0.5.0 groq==0.5.0
# via langchain-groq # via langchain-groq
grpcio==1.63.0 grpcio==1.63.0
@ -212,8 +217,6 @@ pandas==2.2.2
# via scrapegraphai # via scrapegraphai
parso==0.8.4 parso==0.8.4
# via jedi # via jedi
pexpect==4.9.0
# via ipython
playwright==1.43.0 playwright==1.43.0
# via scrapegraphai # via scrapegraphai
pluggy==1.5.0 pluggy==1.5.0
@ -230,8 +233,6 @@ protobuf==4.25.3
# via googleapis-common-protos # via googleapis-common-protos
# via grpcio-status # via grpcio-status
# via proto-plus # via proto-plus
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.2 pure-eval==0.2.2
# via stack-data # via stack-data
pyasn1==0.6.0 pyasn1==0.6.0

View File

@ -45,6 +45,9 @@ certifi==2024.2.2
# via requests # via requests
charset-normalizer==3.3.2 charset-normalizer==3.3.2
# via requests # via requests
colorama==0.4.6
# via ipython
# via tqdm
dataclasses-json==0.6.6 dataclasses-json==0.6.6
# via langchain # via langchain
# via langchain-community # via langchain-community
@ -99,6 +102,7 @@ graphviz==0.20.3
# via scrapegraphai # via scrapegraphai
greenlet==3.0.3 greenlet==3.0.3
# via playwright # via playwright
# via sqlalchemy
groq==0.5.0 groq==0.5.0
# via langchain-groq # via langchain-groq
grpcio==1.63.0 grpcio==1.63.0
@ -208,8 +212,6 @@ pandas==2.2.2
# via scrapegraphai # via scrapegraphai
parso==0.8.4 parso==0.8.4
# via jedi # via jedi
pexpect==4.9.0
# via ipython
playwright==1.43.0 playwright==1.43.0
# via scrapegraphai # via scrapegraphai
prompt-toolkit==3.0.43 prompt-toolkit==3.0.43
@ -224,8 +226,6 @@ protobuf==4.25.3
# via googleapis-common-protos # via googleapis-common-protos
# via grpcio-status # via grpcio-status
# via proto-plus # via proto-plus
ptyprocess==0.7.0
# via pexpect
pure-eval==0.2.2 pure-eval==0.2.2
# via stack-data # via stack-data
pyasn1==0.6.0 pyasn1==0.6.0

View File

@ -15,4 +15,4 @@ from .csv_scraper_graph import CSVScraperGraph
from .pdf_scraper_graph import PDFScraperGraph from .pdf_scraper_graph import PDFScraperGraph
from .omni_scraper_graph import OmniScraperGraph from .omni_scraper_graph import OmniScraperGraph
from .omni_search_graph import OmniSearchGraph from .omni_search_graph import OmniSearchGraph
from .multiple_search_graph import MultipleSearchGraph from .smart_scraper_multi_graph import SmartScraperMultiGraph

View File

@ -7,10 +7,11 @@ from langchain_aws import BedrockEmbeddings
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings from langchain_community.embeddings import HuggingFaceHubEmbeddings, OllamaEmbeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings from langchain_google_genai import GoogleGenerativeAIEmbeddings
from ..helpers import models_tokens
from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
from ..helpers import models_tokens
from ..models import AzureOpenAI, Bedrock, Gemini, Groq, HuggingFace, Ollama, OpenAI, Anthropic, DeepSeek
class AbstractGraph(ABC): class AbstractGraph(ABC):
""" """
@ -19,6 +20,7 @@ class AbstractGraph(ABC):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -29,6 +31,7 @@ class AbstractGraph(ABC):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
source (str, optional): The source of the graph. source (str, optional): The source of the graph.
schema (str, optional): The schema for the graph output.
Example: Example:
>>> class MyGraph(AbstractGraph): >>> class MyGraph(AbstractGraph):
@ -40,11 +43,12 @@ class AbstractGraph(ABC):
>>> result = my_graph.run() >>> result = my_graph.run()
""" """
def __init__(self, prompt: str, config: dict, source: Optional[str] = None): def __init__(self, prompt: str, config: dict, source: Optional[str] = None, schema: Optional[str] = None):
self.prompt = prompt self.prompt = prompt
self.source = source self.source = source
self.config = config self.config = config
self.schema = schema
self.llm_model = self._create_llm(config["llm"], chat=True) self.llm_model = self._create_llm(config["llm"], chat=True)
self.embedder_model = self._create_default_embedder(llm_config=config["llm"] self.embedder_model = self._create_default_embedder(llm_config=config["llm"]
) if "embeddings" not in config else self._create_embedder( ) if "embeddings" not in config else self._create_embedder(
@ -61,14 +65,14 @@ class AbstractGraph(ABC):
self.headless = True if config is None else config.get( self.headless = True if config is None else config.get(
"headless", True) "headless", True)
self.loader_kwargs = config.get("loader_kwargs", {}) self.loader_kwargs = config.get("loader_kwargs", {})
self.schema = config.get("schema", None)
common_params = {"headless": self.headless, common_params = {
"verbose": self.verbose, "headless": self.headless,
"loader_kwargs": self.loader_kwargs, "verbose": self.verbose,
"llm_model": self.llm_model, "loader_kwargs": self.loader_kwargs,
"embedder_model": self.embedder_model, "llm_model": self.llm_model,
"schema": self.schema} "embedder_model": self.embedder_model
}
self.set_common_params(common_params, overwrite=False) self.set_common_params(common_params, overwrite=False)

View File

@ -1,14 +1,18 @@
""" """
Module for creating the smart scraper Module for creating the smart scraper
""" """
from typing import Optional
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from ..nodes import ( from ..nodes import (
FetchNode, FetchNode,
ParseNode, ParseNode,
RAGNode, RAGNode,
GenerateAnswerCSVNode GenerateAnswerCSVNode
) )
from .abstract_graph import AbstractGraph
class CSVScraperGraph(AbstractGraph): class CSVScraperGraph(AbstractGraph):
@ -17,11 +21,11 @@ class CSVScraperGraph(AbstractGraph):
information from web pages using a natural language model to interpret and answer prompts. information from web pages using a natural language model to interpret and answer prompts.
""" """
def __init__(self, prompt: str, source: str, config: dict): def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
""" """
Initializes the CSVScraperGraph with a prompt, source, and configuration. Initializes the CSVScraperGraph with a prompt, source, and configuration.
""" """
super().__init__(prompt, config, source) super().__init__(prompt, config, source, schema)
self.input_key = "csv" if source.endswith("csv") else "csv_dir" self.input_key = "csv" if source.endswith("csv") else "csv_dir"
@ -53,6 +57,7 @@ class CSVScraperGraph(AbstractGraph):
output=["answer"], output=["answer"],
node_config={ node_config={
"llm_model": self.llm_model, "llm_model": self.llm_model,
"schema": self.schema,
} }
) )

View File

@ -2,7 +2,11 @@
DeepScraperGraph Module DeepScraperGraph Module
""" """
from typing import Optional
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from ..nodes import ( from ..nodes import (
FetchNode, FetchNode,
SearchLinkNode, SearchLinkNode,
@ -12,7 +16,6 @@ from ..nodes import (
GraphIteratorNode, GraphIteratorNode,
MergeAnswersNode MergeAnswersNode
) )
from .abstract_graph import AbstractGraph
class DeepScraperGraph(AbstractGraph): class DeepScraperGraph(AbstractGraph):
@ -30,15 +33,19 @@ class DeepScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution. verbose (bool): A flag indicating whether to show print statements during execution.
headless (bool): A flag indicating whether to run the graph in headless mode. headless (bool): A flag indicating whether to run the graph in headless mode.
Args: Args:
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
Example: Example:
>>> deep_scraper = DeepScraperGraph( >>> deep_scraper = DeepScraperGraph(
... "List me all the job titles and detailed job description.", ... "List me all the job titles and detailed job description.",
@ -49,8 +56,10 @@ class DeepScraperGraph(AbstractGraph):
) )
""" """
def __init__(self, prompt: str, source: str, config: dict): def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
super().__init__(prompt, config, source)
super().__init__(prompt, config, source, schema)
self.input_key = "url" if source.startswith("http") else "local_dir" self.input_key = "url" if source.startswith("http") else "local_dir"
def _create_repeated_graph(self) -> BaseGraph: def _create_repeated_graph(self) -> BaseGraph:
@ -84,7 +93,8 @@ class DeepScraperGraph(AbstractGraph):
input="user_prompt & (relevant_chunks | parsed_doc | doc)", input="user_prompt & (relevant_chunks | parsed_doc | doc)",
output=["answer"], output=["answer"],
node_config={ node_config={
"llm_model": self.llm_model "llm_model": self.llm_model,
"schema": self.schema
} }
) )
search_node = SearchLinkNode( search_node = SearchLinkNode(
@ -108,6 +118,7 @@ class DeepScraperGraph(AbstractGraph):
output=["answer"], output=["answer"],
node_config={ node_config={
"llm_model": self.llm_model, "llm_model": self.llm_model,
"schema": self.schema
} }
) )

View File

@ -2,14 +2,17 @@
JSONScraperGraph Module JSONScraperGraph Module
""" """
from typing import Optional
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from ..nodes import ( from ..nodes import (
FetchNode, FetchNode,
ParseNode, ParseNode,
RAGNode, RAGNode,
GenerateAnswerNode GenerateAnswerNode
) )
from .abstract_graph import AbstractGraph
class JSONScraperGraph(AbstractGraph): class JSONScraperGraph(AbstractGraph):
@ -20,6 +23,7 @@ class JSONScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -30,6 +34,7 @@ class JSONScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
Example: Example:
>>> json_scraper = JSONScraperGraph( >>> json_scraper = JSONScraperGraph(
@ -40,8 +45,8 @@ class JSONScraperGraph(AbstractGraph):
>>> result = json_scraper.run() >>> result = json_scraper.run()
""" """
def __init__(self, prompt: str, source: str, config: dict): def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
super().__init__(prompt, config, source) super().__init__(prompt, config, source, schema)
self.input_key = "json" if source.endswith("json") else "json_dir" self.input_key = "json" if source.endswith("json") else "json_dir"
@ -76,7 +81,8 @@ class JSONScraperGraph(AbstractGraph):
input="user_prompt & (relevant_chunks | parsed_doc | doc)", input="user_prompt & (relevant_chunks | parsed_doc | doc)",
output=["answer"], output=["answer"],
node_config={ node_config={
"llm_model": self.llm_model "llm_model": self.llm_model,
"schema": self.schema
} }
) )

View File

@ -2,7 +2,11 @@
OmniScraperGraph Module OmniScraperGraph Module
""" """
from typing import Optional
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from ..nodes import ( from ..nodes import (
FetchNode, FetchNode,
ParseNode, ParseNode,
@ -10,8 +14,8 @@ from ..nodes import (
RAGNode, RAGNode,
GenerateAnswerOmniNode GenerateAnswerOmniNode
) )
from scrapegraphai.models import OpenAIImageToText
from .abstract_graph import AbstractGraph from ..models import OpenAIImageToText
class OmniScraperGraph(AbstractGraph): class OmniScraperGraph(AbstractGraph):
@ -24,6 +28,7 @@ class OmniScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -35,6 +40,7 @@ class OmniScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
Example: Example:
>>> omni_scraper = OmniScraperGraph( >>> omni_scraper = OmniScraperGraph(
@ -46,11 +52,11 @@ class OmniScraperGraph(AbstractGraph):
) )
""" """
def __init__(self, prompt: str, source: str, config: dict): def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
self.max_images = 5 if config is None else config.get("max_images", 5) self.max_images = 5 if config is None else config.get("max_images", 5)
super().__init__(prompt, config, source) super().__init__(prompt, config, source, schema)
self.input_key = "url" if source.startswith("http") else "local_dir" self.input_key = "url" if source.startswith("http") else "local_dir"
@ -96,7 +102,8 @@ class OmniScraperGraph(AbstractGraph):
input="user_prompt & (relevant_chunks | parsed_doc | doc) & img_desc", input="user_prompt & (relevant_chunks | parsed_doc | doc) & img_desc",
output=["answer"], output=["answer"],
node_config={ node_config={
"llm_model": self.llm_model "llm_model": self.llm_model,
"schema": self.schema
} }
) )

View File

@ -3,15 +3,17 @@ OmniSearchGraph Module
""" """
from copy import copy, deepcopy from copy import copy, deepcopy
from typing import Optional
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from .omni_scraper_graph import OmniScraperGraph
from ..nodes import ( from ..nodes import (
SearchInternetNode, SearchInternetNode,
GraphIteratorNode, GraphIteratorNode,
MergeAnswersNode MergeAnswersNode
) )
from .abstract_graph import AbstractGraph
from .omni_scraper_graph import OmniScraperGraph
class OmniSearchGraph(AbstractGraph): class OmniSearchGraph(AbstractGraph):
@ -31,6 +33,7 @@ class OmniSearchGraph(AbstractGraph):
Args: Args:
prompt (str): The user prompt to search the internet. prompt (str): The user prompt to search the internet.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
Example: Example:
>>> omni_search_graph = OmniSearchGraph( >>> omni_search_graph = OmniSearchGraph(
@ -40,7 +43,7 @@ class OmniSearchGraph(AbstractGraph):
>>> result = search_graph.run() >>> result = search_graph.run()
""" """
def __init__(self, prompt: str, config: dict): def __init__(self, prompt: str, config: dict, schema: Optional[str] = None):
self.max_results = config.get("max_results", 3) self.max_results = config.get("max_results", 3)
@ -49,7 +52,7 @@ class OmniSearchGraph(AbstractGraph):
else: else:
self.copy_config = deepcopy(config) self.copy_config = deepcopy(config)
super().__init__(prompt, config) super().__init__(prompt, config, schema=schema)
def _create_graph(self) -> BaseGraph: def _create_graph(self) -> BaseGraph:
""" """
@ -94,6 +97,7 @@ class OmniSearchGraph(AbstractGraph):
output=["answer"], output=["answer"],
node_config={ node_config={
"llm_model": self.llm_model, "llm_model": self.llm_model,
"schema": self.schema
} }
) )

View File

@ -2,14 +2,17 @@
PDFScraperGraph Module PDFScraperGraph Module
""" """
from typing import Optional
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from ..nodes import ( from ..nodes import (
FetchNode, FetchNode,
ParseNode, ParseNode,
RAGNode, RAGNode,
GenerateAnswerNode GenerateAnswerNode
) )
from .abstract_graph import AbstractGraph
class PDFScraperGraph(AbstractGraph): class PDFScraperGraph(AbstractGraph):
@ -21,6 +24,7 @@ class PDFScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -32,6 +36,7 @@ class PDFScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
Example: Example:
>>> pdf_scraper = PDFScraperGraph( >>> pdf_scraper = PDFScraperGraph(
@ -42,8 +47,8 @@ class PDFScraperGraph(AbstractGraph):
>>> result = pdf_scraper.run() >>> result = pdf_scraper.run()
""" """
def __init__(self, prompt: str, source: str, config: dict): def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
super().__init__(prompt, config, source) super().__init__(prompt, config, source, schema)
self.input_key = "pdf" if source.endswith("pdf") else "pdf_dir" self.input_key = "pdf" if source.endswith("pdf") else "pdf_dir"
@ -79,6 +84,7 @@ class PDFScraperGraph(AbstractGraph):
output=["answer"], output=["answer"],
node_config={ node_config={
"llm_model": self.llm_model, "llm_model": self.llm_model,
"schema": self.schema,
} }
) )

View File

@ -2,13 +2,16 @@
ScriptCreatorGraph Module ScriptCreatorGraph Module
""" """
from typing import Optional
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from ..nodes import ( from ..nodes import (
FetchNode, FetchNode,
ParseNode, ParseNode,
GenerateScraperNode GenerateScraperNode
) )
from .abstract_graph import AbstractGraph
class ScriptCreatorGraph(AbstractGraph): class ScriptCreatorGraph(AbstractGraph):
@ -19,6 +22,7 @@ class ScriptCreatorGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -31,6 +35,7 @@ class ScriptCreatorGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
Example: Example:
>>> script_creator = ScriptCreatorGraph( >>> script_creator = ScriptCreatorGraph(
@ -41,11 +46,11 @@ class ScriptCreatorGraph(AbstractGraph):
>>> result = script_creator.run() >>> result = script_creator.run()
""" """
def __init__(self, prompt: str, source: str, config: dict): def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
self.library = config['library'] self.library = config['library']
super().__init__(prompt, config, source) super().__init__(prompt, config, source, schema)
self.input_key = "url" if source.startswith("http") else "local_dir" self.input_key = "url" if source.startswith("http") else "local_dir"
@ -65,14 +70,16 @@ class ScriptCreatorGraph(AbstractGraph):
input="doc", input="doc",
output=["parsed_doc"], output=["parsed_doc"],
node_config={"chunk_size": self.model_token, node_config={"chunk_size": self.model_token,
"verbose": self.verbose,
"parse_html": False "parse_html": False
} }
) )
generate_scraper_node = GenerateScraperNode( generate_scraper_node = GenerateScraperNode(
input="user_prompt & (doc)", input="user_prompt & (doc)",
output=["answer"], output=["answer"],
node_config={"llm_model": self.llm_model}, node_config={
"llm_model": self.llm_model,
"schema": self.schema,
},
library=self.library, library=self.library,
website=self.source website=self.source
) )

View File

@ -3,15 +3,17 @@ SearchGraph Module
""" """
from copy import copy, deepcopy from copy import copy, deepcopy
from typing import Optional
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from .smart_scraper_graph import SmartScraperGraph
from ..nodes import ( from ..nodes import (
SearchInternetNode, SearchInternetNode,
GraphIteratorNode, GraphIteratorNode,
MergeAnswersNode MergeAnswersNode
) )
from .abstract_graph import AbstractGraph
from .smart_scraper_graph import SmartScraperGraph
class SearchGraph(AbstractGraph): class SearchGraph(AbstractGraph):
@ -30,6 +32,7 @@ class SearchGraph(AbstractGraph):
Args: Args:
prompt (str): The user prompt to search the internet. prompt (str): The user prompt to search the internet.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
Example: Example:
>>> search_graph = SearchGraph( >>> search_graph = SearchGraph(
@ -39,7 +42,7 @@ class SearchGraph(AbstractGraph):
>>> result = search_graph.run() >>> result = search_graph.run()
""" """
def __init__(self, prompt: str, config: dict): def __init__(self, prompt: str, config: dict, schema: Optional[str] = None):
self.max_results = config.get("max_results", 3) self.max_results = config.get("max_results", 3)
@ -48,7 +51,7 @@ class SearchGraph(AbstractGraph):
else: else:
self.copy_config = deepcopy(config) self.copy_config = deepcopy(config)
super().__init__(prompt, config) super().__init__(prompt, config, schema)
def _create_graph(self) -> BaseGraph: def _create_graph(self) -> BaseGraph:
""" """
@ -93,6 +96,7 @@ class SearchGraph(AbstractGraph):
output=["answer"], output=["answer"],
node_config={ node_config={
"llm_model": self.llm_model, "llm_model": self.llm_model,
"schema": self.schema
} }
) )

View File

@ -2,14 +2,17 @@
SmartScraperGraph Module SmartScraperGraph Module
""" """
from typing import Optional
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from ..nodes import ( from ..nodes import (
FetchNode, FetchNode,
ParseNode, ParseNode,
RAGNode, RAGNode,
GenerateAnswerNode GenerateAnswerNode
) )
from .abstract_graph import AbstractGraph
class SmartScraperGraph(AbstractGraph): class SmartScraperGraph(AbstractGraph):
@ -22,6 +25,7 @@ class SmartScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -32,6 +36,7 @@ class SmartScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
Example: Example:
>>> smart_scraper = SmartScraperGraph( >>> smart_scraper = SmartScraperGraph(
@ -43,8 +48,8 @@ class SmartScraperGraph(AbstractGraph):
) )
""" """
def __init__(self, prompt: str, source: str, config: dict): def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
super().__init__(prompt, config, source) super().__init__(prompt, config, source, schema)
self.input_key = "url" if source.startswith("http") else "local_dir" self.input_key = "url" if source.startswith("http") else "local_dir"
@ -82,7 +87,7 @@ class SmartScraperGraph(AbstractGraph):
output=["answer"], output=["answer"],
node_config={ node_config={
"llm_model": self.llm_model, "llm_model": self.llm_model,
"schema": self.config.get("schema", None), "schema": self.schema,
} }
) )

View File

@ -1,25 +1,25 @@
""" """
MultipleSearchGraph Module SmartScraperMultiGraph Module
""" """
from copy import copy, deepcopy from copy import copy, deepcopy
from typing import List, Optional
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from .smart_scraper_graph import SmartScraperGraph
from ..nodes import ( from ..nodes import (
GraphIteratorNode, GraphIteratorNode,
MergeAnswersNode, MergeAnswersNode,
KnowledgeGraphNode KnowledgeGraphNode
) )
from .abstract_graph import AbstractGraph
from .smart_scraper_graph import SmartScraperGraph
from typing import List, Optional
class MultipleSearchGraph(AbstractGraph): class SmartScraperMultiGraph(AbstractGraph):
""" """
MultipleSearchGraph is a scraping pipeline that searches the internet for answers to a given prompt. SmartScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt.
It only requires a user prompt to search the internet and generate an answer. It only requires a user prompt and a list of URLs.
Attributes: Attributes:
prompt (str): The user prompt to search the internet. prompt (str): The user prompt to search the internet.
@ -31,7 +31,9 @@ class MultipleSearchGraph(AbstractGraph):
Args: Args:
prompt (str): The user prompt to search the internet. prompt (str): The user prompt to search the internet.
source (List[str]): The list of URLs to scrape.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
Example: Example:
>>> search_graph = MultipleSearchGraph( >>> search_graph = MultipleSearchGraph(
@ -41,7 +43,7 @@ class MultipleSearchGraph(AbstractGraph):
>>> result = search_graph.run() >>> result = search_graph.run()
""" """
def __init__(self, prompt: str, source: List[str], config: dict): def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[str] = None):
self.max_results = config.get("max_results", 3) self.max_results = config.get("max_results", 3)
@ -50,7 +52,7 @@ class MultipleSearchGraph(AbstractGraph):
else: else:
self.copy_config = deepcopy(config) self.copy_config = deepcopy(config)
super().__init__(prompt, config, source) super().__init__(prompt, config, source, schema)
def _create_graph(self) -> BaseGraph: def _create_graph(self) -> BaseGraph:
""" """
@ -87,15 +89,7 @@ class MultipleSearchGraph(AbstractGraph):
output=["answer"], output=["answer"],
node_config={ node_config={
"llm_model": self.llm_model, "llm_model": self.llm_model,
"schema": self.config.get("schema", None), "schema": self.schema
}
)
knowledge_graph_node = KnowledgeGraphNode(
input="user_prompt & answer",
output=["kg"],
node_config={
"llm_model": self.llm_model,
} }
) )
@ -103,11 +97,9 @@ class MultipleSearchGraph(AbstractGraph):
nodes=[ nodes=[
graph_iterator_node, graph_iterator_node,
merge_answers_node, merge_answers_node,
knowledge_graph_node
], ],
edges=[ edges=[
(graph_iterator_node, merge_answers_node), (graph_iterator_node, merge_answers_node),
(merge_answers_node, knowledge_graph_node)
], ],
entry_point=graph_iterator_node entry_point=graph_iterator_node
) )

View File

@ -2,9 +2,11 @@
SpeechGraph Module SpeechGraph Module
""" """
from scrapegraphai.utils.save_audio_from_bytes import save_audio_from_bytes from typing import Optional
from ..models import OpenAITextToSpeech
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from ..nodes import ( from ..nodes import (
FetchNode, FetchNode,
ParseNode, ParseNode,
@ -12,7 +14,9 @@ from ..nodes import (
GenerateAnswerNode, GenerateAnswerNode,
TextToSpeechNode, TextToSpeechNode,
) )
from .abstract_graph import AbstractGraph
from ..utils.save_audio_from_bytes import save_audio_from_bytes
from ..models import OpenAITextToSpeech
class SpeechGraph(AbstractGraph): class SpeechGraph(AbstractGraph):
@ -23,6 +27,7 @@ class SpeechGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, configured for generating embeddings. embedder_model: An instance of an embedding model client, configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution. verbose (bool): A flag indicating whether to show print statements during execution.
@ -33,6 +38,7 @@ class SpeechGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
Example: Example:
>>> speech_graph = SpeechGraph( >>> speech_graph = SpeechGraph(
@ -41,8 +47,8 @@ class SpeechGraph(AbstractGraph):
... {"llm": {"model": "gpt-3.5-turbo"}} ... {"llm": {"model": "gpt-3.5-turbo"}}
""" """
def __init__(self, prompt: str, source: str, config: dict): def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
super().__init__(prompt, config, source) super().__init__(prompt, config, source, schema)
self.input_key = "url" if source.startswith("http") else "local_dir" self.input_key = "url" if source.startswith("http") else "local_dir"
@ -76,7 +82,8 @@ class SpeechGraph(AbstractGraph):
input="user_prompt & (relevant_chunks | parsed_doc | doc)", input="user_prompt & (relevant_chunks | parsed_doc | doc)",
output=["answer"], output=["answer"],
node_config={ node_config={
"llm_model": self.llm_model "llm_model": self.llm_model,
"schema": self.schema
} }
) )
text_to_speech_node = TextToSpeechNode( text_to_speech_node = TextToSpeechNode(

View File

@ -2,14 +2,17 @@
XMLScraperGraph Module XMLScraperGraph Module
""" """
from typing import Optional
from .base_graph import BaseGraph from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from ..nodes import ( from ..nodes import (
FetchNode, FetchNode,
ParseNode, ParseNode,
RAGNode, RAGNode,
GenerateAnswerNode GenerateAnswerNode
) )
from .abstract_graph import AbstractGraph
class XMLScraperGraph(AbstractGraph): class XMLScraperGraph(AbstractGraph):
@ -21,6 +24,7 @@ class XMLScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (str): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers. llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client, embedder_model: An instance of an embedding model client,
configured for generating embeddings. configured for generating embeddings.
@ -32,6 +36,7 @@ class XMLScraperGraph(AbstractGraph):
prompt (str): The prompt for the graph. prompt (str): The prompt for the graph.
source (str): The source of the graph. source (str): The source of the graph.
config (dict): Configuration parameters for the graph. config (dict): Configuration parameters for the graph.
schema (Optional[str]): The schema for the graph output.
Example: Example:
>>> xml_scraper = XMLScraperGraph( >>> xml_scraper = XMLScraperGraph(
@ -42,8 +47,8 @@ class XMLScraperGraph(AbstractGraph):
>>> result = xml_scraper.run() >>> result = xml_scraper.run()
""" """
def __init__(self, prompt: str, source: str, config: dict): def __init__(self, prompt: str, source: str, config: dict, schema: Optional[str] = None):
super().__init__(prompt, config, source) super().__init__(prompt, config, source, schema)
self.input_key = "xml" if source.endswith("xml") else "xml_dir" self.input_key = "xml" if source.endswith("xml") else "xml_dir"
@ -78,7 +83,8 @@ class XMLScraperGraph(AbstractGraph):
input="user_prompt & (relevant_chunks | parsed_doc | doc)", input="user_prompt & (relevant_chunks | parsed_doc | doc)",
output=["answer"], output=["answer"],
node_config={ node_config={
"llm_model": self.llm_model "llm_model": self.llm_model,
"schema": self.schema
} }
) )

View File

@ -1,6 +1,7 @@
""" """
Module for implementing the conditional node Module for implementing the conditional node
""" """
from .base_node import BaseNode from .base_node import BaseNode

View File

@ -10,10 +10,9 @@ from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel from langchain_core.runnables import RunnableParallel
from ..helpers.generate_answer_node_csv_prompts import template_chunks_csv, template_no_chunks_csv, template_merge_csv
# Imports from the library # Imports from the library
from .base_node import BaseNode from .base_node import BaseNode
from ..helpers.generate_answer_node_csv_prompts import template_chunks_csv, template_no_chunks_csv, template_merge_csv
class GenerateAnswerCSVNode(BaseNode): class GenerateAnswerCSVNode(BaseNode):

View File

@ -15,6 +15,7 @@ from langchain_core.runnables import RunnableParallel
from .base_node import BaseNode from .base_node import BaseNode
from ..helpers import template_chunks, template_no_chunks, template_merge, template_chunks_with_schema, template_no_chunks_with_schema from ..helpers import template_chunks, template_no_chunks, template_merge, template_chunks_with_schema, template_no_chunks_with_schema
class GenerateAnswerNode(BaseNode): class GenerateAnswerNode(BaseNode):
""" """
A node that generates an answer using a large language model (LLM) based on the user's input A node that generates an answer using a large language model (LLM) based on the user's input

View File

@ -15,6 +15,7 @@ from langchain_core.runnables import RunnableParallel
from .base_node import BaseNode from .base_node import BaseNode
from ..helpers.generate_answer_node_omni_prompts import template_no_chunk_omni, template_chunks_omni, template_merge_omni from ..helpers.generate_answer_node_omni_prompts import template_no_chunk_omni, template_chunks_omni, template_merge_omni
class GenerateAnswerOmniNode(BaseNode): class GenerateAnswerOmniNode(BaseNode):
""" """
A node that generates an answer using a large language model (LLM) based on the user's input A node that generates an answer using a large language model (LLM) based on the user's input

View File

@ -14,6 +14,7 @@ from langchain_core.runnables import RunnableParallel
from .base_node import BaseNode from .base_node import BaseNode
from ..helpers.generate_answer_node_pdf_prompts import template_chunks_pdf, template_no_chunks_pdf, template_merge_pdf from ..helpers.generate_answer_node_pdf_prompts import template_chunks_pdf, template_no_chunks_pdf, template_merge_pdf
class GenerateAnswerPDFNode(BaseNode): class GenerateAnswerPDFNode(BaseNode):
""" """
A node that generates an answer using a language model (LLM) based on the user's input A node that generates an answer using a language model (LLM) based on the user's input

View File

@ -10,7 +10,6 @@ from tqdm.asyncio import tqdm
from .base_node import BaseNode from .base_node import BaseNode
_default_batchsize = 16 _default_batchsize = 16

View File

@ -14,6 +14,7 @@ from langchain_core.output_parsers import JsonOutputParser
from .base_node import BaseNode from .base_node import BaseNode
from ..utils import create_graph, create_interactive_graph from ..utils import create_graph, create_interactive_graph
class KnowledgeGraphNode(BaseNode): class KnowledgeGraphNode(BaseNode):
""" """
A node responsible for generating a knowledge graph from a dictionary. A node responsible for generating a knowledge graph from a dictionary.

View File

@ -3,8 +3,10 @@ ParseNode Module
""" """
from typing import List, Optional from typing import List, Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_transformers import Html2TextTransformer from langchain_community.document_transformers import Html2TextTransformer
from .base_node import BaseNode from .base_node import BaseNode

View File

@ -3,6 +3,7 @@ RAGNode Module
""" """
from typing import List, Optional from typing import List, Optional
from langchain.docstore.document import Document from langchain.docstore.document import Document
from langchain.retrievers import ContextualCompressionRetriever from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline

View File

@ -4,9 +4,11 @@ RobotsNode Module
from typing import List, Optional from typing import List, Optional
from urllib.parse import urlparse from urllib.parse import urlparse
from langchain_community.document_loaders import AsyncChromiumLoader from langchain_community.document_loaders import AsyncChromiumLoader
from langchain.prompts import PromptTemplate from langchain.prompts import PromptTemplate
from langchain.output_parsers import CommaSeparatedListOutputParser from langchain.output_parsers import CommaSeparatedListOutputParser
from .base_node import BaseNode from .base_node import BaseNode
from ..helpers import robots_dictionary from ..helpers import robots_dictionary

View File

@ -3,8 +3,10 @@ SearchInternetNode Module
""" """
from typing import List, Optional from typing import List, Optional
from langchain.output_parsers import CommaSeparatedListOutputParser from langchain.output_parsers import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate from langchain.prompts import PromptTemplate
from ..utils.research_web import search_on_web from ..utils.research_web import search_on_web
from .base_node import BaseNode from .base_node import BaseNode

View File

@ -6,7 +6,6 @@ SearchLinkNode Module
from typing import List, Optional from typing import List, Optional
from tqdm import tqdm from tqdm import tqdm
# Imports from Langchain # Imports from Langchain
from langchain.prompts import PromptTemplate from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser from langchain_core.output_parsers import JsonOutputParser