diff --git a/examples/custom_graph_rag_example.py b/examples/custom_graph_rag_example.py deleted file mode 100644 index 003d710d..00000000 --- a/examples/custom_graph_rag_example.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -Example of custom graph using existing nodes -""" - -import os -from dotenv import load_dotenv -from scrapegraphai.models import OpenAI -from scrapegraphai.graphs import BaseGraph -from scrapegraphai.nodes import FetchHTMLNode, RAGNode, GenerateAnswerNode - -load_dotenv() - -# Define the configuration for the language model -openai_key = os.getenv("OPENAI_APIKEY") -llm_config = { - "api_key": openai_key, - "model_name": "gpt-3.5-turbo", - "temperature": 0, - "streaming": True -} -model = OpenAI(llm_config) - -# define the nodes for the graph -fetch_html_node = FetchHTMLNode("fetch_html") -rag_node = RAGNode(model, "rag") -generate_answer_node = GenerateAnswerNode(model, "generate_answer") - -# create the graph -graph = BaseGraph( - nodes={ - fetch_html_node, - rag_node, - generate_answer_node - }, - edges={ - (fetch_html_node, rag_node), - (rag_node, generate_answer_node) - }, - entry_point=fetch_html_node -) - -# execute the graph -inputs = {"user_input": "Give me the summary of the page", "url": "https://python.langchain.com/docs/expression_language/how_to/map"} -result = graph.execute(inputs) - -# get the answer from the result -answer = result.get("answer", "No answer found.") -print(answer) diff --git a/examples/example_multichunks.py b/examples/example_multichunks.py deleted file mode 100644 index ca524359..00000000 --- a/examples/example_multichunks.py +++ /dev/null @@ -1,80 +0,0 @@ -from langchain_community.document_transformers import Html2TextTransformer -from langchain_community.document_loaders import AsyncHtmlLoader -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain.docstore.document import Document -from langchain_openai import OpenAIEmbeddings -import os -from dotenv import load_dotenv - -from langchain.retrievers import ContextualCompressionRetriever -from langchain.retrievers.document_compressors import EmbeddingsFilter -from langchain.retrievers.document_compressors import DocumentCompressorPipeline -from langchain_community.document_transformers import EmbeddingsRedundantFilter -from langchain_openai import OpenAI -from langchain_community.vectorstores import FAISS - -load_dotenv() - -# Helper function for printing docs - -def pretty_print_docs(docs): - print( - f"\n{'-' * 100}\n".join( - [f"Document {i+1}:\n\n" + d.page_content for i, d in enumerate(docs)] - ) - ) - -# Define the configuration for the language model -openai_key = os.getenv("OPENAI_APIKEY") - -# chroma = Chroma('test', OpenAIEmbeddings(api_key=openai_key)) - -# html2text 2020.1.16 -urls = ["https://www.mymovies.it/cinema/roma"] -loader = AsyncHtmlLoader(urls) -docs = loader.load() -text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder( - chunk_size=4000, - chunk_overlap=200, -) - -docs_transformed = Html2TextTransformer().transform_documents(docs) - -doc = docs[0] - -chunks = text_splitter.split_text(doc.page_content) - -chunked_docs = [] - -for i, chunk in enumerate(chunks): - doc = Document( - page_content=chunk, - metadata={ - "chunk": i + 1, - }, - ) - chunked_docs.append(doc) - -retriever = FAISS.from_documents(chunked_docs, OpenAIEmbeddings(api_key=openai_key)).as_retriever() - -embeddings = OpenAIEmbeddings(api_key=openai_key) # could be any embedding of your choice -embeddings_filter = EmbeddingsFilter(embeddings=embeddings) -redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings) -relevant_filter = EmbeddingsFilter(embeddings=embeddings) # similarity_threshold could be set, now k=20 -pipeline_compressor = DocumentCompressorPipeline( - transformers=[redundant_filter, relevant_filter] -) - -compression_retriever = ContextualCompressionRetriever( - base_compressor=pipeline_compressor, base_retriever=retriever -) - -compressed_docs = compression_retriever.get_relevant_documents( - "Dammi i nomi dei cinema in provincia di Roma" -) - -pretty_print_docs(compressed_docs) - -# db = Chroma.from_documents(chunked_docs, OpenAIEmbeddings(api_key=openai_key)) -# chroma.similarity_search_with_relevance_scores('Find the cinema name', 10) - diff --git a/examples/smart_scraper_example.py b/examples/smart_scraper_example.py index e9911126..a1b742c9 100644 --- a/examples/smart_scraper_example.py +++ b/examples/smart_scraper_example.py @@ -15,8 +15,12 @@ llm_config = { "model_name": "gpt-3.5-turbo", } -smart_scraper_graph = SmartScraperGraph("List me all the titles and project descriptions", - "https://perinim.github.io/projects/", llm_config) +# Define URL and prompt +url = "https://perinim.github.io/projects/" +prompt = "List me all the titles and project descriptions" + +# Create the SmartScraperGraph instance +smart_scraper_graph = SmartScraperGraph(prompt, url, llm_config) answer = smart_scraper_graph.run() print(answer) diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py index 9bde10d1..160e1d64 100644 --- a/scrapegraphai/graphs/smart_scraper_graph.py +++ b/scrapegraphai/graphs/smart_scraper_graph.py @@ -2,10 +2,8 @@ from ..models import OpenAI from .base_graph import BaseGraph from ..nodes import ( FetchHTMLNode, - ConditionalNode, - GetProbableTagsNode, - GenerateAnswerNode, - ParseHTMLNode + RAGNode, + GenerateAnswerNode ) @@ -74,25 +72,18 @@ class SmartScraperGraph: BaseGraph: An instance of the BaseGraph class. """ fetch_html_node = FetchHTMLNode("fetch_html") - get_probable_tags_node = GetProbableTagsNode( - self.llm, "get_probable_tags") - parse_document_node = ParseHTMLNode("parse_document") + rag_node = RAGNode(self.llm, "rag") generate_answer_node = GenerateAnswerNode(self.llm, "generate_answer") - conditional_node = ConditionalNode( - "conditional", [parse_document_node, generate_answer_node]) return BaseGraph( nodes={ fetch_html_node, - get_probable_tags_node, - conditional_node, - parse_document_node, + rag_node, generate_answer_node, }, edges={ - (fetch_html_node, get_probable_tags_node), - (get_probable_tags_node, conditional_node), - (parse_document_node, generate_answer_node) + (fetch_html_node, rag_node), + (rag_node, generate_answer_node) }, entry_point=fetch_html_node )