diff --git a/examples/openai/multiple_search_openai.py b/examples/openai/multiple_search_openai.py index dbeecf77..abc70803 100644 --- a/examples/openai/multiple_search_openai.py +++ b/examples/openai/multiple_search_openai.py @@ -49,7 +49,7 @@ openai_key = os.getenv("OPENAI_APIKEY") graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/smart_scraper_openai.py b/examples/openai/smart_scraper_openai.py index b7903acf..88ded8b5 100644 --- a/examples/openai/smart_scraper_openai.py +++ b/examples/openai/smart_scraper_openai.py @@ -32,7 +32,8 @@ graph_config = { smart_scraper_graph = SmartScraperGraph( prompt="List me all the projects with their description", # also accepts a string with the already downloaded HTML code - source="https://perinim.github.io/projects/" + source="https://perinim.github.io/projects/", + config=graph_config, ) result = smart_scraper_graph.run() diff --git a/requirements-dev.lock b/requirements-dev.lock index bcfe71ce..84a8a445 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -45,6 +45,10 @@ certifi==2024.2.2 # via requests charset-normalizer==3.3.2 # via requests +colorama==0.4.6 + # via ipython + # via pytest + # via tqdm dataclasses-json==0.6.6 # via langchain # via langchain-community @@ -100,6 +104,7 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright + # via sqlalchemy groq==0.5.0 # via langchain-groq grpcio==1.63.0 @@ -212,8 +217,6 @@ pandas==2.2.2 # via scrapegraphai parso==0.8.4 # via jedi -pexpect==4.9.0 - # via ipython playwright==1.43.0 # via scrapegraphai pluggy==1.5.0 @@ -230,8 +233,6 @@ protobuf==4.25.3 # via googleapis-common-protos # via grpcio-status # via proto-plus -ptyprocess==0.7.0 - # via pexpect pure-eval==0.2.2 # via stack-data pyasn1==0.6.0 diff --git a/requirements.lock b/requirements.lock index 1176355d..f33598cf 100644 --- a/requirements.lock +++ b/requirements.lock @@ -45,6 +45,9 @@ certifi==2024.2.2 # via requests charset-normalizer==3.3.2 # via requests +colorama==0.4.6 + # via ipython + # via tqdm dataclasses-json==0.6.6 # via langchain # via langchain-community @@ -99,6 +102,7 @@ graphviz==0.20.3 # via scrapegraphai greenlet==3.0.3 # via playwright + # via sqlalchemy groq==0.5.0 # via langchain-groq grpcio==1.63.0 @@ -208,8 +212,6 @@ pandas==2.2.2 # via scrapegraphai parso==0.8.4 # via jedi -pexpect==4.9.0 - # via ipython playwright==1.43.0 # via scrapegraphai prompt-toolkit==3.0.43 @@ -224,8 +226,6 @@ protobuf==4.25.3 # via googleapis-common-protos # via grpcio-status # via proto-plus -ptyprocess==0.7.0 - # via pexpect pure-eval==0.2.2 # via stack-data pyasn1==0.6.0 diff --git a/scrapegraphai/helpers/__init__.py b/scrapegraphai/helpers/__init__.py index 0e43214f..a1981544 100644 --- a/scrapegraphai/helpers/__init__.py +++ b/scrapegraphai/helpers/__init__.py @@ -7,6 +7,6 @@ from .schemas import graph_schema from .models_tokens import models_tokens from .robots import robots_dictionary from .generate_answer_node_prompts import * -from .generate_answer_node_csv_prompts import * -from .generate_answer_node_pdf_prompts import * -from .generate_answer_node_omni_prompts import * +# from .generate_answer_node_csv_prompts import * +# from .generate_answer_node_pdf_prompts import * +# from .generate_answer_node_omni_prompts import * diff --git a/scrapegraphai/helpers/generate_answer_node_prompts.py b/scrapegraphai/helpers/generate_answer_node_prompts.py index 09422862..a9bcdf28 100644 --- a/scrapegraphai/helpers/generate_answer_node_prompts.py +++ b/scrapegraphai/helpers/generate_answer_node_prompts.py @@ -1,7 +1,7 @@ """ Generate answer node prompts """ -template_chunks_gen_answ = """ +template_chunks = """ You are a website scraper and you have just scraped the following content from a website. You are now asked to answer a user question about the content you have scraped.\n @@ -12,7 +12,7 @@ Output instructions: {format_instructions}\n Content of {chunk_id}: {context}. \n """ -template_chunks_with_schema_gen_answ = """ +template_chunks_with_schema = """ You are a website scraper and you have just scraped the following content from a website. You are now asked to answer a user question about the content you have scraped.\n @@ -24,7 +24,7 @@ Output instructions: {format_instructions}\n Content of {chunk_id}: {context}. \n """ -template_no_chunks_gen_answ = """ +template_no_chunks = """ You are a website scraper and you have just scraped the following content from a website. You are now asked to answer a user question about the content you have scraped.\n @@ -35,7 +35,7 @@ User question: {question}\n Website content: {context}\n """ -template_no_chunks_with_schema_gen_answ = """ +template_no_chunks_with_schema = """ You are a website scraper and you have just scraped the following content from a website. You are now asked to answer a user question about the content you have scraped.\n @@ -48,7 +48,7 @@ Website content: {context}\n """ -template_merge_gen_answ = """ +template_merge = """ You are a website scraper and you have just scraped the following content from a website. You are now asked to answer a user question about the content you have scraped.\n diff --git a/scrapegraphai/nodes/generate_answer_node.py b/scrapegraphai/nodes/generate_answer_node.py index 9f5b52d0..701e23d4 100644 --- a/scrapegraphai/nodes/generate_answer_node.py +++ b/scrapegraphai/nodes/generate_answer_node.py @@ -13,7 +13,7 @@ from langchain_core.runnables import RunnableParallel # Imports from the library from .base_node import BaseNode -from ..helpers import template_chunks_gen_answ, template_no_chunks_gen_answ, template_merge_gen_answ, template_chunks_with_schema_gen_answ, template_chunks_with_schema_gen_answ +from ..helpers import template_chunks, template_no_chunks, template_merge, template_chunks_with_schema, template_no_chunks_with_schema class GenerateAnswerNode(BaseNode): """ @@ -77,13 +77,13 @@ class GenerateAnswerNode(BaseNode): for i, chunk in enumerate(tqdm(doc, desc="Processing chunks", disable=not self.verbose)): if self.node_config["schema"] is None and len(doc) == 1: prompt = PromptTemplate( - template=template_no_chunks_gen_answ, + template=template_no_chunks, input_variables=["question"], partial_variables={"context": chunk.page_content, "format_instructions": format_instructions}) elif self.node_config["schema"] is not None and len(doc) == 1: prompt = PromptTemplate( - template=template_chunks_with_schema_gen_answ, + template=template_no_chunks_with_schema, input_variables=["question"], partial_variables={"context": chunk.page_content, "format_instructions": format_instructions, @@ -91,14 +91,14 @@ class GenerateAnswerNode(BaseNode): }) elif self.node_config["schema"] is None and len(doc) > 1: prompt = PromptTemplate( - template=template_chunks_gen_answ, + template=template_chunks, input_variables=["question"], partial_variables={"context": chunk.page_content, "chunk_id": i + 1, "format_instructions": format_instructions}) elif self.node_config["schema"] is not None and len(doc) > 1: prompt = PromptTemplate( - template=template_chunks_with_schema_gen_answ, + template=template_chunks_with_schema, input_variables=["question"], partial_variables={"context": chunk.page_content, "chunk_id": i + 1, @@ -116,7 +116,7 @@ class GenerateAnswerNode(BaseNode): answer = map_chain.invoke({"question": user_prompt}) # Merge the answers from the chunks merge_prompt = PromptTemplate( - template=template_merge_gen_answ, + template=template_merge, input_variables=["context", "question"], partial_variables={"format_instructions": format_instructions}, )