mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
feat(multiple): quick fix working
This commit is contained in:
parent
ff53771f87
commit
58cc903d55
@ -49,7 +49,7 @@ openai_key = os.getenv("OPENAI_APIKEY")
|
||||
graph_config = {
|
||||
"llm": {
|
||||
"api_key": openai_key,
|
||||
"model": "gpt-3.5-turbo",
|
||||
"model": "gpt-4o",
|
||||
},
|
||||
"verbose": True,
|
||||
"headless": False,
|
||||
|
||||
@ -32,7 +32,8 @@ graph_config = {
|
||||
smart_scraper_graph = SmartScraperGraph(
|
||||
prompt="List me all the projects with their description",
|
||||
# also accepts a string with the already downloaded HTML code
|
||||
source="https://perinim.github.io/projects/"
|
||||
source="https://perinim.github.io/projects/",
|
||||
config=graph_config,
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
|
||||
@ -45,6 +45,10 @@ certifi==2024.2.2
|
||||
# via requests
|
||||
charset-normalizer==3.3.2
|
||||
# via requests
|
||||
colorama==0.4.6
|
||||
# via ipython
|
||||
# via pytest
|
||||
# via tqdm
|
||||
dataclasses-json==0.6.6
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
@ -100,6 +104,7 @@ graphviz==0.20.3
|
||||
# via scrapegraphai
|
||||
greenlet==3.0.3
|
||||
# via playwright
|
||||
# via sqlalchemy
|
||||
groq==0.5.0
|
||||
# via langchain-groq
|
||||
grpcio==1.63.0
|
||||
@ -212,8 +217,6 @@ pandas==2.2.2
|
||||
# via scrapegraphai
|
||||
parso==0.8.4
|
||||
# via jedi
|
||||
pexpect==4.9.0
|
||||
# via ipython
|
||||
playwright==1.43.0
|
||||
# via scrapegraphai
|
||||
pluggy==1.5.0
|
||||
@ -230,8 +233,6 @@ protobuf==4.25.3
|
||||
# via googleapis-common-protos
|
||||
# via grpcio-status
|
||||
# via proto-plus
|
||||
ptyprocess==0.7.0
|
||||
# via pexpect
|
||||
pure-eval==0.2.2
|
||||
# via stack-data
|
||||
pyasn1==0.6.0
|
||||
|
||||
@ -45,6 +45,9 @@ certifi==2024.2.2
|
||||
# via requests
|
||||
charset-normalizer==3.3.2
|
||||
# via requests
|
||||
colorama==0.4.6
|
||||
# via ipython
|
||||
# via tqdm
|
||||
dataclasses-json==0.6.6
|
||||
# via langchain
|
||||
# via langchain-community
|
||||
@ -99,6 +102,7 @@ graphviz==0.20.3
|
||||
# via scrapegraphai
|
||||
greenlet==3.0.3
|
||||
# via playwright
|
||||
# via sqlalchemy
|
||||
groq==0.5.0
|
||||
# via langchain-groq
|
||||
grpcio==1.63.0
|
||||
@ -208,8 +212,6 @@ pandas==2.2.2
|
||||
# via scrapegraphai
|
||||
parso==0.8.4
|
||||
# via jedi
|
||||
pexpect==4.9.0
|
||||
# via ipython
|
||||
playwright==1.43.0
|
||||
# via scrapegraphai
|
||||
prompt-toolkit==3.0.43
|
||||
@ -224,8 +226,6 @@ protobuf==4.25.3
|
||||
# via googleapis-common-protos
|
||||
# via grpcio-status
|
||||
# via proto-plus
|
||||
ptyprocess==0.7.0
|
||||
# via pexpect
|
||||
pure-eval==0.2.2
|
||||
# via stack-data
|
||||
pyasn1==0.6.0
|
||||
|
||||
@ -7,6 +7,6 @@ from .schemas import graph_schema
|
||||
from .models_tokens import models_tokens
|
||||
from .robots import robots_dictionary
|
||||
from .generate_answer_node_prompts import *
|
||||
from .generate_answer_node_csv_prompts import *
|
||||
from .generate_answer_node_pdf_prompts import *
|
||||
from .generate_answer_node_omni_prompts import *
|
||||
# from .generate_answer_node_csv_prompts import *
|
||||
# from .generate_answer_node_pdf_prompts import *
|
||||
# from .generate_answer_node_omni_prompts import *
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
"""
|
||||
Generate answer node prompts
|
||||
"""
|
||||
template_chunks_gen_answ = """
|
||||
template_chunks = """
|
||||
You are a website scraper and you have just scraped the
|
||||
following content from a website.
|
||||
You are now asked to answer a user question about the content you have scraped.\n
|
||||
@ -12,7 +12,7 @@ Output instructions: {format_instructions}\n
|
||||
Content of {chunk_id}: {context}. \n
|
||||
"""
|
||||
|
||||
template_chunks_with_schema_gen_answ = """
|
||||
template_chunks_with_schema = """
|
||||
You are a website scraper and you have just scraped the
|
||||
following content from a website.
|
||||
You are now asked to answer a user question about the content you have scraped.\n
|
||||
@ -24,7 +24,7 @@ Output instructions: {format_instructions}\n
|
||||
Content of {chunk_id}: {context}. \n
|
||||
"""
|
||||
|
||||
template_no_chunks_gen_answ = """
|
||||
template_no_chunks = """
|
||||
You are a website scraper and you have just scraped the
|
||||
following content from a website.
|
||||
You are now asked to answer a user question about the content you have scraped.\n
|
||||
@ -35,7 +35,7 @@ User question: {question}\n
|
||||
Website content: {context}\n
|
||||
"""
|
||||
|
||||
template_no_chunks_with_schema_gen_answ = """
|
||||
template_no_chunks_with_schema = """
|
||||
You are a website scraper and you have just scraped the
|
||||
following content from a website.
|
||||
You are now asked to answer a user question about the content you have scraped.\n
|
||||
@ -48,7 +48,7 @@ Website content: {context}\n
|
||||
"""
|
||||
|
||||
|
||||
template_merge_gen_answ = """
|
||||
template_merge = """
|
||||
You are a website scraper and you have just scraped the
|
||||
following content from a website.
|
||||
You are now asked to answer a user question about the content you have scraped.\n
|
||||
|
||||
@ -13,7 +13,7 @@ from langchain_core.runnables import RunnableParallel
|
||||
|
||||
# Imports from the library
|
||||
from .base_node import BaseNode
|
||||
from ..helpers import template_chunks_gen_answ, template_no_chunks_gen_answ, template_merge_gen_answ, template_chunks_with_schema_gen_answ, template_chunks_with_schema_gen_answ
|
||||
from ..helpers import template_chunks, template_no_chunks, template_merge, template_chunks_with_schema, template_no_chunks_with_schema
|
||||
|
||||
class GenerateAnswerNode(BaseNode):
|
||||
"""
|
||||
@ -77,13 +77,13 @@ class GenerateAnswerNode(BaseNode):
|
||||
for i, chunk in enumerate(tqdm(doc, desc="Processing chunks", disable=not self.verbose)):
|
||||
if self.node_config["schema"] is None and len(doc) == 1:
|
||||
prompt = PromptTemplate(
|
||||
template=template_no_chunks_gen_answ,
|
||||
template=template_no_chunks,
|
||||
input_variables=["question"],
|
||||
partial_variables={"context": chunk.page_content,
|
||||
"format_instructions": format_instructions})
|
||||
elif self.node_config["schema"] is not None and len(doc) == 1:
|
||||
prompt = PromptTemplate(
|
||||
template=template_chunks_with_schema_gen_answ,
|
||||
template=template_no_chunks_with_schema,
|
||||
input_variables=["question"],
|
||||
partial_variables={"context": chunk.page_content,
|
||||
"format_instructions": format_instructions,
|
||||
@ -91,14 +91,14 @@ class GenerateAnswerNode(BaseNode):
|
||||
})
|
||||
elif self.node_config["schema"] is None and len(doc) > 1:
|
||||
prompt = PromptTemplate(
|
||||
template=template_chunks_gen_answ,
|
||||
template=template_chunks,
|
||||
input_variables=["question"],
|
||||
partial_variables={"context": chunk.page_content,
|
||||
"chunk_id": i + 1,
|
||||
"format_instructions": format_instructions})
|
||||
elif self.node_config["schema"] is not None and len(doc) > 1:
|
||||
prompt = PromptTemplate(
|
||||
template=template_chunks_with_schema_gen_answ,
|
||||
template=template_chunks_with_schema,
|
||||
input_variables=["question"],
|
||||
partial_variables={"context": chunk.page_content,
|
||||
"chunk_id": i + 1,
|
||||
@ -116,7 +116,7 @@ class GenerateAnswerNode(BaseNode):
|
||||
answer = map_chain.invoke({"question": user_prompt})
|
||||
# Merge the answers from the chunks
|
||||
merge_prompt = PromptTemplate(
|
||||
template=template_merge_gen_answ,
|
||||
template=template_merge,
|
||||
input_variables=["context", "question"],
|
||||
partial_variables={"format_instructions": format_instructions},
|
||||
)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user