mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-07-01 21:00:48 +08:00
fix: search graph
This commit is contained in:
parent
a9569ac08f
commit
d4b26796d9
@ -96,7 +96,17 @@ class MergeAnswersNode(BaseNode):
|
|||||||
|
|
||||||
merge_chain = prompt_template | self.llm_model | output_parser
|
merge_chain = prompt_template | self.llm_model | output_parser
|
||||||
answer = merge_chain.invoke({"user_prompt": user_prompt})
|
answer = merge_chain.invoke({"user_prompt": user_prompt})
|
||||||
answer["sources"] = state.get("urls", [])
|
|
||||||
|
# Get the URLs from the state, ensuring we get the actual URLs used for scraping
|
||||||
|
urls = []
|
||||||
|
if "urls" in state:
|
||||||
|
urls = state["urls"]
|
||||||
|
elif "considered_urls" in state:
|
||||||
|
urls = state["considered_urls"]
|
||||||
|
|
||||||
|
# Only add sources if we actually have URLs
|
||||||
|
if urls:
|
||||||
|
answer["sources"] = urls
|
||||||
|
|
||||||
state.update({self.output[0]: answer})
|
state.update({self.output[0]: answer})
|
||||||
return state
|
return state
|
||||||
|
|||||||
@ -99,5 +99,8 @@ class SearchInternetNode(BaseNode):
|
|||||||
if len(answer) == 0:
|
if len(answer) == 0:
|
||||||
raise ValueError("Zero results found for the search query.")
|
raise ValueError("Zero results found for the search query.")
|
||||||
|
|
||||||
|
# Store both the URLs and considered_urls in the state
|
||||||
state.update({self.output[0]: answer})
|
state.update({self.output[0]: answer})
|
||||||
|
state["considered_urls"] = answer # Add this as a backup
|
||||||
|
|
||||||
return state
|
return state
|
||||||
@ -41,7 +41,7 @@ def search_on_web(query: str, search_engine: str = "Google",
|
|||||||
research = DuckDuckGoSearchResults(max_results=max_results)
|
research = DuckDuckGoSearchResults(max_results=max_results)
|
||||||
res = research.run(query)
|
res = research.run(query)
|
||||||
links = re.findall(r'https?://[^\s,\]]+', res)
|
links = re.findall(r'https?://[^\s,\]]+', res)
|
||||||
return links
|
return links[:max_results]
|
||||||
|
|
||||||
elif search_engine.lower() == "bing":
|
elif search_engine.lower() == "bing":
|
||||||
headers = {
|
headers = {
|
||||||
@ -66,7 +66,7 @@ def search_on_web(query: str, search_engine: str = "Google",
|
|||||||
response = requests.get(url, params=params)
|
response = requests.get(url, params=params)
|
||||||
|
|
||||||
data = response.json()
|
data = response.json()
|
||||||
limited_results = data["results"][:max_results]
|
limited_results = [result['url'] for result in data["results"][:max_results]]
|
||||||
return limited_results
|
return limited_results
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user