diff --git a/pyproject.toml b/pyproject.toml index f74cd39c..a1970155 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,6 @@ dependencies = [ "free-proxy==1.1.1", "playwright==1.43.0", "google==3.0.0", - "yahoo-search-py==0.3", "undetected-playwright==0.3.0", ] @@ -64,7 +63,7 @@ classifiers = [ "Programming Language :: Python :: 3", "Operating System :: OS Independent", ] -requires-python = ">=3.9,<3.12" +requires-python = ">=3.9,<4.0" [project.optional-dependencies] burr = ["burr[start]==0.19.1"] diff --git a/requirements-dev.lock b/requirements-dev.lock index 5798ea02..e716672e 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -30,9 +30,6 @@ anyio==4.3.0 # via openai # via starlette # via watchfiles -async-timeout==4.0.3 - # via aiohttp - # via langchain attrs==23.2.0 # via aiohttp # via jsonschema @@ -93,9 +90,6 @@ docutils==0.19 # via sphinx email-validator==2.1.1 # via fastapi -exceptiongroup==1.2.1 - # via anyio - # via pytest faiss-cpu==1.8.0 # via scrapegraphai fastapi==0.111.0 @@ -175,7 +169,6 @@ httpx==0.27.0 # via fastapi # via groq # via openai - # via yahoo-search-py huggingface-hub==0.23.1 # via tokenizers idna==3.7 @@ -330,7 +323,6 @@ pydantic==2.7.1 # via langchain-core # via langsmith # via openai - # via yahoo-search-py pydantic-core==2.18.2 # via pydantic pydeck==0.9.1 @@ -390,8 +382,6 @@ rsa==4.9 # via google-auth s3transfer==0.10.1 # via boto3 -selectolax==0.3.21 - # via yahoo-search-py sf-hamilton==1.63.0 # via burr shellingham==1.5.4 @@ -447,8 +437,6 @@ tokenizers==0.19.1 # via anthropic toml==0.10.2 # via streamlit -tomli==2.0.1 - # via pytest toolz==0.12.1 # via altair tornado==6.4 @@ -461,9 +449,7 @@ tqdm==4.66.4 typer==0.12.3 # via fastapi-cli typing-extensions==4.12.0 - # via altair # via anthropic - # via anyio # via fastapi # via fastapi-pagination # via google-generativeai @@ -478,7 +464,6 @@ typing-extensions==4.12.0 # via streamlit # via typer # via typing-inspect - # via uvicorn typing-inspect==0.9.0 # via dataclasses-json # via sf-hamilton @@ -493,7 +478,6 @@ uritemplate==4.1.1 urllib3==2.2.1 # via botocore # via requests - # via yahoo-search-py uvicorn==0.29.0 # via burr # via fastapi @@ -505,7 +489,5 @@ websockets==12.0 # via uvicorn win32-setctime==1.1.0 # via loguru -yahoo-search-py==0.3 - # via scrapegraphai yarl==1.9.4 # via aiohttp diff --git a/requirements.lock b/requirements.lock index c8ce6201..995a9e63 100644 --- a/requirements.lock +++ b/requirements.lock @@ -22,9 +22,6 @@ anyio==4.3.0 # via groq # via httpx # via openai -async-timeout==4.0.3 - # via aiohttp - # via langchain attrs==23.2.0 # via aiohttp beautifulsoup4==4.12.3 @@ -54,8 +51,6 @@ distro==1.9.0 # via anthropic # via groq # via openai -exceptiongroup==1.2.1 - # via anyio faiss-cpu==1.8.0 # via scrapegraphai filelock==3.14.0 @@ -115,7 +110,6 @@ httpx==0.27.0 # via anthropic # via groq # via openai - # via yahoo-search-py huggingface-hub==0.23.1 # via tokenizers idna==3.7 @@ -215,7 +209,6 @@ pydantic==2.7.1 # via langchain-core # via langsmith # via openai - # via yahoo-search-py pydantic-core==2.18.2 # via pydantic pyee==11.1.0 @@ -248,8 +241,6 @@ rsa==4.9 # via google-auth s3transfer==0.10.1 # via boto3 -selectolax==0.3.21 - # via yahoo-search-py six==1.16.0 # via python-dateutil sniffio==1.3.1 @@ -279,7 +270,6 @@ tqdm==4.66.4 # via scrapegraphai typing-extensions==4.12.0 # via anthropic - # via anyio # via google-generativeai # via groq # via huggingface-hub @@ -300,8 +290,5 @@ uritemplate==4.1.1 urllib3==2.2.1 # via botocore # via requests - # via yahoo-search-py -yahoo-search-py==0.3 - # via scrapegraphai yarl==1.9.4 # via aiohttp diff --git a/scrapegraphai/integrations/burr_bridge.py b/scrapegraphai/integrations/burr_bridge.py index 746fbdb7..0cac9f4d 100644 --- a/scrapegraphai/integrations/burr_bridge.py +++ b/scrapegraphai/integrations/burr_bridge.py @@ -5,6 +5,7 @@ Bridge class to integrate Burr into ScrapeGraphAI graphs import re from typing import Any, Dict, List, Tuple +import inspect try: import burr @@ -54,6 +55,9 @@ class BurrNodeBridge(Action): def update(self, result: dict, state: State) -> State: return state.update(**result) + + def get_source(self) -> str: + return inspect.getsource(self.node.__class__) def parse_boolean_expression(expression: str) -> List[str]: diff --git a/scrapegraphai/utils/research_web.py b/scrapegraphai/utils/research_web.py index 83d44917..a839a680 100644 --- a/scrapegraphai/utils/research_web.py +++ b/scrapegraphai/utils/research_web.py @@ -5,7 +5,6 @@ import re from typing import List from langchain_community.tools import DuckDuckGoSearchResults from googlesearch import search as google_search -from yahoo_search import search as yahoo_search def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10) -> List[str]: @@ -43,16 +42,5 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int = links = re.findall(r'https?://[^\s,\]]+', res) return links - elif search_engine.lower() == "yahoo": - list_result = yahoo_search(query) - results = [] - for page in list_result.pages: - if len(results) >= max_results: # Check if max_results has already been reached - break # Exit loop if max_results has been reached - try: - results.append(page.link) - except AttributeError: - continue - return results raise ValueError( "The only search engines available are DuckDuckGo or Google")