mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-25 21:11:11 +08:00
feat(version): python 3.12 is now supported 🚀
This commit is contained in:
parent
e43b8018f5
commit
5fb9115330
@ -30,7 +30,6 @@ dependencies = [
|
|||||||
"free-proxy==1.1.1",
|
"free-proxy==1.1.1",
|
||||||
"playwright==1.43.0",
|
"playwright==1.43.0",
|
||||||
"google==3.0.0",
|
"google==3.0.0",
|
||||||
"yahoo-search-py==0.3",
|
|
||||||
"undetected-playwright==0.3.0",
|
"undetected-playwright==0.3.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -64,7 +63,7 @@ classifiers = [
|
|||||||
"Programming Language :: Python :: 3",
|
"Programming Language :: Python :: 3",
|
||||||
"Operating System :: OS Independent",
|
"Operating System :: OS Independent",
|
||||||
]
|
]
|
||||||
requires-python = ">=3.9,<3.12"
|
requires-python = ">=3.9,<4.0"
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
burr = ["burr[start]==0.19.1"]
|
burr = ["burr[start]==0.19.1"]
|
||||||
|
|||||||
@ -30,9 +30,6 @@ anyio==4.3.0
|
|||||||
# via openai
|
# via openai
|
||||||
# via starlette
|
# via starlette
|
||||||
# via watchfiles
|
# via watchfiles
|
||||||
async-timeout==4.0.3
|
|
||||||
# via aiohttp
|
|
||||||
# via langchain
|
|
||||||
attrs==23.2.0
|
attrs==23.2.0
|
||||||
# via aiohttp
|
# via aiohttp
|
||||||
# via jsonschema
|
# via jsonschema
|
||||||
@ -93,9 +90,6 @@ docutils==0.19
|
|||||||
# via sphinx
|
# via sphinx
|
||||||
email-validator==2.1.1
|
email-validator==2.1.1
|
||||||
# via fastapi
|
# via fastapi
|
||||||
exceptiongroup==1.2.1
|
|
||||||
# via anyio
|
|
||||||
# via pytest
|
|
||||||
faiss-cpu==1.8.0
|
faiss-cpu==1.8.0
|
||||||
# via scrapegraphai
|
# via scrapegraphai
|
||||||
fastapi==0.111.0
|
fastapi==0.111.0
|
||||||
@ -175,7 +169,6 @@ httpx==0.27.0
|
|||||||
# via fastapi
|
# via fastapi
|
||||||
# via groq
|
# via groq
|
||||||
# via openai
|
# via openai
|
||||||
# via yahoo-search-py
|
|
||||||
huggingface-hub==0.23.1
|
huggingface-hub==0.23.1
|
||||||
# via tokenizers
|
# via tokenizers
|
||||||
idna==3.7
|
idna==3.7
|
||||||
@ -330,7 +323,6 @@ pydantic==2.7.1
|
|||||||
# via langchain-core
|
# via langchain-core
|
||||||
# via langsmith
|
# via langsmith
|
||||||
# via openai
|
# via openai
|
||||||
# via yahoo-search-py
|
|
||||||
pydantic-core==2.18.2
|
pydantic-core==2.18.2
|
||||||
# via pydantic
|
# via pydantic
|
||||||
pydeck==0.9.1
|
pydeck==0.9.1
|
||||||
@ -390,8 +382,6 @@ rsa==4.9
|
|||||||
# via google-auth
|
# via google-auth
|
||||||
s3transfer==0.10.1
|
s3transfer==0.10.1
|
||||||
# via boto3
|
# via boto3
|
||||||
selectolax==0.3.21
|
|
||||||
# via yahoo-search-py
|
|
||||||
sf-hamilton==1.63.0
|
sf-hamilton==1.63.0
|
||||||
# via burr
|
# via burr
|
||||||
shellingham==1.5.4
|
shellingham==1.5.4
|
||||||
@ -447,8 +437,6 @@ tokenizers==0.19.1
|
|||||||
# via anthropic
|
# via anthropic
|
||||||
toml==0.10.2
|
toml==0.10.2
|
||||||
# via streamlit
|
# via streamlit
|
||||||
tomli==2.0.1
|
|
||||||
# via pytest
|
|
||||||
toolz==0.12.1
|
toolz==0.12.1
|
||||||
# via altair
|
# via altair
|
||||||
tornado==6.4
|
tornado==6.4
|
||||||
@ -461,9 +449,7 @@ tqdm==4.66.4
|
|||||||
typer==0.12.3
|
typer==0.12.3
|
||||||
# via fastapi-cli
|
# via fastapi-cli
|
||||||
typing-extensions==4.12.0
|
typing-extensions==4.12.0
|
||||||
# via altair
|
|
||||||
# via anthropic
|
# via anthropic
|
||||||
# via anyio
|
|
||||||
# via fastapi
|
# via fastapi
|
||||||
# via fastapi-pagination
|
# via fastapi-pagination
|
||||||
# via google-generativeai
|
# via google-generativeai
|
||||||
@ -478,7 +464,6 @@ typing-extensions==4.12.0
|
|||||||
# via streamlit
|
# via streamlit
|
||||||
# via typer
|
# via typer
|
||||||
# via typing-inspect
|
# via typing-inspect
|
||||||
# via uvicorn
|
|
||||||
typing-inspect==0.9.0
|
typing-inspect==0.9.0
|
||||||
# via dataclasses-json
|
# via dataclasses-json
|
||||||
# via sf-hamilton
|
# via sf-hamilton
|
||||||
@ -493,7 +478,6 @@ uritemplate==4.1.1
|
|||||||
urllib3==2.2.1
|
urllib3==2.2.1
|
||||||
# via botocore
|
# via botocore
|
||||||
# via requests
|
# via requests
|
||||||
# via yahoo-search-py
|
|
||||||
uvicorn==0.29.0
|
uvicorn==0.29.0
|
||||||
# via burr
|
# via burr
|
||||||
# via fastapi
|
# via fastapi
|
||||||
@ -505,7 +489,5 @@ websockets==12.0
|
|||||||
# via uvicorn
|
# via uvicorn
|
||||||
win32-setctime==1.1.0
|
win32-setctime==1.1.0
|
||||||
# via loguru
|
# via loguru
|
||||||
yahoo-search-py==0.3
|
|
||||||
# via scrapegraphai
|
|
||||||
yarl==1.9.4
|
yarl==1.9.4
|
||||||
# via aiohttp
|
# via aiohttp
|
||||||
|
|||||||
@ -22,9 +22,6 @@ anyio==4.3.0
|
|||||||
# via groq
|
# via groq
|
||||||
# via httpx
|
# via httpx
|
||||||
# via openai
|
# via openai
|
||||||
async-timeout==4.0.3
|
|
||||||
# via aiohttp
|
|
||||||
# via langchain
|
|
||||||
attrs==23.2.0
|
attrs==23.2.0
|
||||||
# via aiohttp
|
# via aiohttp
|
||||||
beautifulsoup4==4.12.3
|
beautifulsoup4==4.12.3
|
||||||
@ -54,8 +51,6 @@ distro==1.9.0
|
|||||||
# via anthropic
|
# via anthropic
|
||||||
# via groq
|
# via groq
|
||||||
# via openai
|
# via openai
|
||||||
exceptiongroup==1.2.1
|
|
||||||
# via anyio
|
|
||||||
faiss-cpu==1.8.0
|
faiss-cpu==1.8.0
|
||||||
# via scrapegraphai
|
# via scrapegraphai
|
||||||
filelock==3.14.0
|
filelock==3.14.0
|
||||||
@ -115,7 +110,6 @@ httpx==0.27.0
|
|||||||
# via anthropic
|
# via anthropic
|
||||||
# via groq
|
# via groq
|
||||||
# via openai
|
# via openai
|
||||||
# via yahoo-search-py
|
|
||||||
huggingface-hub==0.23.1
|
huggingface-hub==0.23.1
|
||||||
# via tokenizers
|
# via tokenizers
|
||||||
idna==3.7
|
idna==3.7
|
||||||
@ -215,7 +209,6 @@ pydantic==2.7.1
|
|||||||
# via langchain-core
|
# via langchain-core
|
||||||
# via langsmith
|
# via langsmith
|
||||||
# via openai
|
# via openai
|
||||||
# via yahoo-search-py
|
|
||||||
pydantic-core==2.18.2
|
pydantic-core==2.18.2
|
||||||
# via pydantic
|
# via pydantic
|
||||||
pyee==11.1.0
|
pyee==11.1.0
|
||||||
@ -248,8 +241,6 @@ rsa==4.9
|
|||||||
# via google-auth
|
# via google-auth
|
||||||
s3transfer==0.10.1
|
s3transfer==0.10.1
|
||||||
# via boto3
|
# via boto3
|
||||||
selectolax==0.3.21
|
|
||||||
# via yahoo-search-py
|
|
||||||
six==1.16.0
|
six==1.16.0
|
||||||
# via python-dateutil
|
# via python-dateutil
|
||||||
sniffio==1.3.1
|
sniffio==1.3.1
|
||||||
@ -279,7 +270,6 @@ tqdm==4.66.4
|
|||||||
# via scrapegraphai
|
# via scrapegraphai
|
||||||
typing-extensions==4.12.0
|
typing-extensions==4.12.0
|
||||||
# via anthropic
|
# via anthropic
|
||||||
# via anyio
|
|
||||||
# via google-generativeai
|
# via google-generativeai
|
||||||
# via groq
|
# via groq
|
||||||
# via huggingface-hub
|
# via huggingface-hub
|
||||||
@ -300,8 +290,5 @@ uritemplate==4.1.1
|
|||||||
urllib3==2.2.1
|
urllib3==2.2.1
|
||||||
# via botocore
|
# via botocore
|
||||||
# via requests
|
# via requests
|
||||||
# via yahoo-search-py
|
|
||||||
yahoo-search-py==0.3
|
|
||||||
# via scrapegraphai
|
|
||||||
yarl==1.9.4
|
yarl==1.9.4
|
||||||
# via aiohttp
|
# via aiohttp
|
||||||
|
|||||||
@ -5,6 +5,7 @@ Bridge class to integrate Burr into ScrapeGraphAI graphs
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
from typing import Any, Dict, List, Tuple
|
from typing import Any, Dict, List, Tuple
|
||||||
|
import inspect
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import burr
|
import burr
|
||||||
@ -54,6 +55,9 @@ class BurrNodeBridge(Action):
|
|||||||
|
|
||||||
def update(self, result: dict, state: State) -> State:
|
def update(self, result: dict, state: State) -> State:
|
||||||
return state.update(**result)
|
return state.update(**result)
|
||||||
|
|
||||||
|
def get_source(self) -> str:
|
||||||
|
return inspect.getsource(self.node.__class__)
|
||||||
|
|
||||||
|
|
||||||
def parse_boolean_expression(expression: str) -> List[str]:
|
def parse_boolean_expression(expression: str) -> List[str]:
|
||||||
|
|||||||
@ -5,7 +5,6 @@ import re
|
|||||||
from typing import List
|
from typing import List
|
||||||
from langchain_community.tools import DuckDuckGoSearchResults
|
from langchain_community.tools import DuckDuckGoSearchResults
|
||||||
from googlesearch import search as google_search
|
from googlesearch import search as google_search
|
||||||
from yahoo_search import search as yahoo_search
|
|
||||||
|
|
||||||
|
|
||||||
def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10) -> List[str]:
|
def search_on_web(query: str, search_engine: str = "Google", max_results: int = 10) -> List[str]:
|
||||||
@ -43,16 +42,5 @@ def search_on_web(query: str, search_engine: str = "Google", max_results: int =
|
|||||||
links = re.findall(r'https?://[^\s,\]]+', res)
|
links = re.findall(r'https?://[^\s,\]]+', res)
|
||||||
|
|
||||||
return links
|
return links
|
||||||
elif search_engine.lower() == "yahoo":
|
|
||||||
list_result = yahoo_search(query)
|
|
||||||
results = []
|
|
||||||
for page in list_result.pages:
|
|
||||||
if len(results) >= max_results: # Check if max_results has already been reached
|
|
||||||
break # Exit loop if max_results has been reached
|
|
||||||
try:
|
|
||||||
results.append(page.link)
|
|
||||||
except AttributeError:
|
|
||||||
continue
|
|
||||||
return results
|
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"The only search engines available are DuckDuckGo or Google")
|
"The only search engines available are DuckDuckGo or Google")
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user