fix(proxy-rotation): removed duplicated arg and passed the loader_kwargs correctly to the node

This commit is contained in:
Marco Perini 2024-05-12 18:39:03 +02:00
parent b54d984c13
commit 1e9a564616
6 changed files with 35 additions and 2 deletions

11
examples/openai/proxy.py Normal file
View File

@ -0,0 +1,11 @@
from scrapegraphai.utils import search_proxy_servers
# Search criteria forwarded as keyword arguments to the proxy lookup.
# The ``secure`` filter is deliberately not passed (it is disabled in this
# example); presumably "IT" is a country code -- confirm against the
# ``search_proxy_servers`` documentation.
criteria = {
    "anonymous": True,
    "countryset": {"IT"},
    "timeout": 1.0,
    "max_shape": 2,
}
proxies = search_proxy_servers(**criteria)

# Show whatever the lookup returned.
print(proxies)

View File

@ -22,6 +22,19 @@ graph_config = {
"model": "gpt-3.5-turbo",
},
"verbose": True,
"headless": False,
"loader_kwargs": {
"proxy" : {
"server": "broker",
"criteria": {
"anonymous": True,
# "secure": True,
"countryset": {"IT"},
"timeout": 5.0,
"max_shape": 2
},
},
}
}
# ************************************************

View File

@ -58,8 +58,11 @@ class AbstractGraph(ABC):
"verbose", False)
self.headless = True if config is None else config.get(
"headless", True)
self.loader_kwargs = config.get("loader_kwargs", {})
common_params = {"headless": self.headless,
"verbose": self.verbose,
"loader_kwargs": self.loader_kwargs,
"llm_model": self.llm_model,
"embedder_model": self.embedder_model}
self.set_common_params(common_params, overwrite=False)

View File

@ -57,7 +57,10 @@ class SmartScraperGraph(AbstractGraph):
"""
fetch_node = FetchNode(
input="url | local_dir",
output=["doc"]
output=["doc"],
node_config={
"loader_kwargs": self.config.get("loader_kwargs", {}),
}
)
parse_node = ParseNode(
input="doc",

View File

@ -49,6 +49,9 @@ class FetchNode(BaseNode):
self.verbose = (
False if node_config is None else node_config.get("verbose", False)
)
self.loader_kwargs = (
{} if node_config is None else node_config.get("loader_kwargs", {})
)
def execute(self, state):
"""

View File

@ -161,7 +161,7 @@ def _search_proxy(proxy: Proxy) -> ProxySettings:
Returns:
A 'playwright' compliant proxy configuration.
"""
server = search_proxy_servers(max_shape=1, **proxy.get("criteria", {}))[0]
server = search_proxy_servers(**proxy.get("criteria", {}))[0]
return {"server": server}