From 5d6d996e8f6132101d4c3af835d74f0674baffa1 Mon Sep 17 00:00:00 2001 From: Marco Perini Date: Mon, 13 May 2024 07:26:43 +0200 Subject: [PATCH] fix(proxy-rotation): removed max_shape duplicate --- examples/openai/proxy.py | 11 ----------- examples/openai/smart_scraper_openai.py | 6 +++--- scrapegraphai/utils/proxy_rotation.py | 8 +++++++- 3 files changed, 10 insertions(+), 15 deletions(-) delete mode 100644 examples/openai/proxy.py diff --git a/examples/openai/proxy.py b/examples/openai/proxy.py deleted file mode 100644 index 6572a6e8..00000000 --- a/examples/openai/proxy.py +++ /dev/null @@ -1,11 +0,0 @@ -from scrapegraphai.utils import search_proxy_servers - -proxies = search_proxy_servers( - anonymous=True, - countryset={"IT"}, - # secure=True, - timeout=1.0, - max_shape=2 -) - -print(proxies) \ No newline at end of file diff --git a/examples/openai/smart_scraper_openai.py b/examples/openai/smart_scraper_openai.py index d8adb548..e5b5cd5d 100644 --- a/examples/openai/smart_scraper_openai.py +++ b/examples/openai/smart_scraper_openai.py @@ -28,10 +28,10 @@ graph_config = { "server": "broker", "criteria": { "anonymous": True, - # "secure": True, + "secure": True, "countryset": {"IT"}, - "timeout": 5.0, - "max_shape": 2 + "timeout": 10.0, + "max_shape": 3 }, }, } diff --git a/scrapegraphai/utils/proxy_rotation.py b/scrapegraphai/utils/proxy_rotation.py index e3421cc1..9938f168 100644 --- a/scrapegraphai/utils/proxy_rotation.py +++ b/scrapegraphai/utils/proxy_rotation.py @@ -161,7 +161,13 @@ def _search_proxy(proxy: Proxy) -> ProxySettings: Returns: A 'playwright' compliant proxy configuration. """ - server = search_proxy_servers(**proxy.get("criteria", {}))[0] + + + # remove max_shape from criteria + criteria = proxy.get("criteria", {}).copy() + criteria.pop("max_shape", None) + + server = search_proxy_servers(max_shape=1, **criteria)[0] return {"server": server}