mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-07-04 21:00:36 +08:00
fix: error on fetching the code
This commit is contained in:
parent
b2720a452f
commit
7285ab065b
@ -100,18 +100,11 @@ class ChromiumLoader(BaseLoader):
|
|||||||
async def ascrape_playwright(self, url: str) -> str:
|
async def ascrape_playwright(self, url: str) -> str:
|
||||||
"""
|
"""
|
||||||
Asynchronously scrape the content of a given URL using Playwright's async API.
|
Asynchronously scrape the content of a given URL using Playwright's async API.
|
||||||
|
|
||||||
Args:
|
|
||||||
url (str): The URL to scrape.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: The scraped HTML content or an error message if an exception occurs.
|
|
||||||
"""
|
"""
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
from undetected_playwright import Malenia
|
from undetected_playwright import Malenia
|
||||||
|
|
||||||
logger.info(f"Starting scraping with {self.backend}...")
|
logger.info(f"Starting scraping with {self.backend}...")
|
||||||
results = ""
|
|
||||||
attempt = 0
|
attempt = 0
|
||||||
|
|
||||||
while attempt < self.RETRY_LIMIT:
|
while attempt < self.RETRY_LIMIT:
|
||||||
@ -127,16 +120,15 @@ class ChromiumLoader(BaseLoader):
|
|||||||
await page.wait_for_load_state(self.load_state)
|
await page.wait_for_load_state(self.load_state)
|
||||||
results = await page.content()
|
results = await page.content()
|
||||||
logger.info("Content scraped")
|
logger.info("Content scraped")
|
||||||
break
|
return results
|
||||||
except (aiohttp.ClientError, asyncio.TimeoutError, Exception) as e:
|
except (aiohttp.ClientError, asyncio.TimeoutError, Exception) as e:
|
||||||
attempt += 1
|
attempt += 1
|
||||||
logger.error(f"Attempt {attempt} failed: {e}")
|
logger.error(f"Attempt {attempt} failed: {e}")
|
||||||
if attempt == self.RETRY_LIMIT:
|
if attempt == self.RETRY_LIMIT:
|
||||||
results = f"Error: Network error after {self.RETRY_LIMIT} attempts - {e}"
|
raise RuntimeError(f"Failed to fetch {url} after {self.RETRY_LIMIT} attempts: {e}")
|
||||||
finally:
|
finally:
|
||||||
await browser.close()
|
if 'browser' in locals():
|
||||||
|
await browser.close()
|
||||||
return results
|
|
||||||
|
|
||||||
async def ascrape_with_js_support(self, url: str) -> str:
|
async def ascrape_with_js_support(self, url: str) -> str:
|
||||||
"""
|
"""
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user