Merge pull request #368 from stevenmichaelthomas/wait-for-network-idle

Add the ability to specify load state
This commit is contained in:
Marco Vinciguerra 2024-06-11 19:59:23 +02:00 committed by GitHub
commit fa951b4c8b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 0 deletions

View File

@@ -29,6 +29,7 @@ class ChromiumLoader(BaseLoader):
backend: str = "playwright",
headless: bool = True,
proxy: Optional[Proxy] = None,
load_state: str = "domcontentloaded",
**kwargs: Any,
):
"""Initialize the loader with a list of URL paths.
@@ -55,6 +56,7 @@ class ChromiumLoader(BaseLoader):
self.headless = headless
self.proxy = parse_or_search_proxy(proxy) if proxy else None
self.urls = urls
self.load_state = load_state
async def ascrape_playwright(self, url: str) -> str:
"""
@@ -81,6 +83,7 @@ class ChromiumLoader(BaseLoader):
await Malenia.apply_stealth(context)
page = await context.new_page()
await page.goto(url)
await page.wait_for_load_state(self.load_state)
results = await page.content() # Simply get the HTML content
logger.info("Content scraped")
except Exception as e:

View File

@@ -83,6 +83,9 @@ class SearchLinkNode(BaseNode):
Assume relevance broadly, including any links that might be related or potentially useful
in relation to the task.
Sort it in order of importance, the first one should be the most important one, the last one
the least important
Please list only valid URLs and make sure to err on the side of inclusion if it's uncertain
whether the content at the link is directly relevant.