feat(browser_base_fetch): add async_mode to support both synchronous and asynchronous execution

- Introduced an async_mode flag to allow users to choose between synchronous and asynchronous fetching using Browserbase.
- Refactored common logic (browserbase initialization and result list) to avoid redundancy.
- Added internal async handling with asyncio.to_thread() for non-blocking execution in async_mode.
- Maintained backward compatibility for existing synchronous functionality.
2026-06-28 21:01:55 +08:00 · 2024-09-08 08:49:08 +00:00 · 2024-09-08 08:49:08 +00:00 · d56253d183
commit d56253d183
parent a5401394cc
1 changed files with 21 additions and 2 deletions
--- a/scrapegraphai/docloaders/browser_base.py
+++ b/scrapegraphai/docloaders/browser_base.py
@ -13,6 +13,8 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[s
    - `api_key`: The API key provided by BrowserBase.
    - `project_id`: The ID of the project on BrowserBase where you want to fetch data from.
    - `link`: The URL or link that you want to fetch data from.
    - `text_content`: A boolean flag to specify whether to return only the text content (True) or the full HTML (False).
    - `async_mode`: A boolean flag that determines whether the function runs asynchronously (True) or synchronously (False, default).
    It initializes a Browserbase object with the given API key and project ID, 
    then uses this object to load the specified link. 
@ -35,6 +37,8 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[s
        api_key (str): The API key provided by BrowserBase.
        project_id (str): The ID of the project on BrowserBase where you want to fetch data from.
        link (str): The URL or link that you want to fetch data from.
        text_content (bool): Whether to return only the text content (True) or the full HTML (False). Defaults to True.
        async_mode (bool): Whether to run the function asynchronously (True) or synchronously (False). Defaults to False.
    Returns:
        object: The result of the loading operation.
@ -49,7 +53,22 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[s
    browserbase = Browserbase(api_key=api_key, project_id=project_id)
    result = []
-    for l in link:
+    # Define the async fetch logic for individual links
-        result.append(browserbase.load(l, text_content=True))
+    async def _async_fetch_link(l):
        return await asyncio.to_thread(browserbase.load, l, text_content=text_content)
    if async_mode:
        # Asynchronously process each link
        async def _async_browser_base_fetch():
            for l in link:
                result.append(await _async_fetch_link(l))
            return result
        # Run the async fetch function
        result = asyncio.run(_async_browser_base_fetch())
    else:
        # Synchronous logic
        for l in link:
            result.append(browserbase.load(l, text_content=text_content))
    return result