diff --git a/scrapegraphai/docloaders/browser_base.py b/scrapegraphai/docloaders/browser_base.py index 318c9f38..0d6a78c1 100644 --- a/scrapegraphai/docloaders/browser_base.py +++ b/scrapegraphai/docloaders/browser_base.py @@ -13,6 +13,8 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[s - `api_key`: The API key provided by BrowserBase. - `project_id`: The ID of the project on BrowserBase where you want to fetch data from. - `link`: The URL or link that you want to fetch data from. + - `text_content`: A boolean flag to specify whether to return only the text content (True) or the full HTML (False). + - `async_mode`: A boolean flag that determines whether the function runs asynchronously (True) or synchronously (False, default). It initializes a Browserbase object with the given API key and project ID, then uses this object to load the specified link. @@ -35,6 +37,8 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[s api_key (str): The API key provided by BrowserBase. project_id (str): The ID of the project on BrowserBase where you want to fetch data from. link (str): The URL or link that you want to fetch data from. + text_content (bool): Whether to return only the text content (True) or the full HTML (False). Defaults to True. + async_mode (bool): Whether to run the function asynchronously (True) or synchronously (False). Defaults to False. Returns: object: The result of the loading operation. @@ -49,7 +53,22 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[s browserbase = Browserbase(api_key=api_key, project_id=project_id) result = [] - for l in link: - result.append(browserbase.load(l, text_content=True)) + # Define the async fetch logic for individual links + async def _async_fetch_link(l): + return await asyncio.to_thread(browserbase.load, l, text_content=text_content) + + if async_mode: + # Asynchronously process each link + async def _async_browser_base_fetch(): + for l in link: + result.append(await _async_fetch_link(l)) + return result + + # Run the async fetch function + result = asyncio.run(_async_browser_base_fetch()) + else: + # Synchronous logic + for l in link: + result.append(browserbase.load(l, text_content=text_content)) return result