mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-07-01 21:00:48 +08:00
feat: add integration with scrape.do
This commit is contained in:
parent
5002c713d5
commit
ae275ec5e8
@ -270,10 +270,10 @@ class FetchNode(BaseNode):
|
|||||||
else:
|
else:
|
||||||
loader_kwargs = {}
|
loader_kwargs = {}
|
||||||
|
|
||||||
if self.node_config is not None:
|
if self.node_config:
|
||||||
loader_kwargs = self.node_config.get("loader_kwargs", {})
|
loader_kwargs = self.node_config.get("loader_kwargs", {})
|
||||||
|
|
||||||
if self.browser_base is not None:
|
if self.browser_base:
|
||||||
try:
|
try:
|
||||||
from ..docloaders.browser_base import browser_base_fetch
|
from ..docloaders.browser_base import browser_base_fetch
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@ -285,7 +285,7 @@ class FetchNode(BaseNode):
|
|||||||
|
|
||||||
document = [Document(page_content=content,
|
document = [Document(page_content=content,
|
||||||
metadata={"source": source}) for content in data]
|
metadata={"source": source}) for content in data]
|
||||||
elif self.scrape_do is not None:
|
elif self.scrape_do:
|
||||||
from ..docloaders.scrape_do import scrape_do_fetch
|
from ..docloaders.scrape_do import scrape_do_fetch
|
||||||
if (self.scrape_do.get("use_proxy") is None) or \
|
if (self.scrape_do.get("use_proxy") is None) or \
|
||||||
self.scrape_do.get("geoCode") is None or \
|
self.scrape_do.get("geoCode") is None or \
|
||||||
|
|||||||
@ -57,6 +57,7 @@ class FetchNodeLevelK(BaseNode):
|
|||||||
self.headless = node_config.get("headless", True) if node_config else True
|
self.headless = node_config.get("headless", True) if node_config else True
|
||||||
self.loader_kwargs = node_config.get("loader_kwargs", {}) if node_config else {}
|
self.loader_kwargs = node_config.get("loader_kwargs", {}) if node_config else {}
|
||||||
self.browser_base = node_config.get("browser_base", None)
|
self.browser_base = node_config.get("browser_base", None)
|
||||||
|
self.scrape_do = node_config.get("scrape_do", None)
|
||||||
self.depth = node_config.get("depth", 1) if node_config else 1
|
self.depth = node_config.get("depth", 1) if node_config else 1
|
||||||
self.only_inside_links = node_config.get("only_inside_links", False) if node_config else False
|
self.only_inside_links = node_config.get("only_inside_links", False) if node_config else False
|
||||||
self.min_input_len = 1
|
self.min_input_len = 1
|
||||||
@ -115,6 +116,11 @@ class FetchNodeLevelK(BaseNode):
|
|||||||
self.browser_base.get("project_id"), [source])
|
self.browser_base.get("project_id"), [source])
|
||||||
document = [Document(page_content=content,
|
document = [Document(page_content=content,
|
||||||
metadata={"source": source}) for content in data]
|
metadata={"source": source}) for content in data]
|
||||||
|
elif self.scrape_do:
|
||||||
|
from ..docloaders.scrape_do import scrape_do_fetch
|
||||||
|
data = scrape_do_fetch(self.scrape_do.get("api_key"), source)
|
||||||
|
document = [Document(page_content=data,
|
||||||
|
metadata={"source": source})]
|
||||||
else:
|
else:
|
||||||
loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs)
|
loader = ChromiumLoader([source], headless=self.headless, **loader_kwargs)
|
||||||
document = loader.load()
|
document = loader.load()
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user