"""Tests for ``FetchNode`` covering URL, JSON, XML, CSV and raw-text inputs."""

from scrapegraphai.nodes import FetchNode
from langchain_core.documents import Document


def test_fetch_html(mocker):
    """Fetch a URL through a mocked ``ChromiumLoader`` and check the document.

    ``ChromiumLoader`` is patched where ``fetch_node`` looks it up, and its
    ``load()`` is stubbed to return one canned ``Document``. The test then
    checks that the loader was called exactly once and that the title, link
    URL and image URL from the fixture are present in the returned document.
    """
    title = "ScrapeGraph AI"
    link_url = "https://github.com/VinciGit00/Scrapegraph-ai"
    img_url = "https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/scrapegraphai_logo.png"
    # The fixture must actually carry the link and image URLs, otherwise the
    # assertions on them below cannot hold.
    # NOTE(review): the exact markup is a reconstruction - confirm it matches
    # what the node is expected to parse.
    content = f"""
    <html>
      <head>
        <title>{title}</title>
      </head>
      <body>
        <a href="{link_url}">ScrapeGraphAI: You Only Scrape Once</a>
        <img src="{img_url}" alt="Scrapegraph-ai Logo">
      </body>
    </html>
    """

    mock_loader_cls = mocker.patch("scrapegraphai.nodes.fetch_node.ChromiumLoader")
    mock_loader = mock_loader_cls.return_value
    mock_loader.load.return_value = [Document(page_content=content)]

    node = FetchNode(
        input="url | local_dir",
        output=["doc", "links", "images"],
        node_config={"headless": False},
    )

    result = node.execute({"url": "https://scrapegraph-ai.com/example"})

    mock_loader.load.assert_called_once()
    # Check for None before indexing so a missing result fails with a clear
    # assertion instead of a TypeError.
    assert result is not None

    doc = result["doc"][0]
    assert title in doc.page_content
    assert link_url in doc.page_content
    assert img_url in doc.page_content


def test_fetch_json():
    """Execute a ``json`` FetchNode on ``inputs/example.json``; expect a result.

    The path is relative, so it is resolved against the working directory
    the tests are run from.
    """
    node = FetchNode(
        input="json",
        output=["doc"],
    )

    result = node.execute({"json": "inputs/example.json"})

    assert result is not None


def test_fetch_xml():
    """Execute an ``xml`` FetchNode on ``inputs/books.xml``; expect a result.

    The path is relative, so it is resolved against the working directory
    the tests are run from.
    """
    node = FetchNode(
        input="xml",
        output=["doc"],
    )

    result = node.execute({"xml": "inputs/books.xml"})

    assert result is not None


def test_fetch_csv():
    """Execute a ``csv`` FetchNode on ``inputs/username.csv``; expect a result.

    The path is relative, so it is resolved against the working directory
    the tests are run from.
    """
    node = FetchNode(
        input="csv",
        output=["doc"],
    )

    result = node.execute({"csv": "inputs/username.csv"})

    assert result is not None


def test_fetch_txt():
    """Execute a ``txt`` FetchNode on the contents of a text file.

    Unlike the other file-based tests, the file is read here and its text
    (not its path) is passed to the node under the ``txt`` key.
    """
    node = FetchNode(
        input="txt",
        output=["doc", "links", "images"],
    )

    # Explicit encoding keeps the read independent of the platform default.
    with open("inputs/plain_html_example.txt", encoding="utf-8") as f:
        result = node.execute({"txt": f.read()})

    assert result is not None