mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-23 21:00:30 +08:00
Merge pull request #352 from tejhande/patch-1
test: Enhance JSON scraping pipeline test
This commit is contained in:
commit
261c4fcdf5
@ -1,56 +1,50 @@
|
||||
"""
|
||||
Module for scraping json documents
|
||||
"""
|
||||
Module for scraping JSON documents
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from scrapegraphai.graphs import JSONScraperGraph
|
||||
|
||||
# Load configuration from a JSON file
|
||||
CONFIG_FILE = "config.json"
|
||||
with open(CONFIG_FILE, "r") as f:
|
||||
CONFIG = json.load(f)
|
||||
|
||||
# Fixture to read the sample JSON file
|
||||
@pytest.fixture
|
||||
def sample_json():
|
||||
"""
|
||||
Example of text
|
||||
Read the sample JSON file
|
||||
"""
|
||||
file_name = "inputs/example.json"
|
||||
curr_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
file_path = os.path.join(curr_dir, file_name)
|
||||
|
||||
with open(file_path, 'r', encoding="utf-8") as file:
|
||||
file_path = os.path.join(os.path.dirname(__file__), "inputs", "example.json")
|
||||
with open(file_path, "r", encoding="utf-8") as file:
|
||||
text = file.read()
|
||||
|
||||
return text
|
||||
|
||||
# Parametrized fixture to load graph configurations
|
||||
@pytest.fixture(params=CONFIG["graph_configs"])
|
||||
def graph_config(request):
|
||||
"""
|
||||
Load graph configuration
|
||||
"""
|
||||
return request.param
|
||||
|
||||
@pytest.fixture
|
||||
def graph_config():
|
||||
# Test function for the scraping pipeline
|
||||
def test_scraping_pipeline(sample_json, graph_config):
|
||||
"""
|
||||
Configuration of the graph
|
||||
Test the scraping pipeline
|
||||
"""
|
||||
return {
|
||||
"llm": {
|
||||
"model": "ollama/mistral",
|
||||
"temperature": 0,
|
||||
"format": "json",
|
||||
"base_url": "http://localhost:11434",
|
||||
},
|
||||
"embeddings": {
|
||||
"model": "ollama/nomic-embed-text",
|
||||
"temperature": 0,
|
||||
"base_url": "http://localhost:11434",
|
||||
}
|
||||
}
|
||||
expected_titles = ["Title 1", "Title 2", "Title 3"] # Replace with expected titles
|
||||
|
||||
|
||||
def test_scraping_pipeline(sample_json: str, graph_config: dict):
|
||||
"""
|
||||
Start of the scraping pipeline
|
||||
"""
|
||||
smart_scraper_graph = JSONScraperGraph(
|
||||
prompt="List me all the titles",
|
||||
source=sample_json,
|
||||
config=graph_config
|
||||
)
|
||||
|
||||
result = smart_scraper_graph.run()
|
||||
|
||||
assert result is not None
|
||||
assert isinstance(result, list)
|
||||
assert sorted(result) == sorted(expected_titles)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user