From d6a77029bbec7de0976dd2f41a8a11d2ee43de4f Mon Sep 17 00:00:00 2001 From: Matteo Vedovati Date: Tue, 24 Sep 2024 18:12:50 +0200 Subject: [PATCH] Validator fixed --- requirements-dev.lock | 2 +- requirements.lock | 5 ++- scrapegraphai/nodes/generate_code_node.py | 42 ++++++++--------------- 3 files changed, 18 insertions(+), 31 deletions(-) diff --git a/requirements-dev.lock b/requirements-dev.lock index 2d0f10a0..0523351a 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -238,7 +238,7 @@ mdurl==0.1.2 minify-html==0.15.0 # via scrapegraphai mistral-common==1.4.1 - + # via scrapegraphai mpire==2.10.2 # via semchunk multidict==6.0.5 diff --git a/requirements.lock b/requirements.lock index 6b66d6f3..6ee34ba9 100644 --- a/requirements.lock +++ b/requirements.lock @@ -166,6 +166,7 @@ marshmallow==3.21.3 minify-html==0.15.0 # via scrapegraphai mistral-common==1.4.1 + # via scrapegraphai mpire==2.10.2 # via semchunk multidict==6.0.5 @@ -255,7 +256,6 @@ pyyaml==6.0.1 referencing==0.35.1 # via jsonschema # via jsonschema-specifications - regex==2024.5.15 # via tiktoken # via transformers @@ -279,10 +279,9 @@ s3transfer==0.10.2 safetensors==0.4.5 # via transformers semchunk==2.2.0 - # via scrapegraphai + # via scrapegraphai sentencepiece==0.2.0 # via mistral-common - six==1.16.0 # via python-dateutil sniffio==1.3.1 diff --git a/scrapegraphai/nodes/generate_code_node.py b/scrapegraphai/nodes/generate_code_node.py index 99b6852d..c4c04d52 100644 --- a/scrapegraphai/nodes/generate_code_node.py +++ b/scrapegraphai/nodes/generate_code_node.py @@ -608,42 +608,30 @@ class GenerateCodeNode(BaseNode): return match.group(1) if match else code -def normalize_string(s: str) -> str: - return ''.join(c for c in s.lower().strip() if c not in string.punctuation) -def normalize_string(s: str) -> str: - """Normalize a string by converting to lowercase and stripping spaces.""" - return s.lower().strip() -def normalize_dict(d: dict) -> dict: - """ - Normalize the dictionary by: - - Converting all string values to lowercase and stripping spaces. - - Recursively normalizing nested dictionaries. - - Sorting lists of primitives and creating sorted list of normalized dicts for lists of dicts. - """ +def normalize_dict(d: Dict[str, Any]) -> Dict[str, Any]: normalized = {} for key, value in d.items(): if isinstance(value, str): - normalized[key] = normalize_string(value) + normalized[key] = value.lower().strip() elif isinstance(value, dict): normalized[key] = normalize_dict(value) elif isinstance(value, list): - if all(isinstance(v, dict) for v in value): - normalized[key] = sorted( - normalize_dict(v) for v in value - ) - else: - normalized[key] = sorted( - normalize_dict(v) if isinstance(v, dict) - else normalize_string(v) if isinstance(v, str) - else v - for v in value - ) + normalized[key] = normalize_list(value) else: normalized[key] = value - return dict(sorted(normalized.items())) + return normalized -def are_content_equal(generated_result: dict, reference_result: dict) -> bool: +def normalize_list(lst: List[Any]) -> List[Any]: + return [ + normalize_dict(item) if isinstance(item, dict) + else normalize_list(item) if isinstance(item, list) + else item.lower().strip() if isinstance(item, str) + else item + for item in lst + ] + +def are_content_equal(generated_result: Dict[str, Any], reference_result: Dict[str, Any]) -> bool: """Compare two dictionaries for semantic equality.""" - return normalize_dict(generated_result) == normalize_dict(reference_result) + return normalize_dict(generated_result) == normalize_dict(reference_result) \ No newline at end of file