From 032a4916054eecc60879d63ec077cbfb91d9958a Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Wed, 17 Jul 2024 23:06:05 +0200 Subject: [PATCH] Update parse_node.py --- scrapegraphai/nodes/parse_node.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scrapegraphai/nodes/parse_node.py b/scrapegraphai/nodes/parse_node.py index 5c6c7cb3..cdca1b55 100644 --- a/scrapegraphai/nodes/parse_node.py +++ b/scrapegraphai/nodes/parse_node.py @@ -74,7 +74,7 @@ class ParseNode(BaseNode): docs_transformed = docs_transformed[0] chunks = chunk(text=docs_transformed.page_content, - chunk_size= self.node_config.get("chunk_size", 4096), + chunk_size= self.node_config.get("chunk_size", 4096)-250, token_counter=lambda x: len(x), memoize=False) else: @@ -82,13 +82,13 @@ class ParseNode(BaseNode): if type(docs_transformed) == Document: chunks = chunk(text=docs_transformed.page_content, - chunk_size= self.node_config.get("chunk_size", 4096), + chunk_size= self.node_config.get("chunk_size", 4096)-250, token_counter=lambda x: len(x), memoize=False) else: chunks = chunk(text=docs_transformed, - chunk_size= self.node_config.get("chunk_size", 4096), + chunk_size= self.node_config.get("chunk_size", 4096)-250, token_counter=lambda x: len(x), memoize=False)