From dc9171936595de2eec0d2ce6b1e82336341f2d81 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Sat, 11 May 2024 10:49:16 +0200 Subject: [PATCH] Update cleanup_html.py --- scrapegraphai/utils/cleanup_html.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scrapegraphai/utils/cleanup_html.py b/scrapegraphai/utils/cleanup_html.py index bc16a99b..00f742a7 100644 --- a/scrapegraphai/utils/cleanup_html.py +++ b/scrapegraphai/utils/cleanup_html.py @@ -5,6 +5,7 @@ from bs4 import BeautifulSoup from minify_html import minify from urllib.parse import urljoin + def cleanup_html(html_content: str, base_url: str) -> str: """ Processes HTML content by removing unnecessary tags, minifying the HTML, and extracting the title and body content. @@ -45,9 +46,6 @@ def cleanup_html(html_content: str, base_url: str) -> str: if body_content: # Minify the HTML within the body tag minimized_body = minify(str(body_content)) - print("Came here") return "Title: " + title + ", Body: " + minimized_body + ", Links: " + str(link_urls) - - print("No Came here") - return "Title: " + title + ", Body: No body content found" + ", Links: " + str(link_urls) \ No newline at end of file + return "Title: " + title + ", Body: No body content found" + ", Links: " + str(link_urls)