diff --git a/scrapegraphai/utils/cleanup_html.py b/scrapegraphai/utils/cleanup_html.py index 226e4a8b..00f742a7 100644 --- a/scrapegraphai/utils/cleanup_html.py +++ b/scrapegraphai/utils/cleanup_html.py @@ -5,6 +5,7 @@ from bs4 import BeautifulSoup from minify_html import minify from urllib.parse import urljoin + def cleanup_html(html_content: str, base_url: str) -> str: """ Processes HTML content by removing unnecessary tags, minifying the HTML, and extracting the title and body content. @@ -47,5 +48,4 @@ def cleanup_html(html_content: str, base_url: str) -> str: minimized_body = minify(str(body_content)) return "Title: " + title + ", Body: " + minimized_body + ", Links: " + str(link_urls) - - return "Title: " + title + ", Body: No body content found" + ", Links: " + str(link_urls) \ No newline at end of file + return "Title: " + title + ", Body: No body content found" + ", Links: " + str(link_urls)