Merge branch 'deepScrape' of github.com:mayurdb/Scrapegraph-ai into deepScrape

This commit is contained in:
mayurdb 2024-05-11 16:58:54 +05:30
commit dd29c16cbe

View File

@ -5,6 +5,7 @@ from bs4 import BeautifulSoup
from minify_html import minify
from urllib.parse import urljoin
def cleanup_html(html_content: str, base_url: str) -> str:
"""
Processes HTML content by removing unnecessary tags, minifying the HTML, and extracting the title and body content.
@ -47,5 +48,4 @@ def cleanup_html(html_content: str, base_url: str) -> str:
minimized_body = minify(str(body_content))
return "Title: " + title + ", Body: " + minimized_body + ", Links: " + str(link_urls)
return "Title: " + title + ", Body: No body content found" + ", Links: " + str(link_urls)
return "Title: " + title + ", Body: No body content found" + ", Links: " + str(link_urls)