diff --git a/scrapegraphai/utils/convert_to_md.py b/scrapegraphai/utils/convert_to_md.py index 6f1a2334..c9961ae5 100644 --- a/scrapegraphai/utils/convert_to_md.py +++ b/scrapegraphai/utils/convert_to_md.py @@ -20,11 +20,12 @@ def convert_to_md(html: str, url: str = None) -> str: Note: All the styles and links are ignored during the conversion. """ + h = html2text.HTML2Text() + h.ignore_links = False + h.body_width = 0 if url: parsed_url = urlparse(url) domain = f"{parsed_url.scheme}://{parsed_url.netloc}" - h = html2text.HTML2Text() - h.ignore_links = False - h.baseurl = domain - h.body_width = 0 + h.baseurl = domain + return h.handle(html)