diff --git a/scrapegraphai/utils/cleanup_html.py b/scrapegraphai/utils/cleanup_html.py
index 2ec3b140..9b00f61c 100644
--- a/scrapegraphai/utils/cleanup_html.py
+++ b/scrapegraphai/utils/cleanup_html.py
@@ -60,13 +60,18 @@ def minify_html(html):
     """
     minify_html function 
     """
-    html = re.sub(r'<!--.*?-->', '', html, flags=re.DOTALL)
-
-    html = re.sub(r'>\s+<', '><', html)
-    html = re.sub(r'\s+>', '>', html)
-    html = re.sub(r'<\s+', '<', html)
-    html = re.sub(r'\s+', ' ', html)
-    html = re.sub(r'\s*=\s*', '=', html)
+    # Apply each (pattern, replacement, flags) substitution in sequence; order matters
+    patterns = [
+        (r'<!--.*?-->', '', re.DOTALL),
+        (r'>\s+<', '><', 0),
+        (r'\s+>', '>', 0), 
+        (r'<\s+', '<', 0),
+        (r'\s+', ' ', 0),
+        (r'\s*=\s*', '=', 0)
+    ]
+    
+    for pattern, repl, flags in patterns:
+        html = re.sub(pattern, repl, html, flags=flags)
 
     return html.strip()
 
diff --git a/scrapegraphai/utils/copy.py b/scrapegraphai/utils/copy.py
index a35370ab..2ec7cee2 100644
--- a/scrapegraphai/utils/copy.py
+++ b/scrapegraphai/utils/copy.py
@@ -30,56 +30,38 @@ def is_boto3_client(obj):
 
 def safe_deepcopy(obj: Any) -> Any:
     """
-    Attempts to create a deep copy of the object using `copy.deepcopy`
-    whenever possible. If that fails, it falls back to custom deep copy
-    logic. If that also fails, it raises a `DeepCopyError`.
-
+    Copy an object: recurse into built-in containers, shallow-copy anything else.
+    
     Args:
-        obj (Any): The object to be copied, which can be of any type.
-
+        obj: Object to copy
+        
     Returns:
-        Any: A deep copy of the object if possible; otherwise, a shallow
-        copy if deep copying fails; if neither is possible, the original
-        object is returned.
+        Copy of the object; None/str/int/float/bool and boto3 clients are returned as-is
+        
     Raises:
-        DeepCopyError: If the object cannot be deep-copied or shallow-copied.
+        DeepCopyError: If object cannot be deep copied
     """
-
     try:
-
-        return copy.deepcopy(obj)
-    except (TypeError, AttributeError) as e:
-
-        if isinstance(obj, dict):
-            new_obj = {}
-
-            for k, v in obj.items():
-                new_obj[k] = safe_deepcopy(v)
-            return new_obj
-
-        elif isinstance(obj, list):
-            new_obj = []
-
-            for v in obj:
-                new_obj.append(safe_deepcopy(v))
-            return new_obj
-
-        elif isinstance(obj, tuple):
-            new_obj = tuple(safe_deepcopy(v) for v in obj)
-
-            return new_obj
-
-        elif isinstance(obj, frozenset):
-            new_obj = frozenset(safe_deepcopy(v) for v in obj)
-            return new_obj
-
-        elif is_boto3_client(obj):
+        # Handle special cases first
+        if obj is None or isinstance(obj, (str, int, float, bool)):
             return obj
-
-        else:
-            try:
-                return copy.copy(obj)
-            except (TypeError, AttributeError):
-                raise DeepCopyError(
-                    f"Cannot deep copy the object of type {type(obj)}"
-                ) from e
+            
+        if isinstance(obj, (list, set)):
+            return type(obj)(safe_deepcopy(v) for v in obj)
+            
+        if isinstance(obj, dict):
+            return {k: safe_deepcopy(v) for k, v in obj.items()}
+            
+        if isinstance(obj, tuple):
+            return tuple(safe_deepcopy(v) for v in obj)
+            
+        if isinstance(obj, frozenset):
+            return frozenset(safe_deepcopy(v) for v in obj)
+            
+        if is_boto3_client(obj):
+            return obj
+            
+        return copy.copy(obj)
+        
+    except Exception as e:
+        raise DeepCopyError(f"Cannot deep copy object of type {type(obj)}") from e
diff --git a/scrapegraphai/utils/research_web.py b/scrapegraphai/utils/research_web.py
index af351ad4..86f9f5f3 100644
--- a/scrapegraphai/utils/research_web.py
+++ b/scrapegraphai/utils/research_web.py
@@ -9,101 +9,97 @@ import requests
 from bs4 import BeautifulSoup
 
 def search_on_web(query: str, search_engine: str = "Google",
-                  max_results: int = 10, port: int = 8080, 
+                  max_results: int = 10, port: int = 8080,
                   timeout: int = 10, proxy: str | dict = None) -> List[str]:
+    """Search the web for a query with the chosen engine and return the result URLs, excluding PDF links."""
+    
+    # Input validation
+    if not query or not isinstance(query, str):
+        raise ValueError("Query must be a non-empty string")
+        
+    search_engine = search_engine.lower()
+    valid_engines = {"google", "duckduckgo", "bing", "searxng"}
+    if search_engine not in valid_engines:
+        raise ValueError(f"Search engine must be one of: {', '.join(valid_engines)}")
+
+    # Format proxy once
+    formatted_proxy = None
+    if proxy:
+        formatted_proxy = format_proxy(proxy)
+        
+    try:
+        results = []
+        if search_engine == "google":
+            results = list(google_search(query, num_results=max_results, proxy=formatted_proxy))
+            
+        elif search_engine == "duckduckgo":
+            research = DuckDuckGoSearchResults(max_results=max_results)
+            res = research.run(query)
+            results = re.findall(r'https?://[^\s,\]]+', res)
+            
+        elif search_engine == "bing":
+            results = _search_bing(query, max_results, timeout, formatted_proxy)
+            
+        elif search_engine == "searxng":
+            results = _search_searxng(query, max_results, port, timeout)
+            
+        return filter_pdf_links(results)
+        
+    except requests.Timeout:
+        raise TimeoutError(f"Search request timed out after {timeout} seconds")
+    except requests.RequestException as e:
+        raise RuntimeError(f"Search request failed: {str(e)}")
+
+def _search_bing(query: str, max_results: int, timeout: int, proxy: str = None) -> List[str]:
+    """Fetch a Bing results page and return up to max_results result links."""
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+    }
+    search_url = f"https://www.bing.com/search?q={query}"
+    
+    proxies = {"http": proxy, "https": proxy} if proxy else None
+    response = requests.get(search_url, headers=headers, timeout=timeout, proxies=proxies)
+    response.raise_for_status()
+    
+    soup = BeautifulSoup(response.text, "html.parser")
+    return [result.find('a')['href'] for result in soup.find_all('li', class_='b_algo', limit=max_results)]
+
+def _search_searxng(query: str, max_results: int, port: int, timeout: int) -> List[str]:
+    """Query the SearXNG instance at localhost:port and return up to max_results result URLs."""
+    url = f"http://localhost:{port}"
+    params = {
+        "q": query,
+        "format": "json",
+        "engines": "google,duckduckgo,brave,qwant,bing"
+    }
+    response = requests.get(url, params=params, timeout=timeout)
+    response.raise_for_status()
+    return [result['url'] for result in response.json().get("results", [])[:max_results]]
+
+def format_proxy(proxy):
+    if isinstance(proxy, dict):
+        server = proxy.get('server')
+        username = proxy.get('username')
+        password = proxy.get('password')
+
+        if all([username, password, server]):
+            proxy_url = f"http://{username}:{password}@{server}"
+            return proxy_url
+        else:
+            raise ValueError("Proxy dictionary is missing required fields.")
+    elif isinstance(proxy, str):
+        return proxy  # "https://username:password@ip:port"
+    else:
+        raise TypeError("Proxy should be a dictionary or a string.")
+            
+def filter_pdf_links(links: List[str]) -> List[str]:
     """
-    Searches the web for a given query using specified search
-    engine options and filters out PDF links.
+    Filters out any links that point to PDF files.
 
     Args:
-        query (str): The search query to find on the internet.
-        search_engine (str, optional): Specifies the search engine to use, 
-        options include 'Google', 'DuckDuckGo', 'Bing', or 'SearXNG'. Default is 'Google'.
-        max_results (int, optional): The maximum number of search results to return.
-        port (int, optional): The port number to use when searching with 'SearXNG'. Default is 8080.
-        timeout (int, optional): The number of seconds to wait 
-        for a response from a request. Default is 10 seconds.
-        proxy (dict or string, optional): The proxy server to use for the request. Default is None. 
+        links (List[str]): A list of URLs as strings.
 
     Returns:
-        List[str]: A list of URLs as strings that are the search results, excluding any PDF links.
-
-    Raises:
-        ValueError: If the search engine specified is not supported.
-        requests.exceptions.Timeout: If the request times out.
-
-    Example:
-        >>> search_on_web("example query", search_engine="Google", max_results=5)
-        ['http://example.com', 'http://example.org', ...]
+        List[str]: A list of URLs excluding any that end with '.pdf'.
     """
-
-    def format_proxy(proxy):
-        if isinstance(proxy, dict):
-            server = proxy.get('server')
-            username = proxy.get('username')
-            password = proxy.get('password')
-
-            if all([username, password, server]):
-                proxy_url = f"http://{username}:{password}@{server}"
-                return proxy_url
-            else:
-                raise ValueError("Proxy dictionary is missing required fields.")
-        elif isinstance(proxy, str):
-            return proxy  # "https://username:password@ip:port"
-        else:
-            raise TypeError("Proxy should be a dictionary or a string.")
-            
-    def filter_pdf_links(links: List[str]) -> List[str]:
-        """
-        Filters out any links that point to PDF files.
-
-        Args:
-            links (List[str]): A list of URLs as strings.
-
-        Returns:
-            List[str]: A list of URLs excluding any that end with '.pdf'.
-        """
-        return [link for link in links if not link.lower().endswith('.pdf')]
-
-    if proxy:
-        proxy = format_proxy(proxy)
-
-    if search_engine.lower() == "google":
-        res = []
-        for url in google_search(query, num_results=max_results, proxy=proxy):
-            res.append(url)
-        return filter_pdf_links(res)
-
-    elif search_engine.lower() == "duckduckgo":
-        research = DuckDuckGoSearchResults(max_results=max_results)
-        res = research.run(query)
-        links = re.findall(r'https?://[^\s,\]]+', res)
-        return filter_pdf_links(links)
-
-    elif search_engine.lower() == "bing":
-        headers = {
-            "User-Agent": """Mozilla/5.0 (Windows NT 10.0; Win64; x64) 
-            AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"""
-        }
-        search_url = f"https://www.bing.com/search?q={query}"
-        response = requests.get(search_url, headers=headers, timeout=timeout)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, "html.parser")
-
-        search_results = []
-        for result in soup.find_all('li', class_='b_algo', limit=max_results):
-            link = result.find('a')['href']
-            search_results.append(link)
-        return filter_pdf_links(search_results)
-
-    elif search_engine.lower() == "searxng":
-        url = f"http://localhost:{port}"
-        params = {"q": query, "format": "json", "engines": "google,duckduckgo,brave,qwant,bing"}
-        response = requests.get(url, params=params, timeout=timeout)
-        data = response.json()
-        limited_results = [result['url'] for result in data["results"][:max_results]]
-        return filter_pdf_links(limited_results)
-
-    else:
-        raise ValueError("""The only search engines available are
-                         DuckDuckGo, Google, Bing, or SearXNG""")
+    return [link for link in links if not link.lower().endswith('.pdf')]