From 289ecaccdb14a6c7bc50a31924c77e25476f0cc4 Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Wed, 7 Feb 2024 17:01:37 +0100
Subject: [PATCH 1/4] add multiple requests

---
 yosoai/request.py | 39 ++++++++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/yosoai/request.py b/yosoai/request.py
index 54ce9c45..0a9a6db8 100644
--- a/yosoai/request.py
+++ b/yosoai/request.py
@@ -1,5 +1,6 @@
 import time
 from typing import List
+from multiprocessing import Pool
 from .getter import remover
 from .class_generator import Generator
 from .class_creator import create_class
@@ -7,10 +8,15 @@ from .token_calculator import truncate_text_tokens
 
 EMBEDDING_ENCODING = 'cl100k_base'
 
-def send_request(key: str, text:str, values:list[dict], model:str, temperature:float = 0.0, encoding_name: str = EMBEDDING_ENCODING) -> List[dict]:
+def process_message(args):
+    key, temperature, model, encoding_name, message = args
+    generator_instance = Generator(key, temperature, model)
+    result = generator_instance.invocation(message)
+    return result
+
+def send_request(key: str, text: str, values: list[dict], model: str, temperature: float = 0.0, encoding_name: str = EMBEDDING_ENCODING) -> List[dict]:
     """
     Send a request to openai.
-
     Args:
         key (str): The API key for accessing the language model.
         text (str): The input text to be processed.
@@ -19,32 +25,35 @@ def send_request(key: str, text:str, values:list[dict], model:str, temperature:f
                             - "title" (str): The title of the field.
                             - "type" (str): The type of the field.
                             - "description" (str): The description of the field.
-
         model (str): The name of the language model to be used.
         temperature (float): A parameter controlling the randomness of the language model's output (default: 0).
         encoding_name (str): The name of the encoding to be used (default: EMBEDDING_ENCODING).
-
     Returns:
         List[dict]: The result of the request to openai.
     """
-
     res = []
     create_class(values)
-    time.sleep(2) # TODO: implement an asynchrous waiting
+    time.sleep(2) # TODO: implement asynchronous waiting
 
     # text = remover(text)
 
     messages = truncate_text_tokens(text, model, encoding_name)
-    
-    count = 0
+    total_messages = len(messages)
+    processed_messages = 0
 
-    for message in messages:
-        generator_instance = Generator(key, temperature, model)
+    pool = Pool(processes=2) # Limita il numero di processi a 3
 
-        res.append(generator_instance.invocation(message))
+    for result in pool.imap_unordered(process_message, [(key, temperature, model, encoding_name, message) for message in messages]):
+        res.append(result)
+        processed_messages += 1
+        progress = processed_messages / total_messages * 100
+        print(f"Overall Progress: {progress:.2f}%")
 
-        print(res)
-        print(f"Percentage: {round(count/len(messages),2)*100}%")
-        count +=1
+        # Attendere 20 secondi tra le richieste per rispettare il limite
+        if processed_messages % 2 == 0:
+            time.sleep(60)
 
-    return res
\ No newline at end of file
+    pool.close()
+    pool.join()
+
+    return res

From d3ec995e949fe4e881012045f259d4ef11acef0c Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Wed, 7 Feb 2024 17:19:02 +0100
Subject: [PATCH 2/4] add a progress bar

---
 yosoai/request.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/yosoai/request.py b/yosoai/request.py
index 0a9a6db8..fc6dc226 100644
--- a/yosoai/request.py
+++ b/yosoai/request.py
@@ -1,6 +1,7 @@
 import time
 from typing import List
 from multiprocessing import Pool
+from tqdm import tqdm  # Import tqdm for progress bar
 from .getter import remover
 from .class_generator import Generator
 from .class_creator import create_class
@@ -41,17 +42,18 @@ def send_request(key: str, text: str, values: list[dict], model: str, temperatur
     total_messages = len(messages)
     processed_messages = 0
 
-    pool = Pool(processes=2) # Limita il numero di processi a 3
+    pool = Pool(processes=2) # Limit the number of processes to 3
 
-    for result in pool.imap_unordered(process_message, [(key, temperature, model, encoding_name, message) for message in messages]):
-        res.append(result)
-        processed_messages += 1
-        progress = processed_messages / total_messages * 100
-        print(f"Overall Progress: {progress:.2f}%")
+    with tqdm(total=total_messages) as pbar:
+        for result in pool.imap_unordered(process_message, [(key, temperature, model, encoding_name, message) for message in messages]):
+            res.append(result)
+            processed_messages += 1
+            pbar.update(1)  # Update the progress bar
+            progress = processed_messages / total_messages * 100
 
-        # Attendere 20 secondi tra le richieste per rispettare il limite
-        if processed_messages % 2 == 0:
-            time.sleep(60)
+            # Wait for 20 seconds between requests to respect the limit
+            if processed_messages % 2 == 0:
+                time.sleep(20)
 
     pool.close()
     pool.join()

From a884d8f261e2a5914df59fb009dcf0693d5c41c2 Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Wed, 7 Feb 2024 20:54:02 +0100
Subject: [PATCH 3/4] refactoring of remover function

---
 yosoai/getter.py  | 14 +++++++-------
 yosoai/request.py | 11 ++++-------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/yosoai/getter.py b/yosoai/getter.py
index a5a38325..c736aec1 100644
--- a/yosoai/getter.py
+++ b/yosoai/getter.py
@@ -17,27 +17,27 @@ def get_function(link:str, param = HEADERS) -> str:
     response = requests.get(url=link, headers=param)
     return str(response.content)
 
-def remover(file:str, only_body:bool = False) -> str:
+def remover(file: str, only_body: bool = False) -> str:
     """
     This function elaborates the HTML file and remove all the not necessary tag
     
     Parameters:
         file (str): the file to parse
+        only_body (bool): whether to parse only the body content or the entire file
 
     Returns:
         str: the parsed file
     """
-
     res = ""
     
-    if only_body == True:
+    if only_body:
         isBody = True
     else:
         isBody = False
 
     for elem in file.splitlines():
         if "<title>" in elem:
-            res = res + elem
+            res += elem
 
         if "<body>" in elem: 
             isBody = True
@@ -48,7 +48,7 @@ def remover(file:str, only_body:bool = False) -> str:
         if "<script>" in elem:
             continue
 
-        if isBody == True:
-            res = res + elem
+        if isBody:
+            res += elem
 
-    return res.replace("\n", "")
\ No newline at end of file
+    return res.replace("\\n", "")
diff --git a/yosoai/request.py b/yosoai/request.py
index fc6dc226..4da64f0a 100644
--- a/yosoai/request.py
+++ b/yosoai/request.py
@@ -36,26 +36,23 @@ def send_request(key: str, text: str, values: list[dict], model: str, temperatur
     create_class(values)
     time.sleep(2) # TODO: implement asynchronous waiting
 
-    # text = remover(text)
+    text = remover(text)
 
     messages = truncate_text_tokens(text, model, encoding_name)
     total_messages = len(messages)
     processed_messages = 0
 
-    pool = Pool(processes=2) # Limit the number of processes to 3
+    pool = Pool(processes=2) 
 
     with tqdm(total=total_messages) as pbar:
         for result in pool.imap_unordered(process_message, [(key, temperature, model, encoding_name, message) for message in messages]):
             res.append(result)
             processed_messages += 1
-            pbar.update(1)  # Update the progress bar
-            progress = processed_messages / total_messages * 100
+            pbar.update(1) 
 
             # Wait for 20 seconds between requests to respect the limit
             if processed_messages % 2 == 0:
                 time.sleep(20)
 
     pool.close()
-    pool.join()
-
-    return res
+    pool.join()
\ No newline at end of file

From 9bb0632295bf21e1938e3518a366a80370fd43a7 Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Thu, 8 Feb 2024 20:33:16 +0100
Subject: [PATCH 4/4] add the integration for multiple requests

---
 yosoai/request.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/yosoai/request.py b/yosoai/request.py
index 4da64f0a..4afc89b3 100644
--- a/yosoai/request.py
+++ b/yosoai/request.py
@@ -34,7 +34,7 @@ def send_request(key: str, text: str, values: list[dict], model: str, temperatur
     """
     res = []
     create_class(values)
-    time.sleep(2) # TODO: implement asynchronous waiting
+    time.sleep(2)  # FIXME: replace this fixed delay with asynchronous waiting
 
     text = remover(text)
 
@@ -45,14 +45,21 @@ def send_request(key: str, text: str, values: list[dict], model: str, temperatur
     pool = Pool(processes=2) 
 
     with tqdm(total=total_messages) as pbar:
         for result in pool.imap_unordered(process_message, [(key, temperature, model, encoding_name, message) for message in messages]):
             res.append(result)
             processed_messages += 1
             pbar.update(1) 
 
-            # Wait for 20 seconds between requests to respect the limit
-            if processed_messages % 2 == 0:
-                time.sleep(20)
+            # The pool already produced this result, so it must not be requested
+            # again here; only pace the loop. Skip the pause after the last result.
+            if processed_messages == total_messages:
+                break
+
+            # Wait 20 seconds after each result, and 60 seconds after every
+            # third one, to respect the API rate limit.
+            time.sleep(60 if processed_messages % 3 == 0 else 20)
 
     pool.close()
-    pool.join()
\ No newline at end of file
+    pool.join()
+
+    return res