From 289ecaccdb14a6c7bc50a31924c77e25476f0cc4 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Wed, 7 Feb 2024 17:01:37 +0100 Subject: [PATCH 1/4] add multiple requests --- yosoai/request.py | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/yosoai/request.py b/yosoai/request.py index 54ce9c45..0a9a6db8 100644 --- a/yosoai/request.py +++ b/yosoai/request.py @@ -1,5 +1,6 @@ import time from typing import List +from multiprocessing import Pool from .getter import remover from .class_generator import Generator from .class_creator import create_class @@ -7,10 +8,15 @@ from .token_calculator import truncate_text_tokens EMBEDDING_ENCODING = 'cl100k_base' -def send_request(key: str, text:str, values:list[dict], model:str, temperature:float = 0.0, encoding_name: str = EMBEDDING_ENCODING) -> List[dict]: +def process_message(args): + key, temperature, model, encoding_name, message = args + generator_instance = Generator(key, temperature, model) + result = generator_instance.invocation(message) + return result + +def send_request(key: str, text: str, values: list[dict], model: str, temperature: float = 0.0, encoding_name: str = EMBEDDING_ENCODING) -> List[dict]: """ Send a request to openai. - Args: key (str): The API key for accessing the language model. text (str): The input text to be processed. @@ -19,32 +25,35 @@ def send_request(key: str, text:str, values:list[dict], model:str, temperature:f - "title" (str): The title of the field. - "type" (str): The type of the field. - "description" (str): The description of the field. - model (str): The name of the language model to be used. temperature (float): A parameter controlling the randomness of the language model's output (default: 0). encoding_name (str): The name of the encoding to be used (default: EMBEDDING_ENCODING). - Returns: List[dict]: The result of the request to openai. """ - res = [] create_class(values) - time.sleep(2) # TODO: implement an asynchrous waiting + time.sleep(2) # TODO: implement asynchronous waiting # text = remover(text) messages = truncate_text_tokens(text, model, encoding_name) - - count = 0 + total_messages = len(messages) + processed_messages = 0 - for message in messages: - generator_instance = Generator(key, temperature, model) + pool = Pool(processes=2) # Limita il numero di processi a 3 - res.append(generator_instance.invocation(message)) + for result in pool.imap_unordered(process_message, [(key, temperature, model, encoding_name, message) for message in messages]): + res.append(result) + processed_messages += 1 + progress = processed_messages / total_messages * 100 + print(f"Overall Progress: {progress:.2f}%") - print(res) - print(f"Percentage: {round(count/len(messages),2)*100}%") - count +=1 + # Attendere 20 secondi tra le richieste per rispettare il limite + if processed_messages % 2 == 0: + time.sleep(60) - return res \ No newline at end of file + pool.close() + pool.join() + + return res From d3ec995e949fe4e881012045f259d4ef11acef0c Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Wed, 7 Feb 2024 17:19:02 +0100 Subject: [PATCH 2/4] add a progress bar --- yosoai/request.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/yosoai/request.py b/yosoai/request.py index 0a9a6db8..fc6dc226 100644 --- a/yosoai/request.py +++ b/yosoai/request.py @@ -1,6 +1,7 @@ import time from typing import List from multiprocessing import Pool +from tqdm import tqdm # Import tqdm for progress bar from .getter import remover from .class_generator import Generator from .class_creator import create_class @@ -41,17 +42,18 @@ def send_request(key: str, text: str, values: list[dict], model: str, temperatur total_messages = len(messages) processed_messages = 0 - pool = Pool(processes=2) # Limita il numero di processi a 3 + pool = Pool(processes=2) # Limit the number of processes to 3 - for result in pool.imap_unordered(process_message, [(key, temperature, model, encoding_name, message) for message in messages]): - res.append(result) - processed_messages += 1 - progress = processed_messages / total_messages * 100 - print(f"Overall Progress: {progress:.2f}%") + with tqdm(total=total_messages) as pbar: + for result in pool.imap_unordered(process_message, [(key, temperature, model, encoding_name, message) for message in messages]): + res.append(result) + processed_messages += 1 + pbar.update(1) # Update the progress bar + progress = processed_messages / total_messages * 100 - # Attendere 20 secondi tra le richieste per rispettare il limite - if processed_messages % 2 == 0: - time.sleep(60) + # Wait for 20 seconds between requests to respect the limit + if processed_messages % 2 == 0: + time.sleep(20) pool.close() pool.join() From a884d8f261e2a5914df59fb009dcf0693d5c41c2 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Wed, 7 Feb 2024 20:54:02 +0100 Subject: [PATCH 3/4] refactoring of remover function --- yosoai/getter.py | 14 +++++++------- yosoai/request.py | 11 ++++------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/yosoai/getter.py b/yosoai/getter.py index a5a38325..c736aec1 100644 --- a/yosoai/getter.py +++ b/yosoai/getter.py @@ -17,27 +17,27 @@ def get_function(link:str, param = HEADERS) -> str: response = requests.get(url=link, headers=param) return str(response.content) -def remover(file:str, only_body:bool = False) -> str: +def remover(file: str, only_body: bool = False) -> str: """ This function elaborates the HTML file and remove all the not necessary tag Parameters: file (str): the file to parse + only_body (bool): whether to parse only the body content or the entire file Returns: str: the parsed file """ - res = "" - if only_body == True: + if only_body: isBody = True else: isBody = False for elem in file.splitlines(): if "" in elem: - res = res + elem + res += elem if "<body>" in elem: isBody = True @@ -48,7 +48,7 @@ def remover(file:str, only_body:bool = False) -> str: if "<script>" in elem: continue - if isBody == True: - res = res + elem + if isBody: + res += elem - return res.replace("\n", "") \ No newline at end of file + return res.replace("\\n", "") diff --git a/yosoai/request.py b/yosoai/request.py index fc6dc226..4da64f0a 100644 --- a/yosoai/request.py +++ b/yosoai/request.py @@ -36,26 +36,23 @@ def send_request(key: str, text: str, values: list[dict], model: str, temperatur create_class(values) time.sleep(2) # TODO: implement asynchronous waiting - # text = remover(text) + text = remover(text) messages = truncate_text_tokens(text, model, encoding_name) total_messages = len(messages) processed_messages = 0 - pool = Pool(processes=2) # Limit the number of processes to 3 + pool = Pool(processes=2) with tqdm(total=total_messages) as pbar: for result in pool.imap_unordered(process_message, [(key, temperature, model, encoding_name, message) for message in messages]): res.append(result) processed_messages += 1 - pbar.update(1) # Update the progress bar - progress = processed_messages / total_messages * 100 + pbar.update(1) # Wait for 20 seconds between requests to respect the limit if processed_messages % 2 == 0: time.sleep(20) pool.close() - pool.join() - - return res + pool.join() \ No newline at end of file From 9bb0632295bf21e1938e3518a366a80370fd43a7 Mon Sep 17 00:00:00 2001 From: VinciGit00 <mvincig11@gmail.com> Date: Thu, 8 Feb 2024 20:33:16 +0100 Subject: [PATCH 4/4] add the integration for multiple requests --- yosoai/request.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/yosoai/request.py b/yosoai/request.py index 4da64f0a..4afc89b3 100644 --- a/yosoai/request.py +++ b/yosoai/request.py @@ -34,7 +34,7 @@ def send_request(key: str, text: str, values: list[dict], model: str, temperatur """ res = [] create_class(values) - time.sleep(2) # TODO: implement asynchronous waiting + time.sleep(2) #TOFIX text = remover(text) @@ -45,14 +45,31 @@ def send_request(key: str, text: str, values: list[dict], model: str, temperatur pool = Pool(processes=2) with tqdm(total=total_messages) as pbar: - for result in pool.imap_unordered(process_message, [(key, temperature, model, encoding_name, message) for message in messages]): + for i, result in enumerate(pool.imap_unordered(process_message, [(key, temperature, model, encoding_name, message) for message in messages])): res.append(result) processed_messages += 1 pbar.update(1) - # Wait for 20 seconds between requests to respect the limit - if processed_messages % 2 == 0: - time.sleep(20) + time.sleep(20) + + if processed_messages % 3 == 0: + time.sleep(40) + continue + + try: + time.sleep(5) + result = process_message((key, temperature, model, encoding_name, messages[i])) + except Exception as e: + if hasattr(e, 'response') and e.response.status_code == 429: + retry_after = int(e.response.headers.get('Retry-After', 30)) + print(f"Rate limit reached. Retrying after {retry_after} seconds.") + time.sleep(retry_after) + result = process_message((key, temperature, model, encoding_name, messages[i])) + else: + raise + res.append(result) pool.close() - pool.join() \ No newline at end of file + pool.join() + + return res