From a884d8f261e2a5914df59fb009dcf0693d5c41c2 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Wed, 7 Feb 2024 20:54:02 +0100 Subject: [PATCH] refactoring of remover function --- yosoai/getter.py | 14 +++++++------- yosoai/request.py | 11 ++++------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/yosoai/getter.py b/yosoai/getter.py index a5a38325..c736aec1 100644 --- a/yosoai/getter.py +++ b/yosoai/getter.py @@ -17,27 +17,27 @@ def get_function(link:str, param = HEADERS) -> str: response = requests.get(url=link, headers=param) return str(response.content) -def remover(file:str, only_body:bool = False) -> str: +def remover(file: str, only_body: bool = False) -> str: """ This function elaborates the HTML file and remove all the not necessary tag Parameters: file (str): the file to parse + only_body (bool): whether to parse only the body content or the entire file Returns: str: the parsed file """ - res = "" - if only_body == True: + if only_body: isBody = True else: isBody = False for elem in file.splitlines(): if "" in elem: - res = res + elem + res += elem if "<body>" in elem: isBody = True @@ -48,7 +48,7 @@ def remover(file:str, only_body:bool = False) -> str: if "<script>" in elem: continue - if isBody == True: - res = res + elem + if isBody: + res += elem - return res.replace("\n", "") \ No newline at end of file + return res.replace("\\n", "") diff --git a/yosoai/request.py b/yosoai/request.py index fc6dc226..4da64f0a 100644 --- a/yosoai/request.py +++ b/yosoai/request.py @@ -36,26 +36,23 @@ def send_request(key: str, text: str, values: list[dict], model: str, temperatur create_class(values) time.sleep(2) # TODO: implement asynchronous waiting - # text = remover(text) + text = remover(text) messages = truncate_text_tokens(text, model, encoding_name) total_messages = len(messages) processed_messages = 0 - pool = Pool(processes=2) # Limit the number of processes to 3 + pool = Pool(processes=2) with tqdm(total=total_messages) as pbar: for result in pool.imap_unordered(process_message, [(key, temperature, model, encoding_name, message) for message in messages]): res.append(result) processed_messages += 1 - pbar.update(1) # Update the progress bar - progress = processed_messages / total_messages * 100 + pbar.update(1) # Wait for 20 seconds between requests to respect the limit if processed_messages % 2 == 0: time.sleep(20) pool.close() - pool.join() - - return res + pool.join() \ No newline at end of file