mirror of
https://github.com/VinciGit00/Scrapegraph-ai.git
synced 2026-06-25 21:11:11 +08:00
62 lines
2.2 KiB
Python
62 lines
2.2 KiB
Python
"""
|
|
Module for sending the request
|
|
"""
|
|
import time
|
|
from typing import List
|
|
from tqdm import tqdm
|
|
from .remover import remover
|
|
from .class_generator import Generator
|
|
from .class_creator import create_class
|
|
from .token_calculator import truncate_text_tokens
|
|
|
|
# Default value of the `encoding_name` argument of send_request.
EMBEDDING_ENCODING: str = "cl100k_base"

# time.time() timestamp written by send_request; starts at 0 so the very
# first request is never delayed by the throttle.
LAST_REQUEST_TIME: float = 0
# Minimum spacing, in seconds, that send_request enforces before a request.
REQUEST_INTERVAL: int = 20
|
|
|
|
|
|
def send_request(key: str, text: str, values: List[dict],
                 model: str, temperature: float = 0.0,
                 encoding_name: str = EMBEDDING_ENCODING) -> List[dict]:
    """
    Send a request to openai.

    ``create_class(values)`` is called first, then ``text`` is passed through
    ``remover`` and ``truncate_text_tokens`` to obtain the messages to send.
    One request is issued per message. Before every request the function
    waits until at least ``REQUEST_INTERVAL`` seconds have elapsed since the
    previous one; the timestamp is kept in the module-level
    ``LAST_REQUEST_TIME``, so the spacing also holds across separate calls.

    Args:
        key (str): The API key for accessing the language model.
        text (str): The input text to be processed.
        values (List[dict]): Settings of the request.
            Each element of the list should have the following keys:
            - "title" (str): The title of the field.
            - "type" (str): The type of the field.
            - "description" (str): The description of the field.
        model (str): The name of the openai language model to be used.
        temperature (float): A parameter controlling the randomness of the language
            model's output (default: 0).
        encoding_name (str): The name of the encoding to be used (default: EMBEDDING_ENCODING).

    Returns:
        List[dict]: The result of the request to openai, one entry per
            message, in the order the messages were sent.
    """
    global LAST_REQUEST_TIME

    create_class(values)
    # NOTE(review): presumably gives the output of create_class time to
    # become available before Generator is used -- confirm.
    time.sleep(2)  # TODO: implement asynchronous waiting

    messages = truncate_text_tokens(remover(text), model, encoding_name)
    res = []

    with tqdm(total=len(messages)) as pbar:
        for message in messages:
            # Throttle: sleep for whatever is left of the interval since the
            # previous request (from this call or an earlier one).
            time_since_last_request = time.time() - LAST_REQUEST_TIME
            if time_since_last_request < REQUEST_INTERVAL:
                time.sleep(REQUEST_INTERVAL - time_since_last_request)

            generator_instance = Generator(key, temperature, model)
            try:
                res.append(generator_instance.invocation(message))
            finally:
                # Record the time after *every* request, including a failed
                # one, so the next request (or a retry) is spaced correctly.
                LAST_REQUEST_TIME = time.time()
            pbar.update(1)

    return res
|