feat UPLOAD_BY_URL

This commit is contained in:
LanQian 2024-05-24 00:59:25 +08:00
parent dc44dd56fb
commit 2a30cc3c38
8 changed files with 79 additions and 130 deletions

View File

@ -7,4 +7,5 @@ POW_DIFFICULTY=000032
RETRY_TIMES=3
ENABLE_GATEWAY=true
CONVERSATION_ONLY=false
ENABLE_LIMIT=false
ENABLE_LIMIT=true
UPLOAD_BY_URL=false

View File

@ -37,7 +37,7 @@ jobs:
images: lanqian528/chat2api
tags: |
type=raw,value=latest,enable={{is_default_branch}}
type=raw,value=v1.1.6
type=raw,value=v1.1.7
- name: Build and push
uses: docker/build-push-action@v5

View File

@ -21,7 +21,7 @@
3. 接口返回的状态码和响应体
## 功能
### 最新版 v1.1.6
### 最新版 v1.1.7
> 已完成
> - [x] 流式、非流式传输
@ -54,19 +54,20 @@
每个环境变量都有默认值,如果不懂环境变量的含义,请不要设置,更不要传空值,字符串无需引号。
| 分类 | 变量名 | 示例值 | 描述 |
|------|-------------------|-------------------------------------|-----------------------------------------------------------|
| 安全相关 | API_PREFIX | your_prefix | API 前缀密码,不设置容易被人访问,设置后需请求 /your_prefix/v1/chat/completions |
| | AUTHORIZATION | sk-xxxxxxxx, sk-yyyyyyyy | 为使用多账号轮询 Tokens 设置的授权,英文逗号分隔 |
| 请求相关 | CHATGPT_BASE_URL | https://chatgpt.com | ChatGPT 网关地址,设置后会改变请求的网站,多个网关用逗号分隔 |
| | PROXY_URL | your_first_proxy, your_second_proxy | 代理 URL多个代理用逗号分隔 |
| | ARKOSE_TOKEN_URL | https://arkose.example.com/token | 获取 Arkose token 的地址 |
| 功能相关 | HISTORY_DISABLED | true | 是否不保存聊天记录并返回 conversation_id |
| | POW_DIFFICULTY | 00003a | 要解决的工作量证明难度 |
| | RETRY_TIMES | 3 | 出错重试次数 |
| | ENABLE_GATEWAY | true | 是否启用网关模式WEBUI |
| | CONVERSATION_ONLY | false | 是否直接使用对话接口 |
| | ENABLE_LIMIT | true | 开启后不尝试突破官方次数限制,尽可能防止封号 |
| 分类 | 变量名 | 示例值 | 描述 |
|------|-------------------|-------------------------------------|--------------------------------------------------------------|
| 安全相关 | API_PREFIX | your_prefix | API 前缀密码,不设置容易被人访问,设置后需请求 `/your_prefix/v1/chat/completions` |
| | AUTHORIZATION | sk-xxxxxxxx, sk-yyyyyyyy | 为使用多账号轮询 Tokens 设置的授权,英文逗号分隔 |
| 请求相关 | CHATGPT_BASE_URL | https://chatgpt.com | ChatGPT 网关地址,设置后会改变请求的网站,多个网关用逗号分隔 |
| | PROXY_URL | your_first_proxy, your_second_proxy | 代理 URL,多个代理用逗号分隔 |
| | ARKOSE_TOKEN_URL | https://arkose.example.com/token | 获取 Arkose token 的地址 |
| 功能相关 | HISTORY_DISABLED | true | 是否不保存聊天记录并返回 conversation_id |
| | POW_DIFFICULTY | 00003a | 要解决的工作量证明难度 |
| | RETRY_TIMES | 3 | 出错重试次数 |
| | ENABLE_GATEWAY | true | 是否启用网关模式(WEBUI) |
| | CONVERSATION_ONLY | false | 是否直接使用对话接口 |
| | ENABLE_LIMIT | true | 开启后不尝试突破官方次数限制,尽可能防止封号 |
| | UPLOAD_BY_URL | false | 开启后按照 `URL+空格+正文` 进行对话,自动解析 URL 内容并上传,多个 URL 用空格分隔 |
## 部署

View File

@ -17,7 +17,7 @@ from utils.Client import Client
from utils.Logger import logger
from utils.authorization import verify_token
from utils.config import proxy_url_list, chatgpt_base_url_list, arkose_token_url_list, history_disabled, pow_difficulty, \
conversation_only, enable_limit, limit_status_code
conversation_only, enable_limit, limit_status_code, upload_by_url
class ChatService:
@ -188,7 +188,7 @@ class ChatService:
async def prepare_send_conversation(self):
try:
chat_messages, self.prompt_tokens = await api_messages_to_chat(self, self.api_messages, self.origin_model)
chat_messages, self.prompt_tokens = await api_messages_to_chat(self, self.api_messages, upload_by_url)
except Exception as e:
logger.error(f"Failed to format messages: {str(e)}")
raise HTTPException(status_code=400, detail="Failed to format messages.")

View File

@ -1,21 +1,18 @@
import asyncio
import json
import random
import re
import string
import time
import uuid
from collections import deque
from functools import reduce
import pybase64
import websockets
from urlextract import URLExtract
from api.files import get_file_content
from api.models import model_system_fingerprint
from api.tokens import split_tokens_from_content, calculate_image_tokens, num_tokens_from_messages
from utils.Logger import logger
from utils.config import max_file_num, enable_search, enable_gpt4o_search
moderation_message = "I'm sorry, I cannot provide or engage in any content related to pornography, violence, or any unethical material. If you have any other questions or need assistance, please feel free to let me know. I'll do my best to provide support and assistance."
@ -170,6 +167,8 @@ async def stream_response(service, response, model, max_tokens):
parts = content.get("parts", [])
delta = {}
for part in parts:
if isinstance(part, str):
continue
inner_content_type = part.get('content_type')
if inner_content_type == "image_asset_pointer":
last_content_type = "image_asset_pointer"
@ -229,40 +228,49 @@ async def stream_response(service, response, model, max_tokens):
continue
async def api_messages_to_chat(service, api_messages, ori_model_name):
# Pattern for a URL at the very start of a message. Compiled once at import
# time because get_url_from_content() is called repeatedly per message.
_LEADING_URL_RE = re.compile(r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))')


def get_url_from_content(content):
    """Split a leading URL off the front of a message.

    Args:
        content: The message content. Only a ``str`` that starts with
            ``"http"`` is inspected; any other value is passed through.

    Returns:
        A ``(url, rest)`` tuple. ``url`` is the URL found at the start of
        ``content`` and ``rest`` is the remaining text with surrounding
        whitespace stripped. When no leading URL is found the result is
        ``(None, content)`` with ``content`` returned unchanged.
    """
    if not isinstance(content, str) or not content.startswith('http'):
        return None, content

    # Only the first space-delimited token is a URL candidate.
    first_token = content.split(' ', 1)[0]
    match = _LEADING_URL_RE.match(first_token)
    if match is None:
        return None, content

    url = match[0]
    # The match is anchored at position 0, so drop exactly that prefix.
    # Replacing every occurrence of ``url`` would also mangle the body when
    # the same URL (or a longer URL sharing this prefix) is mentioned again.
    return url, content[len(url):].strip()
def format_messages_with_url(content):
    """Turn ``URL URL ... text`` message content into a list of content parts.

    Leading URLs are peeled off one at a time with ``get_url_from_content``
    until none is left at the front of the text.

    Args:
        content: The message content. Only ``str`` content is rewritten.

    Returns:
        For ``str`` input, a list whose first element is a ``"text"`` part
        holding the remaining text, followed by one ``"image_url"`` part per
        extracted URL, in the order the URLs appeared. Any non-string input
        (for example content that is already a list of parts, or ``None``)
        is returned unchanged.
    """
    # Non-string content must not be wrapped as the "text" of a new part:
    # that would nest a list (or None) where a string is expected.
    if not isinstance(content, str):
        return content

    url_list = []
    url, content = get_url_from_content(content)
    while url:
        url_list.append(url)
        url, content = get_url_from_content(content)

    new_content = [{"type": "text", "text": content}]
    new_content.extend(
        {"type": "image_url", "image_url": {"url": url}} for url in url_list
    )
    return new_content
async def api_messages_to_chat(service, api_messages, upload_by_url=False):
file_tokens = 0
chat_messages = []
contains_url = False
enable_search_models = 'gpt-3.5' not in ori_model_name and 'claude-3' not in ori_model_name
if 'gpt-4o' in ori_model_name:
api_enable_search = enable_search and enable_gpt4o_search
else:
api_enable_search = enable_search and enable_search_models
if api_enable_search:
all_urls = deque(maxlen=max_file_num)
url_positions = []
extractor = URLExtract()
for i, message in enumerate(api_messages):
content = message.get("content", "")
if not isinstance(content, list) and enable_search:
urls = extractor.find_urls(str(content), True)
urls = [url for url in urls if url.startswith(('https', 'http'))][:max_file_num]
message["content"] = content.strip()
url_positions.extend([(i, urls)])
all_urls.extend(urls)
all_urls = list(all_urls)
contains_url = bool(all_urls)
if len(all_urls) > 0:
logger.info(f"当前请求消息里的包含的URLS:{all_urls}")
# 将列表转换为字典
final_positions = dict(url_positions)
for index, api_message in enumerate(api_messages):
for api_message in api_messages:
role = api_message.get('role')
content = api_message.get('content')
if upload_by_url:
content = format_messages_with_url(content)
if isinstance(content, list):
parts = []
attachments = []
@ -310,65 +318,10 @@ async def api_messages_to_chat(service, api_messages, ori_model_name):
metadata = {
"attachments": attachments
}
# 当模型为3.5或者claude 或者 文本不包含url的时候直接请求
elif not api_enable_search or not contains_url:
else:
content_type = "text"
parts = [content]
metadata = {}
else:
metadata = {}
parts = []
attachments = []
tem_urls = []
content_type = "multimodal_text"
all_urls = final_positions.get(index, [])
for url in all_urls:
file_content, mime_type = await get_file_content(url)
file_meta = await service.upload_file(file_content, mime_type)
if file_meta:
tem_urls.append(url)
file_id = file_meta["file_id"]
file_size = file_meta["size_bytes"]
file_name = file_meta["file_name"]
mime_type = file_meta["mime_type"]
if mime_type.startswith("image/"):
width, height = file_meta["width"], file_meta["height"]
file_tokens += await calculate_image_tokens(width, height, "auto")
parts.append({
"content_type": "image_asset_pointer",
"asset_pointer": f"file-service://{file_id}",
"size_bytes": file_size,
"width": width,
"height": height
})
attachments.append({
"id": file_id,
"size": file_size,
"name": file_name,
"mime_type": mime_type,
"width": width,
"height": height
})
else:
file_tokens += file_size // 1000
attachments.append({
"id": file_id,
"size": file_size,
"name": file_name,
"mime_type": mime_type,
})
if attachments:
metadata = {
"attachments": attachments
}
# 删除content里的url防止影响信息
content = reduce(lambda text, url: text.replace(url, ''), tem_urls, content).strip()
parts.append(content)
chat_message = {
"id": f"{uuid.uuid4()}",
"author": {"role": role},

View File

@ -47,7 +47,7 @@ async def handle_request_limit(request_data, access_token):
def clean_dict():
logger.info("-" * 50)
logger.info("-" * 60)
logger.info("Start to clean limit_access_token......")
current_time = time.time()
keys_to_remove = [key for key, clear_time in limit_access_token.items() if clear_time < current_time]

View File

@ -7,5 +7,4 @@ websockets
pillow
pybase64
jinja2
APScheduler
urlextract
APScheduler

View File

@ -30,32 +30,27 @@ enable_gateway = is_true(os.getenv('ENABLE_GATEWAY', True))
conversation_only = is_true(os.getenv('CONVERSATION_ONLY', False))
enable_limit = is_true(os.getenv('ENABLE_LIMIT', True))
limit_status_code = os.getenv('LIMIT_STATUS_CODE', 429)
upload_by_url = is_true(os.getenv('UPLOAD_BY_URL', False))
enable_search = is_true(os.getenv('ENABLE_SEARCH', False))
max_file_num = os.getenv('MAX_FILE_NUM', 5)
enable_gpt4o_search = is_true(os.getenv('ENABLE_GPT4O_SEARCH', False))
authorization_list = authorization.split(',') if authorization else []
chatgpt_base_url_list = chatgpt_base_url.split(',') if chatgpt_base_url else []
arkose_token_url_list = arkose_token_url.split(',') if arkose_token_url else []
proxy_url_list = proxy_url.split(',') if proxy_url else []
logger.info("-" * 60)
logger.info("Chat2Api v1.1.6 | https://github.com/lanqian528/chat2api")
logger.info("Chat2Api v1.1.7 | https://github.com/lanqian528/chat2api")
logger.info("-" * 60)
logger.info("Environment variables:")
logger.info("API_PREFIX: " + str(api_prefix))
logger.info("AUTHORIZATION: " + str(authorization_list))
logger.info("CHATGPT_BASE_URL: " + str(chatgpt_base_url_list))
logger.info("ARKOSE_TOKEN_URL: " + str(arkose_token_url_list))
logger.info("PROXY_URL: " + str(proxy_url_list))
logger.info("HISTORY_DISABLED: " + str(history_disabled))
logger.info("POW_DIFFICULTY: " + str(pow_difficulty))
logger.info("RETRY_TIMES: " + str(retry_times))
logger.info("ENABLE_GATEWAY: " + str(enable_gateway))
logger.info("CONVERSATION_ONLY: " + str(conversation_only))
logger.info("ENABLE_LIMIT: " + str(enable_limit))
logger.info("LIMIT_STATUS_CODE " + str(limit_status_code))
logger.info("ENABLE_SEARCH: " + str(enable_search))
logger.info("MAX_FILE_NUM: " + str(max_file_num))
logger.info("ENABLE_GPT4O_SEARCH: " + str(enable_gpt4o_search))
logger.info("API_PREFIX: " + str(api_prefix))
logger.info("AUTHORIZATION: " + str(authorization_list))
logger.info("CHATGPT_BASE_URL: " + str(chatgpt_base_url_list))
logger.info("ARKOSE_TOKEN_URL: " + str(arkose_token_url_list))
logger.info("PROXY_URL: " + str(proxy_url_list))
logger.info("HISTORY_DISABLED: " + str(history_disabled))
logger.info("POW_DIFFICULTY: " + str(pow_difficulty))
logger.info("RETRY_TIMES: " + str(retry_times))
logger.info("ENABLE_GATEWAY: " + str(enable_gateway))
logger.info("CONVERSATION_ONLY: " + str(conversation_only))
logger.info("ENABLE_LIMIT: " + str(enable_limit))
logger.info("UPLOAD_BY_URL: " + str(upload_by_url))
logger.info("-" * 60)