feat UPLOAD_BY_URL

2026-06-13 21:02:46 +08:00 · 2024-05-24 00:59:25 +08:00 · 2024-05-24 00:59:25 +08:00 · 2a30cc3c38
commit 2a30cc3c38
parent dc44dd56fb
8 changed files with 79 additions and 130 deletions
--- a/.env.example
+++ b/.env.example
@ -7,4 +7,5 @@ POW_DIFFICULTY=000032
 RETRY_TIMES=3
 ENABLE_GATEWAY=true
 CONVERSATION_ONLY=false
-ENABLE_LIMIT=false
+ENABLE_LIMIT=true
+UPLOAD_BY_URL=false
--- a/.github/workflows/build_docker.yml
+++ b/.github/workflows/build_docker.yml
@ -37,7 +37,7 @@ jobs:
          images: lanqian528/chat2api
          tags: |
            type=raw,value=latest,enable={{is_default_branch}}
-            type=raw,value=v1.1.6
+            type=raw,value=v1.1.7

      - name: Build and push
        uses: docker/build-push-action@v5
--- a/README.md
+++ b/README.md
@ -21,7 +21,7 @@
 3. 接口返回的状态码和响应体

 ## 功能
-### 最新版 v1.1.6
+### 最新版 v1.1.7

 > 已完成
 > - [x] 流式、非流式传输
@ -54,19 +54,20 @@

 每个环境变量都有默认值，如果不懂环境变量的含义，请不要设置，更不要传空值，字符串无需引号。

-| 分类   | 变量名               | 示例值                                 | 描述                                                        |
-|------|-------------------|-------------------------------------|-----------------------------------------------------------|
-| 安全相关 | API_PREFIX        | your_prefix                         | API 前缀密码，不设置容易被人访问，设置后需请求 /your_prefix/v1/chat/completions |
-|      | AUTHORIZATION     | sk-xxxxxxxx, sk-yyyyyyyy            | 为使用多账号轮询 Tokens 设置的授权，英文逗号分隔                              |
-| 请求相关 | CHATGPT_BASE_URL  | https://chatgpt.com                 | ChatGPT 网关地址，设置后会改变请求的网站，多个网关用逗号分隔                         |
-|      | PROXY_URL         | your_first_proxy, your_second_proxy | 代理 URL，多个代理用逗号分隔                                           |
-|      | ARKOSE_TOKEN_URL  | https://arkose.example.com/token    | 获取 Arkose token 的地址                                         |
-| 功能相关 | HISTORY_DISABLED  | true                                | 是否不保存聊天记录并返回 conversation_id                              |
-|      | POW_DIFFICULTY    | 00003a                              | 要解决的工作量证明难度                                               |
-|      | RETRY_TIMES       | 3                                   | 出错重试次数                                                    |
-|      | ENABLE_GATEWAY    | true                                | 是否启用网关模式（WEBUI）                                           |
-|      | CONVERSATION_ONLY | false                               | 是否直接使用对话接口                                                |
-|      | ENABLE_LIMIT      | true                                | 开启后不尝试突破官方次数限制，尽可能防止封号                                    |
+| 分类   | 变量名               | 示例值                                 | 描述                                                           |
+|------|-------------------|-------------------------------------|--------------------------------------------------------------|
+| 安全相关 | API_PREFIX        | your_prefix                         | API 前缀密码，不设置容易被人访问，设置后需请求 `/your_prefix/v1/chat/completions` |
+|      | AUTHORIZATION     | sk-xxxxxxxx, sk-yyyyyyyy            | 为使用多账号轮询 Tokens 设置的授权，英文逗号分隔                                 |
+| 请求相关 | CHATGPT_BASE_URL  | https://chatgpt.com                 | ChatGPT 网关地址，设置后会改变请求的网站，多个网关用逗号分隔                           |
+|      | PROXY_URL         | your_first_proxy, your_second_proxy | 代理 URL，多个代理用逗号分隔                                             |
+|      | ARKOSE_TOKEN_URL  | https://arkose.example.com/token    | 获取 Arkose token 的地址                                          |
+| 功能相关 | HISTORY_DISABLED  | true                                | 是否不保存聊天记录并返回 conversation_id                                 |
+|      | POW_DIFFICULTY    | 00003a                              | 要解决的工作量证明难度                                                  |
+|      | RETRY_TIMES       | 3                                   | 出错重试次数                                                       |
+|      | ENABLE_GATEWAY    | true                                | 是否启用网关模式（WEBUI）                                              |
+|      | CONVERSATION_ONLY | false                               | 是否直接使用对话接口                                                   |
+|      | ENABLE_LIMIT      | true                                | 开启后不尝试突破官方次数限制，尽可能防止封号                                       |
+|      | UPLOAD_BY_URL     | false                               | 开启后按照 `URL+空格+正文` 进行对话，自动解析 URL 内容并上传，多个 URL 用空格分隔           |


 ## 部署
--- a/chatgpt/ChatService.py
+++ b/chatgpt/ChatService.py
@ -17,7 +17,7 @@ from utils.Client import Client
 from utils.Logger import logger
 from utils.authorization import verify_token
 from utils.config import proxy_url_list, chatgpt_base_url_list, arkose_token_url_list, history_disabled, pow_difficulty, \
-    conversation_only, enable_limit, limit_status_code
+    conversation_only, enable_limit, limit_status_code, upload_by_url


 class ChatService:
@ -188,7 +188,7 @@ class ChatService:

    async def prepare_send_conversation(self):
        try:
-            chat_messages, self.prompt_tokens = await api_messages_to_chat(self, self.api_messages, self.origin_model)
+            chat_messages, self.prompt_tokens = await api_messages_to_chat(self, self.api_messages, upload_by_url)
        except Exception as e:
            logger.error(f"Failed to format messages: {str(e)}")
            raise HTTPException(status_code=400, detail="Failed to format messages.")
--- a/chatgpt/chatFormat.py
+++ b/chatgpt/chatFormat.py
@ -1,21 +1,18 @@
 import asyncio
 import json
 import random
+import re
 import string
 import time
 import uuid
-from collections import deque
-from functools import reduce

 import pybase64
 import websockets
-from urlextract import URLExtract

 from api.files import get_file_content
 from api.models import model_system_fingerprint
 from api.tokens import split_tokens_from_content, calculate_image_tokens, num_tokens_from_messages
 from utils.Logger import logger
-from utils.config import max_file_num, enable_search, enable_gpt4o_search

 moderation_message = "I'm sorry, I cannot provide or engage in any content related to pornography, violence, or any unethical material. If you have any other questions or need assistance, please feel free to let me know. I'll do my best to provide support and assistance."

@ -170,6 +167,8 @@ async def stream_response(service, response, model, max_tokens):
                        parts = content.get("parts", [])
                        delta = {}
                        for part in parts:
+                            if isinstance(part, str):
+                                continue
                            inner_content_type = part.get('content_type')
                            if inner_content_type == "image_asset_pointer":
                                last_content_type = "image_asset_pointer"
@ -229,40 +228,49 @@ async def stream_response(service, response, model, max_tokens):
            continue


-async def api_messages_to_chat(service, api_messages, ori_model_name):
+# Compiled once at import time. Matches one URL-like token anchored at the
+# start of the string; the pattern never crosses whitespace.
+_LEADING_URL_RE = re.compile(
+    r'(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))'
+)
+
+
+def get_url_from_content(content):
+    """Split one leading URL off a message string.
+
+    Args:
+        content: The message content. Only ``str`` values that start with
+            ``'http'`` are inspected; anything else is returned untouched.
+
+    Returns:
+        A ``(url, remainder)`` tuple. ``url`` is the URL matched at the very
+        start of ``content`` and ``remainder`` is the rest of the text with
+        surrounding whitespace stripped. When no leading URL is found the
+        result is ``(None, content)`` with ``content`` unchanged.
+    """
+    if not isinstance(content, str) or not content.startswith('http'):
+        return None, content
+    # Only the first space-delimited token is a URL candidate.
+    match = _LEADING_URL_RE.match(content.split(' ', 1)[0])
+    if match is None:
+        return None, content
+    url = match[0]
+    # The match is anchored at position 0, so slice off just that prefix.
+    # Using str.replace here would also delete later mentions of the same
+    # URL (or of any longer URL sharing this prefix) from the message body.
+    return url, content[len(url):].strip()
+
+
+def format_messages_with_url(content):
+    """Convert ``URL [URL ...] text`` message content into a list of parts.
+
+    Leading URLs are peeled off one at a time with ``get_url_from_content``
+    until the remaining text no longer starts with a URL.
+
+    Args:
+        content: The message content. Non-string content (for example a
+            list of parts, or ``None``) is returned unchanged, because
+            wrapping it would place a non-string inside a ``"text"`` part.
+
+    Returns:
+        For string input, a list whose first element is a ``"text"`` part
+        holding the remaining text, followed by one ``"image_url"`` part per
+        extracted URL in their original order. For any other input, the
+        input itself.
+    """
+    if not isinstance(content, str):
+        return content
+    url_list = []
+    while True:
+        url, content = get_url_from_content(content)
+        if not url:
+            break
+        url_list.append(url)
+    new_content = [{"type": "text", "text": content}]
+    new_content.extend(
+        {"type": "image_url", "image_url": {"url": url}} for url in url_list
+    )
+    return new_content
+
+
+async def api_messages_to_chat(service, api_messages, upload_by_url=False):
    file_tokens = 0
    chat_messages = []
-    contains_url = False
-    enable_search_models = 'gpt-3.5' not in ori_model_name and 'claude-3' not in ori_model_name
-    if 'gpt-4o' in ori_model_name:
-        api_enable_search = enable_search and enable_gpt4o_search
-    else:
-        api_enable_search = enable_search and enable_search_models
-
-    if api_enable_search:
-        all_urls = deque(maxlen=max_file_num)
-        url_positions = []
-        extractor = URLExtract()
-
-        for i, message in enumerate(api_messages):
-            content = message.get("content", "")
-            if not isinstance(content, list) and enable_search:
-                urls = extractor.find_urls(str(content), True)
-                urls = [url for url in urls if url.startswith(('https', 'http'))][:max_file_num]
-                message["content"] = content.strip()
-                url_positions.extend([(i, urls)])
-                all_urls.extend(urls)
-
-        all_urls = list(all_urls)
-        contains_url = bool(all_urls)
-        if len(all_urls) > 0:
-            logger.info(f"当前请求消息里的包含的URLS:{all_urls}")
-        # 将列表转换为字典
-        final_positions = dict(url_positions)
-
-    for index, api_message in enumerate(api_messages):
+    for api_message in api_messages:
        role = api_message.get('role')
        content = api_message.get('content')
+        if upload_by_url:
+            content = format_messages_with_url(content)
        if isinstance(content, list):
            parts = []
            attachments = []
@ -310,65 +318,10 @@ async def api_messages_to_chat(service, api_messages, ori_model_name):
            metadata = {
                "attachments": attachments
            }
-
-        # 当模型为3.5或者claude 或者 文本不包含url的时候，直接请求
-        elif not api_enable_search or not contains_url:
+        else:
            content_type = "text"
            parts = [content]
            metadata = {}
-
-        else:
-            metadata = {}
-            parts = []
-            attachments = []
-            tem_urls = []
-            content_type = "multimodal_text"
-            all_urls = final_positions.get(index, [])
-
-            for url in all_urls:
-                file_content, mime_type = await get_file_content(url)
-                file_meta = await service.upload_file(file_content, mime_type)
-                if file_meta:
-                    tem_urls.append(url)
-                    file_id = file_meta["file_id"]
-                    file_size = file_meta["size_bytes"]
-                    file_name = file_meta["file_name"]
-                    mime_type = file_meta["mime_type"]
-                    if mime_type.startswith("image/"):
-                        width, height = file_meta["width"], file_meta["height"]
-                        file_tokens += await calculate_image_tokens(width, height, "auto")
-                        parts.append({
-                            "content_type": "image_asset_pointer",
-                            "asset_pointer": f"file-service://{file_id}",
-                            "size_bytes": file_size,
-                            "width": width,
-                            "height": height
-                        })
-                        attachments.append({
-                            "id": file_id,
-                            "size": file_size,
-                            "name": file_name,
-                            "mime_type": mime_type,
-                            "width": width,
-                            "height": height
-                        })
-                    else:
-                        file_tokens += file_size // 1000
-                        attachments.append({
-                            "id": file_id,
-                            "size": file_size,
-                            "name": file_name,
-                            "mime_type": mime_type,
-                        })
-
-            if attachments:
-                metadata = {
-                    "attachments": attachments
-                }
-                # 删除content里的url，防止影响信息
-                content = reduce(lambda text, url: text.replace(url, ''), tem_urls, content).strip()
-            parts.append(content)
-
        chat_message = {
            "id": f"{uuid.uuid4()}",
            "author": {"role": role},
--- a/chatgpt/chatLimit.py
+++ b/chatgpt/chatLimit.py
@ -47,7 +47,7 @@ async def handle_request_limit(request_data, access_token):


 def clean_dict():
-    logger.info("-" * 50)
+    logger.info("-" * 60)
    logger.info("Start to clean limit_access_token......")
    current_time = time.time()
    keys_to_remove = [key for key, clear_time in limit_access_token.items() if clear_time < current_time]
--- a/requirements.txt
+++ b/requirements.txt
@ -7,5 +7,4 @@ websockets
 pillow
 pybase64
 jinja2
-APScheduler
-urlextract
+APScheduler
--- a/utils/config.py
+++ b/utils/config.py
@ -30,32 +30,27 @@ enable_gateway = is_true(os.getenv('ENABLE_GATEWAY', True))
 conversation_only = is_true(os.getenv('CONVERSATION_ONLY', False))
 enable_limit = is_true(os.getenv('ENABLE_LIMIT', True))
 limit_status_code = os.getenv('LIMIT_STATUS_CODE', 429)
+upload_by_url = is_true(os.getenv('UPLOAD_BY_URL', False))

-enable_search = is_true(os.getenv('ENABLE_SEARCH', False))
-max_file_num = os.getenv('MAX_FILE_NUM', 5)
-enable_gpt4o_search = is_true(os.getenv('ENABLE_GPT4O_SEARCH', False))
 authorization_list = authorization.split(',') if authorization else []
 chatgpt_base_url_list = chatgpt_base_url.split(',') if chatgpt_base_url else []
 arkose_token_url_list = arkose_token_url.split(',') if arkose_token_url else []
 proxy_url_list = proxy_url.split(',') if proxy_url else []

 logger.info("-" * 60)
-logger.info("Chat2Api v1.1.6 | https://github.com/lanqian528/chat2api")
+logger.info("Chat2Api v1.1.7 | https://github.com/lanqian528/chat2api")
 logger.info("-" * 60)
 logger.info("Environment variables:")
-logger.info("API_PREFIX:            " + str(api_prefix))
-logger.info("AUTHORIZATION:         " + str(authorization_list))
-logger.info("CHATGPT_BASE_URL:      " + str(chatgpt_base_url_list))
-logger.info("ARKOSE_TOKEN_URL:      " + str(arkose_token_url_list))
-logger.info("PROXY_URL:             " + str(proxy_url_list))
-logger.info("HISTORY_DISABLED:      " + str(history_disabled))
-logger.info("POW_DIFFICULTY:        " + str(pow_difficulty))
-logger.info("RETRY_TIMES:           " + str(retry_times))
-logger.info("ENABLE_GATEWAY:        " + str(enable_gateway))
-logger.info("CONVERSATION_ONLY:     " + str(conversation_only))
-logger.info("ENABLE_LIMIT:          " + str(enable_limit))
-logger.info("LIMIT_STATUS_CODE      " + str(limit_status_code))
-logger.info("ENABLE_SEARCH:         " + str(enable_search))
-logger.info("MAX_FILE_NUM:         " + str(max_file_num))
-logger.info("ENABLE_GPT4O_SEARCH:   " + str(enable_gpt4o_search))
+logger.info("API_PREFIX:        " + str(api_prefix))
+logger.info("AUTHORIZATION:     " + str(authorization_list))
+logger.info("CHATGPT_BASE_URL:  " + str(chatgpt_base_url_list))
+logger.info("ARKOSE_TOKEN_URL:  " + str(arkose_token_url_list))
+logger.info("PROXY_URL:         " + str(proxy_url_list))
+logger.info("HISTORY_DISABLED:  " + str(history_disabled))
+logger.info("POW_DIFFICULTY:    " + str(pow_difficulty))
+logger.info("RETRY_TIMES:       " + str(retry_times))
+logger.info("ENABLE_GATEWAY:    " + str(enable_gateway))
+logger.info("CONVERSATION_ONLY: " + str(conversation_only))
+logger.info("ENABLE_LIMIT:      " + str(enable_limit))
+logger.info("UPLOAD_BY_URL:     " + str(upload_by_url))
 logger.info("-" * 60)