update reqs for vlm

2026-06-04 21:05:48 +08:00 · 2025-12-08 18:05:31 -08:00 · 2025-12-08 18:05:31 -08:00 · f97ec0c99e
commit f97ec0c99e
parent faeb7d16f6
2 changed files with 43 additions and 34 deletions
--- a/requirements_vlm.txt
+++ b/requirements_vlm.txt
@@ -1,47 +1,57 @@
 # Requirements for vlm.py - Qwen3-VL Chat Interface
-# Use a separate virtual environment to avoid conflicts with main Forge app
 #
-# Setup:
-#   python -m venv venv_vlm
-#   venv_vlm\Scripts\activate  (Windows)
-#   source venv_vlm/bin/activate  (Linux/Mac)
-#   pip install -r requirements_vlm.txt
+# PLATFORM NOTES:
+#   - Windows: Use transformers backend only (vLLM is Linux-only)
+#   - Linux: Can use vLLM for high-performance inference
 #
-# Run:
-#   python vlm.py
+# WINDOWS SETUP:
+#   1. Install PyTorch with CUDA first:
+#      pip install torch==2.7.1+cu128 torchvision==0.22.1+cu128 --index-url https://download.pytorch.org/whl/cu128
+#   2. Then install requirements:
+#      pip install -r requirements_vlm.txt
+#   3. Run:
+#      python vlm.py --backend transformers
+#
+# LINUX SETUP (with vLLM):
+#   1. Install PyTorch with CUDA first:
+#      pip install torch==2.7.1+cu128 torchvision==0.22.1+cu128 --index-url https://download.pytorch.org/whl/cu128
+#   2. Install requirements:
+#      pip install -r requirements_vlm.txt
+#   3. Install vLLM separately:
+#      pip install vllm==0.11.0
+#   4. Run:
+#      python vlm.py --backend vllm

-# PyTorch - install first with CUDA support
-# pip install torch torchvision --index-url https://download.pytorch.org/whl/cu128
-torch>=2.4.0
-torchvision
-
-# vLLM for high-performance inference
-vllm>=0.11.0
+# Core dependencies (pinned for stability)
+# NOTE: Do NOT include torch/torchvision here - install separately with CUDA index

 # Qwen VL utilities
-qwen-vl-utils>=0.0.14
+qwen-vl-utils==0.0.14

-# Transformers (fallback backend)
-transformers>=4.51.0
-accelerate
-safetensors
+# Transformers backend (works on Windows and Linux)
+transformers==4.51.3
+accelerate==1.2.1
+safetensors==0.5.3

-# Gradio UI
-gradio>=5.0.0
-gradio-client
+# Gradio UI - pinned to 4.x for compatibility with main Forge
+gradio==4.44.1
+gradio-client==1.4.0

 # Image/Video processing
-Pillow>=10.0.0
-opencv-python
+Pillow==11.0.0
+opencv-python==4.10.0.84

 # Other dependencies
-numpy
-tqdm
-pydantic>=2.0.0
-huggingface-hub>=0.20.0
+numpy==1.26.4
+tqdm==4.67.1
+pydantic==2.10.3
+huggingface-hub==0.27.1

-# Optional: Flash Attention 2 (for faster inference)
+# Optional: Flash Attention 2 (for faster inference on Linux)
 # pip install flash-attn --no-build-isolation

 # Optional: bitsandbytes for quantization (transformers backend)
 # pip install bitsandbytes
+
+# vLLM (Linux only - install separately if needed):
+# pip install vllm==0.11.0
--- a/vlm.py
+++ b/vlm.py
@@ -929,7 +929,7 @@ def create_ui():
    """Create the Gradio interface."""
    available_models = vlm_manager.get_available_models() if vlm_manager else ["Manager not initialized"]

-    # Theme for Gradio 6.x (passed to launch() instead of Blocks())
+    # Theme for Gradio 4.x (passed to Blocks())
    global vlm_theme, vlm_css
    vlm_theme = themes.Default(
        primary_hue=colors.Color(
@@ -979,7 +979,7 @@ def create_ui():
    }
    """

-    with gr.Blocks(title="Chromaforge VLM") as demo:
+    with gr.Blocks(title="Chromaforge VLM", theme=vlm_theme, css=vlm_css) as demo:
        with gr.Row():
            # Left column - Settings (shared across tabs)
            with gr.Column(scale=1):
@@ -1118,6 +1118,7 @@ def create_ui():
                        chatbot = gr.Chatbot(
                            label="Conversation",
                            height=400,
+                            show_copy_button=True,
                        )

                        with gr.Row():
@@ -1321,8 +1322,6 @@ def main():
        server_name=host,
        server_port=args.port,
        share=args.share,
-        theme=vlm_theme,
-        css=vlm_css,
    )