From aeafe08c14f3585b1ad0c86cd3fd23b8bc735bc8 Mon Sep 17 00:00:00 2001
From: maybleMyers <benjimon678@yahoo.com>
Date: Mon, 8 Dec 2025 15:18:58 -0800
Subject: [PATCH] update 30b req

---
 vlm.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/vlm.py b/vlm.py
index 4f20858c..55a99c9c 100644
--- a/vlm.py
+++ b/vlm.py
@@ -233,20 +233,11 @@ class VLMManager:
 
                     print("Using 8-bit quantization...")
 
-                    # 8-bit quantization must happen on GPU (bitsandbytes requirement)
-                    # But we can control memory by loading weights incrementally
                     load_kwargs["quantization_config"] = BitsAndBytesConfig(
                         load_in_8bit=True,
-                        # Skip quantizing certain modules to save memory during loading
-                        llm_int8_skip_modules=["lm_head", "embed_tokens"],
                     )
-                    load_kwargs["device_map"] = "sequential"  # Load layers one by one
-                    load_kwargs["max_memory"] = {0: "45GiB", "cpu": "100GiB"}
-
-                    # Offload folder for overflow
-                    offload_dir = Path(tempfile.gettempdir()) / "vlm_offload"
-                    offload_dir.mkdir(exist_ok=True)
-                    load_kwargs["offload_folder"] = str(offload_dir)
+                    # Place the entire model on GPU 0 (no CPU or disk offload)
+                    load_kwargs["device_map"] = {"": 0}
 
                 except ImportError as e:
                     print(f"Warning: bitsandbytes not installed, falling back to bfloat16")