From 17a42e5877b02452b52c85497a21c49ecab2197c Mon Sep 17 00:00:00 2001 From: Mathieu Croquelois Date: Mon, 19 May 2025 05:06:23 +0100 Subject: [PATCH 1/2] Add BF16 to GGUF (#2877) --- backend/operations_gguf.py | 1 + packages_3rdparty/gguf/quants.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/backend/operations_gguf.py b/backend/operations_gguf.py index f30ef7dd..468e4991 100644 --- a/backend/operations_gguf.py +++ b/backend/operations_gguf.py @@ -13,6 +13,7 @@ quants_mapping = { gguf.GGMLQuantizationType.Q5_K: gguf.Q5_K, gguf.GGMLQuantizationType.Q6_K: gguf.Q6_K, gguf.GGMLQuantizationType.Q8_0: gguf.Q8_0, + gguf.GGMLQuantizationType.BF16: gguf.BF16, } diff --git a/packages_3rdparty/gguf/quants.py b/packages_3rdparty/gguf/quants.py index abe52d54..cfd4d21b 100644 --- a/packages_3rdparty/gguf/quants.py +++ b/packages_3rdparty/gguf/quants.py @@ -268,6 +268,9 @@ class BF16(__Quant, qtype=GGMLQuantizationType.BF16): def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray: return (blocks.view(np.int16).astype(np.int32) << 16).view(np.float32) + @classmethod + def dequantize_blocks_pytorch(cls, blocks, block_size, type_size, parameter) -> torch.Tensor: + return (blocks.view(torch.int16).to(torch.int32) << 16).view(torch.float32) class Q4_0(__Quant, qtype=GGMLQuantizationType.Q4_0): @classmethod From d557aef9d889556e5765e5497a6b8187100dbeb5 Mon Sep 17 00:00:00 2001 From: Emmanuel Ferdman Date: Fri, 23 May 2025 09:35:41 +0300 Subject: [PATCH 2/2] Resolve warnings of datetime library (#2874) --- modules/launch_utils.py | 2 +- modules/ui.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/launch_utils.py b/modules/launch_utils.py index 9e323708..420d9b19 100644 --- a/modules/launch_utils.py +++ b/modules/launch_utils.py @@ -560,7 +560,7 @@ def dump_sysinfo(): import datetime text = sysinfo.get() - filename = f"sysinfo-{datetime.datetime.utcnow().strftime('%Y-%m-%d-%H-%M')}.json" + filename = f"sysinfo-{datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d-%H-%M')}.json" with open(filename, "w", encoding="utf8") as file: file.write(text) diff --git a/modules/ui.py b/modules/ui.py index 1bc85331..8657cab2 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1074,7 +1074,7 @@ def setup_ui_api(app): from fastapi.responses import PlainTextResponse text = sysinfo.get() - filename = f"sysinfo-{datetime.datetime.utcnow().strftime('%Y-%m-%d-%H-%M')}.json" + filename = f"sysinfo-{datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d-%H-%M')}.json" return PlainTextResponse(text, headers={'Content-Disposition': f'{"attachment" if attachment else "inline"}; filename="{filename}"'})