Pick correct dtype on T4 GPUs

2026-06-04 21:03:53 +08:00 · 2025-09-23 17:22:34 -04:00 · 2025-09-23 17:22:34 -04:00 · d3aecc0977
commit d3aecc0977
parent eb179cc543
1 changed file with 1 addition and 1 deletion
--- a/surya/foundation/loader.py
+++ b/surya/foundation/loader.py
@@ -36,7 +36,7 @@ class FoundationModelLoader(ModelLoader):
            # emulated bf16, but falls back to very slow kernels, especially for SDPA
            dtype = settings.MODEL_DTYPE_BFLOAT
            if device == "cuda" and not torch.cuda.is_bf16_supported(
-                including_emulation=True
+                including_emulation=False
            ):
                # If the device is cuda, we check if bf16 is supported, and if not, we use float16
                dtype = settings.MODEL_DTYPE