# Patch summary: add a user-configurable "zimage_shift" option for Z-Image sampling.
#
# backend/sampling/sampling_function.py (hunk inside sampling_prepare):
#   - Previously, when the model is flagged is_zimage and its predictor exposes
#     apply_mu_transform, the code always called apply_mu_transform with
#     seq_len=(H // 2) * (W // 2), base_seq_len=256, max_seq_len=4096,
#     base_shift=0.5, max_shift=1.15.
#   - Now it first reads opts.zimage_shift (getattr default 0.0) from
#     modules.shared, imported inside the branch.
#       * value > 0: calls apply_mu_transform(mu=manual_shift) and logs the
#         requested value (manual_shift), not real_model.predictor.mu.
#       * otherwise: runs the same resolution-based call as before; only the
#         log text changes from "Updated mu" to "Auto mu".
#   - Hunk header: 16 old lines become 27 new lines.
#
# modules/shared_options.py (hunk inside the 'zimage' options section):
#   - Adds "zimage_shift": OptionInfo default 0.0, gr.Slider with minimum 0.0,
#     maximum 30.0, step 0.05; the help text documents 0 as "auto".
#   - Hunk header: 6 old lines become 7 new lines.
#
# NOTE(review): apply_mu_transform accepting a `mu=` keyword is not shown in this
#   patch -- confirm against the predictor implementation.
# NOTE(review): the slider allows values up to 30.0 while the help text gives
#   0.5-1.15 as the typical range -- confirm the upper bound is intentional.
# NOTE(review): line breaks and indentation of the patch body below appear to
#   have been collapsed; presumably they must be restored before the patch can
#   be applied -- verify against the original commit.
diff --git a/backend/sampling/sampling_function.py b/backend/sampling/sampling_function.py index 1ad5a40b..87147c93 100644 --- a/backend/sampling/sampling_function.py +++ b/backend/sampling/sampling_function.py @@ -438,16 +438,27 @@ def sampling_prepare(unet, x): is_zimage = getattr(getattr(real_model, 'config', None), 'is_zimage', False) if is_zimage and hasattr(real_model, 'predictor') and hasattr(real_model.predictor, 'apply_mu_transform'): - # Z-Image uses patch_size=2, so sequence length = (H/2) * (W/2) - image_seq_len = (H // 2) * (W // 2) - real_model.predictor.apply_mu_transform( - seq_len=image_seq_len, - base_seq_len=256, - max_seq_len=4096, - base_shift=0.5, - max_shift=1.15, - ) - print(f"Z-Image: Updated mu for {H}x{W} latents (seq_len={image_seq_len}, mu={real_model.predictor.mu:.4f})") + from modules.shared import opts + + # Check if user specified a manual shift value + manual_shift = getattr(opts, 'zimage_shift', 0.0) + + if manual_shift > 0: + # Use manual shift value directly + real_model.predictor.apply_mu_transform(mu=manual_shift) + print(f"Z-Image: Using manual mu={manual_shift:.4f} for {H}x{W} latents") + else: + # Calculate dynamic shift based on resolution + # Z-Image uses patch_size=2, so sequence length = (H/2) * (W/2) + image_seq_len = (H // 2) * (W // 2) + real_model.predictor.apply_mu_transform( + seq_len=image_seq_len, + base_seq_len=256, + max_seq_len=4096, + base_shift=0.5, + max_shift=1.15, + ) + print(f"Z-Image: Auto mu for {H}x{W} latents (seq_len={image_seq_len}, mu={real_model.predictor.mu:.4f})") # Set up Z-Image CFG handlers if is_zimage: diff --git a/modules/shared_options.py b/modules/shared_options.py index 5280cb05..2c1e10f3 100644 --- a/modules/shared_options.py +++ b/modules/shared_options.py @@ -201,6 +201,7 @@ options_templates.update(options_section(('sd3', "Stable Diffusion 3", "sd"), { options_templates.update(options_section(('zimage', "Z-Image", "sd"), { "zimage_prompt_expansion_max_tokens": OptionInfo(512, 
"Prompt expansion max tokens", gr.Slider, {"minimum": 128, "maximum": 1024, "step": 64}).info("maximum number of tokens to generate when expanding prompts"), "zimage_prompt_expansion_temperature": OptionInfo(0.7, "Prompt expansion temperature", gr.Slider, {"minimum": 0.1, "maximum": 1.5, "step": 0.1}).info("higher = more creative, lower = more focused"), + "zimage_shift": OptionInfo(0.0, "Time Shift (mu)", gr.Slider, {"minimum": 0.0, "maximum": 30.0, "step": 0.05}).info("0=auto (resolution-dependent, recommended); manual: 0.5-1.15 typical range; higher=more denoising in early steps"), "zimage_cfg_normalization": OptionInfo(0.0, "CFG Normalization", gr.Slider, {"minimum": 0.0, "maximum": 3.0, "step": 0.1}).info("rescale CFG result if norm exceeds this multiple of original; 0=disabled, 1.5-2.0 recommended; prevents over-saturation at high CFG"), "zimage_cfg_truncation": OptionInfo(1.0, "CFG Truncation", gr.Slider, {"minimum": 0.0, "maximum": 1.0, "step": 0.05}).info("disable CFG after this fraction of steps; 1.0=never truncate, 0.5=disable CFG for last 50% of steps; reduces artifacts"), }))