]+>")
extracted_loras = lora_pattern.findall(prompt)
prompt_without_loras = lora_pattern.sub("", prompt).strip()
# Clean up any double spaces left behind
prompt_without_loras = re.sub(r'\s+', ' ', prompt_without_loras).strip()
print("\n" + "#"*70, flush=True)
print(f"# {prompt_type.upper()} PROMPT EXPANSION REQUEST", flush=True)
print("#"*70, flush=True)
print(f" Timestamp: {time.strftime('%Y-%m-%d %H:%M:%S')}", flush=True)
print(f" LLM Model: {llm_model}", flush=True)
print(f" Has image context: {image is not None}", flush=True)
print(f" Has custom system prompt: {system_prompt is not None and len(system_prompt) > 0}", flush=True)
print(f" Has user input: {user_input is not None and len(user_input.strip()) > 0 if user_input else False}", flush=True)
if is_negative and positive_prompt:
print(f" Using positive prompt as context: Yes ({len(positive_prompt)} chars)", flush=True)
if extracted_loras:
print(f" Extracted LoRAs: {extracted_loras}", flush=True)
print(f" Prompt without LoRAs: \"{prompt_without_loras[:200]}{'...' if len(prompt_without_loras) > 200 else ''}\"", flush=True)
print(f" Original prompt ({len(prompt)} chars):", flush=True)
print(f" \"{prompt[:200]}{'...' if len(prompt) > 200 else ''}\"", flush=True)
print("#"*70, flush=True)
if not prompt_without_loras or prompt_without_loras.strip() == "":
gr.Warning(f"Please enter a {prompt_type} prompt to expand (not just LoRA tags).")
return prompt
# Determine model path
if llm_model and llm_model != "No LLM models found":
model_path = os.path.join("models", "LLM", llm_model)
else:
# Fallback to default model
model_path = "models/LLM/Qwen3-VL-8B-Caption-V4.5"
if not os.path.exists(model_path):
gr.Warning(f"LLM model not found at: {model_path}")
print(f"ERROR: Model path does not exist: {model_path}", flush=True)
return prompt
# Extract PIL image if provided (from ForgeCanvas or gr.Image)
pil_image = None
if image is not None:
if hasattr(image, 'convert'):
pil_image = image
elif isinstance(image, dict) and 'image' in image:
pil_image = image.get('image')
elif isinstance(image, dict) and 'background' in image:
pil_image = image.get('background')
if pil_image is not None:
gr.Info(f"Expanding {prompt_type} prompt with image context... This may take a moment.")
else:
gr.Info(f"Expanding {prompt_type} prompt... This may take a moment.")
# Use the standalone expansion function (with LoRAs removed)
expanded = expand_prompt_standalone(
prompt=prompt_without_loras,
model_path=model_path,
system_prompt=system_prompt,
image=pil_image,
is_negative=is_negative,
positive_prompt=positive_prompt.strip() if positive_prompt else None,
user_input=user_input.strip() if user_input else None
)
total_time = time.time() - start_time
if expanded and expanded != prompt_without_loras:
# Append extracted LoRAs to the end of the expanded prompt
if extracted_loras:
loras_string = " ".join(extracted_loras)
expanded = f"{expanded} {loras_string}"
print(f" Re-appended LoRAs: {loras_string}", flush=True)
gr.Info(f"{prompt_type.capitalize()} prompt expanded successfully! ({total_time:.1f}s)")
print("\n" + "#"*70, flush=True)
print(f"# {prompt_type.upper()} PROMPT EXPANSION SUCCESS", flush=True)
print("#"*70, flush=True)
print(f" Total request time: {total_time:.2f}s", flush=True)
if extracted_loras:
print(f" LoRAs preserved: {extracted_loras}", flush=True)
print("#"*70 + "\n", flush=True)
return expanded
else:
gr.Warning("Prompt expansion returned empty result, keeping original.")
return prompt
except Exception as e:
import traceback
total_time = time.time() - start_time
print("\n" + "!"*70, flush=True)
print(f"! {prompt_type.upper()} PROMPT EXPANSION FAILED", flush=True)
print("!"*70, flush=True)
print(f" Error: {str(e)}", flush=True)
print(f" Time elapsed: {total_time:.2f}s", flush=True)
print("!"*70, flush=True)
traceback.print_exc()
gr.Warning(f"Error during prompt expansion: {str(e)}")
return prompt
# Global cache for LLM model to avoid reloading.
# Shared, module-level state read and written by expand_prompt_standalone():
# it is filled when a model is loaded and the model/processor/model_path
# entries are reset to None again when that function unloads the model.
_expansion_model_cache = {
    # Loaded transformers model object, or None when nothing is loaded
    'model': None,
    # Matching AutoProcessor (VL models) or AutoTokenizer (text-only models)
    'processor': None,
    # Path the cached model was loaded from; compared against the requested path
    'model_path': None,
    'model_type': None  # 'vl' for standard VL, 'vl_moe' for VL+MoE, 'text' for text-only causal LMs
}
def expand_prompt_standalone(prompt: str, model_path: str, system_prompt: str = None, image=None, is_negative: bool = False, positive_prompt: str = None, user_input: str = None):
    """
    Expand a prompt with a locally stored LLM and return the generated text.

    The model type is detected from ``config.json`` inside ``model_path`` and
    may be a vision-language model ("vl"), a vision-language MoE model
    ("vl_moe") or a text-only causal LM ("text"). Diffusion models are
    unloaded before the LLM is loaded, and the LLM itself is unloaded again
    before this function returns (also when generation fails) so the VRAM is
    handed back.

    Args:
        prompt: The user's input prompt to expand
        model_path: Path to the LLM model directory
        system_prompt: Optional system prompt; sent verbatim as the system
            message and skipped when empty or whitespace-only
        image: Optional PIL Image for context (ignored by text-only models)
        is_negative: Whether this is a negative prompt expansion
        positive_prompt: The positive prompt (used as context for negative expansion)
        user_input: Optional user instruction appended to the user message

    Returns:
        Expanded prompt string, with everything up to and including a closing
        ``</think>`` tag removed and surrounding whitespace stripped

    Raises:
        RuntimeError: If the model or its processor/tokenizer cannot be loaded
    """
    import torch
    import gc
    import time
    import json
    from modules.shared import opts

    global _expansion_model_cache

    # Track total time
    total_start_time = time.time()

    def log_step(message, start_time=None):
        """Helper to log with timestamp and optional elapsed time."""
        timestamp = time.strftime("%H:%M:%S")
        if start_time is not None:
            elapsed = time.time() - start_time
            print(f"[{timestamp}] {message} ({elapsed:.2f}s)", flush=True)
        else:
            print(f"[{timestamp}] {message}", flush=True)

    def get_gpu_memory():
        """Get GPU memory usage if available, else None."""
        try:
            if torch.cuda.is_available():
                allocated = torch.cuda.memory_allocated() / 1024**3
                reserved = torch.cuda.memory_reserved() / 1024**3
                return f"GPU Memory: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved"
        except Exception:
            # Purely diagnostic output; never let it break the pipeline
            pass
        return None

    def detect_model_type(path):
        """Detect whether model is VL, VL+MoE, or text-only from config."""
        config_path = os.path.join(path, "config.json")
        if os.path.exists(config_path):
            try:
                with open(config_path, 'r', encoding='utf-8') as f:
                    config = json.load(f)
                architectures = config.get("architectures", []) or []
                model_type = (config.get("model_type", "") or "").lower()
                is_vl = "vl" in model_type or any("VL" in arch for arch in architectures)
                is_moe = any("Moe" in arch or "MoE" in arch for arch in architectures)
                # Check for VL+MoE models (e.g., Qwen3VLMoeForConditionalGeneration)
                if is_moe and is_vl:
                    return "vl_moe"
                # Check for VL models (must have "vl" in model_type or architecture)
                if is_vl:
                    return "vl"
                # Check for text-only causal LM models (Qwen3, Llama, etc.)
                # These don't have VL in their architecture and are ForCausalLM
                if any("CausalLM" in arch for arch in architectures):
                    return "text"
                if model_type in ["qwen3", "qwen2", "llama", "mistral", "gemma", "phi"]:
                    return "text"
                # Default to VL for img2img support
                return "vl"
            except Exception as e:
                log_step(f" Warning: Could not read config.json: {e}")
        return "vl"

    def clear_cache():
        """Drop every cached reference so the model objects become collectable."""
        _expansion_model_cache['model'] = None
        _expansion_model_cache['processor'] = None
        _expansion_model_cache['model_path'] = None
        _expansion_model_cache['model_type'] = None

    prompt_type = "NEGATIVE" if is_negative else "POSITIVE"
    print("\n" + "="*70, flush=True)
    log_step(f"{prompt_type} PROMPT EXPANSION PIPELINE STARTED")
    print("="*70, flush=True)

    # Get settings
    max_new_tokens = getattr(opts, 'zimage_prompt_expansion_max_tokens', 512)
    temperature = getattr(opts, 'zimage_prompt_expansion_temperature', 0.7)
    log_step(f"Settings: max_tokens={max_new_tokens}, temperature={temperature}")

    # Load model if not cached or different model requested
    if (_expansion_model_cache['model'] is None or
            _expansion_model_cache['model_path'] != model_path):
        # Clear old model if exists
        if _expansion_model_cache['model'] is not None:
            log_step("Unloading previous LLM model...")
            unload_start = time.time()
            clear_cache()
            gc.collect()
            torch.cuda.empty_cache()
            log_step("Previous model unloaded", unload_start)

        # Unload diffusion models from VRAM to make room for LLM
        log_step("Unloading diffusion models from VRAM to make room for LLM...")
        unload_diffusion_start = time.time()
        try:
            from backend import memory_management
            memory_management.unload_all_models()
            memory_management.soft_empty_cache(force=True)
            gc.collect()
            torch.cuda.empty_cache()
            gpu_mem = get_gpu_memory()
            if gpu_mem:
                log_step(f" After diffusion unload: {gpu_mem}")
            log_step("Diffusion models unloaded", unload_diffusion_start)
        except Exception as e:
            log_step(f" Warning: Could not unload diffusion models: {e}")

        log_step(f"Loading LLM model: {model_path}")
        load_start = time.time()

        # Detect model type
        detected_type = detect_model_type(model_path)
        log_step(f" Detected model type: {detected_type}")

        try:
            log_step(" Loading model weights (this may take a moment)...")
            model_start = time.time()

            # Get the device the main program is using to avoid loading on wrong GPU
            from backend import memory_management
            main_device = memory_management.get_torch_device()
            main_index = getattr(main_device, 'index', None)
            device_index = main_index if main_index is not None else 0
            log_step(f" Target device: cuda:{device_index}")

            # Calculate available VRAM for LLM (leave some headroom for diffusion model)
            total_vram = torch.cuda.get_device_properties(device_index).total_memory / 1024**3
            # Use 90% of total VRAM, let accelerate handle the split with CPU
            max_gpu_memory = f"{int(total_vram * 0.9)}GiB"
            max_memory = {device_index: max_gpu_memory, "cpu": "32GiB"}
            log_step(f" GPU {device_index} has {total_vram:.1f}GB, allowing up to {max_gpu_memory}")

            if detected_type == "text":
                # Text-only causal LM model (e.g., Qwen3-4B-Instruct, Llama, etc.)
                from transformers import AutoTokenizer, AutoModelForCausalLM
                log_step(" Loading tokenizer for text-only model...")
                processor_start = time.time()
                _expansion_model_cache['processor'] = AutoTokenizer.from_pretrained(model_path)
                log_step(" Tokenizer loaded", processor_start)
                log_step(" Using AutoModelForCausalLM")
                _expansion_model_cache['model'] = AutoModelForCausalLM.from_pretrained(
                    model_path,
                    torch_dtype=torch.bfloat16,
                    device_map="auto",
                    max_memory=max_memory,
                )
            elif detected_type == "vl_moe":
                # VL+MoE model
                from transformers import AutoProcessor
                log_step(" Loading processor...")
                processor_start = time.time()
                _expansion_model_cache['processor'] = AutoProcessor.from_pretrained(model_path)
                log_step(" Processor loaded", processor_start)
                try:
                    from transformers import Qwen3VLMoeForConditionalGeneration
                    log_step(" Using Qwen3VLMoeForConditionalGeneration")
                    _expansion_model_cache['model'] = Qwen3VLMoeForConditionalGeneration.from_pretrained(
                        model_path,
                        torch_dtype=torch.bfloat16,
                        device_map="auto",
                        max_memory=max_memory,
                    )
                except ImportError:
                    # Fall back to AutoModelForVision2Seq if specific class not available
                    log_step(" Qwen3VLMoeForConditionalGeneration not available, using AutoModelForVision2Seq")
                    from transformers import AutoModelForVision2Seq
                    _expansion_model_cache['model'] = AutoModelForVision2Seq.from_pretrained(
                        model_path,
                        torch_dtype=torch.bfloat16,
                        device_map="auto",
                        max_memory=max_memory,
                        trust_remote_code=True,
                    )
            else:
                # Standard VL model
                from transformers import AutoProcessor
                log_step(" Loading processor...")
                processor_start = time.time()
                _expansion_model_cache['processor'] = AutoProcessor.from_pretrained(model_path)
                log_step(" Processor loaded", processor_start)
                from transformers import Qwen3VLForConditionalGeneration
                log_step(" Using Qwen3VLForConditionalGeneration")
                _expansion_model_cache['model'] = Qwen3VLForConditionalGeneration.from_pretrained(
                    model_path,
                    torch_dtype=torch.bfloat16,
                    device_map="auto",
                    max_memory=max_memory,
                )

            _expansion_model_cache['model_path'] = model_path
            _expansion_model_cache['model_type'] = detected_type
            log_step(" Model weights loaded", model_start)
            gpu_mem = get_gpu_memory()
            if gpu_mem:
                log_step(f" {gpu_mem}")
            log_step("LLM model ready", load_start)
        except Exception as e:
            # Do not leave a half-populated cache (e.g. processor without model) behind
            clear_cache()
            raise RuntimeError(f"Failed to load LLM model: {e}") from e
    else:
        log_step("Using cached LLM model")

    # Pre-bind so the cleanup in `finally` can release these on every path
    processor = model = inputs = outputs = None
    try:
        processor = _expansion_model_cache['processor']
        model = _expansion_model_cache['model']

        # Build the messages with proper Qwen chat format (system + user roles)
        log_step("Preparing prompt...")

        # For negative prompt expansion, inject positive prompt context if available
        effective_prompt = prompt
        if is_negative and positive_prompt:
            log_step(" Injecting positive prompt context for negative expansion")
            # Prepend context about what the image will contain
            effective_prompt = f'The image being generated is: "{positive_prompt}"\n\nNow expand this negative prompt to exclude unwanted elements: {prompt}'

        # Build user message content - this is what the user wants to expand,
        # combined with any additional user instructions
        user_message_text = effective_prompt
        if user_input and user_input.strip():
            log_step(f" Adding user instructions: {len(user_input)} chars")
            user_message_text += f"\n\nAdditional instructions: {user_input}"

        log_step(f" System prompt: {len(system_prompt) if system_prompt else 0} chars")
        log_step(f" User prompt: {len(prompt)} chars")
        log_step(f" User input: {len(user_input) if user_input else 0} chars")
        if is_negative and positive_prompt:
            log_step(f" Positive prompt context: {len(positive_prompt)} chars")
        log_step(f" Total user message: {len(user_message_text)} chars")

        # Build messages list with proper chat format
        # Use system role for system prompt, user role for the actual expansion request
        messages = []
        detected_type = _expansion_model_cache.get('model_type', 'vl')

        # Add system message if system prompt is provided
        if system_prompt and system_prompt.strip():
            messages.append({"role": "system", "content": system_prompt.strip()})

        # Build user message content based on model type and whether image is provided
        if detected_type == "text":
            # Text-only models use simple string content
            if image is not None:
                log_step(" Warning: Image provided but text-only model cannot use it")
            messages.append({"role": "user", "content": user_message_text})
        else:
            # VL models use structured content with type/text/image
            if image is not None:
                log_step(" Including image context")
                user_content = [
                    {"type": "image", "image": image},
                    {"type": "text", "text": user_message_text}
                ]
            else:
                user_content = [{"type": "text", "text": user_message_text}]
            messages.append({"role": "user", "content": user_content})

        # Apply chat template
        log_step("Applying chat template...")
        template_start = time.time()
        text_input = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        log_step("Chat template applied", template_start)

        # Process/tokenize inputs
        log_step("Tokenizing inputs...")
        tokenize_start = time.time()
        if detected_type == "text":
            # Text-only models: tokenizer returns input_ids directly
            inputs = processor(
                text_input,
                padding=True,
                return_tensors="pt",
            )
        elif image is not None:
            # VL models with image
            inputs = processor(
                text=[text_input],
                images=[image],
                padding=True,
                return_tensors="pt",
            )
        else:
            # VL models without image
            inputs = processor(
                text=[text_input],
                padding=True,
                return_tensors="pt",
            )

        # Move to device
        device = next(model.parameters()).device
        inputs = {k: v.to(device) for k, v in inputs.items()}
        input_token_count = inputs['input_ids'].shape[1]
        log_step(f"Tokenization complete: {input_token_count} input tokens", tokenize_start)

        # Generate expanded prompt
        print("-"*70, flush=True)
        log_step(f"GENERATING (max {max_new_tokens} tokens)...")
        print("-"*70, flush=True)
        generate_start = time.time()
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=True,
                temperature=temperature,
                top_p=0.9,
                top_k=50,
                repetition_penalty=1.1,
            )
        generate_time = time.time() - generate_start

        # Decode only the newly generated tokens (skip the echoed input)
        log_step("Decoding output...")
        decode_start = time.time()
        generated_ids = outputs[0][input_token_count:]
        output_token_count = len(generated_ids)
        raw_output = processor.decode(generated_ids, skip_special_tokens=True)
        log_step("Decoding complete", decode_start)

        # Calculate generation stats
        tokens_per_second = output_token_count / generate_time if generate_time > 0 else 0
        print("-"*70, flush=True)
        log_step("GENERATION COMPLETE")
        print("-"*70, flush=True)
        log_step(f" Output tokens: {output_token_count}")
        log_step(f" Generation time: {generate_time:.2f}s")
        log_step(f" Speed: {tokens_per_second:.2f} tokens/sec")

        # Print raw output to console
        print("\n" + "="*70, flush=True)
        print("RAW LLM OUTPUT:", flush=True)
        print("="*70, flush=True)
        print(raw_output, flush=True)
        print("="*70, flush=True)

        # Clean up output - keep only what follows the model's reasoning block.
        # The separator must be the literal closing tag: an empty separator is
        # contained in every string and makes str.split() raise ValueError.
        expanded_prompt = raw_output
        if "</think>" in expanded_prompt:
            log_step("Removing <think> tags from output...")
            expanded_prompt = expanded_prompt.split("</think>")[-1].strip()

        print("\n" + "="*70, flush=True)
        print("FINAL EXPANDED PROMPT:", flush=True)
        print("="*70, flush=True)
        print(expanded_prompt, flush=True)
        print("="*70 + "\n", flush=True)
    finally:
        # Unload model to free VRAM - also when generation failed, otherwise the
        # LLM would keep occupying memory the diffusion model needs
        log_step("Unloading LLM model to free VRAM...")
        unload_start = time.time()
        gpu_before = get_gpu_memory()
        if gpu_before:
            log_step(f" Before unload: {gpu_before}")

        # Properly unload Hugging Face model with device_map
        cached_model = _expansion_model_cache['model']
        if cached_model is not None:
            try:
                # Move model to CPU first to release GPU memory
                cached_model.to('cpu')
            except Exception:
                # Best effort: dispatched models may refuse to be moved
                pass
            # Clear any internal hooks from accelerate
            try:
                from accelerate.hooks import remove_hook_from_submodules
                remove_hook_from_submodules(cached_model)
            except Exception:
                # Best effort: accelerate may be missing or the model un-hooked
                pass
        clear_cache()
        # Release every local reference as well; while these names still point
        # at the weights and tensors the collector cannot reclaim them
        cached_model = processor = model = inputs = outputs = None

        # Force garbage collection multiple times for thorough cleanup
        gc.collect()
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
        gpu_after = get_gpu_memory()
        if gpu_after:
            log_step(f" After unload: {gpu_after}")
        log_step("LLM model unloaded", unload_start)

    # Final summary
    total_time = time.time() - total_start_time
    print("\n" + "="*70, flush=True)
    log_step("PROMPT EXPANSION PIPELINE COMPLETE")
    print("="*70, flush=True)
    log_step(f" Total time: {total_time:.2f}s")
    log_step(f" Input: {len(prompt)} chars -> Output: {len(expanded_prompt)} chars")
    if prompt:
        # Guarded: an empty input prompt would otherwise divide by zero
        log_step(f" Expansion ratio: {len(expanded_prompt)/len(prompt):.1f}x")
    print("="*70 + "\n", flush=True)
    return expanded_prompt.strip()
def connect_clear_prompt(button):
    """Wire the given clear button's click event to the "clear_prompt" JavaScript handler.

    No Python callback is attached and the event has no inputs or outputs.
    """
    click_args = dict(
        fn=None,
        _js="clear_prompt",
        inputs=[],
        outputs=[],
    )
    button.click(**click_args)
def update_token_counter(text, steps, styles, *, is_positive=True):
    """Return the "<tokens>/<max>" label shown next to a prompt box.

    The arguments are first passed through the before-token-counter script
    callbacks, which may replace any of them. Styles are then applied or
    comments stripped (depending on the include_styles_into_token_counters
    option), the prompt is split into its scheduled variants, and the variant
    with the highest token count is reported.

    Args:
        text: Prompt text as typed by the user.
        steps: Step count used to resolve prompt schedules.
        styles: Selected styles; used only when styles are included in the count.
        is_positive: True for the positive prompt, False for the negative one.

    Returns:
        A string "<token_count>/<max_length>", or "?/?" when the loaded model
        offers no way to measure prompt lengths.
    """
    params = script_callbacks.BeforeTokenCounterParams(text, steps, styles, is_positive=is_positive)
    script_callbacks.before_token_counter_callback(params)
    text = params.prompt
    steps = params.steps
    styles = params.styles
    is_positive = params.is_positive

    if shared.opts.include_styles_into_token_counters:
        apply_styles = shared.prompt_styles.apply_styles_to_prompt if is_positive else shared.prompt_styles.apply_negative_styles_to_prompt
        text = apply_styles(text, styles)
    else:
        text = comments.strip_comments(text).strip()

    try:
        text, _ = extra_networks.parse_prompt(text)
        if is_positive:
            _, prompt_flat_list, _ = prompt_parser.get_multicond_prompt_list([text])
        else:
            prompt_flat_list = [text]
        prompt_schedules = prompt_parser.get_learned_conditioning_prompt_schedules(prompt_flat_list, steps)
    except Exception:
        # a parsing error can happen here during typing, and we don't want to bother the user with
        # messages related to it in console
        prompt_schedules = [[[steps, text]]]

    try:
        get_prompt_lengths_on_ui = sd_models.model_data.sd_model.get_prompt_lengths_on_ui
    except Exception:
        return "?/?"
    # Explicit check instead of `assert`: assertions are stripped under `python -O`
    if get_prompt_lengths_on_ui is None:
        return "?/?"

    # Flatten in one pass; each schedule entry is a (step, prompt_text) pair
    prompts = [prompt_text for schedule in prompt_schedules for _, prompt_text in schedule]
    if not prompts:
        # Nothing to measure; max() would raise on an empty sequence
        return "?/?"

    token_count, max_length = max(
        (get_prompt_lengths_on_ui(prompt) for prompt in prompts),
        key=lambda lengths: lengths[0],
    )
    return f"{token_count}/{max_length}"
def update_negative_prompt_token_counter(*args):
    """Return the token-counter label for a negative prompt.

    Forwards all positional arguments to update_token_counter with
    is_positive=False.
    """
    return update_token_counter(*args, is_positive=False)
def setup_progressbar(*args, **kwargs):
    """Accept any arguments and do nothing; always returns None."""
    return None
def apply_setting(key, value):
    """Store a new value for the option `key` and return the value to display.

    The setting is left untouched, and a no-op `gr.update()` is returned, when:
    `value` is None, settings are frozen on the command line, the key is
    "sd_model_checkpoint" and automatic weight swapping is disabled or no
    matching checkpoint is found, or the option's component is declared
    invisible.

    Otherwise the value is coerced to the type of the option's default,
    written to `opts.data`, the option's `onchange` hook is called if the
    stored value actually changed, and the options are saved to disk.

    Args:
        key: Name of the option in `opts.data_labels`.
        value: Requested new value; for "sd_model_checkpoint" it is replaced
            by the title of the closest matching checkpoint.

    Returns:
        The option's current value as read back from `opts`, or `gr.update()`
        when nothing was changed.
    """
    if value is None:
        return gr.update()

    if shared.cmd_opts.freeze_settings:
        return gr.update()

    if key == "sd_model_checkpoint":
        # dont allow model to be swapped when model hash exists in prompt
        if opts.disable_weights_auto_swap:
            return gr.update()

        ckpt_info = sd_models.get_closet_checkpoint_match(value)
        if ckpt_info is None:
            return gr.update()
        value = ckpt_info.title

    option_info = opts.data_labels[key]
    comp_args = option_info.component_args
    if comp_args and isinstance(comp_args, dict) and comp_args.get('visible') is False:
        # Same no-op as every other bail-out; a bare `return` would hand None
        # to the bound component instead of leaving it as it is
        return gr.update()

    valtype = type(option_info.default)
    oldval = opts.data.get(key, None)
    newval = value if valtype is type(None) else valtype(value)
    opts.data[key] = newval

    # Compare against the coerced value so a mere type difference
    # (e.g. "7" vs 7) is not mistaken for a change
    if oldval != newval and option_info.onchange is not None:
        option_info.onchange()

    opts.save(shared.config_filename)
    return getattr(opts, key)
def create_output_panel(tabname, outdir, toprow=None):
    """Build the output panel for a tab by delegating to ui_common.create_output_panel.

    All three arguments are passed through positionally and the result of the
    delegated call is returned unchanged.
    """
    return ui_common.create_output_panel(tabname, outdir, toprow)
def ordered_ui_categories():
    """Yield the UI category names in display order.

    Categories named in the `ui_reorder_list` option get the odd rank
    2*i + 1 from their position i in that list; all others get the even rank
    2*j from their position j in the default category list. Categories are
    yielded by ascending rank.
    """
    user_order = {}
    for position, name in enumerate(shared.opts.ui_reorder_list):
        user_order[name.strip()] = position * 2 + 1

    def rank(indexed_category):
        default_position, category = indexed_category
        return user_order.get(category, default_position * 2)

    indexed_categories = enumerate(shared_items.ui_reorder_categories())
    for _, category in sorted(indexed_categories, key=rank):
        yield category
def create_override_settings_dropdown(tabname, row):
    """Create the hidden multiselect "Override settings" dropdown for a tab.

    The dropdown starts invisible with no choices; its change event makes it
    visible while it holds a non-empty selection and hides it otherwise.
    `row` is accepted but not used here.

    Returns:
        The created gr.Dropdown component.
    """
    dropdown = gr.Dropdown(
        [],
        label="Override settings",
        elem_id=f"{tabname}_override_settings",
        multiselect=True,
        visible=False,
    )

    change_args = dict(
        fn=lambda x: gr.Dropdown.update(visible=bool(x)),
        inputs=[dropdown],
        outputs=[dropdown],
    )
    dropdown.change(**change_args)

    return dropdown
def create_ui():
import modules.img2img
import modules.txt2img
reload_javascript()
parameters_copypaste.reset()
settings = ui_settings.UiSettings()
settings.register_settings()
scripts.scripts_current = scripts.scripts_txt2img
scripts.scripts_txt2img.initialize_scripts(is_img2img=False)
with gr.Blocks(analytics_enabled=False, head=canvas_head) as txt2img_interface:
toprow = ui_toprow.Toprow(is_img2img=False, is_compact=shared.opts.compact_prompt_box)
dummy_component = gr.Textbox(visible=False)
dummy_component_number = gr.Number(visible=False)
extra_tabs = gr.Tabs(elem_id="txt2img_extra_tabs", elem_classes=["extra-networks"])
extra_tabs.__enter__()
with gr.Tab("Generation", id="txt2img_generation") as txt2img_generation_tab, ResizeHandleRow(equal_height=False):
with ExitStack() as stack:
if shared.opts.txt2img_settings_accordion:
stack.enter_context(gr.Accordion("Open for Settings", open=False))
stack.enter_context(gr.Column(variant='compact', elem_id="txt2img_settings"))
scripts.scripts_txt2img.prepare_ui()
for category in ordered_ui_categories():
if category == "prompt":
toprow.create_inline_toprow_prompts()
elif category == "dimensions":
with FormRow():
with gr.Column(elem_id="txt2img_column_size", scale=4):
width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="txt2img_width")
height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="txt2img_height")
with gr.Column(elem_id="txt2img_dimensions_row", scale=1, elem_classes="dimensions-tools"):
res_switch_btn = ToolButton(value=switch_values_symbol, elem_id="txt2img_res_switch_btn", tooltip="Switch width/height")
if opts.dimensions_and_batch_together:
with gr.Column(elem_id="txt2img_column_batch"):
batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="txt2img_batch_count")
batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="txt2img_batch_size")
elif category == "cfg":
with gr.Row():
distilled_cfg_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='Distilled CFG Scale', value=3.5, elem_id="txt2img_distilled_cfg_scale")
cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.1, label='CFG Scale', value=7.0, elem_id="txt2img_cfg_scale")
zimage_shift = gr.Slider(minimum=0.0, maximum=30.0, step=0.05, label='Z-Image Shift', value=0.0, elem_id="txt2img_zimage_shift")
cfg_scale.change(lambda x: gr.update(interactive=(x != 1)), inputs=[cfg_scale], outputs=[toprow.negative_prompt], queue=False, show_progress=False)
elif category == "checkboxes":
with FormRow(elem_classes="checkboxes-row", variant="compact"):
pass
elif category == "accordions":
with gr.Row(elem_id="txt2img_accordions", elem_classes="accordions"):
with InputAccordion(False, label="Hires. fix", elem_id="txt2img_hr") as enable_hr:
with enable_hr.extra():
hr_final_resolution = FormHTML(value="", elem_id="txtimg_hr_finalres", label="Upscaled resolution")
with FormRow(elem_id="txt2img_hires_fix_row1", variant="compact"):
hr_upscaler = gr.Dropdown(label="Upscaler", elem_id="txt2img_hr_upscaler", choices=[*shared.latent_upscale_modes, *[x.name for x in shared.sd_upscalers]], value=shared.latent_upscale_default_mode)
hr_second_pass_steps = gr.Slider(minimum=0, maximum=150, step=1, label='Hires steps', value=0, elem_id="txt2img_hires_steps")
denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.7, elem_id="txt2img_denoising_strength")
with FormRow(elem_id="txt2img_hires_fix_row2", variant="compact"):
hr_scale = gr.Slider(minimum=1.0, maximum=4.0, step=0.05, label="Upscale by", value=2.0, elem_id="txt2img_hr_scale")
hr_resize_x = gr.Slider(minimum=0, maximum=2048, step=8, label="Resize width to", value=0, elem_id="txt2img_hr_resize_x")
hr_resize_y = gr.Slider(minimum=0, maximum=2048, step=8, label="Resize height to", value=0, elem_id="txt2img_hr_resize_y")
with FormRow(elem_id="txt2img_hires_fix_row_cfg", variant="compact"):
hr_distilled_cfg = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label="Hires Distilled CFG Scale", value=3.5, elem_id="txt2img_hr_distilled_cfg")
hr_cfg = gr.Slider(minimum=1.0, maximum=30.0, step=0.1, label="Hires CFG Scale", value=7.0, elem_id="txt2img_hr_cfg")
with FormRow(elem_id="txt2img_hires_fix_row3", variant="compact", visible=shared.opts.hires_fix_show_sampler) as hr_checkpoint_container:
hr_checkpoint_name = gr.Dropdown(label='Hires Checkpoint', elem_id="hr_checkpoint", choices=["Use same checkpoint"] + modules.sd_models.checkpoint_tiles(use_short=True), value="Use same checkpoint", scale=2)
hr_checkpoint_refresh = ToolButton(value=refresh_symbol)
def get_additional_modules():
modules_list = ['Use same choices']
if main_entry.module_list == {}:
_, modules = main_entry.refresh_models()
modules_list += list(modules)
else:
modules_list += list(main_entry.module_list.keys())
return modules_list
modules_list = get_additional_modules()
def refresh_model_and_modules():
modules_list = get_additional_modules()
return gr.update(choices=["Use same checkpoint"] + modules.sd_models.checkpoint_tiles(use_short=True)), gr.update(choices=modules_list)
hr_additional_modules = gr.Dropdown(label='Hires VAE / Text Encoder', elem_id="hr_vae_te", choices=modules_list, value=["Use same choices"], multiselect=True, scale=3)
hr_checkpoint_refresh.click(fn=refresh_model_and_modules, outputs=[hr_checkpoint_name, hr_additional_modules], show_progress=False)
with FormRow(elem_id="txt2img_hires_fix_row3b", variant="compact", visible=shared.opts.hires_fix_show_sampler) as hr_sampler_container:
hr_sampler_name = gr.Dropdown(label='Hires sampling method', elem_id="hr_sampler", choices=["Use same sampler"] + sd_samplers.visible_sampler_names(), value="Use same sampler")
hr_scheduler = gr.Dropdown(label='Hires schedule type', elem_id="hr_scheduler", choices=["Use same scheduler"] + [x.label for x in sd_schedulers.schedulers], value="Use same scheduler")
with FormRow(elem_id="txt2img_hires_fix_row4", variant="compact", visible=shared.opts.hires_fix_show_prompts) as hr_prompts_container:
with gr.Column():
hr_prompt = gr.Textbox(label="Hires prompt", elem_id="hires_prompt", show_label=False, lines=3, placeholder="Prompt for hires fix pass.\nLeave empty to use the same prompt as in first pass.", elem_classes=["prompt"])
with gr.Column():
hr_negative_prompt = gr.Textbox(label="Hires negative prompt", elem_id="hires_neg_prompt", show_label=False, lines=3, placeholder="Negative prompt for hires fix pass.\nLeave empty to use the same negative prompt as in first pass.", elem_classes=["prompt"])
hr_cfg.change(lambda x: gr.update(interactive=(x != 1)), inputs=[hr_cfg], outputs=[hr_negative_prompt], queue=False, show_progress=False)
scripts.scripts_txt2img.setup_ui_for_section(category)
elif category == "batch":
if not opts.dimensions_and_batch_together:
with FormRow(elem_id="txt2img_column_batch"):
batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="txt2img_batch_count")
batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="txt2img_batch_size")
elif category == "override_settings":
with FormRow(elem_id="txt2img_override_settings_row") as row:
override_settings = create_override_settings_dropdown('txt2img', row)
elif category == "scripts":
with FormGroup(elem_id="txt2img_script_container"):
custom_inputs = scripts.scripts_txt2img.setup_ui()
if category not in {"accordions"}:
scripts.scripts_txt2img.setup_ui_for_section(category)
hr_resolution_preview_inputs = [enable_hr, width, height, hr_scale, hr_resize_x, hr_resize_y]
for component in hr_resolution_preview_inputs:
event = component.release if isinstance(component, gr.Slider) else component.change
event(
fn=calc_resolution_hires,
inputs=hr_resolution_preview_inputs,
outputs=[hr_final_resolution],
show_progress=False,
)
event(
None,
_js="onCalcResolutionHires",
inputs=hr_resolution_preview_inputs,
outputs=[],
show_progress=False,
)
output_panel = create_output_panel("txt2img", opts.outdir_txt2img_samples, toprow)
txt2img_inputs = [
dummy_component,
toprow.prompt,
toprow.negative_prompt,
toprow.ui_styles.dropdown,
batch_count,
batch_size,
cfg_scale,
distilled_cfg_scale,
zimage_shift,
height,
width,
enable_hr,
denoising_strength,
hr_scale,
hr_upscaler,
hr_second_pass_steps,
hr_resize_x,
hr_resize_y,
hr_checkpoint_name,
hr_additional_modules,
hr_sampler_name,
hr_scheduler,
hr_prompt,
hr_negative_prompt,
hr_cfg,
hr_distilled_cfg,
override_settings,
] + custom_inputs
txt2img_outputs = [
output_panel.gallery,
output_panel.generation_info,
output_panel.infotext,
output_panel.html_log,
]
txt2img_args = dict(
fn=wrap_gradio_gpu_call(modules.txt2img.txt2img, extra_outputs=[None, '', '']),
_js="submit",
inputs=txt2img_inputs,
outputs=txt2img_outputs,
show_progress=False,
)
toprow.prompt.submit(**txt2img_args)
toprow.submit.click(**txt2img_args)
def select_gallery_image(index):
    """Return a gallery update that selects the image at ``index``.

    ``index`` is coerced with ``int()``. When the
    ``hires_button_gallery_insert`` option is set, the selection is moved one
    position further (presumably because an extra image was inserted ahead of
    it -- confirm against the upscale handler).
    """
    selected = int(index)
    shift_by_one = getattr(shared.opts, 'hires_button_gallery_insert', False)
    return gr.update(selected_index=selected + 1 if shift_by_one else selected)
txt2img_upscale_inputs = txt2img_inputs[0:1] + [output_panel.gallery, dummy_component_number, output_panel.generation_info] + txt2img_inputs[1:]
output_panel.button_upscale.click(
fn=wrap_gradio_gpu_call(modules.txt2img.txt2img_upscale, extra_outputs=[None, '', '']),
_js="submit_txt2img_upscale",
inputs=txt2img_upscale_inputs,
outputs=txt2img_outputs,
show_progress=False,
).then(fn=select_gallery_image, js="selected_gallery_index", inputs=[dummy_component], outputs=[output_panel.gallery])
res_switch_btn.click(lambda w, h: (h, w), inputs=[width, height], outputs=[width, height], show_progress=False)
toprow.restore_progress_button.click(
fn=progress.restore_progress,
_js="restoreProgressTxt2img",
inputs=[dummy_component],
outputs=[
output_panel.gallery,
output_panel.generation_info,
output_panel.infotext,
output_panel.html_log,
],
show_progress=False,
)
txt2img_paste_fields = [
PasteField(toprow.prompt, "Prompt", api="prompt"),
PasteField(toprow.negative_prompt, "Negative prompt", api="negative_prompt"),
PasteField(cfg_scale, "CFG scale", api="cfg_scale"),
PasteField(distilled_cfg_scale, "Distilled CFG Scale", api="distilled_cfg_scale"),
PasteField(width, "Size-1", api="width"),
PasteField(height, "Size-2", api="height"),
PasteField(batch_size, "Batch size", api="batch_size"),
PasteField(toprow.ui_styles.dropdown, lambda d: d["Styles array"] if isinstance(d.get("Styles array"), list) else gr.update(), api="styles"),
PasteField(denoising_strength, "Denoising strength", api="denoising_strength"),
PasteField(enable_hr, lambda d: "Denoising strength" in d and ("Hires upscale" in d or "Hires upscaler" in d or "Hires resize-1" in d), api="enable_hr"),
PasteField(hr_scale, "Hires upscale", api="hr_scale"),
PasteField(hr_upscaler, "Hires upscaler", api="hr_upscaler"),
PasteField(hr_second_pass_steps, "Hires steps", api="hr_second_pass_steps"),
PasteField(hr_resize_x, "Hires resize-1", api="hr_resize_x"),
PasteField(hr_resize_y, "Hires resize-2", api="hr_resize_y"),
PasteField(hr_checkpoint_name, "Hires checkpoint", api="hr_checkpoint_name"),
PasteField(hr_additional_modules, "Hires VAE/TE", api="hr_additional_modules"),
PasteField(hr_sampler_name, sd_samplers.get_hr_sampler_from_infotext, api="hr_sampler_name"),
PasteField(hr_scheduler, sd_samplers.get_hr_scheduler_from_infotext, api="hr_scheduler"),
PasteField(hr_sampler_container, lambda d: gr.update(visible=True) if d.get("Hires sampler", "Use same sampler") != "Use same sampler" or d.get("Hires checkpoint", "Use same checkpoint") != "Use same checkpoint" or d.get("Hires schedule type", "Use same scheduler") != "Use same scheduler" else gr.update()),
PasteField(hr_prompt, "Hires prompt", api="hr_prompt"),
PasteField(hr_negative_prompt, "Hires negative prompt", api="hr_negative_prompt"),
PasteField(hr_cfg, "Hires CFG Scale", api="hr_cfg"),
PasteField(hr_distilled_cfg, "Hires Distilled CFG Scale", api="hr_distilled_cfg"),
PasteField(hr_prompts_container, lambda d: gr.update(visible=True) if d.get("Hires prompt", "") != "" or d.get("Hires negative prompt", "") != "" else gr.update()),
*scripts.scripts_txt2img.infotext_fields
]
parameters_copypaste.add_paste_fields("txt2img", None, txt2img_paste_fields, override_settings)
parameters_copypaste.register_paste_params_button(parameters_copypaste.ParamBinding(
paste_button=toprow.paste, tabname="txt2img", source_text_component=toprow.prompt, source_image_component=None,
))
steps = scripts.scripts_txt2img.script('Sampler').steps
toprow.ui_styles.dropdown.change(fn=wrap_queued_call(update_token_counter), inputs=[toprow.prompt, steps, toprow.ui_styles.dropdown], outputs=[toprow.token_counter])
toprow.ui_styles.dropdown.change(fn=wrap_queued_call(update_negative_prompt_token_counter), inputs=[toprow.negative_prompt, steps, toprow.ui_styles.dropdown], outputs=[toprow.negative_token_counter])
toprow.token_button.click(fn=wrap_queued_call(update_token_counter), inputs=[toprow.prompt, steps, toprow.ui_styles.dropdown], outputs=[toprow.token_counter])
toprow.negative_token_button.click(fn=wrap_queued_call(update_negative_prompt_token_counter), inputs=[toprow.negative_prompt, steps, toprow.ui_styles.dropdown], outputs=[toprow.negative_token_counter])
# Connect expand prompt buttons for Prompt Expansion accordion
# Expand Positive Prompt button
toprow.expand_positive_button.click(
fn=expand_prompt_with_llm,
inputs=[
toprow.prompt,
gr.State(None), # No image for txt2img positive
toprow.llm_model_dropdown,
toprow.positive_system_prompt,
gr.State(False), # is_negative=False
gr.State(None), # positive_prompt (not needed for positive expansion)
toprow.user_prompt_input # User input to append with <|user|> tag
],
outputs=[toprow.prompt],
show_progress=True,
)
# Expand Negative Prompt button - uses positive prompt as context
toprow.expand_negative_button.click(
fn=expand_prompt_with_llm,
inputs=[
toprow.negative_prompt,
gr.State(None), # No image for txt2img negative
toprow.llm_model_dropdown,
toprow.negative_system_prompt,
gr.State(True), # is_negative=True
toprow.prompt, # Pass positive prompt as context
toprow.user_prompt_input # User input to append with <|user|> tag
],
outputs=[toprow.negative_prompt],
show_progress=True,
)
# Legacy expand prompt button (hidden, but kept for backward compatibility)
toprow.expand_prompt_button.click(
fn=expand_prompt_with_llm,
inputs=[toprow.prompt],
outputs=[toprow.prompt],
show_progress=True,
)
extra_networks_ui = ui_extra_networks.create_ui(txt2img_interface, [txt2img_generation_tab], 'txt2img')
ui_extra_networks.setup_ui(extra_networks_ui, output_panel.gallery)
extra_tabs.__exit__()
scripts.scripts_current = scripts.scripts_img2img
scripts.scripts_img2img.initialize_scripts(is_img2img=True)
with gr.Blocks(analytics_enabled=False, head=canvas_head) as img2img_interface:
toprow = ui_toprow.Toprow(is_img2img=True, is_compact=shared.opts.compact_prompt_box)
extra_tabs = gr.Tabs(elem_id="img2img_extra_tabs", elem_classes=["extra-networks"])
extra_tabs.__enter__()
with gr.Tab("Generation", id="img2img_generation") as img2img_generation_tab, ResizeHandleRow(equal_height=False):
with ExitStack() as stack:
if shared.opts.img2img_settings_accordion:
stack.enter_context(gr.Accordion("Open for Settings", open=False))
stack.enter_context(gr.Column(variant='compact', elem_id="img2img_settings"))
copy_image_buttons = []
copy_image_destinations = {}
def add_copy_image_controls(tab_name, elem):
    """Add the row of "copy to ..." buttons shown under the canvas of one tab.

    Args:
        tab_name: Key of the tab being built ('img2img', 'sketch', 'inpaint'
            or 'inpaint_sketch').
        elem: The canvas component that belongs to that tab.

    The button pointing at the tab itself is rendered disabled, and ``elem``
    is recorded in ``copy_image_destinations`` under that key. Every other
    button is queued in ``copy_image_buttons`` as ``(button, name, elem)`` so
    its click handler can be attached once all destinations are known.
    """
    targets = (
        ('to img2img', 'img2img'),
        ('to sketch', 'sketch'),
        ('to inpaint', 'inpaint'),
        ('to inpaint sketch', 'inpaint_sketch'),
    )

    with gr.Row(variant="compact", elem_id=f"img2img_copy_to_{tab_name}"):
        for caption, destination in targets:
            if destination != tab_name:
                copy_image_buttons.append((gr.Button(caption), destination, elem))
            else:
                # A tab cannot copy to itself: show a disabled placeholder and
                # remember this canvas as the target for the other tabs.
                gr.Button(caption, interactive=False)
                copy_image_destinations[destination] = elem
scripts.scripts_img2img.prepare_ui()
for category in ordered_ui_categories():
if category == "prompt":
toprow.create_inline_toprow_prompts()
if category == "image":
with gr.Tabs(elem_id="mode_img2img"):
img2img_selected_tab = gr.Number(value=0, visible=False)
with gr.TabItem('img2img', id='img2img', elem_id="img2img_img2img_tab") as tab_img2img:
init_img = ForgeCanvas(elem_id="img2img_image", height=512, no_scribbles=True)
add_copy_image_controls('img2img', init_img)
with gr.TabItem('Sketch', id='img2img_sketch', elem_id="img2img_img2img_sketch_tab") as tab_sketch:
sketch = ForgeCanvas(elem_id="img2img_sketch", height=512, scribble_color=opts.img2img_sketch_default_brush_color)
add_copy_image_controls('sketch', sketch)
with gr.TabItem('Inpaint', id='inpaint', elem_id="img2img_inpaint_tab") as tab_inpaint:
init_img_with_mask = ForgeCanvas(elem_id="img2maskimg", height=512, contrast_scribbles=opts.img2img_inpaint_mask_high_contrast, scribble_color=opts.img2img_inpaint_mask_brush_color, scribble_color_fixed=True, scribble_alpha=opts.img2img_inpaint_mask_scribble_alpha, scribble_alpha_fixed=True, scribble_softness_fixed=True)
add_copy_image_controls('inpaint', init_img_with_mask)
with gr.TabItem('Inpaint sketch', id='inpaint_sketch', elem_id="img2img_inpaint_sketch_tab") as tab_inpaint_color:
inpaint_color_sketch = ForgeCanvas(elem_id="inpaint_sketch", height=512, scribble_color=opts.img2img_inpaint_sketch_default_brush_color)
add_copy_image_controls('inpaint_sketch', inpaint_color_sketch)
with gr.TabItem('Inpaint upload', id='inpaint_upload', elem_id="img2img_inpaint_upload_tab") as tab_inpaint_upload:
init_img_inpaint = gr.Image(label="Image for img2img", show_label=False, source="upload", interactive=True, type="pil", elem_id="img_inpaint_base")
init_mask_inpaint = gr.Image(label="Mask", source="upload", interactive=True, type="pil", image_mode="RGBA", elem_id="img_inpaint_mask")
with gr.TabItem('Batch', id='batch', elem_id="img2img_batch_tab") as tab_batch:
with gr.Tabs(elem_id="img2img_batch_source"):
img2img_batch_source_type = gr.Textbox(visible=False, value="upload")
with gr.TabItem('Upload', id='batch_upload', elem_id="img2img_batch_upload_tab") as tab_batch_upload:
img2img_batch_upload = gr.Files(label="Files", interactive=True, elem_id="img2img_batch_upload")
with gr.TabItem('From directory', id='batch_from_dir', elem_id="img2img_batch_from_dir_tab") as tab_batch_from_dir:
hidden = '
Disabled when launched with --hide-ui-dir-config.' if shared.cmd_opts.hide_ui_dir_config else ''
gr.HTML(
"Process images in a directory on the same machine where the server is running." +
"
Use an empty output directory to save pictures normally instead of writing to the output directory." +
f"
Add inpaint batch mask directory to enable inpaint batch processing."
f"{hidden}
"
)
img2img_batch_input_dir = gr.Textbox(label="Input directory", **shared.hide_dirs, elem_id="img2img_batch_input_dir")
img2img_batch_output_dir = gr.Textbox(label="Output directory", **shared.hide_dirs, elem_id="img2img_batch_output_dir")
img2img_batch_inpaint_mask_dir = gr.Textbox(label="Inpaint batch mask directory (required for inpaint batch processing only)", **shared.hide_dirs, elem_id="img2img_batch_inpaint_mask_dir")
tab_batch_upload.select(fn=lambda: "upload", inputs=[], outputs=[img2img_batch_source_type])
tab_batch_from_dir.select(fn=lambda: "from dir", inputs=[], outputs=[img2img_batch_source_type])
with gr.Accordion("PNG info", open=False):
img2img_batch_use_png_info = gr.Checkbox(label="Append png info to prompts", elem_id="img2img_batch_use_png_info")
img2img_batch_png_info_dir = gr.Textbox(label="PNG info directory", **shared.hide_dirs, placeholder="Leave empty to use input directory", elem_id="img2img_batch_png_info_dir")
img2img_batch_png_info_props = gr.CheckboxGroup(["Prompt", "Negative prompt", "Seed", "CFG scale", "Sampler", "Steps", "Model hash"], label="Parameters to take from png info", info="Prompts from png info will be appended to prompts set in ui.")
img2img_tabs = [tab_img2img, tab_sketch, tab_inpaint, tab_inpaint_color, tab_inpaint_upload, tab_batch]
for i, tab in enumerate(img2img_tabs):
tab.select(fn=lambda tabnum=i: tabnum, inputs=[], outputs=[img2img_selected_tab])
def copyCanvas_img2img(background, foreground, source):
    """Produce the image to copy from one img2img canvas to another.

    Args:
        background: Background layer of the source canvas (may be None when
            no image is loaded).
        foreground: Foreground (scribble) layer of the source canvas (may be
            None).
        source: Index of the currently selected img2img tab.

    Returns:
        A ``(background, foreground)`` pair for the destination canvas. The
        destination foreground is always ``None``.

    For the sketch tabs the scribbles are flattened onto the background. If
    either layer is missing there is nothing to composite, so the background
    is passed through unchanged instead of raising inside
    ``Image.alpha_composite``.
    """
    # 1 is sketch, 3 is Inpaint sketch
    is_sketch_tab = source in (1, 3)
    if is_sketch_tab and background is not None and foreground is not None:
        return Image.alpha_composite(background, foreground), None
    return background, None
for button, name, elem in copy_image_buttons:
button.click(
fn=copyCanvas_img2img,
inputs=[elem.background, elem.foreground, img2img_selected_tab],
outputs=[copy_image_destinations[name].background, copy_image_destinations[name].foreground],
)
button.click(
fn=None,
_js=f"switch_to_{name.replace(' ', '_')}",
inputs=[],
outputs=[],
)
with FormRow():
resize_mode = gr.Radio(label="Resize mode", elem_id="resize_mode", choices=["Just resize", "Crop and resize", "Resize and fill", "Just resize (latent upscale)"], type="index", value="Just resize")
elif category == "dimensions":
with FormRow():
with gr.Column(elem_id="img2img_column_size", scale=4):
selected_scale_tab = gr.Number(value=0, visible=False)
with gr.Tabs(elem_id="img2img_tabs_resize"):
with gr.Tab(label="Resize to", id="to", elem_id="img2img_tab_resize_to") as tab_scale_to:
with FormRow():
with gr.Column(elem_id="img2img_column_size", scale=4):
width = gr.Slider(minimum=64, maximum=2048, step=8, label="Width", value=512, elem_id="img2img_width")
height = gr.Slider(minimum=64, maximum=2048, step=8, label="Height", value=512, elem_id="img2img_height")
with gr.Column(elem_id="img2img_dimensions_row", scale=1, elem_classes="dimensions-tools"):
res_switch_btn = ToolButton(value=switch_values_symbol, elem_id="img2img_res_switch_btn", tooltip="Switch width/height")
detect_image_size_btn = ToolButton(value=detect_image_size_symbol, elem_id="img2img_detect_image_size_btn", tooltip="Auto detect size from img2img")
with gr.Tab(label="Resize by", id="by", elem_id="img2img_tab_resize_by") as tab_scale_by:
scale_by = gr.Slider(minimum=0.05, maximum=4.0, step=0.01, label="Scale", value=1.0, elem_id="img2img_scale")
with FormRow():
scale_by_html = FormHTML(resize_from_to_html(0, 0, 0.0), elem_id="img2img_scale_resolution_preview")
gr.Slider(label="Unused", elem_id="img2img_unused_scale_by_slider")
button_update_resize_to = gr.Button(visible=False, elem_id="img2img_update_resize_to")
on_change_args = dict(
fn=resize_from_to_html,
_js="currentImg2imgSourceResolution",
inputs=[dummy_component, dummy_component, scale_by],
outputs=scale_by_html,
show_progress=False,
)
scale_by.change(**on_change_args)
button_update_resize_to.click(**on_change_args)
def updateWH(img, w, h):
    """Pick the width/height slider values after a source image changes.

    Args:
        img: The new source image, or a falsy value when it was cleared.
        w: Current width slider value.
        h: Current height slider value.

    Returns:
        ``(img.size[0], img.size[1])`` when an image is present and the
        ``img2img_autosize`` option is enabled; otherwise the unchanged
        ``(w, h)``.
    """
    if img and shared.opts.img2img_autosize:
        return img.size[0], img.size[1]
    return w, h
img_sources = [init_img.background, sketch.background, init_img_with_mask.background, inpaint_color_sketch.background, init_img_inpaint]
for i in img_sources:
i.change(fn=updateWH, inputs=[i, width, height], outputs=[width, height], show_progress='hidden')
i.change(**on_change_args)
tab_scale_to.select(fn=lambda: 0, inputs=[], outputs=[selected_scale_tab])
tab_scale_by.select(fn=lambda: 1, inputs=[], outputs=[selected_scale_tab])
if opts.dimensions_and_batch_together:
with gr.Column(elem_id="img2img_column_batch"):
batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="img2img_batch_count")
batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="img2img_batch_size")
elif category == "denoising":
denoising_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, label='Denoising strength', value=0.75, elem_id="img2img_denoising_strength")
elif category == "cfg":
with gr.Row():
distilled_cfg_scale = gr.Slider(minimum=0.0, maximum=30.0, step=0.1, label='Distilled CFG Scale', value=3.5, elem_id="img2img_distilled_cfg_scale")
cfg_scale = gr.Slider(minimum=1.0, maximum=30.0, step=0.1, label='CFG Scale', value=7.0, elem_id="img2img_cfg_scale")
zimage_shift = gr.Slider(minimum=0.0, maximum=30.0, step=0.05, label='Z-Image Shift', value=0.0, elem_id="img2img_zimage_shift")
image_cfg_scale = gr.Slider(minimum=0, maximum=3.0, step=0.05, label='Image CFG Scale', value=1.5, elem_id="img2img_image_cfg_scale", visible=False)
cfg_scale.change(lambda x: gr.update(interactive=(x != 1)), inputs=[cfg_scale], outputs=[toprow.negative_prompt], queue=False, show_progress=False)
elif category == "checkboxes":
with FormRow(elem_classes="checkboxes-row", variant="compact"):
pass
elif category == "accordions":
with gr.Row(elem_id="img2img_accordions", elem_classes="accordions"):
scripts.scripts_img2img.setup_ui_for_section(category)
elif category == "batch":
if not opts.dimensions_and_batch_together:
with FormRow(elem_id="img2img_column_batch"):
batch_count = gr.Slider(minimum=1, step=1, label='Batch count', value=1, elem_id="img2img_batch_count")
batch_size = gr.Slider(minimum=1, maximum=8, step=1, label='Batch size', value=1, elem_id="img2img_batch_size")
elif category == "override_settings":
with FormRow(elem_id="img2img_override_settings_row") as row:
override_settings = create_override_settings_dropdown('img2img', row)
elif category == "scripts":
with FormGroup(elem_id="img2img_script_container"):
custom_inputs = scripts.scripts_img2img.setup_ui()
elif category == "inpaint":
with FormGroup(elem_id="inpaint_controls", visible=False) as inpaint_controls:
with FormRow():
mask_blur = gr.Slider(label='Mask blur', minimum=0, maximum=64, step=1, value=4, elem_id="img2img_mask_blur")
mask_alpha = gr.Slider(label="Mask transparency", visible=False, elem_id="img2img_mask_alpha")
with FormRow():
inpainting_mask_invert = gr.Radio(label='Mask mode', choices=['Inpaint masked', 'Inpaint not masked'], value='Inpaint masked', type="index", elem_id="img2img_mask_mode")
with FormRow():
inpainting_fill = gr.Radio(label='Masked content', choices=['fill', 'original', 'latent noise', 'latent nothing'], value='original', type="index", elem_id="img2img_inpainting_fill")
with FormRow():
with gr.Column():
inpaint_full_res = gr.Radio(label="Inpaint area", choices=["Whole picture", "Only masked"], type="index", value="Whole picture", elem_id="img2img_inpaint_full_res")
with gr.Column(scale=4):
inpaint_full_res_padding = gr.Slider(label='Only masked padding, pixels', minimum=0, maximum=256, step=4, value=32, elem_id="img2img_inpaint_full_res_padding")
if category not in {"accordions"}:
scripts.scripts_img2img.setup_ui_for_section(category)
def select_img2img_tab(tab):
    """Return visibility updates for the inpaint controls and mask-alpha slider.

    Args:
        tab: Index of the selected img2img tab.

    Returns:
        A 2-tuple of Gradio updates: the first is visible for tabs 2, 3 and 4,
        the second only for tab 3.
    """
    show_inpaint_controls = tab in [2, 3, 4]
    show_mask_alpha = tab == 3
    return gr.update(visible=show_inpaint_controls), gr.update(visible=show_mask_alpha)
for i, elem in enumerate(img2img_tabs):
elem.select(
fn=lambda tab=i: select_img2img_tab(tab),
inputs=[],
outputs=[inpaint_controls, mask_alpha],
)
output_panel = create_output_panel("img2img", opts.outdir_img2img_samples, toprow)
submit_img2img_inputs = [
dummy_component,
img2img_selected_tab,
toprow.prompt,
toprow.negative_prompt,
toprow.ui_styles.dropdown,
init_img.background,
sketch.background,
sketch.foreground,
init_img_with_mask.background,
init_img_with_mask.foreground,
inpaint_color_sketch.background,
inpaint_color_sketch.foreground,
init_img_inpaint,
init_mask_inpaint,
mask_blur,
mask_alpha,
inpainting_fill,
batch_count,
batch_size,
cfg_scale,
distilled_cfg_scale,
zimage_shift,
image_cfg_scale,
denoising_strength,
selected_scale_tab,
height,
width,
scale_by,
resize_mode,
inpaint_full_res,
inpaint_full_res_padding,
inpainting_mask_invert,
img2img_batch_input_dir,
img2img_batch_output_dir,
img2img_batch_inpaint_mask_dir,
override_settings,
img2img_batch_use_png_info,
img2img_batch_png_info_props,
img2img_batch_png_info_dir,
img2img_batch_source_type,
img2img_batch_upload,
] + custom_inputs
img2img_args = dict(
fn=wrap_gradio_gpu_call(modules.img2img.img2img, extra_outputs=[None, '', '']),
_js="submit_img2img",
inputs=submit_img2img_inputs,
outputs=[
output_panel.gallery,
output_panel.generation_info,
output_panel.infotext,
output_panel.html_log,
],
show_progress=False,
)
interrogate_args = dict(
_js="get_img2img_tab_index",
inputs=[
dummy_component,
img2img_batch_input_dir,
img2img_batch_output_dir,
init_img.background,
sketch.background,
init_img_with_mask.background,
inpaint_color_sketch.background,
init_img_inpaint,
],
outputs=[toprow.prompt, dummy_component],
)
toprow.prompt.submit(**img2img_args)
toprow.submit.click(**img2img_args)
res_switch_btn.click(lambda w, h: (h, w), inputs=[width, height], outputs=[width, height], show_progress=False)
detect_image_size_btn.click(
fn=lambda w, h: (w or gr.update(), h or gr.update()),
_js="currentImg2imgSourceResolution",
inputs=[dummy_component, dummy_component],
outputs=[width, height],
show_progress=False,
)
toprow.restore_progress_button.click(
fn=progress.restore_progress,
_js="restoreProgressImg2img",
inputs=[dummy_component],
outputs=[
output_panel.gallery,
output_panel.generation_info,
output_panel.infotext,
output_panel.html_log,
],
show_progress=False,
)
toprow.button_interrogate.click(
fn=lambda *args: process_interrogate(interrogate, *args),
**interrogate_args,
)
toprow.button_deepbooru.click(
fn=lambda *args: process_interrogate(interrogate_deepbooru, *args),
**interrogate_args,
)
steps = scripts.scripts_img2img.script('Sampler').steps
# All four token-counter handlers are registered through wrap_queued_call, the
# same way the txt2img tab wires them; previously the positive token button
# was the only one calling update_token_counter directly.
toprow.ui_styles.dropdown.change(fn=wrap_queued_call(update_token_counter), inputs=[toprow.prompt, steps, toprow.ui_styles.dropdown], outputs=[toprow.token_counter])
toprow.ui_styles.dropdown.change(fn=wrap_queued_call(update_negative_prompt_token_counter), inputs=[toprow.negative_prompt, steps, toprow.ui_styles.dropdown], outputs=[toprow.negative_token_counter])
toprow.token_button.click(fn=wrap_queued_call(update_token_counter), inputs=[toprow.prompt, steps, toprow.ui_styles.dropdown], outputs=[toprow.token_counter])
toprow.negative_token_button.click(fn=wrap_queued_call(update_negative_prompt_token_counter), inputs=[toprow.negative_prompt, steps, toprow.ui_styles.dropdown], outputs=[toprow.negative_token_counter])
# Connect expand prompt buttons for Prompt Expansion accordion (img2img with image context)
# Expand Positive Prompt button - uses image from img2img
toprow.expand_positive_button.click(
fn=expand_prompt_with_llm,
inputs=[
toprow.prompt,
init_img.background, # Image context from img2img
toprow.llm_model_dropdown,
toprow.positive_system_prompt,
gr.State(False), # is_negative=False
gr.State(None), # positive_prompt (not needed for positive expansion)
toprow.user_prompt_input # User input to append with <|user|> tag
],
outputs=[toprow.prompt],
show_progress=True,
)
# Expand Negative Prompt button - uses image from img2img and positive prompt as context
toprow.expand_negative_button.click(
fn=expand_prompt_with_llm,
inputs=[
toprow.negative_prompt,
init_img.background, # Image context from img2img
toprow.llm_model_dropdown,
toprow.negative_system_prompt,
gr.State(True), # is_negative=True
toprow.prompt, # Pass positive prompt as context
toprow.user_prompt_input # User input to append with <|user|> tag
],
outputs=[toprow.negative_prompt],
show_progress=True,
)
# Legacy expand prompt button (hidden, but kept for backward compatibility)
toprow.expand_prompt_button.click(
fn=expand_prompt_with_llm,
inputs=[toprow.prompt, init_img.background],
outputs=[toprow.prompt],
show_progress=True,
)
img2img_paste_fields = [
(toprow.prompt, "Prompt"),
(toprow.negative_prompt, "Negative prompt"),
(cfg_scale, "CFG scale"),
(distilled_cfg_scale, "Distilled CFG Scale"),
(image_cfg_scale, "Image CFG scale"),
(width, "Size-1"),
(height, "Size-2"),
(batch_size, "Batch size"),
(toprow.ui_styles.dropdown, lambda d: d["Styles array"] if isinstance(d.get("Styles array"), list) else gr.update()),
(denoising_strength, "Denoising strength"),
(mask_blur, "Mask blur"),
(inpainting_mask_invert, 'Mask mode'),
(inpainting_fill, 'Masked content'),
(inpaint_full_res, 'Inpaint area'),
(inpaint_full_res_padding, 'Masked area padding'),
*scripts.scripts_img2img.infotext_fields
]
parameters_copypaste.add_paste_fields("img2img", init_img.background, img2img_paste_fields, override_settings)
parameters_copypaste.add_paste_fields("inpaint", init_img_with_mask.background, img2img_paste_fields, override_settings)
parameters_copypaste.register_paste_params_button(parameters_copypaste.ParamBinding(
paste_button=toprow.paste, tabname="img2img", source_text_component=toprow.prompt, source_image_component=None,
))
extra_networks_ui_img2img = ui_extra_networks.create_ui(img2img_interface, [img2img_generation_tab], 'img2img')
ui_extra_networks.setup_ui(extra_networks_ui_img2img, output_panel.gallery)
extra_tabs.__exit__()
with gr.Blocks(analytics_enabled=False, head=canvas_head) as space_interface:
forge_space.main_entry()
scripts.scripts_current = None
with gr.Blocks(analytics_enabled=False) as extras_interface:
ui_postprocessing.create_ui()
with gr.Blocks(analytics_enabled=False) as pnginfo_interface:
with ResizeHandleRow(equal_height=False):
with gr.Column(variant='panel'):
image = gr.Image(elem_id="pnginfo_image", label="Source", source="upload", interactive=True, type="pil", height="50vh", image_mode="RGBA")
with gr.Column(variant='panel'):
html = gr.HTML()
generation_info = gr.Textbox(visible=False, elem_id="pnginfo_generation_info")
html2 = gr.HTML()
with gr.Row():
buttons = parameters_copypaste.create_buttons(["txt2img", "img2img", "inpaint", "extras"])
for tabname, button in buttons.items():
parameters_copypaste.register_paste_params_button(parameters_copypaste.ParamBinding(
paste_button=button, tabname=tabname, source_text_component=generation_info, source_image_component=image,
))
image.change(
fn=wrap_gradio_call_no_job(modules.extras.run_pnginfo),
inputs=[image],
outputs=[html, generation_info, html2],
)
modelmerger_ui = ui_checkpoint_merger.UiCheckpointMerger()
loadsave = ui_loadsave.UiLoadsave(cmd_opts.ui_config_file)
ui_settings_from_file = loadsave.ui_settings.copy()
settings.create_ui(loadsave, dummy_component)
interfaces = [
(txt2img_interface, "Txt2img", "txt2img"),
(img2img_interface, "Img2img", "img2img"),
(space_interface, "Spaces", "space"),
(extras_interface, "Extras", "extras"),
(pnginfo_interface, "PNG Info", "pnginfo"),
(modelmerger_ui.blocks, "Checkpoint Merger", "modelmerger"),
]
interfaces += script_callbacks.ui_tabs_callback()
interfaces += [(settings.interface, "Settings", "settings")]
extensions_interface = ui_extensions.create_ui()
interfaces += [(extensions_interface, "Extensions", "extensions")]
shared.tab_names = []
for _interface, label, _ifid in interfaces:
shared.tab_names.append(label)
with gr.Blocks(theme=shared.gradio_theme, analytics_enabled=False, title="Stable Diffusion", head=canvas_head) as demo:
quicksettings_row = settings.add_quicksettings()
parameters_copypaste.connect_paste_params_buttons()
with gr.Tabs(elem_id="tabs") as tabs:
tab_order = {k: i for i, k in enumerate(opts.ui_tab_order)}
sorted_interfaces = sorted(interfaces, key=lambda x: tab_order.get(x[1], 9999))
for interface, label, ifid in sorted_interfaces:
if label in shared.opts.hidden_tabs:
continue
with gr.TabItem(label, id=ifid, elem_id=f"tab_{ifid}"):
interface.render()
if ifid not in ["extensions", "settings"]:
loadsave.add_block(interface, ifid)
loadsave.add_component(f"webui/Tabs@{tabs.elem_id}", tabs)
loadsave.setup_ui()
def tab_changed(evt: gr.SelectData):
    """Show or hide the quicksettings row when the active top-level tab changes.

    The row is hidden when the selected tab's value appears in the
    ``tabs_without_quick_settings_bar`` option (an empty list when the option
    is not defined).
    """
    tabs_without_bar = getattr(shared.opts, "tabs_without_quick_settings_bar", [])
    show_bar = evt.value not in tabs_without_bar
    return gr.update(visible=show_bar)
tabs.select(tab_changed, outputs=[quicksettings_row], show_progress=False, queue=False)
if os.path.exists(os.path.join(script_path, "notification.mp3")) and shared.opts.notification_audio:
gr.Audio(interactive=False, value=os.path.join(script_path, "notification.mp3"), elem_id="audio_notification", visible=False)
footer = shared.html("footer.html")
footer = footer.format(versions=versions_html(), api_docs="/docs" if shared.cmd_opts.api else "https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API")
gr.HTML(footer, elem_id="footer")
settings.add_functionality(demo)
update_image_cfg_scale_visibility = lambda: gr.update(visible=False)
settings.text_settings.change(fn=update_image_cfg_scale_visibility, inputs=[], outputs=[image_cfg_scale])
demo.load(fn=update_image_cfg_scale_visibility, inputs=[], outputs=[image_cfg_scale])
# Load saved prompt expansion settings on page load
from modules.ui_toprow import get_all_prompt_components, refresh_all_prompts_on_load
prompt_components = get_all_prompt_components()
if prompt_components:
# Flatten the list of component tuples
all_outputs = []
for pos, neg, user in prompt_components:
all_outputs.extend([pos, neg, user])
if all_outputs:
demo.load(fn=refresh_all_prompts_on_load, inputs=[], outputs=all_outputs)
modelmerger_ui.setup_ui(dummy_component=dummy_component, sd_model_checkpoint_component=main_entry.ui_checkpoint)
main_entry.forge_main_entry()
if ui_settings_from_file != loadsave.ui_settings:
loadsave.dump_defaults()
demo.ui_loadsave = loadsave
return demo
def versions_html():
    """Build the version summary text that is substituted into the page footer.

    Reports the git tag, the Python version (major.minor.micro), the torch
    version (``torch.__long_version__`` when present, else
    ``torch.__version__``), the xformers version (or "N/A" when
    ``shared.xformers_available`` is false), the Gradio version and a fixed
    "checkpoint: N/A" entry.
    """
    import torch
    import launch

    python_version = ".".join(map(str, sys.version_info[0:3]))
    # The commit hash is looked up here although the text below only shows the tag.
    commit = launch.commit_hash()
    tag = launch.git_tag()

    xformers_version = "N/A"
    if shared.xformers_available:
        import xformers

        xformers_version = xformers.__version__

    torch_version = getattr(torch, '__long_version__', torch.__version__)
    gradio_version = gr.__version__

    return f"""
version: {tag}
•
python: {python_version}
•
torch: {torch_version}
•
xformers: {xformers_version}
•
gradio: {gradio_version}
•
checkpoint: N/A
"""
def setup_ui_api(app):
    """Register the UI's internal helper routes and static assets on ``app``.

    Routes added (all GET):
        /internal/quicksettings-hint  name/label pairs from ``opts.data_labels``
        /internal/ping                 empty JSON object
        /internal/profile-startup      ``timer.startup_record``
        /internal/sysinfo              sysinfo text, served inline
        /internal/sysinfo-download     same text, served as an attachment

    Finally mounts the ``stable-diffusion-webui-assets`` repo directory at
    ``/webui-assets``.
    """
    from pydantic import BaseModel, Field

    class QuicksettingsHint(BaseModel):
        name: str = Field(title="Name of the quicksettings field")
        label: str = Field(title="Label of the quicksettings field")

    def quicksettings_hint():
        hints = []
        for key, info in opts.data_labels.items():
            hints.append(QuicksettingsHint(name=key, label=info.label))
        return hints

    app.add_api_route(
        "/internal/quicksettings-hint",
        quicksettings_hint,
        methods=["GET"],
        response_model=list[QuicksettingsHint],
    )
    app.add_api_route("/internal/ping", lambda: {}, methods=["GET"])
    app.add_api_route("/internal/profile-startup", lambda: timer.startup_record, methods=["GET"])

    def download_sysinfo(attachment=False):
        from fastapi.responses import PlainTextResponse

        text = sysinfo.get()

        # File name carries the current UTC time, to the minute.
        stamp = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%d-%H-%M')
        filename = f"sysinfo-{stamp}.json"

        disposition = "attachment" if attachment else "inline"
        headers = {'Content-Disposition': f'{disposition}; filename="{filename}"'}
        return PlainTextResponse(text, headers=headers)

    app.add_api_route("/internal/sysinfo", download_sysinfo, methods=["GET"])
    app.add_api_route("/internal/sysinfo-download", lambda: download_sysinfo(attachment=True), methods=["GET"])

    import fastapi.staticfiles

    assets_dir = launch_utils.repo_dir('stable-diffusion-webui-assets')
    app.mount("/webui-assets", fastapi.staticfiles.StaticFiles(directory=assets_dir), name="webui-assets")