Skip to content

Commit 99f6082

Browse files
authored
[sd3] make sure height and width are divisible by 16 (#9573)
* check size
* up
1 parent 7f323f0 commit 99f6082

File tree

3 files changed

+33
-6
lines changed

3 files changed

+33
-6
lines changed

src/diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,9 @@ def __init__(
251251
if hasattr(self, "transformer") and self.transformer is not None
252252
else 128
253253
)
254+
self.patch_size = (
255+
self.transformer.config.patch_size if hasattr(self, "transformer") and self.transformer is not None else 2
256+
)
254257

255258
# Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3.StableDiffusion3Pipeline._get_t5_prompt_embeds
256259
def _get_t5_prompt_embeds(
@@ -577,8 +580,14 @@ def check_inputs(
577580
callback_on_step_end_tensor_inputs=None,
578581
max_sequence_length=None,
579582
):
580-
if height % 8 != 0 or width % 8 != 0:
581-
raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
583+
if (
584+
height % (self.vae_scale_factor * self.patch_size) != 0
585+
or width % (self.vae_scale_factor * self.patch_size) != 0
586+
):
587+
raise ValueError(
588+
f"`height` and `width` have to be divisible by {self.vae_scale_factor * self.patch_size} but are {height} and {width}."
589+
f"You can use height {height - height % (self.vae_scale_factor * self.patch_size)} and width {width - width % (self.vae_scale_factor * self.patch_size)}."
590+
)
582591

583592
if callback_on_step_end_tensor_inputs is not None and not all(
584593
k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs

src/diffusers/pipelines/pag/pipeline_pag_sd_3.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,9 @@ def __init__(
212212
if hasattr(self, "transformer") and self.transformer is not None
213213
else 128
214214
)
215+
self.patch_size = (
216+
self.transformer.config.patch_size if hasattr(self, "transformer") and self.transformer is not None else 2
217+
)
215218

216219
self.set_pag_applied_layers(
217220
pag_applied_layers, pag_attn_processors=(PAGCFGJointAttnProcessor2_0(), PAGJointAttnProcessor2_0())
@@ -542,8 +545,14 @@ def check_inputs(
542545
callback_on_step_end_tensor_inputs=None,
543546
max_sequence_length=None,
544547
):
545-
if height % 8 != 0 or width % 8 != 0:
546-
raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
548+
if (
549+
height % (self.vae_scale_factor * self.patch_size) != 0
550+
or width % (self.vae_scale_factor * self.patch_size) != 0
551+
):
552+
raise ValueError(
553+
f"`height` and `width` have to be divisible by {self.vae_scale_factor * self.patch_size} but are {height} and {width}."
554+
f"You can use height {height - height % (self.vae_scale_factor * self.patch_size)} and width {width - width % (self.vae_scale_factor * self.patch_size)}."
555+
)
547556

548557
if callback_on_step_end_tensor_inputs is not None and not all(
549558
k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs

src/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,9 @@ def __init__(
203203
if hasattr(self, "transformer") and self.transformer is not None
204204
else 128
205205
)
206+
self.patch_size = (
207+
self.transformer.config.patch_size if hasattr(self, "transformer") and self.transformer is not None else 2
208+
)
206209

207210
def _get_t5_prompt_embeds(
208211
self,
@@ -525,8 +528,14 @@ def check_inputs(
525528
callback_on_step_end_tensor_inputs=None,
526529
max_sequence_length=None,
527530
):
528-
if height % 8 != 0 or width % 8 != 0:
529-
raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
531+
if (
532+
height % (self.vae_scale_factor * self.patch_size) != 0
533+
or width % (self.vae_scale_factor * self.patch_size) != 0
534+
):
535+
raise ValueError(
536+
f"`height` and `width` have to be divisible by {self.vae_scale_factor * self.patch_size} but are {height} and {width}."
537+
f"You can use height {height - height % (self.vae_scale_factor * self.patch_size)} and width {width - width % (self.vae_scale_factor * self.patch_size)}."
538+
)
530539

531540
if callback_on_step_end_tensor_inputs is not None and not all(
532541
k in self._callback_tensor_inputs for k in callback_on_step_end_tensor_inputs

0 commit comments

Comments
 (0)