import argparse

import torch
from PIL import Image
from diffusers import (
    AutoencoderKL,
    ControlNetModel,
    EulerDiscreteScheduler,
)

from pipeline_controlnet import StableDiffusionControlNetPipeline


# https://huggingface.co/spaces/AP123/IllusionDiffusion/blob/main/app.py
def center_crop_resize(img, output_size=(512, 512)):
    width, height = img.size

    # Calculate dimensions to crop to the center
    new_dimension = min(width, height)
    left = (width - new_dimension) / 2
    top = (height - new_dimension) / 2
    right = (width + new_dimension) / 2
    bottom = (height + new_dimension) / 2

    # Crop and resize
    img = img.crop((left, top, right, bottom))
    img = img.resize(output_size)

    return img


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--prompt', type=str, required=True)
    parser.add_argument('--controlnet_img', type=str, default="pano_pattern.png")
    parser.add_argument('--negative_prompt', type=str, default='low quality')
    # Controls fidelity to the ControlNet signal; may need adjusting depending on the input
    parser.add_argument('--controlnet_scale', type=float, default=1.3)
    parser.add_argument('--guidance_scale', type=float, default=7.5)
    parser.add_argument('--H', type=int, default=512)
    parser.add_argument('--W', type=int, default=1536)
    parser.add_argument('--steps', type=int, default=30)
    parser.add_argument('--seed', type=int, default=-1)
    parser.add_argument('--stride', type=int, default=64)
    opt = parser.parse_args()
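
    # Snap H and W down to multiples of the window stride so the sliding
    # windows tile the canvas exactly (assumption: the custom pipeline
    # denoises the panorama in overlapping windows); this also keeps the
    # dimensions divisible by 8 for the VAE.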
    h, w = opt.H, opt.W
    h = h - h % opt.stride
    w = w - w % opt.stride
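
    # Realistic Vision V5.1 "noVAE" ships without a baked-in VAE, so pair it
    # with the fine-tuned sd-vae-ft-mse decoder; QR Code Monster is a
    # ControlNet trained to blend a conditioning pattern into the image.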
    vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")
    controlnet = ControlNetModel.from_pretrained(
        "monster-labs/control_v1p_sd15_qrcode_monster")
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        "SG161222/Realistic_Vision_V5.1_noVAE",
        controlnet=controlnet,
        vae=vae,
        safety_checker=None,
    ).to("cuda")
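
    # Fit the conditioning pattern to the full panorama resolution so the
    # ControlNet signal covers the whole canvas.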
    control_image = Image.open(opt.controlnet_img).convert("RGB")
    control_image = center_crop_resize(control_image, output_size=(w, h))
    pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
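
    # --stride is given in pixels; the custom pipeline presumably operates on
    # the 8x-downsampled latent, hence stride=opt.stride // 8 below.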
    out = pipe(
        prompt=opt.prompt,
        negative_prompt=opt.negative_prompt,
        image=control_image,
        guidance_scale=opt.guidance_scale,
        controlnet_conditioning_scale=opt.controlnet_scale,
        # a fresh torch.Generator() starts from a fixed default seed, so use
        # None (the default RNG) when no seed is requested
        generator=torch.manual_seed(opt.seed) if opt.seed != -1 else None,
        num_inference_steps=opt.steps,
        height=h,
        width=w,
        stride=opt.stride // 8,
    ).images[0]
    out.save("out.png")
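
# Example invocation (prompt and script name are illustrative):
#   python illusion_panorama.py --prompt "medieval village, aerial view" \
#       --controlnet_img pano_pattern.png --W 1536 --H 512 --seed 42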