Stable Diffusion 文生图代码解读

以下代码来自 diffusers 的官方文档,它更清楚地说明了文生图 pipeline 的执行过程。

import os

from PIL import Image
import torch
from transformers import CLIPTextModel,CLIPTokenizer
from diffusers import AutoencoderKL,UNet2DConditionModel,UniPCMultistepScheduler

# Local snapshot of the Stable Diffusion v1.5 weights. The leading "~" is
# expanded here because the string is otherwise passed to from_pretrained()
# verbatim, and a literal "~" does not name an existing directory.
MODEL_PATH = os.path.expanduser(
    "~/.cache/modelscope/hub/AI-ModelScope/stable-diffusion-v1-5"
)

# VAE: decodes latents.
vae = AutoencoderKL.from_pretrained(
    MODEL_PATH, subfolder="vae", use_safetensors=True
)

# Tokenizer + text encoder: encode the prompt.
tokenizer = CLIPTokenizer.from_pretrained(MODEL_PATH, subfolder="tokenizer")
text_encoder = CLIPTextModel.from_pretrained(
    MODEL_PATH, subfolder="text_encoder", use_safetensors=True
)

# UNet: predicts the noise.
unet = UNet2DConditionModel.from_pretrained(
    MODEL_PATH, subfolder="unet", use_safetensors=True
)

# Scheduler: drives the denoising that produces the image.
scheduler = UniPCMultistepScheduler.from_pretrained(
    MODEL_PATH, subfolder="scheduler"
)

# Prefer CUDA, but fall back to CPU so the script still runs on machines
# without a usable GPU instead of failing on the first .to() call.
torch_device = "cuda" if torch.cuda.is_available() else "cpu"
vae.to(torch_device)
text_encoder.to(torch_device)
unet.to(torch_device)

# Generation settings.
prompt = "a photograph of an astronaut riding a horse"
negative_prompt = "lowres, bad anatomy, worst quality, low quality"
height = width = 512
num_inference_steps = 25
# Backward-compatible alias: the original script spelled the name this way,
# and later code may still refer to it.
num_inferince_steps = num_inference_steps
guidance_scale = 7.5
# Seeded generator created on the same device the models were moved to,
# rather than on a hard-coded "cuda".
generator = torch.Generator(torch_device).manual_seed(42)
batch_size = 1

# Tokenize the positive and negative prompts together as one batch, in the
# order [prompt, negative_prompt]; the text embeddings are produced from these
# tokens. Both entries are padded/truncated to the tokenizer's maximum length.
text_input = tokenizer(
    [prompt, negative_prompt],
    padding="max_length",
    max_length=tokenizer.model_max_length,
    truncation=True,
    return_tensors="pt",
)
with torch.no_grad(

你可能感兴趣的:(Diffusers,stable,diffusion,人工智能,深度学习)