代码有参考,忘记链接了,找到后会贴出。
/一些说明/
U-Net连接部分选用双线性插值:
crop1 = F.interpolate(enc3, size=dec4.shape[2:], mode='bilinear', align_corners=True)
数据集选用CIFAR-10。
输入图像处理部分:transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),数值为ImageNet数据集各通道的均值与标准差(torchvision预训练模型的标准输入归一化)。
预训练模型选用VGG16。
loss函数选用MSE损失与感知损失。
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision import models
from torchvision.models import VGG16_Weights
from PIL import Image
# Compute device shared by the whole script: CUDA when available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# U-Net architecture
class UNet(nn.Module):
    """Small U-Net style encoder/decoder mapping a 3-channel image to a 3-channel image.

    The encoder has four conv+ReLU stages (64, 128, 256, 512 channels); the
    first keeps the spatial size and each of the next three halves it
    (stride 2, rounding up). The decoder doubles the size three times with
    transposed convolutions. After each decoder stage the encoder feature map
    with the same channel count is bilinearly resized to the decoder's spatial
    size and concatenated on the channel axis.

    The output goes through ``tanh`` (values in [-1, 1]) and always has the
    same height and width as the input: when the input size is not a multiple
    of 8 the decoder produces a slightly larger map, which is resized back.
    """

    def __init__(self):
        super().__init__()
        # Encoder. Kept as one Sequential so parameter names stay
        # "encoder.0", "encoder.2", ... (existing checkpoints rely on them).
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
        )
        # Decoder. deconv2/deconv3/conv_final take twice the channels of the
        # previous decoder output because the skip connection is concatenated.
        self.deconv1 = nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1)
        self.relu1 = nn.ReLU()
        self.deconv2 = nn.ConvTranspose2d(512, 128, kernel_size=4, stride=2, padding=1)  # 256 + 256 in
        self.relu2 = nn.ReLU()
        self.deconv3 = nn.ConvTranspose2d(256, 64, kernel_size=4, stride=2, padding=1)  # 128 + 128 in
        self.relu3 = nn.ReLU()
        self.conv_final = nn.Conv2d(128, 3, kernel_size=3, stride=1, padding=1)  # 64 + 64 in
        self.tanh = nn.Tanh()

    def forward(self, x):
        """Run the network on ``x`` of shape (N, 3, H, W); returns (N, 3, H, W)."""
        # Encoder: slice the Sequential into its four conv+ReLU stages.
        enc1 = self.encoder[0:2](x)     # 64 channels, H x W
        enc2 = self.encoder[2:4](enc1)  # 128 channels, ~H/2
        enc3 = self.encoder[4:6](enc2)  # 256 channels, ~H/4
        enc4 = self.encoder[6:8](enc3)  # 512 channels, ~H/8

        # Decoder stage 1, then concatenate enc3.
        dec4 = self.relu1(self.deconv1(enc4))
        crop1 = F.interpolate(enc3, size=dec4.shape[2:], mode='bilinear', align_corners=True)
        dec4_up = torch.cat([dec4, crop1], dim=1)  # 256 + 256 = 512 channels

        # Decoder stage 2, then concatenate enc2.
        dec3 = self.relu2(self.deconv2(dec4_up))
        crop2 = F.interpolate(enc2, size=dec3.shape[2:], mode='bilinear', align_corners=True)
        dec3_up = torch.cat([dec3, crop2], dim=1)  # 128 + 128 = 256 channels

        # Decoder stage 3, then concatenate enc1.
        dec2 = self.relu3(self.deconv3(dec3_up))
        crop3 = F.interpolate(enc1, size=dec2.shape[2:], mode='bilinear', align_corners=True)
        dec2_up = torch.cat([dec2, crop3], dim=1)  # 64 + 64 = 128 channels

        # Final projection back to 3 channels.
        out = self.tanh(self.conv_final(dec2_up))

        # The decoder output is 8 * ceil(H / 8) wide, so it only equals the
        # input size when H and W are multiples of 8. Resize back otherwise;
        # bilinear interpolation keeps the values inside tanh's [-1, 1] range.
        if out.shape[2:] != x.shape[2:]:
            out = F.interpolate(out, size=x.shape[2:], mode='bilinear', align_corners=True)
        return out
# Diffusion model
class DiffusionModel(nn.Module):
    """Cosine-schedule noising helper wrapped around a U-Net denoiser.

    ``forward_diffusion_sample`` mixes a clean image with Gaussian noise at a
    given timestep; ``reverse_diffusion_sample`` feeds the noisy image through
    the U-Net in a single pass. The network is not conditioned on the
    timestep.

    Convention: ``noise_schedule(t)`` is the share of signal variance kept at
    step ``t`` — 1 at ``t = 0`` and 0 at ``t = timesteps - 1`` — so a larger
    ``t`` means a noisier sample.
    """

    def __init__(self, timesteps=1000):
        """Create the model.

        Args:
            timesteps: number of entries in the noise schedule.
        """
        super().__init__()
        self.timesteps = timesteps
        self.network = UNet()
        self.network.to(device)

    def forward(self, x):
        """Apply the denoising network to ``x``."""
        return self.network(x)

    def noise_schedule(self, t):
        """Return the cosine schedule value(s) ``0.5 * (1 + cos(pi * s))`` at step ``t``.

        ``s`` runs linearly from 0 to 1 over ``self.timesteps`` steps.

        Args:
            t: an int or an integer tensor of timestep indices.

        Returns:
            A tensor with the schedule values selected by ``t``.
        """
        # Build the table on the same device as the index tensor so that a
        # CUDA ``t`` can index it; plain ints use the default (CPU) device.
        table_device = t.device if torch.is_tensor(t) else None
        steps = torch.linspace(0, 1, self.timesteps, device=table_device)
        alpha_t = 0.5 * (1 + torch.cos(steps * torch.pi))
        return alpha_t[t]

    def forward_diffusion_sample(self, x0, t):
        """Return a noisy version of ``x0`` at timestep ``t``.

        Computes ``sqrt(alpha) * x0 + sqrt(1 - alpha) * noise`` with fresh
        standard-normal noise, so ``t = 0`` returns ``x0`` unchanged and the
        last timestep returns pure noise.

        Args:
            x0: clean images, shape (N, C, H, W).
            t: an int (same step for the whole batch) or an integer tensor
                with one step per batch element.
        """
        noise = torch.randn_like(x0)
        # Shape (N, 1, 1, 1) (or (1, 1, 1, 1) for an int t) so the
        # coefficients broadcast over channels and pixels.
        alpha = self.noise_schedule(t).view(-1, 1, 1, 1).to(x0.device)
        return torch.sqrt(alpha) * x0 + torch.sqrt(1 - alpha) * noise

    def reverse_diffusion_sample(self, xt, t):
        """Estimate the clean image from ``xt`` in one network pass.

        ``t`` is accepted for interface symmetry but is not used.
        """
        return self.network(xt)
# VGG-based loss: frozen feature extractor.
# Only the convolutional ``features`` part of the pretrained VGG16 is kept; it
# is moved to the compute device, put in eval mode, and excluded from autograd
# parameter updates.
vgg = models.vgg16(weights=VGG16_Weights.DEFAULT).features
vgg = vgg.eval().to(device)
vgg.requires_grad_(False)
class PerceptualLoss(nn.Module):
    """Mean-squared error between the feature maps of two image batches.

    Args:
        vgg: the module used as feature extractor; it is called on both
            inputs and its outputs are compared.
    """

    def __init__(self, vgg):
        super().__init__()
        self.vgg = vgg

    def forward(self, x, y):
        """Return ``mse(vgg(x), vgg(y))`` as a scalar tensor."""
        feats_x, feats_y = self.vgg(x), self.vgg(y)
        return F.mse_loss(feats_x, feats_y)
# Perceptual loss instance backed by the module-level ``vgg`` feature extractor.
perceptual_loss_fn = PerceptualLoss(vgg)
class CustomLoss(nn.Module):
    """Pixel-wise MSE plus a weighted perceptual term.

    ``total = mse(x_reconstructed, x_real)
            + perceptual_weight * perceptual_loss_fn(x_reconstructed, x_real)``

    Args:
        perceptual_loss_fn: callable taking ``(x_reconstructed, x_real)`` and
            returning a scalar tensor.
        perceptual_weight: multiplier applied to the perceptual term
            (default 0.1, the value previously hard-coded).
    """

    def __init__(self, perceptual_loss_fn, perceptual_weight=0.1):
        super().__init__()
        self.perceptual_loss_fn = perceptual_loss_fn
        self.perceptual_weight = perceptual_weight

    def forward(self, x_reconstructed, x_real):
        """Return the combined loss as a scalar tensor."""
        # Pixel-space MSE.
        mse_loss = F.mse_loss(x_reconstructed, x_real)
        # Feature-space (perceptual) loss.
        perceptual_loss = self.perceptual_loss_fn(x_reconstructed, x_real)
        # Total loss = MSE + weight * perceptual loss.
        return mse_loss + self.perceptual_weight * perceptual_loss
# Training loss: pixel MSE combined with the perceptual loss defined above it in this file.
custom_loss_fn = CustomLoss(perceptual_loss_fn)
# Data loading and training loop
# Inputs are scaled to [0, 1] by ToTensor and then normalized per channel with
# the mean/std commonly used for torchvision's ImageNet-pretrained models.
# NOTE(review): UNet ends in tanh (outputs in [-1, 1]) while targets normalized
# this way span roughly [-2.1, 2.6]; the network cannot reach the extremes.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])
dataset = datasets.CIFAR10(root='./data_cifa', train=True, download=True, transform=transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)
model = DiffusionModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
for epoch in range(20):
    for x0, _ in dataloader:
        # The input needs no gradient: only the model parameters are optimized,
        # so tracking gradients w.r.t. x0 would just add backward work.
        x0 = x0.to(device)
        # One random timestep per sample. t is created on the CPU;
        # forward_diffusion_sample moves the resulting coefficients to x0's device.
        t = torch.randint(0, model.timesteps, (x0.size(0),), dtype=torch.long)
        # xt and the reconstruction are already on x0's device.
        xt = model.forward_diffusion_sample(x0, t)
        x0_reconstructed = model.reverse_diffusion_sample(xt, t)
        loss = custom_loss_fn(x0_reconstructed, x0)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Reports the loss of the last batch of the epoch.
    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")
torch.save(model.state_dict(), 'diffusion_model_cifa_epoch20.pth')
print("模型已保存到 diffusion_model_cifa_epoch20.pth")
/下载图片进行模型验证/
将各通道反标准化回[0, 1],保存为图片时再转换为[0, 255]。
def load_image(filepath, transform=None):
    """Open an image file as RGB and optionally apply ``transform``.

    The image is always converted to 3-channel RGB so that grayscale, RGBA or
    CMYK files work with the 3-channel normalization and network used in this
    script.

    Args:
        filepath: path of the image file to open.
        transform: optional callable applied to the PIL image.

    Returns:
        The RGB PIL image, or whatever ``transform`` returns for it.
    """
    # convert() reads the pixel data and returns a new image, so the file
    # handle can be closed as soon as the with-block ends.
    with Image.open(filepath) as img:
        image = img.convert('RGB')
    if transform is not None:
        image = transform(image)
    return image
# Load a single image
image_path = '1.jpg'  # replace with the path to your image
image = load_image(image_path, transform).unsqueeze(0)  # add the batch dimension
image = image.to(device)
# Build the model and load the trained weights
model = DiffusionModel()
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only machine.
# NOTE(review): torch.load unpickles the file — only load checkpoints you trust
# (newer PyTorch versions offer weights_only=True for this).
# NOTE(review): the training section of this file saves
# 'diffusion_model_cifa_epoch20.pth'; confirm this different file name is intended.
model.load_state_dict(
    torch.load('diffusion_model_cifa_epoch10_IMPROVE.pth', map_location=device)
)
model.eval()  # switch to evaluation mode
model.to(device)
def denormalize(tensor, mean, std):
    """Undo a per-channel ``(x - mean) / std`` normalization.

    Computes ``tensor * std + mean`` channel by channel and returns the result
    as a new tensor; the input is left untouched.

    Args:
        tensor: image tensor of shape (C, H, W) or a batch (N, C, H, W).
        mean: per-channel means (tensor or sequence of C numbers).
        std: per-channel standard deviations (tensor or sequence of C numbers).

    Returns:
        A tensor of the same shape as ``tensor`` with the normalization undone.
    """
    # Shape (C, 1, 1) broadcasts over H and W, and over a leading batch axis.
    mean = torch.as_tensor(mean, dtype=tensor.dtype, device=tensor.device).view(-1, 1, 1)
    std = torch.as_tensor(std, dtype=tensor.dtype, device=tensor.device).view(-1, 1, 1)
    return tensor * std + mean
# Denoise the image with the model: add noise at timestep 199, then run the
# network once on the noisy image. No gradients are needed for inference.
with torch.no_grad():
    noisy_image = model.forward_diffusion_sample(image, 199)
    reconstructed_image = model.reverse_diffusion_sample(noisy_image, 199)

# Drop the batch dimension and move the result to the CPU.
reconstructed_image = reconstructed_image.squeeze(0).detach().cpu()

# Undo the input normalization (same per-channel mean/std as the transform).
channel_mean = torch.tensor([0.485, 0.456, 0.406])
channel_std = torch.tensor([0.229, 0.224, 0.225])
reconstructed_image = denormalize(reconstructed_image, channel_mean, channel_std)

# Clamp to [0, 1], convert to a PIL image, and save it.
to_pil = transforms.ToPILImage()
reconstructed_image = to_pil(reconstructed_image.clamp(0, 1))
reconstructed_image.save('reconstructed_image.jpg')
print("重建后的图像已保存为 reconstructed_image.jpg")

# Optional: display the reconstructed image.
reconstructed_image.show()
/验证效果/
原图像1如图:
结果1如图:
原图像2如图:
结果2如图:
原图像3如图:
结果3如图:
代码小白记录生活。请轻喷,有问题请指出,感谢您的观看。
1万+

被折叠的 条评论
为什么被折叠?



