在当今数字化时代,验证码作为一种重要的安全验证手段,广泛应用于各种网络场景。然而,传统的验证码识别方法往往效率低下、准确率不高。本文将介绍一种基于 ResNet18 的验证码识别方法,它能够高效、准确地识别验证码,为网络安全提供有力保障。
深度学习技术在图像识别领域取得了巨大的成功。ResNet18 作为一种经典的深度神经网络架构,具有强大的特征提取能力和良好的泛化性能。我们利用 ResNet18 的这些优势,通过迁移学习的方法将其应用于验证码识别任务,从而快速训练出一个高效的验证码识别模型。
以下是实现 ResNet18 验证码识别的代码:
import os
import random
import string
import warnings

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image, ImageDraw, ImageFont
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import models, transforms
-
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f'Using device: {device}')
-
# Synthetic captcha generator with a configurable charset and captcha length.
class CaptchaDataset(Dataset):
    """Dataset that renders random captcha images on the fly.

    Every ``__getitem__`` call draws a fresh random string from ``charset``,
    so ``idx`` does not identify a fixed sample: the same index yields a
    different image/label pair on each access.

    Args:
        length: Value reported by ``len()``, i.e. samples per pass.
        charset: Characters to sample from. Defaults to ASCII letters + digits.
        captcha_length: Number of characters drawn per captcha.
        transform: Optional callable applied to the rendered PIL image.
        font_path: TrueType font file passed to ``ImageFont.truetype``.
        font_size: Point size used when loading ``font_path``.
        image_size: ``(width, height)`` of the rendered grayscale image.
    """

    def __init__(self, length=1000, charset=None, captcha_length=5, transform=None,
                 font_path="arial.ttf", font_size=40, image_size=(100, 40)):
        self.length = length
        self.transform = transform
        self.charset = charset if charset is not None else string.ascii_letters + string.digits
        self.captcha_length = captcha_length
        self.num_classes = len(self.charset)
        self.font = self._load_font(font_path, font_size)
        self.image_size = image_size
        # Precomputed lookup so label encoding is O(1) per character.
        self._char_to_index = self._build_index(self.charset)

    @staticmethod
    def _build_index(charset):
        """Map each character to the index of its first occurrence in ``charset``."""
        index = {}
        for position, char in enumerate(charset):
            # setdefault keeps the first position, matching ``str.index``.
            index.setdefault(char, position)
        return index

    @staticmethod
    def _load_font(font_path, font_size):
        """Load the requested TrueType font, falling back to Pillow's default font."""
        try:
            return ImageFont.truetype(font_path, font_size)
        except OSError:
            # The font file is not present on every system (e.g. "arial.ttf"
            # is typically missing on Linux); degrade instead of crashing.
            warnings.warn(
                f"Could not load font {font_path!r}; using Pillow's default font instead."
            )
            return ImageFont.load_default()

    def _encode(self, text):
        """Return the class indices of ``text`` as a ``torch.long`` tensor."""
        return torch.tensor([self._char_to_index[c] for c in text], dtype=torch.long)

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        """Render one random captcha and return ``(image, label_indices)``."""
        text = ''.join(random.choices(self.charset, k=self.captcha_length))
        # White grayscale canvas with the text drawn in black.
        image = Image.new('L', self.image_size, color=255)
        draw = ImageDraw.Draw(image)
        draw.text((10, 5), text, font=self.font, fill=0)
        if self.transform:
            image = self.transform(image)
        return image, self._encode(text)
-
# Augmentation and preprocessing pipeline applied to each rendered captcha.
transform = transforms.Compose([
    # Target size is given as (height, width).
    transforms.Resize(size=(40, 100)),
    # Random rotation within +/-10 degrees.
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(brightness=0.5, contrast=0.5),
    transforms.ToTensor(),
    # Single-channel normalisation: (x - 0.5) / 0.5.
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])
-
# Character set and captcha length used for this run.
charset = string.digits  # digits only
captcha_length = 4  # four characters per captcha
dataset = CaptchaDataset(
    length=2000,
    charset=charset,
    captcha_length=captcha_length,
    transform=transform,
)

# 80/20 train/validation split.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Only the training loader shuffles its samples.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=False)
-
# Transfer learning on top of a pretrained ResNet-18 backbone.
class CaptchaModel(nn.Module):
    """ResNet-18 that predicts every character of a fixed-length captcha.

    The final fully connected layer emits ``num_classes * captcha_length``
    logits, which ``forward`` reshapes to
    ``(batch, captcha_length, num_classes)``.

    Args:
        num_classes: Size of the character set (classes per position).
        captcha_length: Number of character positions per captcha.
    """

    def __init__(self, num_classes, captcha_length):
        super().__init__()
        # Keep both dimensions on the instance so ``forward`` does not depend
        # on a module-level global with the same name.
        self.num_classes = num_classes
        self.captcha_length = captcha_length
        self.resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        # Replace the stem with a 1-input-channel convolution for grayscale
        # images; this new layer starts from fresh (non-pretrained) weights.
        self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        num_ftrs = self.resnet.fc.in_features
        # Output head sized for one classifier per character position.
        self.resnet.fc = nn.Linear(num_ftrs, num_classes * captcha_length)

    def forward(self, x):
        """Return logits reshaped to ``(batch, captcha_length, num_classes)``."""
        x = self.resnet(x)
        return x.view(-1, self.captcha_length, self.num_classes)
-
# Build the model, the loss function and the optimizer.
num_classes = len(charset)
model = CaptchaModel(num_classes=num_classes, captcha_length=captcha_length)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
-
# Helpers for saving and loading training checkpoints.
def save_checkpoint(state, filename="captcha_model_checkpoint.pth.tar"):
    """Write ``state`` to ``filename`` using ``torch.save``.

    Args:
        state: Object to serialise (the training loop passes a dict).
        filename: Destination path of the checkpoint file.
    """
    print("=> Saving checkpoint")
    torch.save(obj=state, f=filename)
-
def load_checkpoint(filename="captcha_model_checkpoint.pth.tar", map_location=None):
    """Load and return a checkpoint previously written with ``torch.save``.

    Args:
        filename: Path of the checkpoint file.
        map_location: Optional device remapping forwarded to ``torch.load``.
            When omitted and CUDA is unavailable, tensors are mapped to the
            CPU so a checkpoint saved on a GPU machine can still be loaded.

    Returns:
        The deserialised checkpoint object.
    """
    print("=> Loading checkpoint")
    if map_location is None and not torch.cuda.is_available():
        # Without this, torch.load tries to restore CUDA tensors onto a
        # device that does not exist on this host and fails.
        map_location = torch.device("cpu")
    return torch.load(filename, map_location=map_location)
-
# Training loop with optional resume from the last saved checkpoint.
def train_model(epochs, resume=False):
    """Train the global ``model`` and save a checkpoint after every epoch.

    Uses the module-level ``model``, ``optimizer``, ``criterion``,
    ``train_loader``, ``val_loader``, ``device`` and ``captcha_length``.

    Args:
        epochs: Epoch index to stop at (exclusive upper bound of the loop).
        resume: When true and the checkpoint file exists, restore the model
            and optimizer state and continue from the stored epoch.
    """
    checkpoint_path = "captcha_model_checkpoint.pth.tar"
    start_epoch = 0
    if resume and os.path.isfile(checkpoint_path):
        checkpoint = load_checkpoint(checkpoint_path)
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_epoch = checkpoint['epoch']

    # Mixed precision via torch.cuda.amp only applies on CUDA; with
    # enabled=False the scaler and autocast become plain pass-throughs, so
    # the same code path runs on CPU.
    use_amp = device.type == "cuda"
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    for epoch in range(start_epoch, epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            with torch.cuda.amp.autocast(enabled=use_amp):
                outputs = model(images)
                # One cross-entropy term per character position, summed.
                loss = sum(criterion(outputs[:, i, :], labels[:, i]) for i in range(captcha_length))

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()

        # Per-character accuracy on the validation loader.
        val_accuracy = evaluate_accuracy(val_loader)
        # max(..., 1) avoids a ZeroDivisionError when the loader is empty.
        mean_loss = running_loss / max(len(train_loader), 1)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {mean_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        # Store epoch + 1 so a resumed run starts at the next epoch.
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        })
-
# Accuracy evaluation.
def evaluate_accuracy(data_loader):
    """Return the per-character accuracy of the global ``model``.

    Switches the model to eval mode and leaves it there; the training loop
    calls ``model.train()`` again at the start of each epoch.

    Args:
        data_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        Fraction of correctly predicted characters, or ``0.0`` when the
        loader yields no labels.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # Highest-scoring class for each character position.
            predicted = torch.argmax(outputs, dim=2)
            # Count every label element instead of relying on a global length.
            total += labels.numel()
            correct += (predicted == labels).sum().item()
    if total == 0:
        # Empty loader: report zero rather than dividing by zero.
        return 0.0
    return correct / total
-
# Visualise model predictions.
def visualize_predictions(num_samples=16):
    """Plot one validation batch with true and predicted captcha text.

    Args:
        num_samples: Batch size requested from the validation set. The grid
            is sized from the number of samples actually returned, so values
            other than 16 (or a validation set smaller than ``num_samples``)
            are handled.
    """
    model.eval()
    samples, labels = next(iter(DataLoader(val_dataset, batch_size=num_samples, shuffle=True)))
    samples, labels = samples.to(device), labels.to(device)

    with torch.no_grad():
        outputs = model(samples)
        predicted = torch.argmax(outputs, dim=2)

    samples = samples.cpu()
    predicted = predicted.cpu()
    labels = labels.cpu()

    # Lay the batch out in rows of at most four; 16 samples give a 4x4 grid.
    count = samples.size(0)
    cols = min(4, count)
    rows = -(-count // cols)  # ceiling division
    # squeeze=False keeps ``axes`` two-dimensional even for a single row/column.
    fig, axes = plt.subplots(rows, cols, figsize=(2.5 * cols, 2.5 * rows), squeeze=False)
    for i, ax in enumerate(axes.flat):
        ax.axis('off')
        if i >= count:
            # Leave surplus grid cells blank.
            continue
        ax.imshow(samples[i].squeeze(), cmap='gray')
        true_text = ''.join(dataset.charset[l] for l in labels[i].tolist())
        pred_text = ''.join(dataset.charset[p] for p in predicted[i].tolist())
        ax.set_title(f'True: {true_text}\nPred: {pred_text}')
    plt.show()
-
# Train from scratch for 20 epochs.
train_model(20, resume=False)

# Display predictions for a batch of 16 validation samples.
visualize_predictions(num_samples=16)
-
通过使用 ResNet18 进行验证码识别,我们取得了较好的效果。在未来的工作中,我们可以进一步优化模型架构和训练方法,提高模型的准确率和效率。同时,我们还可以将该方法应用于其他类型的验证码识别任务,为网络安全提供更加全面的保障。
总之,ResNet18 为验证码识别提供了一种新的思路和方法:它具有强大的特征提取能力和良好的泛化性能,能够高效、准确地识别验证码。相信在未来的发展中,深度学习技术将在验证码识别领域发挥更加重要的作用。