更新接口

This commit is contained in:
2025-10-18 11:20:11 +08:00
parent 2a77e6ca8a
commit 09c3117f12
6 changed files with 1419 additions and 149 deletions

View File

@@ -0,0 +1,546 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from PIL import Image
import cv2
from torchvision import transforms
import os
import math
# 全局变量
lightcrnn_model = None
lightcrnn_decoder = None
lightcrnn_preprocessor = None
device = None
class DepthwiseSeparableConv(nn.Module):
"""深度可分离卷积"""
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
super(DepthwiseSeparableConv, self).__init__()
# 深度卷积
self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size,
stride=stride, padding=padding, groups=in_channels, bias=False)
# 逐点卷积
self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
self.bn = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU6(inplace=True)
def forward(self, x):
x = self.depthwise(x)
x = self.pointwise(x)
x = self.bn(x)
x = self.relu(x)
return x
class ChannelAttention(nn.Module):
"""通道注意力机制"""
def __init__(self, in_channels, reduction=16):
super(ChannelAttention, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.max_pool = nn.AdaptiveMaxPool2d(1)
self.fc = nn.Sequential(
nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False),
nn.ReLU(inplace=True),
nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False)
)
self.sigmoid = nn.Sigmoid()
def forward(self, x):
avg_out = self.fc(self.avg_pool(x))
max_out = self.fc(self.max_pool(x))
out = avg_out + max_out
return x * self.sigmoid(out)
class InvertedResidual(nn.Module):
"""MobileNetV2的倒残差块"""
def __init__(self, in_channels, out_channels, stride=1, expand_ratio=6):
super(InvertedResidual, self).__init__()
self.stride = stride
self.use_residual = stride == 1 and in_channels == out_channels
hidden_dim = int(round(in_channels * expand_ratio))
layers = []
if expand_ratio != 1:
# 扩展层
layers.extend([
nn.Conv2d(in_channels, hidden_dim, 1, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=True)
])
# 深度卷积
layers.extend([
nn.Conv2d(hidden_dim, hidden_dim, 3, stride=stride, padding=1, groups=hidden_dim, bias=False),
nn.BatchNorm2d(hidden_dim),
nn.ReLU6(inplace=True),
# 线性瓶颈
nn.Conv2d(hidden_dim, out_channels, 1, bias=False),
nn.BatchNorm2d(out_channels)
])
self.conv = nn.Sequential(*layers)
def forward(self, x):
if self.use_residual:
return x + self.conv(x)
else:
return self.conv(x)
class LightweightCNN(nn.Module):
"""增强版轻量化CNN特征提取器"""
def __init__(self, num_channels=3):
super(LightweightCNN, self).__init__()
# 初始卷积层 - 适当增加通道数
self.conv1 = nn.Sequential(
nn.Conv2d(num_channels, 48, kernel_size=3, stride=1, padding=1, bias=False),
nn.BatchNorm2d(48),
nn.ReLU6(inplace=True)
)
# 增强版MobileNet风格的特征提取
self.features = nn.Sequential(
# 第一组48 -> 32
InvertedResidual(48, 32, stride=1, expand_ratio=2),
InvertedResidual(32, 32, stride=1, expand_ratio=2), # 增加一层
nn.MaxPool2d(kernel_size=2, stride=2), # 32x128 -> 16x64
# 第二组32 -> 48
InvertedResidual(32, 48, stride=1, expand_ratio=4),
InvertedResidual(48, 48, stride=1, expand_ratio=4),
nn.MaxPool2d(kernel_size=2, stride=2), # 16x64 -> 8x32
# 第三组48 -> 64
InvertedResidual(48, 64, stride=1, expand_ratio=4),
InvertedResidual(64, 64, stride=1, expand_ratio=4),
# 第四组64 -> 96
InvertedResidual(64, 96, stride=1, expand_ratio=4),
InvertedResidual(96, 96, stride=1, expand_ratio=4),
nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1)), # 8x32 -> 4x32
# 第五组96 -> 128
InvertedResidual(96, 128, stride=1, expand_ratio=4),
InvertedResidual(128, 128, stride=1, expand_ratio=4),
nn.MaxPool2d(kernel_size=(2, 1), stride=(2, 1)), # 4x32 -> 2x32
# 最后的卷积层 - 增加通道数
nn.Conv2d(128, 160, kernel_size=2, stride=1, padding=0, bias=False), # 2x32 -> 1x31
nn.BatchNorm2d(160),
nn.ReLU6(inplace=True)
)
# 通道注意力
self.channel_attention = ChannelAttention(160)
def forward(self, x):
x = self.conv1(x)
x = self.features(x)
x = self.channel_attention(x)
return x
class LightweightGRU(nn.Module):
"""增强版轻量化GRU层"""
def __init__(self, input_size, hidden_size, num_layers=2): # 默认增加到2层
super(LightweightGRU, self).__init__()
self.gru = nn.GRU(input_size, hidden_size, num_layers=num_layers,
bidirectional=True, batch_first=True, dropout=0.2 if num_layers > 1 else 0)
# 增加一个额外的线性层
self.linear1 = nn.Linear(hidden_size * 2, hidden_size * 2)
self.linear2 = nn.Linear(hidden_size * 2, hidden_size)
self.dropout = nn.Dropout(0.2) # 增加dropout率
self.norm = nn.LayerNorm(hidden_size) # 添加层归一化
def forward(self, x):
gru_out, _ = self.gru(x)
output = self.linear1(gru_out)
output = F.relu(output) # 添加激活函数
output = self.dropout(output)
output = self.linear2(output)
output = self.norm(output) # 应用层归一化
output = self.dropout(output)
return output
class LightweightCRNN(nn.Module):
"""增强版轻量化CRNN模型"""
def __init__(self, img_height, num_classes, num_channels=3, hidden_size=160): # 调整隐藏层大小
super(LightweightCRNN, self).__init__()
self.img_height = img_height
self.num_classes = num_classes
self.hidden_size = hidden_size
# 增强版轻量化CNN特征提取器
self.cnn = LightweightCNN(num_channels)
# 增强版轻量化RNN序列建模器
self.rnn = LightweightGRU(160, hidden_size, num_layers=2) # 使用更大的输入尺寸和2层GRU
# 输出层 - 添加额外的全连接层
self.fc = nn.Linear(hidden_size, hidden_size // 2)
self.dropout = nn.Dropout(0.2)
self.classifier = nn.Linear(hidden_size // 2, num_classes)
# 初始化权重
self._initialize_weights()
def _initialize_weights(self):
"""初始化模型权重"""
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
nn.init.constant_(m.weight, 1)
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def forward(self, input):
"""
input: [batch_size, channels, height, width]
output: [seq_len, batch_size, num_classes]
"""
# CNN特征提取
conv_features = self.cnn(input) # [batch_size, 160, 1, seq_len]
# 重塑为RNN输入格式
batch_size, channels, height, width = conv_features.size()
assert height == 1, f"Height should be 1, got {height}"
# [batch_size, 160, 1, seq_len] -> [batch_size, seq_len, 160]
conv_features = conv_features.squeeze(2) # [batch_size, 160, seq_len]
conv_features = conv_features.permute(0, 2, 1) # [batch_size, seq_len, 160]
# RNN序列建模
rnn_output = self.rnn(conv_features) # [batch_size, seq_len, hidden_size]
# 全连接层处理
fc_output = self.fc(rnn_output) # [batch_size, seq_len, hidden_size//2]
fc_output = F.relu(fc_output)
fc_output = self.dropout(fc_output)
# 分类
output = self.classifier(fc_output) # [batch_size, seq_len, num_classes]
# 转换为CTC期望的格式: [seq_len, batch_size, num_classes]
output = output.permute(1, 0, 2)
return output
class LightCTCDecoder:
"""轻量化CTC解码器"""
def __init__(self):
# 中国车牌字符集
# 省份简称
provinces = ['', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '']
# 字母包含I和O
letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
# 数字
digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
# 组合所有字符
self.character = provinces + letters + digits
# 添加空白字符用于CTC
self.character = ['[blank]'] + self.character
# 创建字符到索引的映射
self.dict = {char: i for i, char in enumerate(self.character)}
self.dict_reverse = {i: char for i, char in enumerate(self.character)}
self.num_classes = len(self.character)
self.blank_idx = 0
def decode_greedy(self, predictions):
"""贪婪解码"""
# 获取每个时间步的最大概率索引
indices = torch.argmax(predictions, dim=1)
# CTC解码移除重复字符和空白字符
decoded_chars = []
prev_idx = -1
for idx in indices:
idx = idx.item()
if idx != prev_idx and idx != self.blank_idx:
if idx < len(self.character):
decoded_chars.append(self.character[idx])
prev_idx = idx
return ''.join(decoded_chars)
def decode_with_confidence(self, predictions):
"""解码并返回置信度信息"""
# 应用softmax获得概率
probs = torch.softmax(predictions, dim=1)
# 贪婪解码
indices = torch.argmax(probs, dim=1)
max_probs = torch.max(probs, dim=1)[0]
# CTC解码
decoded_chars = []
char_confidences = []
prev_idx = -1
for i, idx in enumerate(indices):
idx = idx.item()
confidence = max_probs[i].item()
if idx != prev_idx and idx != self.blank_idx:
if idx < len(self.character):
decoded_chars.append(self.character[idx])
char_confidences.append(confidence)
prev_idx = idx
text = ''.join(decoded_chars)
avg_confidence = np.mean(char_confidences) if char_confidences else 0.0
return text, avg_confidence, char_confidences
class LightLicensePlatePreprocessor:
"""轻量化车牌图像预处理器"""
def __init__(self, target_height=32, target_width=128):
self.target_height = target_height
self.target_width = target_width
# 定义图像变换
self.transform = transforms.Compose([
transforms.Resize((target_height, target_width)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225])
])
def preprocess_numpy_array(self, image_array):
"""预处理numpy数组格式的图像"""
try:
# 确保图像是RGB格式
if len(image_array.shape) == 3 and image_array.shape[2] == 3:
# 如果是BGR格式转换为RGB
if image_array.dtype == np.uint8:
image_array = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)
# 转换为PIL图像
if image_array.dtype != np.uint8:
image_array = (image_array * 255).astype(np.uint8)
image = Image.fromarray(image_array)
# 应用变换
tensor = self.transform(image)
# 添加batch维度
tensor = tensor.unsqueeze(0)
return tensor
except Exception as e:
print(f"图像预处理失败: {e}")
return None
def LPRNinitialize_model():
"""
初始化轻量化CRNN模型
返回:
bool: 初始化是否成功
"""
global lightcrnn_model, lightcrnn_decoder, lightcrnn_preprocessor, device
try:
# 设置设备
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"LightCRNN使用设备: {device}")
# 初始化组件
lightcrnn_decoder = LightCTCDecoder()
lightcrnn_preprocessor = LightLicensePlatePreprocessor(target_height=32, target_width=128)
# 创建模型实例
lightcrnn_model = LightweightCRNN(
img_height=32,
num_classes=lightcrnn_decoder.num_classes,
hidden_size=160
)
# 加载模型权重
model_path = os.path.join(os.path.dirname(__file__), 'best_model.pth')
if not os.path.exists(model_path):
raise FileNotFoundError(f"模型文件不存在: {model_path}")
print(f"正在加载LightCRNN模型: {model_path}")
# 加载检查点,处理可能的模块依赖问题
try:
checkpoint = torch.load(model_path, map_location=device, weights_only=False)
except (ModuleNotFoundError, AttributeError) as e:
if 'config' in str(e) or 'Config' in str(e):
print("检测到模型文件包含config依赖尝试使用weights_only模式加载...")
try:
# 尝试使用weights_only=True来避免pickle问题
checkpoint = torch.load(model_path, map_location=device, weights_only=True)
except Exception:
# 如果还是失败创建一个更完整的mock config
import sys
import types
# 创建mock config模块
mock_config = types.ModuleType('config')
# 添加可能需要的Config类
class Config:
def __init__(self):
pass
mock_config.Config = Config
sys.modules['config'] = mock_config
try:
checkpoint = torch.load(model_path, map_location=device, weights_only=False)
finally:
# 清理临时模块
if 'config' in sys.modules:
del sys.modules['config']
else:
raise e
# 处理不同的模型保存格式
if isinstance(checkpoint, dict):
if 'model_state_dict' in checkpoint:
# 完整检查点格式
state_dict = checkpoint['model_state_dict']
print(f"检查点信息:")
print(f" - 训练轮次: {checkpoint.get('epoch', 'N/A')}")
print(f" - 最佳验证损失: {checkpoint.get('best_val_loss', 'N/A')}")
else:
# 精简模型格式(只包含权重)
print("加载精简模型(仅权重)")
state_dict = checkpoint
else:
# 直接是状态字典
state_dict = checkpoint
# 加载权重
lightcrnn_model.load_state_dict(state_dict)
lightcrnn_model.to(device)
lightcrnn_model.eval()
print("LightCRNN模型初始化完成")
# 统计模型参数
total_params = sum(p.numel() for p in lightcrnn_model.parameters())
print(f"LightCRNN模型参数数量: {total_params:,}")
return True
except Exception as e:
print(f"LightCRNN模型初始化失败: {e}")
import traceback
traceback.print_exc()
return False
def LPRNmodel_predict(image_array):
"""
轻量化CRNN车牌号识别接口函数
参数:
image_array: numpy数组格式的车牌图像已经过矫正处理
返回:
list: 包含最多8个字符的列表代表车牌号的每个字符
例如: ['', 'A', '1', '2', '3', '4', '5', ''] (蓝牌7位+占位符)
['', 'A', 'D', '1', '2', '3', '4', '5'] (绿牌8位)
"""
global lightcrnn_model, lightcrnn_decoder, lightcrnn_preprocessor, device
if lightcrnn_model is None or lightcrnn_decoder is None or lightcrnn_preprocessor is None:
print("LightCRNN模型未初始化请先调用LPRNinitialize_model()")
return ['', '', '', '0', '0', '0', '0', '0']
try:
# 预处理图像
input_tensor = lightcrnn_preprocessor.preprocess_numpy_array(image_array)
if input_tensor is None:
raise ValueError("图像预处理失败")
input_tensor = input_tensor.to(device)
# 模型推理
with torch.no_grad():
outputs = lightcrnn_model(input_tensor) # (seq_len, batch_size, num_classes)
# 移除batch维度
outputs = outputs.squeeze(1) # (seq_len, num_classes)
# CTC解码
predicted_text, confidence, char_confidences = lightcrnn_decoder.decode_with_confidence(outputs)
print(f"LightCRNN识别结果: {predicted_text}, 置信度: {confidence:.3f}")
# 将字符串转换为字符列表
char_list = list(predicted_text)
# 确保返回至少7个字符最多8个字符
if len(char_list) < 7:
# 如果识别结果少于7个字符用'0'补齐到7位
char_list.extend(['0'] * (7 - len(char_list)))
elif len(char_list) > 8:
# 如果识别结果多于8个字符截取前8个
char_list = char_list[:8]
# 如果是7位补齐到8位以保持接口一致性第8位用空字符或占位符
if len(char_list) == 7:
char_list.append('') # 添加空字符作为第8位占位符
return char_list
except Exception as e:
print(f"LightCRNN识别失败: {e}")
import traceback
traceback.print_exc()
return ['', '', '', '', '0', '0', '0', '0']
def create_lightweight_model(model_type='lightweight_crnn', img_height=32, num_classes=66, hidden_size=160):
"""创建增强版轻量化模型"""
if model_type == 'lightweight_crnn':
return LightweightCRNN(img_height, num_classes, hidden_size=hidden_size)
else:
raise ValueError(f"Unknown lightweight model type: {model_type}")
if __name__ == "__main__":
# 测试轻量化模型
print("测试LightCRNN模型...")
# 初始化模型
success = LPRNinitialize_model()
if success:
print("模型初始化成功")
# 创建测试输入
test_input = np.random.randint(0, 255, (32, 128, 3), dtype=np.uint8)
# 测试预测
result = LPRNmodel_predict(test_input)
print(f"测试预测结果: {result}")
else:
print("模型初始化失败")