2025-09-16 11:15:22 +08:00

372 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# 导入必要的库
import torch
import torch.nn as nn
import cv2
import numpy as np
import os
import sys
from torch.autograd import Variable
from PIL import Image
# 添加父目录到路径,以便导入模型和数据加载器
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# LPRNet字符集定义与训练时保持一致
# 包含中国省份简称、数字、字母和特殊字符
CHARS = ['', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
'W', 'X', 'Y', 'Z', 'I', 'O', '-']
# 创建字符到索引的映射字典
CHARS_DICT = {char: i for i, char in enumerate(CHARS)}
# 简化的LPRNet模型定义 - 基础卷积块
class small_basic_block(nn.Module):
def __init__(self, ch_in, ch_out):
super(small_basic_block, self).__init__()
# 定义一个小的基本卷积块,包含四个卷积层
self.block = nn.Sequential(
# 1x1卷积降低通道数
nn.Conv2d(ch_in, ch_out // 4, kernel_size=1),
nn.ReLU(),
# 3x1卷积处理水平特征
nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(3, 1), padding=(1, 0)),
nn.ReLU(),
# 1x3卷积处理垂直特征
nn.Conv2d(ch_out // 4, ch_out // 4, kernel_size=(1, 3), padding=(0, 1)),
nn.ReLU(),
# 1x1卷积恢复通道数
nn.Conv2d(ch_out // 4, ch_out, kernel_size=1),
)
def forward(self, x):
return self.block(x)
# LPRNet模型定义 - 车牌识别网络
class LPRNet(nn.Module):
def __init__(self, lpr_max_len, phase, class_num, dropout_rate):
super(LPRNet, self).__init__()
self.phase = phase
self.lpr_max_len = lpr_max_len
self.class_num = class_num
# 定义主干网络
self.backbone = nn.Sequential(
# 初始卷积层
nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1), # 0
nn.BatchNorm2d(num_features=64),
nn.ReLU(), # 2
# 最大池化层
nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1)),
# 第一个基本块
small_basic_block(ch_in=64, ch_out=128), # *** 4 ***
nn.BatchNorm2d(num_features=128),
nn.ReLU(), # 6
# 第二个池化层
nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(2, 1, 2)),
# 第二个基本块
small_basic_block(ch_in=64, ch_out=256), # 8
nn.BatchNorm2d(num_features=256),
nn.ReLU(), # 10
# 第三个基本块
small_basic_block(ch_in=256, ch_out=256), # *** 11 ***
nn.BatchNorm2d(num_features=256),
nn.ReLU(), # 13
# 第三个池化层
nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(4, 1, 2)), # 14
# Dropout层防止过拟合
nn.Dropout(dropout_rate),
# 特征提取卷积层
nn.Conv2d(in_channels=64, out_channels=256, kernel_size=(1, 4), stride=1), # 16
nn.BatchNorm2d(num_features=256),
nn.ReLU(), # 18
# 第二个Dropout层
nn.Dropout(dropout_rate),
# 分类卷积层
nn.Conv2d(in_channels=256, out_channels=class_num, kernel_size=(13, 1), stride=1), # 20
nn.BatchNorm2d(num_features=class_num),
nn.ReLU(), # 22
)
# 定义容器层,用于融合全局上下文信息
self.container = nn.Sequential(
nn.Conv2d(in_channels=448+self.class_num, out_channels=self.class_num, kernel_size=(1,1), stride=(1,1)),
)
def forward(self, x):
# 保存中间特征
keep_features = list()
for i, layer in enumerate(self.backbone.children()):
x = layer(x)
# 保存特定层的输出特征
if i in [2, 6, 13, 22]: # [2, 4, 8, 11, 22]
keep_features.append(x)
# 处理全局上下文信息
global_context = list()
for i, f in enumerate(keep_features):
# 对不同层的特征进行不同尺度的平均池化
if i in [0, 1]:
f = nn.AvgPool2d(kernel_size=5, stride=5)(f)
if i in [2]:
f = nn.AvgPool2d(kernel_size=(4, 10), stride=(4, 2))(f)
# 对特征进行归一化处理
f_pow = torch.pow(f, 2)
f_mean = torch.mean(f_pow)
f = torch.div(f, f_mean)
global_context.append(f)
# 拼接全局上下文特征
x = torch.cat(global_context, 1)
# 通过容器层处理
x = self.container(x)
# 对序列维度进行平均,得到最终输出
logits = torch.mean(x, dim=2)
return logits
# LPRNet推理类
class LPRNetInference:
def __init__(self, model_path=None, img_size=[94, 24], lpr_max_len=8, dropout_rate=0.5):
"""
初始化LPRNet推理类
Args:
model_path: 训练好的模型权重文件路径
img_size: 输入图像尺寸 [width, height]
lpr_max_len: 车牌最大长度
dropout_rate: dropout率
"""
self.img_size = img_size
self.lpr_max_len = lpr_max_len
# 检测是否有可用的CUDA设备
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# 设置默认模型路径
if model_path is None:
current_dir = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(current_dir, 'LPRNet__iteration_74000.pth')
# 初始化模型
self.model = LPRNet(lpr_max_len=lpr_max_len, phase=False, class_num=len(CHARS), dropout_rate=dropout_rate)
# 加载模型权重
if model_path and os.path.exists(model_path):
print(f"Loading LPRNet model from {model_path}")
try:
self.model.load_state_dict(torch.load(model_path, map_location=self.device))
print("LPRNet模型权重加载成功")
except Exception as e:
print(f"Warning: 加载模型权重失败: {e}. 使用随机权重.")
else:
print(f"Warning: 模型文件不存在或未指定: {model_path}. 使用随机权重.")
# 将模型移动到指定设备并设置为评估模式
self.model.to(self.device)
self.model.eval()
print(f"LPRNet模型加载完成设备: {self.device}")
print(f"模型参数数量: {sum(p.numel() for p in self.model.parameters()):,}")
def preprocess_image(self, image_array):
"""
预处理图像数组 - 使用与训练时相同的预处理方式
Args:
image_array: numpy数组格式的图像 (H, W, C)
Returns:
preprocessed_image: 预处理后的图像tensor
"""
if image_array is None:
raise ValueError("Input image is None")
# 确保图像是numpy数组
if not isinstance(image_array, np.ndarray):
raise ValueError("Input must be numpy array")
# 检查图像维度
if len(image_array.shape) != 3:
raise ValueError(f"Expected 3D image array, got {len(image_array.shape)}D")
height, width, channels = image_array.shape
if channels != 3:
raise ValueError(f"Expected 3 channels, got {channels}")
# 调整图像尺寸到模型要求的尺寸
if height != self.img_size[1] or width != self.img_size[0]:
image_array = cv2.resize(image_array, tuple(self.img_size))
# 使用与训练时相同的预处理方式
# 归一化处理减去127.5并乘以0.0078125,将像素值从[0,255]映射到[-1,1]
image_array = image_array.astype('float32')
image_array -= 127.5
image_array *= 0.0078125
# 调整维度顺序从HWC到CHW
image_array = np.transpose(image_array, (2, 0, 1)) # HWC -> CHW
# 转换为tensor并添加batch维度
image_tensor = torch.from_numpy(image_array).unsqueeze(0)
return image_tensor
def decode_prediction(self, logits):
"""
解码模型预测结果 - 使用正确的CTC贪婪解码
Args:
logits: 模型输出的logits [batch_size, num_classes, sequence_length]
Returns:
predicted_text: 预测的车牌号码
"""
# 转换为numpy进行处理
prebs = logits.cpu().detach().numpy()
preb = prebs[0, :, :] # 取第一个batch [num_classes, sequence_length]
# 贪婪解码: 对每个时间步选择最大概率的字符
preb_label = []
for j in range(preb.shape[1]): # 遍历每个时间步
preb_label.append(np.argmax(preb[:, j], axis=0))
# CTC解码去除重复字符和空白字符
no_repeat_blank_label = []
pre_c = preb_label[0]
# 处理第一个字符
if pre_c != len(CHARS) - 1: # 不是空白字符
no_repeat_blank_label.append(pre_c)
# 处理后续字符
for c in preb_label:
if (pre_c == c) or (c == len(CHARS) - 1): # 重复字符或空白字符
if c == len(CHARS) - 1:
pre_c = c
continue
no_repeat_blank_label.append(c)
pre_c = c
# 转换为字符
decoded_chars = [CHARS[idx] for idx in no_repeat_blank_label]
return ''.join(decoded_chars)
def predict(self, image_array):
"""
预测单张图像的车牌号码
Args:
image_array: numpy数组格式的图像
Returns:
prediction: 预测的车牌号码
confidence: 预测置信度
"""
try:
# 预处理图像
image = self.preprocess_image(image_array)
if image is None:
return None, 0.0
image = image.to(self.device)
# 模型推理
with torch.no_grad():
logits = self.model(image)
# logits shape: [batch_size, class_num, sequence_length]
# 计算置信度使用softmax后的最大概率平均值
probs = torch.softmax(logits, dim=1)
max_probs = torch.max(probs, dim=1)[0]
confidence = torch.mean(max_probs).item()
# 解码预测结果
prediction = self.decode_prediction(logits)
return prediction, confidence
except Exception as e:
print(f"预测图像失败: {e}")
return None, 0.0
# 全局变量,用于存储模型实例
lpr_model = None
def LPRNinitialize_model():
"""
初始化LPRNet模型
返回:
bool: 初始化是否成功
"""
global lpr_model
try:
# 模型权重文件路径
model_path = os.path.join(os.path.dirname(__file__), 'LPRNet__iteration_74000.pth')
# 创建推理对象
lpr_model = LPRNetInference(model_path)
print("LPRNet模型初始化完成")
return True
except Exception as e:
print(f"LPRNet模型初始化失败: {e}")
import traceback
traceback.print_exc()
return False
def LPRNmodel_predict(image_array):
"""
LPRNet车牌号识别接口函数
参数:
image_array: numpy数组格式的车牌图像已经过矫正处理
返回:
list: 包含最多8个字符的列表代表车牌号的每个字符
例如: ['', 'A', '1', '2', '3', '4', '5'] (蓝牌7位)
['', 'A', 'D', '1', '2', '3', '4', '5'] (绿牌8位)
"""
global lpr_model
if lpr_model is None:
print("LPRNet模型未初始化请先调用LPRNinitialize_model()")
return ['', '', '', '0', '0', '0', '0', '0']
try:
# 使用OpenCV调整图像大小到模型要求的尺寸
image_array = cv2.resize(image_array, (94, 24))
print(f"666999图片尺寸: {image_array.shape}")
# 显示修正后的图像
cv2.imshow('Resized License Plate Image (94x24)', image_array)
cv2.waitKey(1) # 非阻塞显示,允许程序继续执行
# 预测车牌号
predicted_text, confidence = lpr_model.predict(image_array)
if predicted_text is None:
print("LPRNet识别失败")
return ['', '', '', '', '0', '0', '0', '0']
print(f"LPRNet识别结果: {predicted_text}, 置信度: {confidence:.3f}")
# 将字符串转换为字符列表
char_list = list(predicted_text)
# 确保返回至少7个字符最多8个字符
if len(char_list) < 7:
# 如果识别结果少于7个字符用'0'补齐到7位
char_list.extend(['0'] * (7 - len(char_list)))
elif len(char_list) > 8:
# 如果识别结果多于8个字符截取前8个
char_list = char_list[:8]
# 如果是7位补齐到8位以保持接口一致性第8位用空字符或占位符
if len(char_list) == 7:
char_list.append('') # 添加空字符作为第8位占位符
return char_list
except Exception as e:
print(f"LPRNet识别失败: {e}")
import traceback
traceback.print_exc()
return ['', '', '', '', '0', '0', '0', '0']