Compare commits

..

1 Commits

Author SHA1 Message Date
1c8e15bcd8 更新接口 2025-09-04 00:10:18 +08:00
11 changed files with 539 additions and 873 deletions

View File

@ -5,4 +5,8 @@
<orderEntry type="jdk" jdkName="cnm" jdkType="Python SDK" /> <orderEntry type="jdk" jdkName="cnm" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="sourceFolder" forTests="false" />
</component> </component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
</module> </module>

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,328 @@
import torch
import torch.nn as nn
import cv2
import numpy as np
import os
import sys
from torch.autograd import Variable
from PIL import Image
# 添加父目录到路径,以便导入模型和数据加载器
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# LPRNet character set; must stay consistent with the one used at training time.
# The last entry ('-') is the index the decoder in this file treats as the CTC blank.
# NOTE(review): the first 31 entries are empty strings in this view — presumably
# province abbreviations that were lost somewhere; confirm against the training
# charset. As written, CHARS_DICT keeps a single '' key (mapped to index 30).
CHARS = ['', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K',
'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
'W', 'X', 'Y', 'Z', 'I', 'O', '-']
# Reverse lookup: character -> index in CHARS.
CHARS_DICT = {char: i for i, char in enumerate(CHARS)}
# Simplified LPRNet model definition
class small_basic_block(nn.Module):
    """Bottleneck block: 1x1 reduce -> 3x1 conv -> 1x3 conv -> 1x1 expand.

    The inner convolutions operate on ``ch_out // 4`` channels, and a ReLU
    follows every convolution except the final 1x1 expansion.

    Args:
        ch_in: number of input channels.
        ch_out: number of output channels.
    """

    def __init__(self, ch_in, ch_out):
        super(small_basic_block, self).__init__()
        mid = ch_out // 4
        layers = [
            nn.Conv2d(ch_in, mid, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(mid, mid, kernel_size=(3, 1), padding=(1, 0)),
            nn.ReLU(),
            nn.Conv2d(mid, mid, kernel_size=(1, 3), padding=(0, 1)),
            nn.ReLU(),
            nn.Conv2d(mid, ch_out, kernel_size=1),
        ]
        # The attribute name and layer order determine the state_dict keys,
        # so both are kept exactly as the checkpoint expects.
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the four convolutions in sequence and return the result."""
        return self.block(x)
class LPRNet(nn.Module):
    """LPRNet recognition network: convolutional backbone plus global context.

    Args:
        lpr_max_len: stored on the instance; not used by the layers built here.
        phase: stored on the instance; not used by the layers built here.
        class_num: number of output classes (channel count of the result).
        dropout_rate: probability passed to the two ``nn.Dropout`` layers.
    """

    def __init__(self, lpr_max_len, phase, class_num, dropout_rate):
        super(LPRNet, self).__init__()
        self.phase = phase
        self.lpr_max_len = lpr_max_len
        self.class_num = class_num
        # The MaxPool3d layers also stride over the channel axis, which is why
        # the blocks that follow them take 64 input channels.
        self.backbone = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1),  # 0
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),  # 2
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 1, 1)),
            small_basic_block(ch_in=64, ch_out=128),  # *** 4 ***
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),  # 6
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(2, 1, 2)),
            small_basic_block(ch_in=64, ch_out=256),  # 8
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),  # 10
            small_basic_block(ch_in=256, ch_out=256),  # *** 11 ***
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),  # 13
            nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(4, 1, 2)),  # 14
            nn.Dropout(dropout_rate),
            nn.Conv2d(in_channels=64, out_channels=256, kernel_size=(1, 4), stride=1),  # 16
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),  # 18
            nn.Dropout(dropout_rate),
            nn.Conv2d(in_channels=256, out_channels=class_num, kernel_size=(13, 1), stride=1),  # 20
            nn.BatchNorm2d(num_features=class_num),
            nn.ReLU(),  # 22
        )
        # 448 = 64 + 128 + 256: channels of the three intermediate taps that
        # are concatenated with the class_num-channel backbone output.
        self.container = nn.Sequential(
            nn.Conv2d(in_channels=448 + self.class_num, out_channels=self.class_num,
                      kernel_size=(1, 1), stride=(1, 1)),
        )

    def forward(self, x):
        """Run the backbone, fuse the tapped features and average over dim 2."""
        tap_indices = (2, 6, 13, 22)  # [2, 4, 8, 11, 22]
        tapped = []
        for idx, layer in enumerate(self.backbone):
            x = layer(x)
            if idx in tap_indices:
                tapped.append(x)

        context = []
        for pos, feat in enumerate(tapped):
            # Pool the earlier taps so they can be concatenated with the last one.
            if pos < 2:
                feat = nn.functional.avg_pool2d(feat, kernel_size=5, stride=5)
            elif pos == 2:
                feat = nn.functional.avg_pool2d(feat, kernel_size=(4, 10), stride=(4, 2))
            # Scale each map by the mean of its squared activations.
            context.append(feat / feat.pow(2).mean())

        fused = self.container(torch.cat(context, 1))
        return fused.mean(dim=2)
class LPRNetInference:
    """Inference wrapper around LPRNet: load weights, preprocess, predict, decode."""

    def __init__(self, model_path=None, img_size=(94, 24), lpr_max_len=8, dropout_rate=0.5):
        """Build the model, try to load its weights and switch to eval mode.

        Args:
            model_path: path to the trained weight file. When None, the file
                'LPRNet__iteration_74000.pth' next to this module is used.
            img_size: model input size as (width, height).
            lpr_max_len: maximum plate length, forwarded to LPRNet.
            dropout_rate: dropout probability, forwarded to LPRNet.

        If the weight file is missing or fails to load, a warning is printed
        and the model keeps its randomly initialised weights.
        """
        # Keep a private list copy so neither the default nor a caller-owned
        # sequence is shared with (or mutated through) this instance.
        self.img_size = list(img_size)
        self.lpr_max_len = lpr_max_len
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Default weight file lives next to this module.
        if model_path is None:
            current_dir = os.path.dirname(os.path.abspath(__file__))
            model_path = os.path.join(current_dir, 'LPRNet__iteration_74000.pth')

        self.model = LPRNet(lpr_max_len=lpr_max_len, phase=False,
                            class_num=len(CHARS), dropout_rate=dropout_rate)

        if model_path and os.path.exists(model_path):
            print(f"Loading LPRNet model from {model_path}")
            try:
                # NOTE(security): torch.load unpickles the file, so only
                # checkpoints from a trusted source should be loaded here.
                self.model.load_state_dict(torch.load(model_path, map_location=self.device))
                print("LPRNet模型权重加载成功")
            except Exception as e:
                # Deliberate best effort: keep running with random weights.
                print(f"Warning: 加载模型权重失败: {e}. 使用随机权重.")
        else:
            print(f"Warning: 模型文件不存在或未指定: {model_path}. 使用随机权重.")

        self.model.to(self.device)
        self.model.eval()
        print(f"LPRNet模型加载完成设备: {self.device}")
        print(f"模型参数数量: {sum(p.numel() for p in self.model.parameters()):,}")

    def preprocess_image(self, image_array):
        """Convert an (H, W, 3) image array into a normalised model input.

        Args:
            image_array: numpy array with three dimensions and three channels.

        Returns:
            float32 tensor of shape (1, 3, height, width), where the values
            are ``(pixel - 127.5) * 0.0078125``.

        Raises:
            ValueError: if the input is None, is not a numpy array, is not
                3-dimensional, or does not have exactly 3 channels.
        """
        if image_array is None:
            raise ValueError("Input image is None")
        if not isinstance(image_array, np.ndarray):
            raise ValueError("Input must be numpy array")
        if len(image_array.shape) != 3:
            raise ValueError(f"Expected 3D image array, got {len(image_array.shape)}D")
        height, width, channels = image_array.shape
        if channels != 3:
            raise ValueError(f"Expected 3 channels, got {channels}")

        # Resize only when the input does not already match (width, height).
        if height != self.img_size[1] or width != self.img_size[0]:
            image_array = cv2.resize(image_array, tuple(self.img_size))

        # astype() returns a copy, so the in-place arithmetic below never
        # touches the caller's array.
        image_array = image_array.astype('float32')
        image_array -= 127.5
        image_array *= 0.0078125
        image_array = np.transpose(image_array, (2, 0, 1))  # HWC -> CHW

        # Add the batch dimension.
        return torch.from_numpy(image_array).unsqueeze(0)

    def decode_prediction(self, logits):
        """Greedy CTC decode of the first batch item.

        Args:
            logits: model output, indexed as [batch, class, time step].

        Returns:
            The decoded string: per-step argmax with consecutive repeats
            collapsed and blanks (the last index of CHARS) removed. An empty
            sequence decodes to ''.
        """
        step_scores = logits.cpu().detach().numpy()[0]  # [class, time step]
        best_per_step = np.argmax(step_scores, axis=0)

        blank = len(CHARS) - 1
        decoded = []
        previous = blank
        for idx in best_per_step:
            # Emit a character only when it is not a blank and differs from
            # the immediately preceding step; a blank in between lets the
            # same character be emitted again.
            if idx != blank and idx != previous:
                decoded.append(CHARS[idx])
            previous = idx
        return ''.join(decoded)

    def predict(self, image_array):
        """Recognise the plate text in a single image.

        Args:
            image_array: image as a numpy array (see preprocess_image).

        Returns:
            (prediction, confidence): the decoded string and the mean over
            time steps of the highest softmax probability. On any failure the
            error is printed and (None, 0.0) is returned.
        """
        try:
            image = self.preprocess_image(image_array).to(self.device)

            with torch.no_grad():
                logits = self.model(image)

            # Softmax over the class dimension, best class per step, then mean.
            probs = torch.softmax(logits, dim=1)
            max_probs = torch.max(probs, dim=1)[0]
            confidence = torch.mean(max_probs).item()

            prediction = self.decode_prediction(logits)
            return prediction, confidence
        except Exception as e:
            # Top-level boundary for callers: report and signal failure.
            print(f"预测图像失败: {e}")
            return None, 0.0
# Module-level handle to the shared LPRNetInference instance
lpr_model = None


def LPRNinitialize_model():
    """Create the module-wide LPRNet inference object.

    Returns:
        bool: True when the object was created, False when anything raised
        (the error and its traceback are printed in that case).
    """
    global lpr_model
    try:
        # Weight file is expected next to this module.
        module_dir = os.path.dirname(__file__)
        weights_file = os.path.join(module_dir, 'LPRNet__iteration_74000.pth')
        lpr_model = LPRNetInference(weights_file)
        print("LPRNet模型初始化完成")
        return True
    except Exception as e:
        import traceback
        print(f"LPRNet模型初始化失败: {e}")
        traceback.print_exc()
        return False
def LPRNmodel_predict(image_array):
    """Recognise a plate image with the module-wide LPRNet model.

    Args:
        image_array: plate image as a numpy array.

    Returns:
        list: always 8 elements. On success these are the decoded characters,
        padded with '0' up to 7 and then with one trailing '' when only 7 are
        present, or cut to the first 8. When the model is not initialised or
        recognition fails, a fixed placeholder list is returned.
    """
    global lpr_model
    if lpr_model is None:
        print("LPRNet模型未初始化请先调用LPRNinitialize_model()")
        return ['', '', '', '0', '0', '0', '0', '0']

    try:
        predicted_text, confidence = lpr_model.predict(image_array)
        if predicted_text is None:
            print("LPRNet识别失败")
            return ['', '', '', '', '0', '0', '0', '0']
        print(f"LPRNet识别结果: {predicted_text}, 置信度: {confidence:.3f}")

        # Keep at most 8 characters, then pad short results with '0' up to 7
        # (a non-positive repeat count adds nothing).
        char_list = list(predicted_text)[:8]
        char_list += ['0'] * (7 - len(char_list))
        # A 7-character result gets an empty 8th slot so the length is fixed.
        if len(char_list) == 7:
            char_list.append('')
        return char_list
    except Exception as e:
        import traceback
        print(f"LPRNet识别失败: {e}")
        traceback.print_exc()
        return ['', '', '', '', '0', '0', '0', '0']

BIN
LPRNET_part/吉CF18040.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.0 KiB

BIN
LPRNET_part/藏A0DBN8.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 KiB

View File

@ -5,18 +5,6 @@ import cv2
class OCRProcessor: class OCRProcessor:
def __init__(self): def __init__(self):
self.model = TextRecognition(model_name="PP-OCRv5_server_rec") self.model = TextRecognition(model_name="PP-OCRv5_server_rec")
# 定义允许的字符集合(不包含空白字符)
self.allowed_chars = [
# 中文省份简称
'', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '',
'', '', '', '', '', '', '', '', '', '', '',
# 字母 A-Z
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
# 数字 0-9
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
]
print("OCR模型初始化完成占位") print("OCR模型初始化完成占位")
def predict(self, image_array): def predict(self, image_array):
@ -26,14 +14,6 @@ class OCRProcessor:
results = output[0]["rec_text"] results = output[0]["rec_text"]
placeholder_result = results.split(',') placeholder_result = results.split(',')
return placeholder_result return placeholder_result
def filter_allowed_chars(self, text):
"""只保留允许的字符"""
filtered_text = ""
for char in text:
if char in self.allowed_chars:
filtered_text += char
return filtered_text
# 保留原有函数接口 # 保留原有函数接口
_processor = OCRProcessor() _processor = OCRProcessor()
@ -62,12 +42,8 @@ def LPRNmodel_predict(image_array):
else: else:
result_str = str(raw_result) result_str = str(raw_result)
# 过滤掉'·'和'-'字符 # 过滤掉'·'字符
filtered_str = result_str.replace('·', '') filtered_str = result_str.replace('·', '')
filtered_str = filtered_str.replace('-', '')
# 只保留允许的字符
filtered_str = _processor.filter_allowed_chars(filtered_str)
# 转换为字符列表 # 转换为字符列表
char_list = list(filtered_str) char_list = list(filtered_str)

Binary file not shown.

879
main.py

File diff suppressed because it is too large Load Diff

99
test_lpr_real_images.py Normal file
View File

@ -0,0 +1,99 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
LPRNet接口真实图片测试脚本
测试LPRNET_part目录下的真实车牌图片
"""
import cv2
import numpy as np
import os
from LPRNET_part.lpr_interface import LPRNinitialize_model, LPRNmodel_predict
def test_real_images():
    """Run LPRNet recognition on every image file found in LPRNET_part."""
    print("=== LPRNet真实图片测试 ===")

    # Step 1: the model has to be initialised before any prediction.
    print("1. 初始化LPRNet模型...")
    if not LPRNinitialize_model():
        print("模型初始化失败!")
        return

    # Step 2: collect the image files in the LPRNET_part directory.
    lprnet_dir = "LPRNET_part"
    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp')
    image_files = []
    if os.path.exists(lprnet_dir):
        image_files = [
            os.path.join(lprnet_dir, file)
            for file in os.listdir(lprnet_dir)
            if file.lower().endswith(image_suffixes)
        ]
    if not image_files:
        print("未找到图片文件!")
        return
    print(f"2. 找到 {len(image_files)} 个图片文件")

    # Step 3: recognise each image; one failure does not stop the others.
    for i, image_path in enumerate(image_files, 1):
        print(f"\n--- 测试图片 {i}: {os.path.basename(image_path)} ---")
        try:
            # Read the raw bytes and decode them, which also works for
            # paths containing non-ASCII characters.
            raw_bytes = np.fromfile(image_path, dtype=np.uint8)
            image = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
            if image is None:
                print(f"无法读取图片: {image_path}")
                continue
            print(f"图片尺寸: {image.shape}")

            result = LPRNmodel_predict(image)
            print(f"识别结果: {result}")
            print(f"识别车牌号: {''.join(result)}")
        except Exception as e:
            import traceback
            print(f"处理图片 {image_path} 时出错: {e}")
            traceback.print_exc()

    print("\n=== 测试完成 ===")
def test_image_loading():
    """Compare cv2.imread with fromfile + imdecode on the LPRNET_part images."""
    print("\n=== 图片加载测试 ===")
    lprnet_dir = "LPRNET_part"
    if not os.path.exists(lprnet_dir):
        return

    image_suffixes = ('.jpg', '.jpeg', '.png', '.bmp')
    for file in os.listdir(lprnet_dir):
        if not file.lower().endswith(image_suffixes):
            continue
        image_path = os.path.join(lprnet_dir, file)
        print(f"\n测试文件: {file}")

        # Method 1: plain cv2.imread.
        img1 = cv2.imread(image_path)
        print(f"cv2.imread结果: {img1 is not None}")

        # Method 2: read the bytes with numpy and decode them, which also
        # works for paths containing non-ASCII characters.
        try:
            raw_bytes = np.fromfile(image_path, dtype=np.uint8)
            img2 = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR)
            print(f"cv2.imdecode结果: {img2 is not None}")
            if img2 is not None:
                print(f"图片尺寸: {img2.shape}")
        except Exception as e:
            print(f"cv2.imdecode失败: {e}")
if __name__ == "__main__":
    # First check how the images load
    test_image_loading()
    # Then run the full recognition flow
    test_real_images()

View File

@ -2,7 +2,6 @@ import cv2
import numpy as np import numpy as np
from ultralytics import YOLO from ultralytics import YOLO
import os import os
from PIL import Image, ImageDraw, ImageFont
class LicensePlateYOLO: class LicensePlateYOLO:
""" """
@ -114,38 +113,19 @@ class LicensePlateYOLO:
print(f"检测过程中出错: {e}") print(f"检测过程中出错: {e}")
return [] return []
def draw_detections(self, image, detections, plate_numbers=None): def draw_detections(self, image, detections):
""" """
在图像上绘制检测结果 在图像上绘制检测结果
参数: 参数:
image: 输入图像 image: 输入图像
detections: 检测结果列表 detections: 检测结果列表
plate_numbers: 车牌号列表与detections对应
返回: 返回:
numpy.ndarray: 绘制了检测结果的图像 numpy.ndarray: 绘制了检测结果的图像
""" """
draw_image = image.copy() draw_image = image.copy()
# 转换为PIL图像以支持中文字符
pil_image = Image.fromarray(cv2.cvtColor(draw_image, cv2.COLOR_BGR2RGB))
draw = ImageDraw.Draw(pil_image)
# 尝试加载中文字体
try:
# Windows系统常见的中文字体
font_path = "C:/Windows/Fonts/simhei.ttf" # 黑体
if not os.path.exists(font_path):
font_path = "C:/Windows/Fonts/msyh.ttc" # 微软雅黑
if not os.path.exists(font_path):
font_path = "C:/Windows/Fonts/simsun.ttc" # 宋体
font = ImageFont.truetype(font_path, 20)
except:
# 如果无法加载字体,使用默认字体
font = ImageFont.load_default()
for i, detection in enumerate(detections): for i, detection in enumerate(detections):
box = detection['box'] box = detection['box']
keypoints = detection['keypoints'] keypoints = detection['keypoints']
@ -153,11 +133,6 @@ class LicensePlateYOLO:
confidence = detection['confidence'] confidence = detection['confidence']
incomplete = detection.get('incomplete', False) incomplete = detection.get('incomplete', False)
# 获取对应的车牌号
plate_number = ""
if plate_numbers and i < len(plate_numbers):
plate_number = plate_numbers[i]
# 绘制边界框 # 绘制边界框
x1, y1, x2, y2 = map(int, box) x1, y1, x2, y2 = map(int, box)
@ -165,53 +140,30 @@ class LicensePlateYOLO:
if class_name == '绿牌': if class_name == '绿牌':
box_color = (0, 255, 0) # 绿色 box_color = (0, 255, 0) # 绿色
elif class_name == '蓝牌': elif class_name == '蓝牌':
box_color = (0, 0, 255) # 蓝色 box_color = (255, 0, 0) # 蓝色
else: else:
box_color = (128, 128, 128) # 灰色 box_color = (128, 128, 128) # 灰色
# 在PIL图像上绘制边界框 cv2.rectangle(draw_image, (x1, y1), (x2, y2), box_color, 2)
draw.rectangle([(x1, y1), (x2, y2)], outline=box_color, width=2)
# 构建标签文本
if plate_number:
label = f"{class_name} {plate_number} {confidence:.2f}"
else:
label = f"{class_name} {confidence:.2f}"
# 绘制标签
label = f"{class_name} {confidence:.2f}"
if incomplete: if incomplete:
label += " (不完整)" label += " (不完整)"
# 计算文本大小 # 计算文本大小和位置
bbox = draw.textbbox((0, 0), label, font=font) font = cv2.FONT_HERSHEY_SIMPLEX
text_width = bbox[2] - bbox[0] font_scale = 0.6
text_height = bbox[3] - bbox[1] thickness = 2
(text_width, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)
# 绘制文本背景 # 绘制文本背景
draw.rectangle([(x1, y1 - text_height - 10), (x1 + text_width, y1)], cv2.rectangle(draw_image, (x1, y1 - text_height - 10),
fill=box_color) (x1 + text_width, y1), box_color, -1)
# 绘制文本 # 绘制文本
draw.text((x1, y1 - text_height - 5), label, fill=(255, 255, 255), font=font) cv2.putText(draw_image, label, (x1, y1 - 5),
font, font_scale, (255, 255, 255), thickness)
# 转换回OpenCV格式
draw_image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
# 绘制关键点和连线使用OpenCV
for i, detection in enumerate(detections):
box = detection['box']
keypoints = detection['keypoints']
incomplete = detection.get('incomplete', False)
x1, y1, x2, y2 = map(int, box)
# 根据车牌类型选择颜色
class_name = detection['class_name']
if class_name == '绿牌':
box_color = (0, 255, 0) # 绿色
elif class_name == '蓝牌':
box_color = (0, 0, 255) # 蓝色
else:
box_color = (128, 128, 128) # 灰色
# 绘制关键点和连线 # 绘制关键点和连线
if len(keypoints) >= 4 and not incomplete: if len(keypoints) >= 4 and not incomplete: