368 lines
13 KiB
Python
368 lines
13 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
YOLO车牌检测器
|
||
基于ONNX Runtime的YOLO11s模型推理
|
||
"""
|
||
|
||
import cv2
|
||
import numpy as np
|
||
import onnxruntime as ort
|
||
import time
|
||
from typing import List, Tuple, Optional
|
||
|
||
class YOLODetector:
    """License-plate detector that runs a YOLO ONNX model through ONNX Runtime.

    The pipeline is: letterbox the BGR frame to ``input_size`` -> run the
    ONNX session -> decode the raw candidates back into original-image
    pixel coordinates -> non-maximum suppression.

    Each detection is a dict with the keys ``'bbox'`` (``[x1, y1, x2, y2]``
    ints in original-image pixels), ``'confidence'`` (float), ``'class_id'``
    (int) and ``'class_name'`` (str).
    """

    # Class index -> label stored in detection dicts (0: blue plate, 1: green plate).
    CLASS_NAMES = ('blue_plate', 'green_plate')

    # Class index -> (BGR box colour, caption) used by draw_detections.
    _CLASS_STYLES = {
        0: ((255, 0, 0), "Blue Plate"),
        1: ((0, 255, 0), "Green Plate"),
    }
    # Fallback style (yellow in BGR) for any other class index.
    _UNKNOWN_STYLE = ((0, 255, 255), "Unknown")

    def __init__(self, model_path: str, conf_threshold: float = 0.25, nms_threshold: float = 0.4):
        """
        Create the detector and load the ONNX model.

        Args:
            model_path: Path to the ONNX model file.
            conf_threshold: Minimum class score for a candidate to be kept.
            nms_threshold: Overlap threshold passed to non-maximum suppression.

        Raises:
            Exception: Whatever ONNX Runtime raises when the model cannot be
                loaded (re-raised by ``_init_session``).
        """
        self.model_path = model_path
        self.conf_threshold = conf_threshold
        self.nms_threshold = nms_threshold
        self.input_size = (640, 640)  # (width, height) fed to the model
        self.use_gpu = False

        # Build the ONNX Runtime session (also settles self.use_gpu).
        self._init_session()

        # Cache the tensor names needed by session.run().
        self.input_name = self.session.get_inputs()[0].name
        self.output_names = [output.name for output in self.session.get_outputs()]

        print("YOLO检测器初始化完成")
        print(f"模型路径: {model_path}")
        print(f"输入尺寸: {self.input_size}")
        print(f"GPU加速: {self.use_gpu}")

    def _init_session(self):
        """
        Create ``self.session``, preferring a GPU execution provider.

        CUDA is chosen when available, otherwise TensorRT; the CPU provider
        is always appended as a fallback. ``self.use_gpu`` ends up reflecting
        the providers the created session actually reports.

        Raises:
            Exception: Re-raised from ``ort.InferenceSession`` after the
                failure has been printed.
        """
        available_providers = ort.get_available_providers()
        print(f"可用的执行提供者: {available_providers}")

        # Pick at most one GPU provider, in order of preference.
        providers = []
        if 'CUDAExecutionProvider' in available_providers:
            providers.append('CUDAExecutionProvider')
            self.use_gpu = True
            print("检测到CUDA支持,将使用GPU加速")
        elif 'TensorrtExecutionProvider' in available_providers:
            providers.append('TensorrtExecutionProvider')
            self.use_gpu = True
            print("检测到TensorRT支持,将使用GPU加速")
        else:
            self.use_gpu = False
            print("未检测到GPU支持,将使用CPU")

        # CPU is always listed last as the fallback provider.
        providers.append('CPUExecutionProvider')

        print(f"使用的执行提供者: {providers}")

        session_options = ort.SessionOptions()
        session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        try:
            self.session = ort.InferenceSession(
                self.model_path,
                sess_options=session_options,
                providers=providers
            )

            # The requested provider may not have been usable, so re-check
            # what the session actually reports.
            actual_providers = self.session.get_providers()
            print(f"实际使用的执行提供者: {actual_providers}")

            if 'CUDAExecutionProvider' in actual_providers or 'TensorrtExecutionProvider' in actual_providers:
                self.use_gpu = True
                print("✅ GPU加速已启用")
            else:
                self.use_gpu = False
                print("⚠️ 使用CPU执行")

        except Exception as e:
            print(f"模型加载失败: {e}")
            raise

    def _letterbox_geometry(self, original_shape: Tuple[int, int]) -> Tuple[float, int, int, int, int]:
        """
        Compute the letterbox placement of an image inside ``input_size``.

        Both ``preprocess`` and the box decoding use this single helper so
        that the padding removed from predicted boxes is exactly the padding
        that was added to the input image.

        Args:
            original_shape: Original image size as ``(height, width)``.

        Returns:
            ``(scale, new_width, new_height, pad_x, pad_y)`` where ``scale``
            is the uniform resize factor, ``new_*`` the resized image size
            and ``pad_*`` the left/top offset of the resized image inside
            the model input.

        Raises:
            ValueError: If either original dimension is not positive.
        """
        original_height, original_width = original_shape
        if original_height <= 0 or original_width <= 0:
            raise ValueError(f"invalid image shape: {tuple(original_shape)}")

        target_width, target_height = self.input_size
        scale = min(target_width / original_width, target_height / original_height)

        # Round instead of truncating so float error cannot shave a pixel off
        # an exact fit; clamp so extreme aspect ratios never collapse to 0.
        new_width = min(target_width, max(1, int(round(original_width * scale))))
        new_height = min(target_height, max(1, int(round(original_height * scale))))

        pad_x = (target_width - new_width) // 2
        pad_y = (target_height - new_height) // 2
        return scale, new_width, new_height, pad_x, pad_y

    def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, float, float]:
        """
        Letterbox a BGR image and convert it to the model input tensor.

        The image is resized with its aspect ratio preserved, centred on a
        gray (114) canvas of ``input_size``, converted to RGB, scaled to
        ``[0, 1]`` float32 and laid out as NCHW with a batch of one.

        Args:
            image: Input image in BGR order with shape ``(H, W, 3)``.

        Returns:
            preprocessed_image: Tensor of shape ``(1, 3, target_h, target_w)``.
            scale_x: Resize factor applied along X.
            scale_y: Resize factor applied along Y (same value as ``scale_x``
                because the resize is uniform).

        Raises:
            ValueError: If ``image`` is missing, empty or not 3-channel.
        """
        if image is None or image.ndim != 3 or image.shape[2] != 3 or image.size == 0:
            raise ValueError("image must be a non-empty array of shape (H, W, 3)")

        scale, new_width, new_height, pad_x, pad_y = self._letterbox_geometry(image.shape[:2])
        target_width, target_height = self.input_size

        resized_image = cv2.resize(image, (new_width, new_height))

        # Gray canvas with the resized image pasted in the centre.
        padded_image = np.full((target_height, target_width, 3), 114, dtype=np.uint8)
        padded_image[pad_y:pad_y + new_height, pad_x:pad_x + new_width] = resized_image

        # BGR -> RGB, then [0, 255] -> [0, 1].
        rgb_image = cv2.cvtColor(padded_image, cv2.COLOR_BGR2RGB)
        normalized_image = rgb_image.astype(np.float32) / 255.0

        # HWC -> CHW, then add the batch axis.
        input_tensor = np.expand_dims(np.transpose(normalized_image, (2, 0, 1)), axis=0)

        return input_tensor, scale, scale

    def _decode_detections(self, predictions: np.ndarray, original_shape: Tuple[int, int]) -> List[dict]:
        """
        Turn raw candidate rows into detection dicts (before NMS).

        Args:
            predictions: Array of shape ``(N, 4 + num_classes)``; each row is
                ``[x_center, y_center, width, height, score_0, score_1, ...]``
                in model-input pixels.
            original_shape: Original image size as ``(height, width)``.

        Returns:
            Detection dicts for every row whose best class score is at least
            ``conf_threshold``, with boxes mapped to original-image pixels
            and clamped to the image.

        Raises:
            ValueError: If ``predictions`` is not 2-D or has no class columns.
        """
        predictions = np.asarray(predictions)
        if predictions.ndim != 2 or predictions.shape[1] < 5:
            raise ValueError(f"unexpected prediction shape: {predictions.shape}")

        class_scores = predictions[:, 4:]
        class_ids = np.argmax(class_scores, axis=1)
        # The best class score doubles as the detection confidence.
        confidences = class_scores[np.arange(predictions.shape[0]), class_ids]

        # Drop low-confidence rows and rows with non-finite coordinates.
        keep = (confidences >= self.conf_threshold) & np.isfinite(predictions[:, :4]).all(axis=1)
        if not np.any(keep):
            return []

        boxes = predictions[keep, :4].astype(np.float64)
        confidences = confidences[keep]
        class_ids = class_ids[keep]

        original_height, original_width = original_shape
        scale, _, _, pad_x, pad_y = self._letterbox_geometry(original_shape)

        # Undo the letterbox: remove the padding, then the uniform scale.
        x_center = (boxes[:, 0] - pad_x) / scale
        y_center = (boxes[:, 1] - pad_y) / scale
        half_width = boxes[:, 2] / scale / 2
        half_height = boxes[:, 3] / scale / 2

        # Centre/size -> corners, clamped to the image, truncated to ints.
        corners = np.stack(
            [
                np.clip(x_center - half_width, 0, original_width - 1),
                np.clip(y_center - half_height, 0, original_height - 1),
                np.clip(x_center + half_width, 0, original_width - 1),
                np.clip(y_center + half_height, 0, original_height - 1),
            ],
            axis=1,
        ).astype(int)

        class_names = self.CLASS_NAMES
        return [
            {
                'bbox': bbox,
                'confidence': float(confidence),
                'class_id': int(class_id),
                'class_name': class_names[class_id] if class_id < len(class_names) else 'unknown'
            }
            for bbox, confidence, class_id in zip(
                corners.tolist(), confidences.tolist(), class_ids.tolist()
            )
        ]

    def postprocess(self, outputs: List[np.ndarray], scale_x: float, scale_y: float,
                    original_shape: Tuple[int, int]) -> List[dict]:
        """
        Convert raw model outputs into final detections.

        Args:
            outputs: Model outputs; only the first tensor is used. A 3-D
                tensor is treated as ``[batch, attrs, candidates]`` and
                transposed to ``[batch, candidates, attrs]``; only the first
                batch entry is decoded.
            scale_x: X scale returned by ``preprocess``. Kept for interface
                compatibility; the geometry is re-derived from
                ``original_shape``.
            scale_y: Y scale returned by ``preprocess`` (unused, see above).
            original_shape: Original image size as ``(height, width)``.

        Returns:
            List of detection dicts remaining after NMS.
        """
        if len(outputs) == 0:
            return []

        output = np.asarray(outputs[0])

        # [batch, attrs, candidates] -> [batch, candidates, attrs]
        if output.ndim == 3:
            output = output.transpose(0, 2, 1)

        detections = self._decode_detections(output[0], original_shape)

        if detections:
            detections = self._apply_nms(detections)

        return detections

    def _apply_nms(self, detections: List[dict]) -> List[dict]:
        """
        Apply class-agnostic non-maximum suppression.

        Args:
            detections: Detection dicts whose ``'bbox'`` is ``[x1, y1, x2, y2]``.

        Returns:
            The detections kept by NMS (the same dict objects, not copies).
        """
        if len(detections) == 0:
            return detections

        # cv2.dnn.NMSBoxes takes rectangles as [x, y, width, height], so the
        # corner-format boxes must be converted before the call.
        rects = [
            [x1, y1, x2 - x1, y2 - y1]
            for x1, y1, x2, y2 in (det['bbox'] for det in detections)
        ]
        scores = [det['confidence'] for det in detections]

        indices = cv2.dnn.NMSBoxes(
            rects,
            scores,
            self.conf_threshold,
            self.nms_threshold
        )

        # The return type differs between OpenCV versions (empty tuple,
        # (N, 1) array or flat array); normalise it to a flat index list.
        kept = np.asarray(indices, dtype=int).reshape(-1)
        return [detections[i] for i in kept.tolist()]

    def detect(self, image: np.ndarray) -> List[dict]:
        """
        Run the full pipeline on one image.

        Any exception raised during preprocessing, inference or
        postprocessing is printed and turned into an empty result.

        Args:
            image: Input image in BGR order.

        Returns:
            List of detection dicts; empty when nothing was found or when
            an error occurred.
        """
        try:
            input_tensor, scale_x, scale_y = self.preprocess(image)

            outputs = self.session.run(self.output_names, {self.input_name: input_tensor})

            # Debug output describing the raw model outputs.
            print(f"模型输出数量: {len(outputs)}")
            for i, output in enumerate(outputs):
                print(f"输出 {i} 形状: {output.shape}")
                print(f"输出 {i} 数据范围: [{output.min():.4f}, {output.max():.4f}]")

            detections = self.postprocess(outputs, scale_x, scale_y, image.shape[:2])
            print(f"检测到的目标数量: {len(detections)}")
            for i, det in enumerate(detections):
                print(f"检测 {i}: 类别={det['class_name']}, 置信度={det['confidence']:.3f}")

            return detections

        except Exception as e:
            # Best-effort API: report the failure and return no detections.
            print(f"检测过程出错: {e}")
            return []

    def draw_detections(self, image: np.ndarray, detections: List[dict]) -> np.ndarray:
        """
        Draw boxes and confidence labels on a copy of the image.

        Args:
            image: Input image (left unmodified).
            detections: Detection dicts as returned by ``detect``.

        Returns:
            A copy of ``image`` with the detections drawn on it.
        """
        result_image = image.copy()

        for detection in detections:
            x1, y1, x2, y2 = detection['bbox']
            confidence = detection['confidence']
            color, plate_type = self._CLASS_STYLES.get(detection['class_id'], self._UNKNOWN_STYLE)

            cv2.rectangle(result_image, (x1, y1), (x2, y2), color, 2)

            label = f"{plate_type}: {confidence:.2f}"
            label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)[0]

            # The label normally sits just above the box; when that would
            # fall outside the image, move it inside the box instead.
            label_top = y1 - label_size[1] - 10
            if label_top < 0:
                label_top = y1
            label_bottom = label_top + label_size[1] + 10

            # Filled background, then white text on top of it.
            cv2.rectangle(result_image,
                          (x1, label_top),
                          (x1 + label_size[0], label_bottom),
                          color, -1)
            cv2.putText(result_image, label,
                        (x1, label_bottom - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (255, 255, 255), 2)

        return result_image

    def crop_plates(self, image: np.ndarray, detections: List[dict]) -> List[np.ndarray]:
        """
        Cut the detected plate regions out of the image.

        Args:
            image: Original image.
            detections: Detection dicts as returned by ``detect``.

        Returns:
            One crop per detection with a non-degenerate box. The crops are
            NumPy slices of ``image`` (views, not copies).
        """
        plate_images = []

        for detection in detections:
            x1, y1, x2, y2 = detection['bbox']

            # Skip boxes that collapsed to zero width or height.
            if x2 <= x1 or y2 <= y1:
                continue

            plate_image = image[y1:y2, x1:x2]
            if plate_image.size > 0:
                plate_images.append(plate_image)

        return plate_images