2025-08-28 12:00:56 +08:00

368 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
YOLO车牌检测器
基于ONNX Runtime的YOLO11s模型推理
"""
import cv2
import numpy as np
import onnxruntime as ort
import time
from typing import List, Tuple, Optional
class YOLODetector:
    """License-plate detector that runs a YOLO ONNX model through ONNX Runtime.

    The detector letterboxes a BGR image to ``input_size``, runs the model,
    decodes the raw predictions back to original-image pixel coordinates and
    filters them with non-maximum suppression.

    Each detection is a dict with the keys ``'bbox'`` (``[x1, y1, x2, y2]`` in
    original-image pixels), ``'confidence'``, ``'class_id'`` and
    ``'class_name'``.
    """

    # Class index -> label stored in detection dicts (0: blue plate, 1: green plate).
    CLASS_NAMES = ('blue_plate', 'green_plate')

    # Class index -> (BGR colour, caption) used when drawing detections.
    _PLATE_STYLES = {
        0: ((255, 0, 0), "Blue Plate"),
        1: ((0, 255, 0), "Green Plate"),
    }
    # Fallback style (yellow in BGR) for any other class index.
    _UNKNOWN_STYLE = ((0, 255, 255), "Unknown")

    def __init__(self, model_path: str, conf_threshold: float = 0.25, nms_threshold: float = 0.4):
        """Create the inference session and cache the model's input/output names.

        Args:
            model_path: Path to the ONNX model file.
            conf_threshold: Minimum class score for a detection to be kept.
            nms_threshold: Threshold handed to non-maximum suppression.

        Raises:
            Exception: Whatever ONNX Runtime raises when the model cannot be
                loaded (re-raised by ``_init_session``).
        """
        self.model_path = model_path
        self.conf_threshold = conf_threshold
        self.nms_threshold = nms_threshold
        self.input_size = (640, 640)  # (width, height) of the network input
        self.use_gpu = False

        self._init_session()

        # Names needed to feed the session and to request its outputs.
        self.input_name = self.session.get_inputs()[0].name
        self.output_names = [output.name for output in self.session.get_outputs()]

        print("YOLO检测器初始化完成")
        print(f"模型路径: {model_path}")
        print(f"输入尺寸: {self.input_size}")
        print(f"GPU加速: {self.use_gpu}")

    def _init_session(self):
        """Build ``self.session``, preferring a GPU execution provider.

        CUDA is requested if available, otherwise TensorRT; the CPU provider
        is always appended as a fallback. ``self.use_gpu`` ends up reflecting
        the providers the created session actually reports.

        Raises:
            Exception: Re-raised unchanged if the session cannot be created.
        """
        available_providers = ort.get_available_providers()
        print(f"可用的执行提供者: {available_providers}")

        providers = []
        if 'CUDAExecutionProvider' in available_providers:
            providers.append('CUDAExecutionProvider')
            self.use_gpu = True
            print("检测到CUDA支持将使用GPU加速")
        elif 'TensorrtExecutionProvider' in available_providers:
            providers.append('TensorrtExecutionProvider')
            self.use_gpu = True
            print("检测到TensorRT支持将使用GPU加速")
        else:
            self.use_gpu = False
            print("未检测到GPU支持将使用CPU")

        # CPU is always listed last so the session can fall back to it.
        providers.append('CPUExecutionProvider')
        print(f"使用的执行提供者: {providers}")

        session_options = ort.SessionOptions()
        session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

        try:
            self.session = ort.InferenceSession(
                self.model_path,
                sess_options=session_options,
                providers=providers,
            )
            # A requested provider may not be the one in use, so re-check
            # against what the session reports.
            actual_providers = self.session.get_providers()
            print(f"实际使用的执行提供者: {actual_providers}")
            if 'CUDAExecutionProvider' in actual_providers or 'TensorrtExecutionProvider' in actual_providers:
                self.use_gpu = True
                print("✅ GPU加速已启用")
            else:
                self.use_gpu = False
                print("⚠️ 使用CPU执行")
        except Exception as e:
            print(f"模型加载失败: {e}")
            raise

    def _letterbox_geometry(self, original_shape: Tuple[int, int]) -> Tuple[float, int, int, int, int]:
        """Compute the letterbox placement shared by pre- and post-processing.

        Keeping this in one place guarantees that ``postprocess`` undoes
        exactly the resize and padding that ``preprocess`` applied.

        Args:
            original_shape: ``(height, width)`` of the source image.

        Returns:
            ``(scale, new_width, new_height, pad_x, pad_y)`` where ``scale`` is
            the uniform resize factor, ``new_*`` the resized image size in
            pixels and ``pad_*`` the left/top offset of the resized image
            inside the network input.

        Raises:
            ValueError: If the image has a zero (or negative) dimension.
        """
        original_height, original_width = original_shape[:2]
        if original_height <= 0 or original_width <= 0:
            raise ValueError(f"invalid image shape: {tuple(original_shape)}")

        target_width, target_height = self.input_size
        scale = min(target_width / original_width, target_height / original_height)

        # Truncate like the resize step always did, but never request a
        # zero-sized resize for extreme aspect ratios.
        new_width = max(1, int(original_width * scale))
        new_height = max(1, int(original_height * scale))

        pad_x = (target_width - new_width) // 2
        pad_y = (target_height - new_height) // 2
        return scale, new_width, new_height, pad_x, pad_y

    def preprocess(self, image: np.ndarray) -> Tuple[np.ndarray, float, float]:
        """Letterbox a BGR image into a normalised NCHW float32 tensor.

        The image is resized with its aspect ratio preserved, centred on a
        canvas of ``input_size`` filled with the value 114, converted to RGB
        and scaled to ``[0, 1]``.

        Args:
            image: Input image in BGR format.

        Returns:
            ``(input_tensor, scale_x, scale_y)``. Both scale values are the
            same uniform resize factor.

        Raises:
            ValueError: If the image has a zero dimension.
        """
        scale, new_width, new_height, pad_x, pad_y = self._letterbox_geometry(image.shape[:2])
        target_width, target_height = self.input_size

        resized_image = cv2.resize(image, (new_width, new_height))

        padded_image = np.full((target_height, target_width, 3), 114, dtype=np.uint8)
        padded_image[pad_y:pad_y + new_height, pad_x:pad_x + new_width] = resized_image

        rgb_image = cv2.cvtColor(padded_image, cv2.COLOR_BGR2RGB)
        normalized_image = rgb_image.astype(np.float32) / 255.0

        # HWC -> CHW, then add the batch axis; make the result contiguous
        # because the transpose only produces a strided view.
        input_tensor = np.ascontiguousarray(np.transpose(normalized_image, (2, 0, 1))[np.newaxis, ...])
        return input_tensor, scale, scale

    @staticmethod
    def _clamp(value: int, upper: int) -> int:
        """Clamp ``value`` into the inclusive range ``[0, upper]``."""
        return max(0, min(value, upper))

    def postprocess(self, outputs: List[np.ndarray], scale_x: float, scale_y: float,
                    original_shape: Tuple[int, int]) -> List[dict]:
        """Decode raw model output into detections in original-image pixels.

        The first output is used. A 3-D output is transposed from
        ``[batch, channels, candidates]`` to ``[batch, candidates, channels]``
        and only the first batch entry is decoded. In every candidate row the
        first four values are ``x_center, y_center, width, height`` in network
        input pixels and the remaining values are per-class scores; the best
        class score is used as the confidence.

        Args:
            outputs: Model outputs as returned by the inference session.
            scale_x: Unused; the letterbox geometry is recomputed from
                ``original_shape``. Kept for interface compatibility.
            scale_y: Unused; see ``scale_x``.
            original_shape: ``(height, width)`` of the original image.

        Returns:
            Detections that pass the confidence threshold and NMS.
        """
        if len(outputs) == 0:
            return []

        output = outputs[0]
        if len(output.shape) == 3:
            output = output.transpose(0, 2, 1)

        predictions = np.asarray(output[0], dtype=np.float64)
        class_scores = predictions[:, 4:]
        class_ids = np.argmax(class_scores, axis=1)
        confidences = class_scores[np.arange(predictions.shape[0]), class_ids]

        # Keep everything that is NOT below the threshold (same rule as a
        # per-row "if confidence < threshold: continue").
        kept_rows = np.flatnonzero(~(confidences < self.conf_threshold))
        if kept_rows.size == 0:
            return []

        # Loop-invariant geometry: identical to what preprocess() applied.
        original_height, original_width = original_shape[:2]
        scale, _, _, pad_x, pad_y = self._letterbox_geometry(original_shape)
        max_x = original_width - 1
        max_y = original_height - 1

        detections = []
        for row in kept_rows:
            x_center, y_center, width, height = predictions[row, :4]

            # Undo the padding offset, then the uniform resize.
            x_center = (x_center - pad_x) / scale
            y_center = (y_center - pad_y) / scale
            width = width / scale
            height = height / scale

            # Centre/size -> corners, clamped to the image bounds.
            x1 = self._clamp(int(x_center - width / 2), max_x)
            y1 = self._clamp(int(y_center - height / 2), max_y)
            x2 = self._clamp(int(x_center + width / 2), max_x)
            y2 = self._clamp(int(y_center + height / 2), max_y)

            class_id = int(class_ids[row])
            class_name = self.CLASS_NAMES[class_id] if class_id < len(self.CLASS_NAMES) else 'unknown'

            detections.append({
                'bbox': [x1, y1, x2, y2],
                'confidence': float(confidences[row]),
                'class_id': class_id,
                'class_name': class_name
            })

        return self._apply_nms(detections)

    @staticmethod
    def _xyxy_to_xywh(boxes: List[List[int]]) -> List[List[int]]:
        """Convert ``[x1, y1, x2, y2]`` boxes to ``[x, y, width, height]``."""
        return [[x1, y1, x2 - x1, y2 - y1] for x1, y1, x2, y2 in boxes]

    def _apply_nms(self, detections: List[dict]) -> List[dict]:
        """Filter overlapping detections with non-maximum suppression.

        Args:
            detections: Detection dicts whose ``'bbox'`` is
                ``[x1, y1, x2, y2]``.

        Returns:
            The detections selected by ``cv2.dnn.NMSBoxes``, in the order it
            reports them; an empty list if none survive.
        """
        if not detections:
            return detections

        # cv2.dnn.NMSBoxes works on (x, y, width, height) rectangles, so the
        # corner-format boxes must be converted before computing overlaps.
        boxes_xywh = self._xyxy_to_xywh([det['bbox'] for det in detections])
        scores = [float(det['confidence']) for det in detections]

        indices = cv2.dnn.NMSBoxes(
            boxes_xywh,
            scores,
            self.conf_threshold,
            self.nms_threshold
        )

        if len(indices) == 0:
            return []
        # Depending on the OpenCV version the indices may be nested; flatten.
        return [detections[i] for i in np.asarray(indices).flatten()]

    def detect(self, image: np.ndarray) -> List[dict]:
        """Run the full pipeline (preprocess, inference, postprocess) on an image.

        Diagnostic information about the model outputs and the final
        detections is printed on every call.

        Args:
            image: Input image in BGR format.

        Returns:
            List of detection dicts. Any exception raised along the way is
            printed and an empty list is returned instead (best effort).
        """
        try:
            input_tensor, scale_x, scale_y = self.preprocess(image)

            outputs = self.session.run(self.output_names, {self.input_name: input_tensor})

            # Debug output describing the raw model outputs.
            print(f"模型输出数量: {len(outputs)}")
            for i, output in enumerate(outputs):
                print(f"输出 {i} 形状: {output.shape}")
                print(f"输出 {i} 数据范围: [{output.min():.4f}, {output.max():.4f}]")

            detections = self.postprocess(outputs, scale_x, scale_y, image.shape[:2])

            print(f"检测到的目标数量: {len(detections)}")
            for i, det in enumerate(detections):
                print(f"检测 {i}: 类别={det['class_name']}, 置信度={det['confidence']:.3f}")
            return detections
        except Exception as e:
            print(f"检测过程出错: {e}")
            return []

    def draw_detections(self, image: np.ndarray, detections: List[dict]) -> np.ndarray:
        """Draw bounding boxes and confidence labels on a copy of the image.

        Args:
            image: Input image; it is not modified.
            detections: Detection dicts as produced by ``detect``.

        Returns:
            A copy of ``image`` with one coloured box and label per detection.
        """
        result_image = image.copy()
        font = cv2.FONT_HERSHEY_SIMPLEX

        for detection in detections:
            x1, y1, x2, y2 = detection['bbox']
            color, plate_type = self._PLATE_STYLES.get(detection['class_id'], self._UNKNOWN_STYLE)

            cv2.rectangle(result_image, (x1, y1), (x2, y2), color, 2)

            label = f"{plate_type}: {detection['confidence']:.2f}"
            text_width, text_height = cv2.getTextSize(label, font, 0.6, 2)[0]

            # Place the label just above the box; when the box touches the top
            # of the image, keep the label inside the image instead of
            # drawing it off-screen.
            label_top = max(y1 - text_height - 10, 0)
            label_bottom = label_top + text_height + 10

            cv2.rectangle(result_image,
                          (x1, label_top),
                          (x1 + text_width, label_bottom),
                          color, -1)
            cv2.putText(result_image, label,
                        (x1, label_bottom - 5),
                        font, 0.6,
                        (255, 255, 255), 2)

        return result_image

    def crop_plates(self, image: np.ndarray, detections: List[dict]) -> List[np.ndarray]:
        """Cut the detected plate regions out of the image.

        Args:
            image: Original image.
            detections: Detection dicts whose ``'bbox'`` is
                ``[x1, y1, x2, y2]``.

        Returns:
            One slice of ``image`` per detection with a non-empty box;
            degenerate boxes are skipped. The crops are views into ``image``,
            not copies.
        """
        plate_images = []
        for detection in detections:
            x1, y1, x2, y2 = detection['bbox']
            # Skip boxes with no area.
            if x2 <= x1 or y2 <= y1:
                continue
            plate_image = image[y1:y2, x1:x2]
            if plate_image.size > 0:
                plate_images.append(plate_image)
        return plate_images