// audio_model_esp32.h - C source listing (841 lines, 26 KiB)
#ifndef AUDIO_MODEL_ESP32_H
|
||
#define AUDIO_MODEL_ESP32_H
|
||
|
||
#ifdef __cplusplus
|
||
extern "C" {
|
||
#endif
|
||
|
||
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

// Arduino core header (only when building with the Arduino toolchain)
#ifdef ARDUINO
#include <Arduino.h>
#else
#include <stdint.h>
#include <stdbool.h>
#endif
#include "audio_model_data.h"
|
||
|
||
// ==================== Model configuration ====================
#define SAMPLE_RATE 16000        // Audio sample rate (Hz)
#define AUDIO_DURATION_MS 2000   // Length of one audio clip (milliseconds)
#define N_MELS 32                // Number of Mel frequency bins
#define N_FFT 1024               // FFT window size (samples)
#define HOP_LENGTH 512           // Hop length between frames (samples)
#define NUM_CLASSES 4            // Number of output classes

// Derived parameters
#define AUDIO_BUFFER_SIZE (SAMPLE_RATE * AUDIO_DURATION_MS / 1000)  // 32000 samples
#define MEL_FRAMES ((AUDIO_BUFFER_SIZE - N_FFT) / HOP_LENGTH + 1)   // 61 frames (integer division)
#define INPUT_SIZE (MEL_FRAMES * N_MELS)                            // 1952 input features

// Preprocessing parameters
#define MEL_FMIN 0.0f               // Lowest Mel filter frequency
#define MEL_FMAX 8000.0f            // Highest Mel filter frequency
#define WINDOW_TYPE_HANN 1          // Hann window selector
#define ENERGY_THRESHOLD 0.01f      // Audio activity detection threshold
#define CONFIDENCE_THRESHOLD 0.6f   // Default prediction confidence threshold
|
||
|
||
// ==================== 数据结构定义 ====================
|
||
|
||
// Audio classes reported by the classifier.
typedef enum {
    AUDIO_CLASS_PERSON_PRESENT = 0,  // Someone is in the room
    AUDIO_CLASS_DOOR_CLOSING = 1,    // Door closing
    AUDIO_CLASS_KEY_JINGLING = 2,    // Keys jingling
    AUDIO_CLASS_PERSON_ABSENT = 3    // Nobody is in the room
} AudioClassType;
|
||
|
||
// 预测结果结构体
|
||
typedef struct {
|
||
AudioClassType predicted_class; // 预测的类别
|
||
float confidence; // 最高置信度 (0.0 - 1.0)
|
||
float class_probabilities[NUM_CLASSES]; // 各类别概率
|
||
bool is_valid; // 预测结果是否有效
|
||
uint32_t inference_time_us; // 推理耗时(微秒)
|
||
} AudioPredictionResult;
|
||
|
||
// Audio preprocessing state: working buffers plus an initialization flag.
typedef struct {
    float* mel_buffer;     // Mel feature buffer
    float* fft_buffer;     // FFT working buffer
    float* window_buffer;  // Window function coefficients
    bool is_initialized;   // True once the buffers have been set up
} AudioPreprocessor;
|
||
|
||
// ==================== 全局变量定义 ====================
|
||
static AudioPreprocessor g_preprocessor = {0};
|
||
static float g_confidence_threshold = CONFIDENCE_THRESHOLD;
|
||
static bool g_debug_mode = false;
|
||
static char g_last_error[256] = {0};
|
||
static uint32_t g_last_inference_time = 0;
|
||
static uint32_t g_total_predictions = 0;
|
||
static uint32_t g_successful_predictions = 0;
|
||
static float g_total_confidence = 0.0f;
|
||
|
||
// ==================== 常量定义 ====================
|
||
static const char* CLASS_NAMES_EN[NUM_CLASSES] = {
|
||
"person_present", // 室内有人
|
||
"door_closing", // 关门
|
||
"key_jingling", // 钥匙弹子声
|
||
"person_absent" // 室内无人
|
||
};
|
||
|
||
static const char* CLASS_NAMES_CN[NUM_CLASSES] = {
|
||
"室内有人",
|
||
"关门声",
|
||
"钥匙声",
|
||
"室内无人"
|
||
};
|
||
|
||
// ==================== 函数声明 ====================
|
||
int preprocess_audio_to_mel(const int16_t* audio_data, int audio_length, float* mel_features);
|
||
int preprocess_audio_to_mel_simple(const int16_t* audio_data, int audio_length, float* mel_features, int feature_count);
|
||
float calculate_rms_energy(const int16_t* audio_data, int length);
|
||
void audio_model_cleanup(void);
|
||
const unsigned char* get_audio_model_data(void);
|
||
size_t get_audio_model_size(void);
|
||
const char* get_class_name_en(AudioClassType class_id);
|
||
const char* get_class_name_cn(AudioClassType class_id);
|
||
|
||
// ==================== 核心API函数 ====================
|
||
|
||
/**
|
||
* @brief 初始化音频模型
|
||
* @return 0: 成功, -1: 失败
|
||
* @note 必须在使用其他函数前调用
|
||
*/
|
||
int audio_model_init(void) {
|
||
if (g_preprocessor.is_initialized) {
|
||
return 0; // 已经初始化
|
||
}
|
||
|
||
// 分配内存缓冲区
|
||
g_preprocessor.mel_buffer = (float*)malloc(INPUT_SIZE * sizeof(float));
|
||
g_preprocessor.fft_buffer = (float*)malloc(N_FFT * sizeof(float));
|
||
g_preprocessor.window_buffer = (float*)malloc(N_FFT * sizeof(float));
|
||
|
||
if (!g_preprocessor.mel_buffer || !g_preprocessor.fft_buffer || !g_preprocessor.window_buffer) {
|
||
strcpy(g_last_error, "内存分配失败");
|
||
audio_model_cleanup();
|
||
return -1;
|
||
}
|
||
|
||
// 预计算海宁窗
|
||
for (int i = 0; i < N_FFT; i++) {
|
||
g_preprocessor.window_buffer[i] = 0.5f * (1.0f - cosf(2.0f * M_PI * i / (N_FFT - 1)));
|
||
}
|
||
|
||
g_preprocessor.is_initialized = true;
|
||
strcpy(g_last_error, "");
|
||
return 0;
|
||
}
|
||
|
||
/**
|
||
* @brief 清理音频模型资源
|
||
* @note 程序结束时调用,释放内存
|
||
*/
|
||
void audio_model_cleanup(void) {
|
||
if (g_preprocessor.mel_buffer) {
|
||
free(g_preprocessor.mel_buffer);
|
||
g_preprocessor.mel_buffer = NULL;
|
||
}
|
||
if (g_preprocessor.fft_buffer) {
|
||
free(g_preprocessor.fft_buffer);
|
||
g_preprocessor.fft_buffer = NULL;
|
||
}
|
||
if (g_preprocessor.window_buffer) {
|
||
free(g_preprocessor.window_buffer);
|
||
g_preprocessor.window_buffer = NULL;
|
||
}
|
||
g_preprocessor.is_initialized = false;
|
||
}
|
||
|
||
/**
|
||
* @brief 音频预测函数(完整版)
|
||
* @param audio_data 输入音频数据指针
|
||
* @param audio_length 音频数据长度(样本数)
|
||
* @param result 预测结果输出指针
|
||
* @return 0: 成功, -1: 失败
|
||
*
|
||
* 音频数据格式要求:
|
||
* - 数据类型: int16_t (16位有符号整数)
|
||
* - 采样率: 16000 Hz
|
||
* - 声道数: 单声道
|
||
* - 数据长度: 32000 samples (2秒)
|
||
* - 数值范围: -32768 到 32767
|
||
* - 字节序: 小端序(Little Endian)
|
||
*
|
||
* 示例调用:
|
||
* int16_t audio_buffer[AUDIO_BUFFER_SIZE];
|
||
* AudioPredictionResult result;
|
||
* // ... 填充audio_buffer ...
|
||
* int ret = audio_model_predict(audio_buffer, AUDIO_BUFFER_SIZE, &result);
|
||
*/
|
||
int audio_model_predict(const int16_t* audio_data, int audio_length, AudioPredictionResult* result) {
|
||
if (!g_preprocessor.is_initialized) {
|
||
strcpy(g_last_error, "模型未初始化");
|
||
return -1;
|
||
}
|
||
|
||
if (!audio_data || !result || audio_length != AUDIO_BUFFER_SIZE) {
|
||
strcpy(g_last_error, "无效参数");
|
||
return -1;
|
||
}
|
||
|
||
uint32_t start_time = micros();
|
||
|
||
// 添加看门狗喂狗
|
||
#ifdef ARDUINO
|
||
yield();
|
||
#endif
|
||
|
||
// TODO: 集成TensorFlow Lite模型进行真实的音频识别
|
||
// 当前使用audio_model_data.h中的TensorFlow Lite模型数据
|
||
// 需要实现以下步骤:
|
||
// 1. 初始化TensorFlow Lite解释器
|
||
// 2. 加载模型数据 (audio_model_data)
|
||
// 3. 预处理音频数据为模型输入格式
|
||
// 4. 执行推理
|
||
// 5. 解析输出结果
|
||
|
||
// 临时实现:基于音频能量的简单分类,提供更合理的结果
|
||
#ifdef ARDUINO
|
||
Serial.println("警告:当前使用临时实现,等待TensorFlow Lite模型集成");
|
||
#endif
|
||
|
||
// 计算音频能量来做简单的分类判断
|
||
float rms_energy = calculate_rms_energy(audio_data, audio_length);
|
||
|
||
// 添加调试信息:检查音频数据的实际值
|
||
#ifdef ARDUINO
|
||
int non_zero_count = 0;
|
||
int16_t min_val = 32767, max_val = -32768;
|
||
long long sum_abs = 0;
|
||
|
||
for (int i = 0; i < min(100, audio_length); i++) { // 检查前100个样本
|
||
if (audio_data[i] != 0) non_zero_count++;
|
||
if (audio_data[i] < min_val) min_val = audio_data[i];
|
||
if (audio_data[i] > max_val) max_val = audio_data[i];
|
||
sum_abs += abs(audio_data[i]);
|
||
}
|
||
|
||
Serial.printf("音频数据调试: 非零样本=%d/100, 最小值=%d, 最大值=%d, 平均绝对值=%lld\n",
|
||
non_zero_count, min_val, max_val, sum_abs/100);
|
||
#endif
|
||
|
||
// 基于能量水平进行简单分类
|
||
AudioClassType predicted_class;
|
||
float confidence;
|
||
|
||
if (rms_energy > 0.1f) {
|
||
// 高能量:可能是关门声或钥匙声
|
||
if (rms_energy > 0.3f) {
|
||
predicted_class = AUDIO_CLASS_DOOR_CLOSING;
|
||
confidence = 0.75f;
|
||
} else {
|
||
predicted_class = AUDIO_CLASS_KEY_JINGLING;
|
||
confidence = 0.65f;
|
||
}
|
||
} else if (rms_energy > 0.02f) {
|
||
// 中等能量:室内有人
|
||
predicted_class = AUDIO_CLASS_PERSON_PRESENT;
|
||
confidence = 0.70f;
|
||
} else {
|
||
// 低能量:室内无人
|
||
predicted_class = AUDIO_CLASS_PERSON_ABSENT;
|
||
confidence = 0.80f;
|
||
}
|
||
|
||
// 设置结果
|
||
result->predicted_class = predicted_class;
|
||
result->confidence = confidence;
|
||
|
||
// 设置概率分布
|
||
for (int i = 0; i < NUM_CLASSES; i++) {
|
||
if (i == (int)predicted_class) {
|
||
result->class_probabilities[i] = confidence;
|
||
} else {
|
||
result->class_probabilities[i] = (1.0f - confidence) / (NUM_CLASSES - 1);
|
||
}
|
||
}
|
||
|
||
result->is_valid = result->confidence >= g_confidence_threshold;
|
||
result->inference_time_us = micros() - start_time;
|
||
g_last_inference_time = result->inference_time_us;
|
||
|
||
// 更新统计信息
|
||
g_total_predictions++;
|
||
if (result->is_valid) {
|
||
g_successful_predictions++;
|
||
g_total_confidence += result->confidence;
|
||
}
|
||
|
||
#ifdef ARDUINO
|
||
Serial.printf("音频能量: %.4f, 预测类别: %s, 置信度: %.2f\n",
|
||
rms_energy, get_class_name_cn(predicted_class), confidence);
|
||
#endif
|
||
|
||
return 0;
|
||
}
|
||
|
||
/**
|
||
* @brief 音频预测函数(简化版)
|
||
* @param audio_data 输入音频数据指针
|
||
* @param audio_length 音频数据长度
|
||
* @param predicted_class 预测类别输出
|
||
* @param confidence 置信度输出
|
||
* @return 0: 成功, -1: 失败
|
||
*/
|
||
int audio_model_predict_simple(const int16_t* audio_data, int audio_length,
|
||
AudioClassType* predicted_class, float* confidence) {
|
||
AudioPredictionResult result;
|
||
int ret = audio_model_predict(audio_data, audio_length, &result);
|
||
if (ret == 0 && result.is_valid) {
|
||
*predicted_class = result.predicted_class;
|
||
*confidence = result.confidence;
|
||
}
|
||
return ret;
|
||
}
|
||
|
||
// ==================== 数据预处理函数 ====================
|
||
|
||
/**
|
||
* @brief 音频数据预处理(完整流程)
|
||
* @param audio_data 原始音频数据 (int16_t格式)
|
||
* @param audio_length 音频长度
|
||
* @param mel_features 输出的Mel特征 (大小为INPUT_SIZE)
|
||
* @return 0: 成功, -1: 失败
|
||
*
|
||
* 预处理步骤:
|
||
* 1. 数据类型转换 (int16_t -> float)
|
||
* 2. 归一化处理 ([-1.0, 1.0])
|
||
* 3. 预加重滤波
|
||
* 4. 加窗处理 (汉宁窗)
|
||
* 5. FFT变换
|
||
* 6. 功率谱计算
|
||
* 7. Mel滤波器组
|
||
* 8. 对数变换
|
||
* 9. 特征归一化
|
||
*/
|
||
/**
|
||
* @brief 将音频数据预处理为Mel频谱图特征(优化版本)
|
||
* @param audio_data 输入音频数据指针
|
||
* @param audio_length 音频数据长度
|
||
* @param mel_features 输出Mel特征数组
|
||
* @return 0: 成功, -1: 失败
|
||
*/
|
||
/**
|
||
* @brief 简化的音频预处理函数,减少内存使用
|
||
* @param audio_data 输入音频数据
|
||
* @param audio_length 音频数据长度
|
||
* @param mel_features 输出特征数组
|
||
* @param feature_count 特征数量
|
||
* @return 0: 成功, -1: 失败
|
||
*/
|
||
int preprocess_audio_to_mel_simple(const int16_t* audio_data, int audio_length, float* mel_features, int feature_count) {
|
||
if (!audio_data || !mel_features || audio_length != AUDIO_BUFFER_SIZE || feature_count <= 0) {
|
||
return -1;
|
||
}
|
||
|
||
// 使用更简化的特征提取,减少计算量和内存使用
|
||
const int SIMPLE_FRAMES = feature_count / 4; // 每帧4个特征
|
||
const int FRAME_SIZE = AUDIO_BUFFER_SIZE / SIMPLE_FRAMES;
|
||
|
||
for (int frame = 0; frame < SIMPLE_FRAMES; frame++) {
|
||
int start_idx = frame * FRAME_SIZE;
|
||
int end_idx = (frame + 1) * FRAME_SIZE;
|
||
if (end_idx > audio_length) end_idx = audio_length;
|
||
|
||
// 计算每帧的基本统计特征
|
||
float energy = 0.0f;
|
||
float zero_crossings = 0.0f;
|
||
int16_t prev_sample = 0;
|
||
|
||
for (int i = start_idx; i < end_idx; i++) {
|
||
float sample = (float)audio_data[i] / 32768.0f;
|
||
energy += sample * sample;
|
||
|
||
// 零交叉率计算
|
||
if (i > start_idx &&
|
||
((audio_data[i] >= 0 && prev_sample < 0) ||
|
||
(audio_data[i] < 0 && prev_sample >= 0))) {
|
||
zero_crossings += 1.0f;
|
||
}
|
||
prev_sample = audio_data[i];
|
||
|
||
// 添加看门狗喂狗,防止长时间计算
|
||
#ifdef ARDUINO
|
||
if (i % 1000 == 0) {
|
||
yield(); // ESP32看门狗喂狗
|
||
}
|
||
#endif
|
||
}
|
||
|
||
// 归一化特征
|
||
energy = sqrtf(energy / (end_idx - start_idx));
|
||
zero_crossings = zero_crossings / (end_idx - start_idx);
|
||
|
||
// 为每帧生成4个特征值
|
||
for (int mel = 0; mel < 4; mel++) {
|
||
int feature_idx = frame * 4 + mel;
|
||
if (feature_idx < feature_count) {
|
||
switch (mel) {
|
||
case 0: mel_features[feature_idx] = logf(energy + 1e-10f); break;
|
||
case 1: mel_features[feature_idx] = logf(zero_crossings + 1e-10f); break;
|
||
case 2: mel_features[feature_idx] = energy * zero_crossings; break;
|
||
case 3: mel_features[feature_idx] = energy - zero_crossings; break;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// 填充剩余特征(如果需要)
|
||
int filled_features = SIMPLE_FRAMES * 4;
|
||
for (int i = filled_features; i < feature_count; i++) {
|
||
mel_features[i] = -10.0f; // 静音值
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
|
||
int preprocess_audio_to_mel(const int16_t* audio_data, int audio_length, float* mel_features) {
|
||
if (!audio_data || !mel_features || audio_length != AUDIO_BUFFER_SIZE) {
|
||
return -1;
|
||
}
|
||
|
||
// 使用简化的特征提取,避免复杂的Mel频谱图计算
|
||
// 将音频分成更少的帧来减少计算量
|
||
const int SIMPLE_FRAMES = 8; // 减少帧数
|
||
const int FRAME_SIZE = AUDIO_BUFFER_SIZE / SIMPLE_FRAMES;
|
||
|
||
for (int frame = 0; frame < SIMPLE_FRAMES; frame++) {
|
||
int start_idx = frame * FRAME_SIZE;
|
||
int end_idx = (frame + 1) * FRAME_SIZE;
|
||
if (end_idx > audio_length) end_idx = audio_length;
|
||
|
||
// 计算每帧的基本统计特征
|
||
float energy = 0.0f;
|
||
float zero_crossings = 0.0f;
|
||
int16_t prev_sample = 0;
|
||
|
||
for (int i = start_idx; i < end_idx; i++) {
|
||
float sample = (float)audio_data[i] / 32768.0f;
|
||
energy += sample * sample;
|
||
|
||
// 零交叉率计算
|
||
if (i > start_idx &&
|
||
((audio_data[i] >= 0 && prev_sample < 0) ||
|
||
(audio_data[i] < 0 && prev_sample >= 0))) {
|
||
zero_crossings += 1.0f;
|
||
}
|
||
prev_sample = audio_data[i];
|
||
|
||
// 添加看门狗喂狗,防止长时间计算
|
||
#ifdef ARDUINO
|
||
if (i % 1000 == 0) {
|
||
yield(); // ESP32看门狗喂狗
|
||
}
|
||
#endif
|
||
}
|
||
|
||
// 归一化特征
|
||
energy = sqrtf(energy / (end_idx - start_idx));
|
||
zero_crossings = zero_crossings / (end_idx - start_idx);
|
||
|
||
// 为每帧生成4个特征值(模拟32个Mel频带的简化版本)
|
||
for (int mel = 0; mel < 4; mel++) {
|
||
int feature_idx = frame * 4 + mel;
|
||
if (feature_idx < INPUT_SIZE) {
|
||
switch (mel) {
|
||
case 0: mel_features[feature_idx] = logf(energy + 1e-10f); break;
|
||
case 1: mel_features[feature_idx] = logf(zero_crossings + 1e-10f); break;
|
||
case 2: mel_features[feature_idx] = energy * zero_crossings; break;
|
||
case 3: mel_features[feature_idx] = energy - zero_crossings; break;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// 填充剩余特征(如果需要)
|
||
int filled_features = SIMPLE_FRAMES * 4;
|
||
for (int i = filled_features; i < INPUT_SIZE; i++) {
|
||
mel_features[i] = -10.0f; // 静音值
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
|
||
/*
 * NOTE(review): the three functions below are only declared; no definition
 * is visible in this file. The descriptions are the declared contract.
 */

/**
 * @brief Normalize audio data.
 * @param audio_data      Input samples
 * @param length          Number of samples
 * @param normalized_data Output: normalized samples
 * @return 0 on success, -1 on failure
 */
int normalize_audio_data(const int16_t* audio_data, int length, float* normalized_data);

/**
 * @brief Compute a Mel spectrogram.
 * @param audio_float     Floating-point audio samples
 * @param length          Number of samples
 * @param mel_spectrogram Output: Mel spectrogram
 * @return 0 on success, -1 on failure
 */
int compute_mel_spectrogram(const float* audio_float, int length, float* mel_spectrogram);

/**
 * @brief Apply a Hann window.
 * @param data          Input data
 * @param length        Number of values
 * @param windowed_data Output: windowed data
 * @return 0 on success, -1 on failure
 */
int apply_hann_window(const float* data, int length, float* windowed_data);
|
||
|
||
// ==================== 辅助函数 ====================
|
||
|
||
/**
|
||
* @brief 简单的FFT实现(仅用于演示)
|
||
* @param real 实部数组
|
||
* @param imag 虚部数组
|
||
* @param n 数据长度(必须是2的幂)
|
||
*/
|
||
void simple_fft(float* real, float* imag, int n) {
|
||
// 位反转
|
||
int j = 0;
|
||
for (int i = 1; i < n; i++) {
|
||
int bit = n >> 1;
|
||
while (j & bit) {
|
||
j ^= bit;
|
||
bit >>= 1;
|
||
}
|
||
j ^= bit;
|
||
if (i < j) {
|
||
float temp = real[i];
|
||
real[i] = real[j];
|
||
real[j] = temp;
|
||
temp = imag[i];
|
||
imag[i] = imag[j];
|
||
imag[j] = temp;
|
||
}
|
||
}
|
||
|
||
// FFT计算
|
||
for (int len = 2; len <= n; len <<= 1) {
|
||
float angle = -2.0f * M_PI / len;
|
||
float wlen_real = cosf(angle);
|
||
float wlen_imag = sinf(angle);
|
||
|
||
for (int i = 0; i < n; i += len) {
|
||
float w_real = 1.0f;
|
||
float w_imag = 0.0f;
|
||
|
||
for (int j = 0; j < len / 2; j++) {
|
||
int u = i + j;
|
||
int v = i + j + len / 2;
|
||
|
||
float u_real = real[u];
|
||
float u_imag = imag[u];
|
||
float v_real = real[v] * w_real - imag[v] * w_imag;
|
||
float v_imag = real[v] * w_imag + imag[v] * w_real;
|
||
|
||
real[u] = u_real + v_real;
|
||
imag[u] = u_imag + v_imag;
|
||
real[v] = u_real - v_real;
|
||
imag[v] = u_imag - v_imag;
|
||
|
||
float temp_real = w_real * wlen_real - w_imag * wlen_imag;
|
||
w_imag = w_real * wlen_imag + w_imag * wlen_real;
|
||
w_real = temp_real;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
|
||
* @brief 预处理音频数据为模型输入
|
||
* @param audio_data 原始音频数据
|
||
* @param length 数据长度
|
||
* @param output 输出特征数组
|
||
* @return 0: 成功, -1: 失败
|
||
*/
|
||
int preprocess_audio(const int16_t* audio_data, int length, float* output) {
|
||
if (!g_preprocessor.is_initialized || !audio_data || !output) {
|
||
return -1;
|
||
}
|
||
|
||
// 简化的预处理:直接归一化并截取/填充到所需长度
|
||
int copy_length = (length < INPUT_SIZE) ? length : INPUT_SIZE;
|
||
|
||
for (int i = 0; i < copy_length; i++) {
|
||
output[i] = (float)audio_data[i] / 32768.0f; // 归一化到[-1,1]
|
||
}
|
||
|
||
// 如果长度不足,用零填充
|
||
for (int i = copy_length; i < INPUT_SIZE; i++) {
|
||
output[i] = 0.0f;
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
|
||
/**
 * @brief Detect audio activity by comparing RMS energy to a threshold.
 * @param audio_data Samples to inspect
 * @param length     Number of samples
 * @param threshold  Energy threshold
 * @return true when the RMS energy is strictly greater than the threshold;
 *         false otherwise, and always false for NULL or empty input
 */
bool detect_audio_activity(const int16_t* audio_data, int length, float threshold) {
    if (!audio_data || length <= 0) {
        return false;
    }

    return calculate_rms_energy(audio_data, length) > threshold;
}
|
||
|
||
/**
|
||
* @brief 计算音频RMS能量
|
||
* @param audio_data 音频数据
|
||
* @param length 数据长度
|
||
* @return RMS能量值
|
||
*/
|
||
float calculate_rms_energy(const int16_t* audio_data, int length) {
|
||
if (!audio_data || length <= 0) return 0.0f;
|
||
|
||
float sum = 0.0f;
|
||
for (int i = 0; i < length; i++) {
|
||
float sample = (float)audio_data[i] / 32768.0f; // 归一化到[-1,1]
|
||
sum += sample * sample;
|
||
}
|
||
return sqrtf(sum / length);
|
||
}
|
||
|
||
/**
|
||
* @brief 获取类别名称(英文)
|
||
* @param class_id 类别ID
|
||
* @return 类别名称字符串
|
||
*/
|
||
const char* get_class_name_en(AudioClassType class_id) {
|
||
if (class_id >= 0 && class_id < NUM_CLASSES) {
|
||
return CLASS_NAMES_EN[class_id];
|
||
}
|
||
return "unknown";
|
||
}
|
||
|
||
/**
|
||
* @brief 获取类别名称(中文)
|
||
* @param class_id 类别ID
|
||
* @return 类别名称字符串
|
||
*/
|
||
const char* get_class_name_cn(AudioClassType class_id) {
|
||
if (class_id >= 0 && class_id < NUM_CLASSES) {
|
||
return CLASS_NAMES_CN[class_id];
|
||
}
|
||
return "未知";
|
||
}
|
||
|
||
/**
|
||
* @brief 验证音频数据格式
|
||
* @param audio_data 音频数据指针
|
||
* @param length 数据长度
|
||
* @return true: 格式正确, false: 格式错误
|
||
*/
|
||
bool validate_audio_format(const int16_t* audio_data, int length) {
|
||
if (!audio_data) return false;
|
||
if (length != AUDIO_BUFFER_SIZE) return false;
|
||
return true;
|
||
}
|
||
|
||
/**
|
||
* @brief 打印预测结果
|
||
* @param result 预测结果指针
|
||
*/
|
||
void print_prediction_result(const AudioPredictionResult* result) {
|
||
if (!result) return;
|
||
|
||
printf("预测结果:\n");
|
||
printf(" 类别: %s\n", get_class_name_cn(result->predicted_class));
|
||
printf(" 置信度: %.2f\n", result->confidence);
|
||
printf(" 有效性: %s\n", result->is_valid ? "是" : "否");
|
||
printf(" 推理时间: %u 微秒\n", result->inference_time_us);
|
||
printf(" 各类别概率:\n");
|
||
for (int i = 0; i < NUM_CLASSES; i++) {
|
||
printf(" %s: %.3f\n", get_class_name_cn((AudioClassType)i), result->class_probabilities[i]);
|
||
}
|
||
}
|
||
|
||
// ==================== 性能监控函数 ====================
|
||
|
||
/**
|
||
* @brief 获取上次推理耗时
|
||
* @return 推理时间(微秒)
|
||
*/
|
||
uint32_t get_last_inference_time_us(void) {
|
||
return g_last_inference_time;
|
||
}
|
||
|
||
/**
|
||
* @brief 获取模型内存使用情况
|
||
* @param model_memory 模型占用内存(字节)
|
||
* @param buffer_memory 缓冲区占用内存(字节)
|
||
* @return 0: 成功, -1: 失败
|
||
*/
|
||
int get_memory_usage(size_t* model_memory, size_t* buffer_memory) {
|
||
if (!model_memory || !buffer_memory) return -1;
|
||
|
||
*model_memory = get_audio_model_size();
|
||
*buffer_memory = (INPUT_SIZE + N_FFT + N_FFT) * sizeof(float);
|
||
return 0;
|
||
}
|
||
|
||
/**
|
||
* @brief 获取预测统计信息
|
||
* @param total_predictions 总预测次数
|
||
* @param successful_predictions 成功预测次数
|
||
* @param average_confidence 平均置信度
|
||
* @return 0: 成功, -1: 失败
|
||
*/
|
||
int get_prediction_statistics(uint32_t* total_predictions, uint32_t* successful_predictions,
|
||
float* average_confidence) {
|
||
if (!total_predictions || !successful_predictions || !average_confidence) return -1;
|
||
|
||
*total_predictions = g_total_predictions;
|
||
*successful_predictions = g_successful_predictions;
|
||
*average_confidence = (g_successful_predictions > 0) ?
|
||
(g_total_confidence / g_successful_predictions) : 0.0f;
|
||
return 0;
|
||
}
|
||
|
||
// ==================== 配置函数 ====================
|
||
|
||
/**
|
||
* @brief 设置置信度阈值
|
||
* @param threshold 新的阈值 (0.0 - 1.0)
|
||
* @return 0: 成功, -1: 失败
|
||
*/
|
||
int set_confidence_threshold(float threshold) {
|
||
if (threshold < 0.0f || threshold > 1.0f) {
|
||
strcpy(g_last_error, "置信度阈值必须在0.0-1.0之间");
|
||
return -1;
|
||
}
|
||
g_confidence_threshold = threshold;
|
||
return 0;
|
||
}
|
||
|
||
/**
|
||
* @brief 获取当前置信度阈值
|
||
* @return 当前阈值
|
||
*/
|
||
float get_confidence_threshold(void) {
|
||
return g_confidence_threshold;
|
||
}
|
||
|
||
/**
|
||
* @brief 启用/禁用调试模式
|
||
* @param enable true: 启用, false: 禁用
|
||
*/
|
||
void set_debug_mode(bool enable) {
|
||
g_debug_mode = enable;
|
||
}
|
||
|
||
/**
|
||
* @brief 检查调试模式状态
|
||
* @return true: 已启用, false: 已禁用
|
||
*/
|
||
bool is_debug_mode_enabled(void) {
|
||
return g_debug_mode;
|
||
}
|
||
|
||
// ==================== 错误处理 ====================
|
||
|
||
/**
|
||
* @brief 获取最后一次错误信息
|
||
* @return 错误信息字符串
|
||
*/
|
||
const char* get_last_error_message(void) {
|
||
return g_last_error;
|
||
}
|
||
|
||
/**
|
||
* @brief 清除错误状态
|
||
*/
|
||
void clear_error_status(void) {
|
||
strcpy(g_last_error, "");
|
||
}
|
||
|
||
// ==================== 模型数据访问函数 ====================
|
||
|
||
/**
|
||
* @brief 获取模型数据指针
|
||
* @return 模型数据指针
|
||
*/
|
||
const unsigned char* get_audio_model_data(void) {
|
||
return audio_model_data;
|
||
}
|
||
|
||
/**
|
||
* @brief 获取模型数据大小
|
||
* @return 模型数据大小(字节)
|
||
*/
|
||
size_t get_audio_model_size(void) {
|
||
return AUDIO_MODEL_SIZE;
|
||
}
|
||
|
||
// ==================== 使用示例 ====================
|
||
/*
|
||
使用示例代码:
|
||
|
||
#include "audio_model_esp32.h"
|
||
|
||
void example_usage() {
|
||
// 1. 初始化模型
|
||
if (audio_model_init() != 0) {
|
||
printf("模型初始化失败\n");
|
||
return;
|
||
}
|
||
|
||
// 2. 准备音频数据
|
||
int16_t audio_buffer[AUDIO_BUFFER_SIZE];
|
||
// ... 从麦克风或其他源获取音频数据 ...
|
||
|
||
// 3. 验证音频格式
|
||
if (!validate_audio_format(audio_buffer, AUDIO_BUFFER_SIZE)) {
|
||
printf("音频格式不正确\n");
|
||
return;
|
||
}
|
||
|
||
// 4. 进行预测
|
||
AudioPredictionResult result;
|
||
if (audio_model_predict(audio_buffer, AUDIO_BUFFER_SIZE, &result) == 0) {
|
||
// 5. 处理预测结果
|
||
if (result.is_valid && result.confidence > 0.6f) {
|
||
printf("预测类别: %s (置信度: %.2f)\n",
|
||
get_class_name_cn(result.predicted_class),
|
||
result.confidence);
|
||
}
|
||
}
|
||
|
||
// 6. 清理资源
|
||
audio_model_cleanup();
|
||
}
|
||
|
||
音频数据获取示例(ESP32 I2S):
|
||
*/
|
||
// The I2S capture example needs the ESP-IDF driver header, so it is only
// compiled for ESP32 targets.
#if defined(ESP_PLATFORM) || defined(ARDUINO_ARCH_ESP32) || defined(ESP32)
#include "driver/i2s.h"

/**
 * @brief Example: fill a buffer with samples read from I2S port 0.
 *
 * Blocks (portMAX_DELAY) while reading. If the read fails or returns fewer
 * bytes than requested, the unread part of the buffer is zero-filled so the
 * caller never sees stale samples.
 *
 * @param buffer      Destination for the samples; ignored when NULL
 * @param buffer_size Capacity of buffer in samples (not bytes)
 */
void get_audio_data(int16_t* buffer, size_t buffer_size) {
    if (!buffer || buffer_size == 0) {
        return;
    }

    const size_t bytes_wanted = buffer_size * sizeof(int16_t);
    size_t bytes_read = 0;

    if (i2s_read(I2S_NUM_0, buffer, bytes_wanted, &bytes_read, portMAX_DELAY) != ESP_OK) {
        bytes_read = 0;
    }

    if (bytes_read < bytes_wanted) {
        memset((uint8_t*)buffer + bytes_read, 0, bytes_wanted - bytes_read);
    }
}
#endif
|
||
|
||
#ifdef __cplusplus
|
||
}
|
||
#endif
|
||
|
||
#endif // AUDIO_MODEL_ESP32_H
|