Files
Dorm-Air-Conditioner-Smart-…/audio_model_esp32.h
2025-09-24 12:42:13 +08:00

841 lines
26 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#ifndef AUDIO_MODEL_ESP32_H
#define AUDIO_MODEL_ESP32_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <stdbool.h>
#include <math.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
// 添加Arduino相关头文件
#ifdef ARDUINO
#include <Arduino.h>
#else
#include <stdint.h>
#include <stdbool.h>
#endif
#include "audio_model_data.h"
// ==================== 模型配置参数 ====================
#define SAMPLE_RATE 16000 // Audio sample rate (Hz)
#define AUDIO_DURATION_MS 2000 // Length of one audio clip (milliseconds)
#define N_MELS 32 // Number of Mel-spectrogram frequency bins
#define N_FFT 1024 // FFT window size
#define HOP_LENGTH 512 // Hop length
#define NUM_CLASSES 4 // Number of classes
// Derived parameters
#define AUDIO_BUFFER_SIZE (SAMPLE_RATE * AUDIO_DURATION_MS / 1000) // 32000 samples
#define MEL_FRAMES ((AUDIO_BUFFER_SIZE - N_FFT) / HOP_LENGTH + 1) // (32000 - 1024) / 512 + 1 = 61 frames (integer division)
#define INPUT_SIZE (MEL_FRAMES * N_MELS) // Input feature count (61 * 32 = 1952)
// Preprocessing parameters
#define MEL_FMIN 0.0f // Lowest Mel filter frequency
#define MEL_FMAX 8000.0f // Highest Mel filter frequency
#define WINDOW_TYPE_HANN 1 // Hann window
#define ENERGY_THRESHOLD 0.01f // Audio-activity detection threshold
#define CONFIDENCE_THRESHOLD 0.6f // Prediction confidence threshold
// ==================== 数据结构定义 ====================
// Audio classification labels; the values are used as indices into the
// class-name tables and into class_probabilities[].
typedef enum {
AUDIO_CLASS_PERSON_PRESENT = 0, // Someone is in the room
AUDIO_CLASS_DOOR_CLOSING = 1, // Door closing
AUDIO_CLASS_KEY_JINGLING = 2, // Keys jingling
AUDIO_CLASS_PERSON_ABSENT = 3 // Nobody is in the room
} AudioClassType;
// Prediction result
typedef struct {
AudioClassType predicted_class; // Predicted class
float confidence; // Highest confidence (0.0 - 1.0)
float class_probabilities[NUM_CLASSES]; // Probability of each class
bool is_valid; // Whether the prediction is valid
uint32_t inference_time_us; // Inference time (microseconds)
} AudioPredictionResult;
// Audio preprocessing state
typedef struct {
float* mel_buffer; // Mel feature buffer
float* fft_buffer; // FFT working buffer
float* window_buffer; // Window-function buffer
bool is_initialized; // Whether initialisation has completed
} AudioPreprocessor;
// ==================== 全局变量定义 ====================
// Preprocessor buffers and init flag; all-zero until audio_model_init() succeeds.
static AudioPreprocessor g_preprocessor = {0};
// Minimum confidence for a prediction to be marked valid.
static float g_confidence_threshold = CONFIDENCE_THRESHOLD;
// Debug flag toggled by set_debug_mode().
static bool g_debug_mode = false;
// Text of the most recent error ("" when none).
static char g_last_error[256] = {0};
// Duration of the most recent prediction, in microseconds.
static uint32_t g_last_inference_time = 0;
// Prediction counters: all successful calls, and those marked valid.
static uint32_t g_total_predictions = 0;
static uint32_t g_successful_predictions = 0;
// Sum of the confidences of the valid predictions (for the running average).
static float g_total_confidence = 0.0f;
// ==================== 常量定义 ====================
// English class names, indexed by AudioClassType value.
static const char* CLASS_NAMES_EN[NUM_CLASSES] = {
"person_present", // Someone is in the room
"door_closing", // Door closing
"key_jingling", // Keys jingling
"person_absent" // Nobody is in the room
};
// Chinese class names, indexed by AudioClassType value
// (same order as the AudioClassType enumerators).
static const char* CLASS_NAMES_CN[NUM_CLASSES] = {
"室内有人",
"关门声",
"钥匙声",
"室内无人"
};
// ==================== 函数声明 ====================
// Forward declarations of functions that are defined further down in this file.
// Feature extraction
int preprocess_audio_to_mel(const int16_t* audio_data, int audio_length, float* mel_features);
int preprocess_audio_to_mel_simple(const int16_t* audio_data, int audio_length, float* mel_features, int feature_count);
float calculate_rms_energy(const int16_t* audio_data, int length);
// Resource release
void audio_model_cleanup(void);
// Embedded model data access
const unsigned char* get_audio_model_data(void);
size_t get_audio_model_size(void);
// Class-name lookup
const char* get_class_name_en(AudioClassType class_id);
const char* get_class_name_cn(AudioClassType class_id);
// ==================== 核心API函数 ====================
/**
 * @brief Initialise the audio model's preprocessing state.
 *
 * Allocates the Mel, FFT and window buffers and precomputes the Hann window.
 * Calling it again after a successful initialisation is a no-op.
 *
 * @return 0 on success (or if already initialised); -1 if an allocation
 *         failed, in which case all buffers are released and the error
 *         message is stored in g_last_error.
 * @note Must be called before the other functions of this module.
 */
int audio_model_init(void) {
    // Nothing to do when a previous call already succeeded.
    if (g_preprocessor.is_initialized) {
        return 0;
    }

    // Allocate the working buffers (casts kept: this header is also built as C++).
    const size_t window_bytes = N_FFT * sizeof(float);
    g_preprocessor.mel_buffer = (float*)malloc(INPUT_SIZE * sizeof(float));
    g_preprocessor.fft_buffer = (float*)malloc(window_bytes);
    g_preprocessor.window_buffer = (float*)malloc(window_bytes);

    const bool all_allocated = g_preprocessor.mel_buffer != NULL &&
                               g_preprocessor.fft_buffer != NULL &&
                               g_preprocessor.window_buffer != NULL;
    if (!all_allocated) {
        strcpy(g_last_error, "内存分配失败");
        audio_model_cleanup();  // releases whichever buffers were obtained
        return -1;
    }

    // Precompute the Hann window: w[n] = 0.5 * (1 - cos(2*pi*n / (N_FFT - 1))).
    float* window = g_preprocessor.window_buffer;
    for (int n = 0; n < N_FFT; n++) {
        window[n] = 0.5f * (1.0f - cosf(2.0f * M_PI * n / (N_FFT - 1)));
    }

    g_preprocessor.is_initialized = true;
    strcpy(g_last_error, "");
    return 0;
}
/**
 * @brief Release the resources held by the audio model.
 *
 * Frees the three preprocessing buffers, resets their pointers to NULL and
 * clears the initialised flag. Safe to call when nothing (or only part of
 * the buffers) was allocated.
 *
 * @note Call at program shutdown to release memory.
 */
void audio_model_cleanup(void) {
    // free(NULL) is a no-op, so no per-pointer guard is needed.
    free(g_preprocessor.mel_buffer);
    g_preprocessor.mel_buffer = NULL;

    free(g_preprocessor.fft_buffer);
    g_preprocessor.fft_buffer = NULL;

    free(g_preprocessor.window_buffer);
    g_preprocessor.window_buffer = NULL;

    g_preprocessor.is_initialized = false;
}
/**
 * @brief Classify one audio clip (full result).
 * @param audio_data Pointer to the input audio samples
 * @param audio_length Number of samples; must equal AUDIO_BUFFER_SIZE
 * @param result Receives the prediction
 * @return 0 on success; -1 if the model is not initialised or an argument is
 *         invalid (the message is stored in g_last_error)
 *
 * Expected audio format:
 * - sample type: int16_t (16-bit signed integer)
 * - sample rate: 16000 Hz
 * - channels: mono
 * - length: 32000 samples (2 seconds)
 * - value range: -32768 to 32767
 * - byte order: little endian
 *
 * Example:
 * int16_t audio_buffer[AUDIO_BUFFER_SIZE];
 * AudioPredictionResult result;
 * // ... fill audio_buffer ...
 * int ret = audio_model_predict(audio_buffer, AUDIO_BUFFER_SIZE, &result);
 *
 * @note This is a placeholder: the class is chosen from the clip's RMS energy
 *       with fixed confidences; the TensorFlow Lite model is not run yet.
 */
int audio_model_predict(const int16_t* audio_data, int audio_length, AudioPredictionResult* result) {
    if (!g_preprocessor.is_initialized) {
        strcpy(g_last_error, "模型未初始化");
        return -1;
    }
    const bool args_ok = audio_data != NULL && result != NULL &&
                         audio_length == AUDIO_BUFFER_SIZE;
    if (!args_ok) {
        strcpy(g_last_error, "无效参数");
        return -1;
    }

    const uint32_t t_begin = micros();

    // Give the scheduler/watchdog a chance to run.
#ifdef ARDUINO
    yield();
#endif

    // TODO: integrate the TensorFlow Lite model for real audio recognition,
    // using the model data from audio_model_data.h. Required steps:
    // 1. initialise the TensorFlow Lite interpreter
    // 2. load the model data (audio_model_data)
    // 3. preprocess the audio into the model's input format
    // 4. run inference
    // 5. parse the output
    // Temporary implementation: simple energy-based classification.
#ifdef ARDUINO
    Serial.println("警告当前使用临时实现等待TensorFlow Lite模型集成");
#endif

    // The clip's RMS energy drives the placeholder classification.
    const float rms = calculate_rms_energy(audio_data, audio_length);

    // Debug output: inspect the actual values of the first 100 samples.
#ifdef ARDUINO
    {
        const int probe_count = min(100, audio_length);
        int non_zero_count = 0;
        int16_t min_val = 32767;
        int16_t max_val = -32768;
        long long sum_abs = 0;
        for (int k = 0; k < probe_count; k++) {
            const int16_t v = audio_data[k];
            if (v != 0) non_zero_count++;
            if (v < min_val) min_val = v;
            if (v > max_val) max_val = v;
            sum_abs += abs(v);
        }
        Serial.printf("音频数据调试: 非零样本=%d/100, 最小值=%d, 最大值=%d, 平均绝对值=%lld\n",
                      non_zero_count, min_val, max_val, sum_abs/100);
    }
#endif

    // Map the energy level to a class, highest threshold first.
    AudioClassType label;
    float score;
    if (rms > 0.3f) {
        // Very high energy: door closing.
        label = AUDIO_CLASS_DOOR_CLOSING;
        score = 0.75f;
    } else if (rms > 0.1f) {
        // High energy: keys jingling.
        label = AUDIO_CLASS_KEY_JINGLING;
        score = 0.65f;
    } else if (rms > 0.02f) {
        // Medium energy: someone is in the room.
        label = AUDIO_CLASS_PERSON_PRESENT;
        score = 0.70f;
    } else {
        // Low energy: nobody is in the room.
        label = AUDIO_CLASS_PERSON_ABSENT;
        score = 0.80f;
    }

    // Fill in the result: the winner gets `score`, the rest share the remainder.
    result->predicted_class = label;
    result->confidence = score;
    const float other_share = (1.0f - score) / (NUM_CLASSES - 1);
    for (int c = 0; c < NUM_CLASSES; c++) {
        result->class_probabilities[c] = (c == (int)label) ? score : other_share;
    }
    result->is_valid = result->confidence >= g_confidence_threshold;
    result->inference_time_us = micros() - t_begin;
    g_last_inference_time = result->inference_time_us;

    // Update the running statistics.
    g_total_predictions++;
    if (result->is_valid) {
        g_successful_predictions++;
        g_total_confidence += result->confidence;
    }

#ifdef ARDUINO
    Serial.printf("音频能量: %.4f, 预测类别: %s, 置信度: %.2f\n",
                  rms, get_class_name_cn(label), score);
#endif
    return 0;
}
/**
 * @brief Classify one audio clip (simplified interface).
 * @param audio_data Pointer to the input audio samples
 * @param audio_length Number of samples
 * @param predicted_class Receives the predicted class
 * @param confidence Receives the confidence of that class
 * @return 0 on success; -1 on failure (NULL output pointer, or any failure
 *         reported by audio_model_predict()); the message is stored in
 *         g_last_error
 *
 * @note The outputs are written only when the prediction is marked valid
 *       (confidence at or above the configured threshold). When 0 is
 *       returned for a below-threshold prediction, *predicted_class and
 *       *confidence are left untouched, so callers should initialise them
 *       before the call.
 */
int audio_model_predict_simple(const int16_t* audio_data, int audio_length,
                               AudioClassType* predicted_class, float* confidence) {
    // Reject NULL outputs up front instead of dereferencing them below.
    if (!predicted_class || !confidence) {
        strcpy(g_last_error, "无效参数");
        return -1;
    }

    AudioPredictionResult result;
    int ret = audio_model_predict(audio_data, audio_length, &result);
    if (ret == 0 && result.is_valid) {
        *predicted_class = result.predicted_class;
        *confidence = result.confidence;
    }
    return ret;
}
// ==================== 数据预处理函数 ====================
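/**
 * @brief Audio preprocessing (full pipeline)
 * @param audio_data Raw audio data (int16_t format)
 * @param audio_length Audio length
 * @param mel_features Output Mel features (size INPUT_SIZE)
 * @return 0: success, -1: failure
 *
 * Preprocessing steps:
 * 1. Type conversion (int16_t -> float)
 * 2. Normalization ([-1.0, 1.0])
 * 3. Pre-emphasis filtering
 * 4. Windowing (Hann window)
 * 5. FFT
 * 6. Power spectrum computation
 * 7. Mel filter bank
 * 8. Logarithmic transform
 * 9. Feature normalization
 */
/**
 * @brief Preprocess audio data into Mel-spectrogram features (optimized version)
 * @param audio_data Pointer to the input audio data
 * @param audio_length Length of the audio data
 * @param mel_features Output Mel feature array
 * @return 0: success, -1: failure
 */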
/**
 * @brief Simplified audio feature extraction with low memory use.
 *
 * Splits the clip into feature_count / 4 equal frames and writes four values
 * per frame: log(RMS energy), log(zero-crossing rate), energy * rate and
 * energy - rate. Any remaining output slots are filled with -10.0f (the
 * "silence" value).
 *
 * @param audio_data Input audio samples
 * @param audio_length Number of samples; must equal AUDIO_BUFFER_SIZE
 * @param mel_features Output array with room for feature_count floats
 * @param feature_count Number of features to produce (> 0)
 * @return 0 on success, -1 on invalid arguments
 *
 * @note With feature_count < 4 no complete frame fits, so the whole output is
 *       filled with the silence value. The frame count is capped at
 *       AUDIO_BUFFER_SIZE so that every frame holds at least one sample.
 */
int preprocess_audio_to_mel_simple(const int16_t* audio_data, int audio_length, float* mel_features, int feature_count) {
    if (!audio_data || !mel_features || audio_length != AUDIO_BUFFER_SIZE || feature_count <= 0) {
        return -1;
    }

    enum { FEATURES_PER_FRAME = 4 };

    // Cheap per-frame statistics instead of a real Mel spectrogram.
    int frame_count = feature_count / FEATURES_PER_FRAME;
    if (frame_count > AUDIO_BUFFER_SIZE) {
        // More frames than samples would give zero-length frames (0/0 -> NaN).
        frame_count = AUDIO_BUFFER_SIZE;
    }
    // frame_count is 0 when feature_count < 4; avoid dividing by zero then.
    const int frame_size = (frame_count > 0) ? (AUDIO_BUFFER_SIZE / frame_count) : 0;

    for (int frame = 0; frame < frame_count; frame++) {
        const int start_idx = frame * frame_size;
        int end_idx = start_idx + frame_size;
        if (end_idx > audio_length) {
            end_idx = audio_length;
        }

        // Accumulate energy and count sign changes within the frame.
        float energy = 0.0f;
        float zero_crossings = 0.0f;
        int16_t prev_sample = 0;
        for (int i = start_idx; i < end_idx; i++) {
            const float sample = (float)audio_data[i] / 32768.0f;
            energy += sample * sample;
            // A crossing needs a predecessor inside the same frame.
            if (i > start_idx && ((audio_data[i] < 0) != (prev_sample < 0))) {
                zero_crossings += 1.0f;
            }
            prev_sample = audio_data[i];
            // Yield periodically so a long computation does not trip the watchdog.
#ifdef ARDUINO
            if (i % 1000 == 0) {
                yield();
            }
#endif
        }

        // Normalise by the frame length: RMS energy and zero-crossing rate.
        const int frame_len = end_idx - start_idx;
        energy = sqrtf(energy / frame_len);
        zero_crossings = zero_crossings / frame_len;

        // Four features per frame; the small offset keeps logf() finite.
        float* out = &mel_features[frame * FEATURES_PER_FRAME];
        out[0] = logf(energy + 1e-10f);
        out[1] = logf(zero_crossings + 1e-10f);
        out[2] = energy * zero_crossings;
        out[3] = energy - zero_crossings;
    }

    // Pad whatever was not produced with the silence value.
    for (int i = frame_count * FEATURES_PER_FRAME; i < feature_count; i++) {
        mel_features[i] = -10.0f;
    }
    return 0;
}
/**
 * @brief Produce the model input features for one audio clip (simplified).
 *
 * Not a real Mel spectrogram: the clip is split into 8 equal frames and four
 * statistics are written per frame (log RMS energy, log zero-crossing rate,
 * their product and their difference). The rest of the INPUT_SIZE outputs is
 * filled with -10.0f (the "silence" value).
 *
 * @param audio_data Input audio samples
 * @param audio_length Number of samples; must equal AUDIO_BUFFER_SIZE
 * @param mel_features Output array with room for INPUT_SIZE floats
 * @return 0 on success, -1 on invalid arguments
 */
int preprocess_audio_to_mel(const int16_t* audio_data, int audio_length, float* mel_features) {
    if (audio_data == NULL || mel_features == NULL || audio_length != AUDIO_BUFFER_SIZE) {
        return -1;
    }

    // Few, large frames keep the amount of computation small.
    enum { FRAME_COUNT = 8, FEATURES_PER_FRAME = 4 };
    const int samples_per_frame = AUDIO_BUFFER_SIZE / FRAME_COUNT;

    for (int f = 0; f < FRAME_COUNT; f++) {
        const int first = f * samples_per_frame;
        int last = first + samples_per_frame;  // exclusive
        if (last > audio_length) {
            last = audio_length;
        }

        // Per-frame statistics: sum of squares and number of sign changes.
        float sum_sq = 0.0f;
        int crossings = 0;
        int16_t previous = 0;
        for (int i = first; i < last; i++) {
            const int16_t current = audio_data[i];
            const float s = (float)current / 32768.0f;
            sum_sq += s * s;
            // The first sample of a frame has no predecessor to compare with.
            if (i > first && ((current < 0) != (previous < 0))) {
                crossings++;
            }
            previous = current;
            // Yield periodically so a long computation does not trip the watchdog.
#ifdef ARDUINO
            if (i % 1000 == 0) {
                yield();
            }
#endif
        }

        // Normalise by the frame length.
        const int span = last - first;
        const float rms = sqrtf(sum_sq / span);
        const float zcr = (float)crossings / span;

        // Four values per frame stand in for the Mel bands.
        const float frame_features[FEATURES_PER_FRAME] = {
            logf(rms + 1e-10f),
            logf(zcr + 1e-10f),
            rms * zcr,
            rms - zcr
        };
        for (int k = 0; k < FEATURES_PER_FRAME; k++) {
            const int slot = f * FEATURES_PER_FRAME + k;
            if (slot < INPUT_SIZE) {
                mel_features[slot] = frame_features[k];
            }
        }
    }

    // Pad the remaining features with the silence value.
    for (int slot = FRAME_COUNT * FEATURES_PER_FRAME; slot < INPUT_SIZE; slot++) {
        mel_features[slot] = -10.0f;
    }
    return 0;
}
/**
 * @brief Normalize audio data
 * @param audio_data Input audio data
 * @param length Data length
 * @param normalized_data Output: the normalized data
 * @return 0: success, -1: failure
 * @note Declaration only; no definition is visible in this part of the file.
 */
int normalize_audio_data(const int16_t* audio_data, int length, float* normalized_data);
/**
 * @brief Compute the Mel spectrogram
 * @param audio_float Floating-point audio data
 * @param length Data length
 * @param mel_spectrogram Output: the Mel spectrogram
 * @return 0: success, -1: failure
 * @note Declaration only; no definition is visible in this part of the file.
 */
int compute_mel_spectrogram(const float* audio_float, int length, float* mel_spectrogram);
/**
 * @brief Apply a Hann window
 * @param data Input data
 * @param length Data length
 * @param windowed_data Output: the windowed data
 * @return 0: success, -1: failure
 * @note Declaration only; no definition is visible in this part of the file.
 */
int apply_hann_window(const float* data, int length, float* windowed_data);
// ==================== 辅助函数 ====================
/**
* @brief 简单的FFT实现仅用于演示
* @param real 实部数组
* @param imag 虚部数组
* @param n 数据长度必须是2的幂
*/
void simple_fft(float* real, float* imag, int n) {
// 位反转
int j = 0;
for (int i = 1; i < n; i++) {
int bit = n >> 1;
while (j & bit) {
j ^= bit;
bit >>= 1;
}
j ^= bit;
if (i < j) {
float temp = real[i];
real[i] = real[j];
real[j] = temp;
temp = imag[i];
imag[i] = imag[j];
imag[j] = temp;
}
}
// FFT计算
for (int len = 2; len <= n; len <<= 1) {
float angle = -2.0f * M_PI / len;
float wlen_real = cosf(angle);
float wlen_imag = sinf(angle);
for (int i = 0; i < n; i += len) {
float w_real = 1.0f;
float w_imag = 0.0f;
for (int j = 0; j < len / 2; j++) {
int u = i + j;
int v = i + j + len / 2;
float u_real = real[u];
float u_imag = imag[u];
float v_real = real[v] * w_real - imag[v] * w_imag;
float v_imag = real[v] * w_imag + imag[v] * w_real;
real[u] = u_real + v_real;
imag[u] = u_imag + v_imag;
real[v] = u_real - v_real;
imag[v] = u_imag - v_imag;
float temp_real = w_real * wlen_real - w_imag * wlen_imag;
w_imag = w_real * wlen_imag + w_imag * wlen_real;
w_real = temp_real;
}
}
}
}
/**
 * @brief Convert raw audio into the model's input array (simplified).
 *
 * Copies up to INPUT_SIZE samples, scaled to [-1, 1), and zero-fills the
 * rest of the output when fewer samples are supplied.
 *
 * @param audio_data Raw audio samples
 * @param length Number of samples available in audio_data (>= 0)
 * @param output Output array with room for INPUT_SIZE floats
 * @return 0 on success; -1 if the model is not initialised, a pointer is
 *         NULL, or length is negative
 */
int preprocess_audio(const int16_t* audio_data, int length, float* output) {
    // A negative length would start the zero-fill loop below at a negative
    // index and write before the start of `output`, so reject it here.
    if (!g_preprocessor.is_initialized || !audio_data || !output || length < 0) {
        return -1;
    }

    // Simplified preprocessing: normalise, then truncate or pad to INPUT_SIZE.
    const int copy_length = (length < INPUT_SIZE) ? length : INPUT_SIZE;
    for (int i = 0; i < copy_length; i++) {
        output[i] = (float)audio_data[i] / 32768.0f;  // scale to [-1, 1)
    }
    // Zero-pad when the input is shorter than the model input.
    for (int i = copy_length; i < INPUT_SIZE; i++) {
        output[i] = 0.0f;
    }
    return 0;
}
/**
 * @brief Detect audio activity.
 * @param audio_data Audio samples
 * @param length Number of samples
 * @param threshold RMS energy threshold
 * @return true if the clip's RMS energy is strictly above the threshold;
 *         false for silence, a NULL pointer or a non-positive length
 */
bool detect_audio_activity(const int16_t* audio_data, int length, float threshold) {
    // Short-circuiting keeps the energy computation off invalid input.
    return audio_data != NULL
        && length > 0
        && calculate_rms_energy(audio_data, length) > threshold;
}
/**
* @brief 计算音频RMS能量
* @param audio_data 音频数据
* @param length 数据长度
* @return RMS能量值
*/
float calculate_rms_energy(const int16_t* audio_data, int length) {
if (!audio_data || length <= 0) return 0.0f;
float sum = 0.0f;
for (int i = 0; i < length; i++) {
float sample = (float)audio_data[i] / 32768.0f; // 归一化到[-1,1]
sum += sample * sample;
}
return sqrtf(sum / length);
}
/**
 * @brief Look up the English name of a class.
 * @param class_id Class ID
 * @return The entry of CLASS_NAMES_EN for class_id, or "unknown" when
 *         class_id is outside 0 .. NUM_CLASSES - 1
 */
const char* get_class_name_en(AudioClassType class_id) {
    // Guard clause: anything outside the table gets the fallback name.
    if (class_id < 0 || class_id >= NUM_CLASSES) {
        return "unknown";
    }
    return CLASS_NAMES_EN[class_id];
}
/**
 * @brief Look up the Chinese name of a class.
 * @param class_id Class ID
 * @return The entry of CLASS_NAMES_CN for class_id, or "未知" when class_id
 *         is outside 0 .. NUM_CLASSES - 1
 */
const char* get_class_name_cn(AudioClassType class_id) {
    // Guard clause: anything outside the table gets the fallback name.
    if (class_id < 0 || class_id >= NUM_CLASSES) {
        return "未知";
    }
    return CLASS_NAMES_CN[class_id];
}
/**
 * @brief Validate the audio buffer passed to the model.
 * @param audio_data Pointer to the audio samples
 * @param length Number of samples
 * @return true when the pointer is non-NULL and length equals
 *         AUDIO_BUFFER_SIZE; false otherwise
 * @note Only the pointer and the length are checked; the sample contents
 *       are not inspected.
 */
bool validate_audio_format(const int16_t* audio_data, int length) {
    return audio_data != NULL && length == AUDIO_BUFFER_SIZE;
}
/**
 * @brief Print a prediction result with printf().
 *
 * Prints the predicted class, its confidence, whether the prediction is
 * valid, the inference time and the probability of every class. Does
 * nothing when result is NULL.
 *
 * @param result Pointer to the prediction result
 */
void print_prediction_result(const AudioPredictionResult* result) {
    if (!result) {
        return;
    }

    printf("预测结果:\n");
    printf(" 类别: %s\n", get_class_name_cn(result->predicted_class));
    printf(" 置信度: %.2f\n", result->confidence);
    // Both branches used to be empty strings, so the validity was never shown.
    printf(" 有效性: %s\n", result->is_valid ? "有效" : "无效");
    // uint32_t is not `unsigned int` on every toolchain; cast to match %lu.
    printf(" 推理时间: %lu 微秒\n", (unsigned long)result->inference_time_us);
    printf(" 各类别概率:\n");
    for (int i = 0; i < NUM_CLASSES; i++) {
        printf(" %s: %.3f\n", get_class_name_cn((AudioClassType)i), result->class_probabilities[i]);
    }
}
// ==================== 性能监控函数 ====================
/**
 * @brief Get the duration of the most recent inference.
 * @return Inference time in microseconds, as stored in g_last_inference_time
 */
uint32_t get_last_inference_time_us(void) {
return g_last_inference_time;
}
/**
 * @brief Report the memory used by the model.
 * @param model_memory Receives the size of the model data (bytes)
 * @param buffer_memory Receives the combined size of the Mel, FFT and window
 *        buffers (bytes)
 * @return 0 on success, -1 if either pointer is NULL
 * @note buffer_memory is computed from the configured buffer sizes, whether
 *       or not the buffers are currently allocated.
 */
int get_memory_usage(size_t* model_memory, size_t* buffer_memory) {
    if (model_memory == NULL || buffer_memory == NULL) {
        return -1;
    }

    // One Mel buffer (INPUT_SIZE floats) plus the FFT and window buffers
    // (N_FFT floats each).
    const size_t float_count = (size_t)INPUT_SIZE + 2u * (size_t)N_FFT;

    *model_memory = get_audio_model_size();
    *buffer_memory = float_count * sizeof(float);
    return 0;
}
/**
 * @brief Get the prediction statistics.
 * @param total_predictions Receives the total number of predictions
 * @param successful_predictions Receives the number of valid predictions
 * @param average_confidence Receives the mean confidence of the valid
 *        predictions (0.0f when there are none)
 * @return 0 on success, -1 if any pointer is NULL
 */
int get_prediction_statistics(uint32_t* total_predictions, uint32_t* successful_predictions,
                              float* average_confidence) {
    if (total_predictions == NULL || successful_predictions == NULL || average_confidence == NULL) {
        return -1;
    }

    *total_predictions = g_total_predictions;
    *successful_predictions = g_successful_predictions;

    // Avoid dividing by zero before the first valid prediction.
    if (g_successful_predictions > 0) {
        *average_confidence = g_total_confidence / g_successful_predictions;
    } else {
        *average_confidence = 0.0f;
    }
    return 0;
}
// ==================== 配置函数 ====================
/**
 * @brief Set the confidence threshold.
 * @param threshold New threshold (0.0 - 1.0, inclusive)
 * @return 0 on success; -1 if threshold is outside [0.0, 1.0] or is NaN
 *         (the message is stored in g_last_error and the current threshold
 *         is kept)
 */
int set_confidence_threshold(float threshold) {
    // Written as a negated in-range test so that NaN is rejected too: every
    // comparison with NaN is false, so `t < 0 || t > 1` would let it through
    // and no prediction could ever be marked valid afterwards.
    if (!(threshold >= 0.0f && threshold <= 1.0f)) {
        strcpy(g_last_error, "置信度阈值必须在0.0-1.0之间");
        return -1;
    }
    g_confidence_threshold = threshold;
    return 0;
}
/**
 * @brief Get the current confidence threshold.
 * @return The current threshold (value of g_confidence_threshold)
 */
float get_confidence_threshold(void) {
return g_confidence_threshold;
}
/**
 * @brief Enable or disable debug mode.
 * @param enable true: enable, false: disable
 * @note Only stores the flag in g_debug_mode.
 */
void set_debug_mode(bool enable) {
g_debug_mode = enable;
}
/**
 * @brief Check whether debug mode is enabled.
 * @return true: enabled, false: disabled
 */
bool is_debug_mode_enabled(void) {
return g_debug_mode;
}
// ==================== 错误处理 ====================
/**
 * @brief Get the most recent error message.
 * @return Pointer to the module's internal error buffer (g_last_error);
 *         the caller must not free it
 */
const char* get_last_error_message(void) {
return g_last_error;
}
/**
 * @brief Clear the error status.
 *
 * Resets the stored error message to the empty string.
 */
void clear_error_status(void) {
    g_last_error[0] = '\0';
}
// ==================== 模型数据访问函数 ====================
/**
 * @brief Get a pointer to the model data.
 * @return Pointer to audio_model_data (presumably the array provided by
 *         audio_model_data.h -- confirm against that header)
 */
const unsigned char* get_audio_model_data(void) {
return audio_model_data;
}
/**
 * @brief Get the size of the model data.
 * @return Model data size in bytes (the value of AUDIO_MODEL_SIZE,
 *         presumably defined in audio_model_data.h -- confirm)
 */
size_t get_audio_model_size(void) {
return AUDIO_MODEL_SIZE;
}
// ==================== 使用示例 ====================
/*
使用示例代码:
#include "audio_model_esp32.h"
void example_usage() {
// 1. 初始化模型
if (audio_model_init() != 0) {
printf("模型初始化失败\n");
return;
}
// 2. 准备音频数据
int16_t audio_buffer[AUDIO_BUFFER_SIZE];
// ... 从麦克风或其他源获取音频数据 ...
// 3. 验证音频格式
if (!validate_audio_format(audio_buffer, AUDIO_BUFFER_SIZE)) {
printf("音频格式不正确\n");
return;
}
// 4. 进行预测
AudioPredictionResult result;
if (audio_model_predict(audio_buffer, AUDIO_BUFFER_SIZE, &result) == 0) {
// 5. 处理预测结果
if (result.is_valid && result.confidence > 0.6f) {
printf("预测类别: %s (置信度: %.2f)\n",
get_class_name_cn(result.predicted_class),
result.confidence);
}
}
// 6. 清理资源
audio_model_cleanup();
}
音频数据获取示例ESP32 I2S
*/
#include "driver/i2s.h"
/**
 * @brief Read audio samples from I2S port 0 (blocking).
 *
 * Requests buffer_size samples with i2s_read(), waiting up to
 * portMAX_DELAY ticks. If the read fails or returns fewer bytes than
 * requested, the unread remainder of the buffer is zero-filled so that the
 * caller never processes stale or uninitialised samples.
 *
 * @param buffer Destination for the samples (buffer_size elements)
 * @param buffer_size Number of int16_t samples to read
 */
void get_audio_data(int16_t* buffer, size_t buffer_size) {
    if (!buffer || buffer_size == 0) {
        return;
    }

    const size_t bytes_wanted = buffer_size * sizeof(int16_t);
    size_t bytes_read = 0;
    // i2s_read() reports success as ESP_OK, which is 0.
    if (i2s_read(I2S_NUM_0, buffer, bytes_wanted, &bytes_read, portMAX_DELAY) != 0) {
        bytes_read = 0;  // do not trust a partial count from a failed read
    }
    if (bytes_read < bytes_wanted) {
        memset((uint8_t*)buffer + bytes_read, 0, bytes_wanted - bytes_read);
    }
}
#ifdef __cplusplus
}
#endif
#endif // AUDIO_MODEL_ESP32_H