【Follow me第二季第4期】基于 Edge Impulse 的语音识别
[复制链接]
本帖最后由 eew_uscYT9 于 2024-12-28 23:23 编辑
本项目教你如何做出自己的语音识别
本项目的大体流程
我的语音识别是识别开灯和关灯两个声音,听到对应的声音就对板子上的灯进行开和关
https://studio.edgeimpulse.com/ 先去这个网站注册好账号
然后创建一个新的项目
输入项目名字,其他保存不动
接着到了数据的采集,我通过对Edge Impulse的文档查阅,发现可以用Arduino Nano主板进行数据的采集,其他的数据采集方式有手机、Edge Impulse CLI(该方式比较复杂)
先下载Edge Impulse准备好的固件
然后把该固件下载到nano板子上
接着点左边的data acquisition
点击下面所指的图标就能进行数据的采集,数据分为训练数据集和测试数据集,一般按8:2的比例划分。
数据采集完之后点击create impulse进行训练的设置,如下面所示,设置完之后点击save impulse
点击mfcc进行参数的设置,直接默认设置,然后保存参数
然后点击上方的generate features进行生成特征 ,根据feature explorer能够看出我的数据区分度还是挺高的
接着点击classifier 进行训练,参数都可以用默认设置
点击model testing进行模型测试 我的模型训练还是可以的
接下来进行模型的部署,我们选择arduino,然后进行build,把生成的文件下载下来,打开arduino ide进行添加
我的代码如下
/* Edge Impulse ingestion SDK
* Copyright (c) 2022 EdgeImpulse Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
// If your target is limited in memory remove this macro to save 10K RAM
#define EIDSP_QUANTIZE_FILTERBANK 0
/*
** NOTE: If you run into TFLite arena allocation issue.
**
** This may be due to dynamic memory fragmentation.
** Try defining "-DEI_CLASSIFIER_ALLOCATION_STATIC" in boards.local.txt (create
** if it doesn't exist) and copy this file to
** `<ARDUINO_CORE_INSTALL_PATH>/arduino/hardware/<mbed_core>/<core_version>/`.
**
** See
** (https://support.arduino.cc/hc/en-us/articles/360012076960-Where-are-the-installed-cores-located-)
** to find where Arduino installs cores on your machine.
**
** If the problem persists then there's not enough memory for this model and application.
*/
/* Includes ---------------------------------------------------------------- */
#include <rp2040_inferencing.h>
#include <PDM.h>
#include "WiFiNINA.h"
#define led1 LEDB
#define led2 LEDG
#define led3 LEDR
/**
 * Audio buffers, pointers and selectors.
 *
 * State shared between the PDM receive callback, which fills the capture
 * buffer, and the main loop, which waits for it and classifies its contents.
 */
typedef struct {
// Capture buffer; allocated with malloc() in microphone_inference_start().
int16_t *buffer;
// Set to 1 by the PDM callback once `buffer` holds n_samples samples;
// cleared by microphone_inference_record() after it has seen the flag.
// NOTE(review): written from the PDM callback and polled from loop() but not
// declared volatile — confirm this is safe with the toolchain in use.
uint8_t buf_ready;
// Index in `buffer` where the callback stores the next sample.
uint32_t buf_count;
// Number of samples `buffer` was sized for.
uint32_t n_samples;
} inference_t;
// Single instance of the capture state used by every function in this sketch.
static inference_t inference;
// Scratch area that PDM.read() fills inside the receive callback.
static signed short sampleBuffer[2048];
static bool debug_nn = false; // Set this to true to see e.g. features generated from the raw signal
// True while microphone_inference_record() is waiting for a capture; the PDM
// callback only copies samples into `inference.buffer` while this is set.
static volatile bool record_ready = false;
/**
 * @brief Arduino setup function
 *
 * Opens the serial port, configures the red (led3) and green (led2) LED pins
 * as outputs, prints a summary of the inferencing settings and starts the
 * microphone capture for one full model window.
 */
void setup()
{
    Serial.begin(115200);
    pinMode(led3, OUTPUT);
    pinMode(led2, OUTPUT);
    // comment out the below line to cancel the wait for USB connection (needed for native USB)
    while (!Serial);
    Serial.println("Edge Impulse Inferencing Demo");

    // summary of inferencing settings (from model_metadata.h)
    ei_printf("Inferencing settings:\n");
    ei_printf("\tInterval: ");
    ei_printf_float((float)EI_CLASSIFIER_INTERVAL_MS);
    ei_printf(" ms.\n");
    ei_printf("\tFrame size: %d\n", EI_CLASSIFIER_DSP_INPUT_FRAME_SIZE);
    ei_printf("\tSample length: %d ms.\n", EI_CLASSIFIER_RAW_SAMPLE_COUNT / 16);
    // sizeof yields size_t; cast so the argument really matches the %d conversion.
    ei_printf("\tNo. of classes: %d\n",
              (int)(sizeof(ei_classifier_inferencing_categories) / sizeof(ei_classifier_inferencing_categories[0])));

    if (microphone_inference_start(EI_CLASSIFIER_RAW_SAMPLE_COUNT) == false) {
        ei_printf("ERR: Could not allocate audio buffer (size %d), this could be due to the window length of your model\r\n", EI_CLASSIFIER_RAW_SAMPLE_COUNT);
        return;
    }
}
/**
* @brief Arduino main function. Runs the inferencing loop.
*/
void loop()
{
ei_printf("Starting inferencing in 2 seconds...\n");
delay(2000);
ei_printf("Recording...\n");
bool m = microphone_inference_record();
if (!m) {
ei_printf("ERR: Failed to record audio...\n");
return;
}
ei_printf("Recording done\n");
signal_t signal;
signal.total_length = EI_CLASSIFIER_RAW_SAMPLE_COUNT;
signal.get_data = µphone_audio_signal_get_data;
ei_impulse_result_t result = { 0 };
EI_IMPULSE_ERROR res = run_classifier_continuous(&signal, &result, debug_nn);
if (res != EI_IMPULSE_OK) {
ei_printf("ERR: Failed to run classifier (%d)\n", res);
return;
}
// print inference return code
ei_printf("run_classifier returned: %d\r\n", res);
print_inference_result(result);
//ei_printf(" %s: ", ei_classifier_inferencing_categories[i]);
}
/**
 * @brief PDM receive callback
 *
 * Always drains the bytes PDM reports as available into sampleBuffer. While a
 * recording is requested and the capture buffer has not been flagged ready,
 * the samples are appended to the capture buffer; once it holds n_samples
 * samples the write index is reset and the buffer is flagged ready.
 */
static void pdm_data_ready_inference_callback(void)
{
    const int pending_bytes = PDM.available();
    const int got_bytes = PDM.read((char *)&sampleBuffer[0], pending_bytes);

    // Nothing to store unless a capture is in progress and still incomplete.
    if (inference.buf_ready != 0 || record_ready != true) {
        return;
    }

    // Two bytes per 16-bit sample.
    int k = 0;
    while (k < got_bytes >> 1) {
        inference.buffer[inference.buf_count] = sampleBuffer[k];
        inference.buf_count += 1;

        if (inference.buf_count >= inference.n_samples) {
            // Buffer is full: rewind for the next capture and signal the waiter.
            inference.buf_count = 0;
            inference.buf_ready = 1;
            return;
        }
        k += 1;
    }
}
/**
 * @brief Allocate the capture buffer, reset the capture state and start PDM
 *
 * @param[in] n_samples Number of 16-bit samples the capture buffer must hold
 *
 * @return true when the buffer was allocated and PDM started, false when
 *         either step failed (on a PDM failure the buffer is released again
 *         through microphone_inference_end())
 */
static bool microphone_inference_start(uint32_t n_samples)
{
    int16_t *capture = (int16_t *)malloc(n_samples * sizeof(int16_t));
    inference.buffer = capture;
    if (capture == NULL) {
        return false;
    }

    inference.n_samples = n_samples;
    inference.buf_count = 0;
    inference.buf_ready = 0;

    // Register the receive callback and size the PDM buffer before starting.
    PDM.onReceive(pdm_data_ready_inference_callback);
    PDM.setBufferSize(2048);
    delay(250);

    // Start PDM with one channel (mono mode) at the model's sample frequency.
    const bool started = PDM.begin(1, EI_CLASSIFIER_FREQUENCY);
    if (started) {
        // The gain can optionally be set here with PDM.setGain(); it defaults
        // to 24 and values >= 52 are not supported.
        return true;
    }

    ei_printf("ERR: Failed to start PDM!");
    microphone_inference_end();
    return false;
}
/**
 * @brief Request a capture and block until the buffer is full
 *
 * Raises record_ready so the PDM callback starts storing samples, polls the
 * ready flag every 10 ms, then clears both flags again.
 *
 * @return Always true
 */
static bool microphone_inference_record(void)
{
    record_ready = true;

    // The PDM callback sets buf_ready once the capture buffer is full.
    while (!inference.buf_ready) {
        delay(10);
    }

    inference.buf_ready = 0;
    record_ready = false;
    return true;
}
/**
 * @brief Signal callback: convert captured samples to float
 *
 * @param[in]  offset  Index of the first sample in the capture buffer
 * @param[in]  length  Number of samples to convert
 * @param[out] out_ptr Destination for the converted samples
 *
 * @return Always 0
 */
static int microphone_audio_signal_get_data(size_t offset, size_t length, float *out_ptr)
{
    int16_t *first_sample = inference.buffer + offset;
    numpy::int16_to_float(first_sample, out_ptr, length);
    return 0;
}
/**
 * @brief Stop PDM and release buffers
 *
 * The capture buffer is obtained with malloc() in
 * microphone_inference_start(), so it is released with the matching free().
 * The pointer is cleared afterwards so the shared state never holds a
 * dangling address and a repeated call stays harmless.
 */
static void microphone_inference_end(void)
{
    PDM.end();
    free(inference.buffer);
    inference.buffer = NULL;
}
/**
 * @brief Print a classification result and drive the LEDs from it
 *
 * Prints the timing figures and the score of every label. For each label
 * whose score exceeds 0.7 an LED pin is written first: label 0 writes LOW to
 * led3, label 1 writes HIGH to led3, any other label writes LOW to led2.
 *
 * @param[in] result Classifier output to report
 */
void print_inference_result(ei_impulse_result_t result) {
    // Minimum score a label needs before it is allowed to change an LED.
    const double led_threshold = 0.7;

    // Print how long it took to perform inference
    ei_printf("Timing: DSP %d ms, inference %d ms, anomaly %d ms\r\n",
              result.timing.dsp,
              result.timing.classification,
              result.timing.anomaly);

    ei_printf("Predictions:\r\n");
    for (uint16_t label = 0; label < EI_CLASSIFIER_LABEL_COUNT; ++label) {
        if (result.classification[label].value > led_threshold) {
            if (label == 0) {
                digitalWrite(led3, LOW);
            } else if (label == 1) {
                digitalWrite(led3, HIGH);
            } else {
                digitalWrite(led2, LOW);
            }
        }

        ei_printf(" %s: ", ei_classifier_inferencing_categories[label]);
        ei_printf("%.5f\r\n", result.classification[label].value);
    }

    // Print anomaly result (if it exists)
#if EI_CLASSIFIER_HAS_ANOMALY == 1
    ei_printf("Anomaly prediction: %.3f\r\n", result.anomaly);
#endif
}
#if !defined(EI_CLASSIFIER_SENSOR) || EI_CLASSIFIER_SENSOR != EI_CLASSIFIER_SENSOR_MICROPHONE
#error "Invalid model for current sensor."
#endif
演示视频
VID_20241226_212700
|