1663|2

531

帖子

4

TA的资源

纯净的硅(高级)

【米尔-STM32MP257开发板试用体验】实时对讲中的回声消除和降噪应用案例 [复制链接]

本帖最后由 qinyunti 于 2025-4-11 11:35 编辑

4月11日

 

b站视频


 

代码:

echo.zip (155.07 KB, 下载次数: 0)

一.前言

前面实现了基于alsa的音频播放与采集,同时也验证了移植speex算法实现回声消除和降噪的执行效率。现在将两者结合起来实现实时对讲中的回声消除和降噪的典型应用案例。
整个数据流和结构如下图中红色路径所示:
wd_105632jitqr5j9i3qy439l.png
对讲的远端可能来自于网络或者UAC等不同应用,这里使用wav文件模拟,即wav输入文件模拟远端的讲话,回声消除和降噪处理后的近端语音存入wav文件。
由于没有回采,所以直接使用远端语音数据。实际应用使用PA之后的回采数据(和MIC数据最好保持同步)将会更好。
采集到的MIC数据包含了远端语音喇叭播放,叠加本地语音,所以需要通过回声消除去掉远端语音的影响,只保留本地语音,同时进行降噪处理,处理后的数据才发送到远端(这里是保存到wav文件)。
  1. 实现
    参考前面的speex移植测试和alsa音频播放和采集的文章。
    新建echo.c,添加speex的源码
    代码结构如下
    .
    ├── build.sh
    ├── config.h
    ├── echo.c
    ├── include
    ├── libspeexdsp
    └── os_support_custom.h
    2 directories, 4 files
    echo.c的实现如下
    #ifdef HAVE_CONFIG_H
    #include "config.h"
    #endif
    
    #include "speex/speex_echo.h"
    #include "speex/speex_preprocess.h"
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #include <time.h>
    /*
     * Return the current CLOCK_MONOTONIC reading expressed in nanoseconds.
     * The result of clock_gettime() is not checked.
     */
    static uint64_t get_tm(void)
    {
        struct timespec now;
        uint64_t ns;

        clock_gettime(CLOCK_MONOTONIC, &now);

        /* whole seconds scaled to nanoseconds, then the sub-second part */
        ns = now.tv_sec * 1000000000ull;
        ns += now.tv_nsec;
        return ns;
    }
    
    
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    /* WAV parsing */
    #define CHUNK_RIFF "RIFF"
    #define CHUNK_WAVE "WAVE"
    #define CHUNK_FMT "fmt "
    #define CHUNK_DATA "data"

    /* Fields extracted from a RIFF/WAVE header by wav_decode_head(). */
    typedef struct
    {
        uint32_t off;           /* offset of the first sample byte, counted from the start of the parsed buffer */
        uint32_t chunksize;     /* size field of the outer RIFF chunk */
        uint16_t audioformat;   /* format tag of the "fmt " chunk (0x0001 and 0xFFFE are accepted) */
        uint16_t numchannels;   /* channel count from the "fmt " chunk */
        uint32_t samplerate;    /* sample rate from the "fmt " chunk */
        uint32_t byterate;      /* byte rate from the "fmt " chunk */
        uint16_t blockalign;    /* block align from the "fmt " chunk */
        uint16_t bitspersample; /* bits per sample from the "fmt " chunk */
        uint32_t datasize;      /* size field of the "data" chunk */
    }wav_t;

    /* Read a little-endian 16-bit value stored at p. */
    static uint16_t wav_rd_le16(const uint8_t* p)
    {
        return (uint16_t)((uint16_t)p[0] | ((uint16_t)p[1]<<8));
    }

    /* Read a little-endian 32-bit value stored at p. */
    static uint32_t wav_rd_le32(const uint8_t* p)
    {
        return (uint32_t)p[0] | ((uint32_t)p[1]<<8) | ((uint32_t)p[2]<<16) | ((uint32_t)p[3]<<24);
    }

    /*
     * Parse a RIFF/WAVE header held in memory.
     *
     * buffer: start of the file image. The function has no length argument, so
     *         the caller must supply every byte up to and including the 8-byte
     *         "data" chunk header; parsing never goes past the end announced by
     *         the RIFF size field (chunksize + 8).
     * wav:    receives the parsed fields; it is zeroed first.
     *
     * Returns  0 when a "data" chunk was found after a usable "fmt " chunk,
     *         -1 when the buffer does not start with "RIFF" (or an argument is NULL),
     *         -2 when the form type is not "WAVE",
     *         -3 when no "data" chunk lies inside the RIFF chunk or the "fmt "
     *            chunk is truncated,
     *         -4 when "data" is reached without a preceding "fmt " chunk whose
     *            format tag is 0x0001 or 0xFFFE (the format fields would be unset).
     *
     * Unknown chunks are skipped using their declared size plus the RIFF pad byte
     * that follows odd-sized chunks. The "fmt " chunk is skipped by its declared
     * size as well, so any extension layout is tolerated.
     */
    static int wav_decode_head(uint8_t* buffer, wav_t* wav)
    {
        uint64_t limit;      /* first offset past the RIFF chunk, per its size field */
        uint64_t pos = 12;   /* first sub-chunk follows "RIFF" + size + "WAVE" */
        int have_fmt = 0;

        if((buffer == NULL) || (wav == NULL))
        {
            return -1;
        }
        memset(wav, 0, sizeof(*wav));

        if(0 != memcmp(buffer,CHUNK_RIFF,4))
        {
            return -1;
        }
        wav->chunksize = wav_rd_le32(buffer + 4);
        if(0 != memcmp(buffer + 8,CHUNK_WAVE,4))
        {
            return -2;
        }
        limit = (uint64_t)wav->chunksize + 8u;

        /* Each iteration needs a complete 8-byte chunk header inside the RIFF chunk. */
        while((pos + 8u) <= limit)
        {
            const uint8_t* id = buffer + pos;
            const uint8_t* body = id + 8;
            uint32_t subchunksize = wav_rd_le32(id + 4);

            if(0 == memcmp(id,CHUNK_DATA,4))
            {
                if((!have_fmt) || ((pos + 8u) > 0xFFFFFFFFull))
                {
                    return have_fmt ? -3 : -4;
                }
                wav->datasize = subchunksize;
                wav->off = (uint32_t)(pos + 8u);
                return 0;
            }

            if(0 == memcmp(id,CHUNK_FMT,4))
            {
                /* The 16 mandatory bytes must be present and inside the RIFF chunk. */
                if((subchunksize < 16u) || ((pos + 8u + 16u) > limit))
                {
                    return -3;
                }
                wav->audioformat = wav_rd_le16(body);
                if((wav->audioformat == 0x0001) || (wav->audioformat == 0xFFFE))
                {
                    wav->numchannels = wav_rd_le16(body + 2);
                    wav->samplerate = wav_rd_le32(body + 4);
                    wav->byterate = wav_rd_le32(body + 8);
                    wav->blockalign = wav_rd_le16(body + 12);
                    wav->bitspersample = wav_rd_le16(body + 14);
                    have_fmt = 1;
                }
            }

            /* Advance by header + payload + pad byte (chunks are 2-byte aligned). */
            pos += 8u + (uint64_t)subchunksize + (uint64_t)(subchunksize & 1u);
        }
        return -3;
    }
    
    /* Store a 16-bit value at p in little-endian byte order. */
    static void wav_put_le16(uint8_t* p, uint16_t v)
    {
        p[0] = (uint8_t)(v & 0xFF);
        p[1] = (uint8_t)((v>>8) & 0xFF);
    }

    /* Store a 32-bit value at p in little-endian byte order. */
    static void wav_put_le32(uint8_t* p, uint32_t v)
    {
        p[0] = (uint8_t)(v & 0xFF);
        p[1] = (uint8_t)((v>>8) & 0xFF);
        p[2] = (uint8_t)((v>>16) & 0xFF);
        p[3] = (uint8_t)((v>>24) & 0xFF);
    }

    /*
     * Fill a 44-byte canonical WAV header for 16-bit PCM.
     *
     * buffer:  destination, at least 44 bytes.
     * samples: number of sample frames per channel.
     * chnum:   number of channels.
     * freq:    sample rate.
     *
     * All sizes are computed in uint32_t (2 bytes per sample), so the arithmetic
     * wraps like the 32-bit header fields instead of overflowing a signed int.
     */
    static void wav_fill_head(uint8_t* buffer, int samples, int chnum, int freq)
    {
        uint8_t* p = (uint8_t*)buffer;
        const uint32_t framebytes = (uint32_t)chnum * 2u;          /* bytes per sample frame */
        const uint32_t datalen = (uint32_t)samples * framebytes;   /* payload size */
        const uint32_t bps = (uint32_t)freq * framebytes;          /* bytes per second */
        const uint32_t chunksize = (44u - 8u) + datalen;           /* everything after "RIFF"+size */

        memcpy(p, "RIFF", 4);
        wav_put_le32(p + 4, chunksize);
        memcpy(p + 8, "WAVE", 4);

        memcpy(p + 12, "fmt ", 4);
        wav_put_le32(p + 16, 16u);                   /* Subchunk1Size */
        wav_put_le16(p + 20, 1u);                    /* PCM */
        wav_put_le16(p + 22, (uint16_t)chnum);       /* channel count */
        wav_put_le32(p + 24, (uint32_t)freq);        /* SampleRate */
        wav_put_le32(p + 28, bps);                   /* ByteRate */
        wav_put_le16(p + 32, (uint16_t)framebytes);  /* BlockAlign */
        wav_put_le16(p + 34, 16u);                   /* BitsPerSample */

        memcpy(p + 36, "data", 4);
        wav_put_le32(p + 40, datalen);
    }
    
    /*
     * Print every field of a parsed WAV header to stdout, one per line.
     * The 32-bit unsigned fields are printed as unsigned values so that sizes
     * above INT_MAX are not shown as negative numbers.
     */
    void wav_print(wav_t* wav)
    {
       printf("off:%lu\r\n",(unsigned long)wav->off);
       printf("chunksize:%lu\r\n",(unsigned long)wav->chunksize);
       printf("audioformat:%d\r\n",wav->audioformat);
       printf("numchannels:%d\r\n",wav->numchannels);
       printf("samplerate:%lu\r\n",(unsigned long)wav->samplerate);
       printf("byterate:%lu\r\n",(unsigned long)wav->byterate);
       printf("blockalign:%d\r\n",wav->blockalign);
       printf("bitspersample:%d\r\n",wav->bitspersample);
       printf("datasize:%lu\r\n",(unsigned long)wav->datasize);
    }
    
    #include <alsa/asoundlib.h>
    /* Frames handled per loop iteration; also used as the ALSA period size and as the frame size given to the Speex init calls. */
    #define NN 128
    /* Second argument of speex_echo_state_init(); presumably the echo tail (filter) length in samples - confirm against the Speex API. */
    #define TAIL 2048
    
    /* Report a failed (negative) result of one playback hw-params step; success is silent. */
    static void alsa_play_hw_warn(const char *step, int err)
    {
        if (err < 0) {
            printf("Playback %s failed: %s\n", step, snd_strerror(err));
        }
    }

    /*
     * Open and configure an ALSA playback stream (blocking mode, S16_LE, interleaved).
     *
     * device: ALSA device name.
     * handle: receives the opened PCM handle; set to NULL when -2 is returned.
     * ch:     channel count.
     * freq:   sample rate.
     *
     * Returns 0 on success, -1 if the device cannot be opened, -2 if
     * snd_pcm_set_params() rejects the configuration.
     *
     * After the basic setup the function additionally requests a period of NN
     * frames and a buffer of 4*NN frames. That refinement is best effort, as in
     * the original code: every step is attempted and failures are only logged.
     */
    int alsa_play_init(char* device, snd_pcm_t **handle, int ch, int freq)
    {
        int err;
        snd_pcm_hw_params_t *hwparams = NULL;

        if ((err = snd_pcm_open(handle, device, SND_PCM_STREAM_PLAYBACK, 0)) < 0) {  /* last argument: 0 = blocking, 1 = non-blocking */
            printf("Playback open error: %s\n", snd_strerror(err));
            return -1;
        }

        if ((err = snd_pcm_set_params(*handle,
                        SND_PCM_FORMAT_S16_LE,
                        SND_PCM_ACCESS_RW_INTERLEAVED,
                        ch,
                        freq,
                        0,
                        100000)) < 0) {   /* latency argument: 100000 */
            printf("Playback set params error: %s\n", snd_strerror(err));
            snd_pcm_close(*handle);
            *handle = NULL;   /* do not leave a closed handle with the caller */
            return -2;
        }

        err = snd_pcm_hw_params_malloc(&hwparams);
        if ((err < 0) || (hwparams == NULL)) {
            /* keep the configuration installed by snd_pcm_set_params() */
            alsa_play_hw_warn("hw_params_malloc", (err < 0) ? err : -1);
            return 0;
        }

        alsa_play_hw_warn("hw_params_any", snd_pcm_hw_params_any(*handle, hwparams));
        alsa_play_hw_warn("set_access", snd_pcm_hw_params_set_access(*handle, hwparams, SND_PCM_ACCESS_RW_INTERLEAVED));
        alsa_play_hw_warn("set_format", snd_pcm_hw_params_set_format(*handle, hwparams, SND_PCM_FORMAT_S16_LE));
        alsa_play_hw_warn("set_channels", snd_pcm_hw_params_set_channels(*handle, hwparams, ch));
        alsa_play_hw_warn("set_rate", snd_pcm_hw_params_set_rate(*handle, hwparams, freq, 0));
        alsa_play_hw_warn("set_period_size", snd_pcm_hw_params_set_period_size(*handle, hwparams, NN, 0));
        alsa_play_hw_warn("set_buffer_size", snd_pcm_hw_params_set_buffer_size(*handle, hwparams, 4*NN));
        alsa_play_hw_warn("hw_params", snd_pcm_hw_params(*handle, hwparams));
        snd_pcm_hw_params_free(hwparams);

        return 0;
    }
    
    /* Report a failed (negative) result of one capture setup step; success is silent. */
    static void alsa_rec_hw_warn(const char *step, int err)
    {
        if (err < 0) {
            printf("Capture %s failed: %s\n", step, snd_strerror(err));
        }
    }

    /*
     * Open, configure and start an ALSA capture stream (blocking mode, S16_LE, interleaved).
     *
     * device: ALSA device name.
     * handle: receives the opened PCM handle; set to NULL when -2 is returned.
     * ch:     channel count.
     * freq:   sample rate.
     *
     * Returns 0 on success, -1 if the device cannot be opened, -2 if
     * snd_pcm_set_params() rejects the configuration.
     *
     * After the basic setup the function additionally requests a period of NN
     * frames and a buffer of 4*NN frames, then starts the stream. Those steps are
     * best effort, as in the original code: each one is attempted and failures
     * are only logged.
     */
    int alsa_rec_init(char* device, snd_pcm_t **handle, int ch, int freq)
    {
        int err;
        snd_pcm_hw_params_t *hwparams = NULL;

        if ((err = snd_pcm_open(handle, device, SND_PCM_STREAM_CAPTURE, 0)) < 0) {  /* last argument: 0 = blocking, 1 = non-blocking */
            printf("Capture open error: %s\n", snd_strerror(err));
            return -1;
        }

        if ((err = snd_pcm_set_params(*handle,
                        SND_PCM_FORMAT_S16_LE,
                        SND_PCM_ACCESS_RW_INTERLEAVED,
                        ch,
                        freq,
                        0,
                        100000)) < 0) {   /* latency argument: 100000 */
            printf("Capture set params error: %s\n", snd_strerror(err));
            snd_pcm_close(*handle);
            *handle = NULL;   /* do not leave a closed handle with the caller */
            return -2;
        }

        err = snd_pcm_hw_params_malloc(&hwparams);
        if ((err < 0) || (hwparams == NULL)) {
            /* keep the configuration installed by snd_pcm_set_params() */
            alsa_rec_hw_warn("hw_params_malloc", (err < 0) ? err : -1);
        } else {
            alsa_rec_hw_warn("hw_params_any", snd_pcm_hw_params_any(*handle, hwparams));
            alsa_rec_hw_warn("set_access", snd_pcm_hw_params_set_access(*handle, hwparams, SND_PCM_ACCESS_RW_INTERLEAVED));
            alsa_rec_hw_warn("set_format", snd_pcm_hw_params_set_format(*handle, hwparams, SND_PCM_FORMAT_S16_LE));
            alsa_rec_hw_warn("set_channels", snd_pcm_hw_params_set_channels(*handle, hwparams, ch));
            alsa_rec_hw_warn("set_rate", snd_pcm_hw_params_set_rate(*handle, hwparams, freq, 0));
            alsa_rec_hw_warn("set_period_size", snd_pcm_hw_params_set_period_size(*handle, hwparams, NN, 0));
            alsa_rec_hw_warn("set_buffer_size", snd_pcm_hw_params_set_buffer_size(*handle, hwparams, 4*NN));
            alsa_rec_hw_warn("hw_params", snd_pcm_hw_params(*handle, hwparams));
            snd_pcm_hw_params_free(hwparams);
        }

        /* Start capturing right away so alsa_rec() can poll the available frame count. */
        alsa_rec_hw_warn("start", snd_pcm_start(*handle));
        return 0;
    }
    
    /*
     * Shut down a PCM stream: drain it, then close the handle.
     * A drain failure is only logged. Always returns 0.
     */
    int alsa_deinit(snd_pcm_t *handle)
    {
        /* drain before closing so pending samples are not dropped by the close */
        const int drain_rc = snd_pcm_drain(handle);

        if (drain_rc < 0) {
            printf("snd_pcm_drain failed: %s\n", snd_strerror(drain_rc));
        }

        snd_pcm_close(handle);
        return 0;
    }
    
    /*
     * Write one block of interleaved frames to a playback stream.
     *
     * handle: playback PCM handle.
     * buffer: frames to write.
     * size:   number of FRAMES (not bytes) in buffer.
     *
     * Returns 0 when all frames were written, or when a write error was cleared
     * by snd_pcm_recover() (the block is not retried in that case);
     * -1 when recovery failed; -2 when fewer than size frames were written.
     */
    int alsa_play(snd_pcm_t *handle, const void* buffer, size_t size)
    {
        snd_pcm_sframes_t frames;

        frames = snd_pcm_writei(handle, buffer, (snd_pcm_uframes_t)size);  /* size is a frame count, not a byte count */
        if (frames < 0) {
            frames = snd_pcm_recover(handle, (int)frames, 0);

            printf("snd_pcm_recover: %s\n", snd_strerror((int)frames));
            if (frames < 0) {
                printf("snd_pcm_writei failed: %s\n", snd_strerror((int)frames));
                return -1;
            }
            return 0;
        }

        /* Compare against the requested frame count; sizeof(buffer) would only be the pointer size. */
        if ((frames > 0) && ((size_t)frames < size)) {
            printf("Short write (expected %li, wrote %li)\n", (long)size, (long)frames);
            return -2;
        }
        return 0;
    }
    
    /*
     * Read one block of interleaved frames from a capture stream.
     *
     * handle: capture PCM handle.
     * buffer: destination for the frames.
     * size:   number of FRAMES (not bytes) to read.
     *
     * The function polls snd_pcm_avail_update() once per millisecond until size
     * frames are available and gives up after 1000 polls.
     *
     * Returns 0 when all frames were read, or when a read error was cleared by
     * snd_pcm_recover() (buffer is then left untouched);
     * -1 on timeout or when recovery failed; -2 when fewer than size frames were read.
     */
    int alsa_rec(snd_pcm_t *handle, void* buffer, size_t size)
    {
        int timeout = 0;
        snd_pcm_sframes_t frames;

        for (;;) {
            frames = snd_pcm_avail_update(handle);
            if (frames < 0) {
                /* stream error: stop waiting, the read below reports it and triggers recovery */
                break;
            }
            if ((size_t)frames >= size) {
                break;
            }
            usleep(1000);
            timeout++;
            if (timeout >= 1000) {
                printf("rec timeout\n");
                return -1;
            }
        }

        frames = snd_pcm_readi(handle, buffer, (snd_pcm_uframes_t)size);  /* size is a frame count, not a byte count */
        if (frames < 0) {
            frames = snd_pcm_recover(handle, (int)frames, 0);

            printf("snd_pcm_recover: %s\n", snd_strerror((int)frames));
            if (frames < 0) {
                printf("snd_pcm_readi failed: %s\n", snd_strerror((int)frames));
                return -1;
            }
            return 0;
        }

        /* Compare against the requested frame count; sizeof(buffer) would only be the pointer size. */
        if ((frames > 0) && ((size_t)frames < size)) {
            printf("Short read (expected %li, read %li)\n", (long)size, (long)frames);
            return -2;
        }
        return 0;
    }
    
    int main(int argc, char* argv[])
    {    
        snd_pcm_t *play_handle = NULL;
        snd_pcm_t *rec_handle = NULL;
        FILE *spk_fd;
        FILE *out_fd;
        char* spk_fname;
        char* out_fname;
        char* play_dev_name;
        char* rec_dev_name;
        int res;
        SpeexEchoState *st;
        SpeexPreprocessState *den;
        int ctl_i;
        float ctl_f;
        uint32_t t0;
        uint32_t t1;
        uint32_t tused_max = 0;
        uint32_t tused_min = 0;
        int16_t spk_buf[NN], mic_buf[NN], out_buf[NN];
        int16_t out_wr_buf[3*NN];
    
        uint8_t spk_wav_head_buf[128]; /* 输入spk wav文件头缓存 */
        uint8_t out_wav_head_buf[44]; /* 输出文件wav头缓存 */
        wav_t spk_wav;
    
        int samps;  /* 采样点数 */
        int times;    /* 读取次数 */
        int sampleRate;
    
        if(argc != 5){
            printf("usage:alsa_rec_wav playdev recdev spkwav outwav\r\n");
            return -1;
        }
    
        play_dev_name = argv[1];
        rec_dev_name = argv[2];
        spk_fname = argv[3];
        out_fname = argv[4];
    
        spk_fd = fopen(spk_fname, "rb");
        if(spk_fd < 0){
            printf("open file %s err\r\n",spk_fname);
            return -2;
        }
        if(fread(spk_wav_head_buf, 1, sizeof(spk_wav_head_buf), spk_fd) < 44){
            printf("read file %s err\r\n",spk_fname);
            fclose(spk_fd);
            return -3;
        }
    
        if(0 != (res=wav_decode_head(spk_wav_head_buf, &spk_wav))){
            printf("decode file %s err %d\r\n",spk_fname,res);
            fclose(spk_fd);
            return -4;
        }
        fseek(spk_fd,spk_wav.off,SEEK_SET);
    
        samps = spk_wav.datasize; 
        samps /= spk_wav.blockalign;  /* 采样点数 =  数据大小 除以 blockalign */
        times = samps / NN;   /* 一次读取NN个点,读取times次 */
        sampleRate = spk_wav.samplerate;
    
        out_fd    = fopen(out_fname, "wb+");
        if(out_fd < 0){
           fprintf(stderr, "open file %s err\n",out_fname);
           fclose(spk_fd);
           return -5;
        }
        wav_fill_head(out_wav_head_buf, times*NN, 3, sampleRate);  /* 输出文件头 3通道 spk+mic+out */
        if(44 != fwrite(out_wav_head_buf, 1, 44, out_fd)){
           printf("write file %s err\n",out_fname);
           fclose(out_fd);
           return -6;
        }
    
        res = alsa_play_init(play_dev_name,&play_handle, 1, sampleRate);
        if(res != 0){
            fclose(spk_fd);
            fclose(out_fd);
            printf("alsa_play_init err\r\n");
            return -7;
        }
    
        res = alsa_rec_init(rec_dev_name,&rec_handle, 1, sampleRate);
        if(res != 0){
            fclose(spk_fd);
            fclose(out_fd);
            alsa_deinit(play_handle);
            printf("alsa_rec_init err\r\n");
            return -8;
        }
    
        st = speex_echo_state_init(NN, TAIL);
        den = speex_preprocess_state_init(NN, sampleRate);
        speex_echo_ctl(st, SPEEX_ECHO_SET_SAMPLING_RATE, &sampleRate);
    
        ctl_i=1;
        speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_DENOISE, &ctl_i); /* 打开降噪 ctl_i=1打开 0关闭*/
        ctl_i=80;
        speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &ctl_i); 
        ctl_i=80;
        speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_ECHO_SUPPRESS, &ctl_i);
    
        for(int i=0; i<times; i++)
        {
            if(alsa_rec(rec_handle, mic_buf, NN) < 0){  /* 注意这里最后一个参数是frames为单位 不需要乘以ch */
                printf("rec err\r\n");
                fclose(spk_fd);
                fclose(out_fd);
                alsa_deinit(play_handle);
                alsa_deinit(rec_handle);
                return -9;
            }
            if(NN != fread(spk_buf, sizeof(int16_t), NN, spk_fd)){
                printf("read file %s err\r\n",spk_fname);
                fclose(spk_fd);
                fclose(out_fd);
                alsa_deinit(play_handle);
                alsa_deinit(rec_handle);
                return -10;
            }
            if(alsa_play(play_handle, spk_buf, NN) < 0){
                printf("play err\r\n");
                fclose(spk_fd);
                fclose(out_fd);
                alsa_deinit(play_handle);
                alsa_deinit(rec_handle);
                return -11;
            }
    
            t0 = get_tm();
            speex_echo_cancellation(st, mic_buf, spk_buf, out_buf);
            speex_preprocess_run(den, out_buf);
            t1 = get_tm();
            if((t1-t0) > tused_max){
                tused_max = t1-t0;
            }
            if(tused_min == 0){
                tused_min = t1-t0;
            }else{
                if((t1-t0) < tused_min){
                    tused_min = t1-t0;
                } 
            }
    
            for(int i=0; i<NN; i++){
                out_wr_buf[3*i+0] = spk_buf[i];
                out_wr_buf[3*i+1] = mic_buf[i];
                out_wr_buf[3*i+2] = out_buf[i];
            }
    
            if(NN*3 != fwrite(out_wr_buf, sizeof(int16_t), NN*3, out_fd)){
                printf("write file %s err\r\n",out_fname);
                fclose(spk_fd);
                fclose(out_fd);
                alsa_deinit(play_handle);
                alsa_deinit(rec_handle);
                return -12;
            } 
        }
        printf("used max:%duS,used min:%duS\r\n",tused_max/1000,tused_min/1000);
        speex_echo_state_destroy(st);
        speex_preprocess_state_destroy(den);
        fclose(spk_fd);
        fclose(out_fd);
        alsa_deinit(play_handle);
        alsa_deinit(rec_handle);    
        return 0;
    }
    

     

  2. 测试
    编译
    build.sh中
    #! /bin/sh
    # Compile every libspeexdsp source together with echo.c into the "echo" binary,
    # with HAVE_CONFIG_H defined and libm and libasound linked in.
    # $CC must already be set; presumably by the SDK environment-setup script sourced beforehand - confirm.
    $CC libspeexdsp/*.c echo.c -Iinclude -I. -DHAVE_CONFIG_H -lm -lasound -o echo
    所以执行
    source /opt/st/myd-ld25x/4.2.4-snapshot/environment-setup-cortexa35-ostl-linux
    ./build.sh
    编译
    复制到win下
    cp echo /mnt/d/
    导入到开发板
    wd_105632qtcxzbv3zv4n37gx.png
    chmod +x echo
    测试
    ./echo default default spk1.wav out.wav
    可以看到算法执行一次最少只需2ms
    wd_105632bahak0htzsxkax88.png
    会播放spk1.wav模拟远程语音,同时本地也开始说话,叠加远程语音。
    结果记录到out.wav
    out.wav中通道1为spk1的数据,通道2为spk1叠加本地说话的数据,通道3为回声消除和降噪后的数据,
    导出out.wav 可以修改串口速率这样传输快一点
    stty -F /dev/ttySTM0 ispeed 921600 ospeed 921600 cs8
    wd_105632t73kg2flogxmnmbr.png
    使用Audacity工具查看
    对比通道2和通道3可以看到:通道3经降噪后明显趋于平直,并且减去了远端声音,只保留了本地语音。
    wd_105632c3i783m338m7gh77.png
  3. 总结
可以看出得益于开发板完善的驱动,可以直接基于alsa方便的进行语音的播放与采集;
得益于开发板强大的性能,实时语音处理算法的执行效率非常高,执行时间很短,实时处理完全无问题,对于负载本身就很高的应用再增加语音实时处理影响也不大。

最新回复

看来回声消除和降噪是比较关键的步骤   详情 回复 发表于 6 天前

回复
举报

7098

帖子

0

TA的资源

五彩晶圆(高级)

看来回声消除和降噪是比较关键的步骤

点评

是的,移植了开源成熟的方案  详情 回复 发表于 4 天前

回复

531

帖子

4

TA的资源

纯净的硅(高级)

Jacktang 发表于 2025-4-13 15:24 看来回声消除和降噪是比较关键的步骤

是的,移植了开源成熟的方案


回复
您需要登录后才可以回帖 登录 | 注册

随便看看
查找数据手册?

EEWorld Datasheet 技术支持

相关文章 更多>>
关闭
站长推荐上一条 1/10 下一条

 
EEWorld订阅号

 
EEWorld服务号

 
汽车开发圈

 
机器人开发圈

About Us 关于我们 客户服务 联系方式 器件索引 网站地图 最新更新 手机版

站点相关: 国产芯 安防电子 汽车电子 手机便携 工业控制 家用电子 医疗电子 测试测量 网络通信 物联网 13

北京市海淀区中关村大街18号B座15层1530室 电话:(010)82350740 邮编:100190

电子工程世界版权所有 京B2-20211791 京ICP备10001474号-1 电信业务审批[2006]字第258号函 京公网安备 11010802033920号 Copyright © 2005-2025 EEWORLD.com.cn, Inc. All rights reserved
快速回复 返回顶部 返回列表