【STM32H7S78-DK】测评+ADC DMA采集1024点FFT计算速度测评
<p>在前面帖子实现ADC DMA采集、DSP库添加的基础上,本帖将采集1024点数据进行FFT运算,评估STM32H7S78的DSP性能。</p><p>测试代码如下:</p>
<pre>
<code class="language-cpp">/* USER CODE END Header */
/* Includes ------------------------------------------------------------------*/
#include "main.h"
/* Private includes ----------------------------------------------------------*/
/* USER CODE BEGIN Includes */
#include "stdio.h"
#include "arm_math.h"
#include "perf_counter.h"
/* USER CODE END Includes */
/* Private typedef -----------------------------------------------------------*/
/* USER CODE BEGIN PTD */
/* USER CODE END PTD */
/* Private define ------------------------------------------------------------*/
/* USER CODE BEGIN PD */
/* Value passed to __LL_ADC_CALC_DATA_TO_VOLTAGE() as the analog reference voltage, in mV. */
#define VDDA_APPLI (3300UL)
/* One past the 12-bit full-scale code: a value the ADC cannot produce, used to pre-fill the buffer. */
#define VAR_CONVERTED_DATA_INIT_VALUE (__LL_ADC_DIGITAL_SCALE(LL_ADC_RESOLUTION_12B) + 1)
/* Number of ADC samples captured per DMA transfer. */
#define ADC_CONVERTED_DATA_BUFFER_SIZE 1024
/* Raw ADC codes written by DMA; one element per sample, 32-byte aligned. */
ALIGN_32BYTES (uint16_t uhADCxConvertedData[ADC_CONVERTED_DATA_BUFFER_SIZE]);
/* The same samples converted to millivolts; filled by main() and read by the FFT routine. */
uint16_t uhADCxConvertedData_Voltage_mVolt[ADC_CONVERTED_DATA_BUFFER_SIZE];
/* DMA transfer state shared between the ADC callback and main():
 *   0: DMA transfer is not completed
 *   1: DMA transfer is completed
 *   2: DMA transfer has not been started yet (initial state)
 */
__IO uint8_t ubDmaTransferStatus = 2U;
/* USER CODE END PD */
/* Private macro -------------------------------------------------------------*/
/* USER CODE BEGIN PM */
/* USER CODE END PM */
/* Private variables ---------------------------------------------------------*/
/* ADC2 handle; passed to HAL_ADCEx_Calibration_Start() and HAL_ADC_Start_DMA() in main(). */
ADC_HandleTypeDef hadc2;
/* GPDMA1 channel 0 handle; not referenced by the code shown here -
 * presumably linked to hadc2 by the init code (TODO confirm). */
DMA_HandleTypeDef handle_GPDMA1_Channel0;
/* TIM6 handle; not referenced by the code shown here -
 * presumably set up by MX_TIM6_Init() (TODO confirm). */
TIM_HandleTypeDef htim6;
/* UART4 handle; used by __io_putchar() to transmit characters. */
UART_HandleTypeDef huart4;
/* USER CODE BEGIN PV */
/* USER CODE END PV */
/* Private function prototypes -----------------------------------------------*/
/* Initialization routines called once from main(), in this order, before the
 * acquisition loop starts. Their definitions are not part of this listing. */
static void MPU_Config(void);
static void MX_GPIO_Init(void);
static void MX_GPDMA1_Init(void);
static void MX_TIM6_Init(void);
static void MX_UART4_Init(void);
static void MX_ADC2_Init(void);
/* USER CODE BEGIN PFP */
/* USER CODE END PFP */
/* Private user code ---------------------------------------------------------*/
/* USER CODE BEGIN 0 */
/* USER CODE BEGIN PFP */
int __io_putchar(int ch)
{
HAL_UART_Transmit(&huart4 , (uint8_t *)&ch, 1, 0xFFFF);
return ch;
}
/* USER CODE END PFP */
/**
 * @brief  ADC conversion-complete callback: marks the DMA transfer as completed.
 * @param  hadc  ADC handle supplied by the caller (not used here).
 */
void HAL_ADC_ConvCpltCallback(ADC_HandleTypeDef *hadc)
{
    (void)hadc; /* the same flag is raised whichever ADC reported completion */

    /* 1 == "DMA transfer is completed"; main() polls and clears this flag. */
    ubDmaTransferStatus = 1U;
}
int tmp_index;
#define FFT_LENGTH ADC_CONVERTED_DATA_BUFFER_SIZE
float fft_inputbuf;
float fft_outputbuf;
uint32_t ifftFlag = 0;
uint32_t fftSize = 0;
#define TEST_LENGTH_SAMPLES 1024
static float32_t testOutput_f32;
static float32_t testOutputMag_f32;
static float32_t testInput_f32;
static float32_t Phase_f32;
/**
 * @brief  Compute the phase, in degrees, of each complex FFT bin.
 * @param  _ptr          Interleaved complex data (re, im, re, im, ...);
 *                       2 * _usFFTPoints floats are read.
 * @param  _phase        Output array receiving _usFFTPoints phase values in degrees.
 * @param  _usFFTPoints  Number of complex bins to process.
 * @param  _uiCmpValue   Magnitude threshold: bins whose magnitude is below it
 *                       get a phase of 0 instead of a noise-dominated angle.
 */
void PowerPhaseRadians_f32(float32_t *_ptr, float32_t *_phase, uint16_t _usFFTPoints, float32_t _uiCmpValue)
{
    uint16_t i;

    for (i = 0; i < _usFFTPoints; i++)
    {
        const float32_t lX = _ptr[2U * i];      /* real part */
        const float32_t lY = _ptr[2U * i + 1U]; /* imaginary part */
        float32_t mag;

        /* |z| = sqrt(re^2 + im^2) */
        arm_sqrt_f32((float32_t)(lX * lX + lY * lY), &mag);

        if (_uiCmpValue > mag)
        {
            _phase[i] = 0;
        }
        else
        {
            /* atan2f() yields radians in (-pi, pi]; convert to degrees. */
            _phase[i] = atan2f(lY, lX) * 180.0f / 3.1415926f;
        }
    }
}
static void arm_rfft_f32_app2(void)
{
uint16_t i;
arm_rfft_fast_instance_f32 S;
//正变换
ifftFlag = 0;
//初始化结构体S中的参数
arm_rfft_fast_init_f32(&S, TEST_LENGTH_SAMPLES);
for(i=0; i<TEST_LENGTH_SAMPLES; i++)
{
//测试波形
//testInput_f32 = 1 + cos(2*3.1415926f*50*i/1024 + 3.1415926f/3);
testInput_f32 = uhADCxConvertedData_Voltage_mVolt;
testInput_f32 = 0;//虚部赋值,固定为0.
}
//1024点实序列快速变换
arm_rfft_fast_f32(&S, testInput_f32, testOutput_f32, ifftFlag);
arm_cmplx_mag_f32(testOutput_f32, testOutputMag_f32, TEST_LENGTH_SAMPLES);
PowerPhaseRadians_f32(testOutput_f32, Phase_f32, TEST_LENGTH_SAMPLES, 0.5f);
//串口打印求解的幅频和相频
for(i=0; i<TEST_LENGTH_SAMPLES; i++)
{
//printf("%f, %f\r\n", testOutputMag_f32, Phase_f32);
}
}
/* USER CODE END 0 */
/**
  * @brief  The application entry point.
  *
  * Initialises the MPU, HAL, cycle counter and peripherals, calibrates ADC2,
  * then loops forever: start a DMA capture of ADC_CONVERTED_DATA_BUFFER_SIZE
  * samples, wait, convert the samples to millivolts, run the FFT routine once
  * on the complete buffer and print how many CPU cycles that took.
  *
  * @retval int (never returns)
  */
int main(void)
{
  /* USER CODE BEGIN 1 */
  /* USER CODE END 1 */

  /* MPU Configuration--------------------------------------------------------*/
  MPU_Config();

  /* MCU Configuration--------------------------------------------------------*/
  /* Update SystemCoreClock variable according to RCC registers values. */
  SystemCoreClockUpdate();

  /* Reset of all peripherals, Initializes the Flash interface and the Systick. */
  HAL_Init();

  /* USER CODE BEGIN Init */
  init_cycle_counter(true);
  /* USER CODE END Init */

  /* USER CODE BEGIN SysInit */
  /* USER CODE END SysInit */

  /* Initialize all configured peripherals */
  MX_GPIO_Init();
  MX_GPDMA1_Init();
  MX_TIM6_Init();
  MX_UART4_Init();
  MX_ADC2_Init();

  /* USER CODE BEGIN 2 */
  printf("ADC DMA DEMO\n");

  /* Pre-fill the DMA buffer with a value the ADC cannot produce, so slots
   * that were never written are recognisable. */
  for (tmp_index = 0; tmp_index < ADC_CONVERTED_DATA_BUFFER_SIZE; tmp_index++)
  {
    uhADCxConvertedData[tmp_index] = VAR_CONVERTED_DATA_INIT_VALUE;
  }

  if (HAL_ADCEx_Calibration_Start(&hadc2, ADC_SINGLE_ENDED) != HAL_OK)
  {
    printf("ADC Calibration Err\n");
    Error_Handler();
  }
  printf("ADC Calibration End\n");
  /* USER CODE END 2 */

  /* Infinite loop */
  /* USER CODE BEGIN WHILE */
  while (1)
  {
    /* USER CODE END WHILE */
    /* USER CODE BEGIN 3 */
    HAL_GPIO_TogglePin(LD4_GPIO_Port, LD4_Pin);

    if (HAL_ADC_Start_DMA(&hadc2, (uint32_t *)uhADCxConvertedData, ADC_CONVERTED_DATA_BUFFER_SIZE) != HAL_OK)
    {
      Error_Handler();
    }

    /* Give the capture time to finish before checking the completion flag. */
    HAL_Delay(50);

    if (ubDmaTransferStatus == 1)
    {
      ubDmaTransferStatus = 0;

      /* Convert the whole capture to millivolts first ... */
      for (tmp_index = 0; tmp_index < ADC_CONVERTED_DATA_BUFFER_SIZE; tmp_index++)
      {
        uhADCxConvertedData_Voltage_mVolt[tmp_index] =
            (uint16_t)__LL_ADC_CALC_DATA_TO_VOLTAGE(VDDA_APPLI, uhADCxConvertedData[tmp_index], LL_ADC_RESOLUTION_12B);
      }

      /* ... then process the complete buffer once. The counter brackets the
       * call itself, so the figure covers the whole routine (sample copy,
       * FFT init, FFT, magnitude and phase), not an empty region. */
      start_cycle_counter();
      arm_rfft_f32_app2();
      int64_t lCycleUsed = stop_cycle_counter();

      printf("cycle counter = %lld\n", (long long)lCycleUsed);
    }
  }
  /* USER CODE END 3 */
}
</code></pre>
<p>主要用到的函数是arm_rfft_fast_f32,函数原型是</p>
<pre>
<code class="language-cpp">void arm_rfft_fast_f32(
const arm_rfft_fast_instance_f32 * S,
float32_t * p,
float32_t * pOut,
uint8_t ifftFlag)</code></pre>
<p>这个函数用于单精度浮点实数FFT,函数有4个参数:</p>
<p>S是FFT实例化句柄,调用函数arm_rfft_fast_init_f32初始化得到,然后供此函数arm_rfft_fast_f32调用。支持32, 64, 128, 256, 512, 1024, 2048, 4096点FFT。<br />
比如做1024点FFT,代码如下:</p>
<p>arm_rfft_fast_instance_f32 S;</p>
<p>arm_rfft_fast_init_f32(&S, 1024);</p>
<p>arm_rfft_fast_f32(&S, testInput_f32, testOutput_f32, ifftFlag);</p>
<p>p是实数地址,比如我们要做1024点实数FFT,要保证有1024个缓冲。</p>
<p>pOut是FFT转换结果,转换结果不再是实数,而是复数,按照实部、虚部、实部、虚部依次排列。比如做1024点FFT,这里的输出也会有1024个数据,即512个复数。</p>
<p>ifftFlag用于设置正变换和逆变换,ifftFlag=0表示正变换,ifftFlag=1表示逆变换。</p>
<p> </p>
<p>测试结果:</p>
<div style="text-align: center;"></div>
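<p>调用arm_rfft_fast_f32计算1024点FFT耗时184个时钟周期,主频配置的是600MHz,即184/600MHz≈0.307us,下图是大佬测试的STM32F4和F1的数据,可见1024点FFT性能可以说是F4的369.25/0.307≈1203倍。(注意:只有当计时区间 start_cycle_counter()/stop_cycle_counter() 真正包住FFT调用时,这个数值才代表FFT耗时;1024点浮点FFT的运算量远大于184个周期,该结果更接近空计时区间本身的开销,建议确认计时位置并复测后再下结论。)</p>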
<div style="text-align: center;"></div>
<p> </p>
<p>1024点数据进行FFT运算看起来有点麻烦,整这么多代码</p>
<table cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td id="postmessage_3374025">
<p>1024点数据进行FFT运算看起来有点麻烦,整这么多代码</p>
</td>
</tr>
</tbody>
</table>
王1979 发表于 2024-11-12 08:32
**** 作者被禁止或删除 内容自动屏蔽 ****
<p>算法调用不麻烦</p>
页:
[1]