本篇讲述通过Dhrystone测量开发板MCU性能。
一.了解Dhrystone
Dhrystone 是测量 MCU 运算能力的最常见的基准测试程序之一,其主要目的是测试 MCU 的整数运算和逻辑运算的性能。
Dhrystone 的测试原理是在单位时间内,测试 MCU 运行了多少次 Dhrystone 程序,测试结果指标用单位 DMIPS/MHz 表示。DMIPS 是 Dhrystone Million Instructions Per Second 的缩写,表示每秒处理的百万级机器语言指令数。
●MIPS: million instructions per second,表示每秒执行多少百万条指令,如 10MIPS 表示每秒一千万条指令。MIPS/MHz: 表示 CPU 在每 1MHz 的运行速度下可以执行多少个 MIPS。如 10MIPS/MHz,表示如果 CPU 运行在 1MHz 的频率下,每秒可执行一千万条指令;如果 CPU 运行在 5MHz 的频率下,每秒可执行五千万条指令。
●DMIPS: Dhrystone MIPS,它是一个测量CPU运行一个叫Dhrystone(整数运算)的测试程序时表现出来的相对性能高低的一个单位(很多场合人们也习惯用MIPS作为这个性能指标的单位)。
二.移植Dhrystone
1.Dhrystone 源码获取:http://www.roylongbottom.org.uk/classic_benchmarks.tar.gz 。解压后从里面获取如下源文件。
图1:Dhrystone源文件
2.添加到工程
图2:Dhrystone源文件工程添加
3.串口打印与定时器实现。定时器代码如下
/*
 * Configure TIMER1 and TIMER2 as a cascaded pair that serves as the
 * benchmark time base.
 *
 * TIMER1 counts up from 0 to 9999 and publishes its update event on TRGO0.
 * TIMER2 runs in external-clock slave mode 0 on internal trigger ITI0, so it
 * is meant to advance once per TIMER1 overflow; the caller combines the two
 * as TIMER2 * 10000 + TIMER1.
 *
 * NOTE(review): whether ITI0 of TIMER2 is actually routed to TIMER1's TRGO0
 * is device-specific -- confirm against the reference manual of the target.
 *
 * Prints the APB1 clock and the chosen prescaler, so the USART used by
 * printf must already be configured when this is called.
 */
void timer_config(void)
{
    timer_parameter_struct timer_initpara;
    /* Query the bus clock once; it feeds both the prescaler and the log line. */
    unsigned long apb1_hz = rcu_clock_freq_get(CK_APB1);

    rcu_periph_clock_enable(RCU_TIMER1);
    rcu_periph_clock_enable(RCU_TIMER2);

    /* Bring both timers back to their reset state. */
    timer_deinit(TIMER1);
    timer_deinit(TIMER2);

    /* Start from the library defaults for the init structure. */
    timer_struct_para_init(&timer_initpara);

    /*
     * TIMER1: low-order counter.
     * NOTE(review): the prescaler yields a 1 MHz count rate only if the timer
     * kernel clock is 2 x CK_APB1 -- confirm for the clock tree in use.
     */
    timer_initpara.prescaler = ((apb1_hz / 1000000) * 2 - 1);
    timer_initpara.alignedmode = TIMER_COUNTER_EDGE;
    timer_initpara.counterdirection = TIMER_COUNTER_UP;
    timer_initpara.period = 9999;
    timer_initpara.clockdivision = TIMER_CKDIV_DIV1;
    timer_init(TIMER1, &timer_initpara);

    /* Explicit casts keep the arguments in step with the conversion specifiers. */
    printf("clk:%lu,prescaler:%u\r\n", apb1_hz, (unsigned int)timer_initpara.prescaler);

    /* TIMER2: high-order counter, no prescaling of the incoming trigger. */
    timer_initpara.prescaler = 0;
    timer_initpara.alignedmode = TIMER_COUNTER_EDGE;
    timer_initpara.counterdirection = TIMER_COUNTER_UP;
    timer_initpara.period = 9999;
    timer_initpara.clockdivision = TIMER_CKDIV_DIV1;
    timer_init(TIMER2, &timer_initpara);

    /* TIMER1 acts as master: its update event drives TRGO0. */
    timer_master_slave_mode_config(TIMER1, TIMER_MASTER_SLAVE_MODE_ENABLE);
    timer_master_output0_trigger_source_select(TIMER1, TIMER_TRI_OUT0_SRC_UPDATE);

    /* TIMER2 acts as slave: it is clocked by the selected internal trigger. */
    timer_master_slave_mode_config(TIMER2, TIMER_MASTER_SLAVE_MODE_ENABLE);
    timer_slave_mode_select(TIMER2, TIMER_SLAVE_MODE_EXTERNAL0);
    timer_input_trigger_source_select(TIMER2, TIMER_SMCFG_TRGSEL_ITI0);

    /* Start counting. */
    timer_enable(TIMER1);
    timer_enable(TIMER2);

    printf("timer_config\r\n");
}
4.修改dhry_1.c 文件的 main 函数 如下.将main.c里的main函数屏蔽掉。这里注意在启动定时器之后与停止定时器之前不要加串口打印。
/*
 * Read the cascaded TIMER2:TIMER1 pair as one tick count.
 *
 * The two counters cannot be sampled atomically. If TIMER1 wraps between the
 * two reads, the high and low parts belong to different periods and the result
 * is off by 10000 ticks. TIMER2 is therefore sampled again after TIMER1 and
 * the read is repeated whenever it changed in between.
 */
static unsigned long read_timer_ticks (void)
{
  unsigned long high;
  unsigned long low;

  do
  {
    high = timer_counter_read(TIMER2);
    low  = timer_counter_read(TIMER1);
  } while (high != timer_counter_read(TIMER2));

  /* TIMER1 has a period of 10000 counts (0..9999) per TIMER2 increment. */
  return high * 10000UL + low;
}

/*
 * Dhrystone 2.1 entry point.
 * Corresponds to procedures Main and Proc_0 in the Ada version.
 *
 * Sets up the cache, USART and timers, runs Number_Of_Runs iterations of the
 * Dhrystone loop, then prints the elapsed time, Dhrystones per second, VAX
 * MIPS and DMIPS/MHz over the serial port. Never returns.
 *
 * Nothing is printed between the start and stop timestamps, so serial output
 * does not distort the measurement.
 */
int main (void)
{
  One_Fifty Int_1_Loc;
  REG One_Fifty Int_2_Loc;
  One_Fifty Int_3_Loc;
  REG char Ch_Index;
  Enumeration Enum_Loc;
  Str_30 Str_1_Loc;
  Str_30 Str_2_Loc;
  REG int Run_Index;
  REG int Number_Of_Runs;
  unsigned long ahb_hz;

  /* enable the CPU cache */
  cache_enable();
  clk = rcu_clock_freq_get(CK_APB1);

  /* Bring the serial port up first so that an allocation failure can be reported. */
  usart_config();

  Next_Ptr_Glob = malloc (sizeof *Next_Ptr_Glob);
  Ptr_Glob = malloc (sizeof *Ptr_Glob);
  if (Next_Ptr_Glob == NULL || Ptr_Glob == NULL)
  {
    /* Without both records the benchmark would dereference a null pointer. */
    printf ("Dhrystone: out of heap memory\n\r");
    while(1);
  }

  Ptr_Glob->Ptr_Comp = Next_Ptr_Glob;
  Ptr_Glob->Discr = Ident_1;
  Ptr_Glob->variant.var_1.Enum_Comp = Ident_3;
  Ptr_Glob->variant.var_1.Int_Comp = 20;
  strcpy (Ptr_Glob->variant.var_1.Str_Comp,"DHRYSTONE PROGRAM, SOME STRING");
  strcpy (Str_1_Loc, "DHRYSTONE PROGRAM, 1'ST STRING");

  Arr_2_Glob [8][7] = 10;
  /* Was missing in published program. Without this statement, */
  /* Arr_2_Glob [8][7] would have an undefined value. */
  /* Warning: With 16-Bit processors and Number_Of_Runs > 32000, */
  /* overflow may occur for this array element. */

  printf ("Dhrystone Benchmark, Version 2.1 (Language: C)\n\r");
  Number_Of_Runs = 500000;
  printf ("Execution starts, %d runs through Dhrystone\n\r", Number_Of_Runs);

  /***************/
  /* Start timer */
  /***************/
  timer_config();
  Begin_Time = read_timer_ticks();

  for (Run_Index = 1; Run_Index <= Number_Of_Runs; ++Run_Index)
  {
    Proc_5();
    Proc_4();
    /* Ch_1_Glob == 'A', Ch_2_Glob == 'B', Bool_Glob == true */
    Int_1_Loc = 2;
    Int_2_Loc = 3;
    strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 2'ND STRING");
    Enum_Loc = Ident_2;
    Bool_Glob = ! Func_2 (Str_1_Loc, Str_2_Loc);
    /* Bool_Glob == 1 */
    while (Int_1_Loc < Int_2_Loc) /* loop body executed once */
    {
      Int_3_Loc = 5 * Int_1_Loc - Int_2_Loc;
      /* Int_3_Loc == 7 */
      Proc_7 (Int_1_Loc, Int_2_Loc, &Int_3_Loc);
      /* Int_3_Loc == 7 */
      Int_1_Loc += 1;
    } /* while */
    /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
    Proc_8 (Arr_1_Glob, Arr_2_Glob, Int_1_Loc, Int_3_Loc);
    /* Int_Glob == 5 */
    Proc_1 (Ptr_Glob);
    for (Ch_Index = 'A'; Ch_Index <= Ch_2_Glob; ++Ch_Index)
      /* loop body executed twice */
    {
      if (Enum_Loc == Func_1 (Ch_Index, 'C'))
        /* then, not executed */
      {
        Proc_6 (Ident_1, &Enum_Loc);
        strcpy (Str_2_Loc, "DHRYSTONE PROGRAM, 3'RD STRING");
        Int_2_Loc = Run_Index;
        Int_Glob = Run_Index;
      }
    }
    /* Int_1_Loc == 3, Int_2_Loc == 3, Int_3_Loc == 7 */
    Int_2_Loc = Int_2_Loc * Int_1_Loc;
    Int_1_Loc = Int_2_Loc / Int_3_Loc;
    Int_2_Loc = 7 * (Int_2_Loc - Int_3_Loc) - Int_1_Loc;
    /* Int_1_Loc == 1, Int_2_Loc == 13, Int_3_Loc == 7 */
    Proc_2 (&Int_1_Loc);
    /* Int_1_Loc == 5 */
  } /* loop "for Run_Index" */

  /**************/
  /* Stop timer */
  /**************/
  timer_disable(TIMER1);
  timer_disable(TIMER2);
  End_Time = read_timer_ticks();

  /*
   * NOTE(review): User_Time / 1000000 is reported as seconds below, which
   * treats one timer tick as 1 ms. Confirm that this matches the TIMER1 tick
   * rate actually produced by timer_config() on the target.
   */
  User_Time = (End_Time - Begin_Time)*1000;

  /* Casts make the arguments match the conversion specifiers regardless of */
  /* how Begin_Time / End_Time are declared elsewhere in the file.          */
  printf ("Begin_Time:%ld\n\r", (long)Begin_Time);
  printf ("End_Time:%ld,User_Time:%6.6f\n\r", (long)End_Time, User_Time);

  Dhrystones_Per_Second = (double) Number_Of_Runs / (User_Time / 1000000);
  Vax_Mips = Dhrystones_Per_Second / 1757.0;

  printf ("Run time is: %6.6f \n\r", User_Time/1000000);
  printf ("Dhrystones per Second: %6.1f \n\r", Dhrystones_Per_Second);
  printf ("Vax_Mips is: %6.1f \n", Vax_Mips);
  printf ("\n");

  ahb_hz = rcu_clock_freq_get(CK_AHB);
  printf("MCU CK_SYS frequency is: %lu\n\r", ahb_hz);
  /* Divide in floating point so a clock that is not a whole number of MHz */
  /* is not truncated before normalising.                                  */
  printf("DMIPS/MHz is: %f \n", (double)Vax_Mips / ((double)ahb_hz / 1000000.0));

  while(1);
}
三.Dhrystone测验
分别设置 Number_Of_Runs 为 10000000和500000,其中 Dhrystones_Per_Second = 运行次数 / 运行时间, Vax_Mips = Dhrystones_Per_Second/ 1757.0, DMIPS/MHz = Vax_Mips/ 主频。
编译烧录后,可看到测试结果如下。
图3:Dhrystone测试结果
兆易GD32L23x 的DMIPS/MHz 得分0.311182左右,而GD32H759I的DMIPS/MHz 得分1.05左右,后者性能更强。