|
Altera socfpga NEON 指令加速测试成功(附源代码)
[复制链接]
本帖最后由 yupc123 于 2016-8-15 16:51 编辑
10000个浮点数据累加和测试,结果如下图:
源代码如下:
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#include "NE10.h"
#define ALIGH_UNIT 4
#define ARRAY_SIZE 10000
//NEONl加速
static float calc_ne10(const float* data,int size)
{
float sum = 0.f;
float sum_vec[ALIGH_UNIT]={0};
for(int i=0; i
{
ne10_add_float_neon(sum_vec,sum_vec,(float*)data+ALIGH_UNIT*i,ALIGH_UNIT);
}
for (int i=0;i
{
sum+=sum_vec;
}
int odd = size & (ALIGH_UNIT-1);
if(odd){
for(int i=size-odd;i
sum+=data;
}
return sum;
}//普通C语言计算
static float calc_c(const float *data,int size)
{
float sum=0.f;
for(int i=0;i
{
sum+=data;
}
return sum;
}
int main(int argc,char** argv)
{
float data[ARRAY_SIZE] = {0};
float sum;
struct timeval starttv;
struct timeval endtv;
for(int i=0;i
{
data=rand()%5;
}
gettimeofday(&starttv,NULL);
sum=calc_c(data,ARRAY_SIZE);
gettimeofday(&endtv,NULL);
printf("calc_c=%f,time=%d us\n",sum,(int)(1000000*(endtv.tv_sec-starttv.tv_sec)+(endtv.tv_usec-starttv.tv_usec)));
gettimeofday(&starttv,NULL);
sum=calc_ne10(data,ARRAY_SIZE);
gettimeofday(&endtv,NULL);
printf("calc_c=%f,time=%d us\n",sum,(int)(1000000*(endtv.tv_sec-starttv.tv_sec)+(endtv.tv_usec-starttv.tv_usec)));
//printf("calc_neon=%f\n",calc_ne10(data,ARRAY_SIZE));
return 0;
}
|
赞赏
-
1
查看全部赞赏
-
|