首先给出工程打包仓库地址,参考官网yolov5-rtsp示例
git@github.com:LitchiCheng/RV1106_Linux.git
包含内容如下:
1. v4l2封装调用库工程
2. st7735 LCD播放badapple工程
3. MNIST多数字实时识别工程
4. rkrtsp推流测试工程
5. rkmpi csi摄像头测试工程
6. yolov5单帧识别工程
RV1106其他的测试帖子链接:
rv1106开发板上使用yolov5转换rknn模型进行图像识别的推理实测
rv1106开发板配置TypeC接口USB Host并识别USB设备
rv1106开发板buildroot下使用v4l2和fswebcam进行USB摄像头测试
RV1106手把手教你:yolov5图像识别模型从pt转换到onnx再到rknn
RV1106手把手教你:ffmpeg无界面使用USB摄像头录制视频
RV1106手把手教你:使用ffmpeg、framebuffer在ST7735屏幕上播放bad apple
RV1106手把手教你:惊呆了!USB摄像头秒变AI助手,rknn轻松拍照做yolov5推理!
#AI挑战营第一站#pytorch训练MNIST数据集实现手写数字识别
如下直接介绍MNIST多数字实时识别思路:
- 使用rkmpi库进行frame的抓取
- 使用opencv-mobile库对单帧frame进行处理,通过二值化、反相、腐蚀、膨胀等处理,获得多个数字的框选位置
- 使用mnist训练且转换后的rknn模型单独对框选位置的图片进行推理,得到该框选位置的概率以及数字
- 在frame上通过opencv-mobile进行画框以及标记数字及概率
- 使用rkrtsp进行推流
如下为代码关键部分
一、获取frame、处理、推理、标记
void *data = RK_MPI_MB_Handle2VirAddr(stVpssFrame.stVFrame.pMbBlk);
cv::Mat frame(height,width,CV_8UC3,data);
std::vector<cv::Rect> rects;
std::vector<detect_result> detect_results;
std::vector<cv::Mat> sub_pics;
rects = find_contour(frame, sub_pics);
for(int i = 0; i < rects.size(); i++){
if (rects[i].area() > 0){
inference_mnist_model(&rknn_app_ctx, sub_pics[i], detect_results);
if (!detect_results.empty()){
detect_result result = detect_results.back();
//绿框,thickness为1
cv::rectangle(frame, rects[i], cv::Scalar(0, 255, 0), 1);
//红字,fontscale为1,thickness为1
cv::putText(frame, std::to_string(result.num), cv::Point(rects[i].x, rects[i].y + 10),
cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(255, 0, 0), 1);
//蓝字,fontscale为1,thickness为1
cv::putText(frame, std::to_string(result.probability), cv::Point(rects[i].x+ 30, rects[i].y + 10),
cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 1);
detect_results.pop_back();
}
}
}
二、单帧图像处理
/**
 * Locate digit-like regions in a frame and cut out a 28x28 patch for each one.
 *
 * Pipeline: grayscale -> invert -> 3x3 dilate -> 3x3 erode -> zero every pixel
 * that is <= 127 -> external contours. Each contour whose area exceeds 30
 * produces a bounding box (grown by up to 10 px per side and clamped to the
 * image) plus an Otsu-binarized patch of that box resized to 28x28.
 *
 * @param image    Input frame, converted with cv::COLOR_BGR2GRAY. Not modified.
 * @param sub_pics Output; one 28x28 patch is appended per returned rectangle,
 *                 in the same order as the rectangles.
 * @return Padded bounding boxes of the accepted contours; empty when `image`
 *         is empty or no contour qualifies.
 */
std::vector<cv::Rect> find_contour(const cv::Mat &image, std::vector<cv::Mat>& sub_pics) {
    std::vector<cv::Rect> borders;
    // cvtColor rejects an empty input, so report "nothing found" instead.
    if (image.empty()) {
        return borders;
    }

    // Grayscale, then invert.
    cv::Mat gray, edged;
    cv::cvtColor(image, gray, cv::COLOR_BGR2GRAY);
    edged = ~gray;

    // Dilate then erode with a 3x3 rectangular kernel.
    const cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(3, 3));
    cv::dilate(edged, edged, kernel);
    cv::erode(edged, edged, kernel);

    // Zero out everything at or below 127; brighter pixels keep their value.
    cv::threshold(edged, edged, 127, 0, cv::THRESH_TOZERO);

    // Outer contours only.
    std::vector<std::vector<cv::Point>> contours;
    cv::findContours(edged, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE);

    for (const auto &contour : contours) {
        // Ignore small contours (area of 30 or less).
        if (cv::contourArea(contour) <= 30) {
            continue;
        }

        // Grow the box so the digit is not clipped, clamped to the image bounds.
        cv::Rect bounding_box = cv::boundingRect(contour);
        bounding_box.x = std::max(0, bounding_box.x - 10);
        bounding_box.y = std::max(0, bounding_box.y - 10);
        bounding_box.width = std::min(image.cols - bounding_box.x, bounding_box.width + 20);
        bounding_box.height = std::min(image.rows - bounding_box.y, bounding_box.height + 20);
        borders.push_back(bounding_box);

        // Threshold into a separate Mat: the ROI is a view into `edged`, so an
        // in-place write would alter pixels that a later, overlapping padded
        // box still has to read.
        const cv::Mat roi = edged(bounding_box);
        cv::Mat binarized;
        cv::threshold(roi, binarized, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU);

        // Resize the patch to 28x28.
        cv::Mat postsub;
        cv::resize(binarized, postsub, cv::Size(28, 28), 0, 0, cv::INTER_AREA);
        sub_pics.push_back(postsub);
    }
    return borders;
}
三、推理函数
/// Dequantize one affine-quantized int8 value: (qnt - zp) * scale.
static float deqnt_affine_to_f32(int8_t qnt, int32_t zp, float scale)
{
    const float centered = static_cast<float>(qnt) - static_cast<float>(zp);
    return centered * scale;
}
/**
 * Run the RKNN model on one digit patch and append the best-scoring class.
 *
 * The patch bytes are copied into the model's input buffer (row by row when
 * the input width differs from its w_stride), rknn_run is invoked, and the
 * first DETECT_NUM_SIZE output values are dequantized, L2-normalised and
 * scanned for the maximum.
 *
 * @param app_ctx Model context; input_attrs[0].dims[1..3] are read as
 *                height, width and channel.
 * @param frame   Patch to classify. Must be continuous and hold at least
 *                width * height * channel bytes.
 * @param results Output; {class index, normalised score} is appended on success.
 *                The score is an L2-normalised value, not a softmax probability.
 * @return 0 on success, -1 if the patch is unusable or rknn_run fails
 *         (nothing is appended in that case).
 */
int inference_mnist_model(rknn_app_context_t* app_ctx, cv::Mat &frame, std::vector<detect_result>& results)
{
    int ret;
    const int height = app_ctx->input_attrs[0].dims[1];
    const int width = app_ctx->input_attrs[0].dims[2];
    const int channel = app_ctx->input_attrs[0].dims[3];
    const int stride = app_ctx->input_attrs[0].w_stride;

    // Bytes per source row / per whole input (one byte per element, as the copy below assumes).
    const std::size_t src_row_bytes = static_cast<std::size_t>(width) * channel;
    const std::size_t src_bytes = src_row_bytes * height;

    // Refuse to read past the end of the patch or through row padding.
    if (frame.empty() || !frame.isContinuous() || frame.total() * frame.elemSize() < src_bytes) {
        printf("inference_mnist_model: input frame is empty, non-continuous or too small\n");
        return -1;
    }

    if (width == stride) {
        // Source and destination rows are packed identically: one bulk copy.
        memcpy(app_ctx->input_mems[0]->virt_addr, frame.data, src_bytes);
    } else {
        // Destination rows are `stride` elements wide: copy row by row.
        const uint8_t *src_ptr = frame.data;
        uint8_t *dst_ptr = static_cast<uint8_t *>(app_ctx->input_mems[0]->virt_addr);
        const std::size_t dst_row_bytes = static_cast<std::size_t>(stride) * channel;
        for (int h = 0; h < height; ++h) {
            memcpy(dst_ptr, src_ptr, src_row_bytes);
            src_ptr += src_row_bytes;
            dst_ptr += dst_row_bytes;
        }
    }

    ret = rknn_run(app_ctx->rknn_ctx, nullptr);
    if (ret < 0) {
        printf("rknn_run fail! ret=%d\n", ret);
        return -1;
    }

#define DETECT_NUM_SIZE 10
    // Post-process. The output buffer is owned by the context, so it is only
    // viewed here (no allocation); scores live in a small stack array.
    // Read as int8_t because that is what deqnt_affine_to_f32 takes.
    // NOTE(review): assumes output 0 holds at least DETECT_NUM_SIZE int8 values -- confirm against the model.
    const int8_t *output = static_cast<const int8_t *>(app_ctx->output_mems[0]->virt_addr);
    float out_fp32[DETECT_NUM_SIZE];
    const int32_t zp = app_ctx->output_attrs[0].zp;
    const float scale = app_ctx->output_attrs[0].scale;

    // Dequantize to float.
    for (int i = 0; i < DETECT_NUM_SIZE; ++i)
        out_fp32[i] = deqnt_affine_to_f32(output[i], zp, scale);

    // L2-normalise.
    float sum = 0;
    for (int i = 0; i < DETECT_NUM_SIZE; ++i)
        sum += out_fp32[i] * out_fp32[i];
    const float norm = sqrt(sum);
    // If every value is zero, norm is 0 and the divisions yield NaN; the
    // comparisons below are then all false, so {-1, -1.0} is reported.
    for (int i = 0; i < DETECT_NUM_SIZE; ++i)
        out_fp32[i] /= norm;

    // Pick the class with the highest score.
    float max_probability = -1.0;
    int detect_num = -1;
    for (int i = 0; i < DETECT_NUM_SIZE; ++i) {
        if (out_fp32[i] > max_probability) {
            max_probability = out_fp32[i];
            detect_num = i;
        }
    }

    // Queue the result for the caller.
    results.push_back({detect_num, max_probability});
    return 0;
}
工程编译
cd MNIST
mkdir build
cd build
cmake ..
make
make install
scp -r ../rtsp_mnist_test root@xxx.xxx.xxx.xxx:~
目标板运行
killall rkipc
cd rtsp_mnist_test
./rtsp_mnist model/mnist.rknn
视频分享