Posted by waterman on 2024-12-4 02:35

AI Challenge Camp (Advanced): 3. onnx2rknn

In the previous section we obtained the ONNX models for retinaface and facenet. In this section we convert them into the rknn format that the RKNPU can load and run.
# Environment Setup
See my earlier post for the toolkit installation steps: https://bbs.elecfans.com/jishu_2454118_1_1.html
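Once the toolkit is installed, a quick import check (a minimal sketch) confirms that rknn-toolkit2 is usable in the current Python environment:
```python
# Minimal environment check: only verifies that the RKNN-Toolkit2 package imports cleanly
from rknn.api import RKNN

rknn = RKNN(verbose=False)
print('rknn-toolkit2 is available')
rknn.release()
```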
# Model Conversion
## retinaface
### Dataset Preparation
First, upload an image containing a face to serve as the calibration data, and store its path in retinaface_dataset.txt.

The contents of retinaface_dataset.txt are as follows:
```
../../img/detection/face.jpg
```
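Quantization calibration usually benefits from more than one image. If you have a folder of calibration images, the list file can be generated programmatically; a minimal sketch (the image directory is an assumption, adjust it to your own layout):
```python
import glob

# Hypothetical calibration image directory
calib_images = sorted(glob.glob('../../img/detection/*.jpg'))
with open('retinaface_dataset.txt', 'w') as f:
    f.write('\n'.join(calib_images))
```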
### Model Conversion
Write the model conversion script, convert.py:
```python
import sys

from rknn.api import RKNN

def parse_arg():
    if len(sys.argv) < 5:
      print("Usage: python3 {} onnx_model_path dataset_path output_rknn_path model_type".format(sys.argv[0]))
      exit(1)

    model_path = sys.argv[1]
    dataset_path = sys.argv[2]
    output_path = sys.argv[3]
    model_type = sys.argv[4]

    return model_path, dataset_path, output_path, model_type

if __name__ == '__main__':
    model_path, dataset_path, output_path, model_type= parse_arg()

    # Create RKNN object
    rknn = RKNN(verbose=False)

    # Pre-process config
    print('--> Config model')
    if model_type == 'Retinaface':
      # BGR channel means used by the Caffe-style RetinaFace preprocessing
      rknn.config(mean_values=[[104, 117, 123]], std_values=[[1, 1, 1]], target_platform='rv1103',
                  quantized_algorithm="normal", quant_img_RGB2BGR=True, optimization_level=0)
      print("Use retinaface mode")
    else:
      # Facenet branch; mean/std must match the normalization used when the model was trained
      rknn.config(mean_values=[[127.5, 127.5, 127.5]], std_values=[[127.5, 127.5, 127.5]], target_platform='rv1103')
   
    print('done')

    # Load model
    print('--> Loading model')
    ret = rknn.load_onnx(model=model_path)
    if ret != 0:
      print('Load model failed!')
      exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=True, dataset=dataset_path)
    if ret != 0:
      print('Build model failed!')
      exit(ret)
    print('done')

    # Export rknn model
    print('--> Export rknn model')
    ret = rknn.export_rknn(output_path)
    if ret != 0:
      print('Export rknn model failed!')
      exit(ret)
    print('done')

    # Release
    rknn.release()
```
Run convert.py to perform the conversion:
```bash
python convert.py ../model_zoo/export/retinaface.onnx ./dataset/retinaface_dataset.txt ../model_zoo/retinaface.rknn Retinaface
```

## facenet
### Dataset Preparation
First, upload an image containing a face to serve as the calibration data, and store its path in facenet_dataset.txt.

The contents of facenet_dataset.txt are as follows:
```
../../img/recongnition/calib.jpg
```
### Model Conversion
Run convert.py to perform the conversion:
```bash
python convert.py ../model_zoo/export/facenet.onnx ./dataset/facenet_dataset.txt ../model_zoo/facenet.rknn Facenet
```


After the conversion completes, the two corresponding rknn model files are generated in the model_zoo directory.
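As a quick sanity check (a minimal sketch; the paths assume the layout above), the exported files can be reloaded with the toolkit:
```python
from rknn.api import RKNN

# Reload each exported model to confirm the .rknn files are valid
for path in ['../model_zoo/retinaface.rknn', '../model_zoo/facenet.rknn']:
    rknn = RKNN()
    if rknn.load_rknn(path) != 0:
        print('Failed to load', path)
    else:
        print('Loaded', path, 'OK')
    rknn.release()
```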


# Model Evaluation
## Dataset Preparation
First, prepare the validation dataset.

The directory structure is as follows:
```
eval
├── dataset
│   ├── facenet
│   │   ├── 1_001.jpg
│   │   ├── 1_002.jpg
│   │   └── 2_001.jpg
│   └── retinaface
│       └── j1.jpg
├── facenet.py
└── retinaface.py
```
## retinaface
```python
import os
import sys
import urllib
import urllib.request
import time
import numpy as np
import cv2
from math import ceil
from itertools import product as product

from rknn.api import RKNN
DATASET_PATH = '../dataset/retinaface_dataset.txt'
DEFAULT_QUANT = True

def letterbox_resize(image, size, bg_color):
    """
    letterbox_resize the image according to the specified size
    :param image: input image, which can be a NumPy array or file path
    :param size: target size (width, height)
    :param bg_color: background filling data
    :return: processed image
    """
    if isinstance(image, str):
      image = cv2.imread(image)

    target_width, target_height = size
    image_height, image_width, _ = image.shape

    # Compute the resized dimensions that preserve the aspect ratio
    aspect_ratio = min(target_width / image_width, target_height / image_height)
    new_width = int(image_width * aspect_ratio)
    new_height = int(image_height * aspect_ratio)

    # Scale proportionally with cv2.resize()
    image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)

    # Create the padded canvas and paste the resized image into its center
    result_image = np.ones((target_height, target_width, 3), dtype=np.uint8) * bg_color
    offset_x = (target_width - new_width) // 2
    offset_y = (target_height - new_height) // 2
    result_image[offset_y:offset_y + new_height, offset_x:offset_x + new_width] = image
    return result_image, aspect_ratio, offset_x, offset_y

def PriorBox(image_size): #image_size Support (320,320) and (640,640)
    anchors = []
    min_sizes = [[16, 32], [64, 128], [256, 512]]
    steps = [8, 16, 32]
    feature_maps = [[ceil(image_size[0] / step), ceil(image_size[1] / step)] for step in steps]
    for k, f in enumerate(feature_maps):
      min_sizes_ = min_sizes[k]
      for i, j in product(range(f[0]), range(f[1])):
            for min_size in min_sizes_:
                s_kx = min_size / image_size[1]
                s_ky = min_size / image_size[0]
                dense_cx = [x * steps[k] / image_size[1] for x in [j + 0.5]]
                dense_cy = [y * steps[k] / image_size[0] for y in [i + 0.5]]
                for cy, cx in product(dense_cy, dense_cx):
                  anchors += [cx, cy, s_kx, s_ky]
    output = np.array(anchors).reshape(-1, 4)
    print("image_size:", image_size, " num_priors=", output.shape[0])
    return output


def box_decode(loc, priors):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
      loc (tensor): location predictions for loc layers,
            Shape: [num_priors, 4]
      priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors, 4].
      variances: (list) Variances of priorboxes
    Return:
      decoded bounding box predictions
    """
    variances = [0.1, 0.2]
    boxes = np.concatenate((
      priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
      priors[:, 2:] * np.exp(loc[:, 2:] * variances[1])), axis=1)
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes


def decode_landm(pre, priors):
    """Decode landm from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
      pre (tensor): landm predictions for loc layers,
            Shape: [num_priors, 10]
      priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors, 4].
      variances: (list) Variances of priorboxes
    Return:
      decoded landm predictions
    """
    variances = [0.1, 0.2]
    landmarks = np.concatenate((
      priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:],
      priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:],
      priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:],
      priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:],
      priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:]
    ), axis=1)
    return landmarks


def nms(dets, thresh):
    """Pure Python NMS baseline."""
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
      i = order[0]
      keep.append(i)
      xx1 = np.maximum(x1[i], x1[order[1:]])
      yy1 = np.maximum(y1[i], y1[order[1:]])
      xx2 = np.minimum(x2[i], x2[order[1:]])
      yy2 = np.minimum(y2[i], y2[order[1:]])

      w = np.maximum(0.0, xx2 - xx1 + 1)
      h = np.maximum(0.0, yy2 - yy1 + 1)
      inter = w * h
      ovr = inter / (areas[i] + areas[order[1:]] - inter)

      inds = np.where(ovr <= thresh)[0]
      order = order[inds + 1]

    return keep


if __name__ == '__main__':
    # Create RKNN object
    rknn = RKNN()

    # Preprocess config
    print('--> Config model')
    rknn.config(mean_values=[[104, 117, 123]], std_values=[[1, 1, 1]], target_platform="rv1106",
                quantized_algorithm="normal", quant_img_RGB2BGR=True)  # alternatively: quantized_algorithm="mmse"
    print('done')

    # Load model
    print('--> Loading model')
    ret = rknn.load_onnx(model="../../model_zoo/export/retinaface.onnx")
    if ret != 0:
      print('Load model failed!')
      exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=True, dataset=DATASET_PATH)
    if ret != 0:
      print('Build model failed!')
      exit(ret)
    print('done')


    # Input image
    img = cv2.imread('./dataset/retinaface/j1.jpg')
    img_height, img_width, _ = img.shape
    model_height, model_width = (640, 640)
    letterbox_img, aspect_ratio, offset_x, offset_y = letterbox_resize(img, (model_width, model_height), 114)  # letterbox scaling
    infer_img = letterbox_img[..., ::-1]  # BGR2RGB

    # Init runtime environment
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    if ret != 0:
      print('Init runtime environment failed!')
      exit(ret)
    print('done')

    # Run inference
    print('--> Running model')
    outputs = rknn.inference(inputs=[infer_img])
    loc, conf, landmarks = outputs  # unpack the three output tensors
   
    priors = PriorBox(image_size=(model_height, model_width))  # generate prior boxes
    boxes = box_decode(loc.squeeze(0), priors)                 # decode box predictions
   
    # letterbox
    scale = np.array([model_width, model_height,
                      model_width, model_height])
    boxes = boxes * scale // 1  # face boxes in letterbox coordinates
    boxes[..., 0::2] = np.clip((boxes[..., 0::2] - offset_x) / aspect_ratio, 0, img_width)   # undo letterbox
    boxes[..., 1::2] = np.clip((boxes[..., 1::2] - offset_y) / aspect_ratio, 0, img_height)  # undo letterbox
    scores = conf.squeeze(0)[:, 1]  # face detection confidence
    landmarks = decode_landm(landmarks.squeeze(
      0), priors)# face keypoint data
    scale_landmarks = np.array([model_width, model_height, model_width, model_height,
                              model_width, model_height, model_width, model_height,
                              model_width, model_height])
    landmarks = landmarks * scale_landmarks // 1
    landmarks[...,0::2] = np.clip((landmarks[...,0::2] - offset_x) / aspect_ratio, 0, img_width) #letterbox
    landmarks[...,1::2] = np.clip((landmarks[...,1::2] - offset_y) / aspect_ratio, 0, img_height) #letterbox
   
    # Discard detections with low confidence
    inds = np.where(scores > 0.5)[0]
    boxes = boxes[inds]
    landmarks = landmarks[inds]
    scores = scores[inds]

    # Sort by confidence in descending order
    order = scores.argsort()[::-1]
    boxes = boxes[order]
    landmarks = landmarks[order]
    scores = scores[order]

    # Non-maximum suppression
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
      np.float32, copy=False)
    keep = nms(dets, 0.2)
    dets = dets[keep, :]
    landmarks = landmarks[keep]
    dets = np.concatenate((dets, landmarks), axis=1)

    # Draw boxes, scores, and landmarks on the original image
    for data in dets:
      if data[4] < 0.5:
            continue
      print("face @ (%d %d %d %d) %f" % (data[0], data[1], data[2], data[3], data[4]))
      text = "{:.4f}".format(data[4])
      data = list(map(int, data))
      cv2.rectangle(img, (data[0], data[1]),
                      (data[2], data[3]), (0, 0, 255), 2)
      cx = data[0]
      cy = data[1] + 12
      cv2.putText(img, text, (cx, cy),
                  cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
      # landmarks
      cv2.circle(img, (data[5], data[6]), 1, (0, 0, 255), 5)
      cv2.circle(img, (data[7], data[8]), 1, (0, 255, 255), 5)
      cv2.circle(img, (data[9], data[10]), 1, (255, 0, 255), 5)
      cv2.circle(img, (data[11], data[12]), 1, (0, 255, 0), 5)
      cv2.circle(img, (data[13], data[14]), 1, (255, 0, 0), 5)
    img_path = './result.jpg'
    cv2.imwrite(img_path, img)
    print("save image in", img_path)
    # Release
    rknn.release()
```
The execution result is as follows:


## facenet
```python
import numpy as np
import cv2
import os
from rknn.api import RKNN
from PIL import Image
from sklearn import preprocessing
from scipy.spatial.distance import pdist
os.environ['RKNN_DRAW_DATA_DISTRIBUTE']="1"

if __name__ == '__main__':
    BUILD_QUANT = True

    # Create RKNN object
    rknn = RKNN()
    print('--> config model')
    # Note: mean/std reconstructed as an assumption; they must match the normalization
    # used when the facenet model was trained/exported (here (x - 127.5) / 127.5)
    rknn.config(mean_values=[[127.5, 127.5, 127.5]], std_values=[[127.5, 127.5, 127.5]], target_platform='rv1103')
    print('done')

    # Load model
    print('--> Loading model')
    ret = rknn.load_onnx(model='../../model_zoo/export/facenet.onnx')
    if ret != 0:
      print('Load facenet failed!')
      exit(ret)
    print('done')

    # Build model
    print('--> Building model')
    ret = rknn.build(do_quantization=True, dataset='../dataset/facenet_dataset.txt')
    if ret != 0:
      print('Build model failed!')
      exit(ret)
    print('done')

    # Init runtime
    print('--> Init runtime environment')
    ret = rknn.init_runtime()
    if ret != 0:
      print('Init runtime environment failed')
      exit(ret)
    print('done')

    # Set inputs and inference
    image_1 = Image.open("./dataset/facenet/1_001.jpg")
    image_1 = image_1.resize((160,160), Image.BICUBIC)
    img1 = np.asarray(image_1, np.uint8)
    outputs1 = np.array(rknn.inference(data_format='nhwc', inputs=[img1])[0])
    outputs1 = preprocessing.normalize(outputs1, norm='l2')
   
    image_2 = Image.open("./dataset/facenet/1_002.jpg")
    image_2 = image_2.resize((160,160), Image.BICUBIC)
    img2 = np.asarray(image_2, np.uint8)
    outputs2 = np.array(rknn.inference(data_format='nhwc', inputs=[img2])[0])
    outputs2 = preprocessing.normalize(outputs2, norm='l2')

    # Get distance
    distance = np.linalg.norm(outputs2 - outputs1, axis=1)
    print("distance:", distance)
   

    rknn.release()
```
The execution result is as follows:
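Assuming 1_001.jpg and 1_002.jpg show the same person (as the file naming suggests), the printed distance should be small. A minimal sketch of turning this distance into a match decision; the threshold value is an assumption and should be tuned on a labelled validation set:
```python
# Hypothetical decision rule: embeddings closer than a tuned threshold are treated as the same person
THRESHOLD = 1.0  # assumed value; calibrate on your own data

def is_same_person(dist, threshold=THRESHOLD):
    return dist < threshold

# Example usage with the distance computed by the script above
print("same person" if is_same_person(distance[0]) else "different person")
```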


The project files are attached in the appendix.