【昉·星光 2 RISC-V单板计算机】物体识别的板端推理
本文介绍了昉·星光 2(VisionFive 2)单板计算机基于 OpenCV DNN 模块和 YOLOv5 ONNX 模型实现物体识别的板端推理。
准备工作
在完成前面帖子描述的官方软件库更新、并确认 OpenCV 可正常调用的基础上,进一步改进 Python 代码,完成物体识别板端推理的项目设计。
新建文件夹
SSH 登录系统,进入 /home/user/ 文件夹。为了便于管理项目,新建 Ai_Inf 文件夹,用于存放板端识别工程文件,包括 ONNX 模型文件、coco.names 物体类别文件、yolov5_inf.py 执行程序以及待识别的输入图片文件等。
流程图
项目代码
新建 yolov5_inf.py 文件,添加板端推理代码。
"""YOLOv5 object detection on a single image using the OpenCV DNN module.

Usage:
    python3 yolov5_inf.py [image_path] [--model M] [--classes C] [--output O]

The script loads an ONNX model and a class-name file, runs one forward pass
on the CPU, draws the detections that survive Non-Maximum Suppression and
writes the annotated image to disk.
"""
import argparse
import sys

import cv2
import numpy as np

# Constants
INPUT_WIDTH = 640            # Model input width
INPUT_HEIGHT = 640           # Model input height
SCORE_THRESHOLD = 0.5        # Class score threshold
NMS_THRESHOLD = 0.45         # Non-Maximum Suppression threshold
CONFIDENCE_THRESHOLD = 0.45  # Confidence threshold

# Text parameters
FONT_FACE = cv2.FONT_HERSHEY_SIMPLEX  # Font type
FONT_SCALE = 0.7                      # Font scale factor
THICKNESS = 1                         # Font thickness

# Colors (BGR format)
BLACK = (0, 0, 0)
BLUE = (255, 178, 50)
GREEN = (0, 255, 0)
YELLOW = (0, 255, 255)
RED = (0, 0, 255)


def draw_label(input_image, label, left, top):
    """Draw a text label on a filled black background, in place.

    Args:
        input_image: BGR image (NumPy array) that is drawn on.
        label: Text to render.
        left: X coordinate of the label's top-left corner.
        top: Y coordinate of the label's top-left corner.
    """
    # Boxes may start outside the image; clamp so the label stays visible.
    left = max(left, 0)
    top = max(top, 0)

    (text_width, text_height), baseline = cv2.getTextSize(
        label, FONT_FACE, FONT_SCALE, THICKNESS)

    # Black background rectangle sized to the text plus its baseline.
    cv2.rectangle(input_image,
                  (left, top),
                  (left + text_width, top + text_height + baseline),
                  BLACK, cv2.FILLED)
    # putText anchors at the bottom-left corner of the text.
    cv2.putText(input_image, label, (left, top + text_height),
                FONT_FACE, FONT_SCALE, YELLOW, THICKNESS, cv2.LINE_AA)


def pre_process(input_image, net):
    """Convert the image to a network blob and run a forward pass.

    Args:
        input_image: BGR image as read by ``cv2.imread``.
        net: Network object returned by ``cv2.dnn.readNet``.

    Returns:
        The outputs of the network's unconnected output layers.
    """
    blob = cv2.dnn.blobFromImage(
        input_image,
        1 / 255,                      # Scale pixel values to [0, 1]
        (INPUT_WIDTH, INPUT_HEIGHT),  # Target size
        [0, 0, 0],                    # Mean subtraction
        swapRB=True,                  # BGR to RGB
        crop=False,                   # No cropping
    )
    net.setInput(blob)
    return net.forward(net.getUnconnectedOutLayersNames())


def post_process(input_image, outputs, classes):
    """Parse detections, apply NMS and draw the surviving boxes.

    Each detection row is read as ``[cx, cy, w, h, confidence, class scores...]``
    with box coordinates expressed in model-input pixels.

    Args:
        input_image: Original BGR image; it is not modified.
        outputs: Network outputs as returned by ``pre_process``.
        classes: List of class names indexed by class id.

    Returns:
        A copy of ``input_image`` with boxes and labels drawn on it.
    """
    result_image = input_image.copy()

    detections = outputs[0][0]
    image_height, image_width = input_image.shape[:2]

    # Factors that map model-input coordinates back to the original image.
    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT

    # Drop weak detections with one vectorized mask so the Python loop below
    # only visits the few remaining candidates instead of every row.
    candidates = detections[detections[:, 4] >= CONFIDENCE_THRESHOLD]

    class_ids = []
    confidences = []
    boxes = []
    for row in candidates:
        classes_scores = row[5:]
        class_id = int(np.argmax(classes_scores))
        if classes_scores[class_id] <= SCORE_THRESHOLD:
            continue

        cx, cy, w, h = row[0], row[1], row[2], row[3]
        left = int((cx - w / 2) * x_factor)
        top = int((cy - h / 2) * y_factor)
        width = int(w * x_factor)
        height = int(h * y_factor)

        confidences.append(float(row[4]))
        class_ids.append(class_id)
        boxes.append([left, top, width, height])

    if not boxes:
        return result_image

    indices = cv2.dnn.NMSBoxes(boxes, confidences,
                               CONFIDENCE_THRESHOLD, NMS_THRESHOLD)

    # Depending on the OpenCV version NMSBoxes returns a flat array, an Nx1
    # array, or an empty tuple; flattening handles all three.
    for idx in np.array(indices).flatten():
        idx = int(idx)
        left, top, width, height = boxes[idx]
        cv2.rectangle(result_image, (left, top),
                      (left + width, top + height), GREEN, 2 * THICKNESS)

        class_id = class_ids[idx]
        # Fall back to the numeric id if the names file is shorter than the
        # number of classes the model predicts.
        name = classes[class_id] if class_id < len(classes) else f"class {class_id}"
        label = f"{name}: {confidences[idx]:.2f}"
        draw_label(result_image, label, left, top)

    return result_image


def _load_class_names(path):
    """Read one class name per line from ``path``.

    Trailing blank lines are dropped and surrounding whitespace (including
    ``\\r`` from CRLF files) is stripped; the line order, and therefore the
    class-id mapping, is preserved.
    """
    with open(path, 'rt', encoding='utf-8') as f:
        return [line.strip() for line in f.read().rstrip().splitlines()]


def main():
    """Command-line entry point: parse arguments, run inference, save result.

    Exits with status 0 on success and 1 on any failure.
    """
    parser = argparse.ArgumentParser(description='YOLOv5 Object Detection')
    # nargs='?' is required for the default of a positional argument to apply.
    parser.add_argument('image_path', type=str, nargs='?',
                        default="model/test.jpg", help='Input image path')
    parser.add_argument('--model', type=str, default="model/yolov5n.onnx",
                        help='ONNX model path')
    parser.add_argument('--classes', type=str, default="model/coco.names",
                        help='Class names file path')
    parser.add_argument('--output', type=str, default="result.jpg",
                        help='Output image path')
    args = parser.parse_args()

    # Load class names
    try:
        classes = _load_class_names(args.classes)
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error: Cannot load class file {args.classes}: {str(e)}",
              file=sys.stderr)
        sys.exit(1)

    # Load model
    try:
        net = cv2.dnn.readNet(args.model)
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)  # Use CPU on VisionFive 2
    except Exception as e:
        print(f"Error: Cannot load model {args.model}: {str(e)}",
              file=sys.stderr)
        sys.exit(1)

    # Read input image; imread signals failure by returning None.
    frame = cv2.imread(args.image_path)
    if frame is None:
        print(f"Error: Cannot load image {args.image_path}: "
              "Cannot read image file", file=sys.stderr)
        sys.exit(1)

    # Run inference pipeline
    try:
        detections = pre_process(frame, net)
        # post_process works on its own copy, so frame can be passed directly.
        result_img = post_process(frame, detections, classes)

        # getPerfProfile reports the last forward pass in ticks.
        t, _ = net.getPerfProfile()
        inference_time = t * 1000.0 / cv2.getTickFrequency()
        print(f"Inference time: {inference_time:.2f} ms")

        time_label = f'Inference time: {inference_time:.2f} ms'
        cv2.putText(result_img, time_label, (20, 30), FONT_FACE, FONT_SCALE,
                    RED, THICKNESS, cv2.LINE_AA)

        # imwrite returns False on failure instead of raising.
        if not cv2.imwrite(args.output, result_img):
            raise OSError(f"Cannot write output image {args.output}")
        print(f"Results have saved to: {args.output}")
    except Exception as e:
        print(f"Error in the inference process: {str(e)}", file=sys.stderr)
        sys.exit(1)

    sys.exit(0)


if __name__ == '__main__':
    main()
保存代码至板端,注意 ONNX 和 coco.names 文件的位置,修改代码中模型文件对应的路径。
运行程序
进入执行文件所在文件夹,输入指令
python3 yolov5_inf.py model/test.jpg
运行程序,推理结束后,在执行程序同目录下生成 result.jpg 文件,即推理结果。
由于仅使用 CPU 完成推理,耗时较长,单张图片的实际推理时间约为 5 秒。
识别效果
针对 YOLOv5 模型可识别的项目(见 coco.names ),选取了一些特定场景进行物体识别,并展示板端推理的结果。
数码电器
马克杯没有识别出来,可能原因是模型未收录,或图片清晰度不足、拍摄角度问题等。
识别洗衣机存在误差,且部分电器未能识别,原因可能是模型未收录。
水果蔬菜
识别效果较好,基本可以正确识别,但置信度有待提升。
可见,对于多种物体混合分布的情况,识别效果会受到影响。
车辆行人
对于真实场景(街道、路口)的识别较为仔细和丰富,且结果准确率较高。
重复识别同一或相似场景,可以得到较为一致的推理结果和准确率。
对于国外的素材场景能够实现更佳的推理结果,模型需要增加本土环境的训练。
动物
对于动物的识别存在误差,准确率在 60% 左右,建议增加训练样本数量和种类,以覆盖更多物种。
运动
运动场景也是推理识别常见的应用场合,试验结果表明该模型对运动素材具有较好的识别能力。
识别范围
由 coco.names 可知该模型支持识别的物品种类包括
person, bicycle, car, motorbike, aeroplane, bus, train, truck, boat, traffic light, fire hydrant, stop sign, parking meter, bench, bird, cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe, backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard, sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot, hot dog, pizza, donut, cake, chair, sofa, pottedplant, bed, diningtable, toilet, tvmonitor, laptop, mouse, remote, keyboard, cell phone, microwave, oven, toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, hair drier, toothbrush
共包含 80 种物体类别,基本覆盖日常生活场景。
总结
本文介绍了昉·星光 2(VisionFive 2)单板计算机基于 OpenCV DNN 模块和 YOLOv5 ONNX 模型实现物体识别的板端推理,包括准备工作、流程图、项目代码、程序运行等流程,并展示了不同应用场景下的识别效果,为该设备在人工智能与边缘 AI 方面的应用与开发提供了参考。