vision/YOLOPv2.cpp
2025-04-06 17:27:22 +08:00

303 lines
12 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include "YOLOPv2.h"
#include <QLoggingCategory>
YOLOPv2::YOLOPv2(Net_config config)
{
this->confThreshold = config.confThreshold;
this->nmsThreshold = config.nmsThreshold;
//string model_path = config.modelpath;
//std::wstring widestr = std::wstring(model_path.begin(), model_path.end());
//CUDA option set
OrtCUDAProviderOptions cuda_option;
cuda_option.device_id = 0;
cuda_option.arena_extend_strategy = 0;
cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;
cuda_option.gpu_mem_limit = SIZE_MAX;
cuda_option.do_copy_in_default_stream = 1;
//CUDA 加速
sessionOptions.SetIntraOpNumThreads(1);//设置线程数
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); //函数用于设置在使用 ORT 库执行模型时应用的图优化级别。ORT_ENABLE_ALL 选项启用所有可用的优化,这可以导致更快速和更高效的执行。
sessionOptions.AppendExecutionProvider_CUDA(cuda_option);
const char *modelpath = "/home/wuxianfu/Projects/Fast-YolopV2/build/onnx/yolopv2_192x320.onnx" ;
ort_session = new Session(env, modelpath, sessionOptions);
size_t numInputNodes = ort_session->GetInputCount();
size_t numOutputNodes = ort_session->GetOutputCount();
AllocatorWithDefaultOptions allocator;
for (int i = 0; i < numInputNodes; i++)
{
//input_names.push_back(ort_session->GetInputName(i, allocator));
AllocatedStringPtr input_name_Ptr = ort_session->GetInputNameAllocated(i, allocator);
input_names.push_back(input_name_Ptr.get());
qDebug() << "Input Name: " << input_name_Ptr.get();
Ort::TypeInfo input_type_info = ort_session->GetInputTypeInfo(i);
auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
auto input_dims = input_tensor_info.GetShape();
input_node_dims.push_back(input_dims);
}
for (int i = 0; i < numOutputNodes; i++)
{
//output_names.push_back(ort_session->GetOutputName(i, allocator));
AllocatedStringPtr output_name_Ptr= ort_session->GetOutputNameAllocated(i, allocator);
output_names.push_back(output_name_Ptr.get());
qDebug() << "Output Name: " << output_name_Ptr.get();
Ort::TypeInfo output_type_info = ort_session->GetOutputTypeInfo(i);
auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
auto output_dims = output_tensor_info.GetShape();
output_node_dims.push_back(output_dims);
}
this->inpHeight = input_node_dims[0][2];
this->inpWidth = input_node_dims[0][3];
string classesFile = "/home/wuxianfu/Projects/Fast-YolopV2/build/coco.names";
ifstream ifs(classesFile.c_str());
string line;
while (getline(ifs, line)) this->class_names.push_back(line);
this->num_class = class_names.size();
}
void YOLOPv2::normalize_(Mat img)
{
// img.convertTo(img, CV_32F);
int row = img.rows;
int col = img.cols;
this->input_image_.resize(row * col * img.channels());
for (int c = 0; c < 3; c++)
{
for (int i = 0; i < row; i++)
{
for (int j = 0; j < col; j++)
{
float pix = img.ptr<uchar>(i)[j * 3 + 2 - c];
this->input_image_[c * row * col + i * col + j] = pix / 255.0;
}
}
}
}
void YOLOPv2::nms(vector<BoxInfo>& input_boxes)
{
sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i)
{
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
vector<bool> isSuppressed(input_boxes.size(), false);
for (int i = 0; i < int(input_boxes.size()); ++i)
{
if (isSuppressed[i]) { continue; }
for (int j = i + 1; j < int(input_boxes.size()); ++j)
{
if (isSuppressed[j]) { continue; }
float xx1 = (max)(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = (max)(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = (min)(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = (min)(input_boxes[i].y2, input_boxes[j].y2);
float w = (max)(float(0), xx2 - xx1 + 1);
float h = (max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= this->nmsThreshold)
{
isSuppressed[j] = true;
}
}
}
// return post_nms;
int idx_t = 0;
input_boxes.erase(remove_if(input_boxes.begin(), input_boxes.end(), [&idx_t, &isSuppressed](const BoxInfo& f) { return isSuppressed[idx_t++]; }), input_boxes.end());
}
inline float sigmoid(float x)
{
return 1.0 / (1 + exp(-x));
}
Mat YOLOPv2::detect(Mat& frame)
{
Mat dstimg;
resize(frame, dstimg, Size(this->inpWidth, this->inpHeight));
this->normalize_(dstimg);
array<int64_t, 4> input_shape_{ 1, 3, this->inpHeight, this->inpWidth };
auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
Value input_tensor_ = Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());
// 开始推理
/*qDebug() << " output_names size: " << output_names.size()<< " sec \n";
qDebug() << " input_names: " << input_names[0]<< " sec \n";
qDebug() << " output_names: " << output_names[1]<< " sec \n";
vector<Value> ort_outputs = ort_session->Run(RunOptions{nullptr}, input_names.data(), &input_tensor_, 1, output_names.data(), output_names.size());*/
const char* inputNames[] = { "input" };//这两个值是根据netron查看onnx格式得到的输入输出名称
const char* outputNames[] = { "seg" , "ll" , "pred0" , "pred1" , "pred2" , };
vector<Value> ort_outputs = ort_session->Run(RunOptions{nullptr}, inputNames, &input_tensor_, 1, outputNames, 5);
/////generate proposals
vector<BoxInfo> generate_boxes;
float ratioh = (float)frame.rows / this->inpHeight, ratiow = (float)frame.cols / this->inpWidth;
int n = 0, q = 0, i = 0, j = 0, nout = this->class_names.size() + 5, c = 0, area = 0;
for (n = 0; n < 3; n++) ///尺度
{
int num_grid_x = (int)(this->inpWidth / this->stride[n]);
int num_grid_y = (int)(this->inpHeight / this->stride[n]);
area = num_grid_x * num_grid_y;
const float* pdata = ort_outputs[n + 2].GetTensorMutableData<float>();
for (q = 0; q < 3; q++) ///anchor数
{
const float anchor_w = this->anchors[n][q * 2];
const float anchor_h = this->anchors[n][q * 2 + 1];
for (i = 0; i < num_grid_y; i++)
{
for (j = 0; j < num_grid_x; j++)
{
float box_score = sigmoid(pdata[4 * area + i * num_grid_x + j]);
if (box_score > this->confThreshold)
{
float max_class_socre = -100000, class_socre = 0;
int max_class_id = 0;
for (c = 0; c < this->class_names.size(); c++) //// get max socre
{
class_socre = pdata[(c + 5) * area + i * num_grid_x + j];
if (class_socre > max_class_socre)
{
max_class_socre = class_socre;
max_class_id = c;
}
}
max_class_socre = sigmoid(max_class_socre) * box_score;
if (max_class_socre > this->confThreshold)
{
float cx = (sigmoid(pdata[i * num_grid_x + j]) * 2.f - 0.5f + j) * this->stride[n]; ///cx
float cy = (sigmoid(pdata[area + i * num_grid_x + j]) * 2.f - 0.5f + i) * this->stride[n]; ///cy
float w = powf(sigmoid(pdata[2 * area + i * num_grid_x + j]) * 2.f, 2.f) * anchor_w; ///w
float h = powf(sigmoid(pdata[3 * area + i * num_grid_x + j]) * 2.f, 2.f) * anchor_h; ///h
float xmin = (cx - 0.5*w)*ratiow;
float ymin = (cy - 0.5*h)*ratioh;
float xmax = (cx + 0.5*w)*ratiow;
float ymax = (cy + 0.5*h)*ratioh;
generate_boxes.push_back(BoxInfo{ xmin, ymin, xmax, ymax, max_class_socre, max_class_id });
}
}
}
}
pdata += area * nout;
}
}
nms(generate_boxes);
Mat outimg = frame.clone();
for (size_t i = 0; i < generate_boxes.size(); ++i)
{
int xmin = int(generate_boxes[i].x1);
int ymin = int(generate_boxes[i].y1);
rectangle(outimg, Point(xmin, ymin), Point(int(generate_boxes[i].x2), int(generate_boxes[i].y2)), Scalar(0, 0, 255), 2);
string label = format("%.2f", generate_boxes[i].score);
label = this->class_names[generate_boxes[i].label-1] + ":" + label;
putText(outimg, label, Point(xmin, ymin - 5), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
}
const float* pdrive_area = ort_outputs[0].GetTensorMutableData<float>();
const float* plane_line = ort_outputs[1].GetTensorMutableData<float>();
area = this->inpHeight*this->inpWidth;
int min_y = -1;
vector<Point2f> points_L, points_R;
for (i = 0; i < frame.rows; i++)
{
bool flg = false;
int left = -1, right = -1;
for (j = 0; j < frame.cols; j++)
{
const int x = int(j / ratiow);
const int y = int(i / ratioh);
if (pdrive_area[y * this->inpWidth + x] < pdrive_area[area + y * this->inpWidth + x])
{
outimg.at<Vec3b>(i, j)[0] = 0;
outimg.at<Vec3b>(i, j)[1] = 255;
outimg.at<Vec3b>(i, j)[2] = 0;
}
if (plane_line[y * this->inpWidth + x] > 0.5)
{
outimg.at<Vec3b>(i, j)[0] = 255;
outimg.at<Vec3b>(i, j)[1] = 0;
outimg.at<Vec3b>(i, j)[2] = 0;
if (!flg && j >= frame.cols / 2 && right == -1) { // 记录图像右半部分最靠左的车道线的左边缘坐标
right = j;
}
flg = true;
} else {
if (flg && j - 1 < frame.cols / 2) { //记录图像左半部分最靠右的车道线的右边缘坐标
left = j - 1;
}
flg = false;
}
}
if (min_y == -1 && (left != -1 || right != -1)) {
min_y = i;
}
if (left != -1){
points_L.push_back(Point2f(left, i));
}
if (right != -1){
points_R.push_back(Point2f(right, i));
}
//若左右参考车道线均存在,计算并标记中心点
if (left > -1 && right > -1) {
int mid = (left + right) / 2;
for (int k = -5; k <= 5; k++) {
outimg.at<Vec3b>(i, mid+k)[0] = 255;
outimg.at<Vec3b>(i, mid+k)[1] = 255;
outimg.at<Vec3b>(i, mid+k)[2] = 0;
}
}
//(需要考虑的问题 1.双车道3条线 2.拐角处曲线 3.近处显示不全 4.两条线粘连)
}
//备选方案,对左右车道线分别拟合直线并计算中心线解析式 泛化 鲁棒 目前有bug
if (points_L.size() && points_R.size()) {
Vec4f line_L, line_R;
float kL, bL, kR, bR, kM, bM; // x=ky+b
fitLine(points_L, line_L, DIST_WELSCH, 0, 0.01, 0.01);
fitLine(points_R, line_R, DIST_WELSCH, 0, 0.01, 0.01);
kL = line_L[0] / line_L[1];
bL = line_L[2] - kL * line_L[3];
kR = line_R[0] / line_R[1];
bR = line_R[2] - kR * line_R[3];
kM = (kL + kR) / 2;
bM = (bL + bR) / 2;
for (int i = min_y; i < frame.rows; i++) {
int mid = round(kM * i + bM);
for (int k = -5; k <= 5; k++) {
outimg.at<Vec3b>(i, mid+k)[0] = 255;
outimg.at<Vec3b>(i, mid+k)[1] = 0;
outimg.at<Vec3b>(i, mid+k)[2] = 255;
}
}
}
return outimg;
}