Skip to content

Commit

Permalink
Correct aspect ratio after frame to input resizing
Browse files (browse the repository at this point in the history)
  • Loading branch information
Nuzhny007 committed Dec 10, 2024
1 parent f5d869b commit bd7ae44
Show file tree
Hide file tree
Showing 13 changed files with 62 additions and 66 deletions.
7 changes: 4 additions & 3 deletions src/Detector/tensorrt_yolo/YoloONNX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,8 +326,9 @@ bool YoloONNX::ProcessInputAspectRatio(const std::vector<cv::Mat>& sampleImages)
}
}

#if 0
m_resizedROI = cv::Rect(0, 0, inputW, inputH);

#if 1
// resize the DsImage with scale
const float imgHeight = static_cast<float>(sampleImages[0].rows);
const float imgWidth = static_cast<float>(sampleImages[0].cols);
Expand All @@ -351,7 +352,7 @@ bool YoloONNX::ProcessInputAspectRatio(const std::vector<cv::Mat>& sampleImages)
assert(2 * yOffset + resizeH == inputH);

cv::Size scaleSize(inputW, inputH);
cv::Rect roiRect(xOffset, yOffset, resizeW, resizeH);
m_resizedROI = cv::Rect(xOffset, yOffset, resizeW, resizeH);

if (m_resizedBatch.size() < sampleImages.size())
m_resizedBatch.resize(sampleImages.size());
Expand All @@ -361,7 +362,7 @@ bool YoloONNX::ProcessInputAspectRatio(const std::vector<cv::Mat>& sampleImages)
{
if (m_resizedBatch[b].size() != scaleSize)
m_resizedBatch[b] = cv::Mat(scaleSize, sampleImages[b].type(), cv::Scalar::all(128));
cv::resize(sampleImages[b], cv::Mat(m_resizedBatch[b], roiRect), roiRect.size(), 0, 0, cv::INTER_LINEAR);
cv::resize(sampleImages[b], cv::Mat(m_resizedBatch[b], m_resizedROI), m_resizedROI.size(), 0, 0, cv::INTER_LINEAR);
cv::split(m_resizedBatch[b], m_inputChannels[b]);
std::swap(m_inputChannels[b][0], m_inputChannels[b][2]);
}
Expand Down
7 changes: 4 additions & 3 deletions src/Detector/tensorrt_yolo/YoloONNX.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,10 @@ class YoloONNX
size_t GetNumClasses() const;

protected:
SampleYoloParams m_params; //!< The parameters for the sample.
nvinfer1::Dims m_inputDims; //!< The dimensions of the input to the network.
std::vector<nvinfer1::Dims> m_outpuDims; //!< The dimensions of the input to the network.
SampleYoloParams m_params; //!< The parameters for the sample
nvinfer1::Dims m_inputDims; //!< The dimensions of the input to the network
std::vector<nvinfer1::Dims> m_outpuDims; //!< The dimensions of the outputs of the network
cv::Rect m_resizedROI; //!< Rectangle inside the network input where the frame, resized with its aspect ratio preserved, is placed

virtual std::vector<tensor_rt::Result> GetResult(size_t imgIdx, int keep_topk, const std::vector<float*>& outputs, cv::Size frameSize) = 0;

Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv10_bb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class YOLOv10_bb_onnx : public YoloONNX
//0: name: images, size: 1x3x640x640
//1: name: output0, size: 1x300x6

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

auto output = outputs[0];

Expand Down Expand Up @@ -51,8 +51,8 @@ class YOLOv10_bb_onnx : public YoloONNX
//if (i == 0)
// std::cout << i << ": " << output[k + 0] << " " << output[k + 1] << " " << output[k + 2] << " " << output[k + 3] << " " << output[k + 4] << " " << output[k + 5] << std::endl;

float x = fw * output[k + 0];
float y = fh * output[k + 1];
float x = fw * (output[k + 0] - m_resizedROI.x);
float y = fh * (output[k + 1] - m_resizedROI.y);
float width = fw * (output[k + 2] - output[k + 0]);
float height = fh * (output[k + 3] - output[k + 1]);
float objectConf = output[k + 4];
Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv11_bb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class YOLOv11_bb_onnx : public YoloONNX
//0: name: images, size: 1x3x640x640
//1: name: output0, size: 1x84x8400

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

auto output = outputs[0];

Expand Down Expand Up @@ -88,8 +88,8 @@ class YOLOv11_bb_onnx : public YoloONNX
confidences.push_back(objectConf);

// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];
rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height));
Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv11_instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ class YOLOv11_instance_onnx : public YoloONNX
{
std::vector<tensor_rt::Result> resBoxes;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

size_t outInd = (outputs.size() == 0) ? 1 : 0;
size_t segInd = (outputs.size() == 0) ? 0 : 1;
Expand Down Expand Up @@ -155,8 +155,8 @@ class YOLOv11_instance_onnx : public YoloONNX
if (objectConf >= m_params.confThreshold)
{
// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];

Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv11_obb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ class YOLOv11_obb_onnx : public YoloONNX
//20: 15 DOTA classes + x + y + w + h + a
constexpr int shapeDataSize = 5;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

auto output = outputs[0];

Expand Down Expand Up @@ -96,8 +96,8 @@ class YOLOv11_obb_onnx : public YoloONNX
confidences.push_back(objectConf);

// (center x, center y, width, height)
float cx = fw * output[k];
float cy = fh * output[k + 1];
float cx = fw * (output[k] - m_resizedROI.x);
float cy = fh * (output[k + 1] - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];
float angle = 180.f * output[k + nc + shapeDataSize - 1] / M_PI;
Expand Down
21 changes: 9 additions & 12 deletions src/Detector/tensorrt_yolo/YoloONNXv6_bb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ class YOLOv6_bb_onnx : public YoloONNX
{
std::vector<tensor_rt::Result> resBoxes;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

if (outputs.size() == 4)
{
auto dets = reinterpret_cast<int*>(outputs[0]);
Expand All @@ -26,9 +29,6 @@ class YOLOv6_bb_onnx : public YoloONNX

int objectsCount = m_outpuDims[1].d[1];

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);

//std::cout << "Dets[" << imgIdx << "] = " << dets[imgIdx] << ", objectsCount = " << objectsCount << std::endl;

const size_t step1 = imgIdx * objectsCount;
Expand All @@ -41,8 +41,8 @@ class YOLOv6_bb_onnx : public YoloONNX
int classId = classes[i + step1];
if (class_conf >= m_params.confThreshold)
{
float x = fw * boxes[k + 0 + step2];
float y = fh * boxes[k + 1 + step2];
float x = fw * (boxes[k + 0 + step2] - m_resizedROI.x);
float y = fh * (boxes[k + 1 + step2] - m_resizedROI.y);
float width = fw * boxes[k + 2 + step2] - x;
float height = fh * boxes[k + 3 + step2] - y;

Expand All @@ -57,9 +57,6 @@ class YOLOv6_bb_onnx : public YoloONNX
}
else if (outputs.size() == 1)
{
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);

auto output = outputs[0];

size_t ncInd = 2;
Expand Down Expand Up @@ -96,8 +93,8 @@ class YOLOv6_bb_onnx : public YoloONNX
int classId = cvRound(output[k + 5]);
if (class_conf >= m_params.confThreshold)
{
float x = fw * output[k + 1];
float y = fh * output[k + 2];
float x = fw * (output[k + 1] - m_resizedROI.x);
float y = fh * (output[k + 2] - m_resizedROI.y);
float width = fw * (output[k + 3] - output[k + 1]);
float height = fh * (output[k + 4] - output[k + 2]);

Expand Down Expand Up @@ -150,8 +147,8 @@ class YOLOv6_bb_onnx : public YoloONNX
if (object_conf >= m_params.confThreshold)
{
// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];

Expand Down
21 changes: 9 additions & 12 deletions src/Detector/tensorrt_yolo/YoloONNXv7_bb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ class YOLOv7_bb_onnx : public YoloONNX
{
std::vector<tensor_rt::Result> resBoxes;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

if (outputs.size() == 4)
{
auto dets = reinterpret_cast<int*>(outputs[0]);
Expand All @@ -26,9 +29,6 @@ class YOLOv7_bb_onnx : public YoloONNX

int objectsCount = m_outpuDims[1].d[1];

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);

//std::cout << "Dets[" << imgIdx << "] = " << dets[imgIdx] << ", objectsCount = " << objectsCount << std::endl;

const size_t step1 = imgIdx * objectsCount;
Expand All @@ -41,8 +41,8 @@ class YOLOv7_bb_onnx : public YoloONNX
int classId = classes[i + step1];
if (class_conf >= m_params.confThreshold)
{
float x = fw * boxes[k + 0 + step2];
float y = fh * boxes[k + 1 + step2];
float x = fw * (boxes[k + 0 + step2] - m_resizedROI.x);
float y = fh * (boxes[k + 1 + step2] - m_resizedROI.y);
float width = fw * boxes[k + 2 + step2] - x;
float height = fh * boxes[k + 3 + step2] - y;

Expand All @@ -57,9 +57,6 @@ class YOLOv7_bb_onnx : public YoloONNX
}
else if (outputs.size() == 1)
{
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);

auto output = outputs[0];

size_t ncInd = 2;
Expand Down Expand Up @@ -96,8 +93,8 @@ class YOLOv7_bb_onnx : public YoloONNX
int classId = cvRound(output[k + 5]);
if (class_conf >= m_params.confThreshold)
{
float x = fw * output[k + 1];
float y = fh * output[k + 2];
float x = fw * (output[k + 1] - m_resizedROI.x);
float y = fh * (output[k + 2] - m_resizedROI.y);
float width = fw * (output[k + 3] - output[k + 1]);
float height = fh * (output[k + 4] - output[k + 2]);

Expand Down Expand Up @@ -150,8 +147,8 @@ class YOLOv7_bb_onnx : public YoloONNX
if (object_conf >= m_params.confThreshold)
{
// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];

Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv7_instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ class YOLOv7_instance_onnx : public YoloONNX
{
std::vector<tensor_rt::Result> resBoxes;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

size_t outInd = (outputs.size() == 0) ? 0 : 1;
size_t segInd = (outputs.size() == 0) ? 1 : 0;
Expand Down Expand Up @@ -123,8 +123,8 @@ class YOLOv7_instance_onnx : public YoloONNX
if (object_conf >= m_params.confThreshold)
{
// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];

Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv8_bb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ class YOLOv8_bb_onnx : public YoloONNX
//0: name: images, size: 1x3x640x640
//1: name: output0, size: 1x84x8400

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

auto output = outputs[0];

Expand Down Expand Up @@ -88,8 +88,8 @@ class YOLOv8_bb_onnx : public YoloONNX
confidences.push_back(objectConf);

// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];
rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height));
Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv8_instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ class YOLOv8_instance_onnx : public YoloONNX
{
std::vector<tensor_rt::Result> resBoxes;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

size_t outInd = (outputs.size() == 0) ? 0 : 1;
size_t segInd = (outputs.size() == 0) ? 1 : 0;
Expand Down Expand Up @@ -155,8 +155,8 @@ class YOLOv8_instance_onnx : public YoloONNX
if (objectConf >= m_params.confThreshold)
{
// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];

Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv8_obb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ class YOLOv8_obb_onnx : public YoloONNX
//20: 15 DOTA classes + x + y + w + h + a
constexpr int shapeDataSize = 5;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

auto output = outputs[0];

Expand Down Expand Up @@ -96,8 +96,8 @@ class YOLOv8_obb_onnx : public YoloONNX
confidences.push_back(objectConf);

// (center x, center y, width, height)
float cx = fw * output[k];
float cy = fh * output[k + 1];
float cx = fw * (output[k] - m_resizedROI.x);
float cy = fh * (output[k + 1] - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];
float angle = 180.f * output[k + nc + shapeDataSize - 1] / M_PI;
Expand Down
8 changes: 4 additions & 4 deletions src/Detector/tensorrt_yolo/YoloONNXv9_bb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ class YOLOv9_bb_onnx : public YoloONNX
//84: 80 COCO classes + x + y + w + h
constexpr int shapeDataSize = 4;

const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_inputDims.d[3]);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_inputDims.d[2]);
const float fw = static_cast<float>(frameSize.width) / static_cast<float>(m_resizedROI.width);
const float fh = static_cast<float>(frameSize.height) / static_cast<float>(m_resizedROI.height);

auto output = outputs[0];

Expand Down Expand Up @@ -90,8 +90,8 @@ class YOLOv9_bb_onnx : public YoloONNX
confidences.push_back(objectConf);

// (center x, center y, width, height) to (x, y, w, h)
float x = fw * (output[k] - output[k + 2] / 2);
float y = fh * (output[k + 1] - output[k + 3] / 2);
float x = fw * (output[k] - output[k + 2] / 2 - m_resizedROI.x);
float y = fh * (output[k + 1] - output[k + 3] / 2 - m_resizedROI.y);
float width = fw * output[k + 2];
float height = fh * output[k + 3];
rectBoxes.emplace_back(cvRound(x), cvRound(y), cvRound(width), cvRound(height));
Expand Down

0 comments on commit bd7ae44

Please sign in to comment.