SceneSwitcher/plugins/video/opencv-helpers.cpp
2025-05-27 22:37:46 +02:00

332 lines
9.0 KiB
C++

#include "opencv-helpers.hpp"
#include <log-helper.hpp>
namespace advss {
PatternImageData CreatePatternData(const QImage &pattern)
{
	// Build the matching data for a pattern image: the full RGBA mat,
	// an RGB-only variant, and a binary mask derived from the alpha
	// channel (used to stencil the pattern during template matching).
	PatternImageData data{};
	if (pattern.isNull()) {
		return data;
	}
	data.rgbaPattern = QImageToMat(pattern);

	// Split into per-channel planes; the first three planes form the
	// RGB pattern, the fourth (alpha) becomes the mask.
	std::vector<cv::Mat1b> planes;
	cv::split(data.rgbaPattern, planes);

	const std::vector<cv::Mat1b> colorPlanes(planes.begin(),
						 planes.begin() + 3);
	cv::merge(colorPlanes, data.rgbPattern);

	// Any non-zero alpha value is treated as fully opaque (255).
	cv::threshold(planes[3], data.mask, 0, 255, cv::THRESH_BINARY);
	return data;
}
static void preprocessPatternMatchResult(cv::Mat &mat, bool invert)
{
	// Not finite values are not handled well by the cv::threshold method.
	// These occur when the image is completely black, the denominator in
	// all normalized algorithms approaches 0 in these cases if the alpha
	// channel is used as mask.
	//
	// So we are clamping the values here to 0.0..1.0 and dismiss
	// not-finite values.
	// Invertion mode is for the TM_SQDIFF_NORMED method (where a perfect
	// match is 0 instead of 1).
	for (int row = 0; row < mat.rows; ++row) {
		float *rowPtr = mat.ptr<float>(row);
		for (int col = 0; col < mat.cols; ++col) {
			float value = rowPtr[col];
			if (invert) {
				value = 1.0f - value;
			}
			// 1.0f - NaN is still NaN, so the order of the
			// inversion and this check does not matter.
			if (!std::isfinite(value)) {
				value = 0.0f;
			}
			rowPtr[col] =
				std::fmaxf(0.0f, std::fminf(1.0f, value));
		}
	}
}
// Run template matching of patternData against img.
//
// On return "result" holds the (normalized, clamped) match map with all
// values below "threshold" zeroed out. If pBestFitValue is non-null it
// receives the best match value found (NaN if matching was not possible).
// When useAlphaAsMask is set, the pattern's alpha channel acts as a stencil
// and only the RGB channels take part in the matching.
void MatchPattern(QImage &img, const PatternImageData &patternData,
		  double threshold, cv::Mat &result, double *pBestFitValue,
		  bool useAlphaAsMask, cv::TemplateMatchModes matchMode)
{
	// Start from an empty result / NaN best fit so early returns leave
	// well-defined output values.
	result = cv::Mat(0, 0, CV_32F);
	if (pBestFitValue) {
		*pBestFitValue = std::numeric_limits<double>::signaling_NaN();
	}
	if (img.isNull() || patternData.rgbaPattern.empty()) {
		return;
	}
	// The pattern must fit inside the image for matchTemplate to work.
	if (img.height() < patternData.rgbaPattern.rows ||
	    img.width() < patternData.rgbaPattern.cols) {
		return;
	}
	auto input = QImageToMat(img);
	if (useAlphaAsMask) {
		// Remove alpha channel of input image as the alpha channel
		// information is used as a stencil for the pattern instead and
		// thus should not be used while matching the pattern as well
		//
		// Input format is Format_RGBA8888 so discard the 4th channel
		std::vector<cv::Mat1b> inputChannels;
		cv::split(input, inputChannels);
		std::vector<cv::Mat1b> rgbChanlesImage(
			inputChannels.begin(), inputChannels.begin() + 3);
		cv::Mat3b rgbInput;
		cv::merge(rgbChanlesImage, rgbInput);
		cv::matchTemplate(rgbInput, patternData.rgbPattern, result,
				  matchMode, patternData.mask);
	} else {
		cv::matchTemplate(input, patternData.rgbaPattern, result,
				  matchMode);
	}
	// A perfect match is represented as "0" for TM_SQDIFF_NORMED
	//
	// For TM_CCOEFF_NORMED and TM_CCORR_NORMED a perfect match is
	// represented as "1"
	//
	// -> Invert TM_SQDIFF_NORMED in the preprocess step
	preprocessPatternMatchResult(result, matchMode == cv::TM_SQDIFF_NORMED);
	if (pBestFitValue) {
		// Best fit is the global maximum of the (possibly inverted)
		// match map.
		cv::minMaxLoc(result, nullptr, pBestFitValue);
	}
	// Discard everything below the user-supplied match threshold.
	cv::threshold(result, result, threshold, 0.0, cv::THRESH_TOZERO);
}
// Convenience overload: derive the pattern matching data from the given
// pattern image and delegate to the main MatchPattern() implementation.
void MatchPattern(QImage &img, QImage &pattern, double threshold,
		  cv::Mat &result, double *pBestFitValue, bool useAlphaAsMask,
		  cv::TemplateMatchModes matchColor)
{
	MatchPattern(img, CreatePatternData(pattern), threshold, result,
		     pBestFitValue, useAlphaAsMask, matchColor);
}
// Detect objects in img using the given cascade classifier.
// Returns the bounding rectangles of all detections (empty on failure).
std::vector<cv::Rect> MatchObject(QImage &img, cv::CascadeClassifier &cascade,
				  double scaleFactor, int minNeighbors,
				  const cv::Size &minSize,
				  const cv::Size &maxSize)
{
	std::vector<cv::Rect> detections;
	if (img.isNull() || cascade.empty()) {
		return detections;
	}
	// Cascade classifiers operate on grayscale input; equalize the
	// histogram to make detection less sensitive to lighting.
	cv::Mat gray;
	cv::cvtColor(QImageToMat(img), gray, cv::COLOR_RGBA2GRAY);
	cv::equalizeHist(gray, gray);
	try {
		cascade.detectMultiScale(gray, detections, scaleFactor,
					 minNeighbors, 0, minSize, maxSize);
	} catch (const std::exception &e) {
		// Invalid parameter combinations can make OpenCV throw;
		// treat that as "nothing detected" and just log it.
		vblog(LOG_INFO, "detectMultiScale failed: %s", e.what());
	}
	return detections;
}
// Return the average brightness (the V channel of HSV, 0-255) of img,
// or 0 for a null image.
uchar GetAvgBrightness(QImage &img)
{
	if (img.isNull()) {
		return 0;
	}
	auto image = QImageToMat(img);
	cv::Mat hsvImage, rgbImage;
	cv::cvtColor(image, rgbImage, cv::COLOR_RGBA2RGB);
	cv::cvtColor(rgbImage, hsvImage, cv::COLOR_RGB2HSV);
	// Let OpenCV average the channels instead of a manual per-pixel
	// loop. This also avoids the int overflow the previous
	// "rows * cols" divisor could produce for very large images.
	const cv::Scalar channelMeans = cv::mean(hsvImage);
	// Channel 2 is V (brightness); it is always within 0..255.
	return static_cast<uchar>(channelMeans[2]);
}
// Check whether two colors differ by at most maxDiff on each of the
// red, green, and blue channels (alpha is ignored).
static bool colorIsSimilar(const QColor &color1, const QColor &color2,
			   int maxDiff)
{
	return std::abs(color1.red() - color2.red()) <= maxDiff &&
	       std::abs(color1.green() - color2.green()) <= maxDiff &&
	       std::abs(color1.blue() - color2.blue()) <= maxDiff;
}
// Prepare an image for OCR: binarize it so that pixels matching textColor
// (within colorDiff, 0.0-1.0) become black and everything else white, then
// upscale very small regions. Returns a mat owning its own pixel data.
cv::Mat PreprocessForOCR(const QImage &image, const QColor &textColor,
			 double colorDiff)
{
	auto mat = QImageToMat(image);
	if (mat.empty()) {
		// Guard the scaling math below against division by zero.
		return mat;
	}
	// Tesseract works best when matching black text on a white background,
	// so everything that matches the text color will be displayed black
	// while the rest of the image should be white.
	const int diff = colorDiff * 255;
	for (int y = 0; y < image.height(); y++) {
		for (int x = 0; x < image.width(); x++) {
			if (colorIsSimilar(image.pixelColor(x, y), textColor,
					   diff)) {
				mat.at<cv::Vec4b>(y, x) = {0, 0, 0, 255};
			} else {
				mat.at<cv::Vec4b>(y, x) = {255, 255, 255, 255};
			}
		}
	}
	// Scale image up if selected area is very small.
	// Results will probably still be unsatisfying.
	if (mat.rows <= 300 || mat.cols <= 300) {
		// Scale so that the smaller dimension reaches 300 px.
		const double scale = (mat.rows < mat.cols) ? 300. / mat.rows
							   : 300. / mat.cols;
		// Pass the interpolation mode in its proper (6th) position.
		// The previous call passed cv::INTER_CUBIC as the "fx"
		// argument, where it was ignored since dsize is non-zero,
		// silently falling back to cv::INTER_LINEAR.
		cv::resize(mat, mat,
			   cv::Size(mat.cols * scale, mat.rows * scale), 0.0,
			   0.0, cv::INTER_CUBIC);
	}
	// Copy so the result does not alias the QImage's pixel buffer.
	cv::Mat result;
	mat.copyTo(result);
	return result;
}
// Run Tesseract OCR on image, considering only pixels close to the given
// text color (see PreprocessForOCR). Returns the recognized UTF-8 text,
// or an empty optional on failure / when built without OCR support.
std::optional<std::string> RunOCR(tesseract::TessBaseAPI *ocr,
				  const QImage &image, const QColor &color,
				  double colorDiff)
{
	// Silence unused-parameter warnings for builds without OCR support.
	(void)ocr;
	(void)color;
	(void)colorDiff;
	if (image.isNull()) {
		return {};
	}
#ifdef OCR_SUPPORT
	if (!ocr) {
		// Avoid dereferencing a null OCR handle below.
		return {};
	}
	auto mat = PreprocessForOCR(image, color, colorDiff);
	// Tesseract expects single-channel input here.
	cv::Mat gray;
	cv::cvtColor(mat, gray, cv::COLOR_RGBA2GRAY);
	ocr->SetImage(gray.data, gray.cols, gray.rows, 1, gray.step);
	ocr->Recognize(0);
	// GetUTF8Text() returns a heap-allocated buffer we must free.
	std::unique_ptr<char[]> detectedText(ocr->GetUTF8Text());
	if (!detectedText) {
		return {};
	}
	return detectedText.get();
#else
	return {};
#endif
}
bool ContainsPixelsInColorRange(const QImage &image, const QColor &color,
double colorDeviationThreshold,
double totalPixelMatchThreshold)
{
int totalPixels = image.width() * image.height();
int matchingPixels = 0;
int maxColorDiff = static_cast<int>(colorDeviationThreshold * 255.0);
for (int y = 0; y < image.height(); y++) {
for (int x = 0; x < image.width(); x++) {
if (colorIsSimilar(image.pixelColor(x, y), color,
maxColorDiff)) {
matchingPixels++;
}
}
}
double matchPercentage =
static_cast<double>(matchingPixels) / totalPixels;
return matchPercentage >= totalPixelMatchThreshold;
}
// Return the mean color over all pixels of img, or an invalid QColor for
// a null image.
QColor GetAverageColor(const QImage &img)
{
	if (img.isNull()) {
		return QColor();
	}
	const cv::Scalar meanColor = cv::mean(QImageToMat(img));
	// NOTE(review): channel 0 is mapped to blue here although the mat
	// data is assumed to be Format_RGBA8888 (channel 0 = red, see
	// QImageToMat). GetDominantColor() uses the same mapping — confirm
	// this ordering is intentional before changing it.
	return QColor(cvRound(meanColor[2]), cvRound(meanColor[1]),
		      cvRound(meanColor[0]));
}
// Return the dominant color of img, determined as the center of the most
// populated of k k-means color clusters. Returns an invalid QColor for a
// null image or invalid k.
QColor GetDominantColor(const QImage &img, int k)
{
	if (img.isNull() || k < 1) {
		return QColor();
	}
	auto image = QImageToMat(img);
	// One row per pixel, one float column per channel (RGBA -> 4 cols).
	cv::Mat samples = image.reshape(1, image.rows * image.cols);
	samples.convertTo(samples, CV_32F);
	// Apply k-means clustering to group similar colors
	cv::TermCriteria criteria(
		cv::TermCriteria::EPS + cv::TermCriteria::MAX_ITER, 100, 0.2);
	cv::Mat labels, centers;
	cv::kmeans(samples, k, labels, criteria, 1,
		   cv::KMEANS_RANDOM_CENTERS, centers);
	// Find the dominant color
	// Center of the cluster with the largest number of pixels
	cv::Mat counts = cv::Mat::zeros(1, k, CV_32SC1);
	for (int i = 0; i < labels.rows; i++) {
		counts.at<int>(0, labels.at<int>(i))++;
	}
	cv::Point maxLoc;
	cv::minMaxLoc(counts, nullptr, nullptr, nullptr, &maxLoc);
	// counts is a single-row matrix, so the cluster index is the column
	// (x) of the maximum. The previous implementation used max_loc.y,
	// which is always 0 here and thus always selected cluster 0.
	const int dominantCluster = maxLoc.x;
	if (dominantCluster < 0 || dominantCluster >= centers.rows) {
		return QColor();
	}
	// centers is a k x 4 CV_32F matrix; read each channel as float.
	// (The previous centers.at<cv::Scalar>() reinterpreted the float
	// data as doubles, reading past the row and yielding garbage.)
	//
	// NOTE(review): channel 0 is mapped to blue although the source data
	// is assumed RGBA (see QImageToMat); same mapping as
	// GetAverageColor() — confirm this ordering is intentional.
	const int blue =
		cv::saturate_cast<int>(centers.at<float>(dominantCluster, 0));
	const int green =
		cv::saturate_cast<int>(centers.at<float>(dominantCluster, 1));
	const int red =
		cv::saturate_cast<int>(centers.at<float>(dominantCluster, 2));
	const int alpha =
		cv::saturate_cast<int>(centers.at<float>(dominantCluster, 3));
	return QColor(red, green, blue, alpha);
}
// Assumption is that QImage uses Format_RGBA8888.
// Conversion from: https://github.com/dbzhang800/QtOpenCV
//
// The returned mat wraps (does not copy) the QImage's pixel buffer, so the
// QImage must outlive the mat; writes through the mat mutate the QImage's
// shared buffer directly.
cv::Mat QImageToMat(const QImage &img)
{
	if (img.isNull()) {
		return cv::Mat();
	}
	// const_cast (instead of the previous C-style cast) because the
	// cv::Mat constructor takes a non-const data pointer even for
	// read-only use.
	return cv::Mat(img.height(), img.width(), CV_8UC(img.depth() / 8),
		       const_cast<uchar *>(img.bits()), img.bytesPerLine());
}
// Wrap a (Format_RGBA8888-compatible) mat in a QImage without copying.
// The mat must outlive the returned QImage since the pixel data is shared.
QImage MatToQImage(const cv::Mat &mat)
{
	if (mat.empty()) {
		return QImage();
	}
	// Pass the row stride explicitly; cv::Mat rows are not guaranteed
	// to be contiguous (e.g. for a ROI of a larger matrix), and the
	// QImage constructor without bytesPerLine assumes packed rows.
	return QImage(mat.data, mat.cols, mat.rows,
		      static_cast<int>(mat.step),
		      QImage::Format::Format_RGBA8888);
}
} // namespace advss