Add OCR support to video condition using tesseract

This commit is contained in:
WarmUpTill 2023-02-04 01:42:08 +01:00 committed by WarmUpTill
parent 6ca8e6b3ea
commit 83c72ebba5
16 changed files with 562 additions and 80 deletions

View File

@ -178,6 +178,7 @@ AdvSceneSwitcher.condition.video.condition.noImage="has no output"
AdvSceneSwitcher.condition.video.condition.pattern="matches pattern"
AdvSceneSwitcher.condition.video.condition.object="contains object"
AdvSceneSwitcher.condition.video.condition.brightness="brightness"
AdvSceneSwitcher.condition.video.condition.ocr="contains text"
AdvSceneSwitcher.condition.video.askFileAction="Do you want to use an existing file or create a screenshot of the currently selected source?"
AdvSceneSwitcher.condition.video.askFileAction.file="Use existing file"
AdvSceneSwitcher.condition.video.askFileAction.screenshot="Create screenshot"
@ -202,7 +203,18 @@ AdvSceneSwitcher.condition.video.patternMatchFail="Pattern was not found!"
AdvSceneSwitcher.condition.video.patternMatchSuccess="Pattern is highlighted in red"
AdvSceneSwitcher.condition.video.objectMatchFail="Object was not found!"
AdvSceneSwitcher.condition.video.objectMatchSuccess="Object is highlighted in red"
AdvSceneSwitcher.condition.video.ocrMatchSuccess="Detected text:\n\n%1"
AdvSceneSwitcher.condition.video.modelLoadFail="Model data could not be loaded!"
AdvSceneSwitcher.condition.video.selectColor="Select Color"
AdvSceneSwitcher.condition.video.ocrMode.singleColumn="Single column of text of variable sizes"
AdvSceneSwitcher.condition.video.ocrMode.singleBlockVertText="Single uniform block of vertically aligned text"
AdvSceneSwitcher.condition.video.ocrMode.singleBlock="Single uniform block of text"
AdvSceneSwitcher.condition.video.ocrMode.singleLine="Single text line"
AdvSceneSwitcher.condition.video.ocrMode.singleWord="Single word"
AdvSceneSwitcher.condition.video.ocrMode.circleWord="Single word in a circle"
AdvSceneSwitcher.condition.video.ocrMode.singleChar="Single character"
AdvSceneSwitcher.condition.video.ocrMode.sparseText="Find text in no particular order"
AdvSceneSwitcher.condition.video.ocrMode.sparseTextOSD="Find text in no particular order (OSD)"
AdvSceneSwitcher.condition.video.type.main="OBS's main output"
AdvSceneSwitcher.condition.video.type.source="Source"
AdvSceneSwitcher.condition.video.type.scene="Scene"
@ -212,6 +224,8 @@ AdvSceneSwitcher.condition.video.entry.minNeighbor="Minimum neighbors: {{minNeig
AdvSceneSwitcher.condition.video.entry.throttle="{{throttleEnable}}Reduce CPU load by performing check only every {{throttleCount}} milliseconds"
AdvSceneSwitcher.condition.video.entry.checkAreaEnable="Perform check only in area"
AdvSceneSwitcher.condition.video.entry.checkArea="{{checkAreaEnable}}{{checkArea}}{{selectArea}}"
AdvSceneSwitcher.condition.video.entry.orcColorPick="Check for text color:{{textColor}}{{selectColor}}"
AdvSceneSwitcher.condition.video.entry.orcTextType="Check for text type:{{textType}}"
AdvSceneSwitcher.condition.video.minSize="Minimum size:"
AdvSceneSwitcher.condition.video.maxSize="Maximum size:"
AdvSceneSwitcher.condition.video.selectArea="Select area"

Binary file not shown.

View File

@ -17,5 +17,5 @@ install_advss_plugin_dependency(...)
... to install the plugin and its dependencies.
#]]
add_subdirectory(opencv)
add_subdirectory(openvr)
add_subdirectory(video)

View File

@ -1,45 +0,0 @@
cmake_minimum_required(VERSION 3.14)
project(advanced-scene-switcher-opencv)
# --- Check OpenCV requirements ---
find_package(OpenCV)
if(NOT OpenCV_FOUND)
message(
WARNING
"OpenCV not found! Video condition will be disabled!\nOpenCV sources are available under: ${CMAKE_CURRENT_SOURCE_DIR}/../../../deps/opencv"
)
return()
endif()
# --- End of section ---
add_library(advanced-scene-switcher-opencv MODULE)
target_sources(
${PROJECT_NAME}
PRIVATE area-selection.cpp
area-selection.hpp
macro-condition-video.cpp
macro-condition-video.hpp
opencv-helpers.cpp
opencv-helpers.hpp
paramerter-wrappers.cpp
paramerter-wrappers.hpp
preview-dialog.cpp
preview-dialog.hpp)
setup_advss_plugin(${PROJECT_NAME})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
# --- OpenCV build settings ---
target_include_directories(${PROJECT_NAME} PRIVATE "${OpenCV_INCLUDE_DIRS}")
target_link_libraries(${PROJECT_NAME} PRIVATE ${OpenCV_LIBRARIES})
# --- End of section ---
install_advss_plugin(${PROJECT_NAME})
if(NOT OS_LINUX)
install_advss_plugin_dependency(TARGET ${PROJECT_NAME} DEPENDENCIES
${OpenCV_LIBS})
endif()

View File

@ -0,0 +1,63 @@
# Build script for the video condition plugin module (OpenCV based).
# OpenCV is mandatory; Leptonica and Tesseract are optional and only enable
# the OCR check of the video condition.
cmake_minimum_required(VERSION 3.14)
project(advanced-scene-switcher-opencv)
# --- Check OpenCV requirements ---
find_package(OpenCV)
if(NOT OpenCV_FOUND)
message(
WARNING
"OpenCV not found!\n"
"Video condition will be disabled!\n\n"
"OpenCV sources are available under: ${CMAKE_CURRENT_SOURCE_DIR}/../../../deps/opencv"
)
# Stop processing this file: no plugin target is created without OpenCV.
return()
endif()
# --- Check optional OCR dependencies ---
# No REQUIRED keyword: a missing package only disables OCR support below.
find_package(Leptonica)
find_package(Tesseract)
# --- End of section ---
add_library(${PROJECT_NAME} MODULE)
if(Leptonica_FOUND AND Tesseract_FOUND)
# OCR_SUPPORT is the compile definition the C++ sources check via #ifdef.
target_compile_definitions(${PROJECT_NAME} PRIVATE OCR_SUPPORT)
target_link_libraries(${PROJECT_NAME} PRIVATE Tesseract::libtesseract
${Leptonica_LIBRARIES})
target_include_directories(${PROJECT_NAME} PRIVATE ${Tesseract_INCLUDE_DIRS}
${Leptonica_INCLUDE_DIRS})
else()
# The module is still built in this case, just without OCR_SUPPORT defined.
message(
WARNING
"OCR capabilities of video condition disabled!\n"
"Leptonica or Tesseract dependencies were not found!\n\n"
"Sources are available under: ${CMAKE_CURRENT_SOURCE_DIR}/../../../deps/")
endif()
target_sources(
${PROJECT_NAME}
PRIVATE area-selection.cpp
area-selection.hpp
macro-condition-video.cpp
macro-condition-video.hpp
opencv-helpers.cpp
opencv-helpers.hpp
paramerter-wrappers.cpp
paramerter-wrappers.hpp
preview-dialog.cpp
preview-dialog.hpp)
setup_advss_plugin(${PROJECT_NAME})
# Empty PREFIX so the module file name carries no "lib" prefix.
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "")
target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} PRIVATE ${OpenCV_LIBRARIES})
install_advss_plugin(${PROJECT_NAME})
# NOTE(review): only the OpenCV libraries are listed as dependencies to
# install; the Tesseract / Leptonica libraries are not. Confirm whether they
# also need to be installed next to the plugin when OCR support is enabled.
if(NOT OS_LINUX)
install_advss_plugin_dependency(TARGET ${PROJECT_NAME} DEPENDENCIES
${OpenCV_LIBS})
endif()

View File

@ -10,6 +10,7 @@
#include <QToolTip>
#include <QMessageBox>
#include <QtGlobal>
#include <QColorDialog>
const std::string MacroConditionVideo::id = "video";
@ -35,6 +36,9 @@ const static std::map<VideoCondition, std::string> conditionTypes = {
"AdvSceneSwitcher.condition.video.condition.object"},
{VideoCondition::BRIGHTNESS,
"AdvSceneSwitcher.condition.video.condition.brightness"},
#ifdef OCR_SUPPORT
{VideoCondition::OCR, "AdvSceneSwitcher.condition.video.condition.ocr"},
#endif
};
const static std::map<VideoInput::Type, std::string> videoInputTypes = {
@ -46,6 +50,27 @@ const static std::map<VideoInput::Type, std::string> videoInputTypes = {
"AdvSceneSwitcher.condition.video.type.scene"},
};
// Page segmentation modes offered for the OCR check, each mapped to the
// locale key of its display name. Only the modes listed here are offered by
// populatePageSegModeSelection(); the remaining enum values (e.g. the
// PSM_AUTO* variants and PSM_RAW_LINE) are intentionally or accidentally
// absent - NOTE(review): confirm which.
const static std::map<tesseract::PageSegMode, std::string> pageSegModes = {
{tesseract::PageSegMode::PSM_SINGLE_COLUMN,
"AdvSceneSwitcher.condition.video.ocrMode.singleColumn"},
{tesseract::PageSegMode::PSM_SINGLE_BLOCK_VERT_TEXT,
"AdvSceneSwitcher.condition.video.ocrMode.singleBlockVertText"},
{tesseract::PageSegMode::PSM_SINGLE_BLOCK,
"AdvSceneSwitcher.condition.video.ocrMode.singleBlock"},
{tesseract::PageSegMode::PSM_SINGLE_LINE,
"AdvSceneSwitcher.condition.video.ocrMode.singleLine"},
{tesseract::PageSegMode::PSM_SINGLE_WORD,
"AdvSceneSwitcher.condition.video.ocrMode.singleWord"},
{tesseract::PageSegMode::PSM_CIRCLE_WORD,
"AdvSceneSwitcher.condition.video.ocrMode.circleWord"},
{tesseract::PageSegMode::PSM_SINGLE_CHAR,
"AdvSceneSwitcher.condition.video.ocrMode.singleChar"},
{tesseract::PageSegMode::PSM_SPARSE_TEXT,
"AdvSceneSwitcher.condition.video.ocrMode.sparseText"},
{tesseract::PageSegMode::PSM_SPARSE_TEXT_OSD,
"AdvSceneSwitcher.condition.video.ocrMode.sparseTextOSD"},
};
cv::CascadeClassifier initObjectCascade(std::string &path)
{
cv::CascadeClassifier cascade;
@ -58,7 +83,7 @@ cv::CascadeClassifier initObjectCascade(std::string &path)
return cascade;
}
bool requiresFileInput(VideoCondition t)
static bool requiresFileInput(VideoCondition t)
{
return t == VideoCondition::MATCH || t == VideoCondition::DIFFER ||
t == VideoCondition::PATTERN;
@ -126,6 +151,7 @@ bool MacroConditionVideo::Save(obs_data_t *obj) const
obs_data_set_double(obj, "brightness", _brightnessThreshold);
_patternMatchParameters.Save(obj);
_objMatchParameters.Save(obj);
_ocrParamters.Save(obj);
obs_data_set_bool(obj, "throttleEnabled", _throttleEnabled);
obs_data_set_int(obj, "throttleCount", _throttleCount);
_areaParameters.Save(obj);
@ -144,6 +170,7 @@ bool MacroConditionVideo::Load(obs_data_t *obj)
_brightnessThreshold = obs_data_get_double(obj, "brightness");
_patternMatchParameters.Load(obj);
_objMatchParameters.Load(obj);
_ocrParamters.Load(obj);
_throttleEnabled = obs_data_get_bool(obj, "throttleEnabled");
_throttleCount = obs_data_get_int(obj, "throttleCount");
_areaParameters.Load(obj);
@ -203,6 +230,11 @@ std::string MacroConditionVideo::GetModelDataPath() const
return _objMatchParameters.modelPath;
}
// Forwards the selected page segmentation mode to the OCR parameters of this
// condition.
void MacroConditionVideo::SetPageSegMode(tesseract::PageSegMode mode)
{
_ocrParamters.SetPageMode(mode);
}
bool MacroConditionVideo::ScreenshotContainsPattern()
{
cv::Mat result;
@ -238,8 +270,30 @@ bool MacroConditionVideo::ScreenshotContainsObject()
bool MacroConditionVideo::CheckBrightnessThreshold()
{
_currentBrigthness = getAvgBrightness(_screenshotData.image) / 255.;
return _currentBrigthness > _brightnessThreshold;
_currentBrightness = getAvgBrightness(_screenshotData.image) / 255.;
return _currentBrightness > _brightnessThreshold;
}
// Runs OCR on the current screenshot and compares the detected text with the
// configured text.
// Returns false if the OCR engine is not initialized or if the configured
// regular expression is invalid.
bool MacroConditionVideo::CheckOCR()
{
	if (!_ocrParamters.Initialized()) {
		return false;
	}

	const auto detected = runOCR(_ocrParamters.GetOCR(),
				     _screenshotData.image,
				     _ocrParamters.color);

	if (!_ocrParamters.regex.Enabled()) {
		// Plain mode requires the detected text to be exactly equal to
		// the configured text.
		// NOTE(review): OCR output may carry trailing line breaks -
		// confirm whether an exact comparison is intended here.
		return detected == std::string(_ocrParamters.text);
	}

	auto pattern =
		_ocrParamters.regex.GetRegularExpression(_ocrParamters.text);
	if (!pattern.isValid()) {
		return false;
	}
	return pattern.match(QString::fromStdString(detected)).hasMatch();
}
bool MacroConditionVideo::Compare()
@ -268,6 +322,8 @@ bool MacroConditionVideo::Compare()
return ScreenshotContainsObject();
case VideoCondition::BRIGHTNESS:
return CheckBrightnessThreshold();
case VideoCondition::OCR:
return CheckOCR();
default:
break;
}
@ -288,6 +344,14 @@ static inline void populateConditionSelection(QComboBox *list)
}
}
// Adds one combo box entry per element of pageSegModes: the translated name
// is the visible text, the numeric mode value is stored as item data.
static inline void populatePageSegModeSelection(QComboBox *list)
{
	for (auto it = pageSegModes.cbegin(); it != pageSegModes.cend(); ++it) {
		const int modeValue = static_cast<int>(it->first);
		list->addItem(obs_module_text(it->second.c_str()), modeValue);
	}
}
MacroConditionVideoEdit::MacroConditionVideoEdit(
QWidget *parent, std::shared_ptr<MacroConditionVideo> entryData)
: QWidget(parent),
@ -316,6 +380,13 @@ MacroConditionVideoEdit::MacroConditionVideoEdit(
obs_module_text(
"AdvSceneSwitcher.condition.video.brightnessThresholdDescription"))),
_currentBrightness(new QLabel),
_ocrLayout(new QVBoxLayout),
_matchText(new VariableTextEdit(this)),
_regex(new RegexConfigWidget(this)),
_textColor(new QLabel),
_selectColor(new QPushButton(obs_module_text(
"AdvSceneSwitcher.condition.video.selectColor"))),
_pageSegMode(new QComboBox()),
_modelDataPath(new FileSelection()),
_modelPathLayout(new QHBoxLayout),
_objectScaleThreshold(new SliderSpinBox(
@ -351,6 +422,7 @@ MacroConditionVideoEdit::MacroConditionVideoEdit(
"AdvSceneSwitcher.condition.video.usePatternForChangedCheck.tooltip"));
_minNeighbors->setMinimum(minMinNeighbors);
_minNeighbors->setMaximum(maxMinNeighbors);
populatePageSegModeSelection(_pageSegMode);
_throttleCount->setMinimum(1 * GetSwitcher()->interval);
_throttleCount->setMaximum(10 * GetSwitcher()->interval);
_throttleCount->setSingleStep(GetSwitcher()->interval);
@ -414,6 +486,14 @@ MacroConditionVideoEdit::MacroConditionVideoEdit(
&_previewDialog, SLOT(ConditionChanged(int)));
QWidget::connect(_selectArea, SIGNAL(clicked()), this,
SLOT(SelectAreaClicked()));
QWidget::connect(_selectColor, SIGNAL(clicked()), this,
SLOT(SelectColorClicked()));
QWidget::connect(_matchText, SIGNAL(textChanged()), this,
SLOT(MatchTextChanged()));
QWidget::connect(_regex, SIGNAL(RegexConfigChanged(RegexConfig)), this,
SLOT(RegexChanged(RegexConfig)));
QWidget::connect(_pageSegMode, SIGNAL(currentIndexChanged(int)), this,
SLOT(PageSegModeChanged(int)));
populateVideoInputSelection(_videoInputTypes);
populateConditionSelection(_condition);
@ -435,6 +515,9 @@ MacroConditionVideoEdit::MacroConditionVideoEdit(
{"{{checkAreaEnable}}", _checkAreaEnable},
{"{{checkArea}}", _checkArea},
{"{{selectArea}}", _selectArea},
{"{{textColor}}", _textColor},
{"{{selectColor}}", _selectColor},
{"{{textType}}", _pageSegMode},
};
placeWidgets(obs_module_text("AdvSceneSwitcher.condition.video.entry"),
entryLine1Layout, widgetPlaceholders);
@ -454,6 +537,24 @@ MacroConditionVideoEdit::MacroConditionVideoEdit(
"AdvSceneSwitcher.condition.video.entry.checkArea"),
_checkAreaControlLayout, widgetPlaceholders);
_ocrLayout->addWidget(_matchText);
auto regexLayout = new QHBoxLayout;
regexLayout->addWidget(_regex);
regexLayout->addStretch();
_ocrLayout->addLayout(regexLayout);
auto pageModeSegLayout = new QHBoxLayout();
placeWidgets(
obs_module_text(
"AdvSceneSwitcher.condition.video.entry.orcTextType"),
pageModeSegLayout, widgetPlaceholders);
_ocrLayout->addLayout(pageModeSegLayout);
auto colorPickLayout = new QHBoxLayout();
placeWidgets(
obs_module_text(
"AdvSceneSwitcher.condition.video.entry.orcColorPick"),
colorPickLayout, widgetPlaceholders);
_ocrLayout->addLayout(colorPickLayout);
QGridLayout *sizeGrid = new QGridLayout;
sizeGrid->addWidget(
new QLabel(obs_module_text(
@ -479,6 +580,7 @@ MacroConditionVideoEdit::MacroConditionVideoEdit(
mainLayout->addWidget(_useAlphaAsMask);
mainLayout->addWidget(_brightnessThreshold);
mainLayout->addWidget(_currentBrightness);
mainLayout->addLayout(_ocrLayout);
mainLayout->addLayout(_modelPathLayout);
mainLayout->addWidget(_objectScaleThreshold);
mainLayout->addLayout(_neighborsControlLayout);
@ -810,6 +912,78 @@ void MacroConditionVideoEdit::MaxSizeChanged(advss::Size value)
_entryData->_objMatchParameters);
}
void MacroConditionVideoEdit::SetupColorLabel(const QColor &color)
{
_textColor->setText(color.name());
_textColor->setPalette(QPalette(color));
_textColor->setAutoFillBackground(true);
}
void MacroConditionVideoEdit::SelectColorClicked()
{
if (_loading || !_entryData) {
return;
}
const QColor color = QColorDialog::getColor(
_entryData->_ocrParamters.color, this,
obs_module_text("AdvSceneSwitcher.condition.video.selectColor"),
QColorDialog::ColorDialogOption());
if (!color.isValid()) {
return;
}
SetupColorLabel(color);
std::lock_guard<std::mutex> lock(GetSwitcher()->m);
_entryData->_ocrParamters.color = color;
_previewDialog.OCRParamtersChanged(_entryData->_ocrParamters);
}
void MacroConditionVideoEdit::MatchTextChanged()
{
if (_loading || !_entryData) {
return;
}
std::lock_guard<std::mutex> lock(GetSwitcher()->m);
_entryData->_ocrParamters.text =
_matchText->toPlainText().toUtf8().constData();
adjustSize();
updateGeometry();
_previewDialog.OCRParamtersChanged(_entryData->_ocrParamters);
}
// Slot for changes of the regex configuration widget: stores the new
// configuration in the OCR parameters, lets the widget resize itself and
// forwards the updated parameters to the preview dialog.
void MacroConditionVideoEdit::RegexChanged(RegexConfig conf)
{
	if (_loading) {
		return;
	}
	if (!_entryData) {
		return;
	}

	std::lock_guard<std::mutex> lock(GetSwitcher()->m);
	auto &ocrParams = _entryData->_ocrParamters;
	ocrParams.regex = conf;

	adjustSize();
	updateGeometry();

	_previewDialog.OCRParamtersChanged(ocrParams);
}
// Slot for a changed page segmentation mode selection: reads the mode value
// stored as item data at the given combo box index, applies it to the entry
// data and forwards the updated parameters to the preview dialog.
void MacroConditionVideoEdit::PageSegModeChanged(int idx)
{
	if (_loading) {
		return;
	}
	if (!_entryData) {
		return;
	}

	std::lock_guard<std::mutex> lock(GetSwitcher()->m);
	const int modeValue = _pageSegMode->itemData(idx).toInt();
	_entryData->SetPageSegMode(
		static_cast<tesseract::PageSegMode>(modeValue));
	_previewDialog.OCRParamtersChanged(_entryData->_ocrParamters);
}
void MacroConditionVideoEdit::CheckAreaEnableChanged(int value)
{
if (_loading || !_entryData) {
@ -907,37 +1081,37 @@ void MacroConditionVideoEdit::ModelPathChanged(const QString &text)
}
}
bool needsShowMatch(VideoCondition cond)
static bool needsShowMatch(VideoCondition cond)
{
return cond == VideoCondition::PATTERN ||
cond == VideoCondition::OBJECT;
cond == VideoCondition::OBJECT || cond == VideoCondition::OCR;
}
bool needsObjectControls(VideoCondition cond)
static bool needsObjectControls(VideoCondition cond)
{
return cond == VideoCondition::OBJECT;
}
bool needsThrottleControls(VideoCondition cond)
static bool needsThrottleControls(VideoCondition cond)
{
return cond == VideoCondition::PATTERN ||
cond == VideoCondition::OBJECT;
}
bool needsThreshold(VideoCondition cond)
static bool needsThreshold(VideoCondition cond)
{
return cond == VideoCondition::PATTERN ||
cond == VideoCondition::HAS_CHANGED ||
cond == VideoCondition::HAS_NOT_CHANGED;
}
bool patternControlIsOptional(VideoCondition cond)
static bool patternControlIsOptional(VideoCondition cond)
{
return cond == VideoCondition::HAS_CHANGED ||
cond == VideoCondition::HAS_NOT_CHANGED;
}
bool needsAreaControls(VideoCondition cond)
static bool needsAreaControls(VideoCondition cond)
{
return cond != VideoCondition::NO_IMAGE;
}
@ -964,6 +1138,8 @@ void MacroConditionVideoEdit::SetWidgetVisibility()
needsObjectControls(_entryData->_condition));
_minNeighborsDescription->setVisible(
needsObjectControls(_entryData->_condition));
setLayoutVisible(_ocrLayout,
_entryData->_condition == VideoCondition::OCR);
setLayoutVisible(_sizeLayout,
needsObjectControls(_entryData->_condition));
setLayoutVisible(_modelPathLayout,
@ -990,6 +1166,7 @@ void MacroConditionVideoEdit::SetupPreviewDialogParams()
_entryData->_patternMatchParameters);
_previewDialog.ObjDetectParamtersChanged(
_entryData->_objMatchParameters);
_previewDialog.OCRParamtersChanged(_entryData->_ocrParamters);
_previewDialog.VideoSelectionChanged(_entryData->_video);
_previewDialog.AreaParamtersChanged(_entryData->_areaParameters);
_previewDialog.ConditionChanged(
@ -1019,6 +1196,11 @@ void MacroConditionVideoEdit::UpdateEntryData()
_modelDataPath->SetPath(_entryData->GetModelDataPath().c_str());
_objectScaleThreshold->SetDoubleValue(
_entryData->_objMatchParameters.scaleFactor);
_matchText->setPlainText(_entryData->_ocrParamters.text);
_regex->SetRegexConfig(_entryData->_ocrParamters.regex);
SetupColorLabel(_entryData->_ocrParamters.color);
_pageSegMode->setCurrentIndex(_pageSegMode->findData(
static_cast<int>(_entryData->_ocrParamters.GetPageMode())));
_minNeighbors->setValue(_entryData->_objMatchParameters.minNeighbors);
_minSize->SetSize(_entryData->_objMatchParameters.minSize);
_maxSize->SetSize(_entryData->_objMatchParameters.maxSize);
@ -1028,5 +1210,6 @@ void MacroConditionVideoEdit::UpdateEntryData()
_checkAreaEnable->setChecked(_entryData->_areaParameters.enable);
_checkArea->SetArea(_entryData->_areaParameters.area);
UpdatePreviewTooltip();
SetupPreviewDialogParams();
SetWidgetVisibility();
}

View File

@ -8,6 +8,7 @@
#include <file-selection.hpp>
#include <screenshot-helper.hpp>
#include <slider-spinbox.hpp>
#include <variable-text-edit.hpp>
#include <QWidget>
#include <QComboBox>
@ -21,7 +22,7 @@ class PreviewDialog;
class MacroConditionVideo : public MacroCondition {
public:
MacroConditionVideo(Macro *m) : MacroCondition(m) {}
MacroConditionVideo(Macro *m) : MacroCondition(m){};
bool CheckCondition();
bool Save(obs_data_t *obj) const;
bool Load(obs_data_t *obj);
@ -37,7 +38,8 @@ public:
bool LoadModelData(std::string &path);
std::string GetModelDataPath() const;
void ResetLastMatch() { _lastMatchResult = false; }
double GetCurrentBrightness() const { return _currentBrigthness; }
double GetCurrentBrightness() const { return _currentBrightness; }
void SetPageSegMode(tesseract::PageSegMode);
VideoInput _video;
VideoCondition _condition = VideoCondition::MATCH;
@ -51,7 +53,8 @@ public:
bool _blockUntilScreenshotDone = false;
double _brightnessThreshold = 0.5;
PatternMatchParameters _patternMatchParameters;
ObjDetectParamerts _objMatchParameters;
ObjDetectParameters _objMatchParameters;
OCRParameters _ocrParamters;
AreaParamters _areaParameters;
bool _throttleEnabled = false;
int _throttleCount = 3;
@ -61,6 +64,7 @@ private:
bool ScreenshotContainsPattern();
bool ScreenshotContainsObject();
bool CheckBrightnessThreshold();
bool CheckOCR();
bool Compare();
bool CheckShouldBeSkipped();
@ -71,7 +75,8 @@ private:
bool _lastMatchResult = false;
int _runCount = 0;
double _currentBrigthness = 0.;
double _currentBrightness = 0.;
static bool _registered;
static const std::string id;
@ -117,6 +122,11 @@ private slots:
void MinSizeChanged(advss::Size value);
void MaxSizeChanged(advss::Size value);
void SelectColorClicked();
void MatchTextChanged();
void RegexChanged(RegexConfig conf);
void PageSegModeChanged(int);
void CheckAreaEnableChanged(int value);
void CheckAreaChanged(advss::Area);
void CheckAreaChanged(QRect area);
@ -134,6 +144,8 @@ signals:
private:
void SetWidgetVisibility();
void HandleVideoInputUpdate();
void SetupPreviewDialogParams();
void SetupColorLabel(const QColor &);
QComboBox *_videoInputTypes;
SceneSelectionWidget *_scenes;
@ -151,6 +163,13 @@ private:
SliderSpinBox *_brightnessThreshold;
QLabel *_currentBrightness;
QVBoxLayout *_ocrLayout;
VariableTextEdit *_matchText;
RegexConfigWidget *_regex;
QLabel *_textColor;
QPushButton *_selectColor;
QComboBox *_pageSegMode;
FileSelection *_modelDataPath;
QHBoxLayout *_modelPathLayout;
SliderSpinBox *_objectScaleThreshold;

View File

@ -92,6 +92,56 @@ uchar getAvgBrightness(QImage &img)
return brightnessSum / (hsvImage.rows * hsvImage.cols);
}
// Prepares an image for text recognition.
//
// The image is converted to a single channel mask based on the given color,
// the mask is inverted and - if one of its edges is 300 pixels or shorter -
// scaled up so that the shorter edge becomes 300 pixels long.
//
// image: input image (QImageToMat() assumes Format_RGBA8888)
// color: text color used to build the mask
// Returns the preprocessed single channel image.
cv::Mat preprocessForOCR(const QImage &image, const QColor &color)
{
	// Shortest edge length the image handed to the OCR engine should have
	constexpr double minEdgeLength = 300.;

	auto mat = QImageToMat(image);

	// Only keep the desired color
	// NOTE(review): the image is converted to HSV but the upper bound is
	// built from the RGB components of the color - confirm this is the
	// intended color selection.
	cv::cvtColor(mat, mat, cv::COLOR_RGBA2RGB);
	cv::cvtColor(mat, mat, cv::COLOR_RGB2HSV);
	cv::inRange(mat, cv::Scalar(0, 0, 0),
		    cv::Scalar(color.red(), color.green(), color.blue()), mat);

	// Invert to improve OCR detection
	cv::bitwise_not(mat, mat);

	// Scale image up if selected area is too small
	// Results will probably still be unsatisfying
	if (mat.rows <= minEdgeLength || mat.cols <= minEdgeLength) {
		const double scale = (mat.rows < mat.cols)
					     ? minEdgeLength / mat.rows
					     : minEdgeLength / mat.cols;
		const cv::Size scaledSize(static_cast<int>(mat.cols * scale),
					  static_cast<int>(mat.rows * scale));
		// The interpolation flag is the sixth argument of cv::resize().
		// Passing it fourth would set the (ignored) fx scale factor
		// and leave the interpolation at its default.
		cv::resize(mat, mat, scaledSize, 0, 0, cv::INTER_CUBIC);
	}

	return mat;
}
// Runs text recognition on the given image.
//
// ocr:   OCR engine to use; expected to be successfully initialized
// image: image to search for text
// color: text color passed on to preprocessForOCR()
// Returns the detected text as UTF-8. An empty string is returned if no
// engine or no image was provided, if nothing was detected, or if the plugin
// was built without OCR_SUPPORT.
std::string runOCR(tesseract::TessBaseAPI *ocr, const QImage &image,
		   const QColor &color)
{
	// Nothing to recognize without an engine or without image data
	if (!ocr || image.isNull()) {
		return "";
	}

#ifdef OCR_SUPPORT
	auto mat = preprocessForOCR(image, color);
	// The preprocessed image has one byte per pixel
	ocr->SetImage(mat.data, mat.cols, mat.rows, 1, mat.step);
	ocr->Recognize(0);

	// GetUTF8Text() hands over ownership of an array allocated with
	// new[], so it has to be released with delete[]
	std::unique_ptr<char[]> detectedText(ocr->GetUTF8Text());
	if (!detectedText) {
		return "";
	}
	return detectedText.get();
#else
	(void)color;
	return "";
#endif
}
// Assumption is that QImage uses Format_RGBA8888.
// Conversion from: https://github.com/dbzhang800/QtOpenCV
cv::Mat QImageToMat(const QImage &img)

View File

@ -3,6 +3,38 @@
#undef NO // MacOS macro that can conflict with OpenCV
#include <opencv2/opencv.hpp>
// When built without OCR_SUPPORT the tesseract headers are not available.
// The minimal stand-ins below keep code that refers to
// tesseract::PageSegMode and tesseract::TessBaseAPI compiling in that case.
#ifdef OCR_SUPPORT
#include <tesseract/baseapi.h>
#else
namespace tesseract {
// NOTE(review): presumably mirrors the values of the real
// tesseract::PageSegMode. The numeric value is written to the settings, so
// it should stay in sync with the library - confirm against the tesseract
// headers.
enum PageSegMode {
PSM_OSD_ONLY = 0,
PSM_AUTO_OSD = 1,
PSM_AUTO_ONLY = 2,
PSM_AUTO = 3,
PSM_SINGLE_COLUMN = 4,
PSM_SINGLE_BLOCK_VERT_TEXT = 5,
PSM_SINGLE_BLOCK = 6,
PSM_SINGLE_LINE = 7,
PSM_SINGLE_WORD = 8,
PSM_CIRCLE_WORD = 9,
PSM_SINGLE_CHAR = 10,
PSM_SPARSE_TEXT = 11,
PSM_SPARSE_TEXT_OSD = 12,
PSM_RAW_LINE = 13,
PSM_COUNT
};
// No-op replacement offering only the member functions that are called
// outside of OCR_SUPPORT guarded code. Init() always reports success (0).
class TessBaseAPI {
public:
void SetPageSegMode(PageSegMode) {}
int Init(const char *, const char *) { return 0; }
void End() {}
};
}
#endif
constexpr int minMinNeighbors = 3;
constexpr int maxMinNeighbors = 6;
constexpr double defaultScaleFactor = 1.1;
@ -23,5 +55,7 @@ std::vector<cv::Rect> matchObject(QImage &img, cv::CascadeClassifier &cascade,
double scaleFactor, int minNeighbors,
cv::Size minSize, cv::Size maxSize);
uchar getAvgBrightness(QImage &img);
cv::Mat preprocessForOCR(const QImage &image, const QColor &color);
std::string runOCR(tesseract::TessBaseAPI *, const QImage &, const QColor &);
cv::Mat QImageToMat(const QImage &img);
QImage MatToQImage(const cv::Mat &mat);

View File

@ -29,7 +29,7 @@ bool PatternMatchParameters::Load(obs_data_t *obj)
return true;
}
bool ObjDetectParamerts::Save(obs_data_t *obj) const
bool ObjDetectParameters::Save(obs_data_t *obj) const
{
auto data = obs_data_create();
obs_data_set_string(data, "modelPath", modelPath.c_str());
@ -53,7 +53,7 @@ bool isMinNeighborsValid(int minNeighbors)
minNeighbors <= maxMinNeighbors;
}
bool ObjDetectParamerts::Load(obs_data_t *obj)
bool ObjDetectParameters::Load(obs_data_t *obj)
{
// TODO: Remove this fallback in a future version
if (!obs_data_has_user_value(obj, "patternMatchData")) {
@ -188,3 +188,109 @@ OBSWeakSource VideoInput::GetVideo() const
}
return nullptr;
}
// Stores the red, green and blue components of the color as integers in a
// nested data object which is saved in obj under the given name.
// The alpha component is not saved.
static void SaveColor(obs_data_t *obj, const char *name, const QColor &color)
{
	obs_data_t *channels = obs_data_create();

	obs_data_set_int(channels, "red", color.red());
	obs_data_set_int(channels, "green", color.green());
	obs_data_set_int(channels, "blue", color.blue());

	obs_data_set_obj(obj, name, channels);
	// Drop the local reference acquired by obs_data_create()
	obs_data_release(channels);
}
// Creates OCR parameters with default values and tries to set up the OCR
// engine. Whether that succeeded can be queried with Initialized().
OCRParameters::OCRParameters()
{
Setup();
}
// Shuts down the OCR engine, but only if it was successfully initialized.
OCRParameters::~OCRParameters()
{
	if (initDone) {
		ocr->End();
	}
}
// Copies the settings of other. The OCR engine itself is not shared or
// copied: a new engine is set up for this instance and, if that succeeded,
// configured with the copied page segmentation mode.
OCRParameters::OCRParameters(const OCRParameters &other)
: text(other.text),
regex(other.regex),
color(other.color),
pageSegMode(other.pageSegMode)
{
Setup();
if (initDone) {
ocr->SetPageSegMode(pageSegMode);
}
}
// Copies the settings of other into this instance.
// The OCR engine of this instance is kept. It only receives the copied page
// segmentation mode, and only if it was successfully initialized - the same
// guard the copy constructor and Load() use.
OCRParameters &OCRParameters::operator=(const OCRParameters &other)
{
	// Nothing to do when assigning to itself
	if (this == &other) {
		return *this;
	}

	text = other.text;
	regex = other.regex;
	color = other.color;
	pageSegMode = other.pageSegMode;

	if (initDone) {
		ocr->SetPageSegMode(pageSegMode);
	}
	return *this;
}
// Writes text, regex configuration, text color and page segmentation mode
// into a nested data object stored under the key "ocrData" of obj.
// Always returns true.
bool OCRParameters::Save(obs_data_t *obj) const
{
	obs_data_t *ocrData = obs_data_create();

	text.Save(ocrData, "pattern");
	regex.Save(ocrData);
	SaveColor(ocrData, "textColor", color);
	const int modeValue = static_cast<int>(pageSegMode);
	obs_data_set_int(ocrData, "pageSegMode", modeValue);

	obs_data_set_obj(obj, "ocrData", ocrData);
	// Drop the local reference acquired by obs_data_create()
	obs_data_release(ocrData);
	return true;
}
// Reads the "red", "green" and "blue" integers from the nested data object
// stored in obj under the given name and applies them to a color that starts
// out as black.
static QColor LoadColor(obs_data_t *obj, const char *name)
{
	obs_data_t *channels = obs_data_get_obj(obj, name);

	QColor result = Qt::black;
	result.setRed(obs_data_get_int(channels, "red"));
	result.setGreen(obs_data_get_int(channels, "green"));
	result.setBlue(obs_data_get_int(channels, "blue"));

	obs_data_release(channels);
	return result;
}
// Restores text, regex configuration, text color and page segmentation mode
// from the nested "ocrData" object of obj.
//
// If obj has no "ocrData" entry the current values are left untouched
// instead of being overwritten with zeroed values. A stored page
// segmentation mode outside of the valid enum range is ignored.
// Always returns true.
bool OCRParameters::Load(obs_data_t *obj)
{
	if (obs_data_has_user_value(obj, "ocrData")) {
		auto data = obs_data_get_obj(obj, "ocrData");
		text.Load(data, "pattern");
		regex.Load(data);
		color = LoadColor(data, "textColor");

		// Only accept values which map to an actual enum entry
		const auto storedMode = obs_data_get_int(data, "pageSegMode");
		if (storedMode >= 0 && storedMode < tesseract::PSM_COUNT) {
			pageSegMode = static_cast<tesseract::PageSegMode>(
				storedMode);
		}
		obs_data_release(data);
	}

	if (initDone) {
		ocr->SetPageSegMode(pageSegMode);
	}
	return true;
}
// Remembers the page segmentation mode and passes it on to the OCR engine.
// The engine is only touched if it was successfully initialized, which is
// the same guard the copy constructor and Load() use.
void OCRParameters::SetPageMode(tesseract::PageSegMode mode)
{
	pageSegMode = mode;
	if (initDone) {
		ocr->SetPageSegMode(mode);
	}
}
// Creates the OCR engine and initializes it for English ("eng") using the
// data found in the "res/ocr" folder of the module data path.
// initDone reflects whether the initialization succeeded.
void OCRParameters::Setup()
{
	initDone = false;
	// make_unique reports allocation failure by throwing, so the result
	// does not need to be checked for null
	ocr = std::make_unique<tesseract::TessBaseAPI>();

	// The module data path may be unavailable, in which case it must not
	// be used to build a std::string
	const char *moduleDataPath =
		obs_get_module_data_path(obs_current_module());
	if (!moduleDataPath) {
		return;
	}

	const std::string dataPath =
		std::string(moduleDataPath) + std::string("/res/ocr");
	// Init() returns 0 on success
	initDone = ocr->Init(dataPath.c_str(), "eng") == 0;
}

View File

@ -4,9 +4,17 @@
#include <source-selection.hpp>
#include <scene-selection.hpp>
#include <regex-config.hpp>
#include <variable.hpp>
#include <obs.hpp>
#include <obs-module.h>
#include <QMetaType>
#ifdef OCR_SUPPORT
#include <tesseract/baseapi.h>
#endif
enum class VideoCondition {
MATCH,
DIFFER,
@ -16,6 +24,7 @@ enum class VideoCondition {
PATTERN,
OBJECT,
BRIGHTNESS,
OCR,
};
class VideoInput {
@ -48,7 +57,7 @@ public:
double threshold = 0.8;
};
class ObjDetectParamerts {
class ObjDetectParameters {
public:
bool Save(obs_data_t *obj) const;
bool Load(obs_data_t *obj);
@ -64,6 +73,36 @@ public:
advss::Size maxSize{0, 0};
};
// Settings of the OCR check of the video condition together with the OCR
// engine instance used to perform it.
// Every instance owns its own engine: copy construction sets up a new engine
// and copy assignment keeps the engine of the assigned-to instance.
class OCRParameters {
public:
OCRParameters();
~OCRParameters();
OCRParameters(const OCRParameters &other);
OCRParameters &operator=(const OCRParameters &);
// Save to / load from the "ocrData" entry of obj
bool Save(obs_data_t *obj) const;
bool Load(obs_data_t *obj);
// True if the OCR engine was set up successfully
bool Initialized() const { return initDone; }
void SetPageMode(tesseract::PageSegMode);
tesseract::PageSegMode GetPageMode() const { return pageSegMode; }
// Non-owning pointer to the engine, which stays owned by this instance
tesseract::TessBaseAPI *GetOCR() const { return ocr.get(); }
// Text (or regular expression, depending on regex) to compare the
// detected text with
VariableResolvingString text =
obs_module_text("AdvSceneSwitcher.enterText");
RegexConfig regex = RegexConfig::PartialMatchRegexConfig();
// Color of the text to look for
QColor color = Qt::black;
private:
// Creates and initializes the engine and sets initDone accordingly
void Setup();
tesseract::PageSegMode pageSegMode = tesseract::PSM_SINGLE_BLOCK;
std::unique_ptr<tesseract::TessBaseAPI> ocr;
bool initDone = false;
};
Q_DECLARE_METATYPE(OCRParameters)
class AreaParamters {
public:
bool Save(obs_data_t *obj) const;

View File

@ -89,16 +89,16 @@ PreviewDialog::~PreviewDialog()
void PreviewDialog::ShowMatch()
{
Start();
_rubberBand->hide();
_type = PreviewType::SHOW_MATCH;
_rubberBand->hide();
Start();
}
void PreviewDialog::SelectArea()
{
_selectingArea = false;
Start();
_type = PreviewType::SELECT_AREA;
Start();
DrawFrame();
_statusLabel->setText(obs_module_text(
"AdvSceneSwitcher.condition.video.selectArea.status"));
@ -123,12 +123,18 @@ void PreviewDialog::PatternMatchParamtersChanged(
_patternImageData = createPatternData(_patternMatchParams.image);
}
void PreviewDialog::ObjDetectParamtersChanged(const ObjDetectParamerts &params)
void PreviewDialog::ObjDetectParamtersChanged(const ObjDetectParameters &params)
{
std::unique_lock<std::mutex> lock(_mtx);
_objDetectParams = params;
}
// Slot: replaces the OCR parameters used by the preview with a copy of
// params while holding the dialog's mutex.
void PreviewDialog::OCRParamtersChanged(const OCRParameters &params)
{
	std::lock_guard<std::mutex> guard(_mtx);
	_ocrParams = params;
}
void PreviewDialog::VideoSelectionChanged(const VideoInput &video)
{
std::unique_lock<std::mutex> lock(_mtx);
@ -163,7 +169,7 @@ void PreviewDialog::UpdateImage(const QPixmap &image)
DrawFrame();
}
emit NeedImage(_video, _type, _patternMatchParams, _patternImageData,
_objDetectParams, _areaParams, _condition);
_objDetectParams, _ocrParams, _areaParams, _condition);
}
void PreviewDialog::Start()
@ -191,7 +197,7 @@ void PreviewDialog::Start()
_thread.start();
emit NeedImage(_video, _type, _patternMatchParams, _patternImageData,
_objDetectParams, _areaParams, _condition);
_objDetectParams, _ocrParams, _areaParams, _condition);
}
void PreviewDialog::DrawFrame()
@ -238,7 +244,8 @@ void markObjects(QImage &image, std::vector<cv::Rect> &objects)
void PreviewImage::CreateImage(const VideoInput &video, PreviewType type,
const PatternMatchParameters &patternMatchParams,
const PatternImageData &patternImageData,
ObjDetectParamerts objDetectParams,
ObjDetectParameters objDetectParams,
OCRParameters ocrParams,
const AreaParamters &areaParams,
VideoCondition condition)
{
@ -268,7 +275,8 @@ void PreviewImage::CreateImage(const VideoInput &video, PreviewType type,
}
// Will emit status label update
MarkMatch(screenshot.image, patternMatchParams,
patternImageData, objDetectParams, condition);
patternImageData, objDetectParams, ocrParams,
condition);
} else {
emit StatusUpdate("");
}
@ -278,7 +286,8 @@ void PreviewImage::CreateImage(const VideoInput &video, PreviewType type,
void PreviewImage::MarkMatch(QImage &screenshot,
const PatternMatchParameters &patternMatchParams,
const PatternImageData &patternImageData,
ObjDetectParamerts &objDetectParams,
ObjDetectParameters &objDetectParams,
const OCRParameters &ocrParams,
VideoCondition condition)
{
if (condition == VideoCondition::PATTERN) {
@ -309,5 +318,13 @@ void PreviewImage::MarkMatch(QImage &screenshot,
"AdvSceneSwitcher.condition.video.objectMatchSuccess"));
markObjects(screenshot, objects);
}
} else if (condition == VideoCondition::OCR) {
auto text =
runOCR(ocrParams.GetOCR(), screenshot, ocrParams.color);
QString status(obs_module_text(
"AdvSceneSwitcher.condition.video.ocrMatchSuccess"));
emit StatusUpdate(status.arg(QString::fromStdString(text)));
// TODO: show preprocessed image
}
}

View File

@ -21,16 +21,16 @@ class PreviewImage : public QObject {
public slots:
void CreateImage(const VideoInput &, PreviewType,
const PatternMatchParameters &,
const PatternImageData &, ObjDetectParamerts,
const AreaParamters &, VideoCondition);
const PatternImageData &, ObjDetectParameters,
OCRParameters, const AreaParamters &, VideoCondition);
signals:
void ImageReady(const QPixmap &);
void StatusUpdate(const QString &);
private:
void MarkMatch(QImage &screenshot, const PatternMatchParameters &,
const PatternImageData &, ObjDetectParamerts &,
VideoCondition);
const PatternImageData &, ObjDetectParameters &,
const OCRParameters &, VideoCondition);
};
class PreviewDialog : public QDialog {
@ -46,7 +46,8 @@ public:
public slots:
void PatternMatchParamtersChanged(const PatternMatchParameters &);
void ObjDetectParamtersChanged(const ObjDetectParamerts &);
void ObjDetectParamtersChanged(const ObjDetectParameters &);
void OCRParamtersChanged(const OCRParameters &);
void VideoSelectionChanged(const VideoInput &);
void AreaParamtersChanged(const AreaParamters &);
void ConditionChanged(int cond);
@ -57,8 +58,8 @@ signals:
void SelectionAreaChanged(QRect area);
void NeedImage(const VideoInput &, PreviewType,
const PatternMatchParameters &, const PatternImageData &,
ObjDetectParamerts, const AreaParamters &,
VideoCondition);
ObjDetectParameters, OCRParameters,
const AreaParamters &, VideoCondition);
private:
void Start();
@ -71,7 +72,8 @@ private:
VideoInput _video;
PatternMatchParameters _patternMatchParams;
PatternImageData _patternImageData;
ObjDetectParamerts _objDetectParams;
ObjDetectParameters _objDetectParams;
OCRParameters _ocrParams;
AreaParamters _areaParams;
VideoCondition _condition = VideoCondition::PATTERN;

View File