Update cipt parsing (#5712)

* refactor

* move thing out

* write unit tests

* get thing to work

* optimization?

* fix build failure
This commit is contained in:
RickyRister 2025-03-14 18:44:13 -07:00 committed by GitHub
parent 068465143b
commit 3a11ccb854
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 305 additions and 3 deletions

View File

@ -15,6 +15,7 @@ set(oracle_SOURCES
src/oraclewizard.cpp
src/oracleimporter.cpp
src/pagetemplates.cpp
src/parsehelpers.cpp
src/qt-json/json.cpp
../cockatrice/src/game/cards/card_database.cpp
../cockatrice/src/game/cards/card_database_manager.cpp

View File

@ -1,6 +1,7 @@
#include "oracleimporter.h"
#include "game/cards/card_database_parser/cockatrice_xml_4.h"
#include "parsehelpers.h"
#include "qt-json/json.h"
#include <QDebug>
@ -157,9 +158,7 @@ CardInfoPtr OracleImporter::addCard(QString name,
// DETECT CARD POSITIONING INFO
// cards that enter the field tapped
QRegularExpression ciptRegex("( it|" + QRegularExpression::escape(name) +
") enters( the battlefield)? tapped(?! unless)");
bool cipt = ciptRegex.match(text).hasMatch();
bool cipt = parseCipt(name, text);
bool landscapeOrientation = properties.value("maintype").toString() == "Battle" ||
properties.value("layout").toString() == "split" ||

View File

@ -0,0 +1,63 @@
#include "parsehelpers.h"
#include <QChar>
#include <QRegularExpression>
/**
* Parses the card text to determine if the card should have the cipt tag
*
* The parsing logic is able to handle the following cases:
* - "<name> enters tapped"
* - "<shortname> enters tapped", if the card name starts with the shortname
* - "This <type> enters tapped"
* - "..., it enters tapped"
* - Any naming scheme that appends a non-alphanumeric character plus extra text to the end of the name.
* (e.g. name is "Card Name_SET" or "Card Name (Set)" and text contains "Card Name enters tapped")
*
* However, it will still miss on certain cases:
* - shortnames that aren't the at the beginning of the card name
*
* Note that "...enters tapped unless..." returns false.
*
* @param name The name of the card
* @param text The oracle text of the card
*/
bool parseCipt(const QString &name, const QString &text)
{
// Try to split shortname on most non-alphanumeric characters (including _)
auto isShortnameDivider = [](const QChar &c) {
return c == '_' || (!c.isLetterOrNumber() && c != '\'' && c != '\"');
};
// Try all possible shortnames.
// This also handles the case of extra text appended at end.
QStringList possibleNames;
bool inAlphanumericPart = true;
for (int i = 0; i < name.length(); ++i) {
if (isShortnameDivider(name.at(i))) {
if (inAlphanumericPart) {
// only add to names on a "falling edge", in order to reduce the amount of redundant splits
possibleNames.append(QRegularExpression::escape(name.left(i)));
inAlphanumericPart = false;
}
} else {
inAlphanumericPart = true;
}
}
// and the full name
possibleNames.append(QRegularExpression::escape(name));
QString subject = "(it|" // "..., it enters tapped"
"(T|t)his [^ ]+|" // "This <type> enters tapped"
+ possibleNames.join("|") + ")";
auto ciptPattern = QRegularExpression(
// cipt phrase is either first sentence of line, or is after a punctuation mark
"(^|(, |\\. ))" + subject +
// support old wording, and exclude the "unless" case
" enters( the battlefield)? tapped(?! unless)",
QRegularExpression::MultilineOption);
return ciptPattern.match(text).hasMatch();
}

View File

@ -0,0 +1,8 @@
#ifndef PARSEHELPERS_H
#define PARSEHELPERS_H
#include <QString>
bool parseCipt(const QString &name, const QString &text);
#endif // PARSEHELPERS_H

View File

@ -51,3 +51,4 @@ target_link_libraries(password_hash_test cockatrice_common Threads::Threads ${GT
add_subdirectory(carddatabase)
add_subdirectory(loading_from_clipboard)
add_subdirectory(oracle)

View File

@ -0,0 +1,11 @@
add_executable(parse_cipt_test ../../oracle/src/parsehelpers.cpp parse_cipt_test.cpp)
if(NOT GTEST_FOUND)
add_dependencies(parse_cipt_test gtest)
endif()
set(TEST_QT_MODULES ${COCKATRICE_QT_VERSION_NAME}::Widgets)
target_link_libraries(parse_cipt_test cockatrice_common Threads::Threads ${GTEST_BOTH_LIBRARIES} ${TEST_QT_MODULES})
add_test(NAME parse_cipt_test COMMAND parse_cipt_test)

View File

@ -0,0 +1,219 @@
#include "../../oracle/src/parsehelpers.h"
#include "gtest/gtest.h"
TEST(ParseCiptTest, parsesThisEntersTapped)
{
auto name = "Boring Fields";
auto text = "This land enters tapped.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesThisEntersTheBattlefieldTapped)
{
auto name = "Boring Fields";
auto text = "This land enters the battlefield tapped.\n"
"{T}: Add {G}.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesItEntersTappedAtEndOfSentence)
{
auto name = "Shocking Fields";
auto text = "As this land enters, you may pay 2 life. If you don't, it enters tapped.\n"
"{T}: Add {G}.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesThisEntersTappedWhenNotOnFirstLine)
{
auto name = "Boring Fields";
auto text = "Flying\n"
"This land enters tapped.\n"
"{T}: Add {G}.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesFullNameWithUnderscoreAppendedText)
{
auto name = "Boring Fields_SL50";
auto text = "Boring Fields enters tapped.\n"
"{T}: Add {G}.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesFullNameWithBracketsAppendedText)
{
auto name = "Boring Fields (SL50)";
auto text = "Boring Fields enters tapped.\n"
"{T}: Add {G}.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesFullNameWithComma)
{
auto name = "Bob, the Legend";
auto text = "Bob, the Legend enters tapped.\n"
"Whenever Bob attacks, you win the game.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesFullNameWithCommaAtEndOfSentence)
{
auto name = "Bob, the Legend";
auto text = "As Bob, the Legend enters, you may pay 2 life. If you don't, Bob, the Legend enters tapped.\n"
"Whenever Bob attacks, you win the game.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesFullNameWithApostropheAtEndOfSentence)
{
auto name = "Bob's Bobber";
auto text = "As Bob's Bobber enters, you may pay 2 life. If you don't, Bob's Bobber enters tapped.\n"
"Whenever Bob attacks, you win the game.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesShortnameEndingWithComma)
{
auto name = "Bob, the Legend";
auto text = "Bob enters tapped.\n"
"Whenever Bob attacks, you win the game.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesShortnameEndingWithSpace)
{
auto name = "Bob the Legend";
auto text = "Bob enters tapped.\n"
"Whenever Bob attacks, you win the game.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesMultiWordShortnameEndingWithComma)
{
auto name = "Bob Dod, the Legend";
auto text = "Bob Dod enters tapped.\n"
"Whenever Bob Dod attacks, you win the game.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesMultiWordShortnameEndingWithSpace)
{
auto name = "Bob Dod the Legend";
auto text = "Bob Dod enters tapped.\n"
"Whenever Bob Dod attacks, you win the game.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesShortnameEndingWithSpaceWithUnderscoreAppendedText)
{
auto name = "Bob the Legend_SL50";
auto text = "Bob enters tapped.\n"
"Whenever Bob attacks, you win the game.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesShortnameEndingWithSpaceWithBracketsAppendedText)
{
auto name = "Bob the Legend (SL50)";
auto text = "Bob enters tapped.\n"
"Whenever Bob attacks, you win the game.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesMultiWordShortnameEndingWithSpaceWithUnderscoreAppendedText)
{
auto name = "Bob Dod the Legend_SL50";
auto text = "Bob Dod enters tapped.\n"
"Whenever Bob Dod attacks, you win the game.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesMultiWordShortnameEndingWithSpaceWithBracketsAppendedText)
{
auto name = "Bob Dod the Legend (SL50)";
auto text = "Bob Dod enters tapped.\n"
"Whenever Bob Dod attacks, you win the game.";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, rejectsEmptyText)
{
auto name = "Vanilla Dude";
auto text = "";
ASSERT_FALSE(parseCipt(name, text));
}
TEST(ParseCiptTest, rejectsEntersTappedUnless)
{
auto name = "Fast Fields";
auto text = "This land enters tapped unless you control another land.";
ASSERT_FALSE(parseCipt(name, text));
}
TEST(ParseCiptTest, rejectsWhenNameIsDifferent)
{
auto name = "Boring Fields";
auto text = "Fast Fields enters tapped.";
ASSERT_FALSE(parseCipt(name, text));
}
TEST(ParseCiptTest, rejectsOtherCreaturesEnterTapped)
{
auto name = "Imposing Guy";
auto text = "Other creatures enter tapped.";
ASSERT_FALSE(parseCipt(name, text));
}
TEST(ParseCiptTest, rejectsAbilityGrantingEntersTapped)
{
auto name = "Imposing Guy";
auto text = "Other creatures have \"This creature enters tapped\".";
ASSERT_FALSE(parseCipt(name, text));
}
TEST(ParseCiptTest, parsesEntersTappedAndAbilityGrantingEntersTappedOnSameCard)
{
auto name = "Imposing Guy";
auto text = "This creature enters tapped."
"Other creatures have \"This creature enters tapped\".";
ASSERT_TRUE(parseCipt(name, text));
}
TEST(ParseCiptTest, rejectsItEntersTappedAndAttacking)
{
auto name = "Token Maker";
auto text = "When Token Maker attacks, create a token. It enters tapped and attacking.";
ASSERT_FALSE(parseCipt(name, text));
}
int main(int argc, char **argv)
{
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}