Chat-monitor: Centralize functions and migrate to JSON (#8328)

This commit is contained in:
Mia 2021-05-30 00:11:32 -05:00 committed by GitHub
parent 487fc5e3fa
commit 5f9072b2ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 168 additions and 131 deletions

View File

@ -1,7 +1,8 @@
import {FS, Utils} from '../../lib';
import type {FilterWord} from '../chat';
// Storage path as a TSV file; this is the same path that LEGACY_MONITOR_FILE names below.
const MONITOR_FILE = 'config/chat-plugins/chat-monitor.tsv';
// TSV file read by `loadLegacy()`; `load()` renames it with a `.backup` suffix after reading it.
const LEGACY_MONITOR_FILE = 'config/chat-plugins/chat-monitor.tsv';
// JSON file that `save()` writes and `load()` parses.
const MONITOR_FILE = 'config/chat-plugins/chat-filter.json';
// Throttle passed to `writeUpdate` for non-forced saves (5 * 60 * 1000; presumably milliseconds, i.e. 5 minutes).
const WRITE_THROTTLE_TIME = 5 * 60 * 1000;
// Substitution dictionary adapted from https://github.com/ThreeLetters/NoSwearingPlease/blob/master/index.js
@ -43,75 +44,156 @@ const EVASION_DETECTION_SUBSTITUTIONS: {[k: string]: string[]} = {
z: ["z", "ᘔ", "Z", "ⓩ", "Ⓩ", "Ⱬ", "ẓ", "Ẓ", "ፚ", "", "ʐ", "", "", "", "🅩", "𝐳", "𝐙", "𝘻", "𝘡", "𝙯", "𝙕", "𝓏", "𝔃", "𝓩", "𝕫", "𝕋", "𝔷", "𝔙", "𝖟", "𝖅", "🅉", "🆉", "𝒵", "ȥ", "𝚣", "𝚉", "☡", "z"],
};
// Module-level map from a letter to a regex character class (`[...]`) built from that letter's
// entries in EVASION_DETECTION_SUBSTITUTIONS; it starts empty and is filled by a loop over that dictionary.
const EVASION_DETECTION_SUB_STRINGS: {[k: string]: string} = {};
export const Filters = new class {
/** Maps each letter of the substitution dictionary to a regex character class of its variants. */
readonly EVASION_DETECTION_SUB_STRINGS: {[k: string]: string} = {};
/**
 * Builds one `[...]` character class per letter in EVASION_DETECTION_SUBSTITUTIONS,
 * then loads the stored filter words via `load()`.
 */
constructor() {
	for (const [letter, variants] of Object.entries(EVASION_DETECTION_SUBSTITUTIONS)) {
		this.EVASION_DETECTION_SUB_STRINGS[letter] = '[' + variants.join('') + ']';
	}
	this.load();
}
/**
 * Builds the evasion-detection regex for a word.
 *
 * Each character of `str` becomes its substitution character class (or the character
 * itself when it has none) followed by `+`; the pieces are joined with an optional
 * literal dot (`\.?`) and the whole pattern is wrapped in `\b` word boundaries.
 *
 * @param str the word to build a pattern for
 * @returns a case-insensitive, unicode-mode RegExp
 */
constructEvasionRegex(str: string) {
	const pieces: string[] = [];
	for (const letter of str) {
		const charClass = this.EVASION_DETECTION_SUB_STRINGS[letter] || letter;
		pieces.push(`${charClass}+`);
	}
	const pattern = `\\b${pieces.join('\\.?')}\\b`;
	return new RegExp(pattern, 'iu');
}
// Fill the module-level map: one `[...]` character class per letter of the substitution dictionary.
for (const [letter, variants] of Object.entries(EVASION_DETECTION_SUBSTITUTIONS)) {
	EVASION_DETECTION_SUB_STRINGS[letter] = '[' + variants.join('') + ']';
}
/**
 * Compiles the RegExp for a filter word.
 *
 * @param word the filter word (a regex source unless `isEvasion` is set)
 * @param isEvasion build the pattern with `constructEvasionRegex` instead
 * @param isShortener prefix the pattern with a `\b` word boundary
 * @param isReplacement add the global flag (`igu` instead of `iu`)
 * @throws Chat.ErrorMessage when the pattern cannot be compiled
 */
generateRegex(word: string, isEvasion = false, isShortener = false, isReplacement = false) {
	try {
		if (isEvasion) return this.constructEvasionRegex(word);
		const pattern = isShortener ? `\\b${word}` : word;
		const flags = isReplacement ? 'igu' : 'iu';
		return new RegExp(pattern, flags);
	} catch (e) {
		// Reuse the engine's message when it already carries the standard prefix.
		let message = e.message;
		if (!message.startsWith('Invalid regular expression: ')) {
			message = `Invalid regular expression: /${word}/: ${message}`;
		}
		throw new Chat.ErrorMessage(message);
	}
}
/**
 * Returns a copy of `regex` without a leading and/or trailing `\b` word boundary.
 *
 * Works on `regex.source` rather than on the stringified `/.../flags` form, so a regex
 * that has no boundaries, or that carries flags other than `iu`, keeps its pattern intact
 * instead of having its delimiters and flags embedded into the new pattern.
 *
 * @param regex the regex to strip
 * @returns a new RegExp that always has the flags `iu`
 */
stripWordBoundaries(regex: RegExp) {
	const source = regex.source
		.replace(/^\\b/, '')
		// Only strip a real trailing `\b`: it must not be the tail of an escaped backslash (`\\b`).
		.replace(/((?:^|[^\\])(?:\\\\)*)\\b$/, '$1');
	return new RegExp(source, 'iu');
}
/**
 * Writes every filter list to MONITOR_FILE as JSON.
 *
 * Lists are written for the union of registered monitors and the keys already present in
 * `filterWords`. `load()` deliberately keeps lists whose monitor is not registered, so
 * writing only `Chat.monitors` keys would erase them on the next save. A registered
 * monitor with no list is written as an empty array instead of throwing.
 *
 * @param force when true the write is requested with a throttle of 0; otherwise
 * WRITE_THROTTLE_TIME is passed as the throttle
 */
save(force = false) {
	FS(MONITOR_FILE).writeUpdate(() => {
		const buf: {[k: string]: Partial<FilterWord>[]} = {};
		// Registered monitors first (original ordering), then any extra loaded lists.
		const keys = Object.keys(Chat.monitors);
		for (const key in filterWords) {
			if (!(key in Chat.monitors)) keys.push(key);
		}
		for (const key of keys) {
			buf[key] = (filterWords[key] || []).map(filterWord => {
				const word: Partial<FilterWord> = {...filterWord};
				delete word.regex; // no reason to save this; `load()` rebuilds it from `word`
				return word;
			});
		}
		return JSON.stringify(buf);
	}, {throttle: force ? 0 : WRITE_THROTTLE_TIME});
}
/**
 * Adds a filter word to its list and saves immediately.
 *
 * The passed object is completed in place (`hits` defaults to 0, `regex` is generated
 * when absent) and that same object is what gets stored in the list.
 *
 * @param filterWord the entry to add; `list` names the target list
 * @throws Chat.ErrorMessage when the list already holds an entry with the same regex
 */
add(filterWord: Partial<Chat.FilterWord> & {list: string, word: string}) {
	filterWord.hits = filterWord.hits || 0;
	const {list, word} = filterWord;
	const {punishment} = Chat.monitors[list];
	if (!filterWord.regex) {
		const isEvasion = punishment === 'EVASION';
		const isShortener = punishment === 'SHORTENER';
		filterWord.regex = this.generateRegex(word, isEvasion, isShortener, !!filterWord.replacement);
	}
	// Entries are considered duplicates when their regexes stringify identically.
	const serialized = String(filterWord.regex);
	const isDuplicate = filterWords[list].some(existing => String(existing.regex) === serialized);
	if (isDuplicate) {
		throw new Chat.ErrorMessage(`${word} is already added to the ${list} list.`);
	}
	filterWords[list].push(filterWord as Chat.FilterWord);
	this.save(true);
}
/**
 * Loads the filter words into `filterWords`.
 *
 * If the legacy TSV file exists, it is imported on the next tick, written out in the
 * JSON format, and only then renamed to a `.backup` file. Otherwise the JSON file is
 * parsed and each entry's regex is rebuilt.
 */
load() {
	const legacy = FS(LEGACY_MONITOR_FILE);
	if (legacy.existsSync()) {
		return process.nextTick(() => {
			this.loadLegacy();
			// Persist the migrated data before the legacy file is moved away; otherwise a restart
			// before the next save would find neither the TSV nor a JSON file.
			this.save(true);
			legacy.renameSync(LEGACY_MONITOR_FILE + '.backup');
			Monitor.notice(`Legacy chatfilter data loaded and renamed to a .backup file.`);
		});
	}
	const data = JSON.parse(FS(MONITOR_FILE).readIfExistsSync() || "{}");
	for (const k in data) {
		filterWords[k] = [];
		// Previously, this checked that the monitor existed in Chat.monitors and that there was a
		// proper `[LOCATION, PUNISHMENT]` pair. That is no longer done: a frequent issue with the TSV
		// was that plugins with monitors were not loaded into Chat before the filter words started
		// loading, which crashed and usually led to the words being overwritten and lost.
		// So instead of throwing when the monitor isn't found, the list is added anyway. Either
		// a) the monitor will be loaded later, and all will be well, or
		// b) the monitor doesn't exist anymore, in which case the list can be deleted manually,
		//    or the data will be fine if the monitor is re-added later.

		// Saved entries carry no `punishment` field, so the punishment has to come from the list:
		// from the monitor when it is already registered, otherwise from the list name.
		// NOTE(review): the name fallback assumes the 'evasion' and 'shorteners' lists use the
		// EVASION and SHORTENER punishments - confirm against the monitor registrations.
		const monitor = Chat.monitors[k];
		let listPunishment: string | undefined;
		if (monitor) {
			listPunishment = monitor.punishment;
		} else if (k === 'evasion') {
			listPunishment = 'EVASION';
		} else if (k === 'shorteners') {
			listPunishment = 'SHORTENER';
		}
		for (const entry of data[k]) {
			const punishment = entry.punishment || listPunishment;
			if (punishment === 'EVASION') {
				entry.regex = this.constructEvasionRegex(entry.word);
			} else {
				entry.regex = new RegExp(
					punishment === 'SHORTENER' ? `\\b${entry.word}` : entry.word,
					entry.replacement ? 'igu' : 'iu'
				);
			}
			filterWords[k].push(entry);
		}
	}
}
/**
 * Imports filter words from the legacy TSV file into `filterWords`.
 *
 * Each non-header line holds tab-separated `location, word, punishment, reason, times`
 * followed by optional `replacement` and `publicReason` columns. A line is stored under the
 * first monitor whose location and punishment both match; lines with no matching monitor
 * are reported through `Monitor.crashlog` and skipped. A missing file is a no-op.
 */
loadLegacy() {
	let data;
	try {
		data = FS(LEGACY_MONITOR_FILE).readSync();
	} catch (e) {
		if (e.code !== 'ENOENT') throw e;
	}
	if (!data) return;
	const lines = data.split('\n');
	loop: for (const line of lines) {
		if (!line || line === '\r') continue;
		const [location, word, punishment, reason, times, ...rest] = line.split('\t').map(param => param.trim());
		// skip the header row
		if (location === 'Location') continue;
		if (!(location && word && punishment)) continue;
		for (const key in Chat.monitors) {
			if (Chat.monitors[key].location === location && Chat.monitors[key].punishment === punishment) {
				const replacement = rest[0];
				const publicReason = rest[1];
				let regex: RegExp;
				if (punishment === 'EVASION') {
					// use `this` like the other methods rather than the exported global
					regex = this.constructEvasionRegex(word);
				} else {
					regex = new RegExp(punishment === 'SHORTENER' ? `\\b${word}` : word, replacement ? 'igu' : 'iu');
				}
				// explicit radix so the hit count is always read as decimal
				const filterWord: FilterWord = {regex, word, hits: parseInt(times, 10) || 0};
				// "undefined" is the result of an issue with filter storage.
				// As far as I'm aware, nothing is actually filtered with "undefined" as the reason.
				if (reason && reason !== "undefined") filterWord.reason = reason;
				if (publicReason) filterWord.publicReason = publicReason;
				if (replacement) filterWord.replacement = replacement;
				filterWords[key].push(filterWord);
				continue loop;
			}
		}
		// this is not thrown because we DO NOT WANT SECRET FILTERS TO BE LEAKED, but we want this to be known
		// (this sends the filter line info only in the email, but still reports the crash to Dev)
		Monitor.crashlog(new Error("Couldn't find [location, punishment] pair for a filter word"), "The main process", {
			location, word, punishment, reason, times, rest,
		});
	}
}
};
// Module-local alias for `Chat.filterWords`: arrays of filter words keyed by list name.
const filterWords: {[k: string]: Chat.FilterWord[]} = Chat.filterWords;
/**
 * Builds the evasion-detection regex for a word.
 *
 * Every character of `str` is replaced by its substitution character class (or kept as-is
 * when it has none) and followed by `+`; the pieces are joined by an optional literal dot
 * and the result is wrapped in `\b` word boundaries.
 *
 * @param str the word to build a pattern for
 * @returns a case-insensitive, unicode-mode RegExp
 */
export function constructEvasionRegex(str: string) {
	const pieces: string[] = [];
	for (const letter of str) {
		pieces.push((EVASION_DETECTION_SUB_STRINGS[letter] || letter) + '+');
	}
	const body = pieces.join('\\.?');
	return new RegExp(`\\b${body}\\b`, 'iu');
}
/**
 * Returns a copy of `regex` without a leading and/or trailing `\b` word boundary.
 *
 * Works on `regex.source` rather than on the stringified `/.../flags` form, so a regex
 * that has no boundaries, or that carries flags other than `iu`, keeps its pattern intact
 * instead of having its delimiters and flags embedded into the new pattern.
 *
 * @param regex the regex to strip
 * @returns a new RegExp that always has the flags `iu`
 */
export function stripWordBoundaries(regex: RegExp) {
	const source = regex.source
		.replace(/^\\b/, '')
		// Only strip a real trailing `\b`: it must not be the tail of an escaped backslash (`\\b`).
		.replace(/((?:^|[^\\])(?:\\\\)*)\\b$/, '$1');
	return new RegExp(source, 'iu');
}
/**
 * Renders one filter word as a CRLF-terminated TSV row with the columns
 * location, word, punishment, reason, hits, replacement, publicReason.
 * Missing optional fields are written as empty strings.
 */
function renderEntry(location: string, word: Chat.FilterWord, punishment: string) {
	const columns = [
		location,
		word.word,
		punishment,
		word.reason || '',
		word.hits,
		word.replacement || '',
		word.publicReason || '',
	];
	// String() keeps template-literal semantics (join alone would blank out undefined/null).
	return columns.map(String).join('\t') + '\r\n';
}
/**
 * Writes all filter words of the registered monitors to MONITOR_FILE as TSV:
 * a header row followed by one `renderEntry` row per filter word.
 *
 * @param force when true the write is requested with a throttle of 0; otherwise
 * WRITE_THROTTLE_TIME is passed as the throttle
 */
function saveFilters(force = false) {
	const throttle = force ? 0 : WRITE_THROTTLE_TIME;
	FS(MONITOR_FILE).writeUpdate(() => {
		const rows = ['Location\tWord\tPunishment\tReason\tTimes\r\n'];
		for (const key in Chat.monitors) {
			const {location, punishment} = Chat.monitors[key];
			for (const word of filterWords[key]) {
				rows.push(renderEntry(location, word, punishment));
			}
		}
		return rows.join('');
	}, {throttle});
}
/**
 * Adds a filter word to its list and saves immediately.
 *
 * The passed object is completed in place (`hits` defaults to 0, `regex` is generated
 * when absent) and that same object is what gets stored in the list.
 *
 * @param filterWord the entry to add; `list` names the target list
 * @throws Chat.ErrorMessage when the list already holds an entry with the same regex
 */
export function addFilter(filterWord: Partial<Chat.FilterWord> & {list: string, word: string}) {
	filterWord.hits = filterWord.hits || 0;
	const listName = filterWord.list;
	const punishment = Chat.monitors[listName].punishment;
	if (!filterWord.regex) {
		const isEvasion = punishment === 'EVASION';
		const isShortener = punishment === 'SHORTENER';
		const isReplacement = !!filterWord.replacement;
		filterWord.regex = generateRegex(filterWord.word, isEvasion, isShortener, isReplacement);
	}
	// Entries are considered duplicates when their regexes stringify identically.
	const serialized = String(filterWord.regex);
	for (const existing of filterWords[listName]) {
		if (String(existing.regex) === serialized) {
			throw new Chat.ErrorMessage(`${filterWord.word} is already added to the ${filterWord.list} list.`);
		}
	}
	filterWords[listName].push(filterWord as Chat.FilterWord);
	saveFilters(true);
}
/**
 * Compiles the RegExp for a filter word.
 *
 * @param word the filter word (a regex source unless `isEvasion` is set)
 * @param isEvasion build the pattern with `constructEvasionRegex` instead
 * @param isShortener prefix the pattern with a `\b` word boundary
 * @param isReplacement add the global flag (`igu` instead of `iu`)
 * @throws Chat.ErrorMessage when the pattern cannot be compiled
 */
export function generateRegex(word: string, isEvasion = false, isShortener = false, isReplacement = false) {
	try {
		if (isEvasion) return constructEvasionRegex(word);
		const source = isShortener ? `\\b${word}` : word;
		const flags = isReplacement ? 'igu' : 'iu';
		return new RegExp(source, flags);
	} catch (e) {
		// Reuse the engine's message when it already carries the standard prefix.
		const hasPrefix = e.message.startsWith('Invalid regular expression: ');
		const message = hasPrefix ? e.message : `Invalid regular expression: /${word}/: ${e.message}`;
		throw new Chat.ErrorMessage(message);
	}
}
// Register the chat monitors used
Chat.registerMonitor('autolock', {
location: 'EVERYWHERE',
@ -285,50 +367,6 @@ Chat.registerMonitor('shorteners', {
* Punishment: AUTOLOCK, WARN, FILTERTO, SHORTENER, MUTE, EVASION
*/
/**
 * Reads the TSV filter file and appends its entries to `filterWords`.
 *
 * Each non-header row holds tab-separated `location, word, punishment, reason, times`
 * followed by optional `replacement` and `publicReason` columns. A row is stored under the
 * first monitor whose location and punishment both match; rows with no matching monitor
 * are reported through `Monitor.crashlog` and skipped. A missing file is a no-op.
 */
export function loadFilters() {
	let contents;
	try {
		contents = FS(MONITOR_FILE).readSync();
	} catch (e) {
		if (e.code !== 'ENOENT') throw e;
	}
	if (!contents) return;
	for (const row of contents.split('\n')) {
		if (!row || row === '\r') continue;
		const [location, word, punishment, reason, times, ...rest] = row.split('\t').map(cell => cell.trim());
		// header row
		if (location === 'Location') continue;
		if (!location || !word || !punishment) continue;

		// Find the first monitor registered for this [location, punishment] pair.
		let list: string | null = null;
		for (const key in Chat.monitors) {
			const monitor = Chat.monitors[key];
			if (monitor.location === location && monitor.punishment === punishment) {
				list = key;
				break;
			}
		}
		if (list === null) {
			// this is not thrown because we DO NOT WANT SECRET FILTERS TO BE LEAKED, but we want this to be known
			// (this sends the filter line info only in the email, but still reports the crash to Dev)
			Monitor.crashlog(new Error("Couldn't find [location, punishment] pair for a filter word"), "The main process", {
				location, word, punishment, reason, times, rest,
			});
			continue;
		}

		const [replacement, publicReason] = rest;
		const regex = punishment === 'EVASION' ?
			constructEvasionRegex(word) :
			new RegExp(punishment === 'SHORTENER' ? `\\b${word}` : word, replacement ? 'igu' : 'iu');
		const filterWord: FilterWord = {regex, word, hits: parseInt(times) || 0};
		// "undefined" is the result of an issue with filter storage.
		// As far as I'm aware, nothing is actually filtered with "undefined" as the reason.
		if (reason && reason !== "undefined") filterWord.reason = reason;
		if (publicReason) filterWord.publicReason = publicReason;
		if (replacement) filterWord.replacement = replacement;
		filterWords[list].push(filterWord);
	}
}
/* The sucrase transformation of optional chaining is too expensive to be used in a hot function like this. */
/* eslint-disable @typescript-eslint/prefer-optional-chain */
export const chatfilter: Chat.ChatFilter = function (message, user, room) {
@ -368,7 +406,7 @@ export const chatfilter: Chat.ChatFilter = function (message, user, room) {
const ret = monitor.call(this, line, room, user, message, lcMessage, isStaff);
if (ret !== undefined && ret !== message) {
line.hits++;
saveFilters();
Filters.save();
}
if (typeof ret === 'string') {
message = ret;
@ -406,7 +444,7 @@ export const namefilter: Chat.NameFilter = (name, user) => {
if (Chat.monitors[list].location === 'BATTLES') continue;
const punishment = Chat.monitors[list].punishment;
for (const line of filterWords[list]) {
const regex = (punishment === 'EVASION' ? stripWordBoundaries(line.regex) : line.regex);
const regex = (punishment === 'EVASION' ? Filters.stripWordBoundaries(line.regex) : line.regex);
if (regex.test(lcName)) {
if (Chat.monitors[list].punishment === 'AUTOLOCK') {
void Punishments.autolock(
@ -415,7 +453,7 @@ export const namefilter: Chat.NameFilter = (name, user) => {
);
}
line.hits++;
saveFilters();
Filters.save();
return '';
}
}
@ -458,7 +496,7 @@ export const nicknamefilter: Chat.NicknameFilter = (name, user) => {
// Evasion banwords by default require whitespace on either side.
// If we didn't remove it here, it would be quite easy to evade the filter
// and use slurs in Pokémon nicknames.
regex = stripWordBoundaries(regex);
regex = Filters.stripWordBoundaries(regex);
}
const match = regex.exec(lcName);
@ -476,7 +514,7 @@ export const nicknamefilter: Chat.NicknameFilter = (name, user) => {
);
}
line.hits++;
saveFilters();
Filters.save();
return '';
}
}
@ -503,7 +541,7 @@ export const statusfilter: Chat.StatusFilter = (status, user) => {
for (const list in filterWords) {
const punishment = Chat.monitors[list].punishment;
for (const line of filterWords[list]) {
const regex = (punishment === 'EVASION' ? stripWordBoundaries(line.regex) : line.regex);
const regex = (punishment === 'EVASION' ? Filters.stripWordBoundaries(line.regex) : line.regex);
if (regex.test(lcStatus)) {
if (punishment === 'AUTOLOCK') {
// I'm only locking for true autolock phrases, not evasion of slurs
@ -515,7 +553,7 @@ export const statusfilter: Chat.StatusFilter = (status, user) => {
);
}
line.hits++;
saveFilters();
Filters.save();
return '';
}
}
@ -598,7 +636,7 @@ export const commands: Chat.ChatCommands = {
}
filterWord.word = filterWord.word.trim();
addFilter(filterWord);
Filters.add(filterWord);
const reason = filterWord.reason ? ` (${filterWord.reason})` : '';
if (Chat.monitors[list].punishment === 'FILTERTO') {
this.globalModlog(`ADDFILTER`, null, `'${String(filterWord.regex)} => ${filterWord.replacement}' to ${list} list${reason}`);
@ -628,7 +666,7 @@ export const commands: Chat.ChatCommands = {
filterWords[list] = filterWords[list].filter(entry => !words.includes(entry.word));
this.globalModlog(`REMOVEFILTER`, null, `'${words.join(', ')}' from ${list} list`);
saveFilters(true);
Filters.save(true);
const output = `'${words.join(', ')}' ${Chat.plural(words, "were", "was")} removed from the ${list} list.`;
Rooms.get('upperstaff')?.add(output).update();
if (room?.roomid !== 'upperstaff') this.sendReply(output);
@ -726,5 +764,4 @@ export const commands: Chat.ChatCommands = {
process.nextTick(() => {
Chat.multiLinePattern.register('/filter (add|remove) ');
loadFilters();
});

View File

@ -8,28 +8,28 @@
const assert = require('assert').strict;
const {makeUser} = require('../../users-utils');
const chatMonitor = require('../../../.server-dist/chat-plugins/chat-monitor');
const {Filters} = require('../../../.server-dist/chat-plugins/chat-monitor');
describe('Chat monitor', () => {
describe('regex generator', () => {
it('should generate case-insensitive regexes', () => {
const regex = chatMonitor.generateRegex('slur');
const regex = Filters.generateRegex('slur');
assert(regex.flags.includes('i'));
});
it('should use word boundaries for URL shortener regexes', () => {
const regex = chatMonitor.generateRegex('bit.ly/', false, true);
const regex = Filters.generateRegex('bit.ly/', false, true);
assert(String(regex).startsWith('/\\b'));
});
it('should correctly strip word boundaries', () => {
const regex = /\btest\b/iu;
assert.deepEqual(chatMonitor.stripWordBoundaries(regex), /test/iu);
assert.deepEqual(Filters.stripWordBoundaries(regex), /test/iu);
});
describe('evasion regexes', () => {
before(() => {
this.evasionRegex = chatMonitor.generateRegex('slur', true);
this.evasionRegex = Filters.generateRegex('slur', true);
});
it('should account for stretching', () => {
@ -72,7 +72,7 @@ describe('Chat monitor', () => {
it('should lock users who use autolock phrases', async () => {
assert(!this.user.locked);
chatMonitor.addFilter({
Filters.add({
word: 'autolock',
list: 'autolock',
});
@ -85,7 +85,7 @@ describe('Chat monitor', () => {
it('should lock users who evade evasion phrases', async () => {
assert(!this.user.locked);
chatMonitor.addFilter({
Filters.add({
word: 'slur',
list: 'evasion',
});
@ -97,7 +97,7 @@ describe('Chat monitor', () => {
it('should replace words filtered to other words', async () => {
assert(!this.user.locked);
chatMonitor.addFilter({
Filters.add({
word: 'replace me',
list: 'wordfilter',
replacement: 'i got replaced',
@ -112,7 +112,7 @@ describe('Chat monitor', () => {
it('should prevent filtered words from being said', async () => {
assert(!this.user.locked);
chatMonitor.addFilter({
Filters.add({
word: 'mild slur',
list: 'warn',
});
@ -122,12 +122,12 @@ describe('Chat monitor', () => {
});
it('should prevent banwords and evasion banwords from being used in usernames', () => {
chatMonitor.addFilter({
Filters.add({
word: 'nameslur',
list: 'warn',
});
chatMonitor.addFilter({
Filters.add({
word: 'strongnameslur',
list: 'evasion',
});