mirror of
https://github.com/smogon/pokemon-showdown.git
synced 2026-05-15 08:50:10 -05:00
* Lint arrow-body-style * Lint prefer-object-spread Object spread is faster _and_ more readable. This also fixes a few unnecessary object clones. * Enable no-parameter-properties This isn't currently used, but this makes clear that it shouldn't be. * Refactor more Promises to async/await * Remove unnecessary code from getDataMoveHTML etc * Lint prefer-string-starts-ends-with * Stop using no-undef According to the typescript-eslint FAQ, this is redundant with TypeScript, and they're not wrong. This will save us from needing to specify globals in two different places which will be nice.
317 lines
9.1 KiB
TypeScript
317 lines
9.1 KiB
TypeScript
/**
 * Dashycode!
 *
 * Encodes a string into a restricted string containing only alphanumeric
 * characters and dashes.
 *
 * (The name is a riff on Punycode, which is what I originally wanted
 * to use for this purpose, but it turns out Punycode does not work on
 * arbitrary strings.)
 *
 * @author Guangcong Luo <guangcongluo@gmail.com>
 * @license MIT
 */
|
|
|
|
/**
 * Alphabet of the coded (unsafe) part: the character at index `n` stands
 * for the 5-bit value `n`. Note that `2` stands for 0b00000.
 */
const CODE_MAP = "23456789abcdefghijkmnpqrstuvwxyz";
/**
 * ASCII punctuation that gets a short encoding: a character's index in
 * this string is the 5-bit payload of a 0b10 chunk.
 */
const UNSAFE_MAP = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
|
|
|
|
// chunk types:
// 0b00 = 0x0 = block separator ("wait until next dash")
// 0b01 = 0x1 = capitalize
//   0b1_01 = 0x5 = specify capitalization of next 8 alphabetic chars of safe part (8 more bits)
//   0b0_01 = 0x1 = short for 0b00000001_1_01 (capitalize first letter of next 8 alpha chars of safe part)
// 0b10 = 0x2 = ASCII in UNSAFE_MAP (5 more bits)
// 0b11 = 0x3 = other
//   0b0_11 = 0x3 = space
//   0b1_11 = 0x7 = UTF-16 (16 more bits)
|
|
|
|
/**
 * State of a Dashycode bitstream.
 *
 * A given stream is used either for writing or for reading, never for
 * both at the same time.
 */
interface DashyStream {
	/**
	 * Coded characters: the output accumulated so far when writing, or the
	 * input that has not been consumed yet when reading.
	 */
	codeBuf: string;
	/** Pending bits, least-significant bit first. */
	buf: number;
	/**
	 * Number of meaningful bits in `buf`. On a read stream this may drop
	 * below zero once reads run past the end of the coded input.
	 */
	bufLength: number;
}
|
|
|
|
/**
 * Appends the low `writeBufLength` bits of `writeBuf` to a write stream.
 *
 * New bits are stacked above the bits already pending; every complete
 * group of 5 bits is then moved out of the bit buffer and appended to
 * `stream.codeBuf` as one `CODE_MAP` character.
 */
function streamWrite(stream: DashyStream, writeBufLength: number, writeBuf: number) {
	let pendingBits = stream.buf + (writeBuf << stream.bufLength);
	let pendingLength = stream.bufLength + writeBufLength;
	let emitted = '';
	for (; pendingLength >= 5; pendingLength -= 5) {
		emitted += CODE_MAP.charAt(pendingBits & 0x1F);
		pendingBits >>= 5;
	}
	stream.codeBuf += emitted;
	stream.buf = pendingBits;
	stream.bufLength = pendingLength;
}
|
|
|
|
/**
 * Returns the coded string held by a write stream, including whatever
 * bits are still pending in `stream.buf` (emitted as one final character).
 *
 * Trailing `2` characters are dropped: each one is an all-zero 5-bit
 * chunk. The stream itself is not modified.
 */
function streamGetCode(stream: DashyStream) {
	const code = stream.codeBuf + CODE_MAP.charAt(stream.buf);

	// walk back over the trailing `2`s (0b00000 chunks)
	let end = code.length;
	while (end > 0 && code.charAt(end - 1) === '2') end--;
	return code.slice(0, end);
}
|
|
|
|
/**
 * Returns the next bits of a read stream without consuming them.
 *
 * Coded characters are pulled from the front of `stream.codeBuf` into the
 * bit buffer until at least `readLength` bits are buffered or the coded
 * input is exhausted; the buffer is then masked with `readMask` (by
 * default a mask of `readLength` low bits).
 *
 * @throws Error if a character outside `CODE_MAP` is encountered
 */
function streamPeek(stream: DashyStream, readLength: number, readMask: number = 0xFFFF >> (16 - readLength)) {
	for (;;) {
		if (stream.bufLength >= readLength) break;
		const symbol = stream.codeBuf.charAt(0);
		// empty string: no coded input left
		if (!symbol) break;

		const symbolBits = CODE_MAP.indexOf(symbol);
		if (symbolBits < 0) throw new Error("Invalid character in coded buffer");
		stream.codeBuf = stream.codeBuf.slice(1);
		stream.buf += symbolBits << stream.bufLength;
		stream.bufLength += 5;
	}
	return stream.buf & readMask;
}
|
|
|
|
/**
 * Reads `readLength` bits from a read stream and consumes them.
 *
 * The value is obtained through `streamPeek` (masked with `readMask`),
 * after which the bits are shifted out of the buffer.
 */
function streamRead(stream: DashyStream, readLength: number, readMask: number = 0xFFFF >> (16 - readLength)) {
	const bits = streamPeek(stream, readLength, readMask);
	// Note: bufLength is allowed to go negative here, since a stream is
	// treated as having infinitely many trailing 0 bits
	stream.bufLength = stream.bufLength - readLength;
	stream.buf = stream.buf >> readLength;
	return bits;
}
|
|
|
|
/**
 * Encodes an arbitrary string as Dashycode.
 *
 * The result has the form `safePart` or `safePart--unsafePart`:
 * - the safe part holds the letters and digits of `str` (uppercase letters
 *   are lowercased unless `allowCaps` is set), with a `-` wherever a run of
 *   other characters was removed;
 * - the unsafe part is a bitstream, written in `CODE_MAP` characters, that
 *   records the capitalization and the removed characters.
 *
 * Special markers: the empty string encodes as `0--0`; a safe part of `0`
 * together with an unsafe part starting with `0` means there are no safe
 * characters; an unsafe part ending in `2` means the string starts with an
 * unsafe character.
 *
 * @param str the string to encode
 * @param allowCaps if true, uppercase letters are kept as-is in the safe
 *   part instead of being lowercased and recorded in the unsafe part
 * @returns the Dashycode representation of `str`
 * @throws Error if an internal invariant of the encoder is violated
 */
export function encode(str: string, allowCaps = false): string {
	if (!str) return '0--0';

	let safePart = '';
	const unsafeStream: DashyStream = {
		codeBuf: '',
		buf: 0x0,
		bufLength: 0,
	};
	// true while the previous character went to the safe part
	let isSafe = true;
	// number of letters seen in the current alphanumeric run, relative to
	// the start of the current 8-letter capitalization window
	let alphaIndex = 0;
	// pending capitalization chunk: 0 if none, otherwise the 3-bit type
	// 0b101 plus one bit per letter of the current window
	let capBuffer = 0x0;

	const flushCapBuffer = () => {
		if (capBuffer === 0xD) {
			// only the first letter is capitalized: use the short form
			streamWrite(unsafeStream, 3, 0x1);
		} else {
			streamWrite(unsafeStream, 11, capBuffer);
		}
		alphaIndex -= 8;
		capBuffer = 0x0;
	};

	for (let i = 0; i < str.length; i++) {
		const curCharCode = str.charCodeAt(i);
		const isLowercase = (97 <= curCharCode && curCharCode <= 122); // a-z
		const isUppercase = (65 <= curCharCode && curCharCode <= 90); // A-Z
		const isNumeric = (48 <= curCharCode && curCharCode <= 57); // 0-9
		const isAlphanumeric = isLowercase || isUppercase || isNumeric;

		// a capitalization window ends at a non-alphanumeric character or
		// after 8 letters
		if (capBuffer && (!isAlphanumeric || alphaIndex >= 8)) flushCapBuffer();

		if (isAlphanumeric) {
			if (alphaIndex < 0) throw new Error("alphaIndex should be non-negative here");
			if (!isSafe) {
				if (capBuffer) throw new Error("capBuffer shouldn't exist here");
				// close the current unsafe run
				streamWrite(unsafeStream, 2, 0x0);
				isSafe = true;
			}
			if (isUppercase && !allowCaps) {
				safePart += String.fromCharCode(curCharCode + 32);
				// skip over windows that contained no capital letters
				while (alphaIndex >= 8) {
					if (capBuffer) throw new Error("capBuffer shouldn't exist here");
					alphaIndex -= 8;
					streamWrite(unsafeStream, 11, 0x5);
				}
				if (!capBuffer) capBuffer = 0x5;
				capBuffer += 1 << (alphaIndex + 3);
			} else {
				safePart += str.charAt(i);
			}
			// digits do not take up a capitalization bit
			if (isUppercase || isLowercase) alphaIndex++;
			continue;
		}

		if (capBuffer) throw new Error("capBuffer shouldn't exist here");
		alphaIndex = 0;

		if (isSafe && curCharCode === 32) { // space
			// a single space between alphanumerics is just a dash with an
			// empty unsafe run; charCodeAt is NaN past the end of the string
			const nextCharCode = str.charCodeAt(i + 1);
			if ((97 <= nextCharCode && nextCharCode <= 122) || // a-z
				(65 <= nextCharCode && nextCharCode <= 90) || // A-Z
				(48 <= nextCharCode && nextCharCode <= 57)) { // 0-9
				safePart += '-';
				streamWrite(unsafeStream, 2, 0x0);
				continue;
			}
		}

		if (isSafe) {
			// first character of an unsafe run
			safePart += '-';
			isSafe = false;
		}
		if (curCharCode === 32) { // space
			streamWrite(unsafeStream, 3, 0x3);
			continue;
		}
		const unsafeMapIndex = UNSAFE_MAP.indexOf(str.charAt(i));
		if (unsafeMapIndex >= 0) {
			streamWrite(unsafeStream, 7, (unsafeMapIndex << 2) + 0x2);
		} else {
			streamWrite(unsafeStream, 19, (curCharCode << 3) + 0x7);
		}
	}
	// the end of the string also ends the current capitalization window
	if (capBuffer) flushCapBuffer();

	let unsafePart = streamGetCode(unsafeStream);
	if (safePart.startsWith('-')) {
		// string starts with an unsafe character: drop the dash, mark with `2`
		safePart = safePart.slice(1);
		unsafePart = unsafePart + '2';
	}
	if (safePart.endsWith('-')) {
		safePart = safePart.slice(0, -1);
	}
	if (!safePart) {
		// no safe characters at all: mark with `0`, which makes `2` redundant
		safePart = '0';
		unsafePart = '0' + unsafePart;
		if (unsafePart.endsWith('2')) unsafePart = unsafePart.slice(0, -1);
	}
	if (!unsafePart) return safePart;
	return safePart + '--' + unsafePart;
}
|
|
|
|
/**
 * Decodes a Dashycode string back into the string it was encoded from.
 *
 * Everything after the last `--` is treated as the unsafe bitstream; the
 * text before it is the safe part, in which each `-` stands for one run
 * of unsafe chunks (or a single space, if that run is empty).
 *
 * @param codedStr the Dashycode string
 * @returns the decoded string
 * @throws Error if the `0` marker is misplaced, if a capitalization chunk
 *   appears inside an unsafe run, or if the unsafe part contains a
 *   character outside `CODE_MAP`
 */
export function decode(codedStr: string) {
	let lastDashIndex = codedStr.lastIndexOf('--');
	if (lastDashIndex < 0) {
		// no unsafe part; the general decoder below would cope with this
		// as well, but a plain replace should be faster
		return codedStr.replace(/-/g, ' ');
	}

	if (codedStr.charAt(lastDashIndex + 2) === '0') {
		// "no safe chars" marker: only valid in the form `0--0...`
		if (!codedStr.startsWith('0') || lastDashIndex !== 1) {
			throw new Error("Invalid Dashycode");
		}
		lastDashIndex -= 1;
		codedStr = '--' + codedStr.slice(4);
	}
	if (codedStr.endsWith('2')) {
		// "starts unsafe" marker: put back the leading dash
		codedStr = '-' + codedStr.slice(0, -1);
		lastDashIndex += 1;
	}

	const unsafeStream: DashyStream = {
		codeBuf: codedStr.slice(lastDashIndex + 2),
		buf: 0x0,
		bufLength: 0,
	};

	/**
	 * Status:
	 * 1    : awaiting next read
	 * 0    : assume all-lowercase
	 * other: 1 followed by n bits, describing the capitalization of the
	 *        next n bits of alphabetic characters
	 */
	let capBuffer = 1;
	let str = '';

	for (let i = 0; i <= lastDashIndex; i++) {
		const safeChar = codedStr.charAt(i);

		if (safeChar === '-') {
			capBuffer = 1;
			// pull out the next run of unsafe chunks
			let isEmpty = true;
			for (;;) {
				const chunkType = streamRead(unsafeStream, 2, 0x3);
				// separator: go back to parsing safe chars
				if (chunkType === 0x0) break;
				if (chunkType === 0x1) throw new Error("Invalid capitalization token");

				if (chunkType === 0x2) {
					str += UNSAFE_MAP.charAt(streamRead(unsafeStream, 5, 0x1F));
				} else if (streamRead(unsafeStream, 1, 0x1)) {
					str += String.fromCharCode(streamRead(unsafeStream, 16, 0xFFFF));
				} else {
					str += ' ';
				}
				isEmpty = false;
			}
			if (isEmpty && i !== lastDashIndex) str += ' ';
			continue;
		}

		// safe char
		const safeCharCode = codedStr.charCodeAt(i);
		const isLowercase = (97 <= safeCharCode && safeCharCode <= 122); // a-z
		if (!isLowercase) {
			str += safeChar;
			continue;
		}

		if (capBuffer === 1) {
			capBuffer = 0;
			if (streamPeek(unsafeStream, 2, 0x3) === 0x1) {
				const capChunk = streamRead(unsafeStream, 3, 0x7);
				if (capChunk === 0x5) {
					capBuffer = streamRead(unsafeStream, 8, 0xFF) + 0x100;
				} else if (capChunk === 0x1) {
					capBuffer = 0x101;
				}
			}
		}
		const toCapitalize = capBuffer & 0x1;
		capBuffer >>= 1;
		str += toCapitalize ? String.fromCharCode(safeCharCode - 32) : safeChar;
	}
	return str;
}
|
|
|
|
/**
 * Renders the unsafe part of a Dashycode string as a human-readable list
 * of chunks, for debugging.
 *
 * Chunks appear in reverse stream order: the first chunk read ends up
 * rightmost in the output, with the `0` / `2` markers (if present) to the
 * right of all chunks.
 *
 * @param codeBuf the unsafe part (the text after `--`)
 * @param translate if true, chunk types are given names (`-cap`, `space`,
 *   ...); otherwise their raw bit patterns are shown
 * @returns the chunks, each preceded by a space
 * @throws Error if `codeBuf` contains a character outside `CODE_MAP`
 */
export function vizStream(codeBuf: string, translate = true) {
	// collected in reading order, reversed when joined
	const tokens: string[] = [];
	if (codeBuf.startsWith('0')) {
		codeBuf = codeBuf.slice(1);
		tokens.push(' [no safe chars]');
	}
	if (codeBuf.endsWith('2')) {
		codeBuf = codeBuf.slice(0, -1);
		tokens.push(' [start unsafe]');
	}
	const stream: DashyStream = {
		codeBuf,
		buf: 0x0,
		bufLength: 0,
	};

	// reads `bitCount` bits and formats them as zero-padded binary
	const readBinary = (bitCount: number) => {
		const bits = streamRead(stream, bitCount);
		// @ts-ignore
		return bits.toString(2).padStart(bitCount, '0');
	};

	while (stream.bufLength > 0 || stream.codeBuf) {
		const chunkType = streamRead(stream, 2);
		if (chunkType === 0x0) {
			tokens.push(translate ? ' |' : ' 00');
		} else if (chunkType === 0x1) {
			if (streamRead(stream, 1)) {
				tokens.push(' ' + readBinary(8) + (translate ? '-cap' : '_1_01'));
			} else {
				tokens.push(translate ? ' capfirst' : ' 0_01');
			}
		} else if (chunkType === 0x2) {
			tokens.push(' ' + readBinary(5) + (translate ? '-ascii' : '_10'));
		} else {
			// 0x3
			if (streamRead(stream, 1)) {
				tokens.push(' ' + readBinary(16) + (translate ? '-utf' : '_1_11'));
			} else {
				tokens.push(translate ? ' space' : ' 0_11');
			}
		}
	}
	return tokens.reverse().join('');
}
|