Improve smol compressor (#7073)

Co-authored-by: Hedara <hedara90@gmail.com>
This commit is contained in:
hedara90 2025-06-09 18:28:00 +02:00 committed by GitHub
parent bc864c2519
commit 260813fc3f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 961 additions and 1210 deletions

View File

@ -319,7 +319,7 @@ clean-assets:
rm -f $(DATA_ASM_SUBDIR)/layouts/layouts.inc $(DATA_ASM_SUBDIR)/layouts/layouts_table.inc
rm -f $(DATA_ASM_SUBDIR)/maps/connections.inc $(DATA_ASM_SUBDIR)/maps/events.inc $(DATA_ASM_SUBDIR)/maps/groups.inc $(DATA_ASM_SUBDIR)/maps/headers.inc $(DATA_SRC_SUBDIR)/map_group_count.h
find sound -iname '*.bin' -exec rm {} +
find . \( -iname '*.1bpp' -o -iname '*.4bpp' -o -iname '*.8bpp' -o -iname '*.gbapal' -o -iname '*.lz' -o -iname '*.smol' -o -iname '*.fastSmol' -o -iname '*.rl' -o -iname '*.latfont' -o -iname '*.hwjpnfont' -o -iname '*.fwjpnfont' \) -exec rm {} +
find . \( -iname '*.1bpp' -o -iname '*.4bpp' -o -iname '*.8bpp' -o -iname '*.gbapal' -o -iname '*.lz' -o -iname '*.smol' -o -iname '*.fastSmol' -o -iname '*.smolTM' -o -iname '*.rl' -o -iname '*.latfont' -o -iname '*.hwjpnfont' -o -iname '*.fwjpnfont' \) -exec rm {} +
find $(DATA_ASM_SUBDIR)/maps \( -iname 'connections.inc' -o -iname 'events.inc' -o -iname 'header.inc' \) -exec rm {} +
tidy: tidymodern tidycheck tidydebug

View File

@ -1,14 +1,14 @@
CXX ?= g++
CXXFLAGS := -Werror -std=c++17 -pthread -O2 -Wunused
CXXFLAGS := -Werror -std=c++17 -O2
INCLUDES := -I .
SRCS := compresSmol.cpp compressAlgo.cpp tANS.cpp fileDispatcher.cpp
TILEMAP_SRCS := mainTiles.cpp compressAlgo.cpp compressSmolTiles.cpp tANS.cpp fileDispatcher.cpp
SRCS := compresSmol.cpp compressAlgo.cpp tANS.cpp
TILEMAP_SRCS := mainTiles.cpp compressSmolTiles.cpp tANS.cpp compressAlgo.cpp
HEADERS := compressAlgo.h tANS.h fileDispatcher.h
TILEMAP_HEADERS := compressAlgo.h compressSmolTiles.h tANS.h fileDispatcher.h
HEADERS := compressAlgo.h tANS.h
TILEMAP_HEADERS := compressSmolTiles.h tANS.h compressAlgo.h
ifeq ($(OS),Windows_NT)
EXE := .exe

View File

@ -18,7 +18,6 @@ bool isNumber(std::string input)
}
enum Option {
ANALYZE,
WRITE,
FRAME_WRITE,
DECODE,
@ -31,15 +30,12 @@ int main(int argc, char *argv[])
bool printUsage = false;
std::string input;
std::string output;
int numThreads = 1;
InputSettings settings(true, true, true);
if (argc > 1)
{
std::string argument = argv[1];
if (argument.compare("-a") == 0)
option = ANALYZE;
else if (argument.compare("-w") == 0)
if (argument.compare("-w") == 0)
option = WRITE;
else if (argument.compare("-fw") == 0)
option = FRAME_WRITE;
@ -48,46 +44,12 @@ int main(int argc, char *argv[])
}
switch (option)
{
case ANALYZE:
if (argc > 2)
input = argv[2];
else
printUsage = true;
if (argc > 4)
{
std::string arg2 = argv[3];
std::string arg2arg = argv[4];
if (arg2.compare("-t") == 0 && isNumber(arg2arg))
numThreads = std::stoi(arg2arg.c_str());
}
if (argc > 7)
{
std::string setting1 = argv[5];
std::string setting2 = argv[6];
std::string setting3 = argv[7];
if (setting1.compare("true") == 0)
settings.canEncodeLO = true;
else if (setting1.compare("false") == 0)
settings.canEncodeLO = false;
else
fprintf(stderr, "Unrecognized setting1 \"%s\", defaulting to \"true\"\n", setting1.c_str());
if (setting2.compare("true") == 0)
settings.canEncodeSyms = true;
else if (setting2.compare("false") == 0)
settings.canEncodeSyms = false;
else
fprintf(stderr, "Unrecognized setting2 \"%s\", defaulting to \"true\"\n", setting2.c_str());
if (setting3.compare("true") == 0)
settings.canDeltaSyms = true;
else if (setting3.compare("false") == 0)
settings.canDeltaSyms = false;
else
fprintf(stderr, "Unrecognized setting3 \"%s\", defaulting to \"true\"\n", setting3.c_str());
}
break;
case FRAME_WRITE:
// Not implemented yet
fprintf(stderr, "Frame writing isn't implemented yet\n");
settings.useFrames = true;
option = WRITE;
return 1;
case WRITE:
if (argc > 3)
{
@ -141,10 +103,6 @@ int main(int argc, char *argv[])
if (printUsage)
{
printf("Usage:\n\
%s -a \"path/to/some/directory\"\n\
Analyses all images currently in .4bpp.lz format and compares with this compression.\n\
-t <number> can be appended to this mode to specify how many threads to use.\n\
\n\
%s -w \"path/to/some/file.4bpp\" \"path/to/some/file.4bpp.smol\"\
Compresses the first argument and writes the result to the second argument.\n\
These modes can also be appended with 4 true/false statements that control the following settings of the compression:\n\
@ -153,64 +111,11 @@ int main(int argc, char *argv[])
- If the compression instructions can be delta encoded.\n\
- If the raw symbols in the compression ca be delta encoded.\n\
%s -d \"path/to/some/file.4bpp.smol\" \"path/to/some/file.4bpp\"\n\
Decompresses the first argument and writes it to the second argument.", argv[0], argv[0], argv[0]);
Decompresses the first argument and writes it to the second argument.", argv[0], argv[0]);
return 0;
}
if (option == ANALYZE)
{
std::filesystem::path dirPath = input;
FileDispatcher dispatcher(dirPath);
if (!dispatcher.initFileList())
{
fprintf(stderr, "Failed to init file list\n");
return 1;
}
std::mutex dispatchMutex;
std::vector<CompressedImage> allImages;
std::mutex imageMutex;
settings.shouldCompare = true;
std::vector<std::thread> threads;
for (int i = 0; i < numThreads; i++)
{
threads.emplace_back(analyzeImages, &allImages, &imageMutex,
&dispatcher, &dispatchMutex,
settings);
}
for (int i = 0; i < numThreads; i++)
threads[i].join();
size_t lzSizes = 0;
size_t newSizes = 0;
size_t rawSizes = 0;
size_t totalImages = 0;
size_t invalidImages = 0;
for (CompressedImage currImage : allImages)
{
totalImages++;
if (currImage.isValid)
{
lzSizes += currImage.lzSize;
newSizes += currImage.compressedSize;
rawSizes += currImage.rawNumBytes;
}
else
{
fprintf(stderr, "Failed to solve %s\n", currImage.fileName.c_str());
invalidImages++;
}
}
fprintf(stderr, "RawSize: %zu\n", rawSizes);
fprintf(stderr, "LZsize: %zu\n", lzSizes);
fprintf(stderr, "SmolSize: %zu\n", newSizes);
fprintf(stderr, "Total Images: %zu\n", totalImages);
fprintf(stderr, "Invalid Images: %zu\n", invalidImages);
}
if (option == WRITE)
{
if (std::filesystem::exists(input))
@ -240,8 +145,13 @@ int main(int argc, char *argv[])
{
if (std::filesystem::exists(input))
{
std::vector<unsigned int> inData = readFileAsUInt(input);
std::vector<unsigned short> image4bpp = readRawDataVecs(&inData);
std::vector<unsigned int> inData;
if (!readFileAsUInt(input, &inData))
{
return 0;
}
std::vector<unsigned short> image4bpp;
readRawDataVecs(&inData, &image4bpp);
std::vector<unsigned char> charVec(image4bpp.size()*2);
for (size_t i = 0; i < image4bpp.size(); i++)
{

File diff suppressed because it is too large Load Diff

View File

@ -54,20 +54,7 @@ struct ShortCopy {
size_t length;
size_t offset;
unsigned short firstSymbol;
std::vector<unsigned short> usSequence;
ShortCopy();
ShortCopy(size_t index, size_t length, size_t offset, std::vector<unsigned short> usSequence);
};
struct CompressionInstruction {
size_t length;
size_t offset;
size_t index;
unsigned char firstSymbol;
std::vector<unsigned char> symbols;
std::vector<unsigned char> bytes;
void buildBytes();
bool verifyInstruction();
ShortCopy(size_t index, size_t length, size_t offset, unsigned short firstSymbol);
};
struct ShortCompressionInstruction {
@ -75,20 +62,9 @@ struct ShortCompressionInstruction {
size_t offset;
size_t index;
unsigned short firstSymbol;
std::vector<unsigned short> symbols;
std::vector<unsigned char> loBytes;
std::vector<unsigned short> symShorts;
void buildBytes();
bool verifyInstruction();
};
struct SortedShortElement {
size_t index;
ShortCopy copy;
bool isRun = false;
bool isCopy = false;
SortedShortElement();
SortedShortElement(size_t index, ShortCopy copy);
void buildBytes(std::vector<unsigned short> *pInput);
};
struct CompressedImage {
@ -116,7 +92,6 @@ struct InputSettings {
bool canEncodeLO = true;
bool canEncodeSyms = true;
bool canDeltaSyms = true;
bool shouldCompare = false;
bool useFrames = false;
InputSettings();
InputSettings(bool canEncodeLO, bool canEncodeSyms, bool canDeltaSyms);
@ -127,45 +102,38 @@ struct DataVecs {
std::vector<unsigned short> symVec;
};
void analyzeImages(std::vector<CompressedImage> *allImages, std::mutex *imageMutex, FileDispatcher *dispatcher, std::mutex *dispatchMutex, InputSettings settings);
CompressedImage processImage(std::string fileName, InputSettings settings);
CompressedImage processImageFrames(std::string fileName, InputSettings settings);
CompressedImage processImageData(std::vector<unsigned char> input, InputSettings settings, std::string fileName);
bool processImageData(std::vector<unsigned char> *pInput, CompressedImage *pImage, InputSettings settings, std::string fileName);
std::vector<unsigned int> readFileAsUInt(std::string filePath);
bool readFileAsUInt(std::string filePath, std::vector<unsigned int> *pFileData);
size_t getCompressedSize(CompressedImage *pImage);
std::vector<ShortCopy> getShortCopies(std::vector<unsigned short> input, size_t minLength);
bool getShortCopies(std::vector<unsigned short> *pInput, size_t minLength, std::vector<ShortCopy> *pShortCopies);
bool verifyShortCopies(std::vector<ShortCopy> *pCopies, std::vector<unsigned short> *pImage);
std::vector<int> getNormalizedCounts(std::vector<size_t> input);
std::vector<unsigned int> getFreqWriteInts(std::vector<int> input);
std::vector<unsigned int> getNewHeaders(CompressionMode mode, size_t imageSize, size_t symLength, int initialState, size_t bitstreamSize, size_t loLength);
int findInitialState(EncodeCol encodeCol, unsigned char firstSymbol);
CompressedImage fillCompressVecNew(std::vector<unsigned char> loVec, std::vector<unsigned short> symVec, CompressionMode mode, size_t imageBytes, std::string name);
std::vector<ShortCompressionInstruction> getShortInstructions(std::vector<ShortCopy> copies, size_t lengthMod);
std::vector<unsigned char> getLosFromInstructions(std::vector<ShortCompressionInstruction> instructions);
std::vector<unsigned short> getSymsFromInstructions(std::vector<ShortCompressionInstruction> instructions);
int findInitialState(EncodeCol *encodeCol, unsigned char firstSymbol);
bool fillCompressVec(std::vector<unsigned char> *pLoVec, std::vector<unsigned short> *pSymVec, CompressionMode mode, size_t imageBytes, std::string name, CompressedImage *pOutput);
bool getShortInstructions(std::vector<ShortCopy> *pCopies, std::vector<ShortCompressionInstruction> *pInstructions, std::vector<unsigned short> *pInput);
void getLosFromInstructions(std::vector<ShortCompressionInstruction> *pInstructions, std::vector<unsigned char> *pOutput);
void getSymsFromInstructions(std::vector<ShortCompressionInstruction> *pInstructions, std::vector<unsigned short> *pOutput);
std::vector<int> unpackFrequencies(unsigned int pInts[3]);
CompressedImage getDataFromUIntVec(std::vector<unsigned int> *pInput);
CompressedImage readNewHeader(std::vector<unsigned int> *pInput);
std::vector<unsigned int> getUIntVecFromData(CompressedImage *pImage);
void readNewHeader(std::vector<unsigned int> *pInput, CompressedImage *pOutput);
void getUIntVecFromData(CompressedImage *pImage, std::vector<unsigned int> *pOutput);
std::vector<unsigned short> decodeBytesShort(std::vector<unsigned char> *pLoVec, std::vector<unsigned short> *pSymVec);
std::vector<unsigned short> decodeImageShort(CompressedImage *pInput);
DataVecs decodeDataVectorsNew(CompressedImage *pInput);
size_t decodeNibbles(std::vector<DecodeCol> decodeTable, std::vector<unsigned int> *bits, int *currState, std::vector<unsigned char> *nibbleVec, size_t currBitIndex, size_t numNibbles);
bool compareVectorsShort(std::vector<unsigned short> *pVec1, std::vector<unsigned short> *pVec2);
bool verifyCompressionShort(CompressedImage *pInput, std::vector<unsigned short> *pImage);
bool verifyBytesShort(std::vector<unsigned char> *pLoVec, std::vector<unsigned short> *pSymVec, std::vector<unsigned short> *pImage);
bool verifyUIntVecShort(std::vector<unsigned int> *pInput, std::vector<unsigned short> *pImage);
std::vector<unsigned short> readRawDataVecs(std::vector<unsigned int> *pInput);
bool verifyBytesShort(std::vector<unsigned char> *pLoVec, std::vector<unsigned short> *pSymVec, std::vector<unsigned short> *pImage);
void readRawDataVecs(std::vector<unsigned int> *pInput, std::vector<unsigned short> *pOutput);
bool isModeLoEncoded(CompressionMode mode);
bool isModeSymEncoded(CompressionMode mode);
@ -175,4 +143,5 @@ void deltaEncode(std::vector<unsigned char> *buffer, int length);
void deltaDecode(std::vector<unsigned char> *buffer, int length);
std::vector<int> getTestFreqs(std::vector<int> freqs, std::string name);
#endif

View File

@ -91,15 +91,19 @@ std::vector<unsigned short> decompressVector(std::vector<unsigned short> *pVec)
CompressVectors compressVector(std::vector<unsigned short> *pVec)
{
CompressVectors vecs;
std::vector<ShortCopy> shortCopies = getShortCopies(*pVec, 2);
std::vector<ShortCopy> shortCopies;
getShortCopies(pVec, 2, &shortCopies);
if (!verifyShortCopies(&shortCopies, pVec))
{
fprintf(stderr, "Error getting tile-number compression\n");
return vecs;
}
std::vector<ShortCompressionInstruction> shortInstructions = getShortInstructions(shortCopies, 0);
std::vector<unsigned char> loVec = getLosFromInstructions(shortInstructions);
std::vector<unsigned short> symVec = getSymsFromInstructions(shortInstructions);
std::vector<ShortCompressionInstruction> shortInstructions;
getShortInstructions(&shortCopies, &shortInstructions, pVec);
std::vector<unsigned char> loVec;
getLosFromInstructions(&shortInstructions, &loVec);
std::vector<unsigned short> symVec;
getSymsFromInstructions(&shortInstructions, &symVec);
if (!verifyBytesShort(&loVec, &symVec, pVec))
{

View File

@ -19,6 +19,7 @@ int main(int argc, char *argv[])
fileOut.close();
return 0;
}
/*
else if (argc == 2)
{
std::filesystem::path filePath = argv[1];
@ -66,6 +67,7 @@ int main(int argc, char *argv[])
printf("New size: %zu\n", totalSize);
return 0;
}
*/
else
{
return 0;

View File

@ -1,4 +1,5 @@
#include "tANS.h"
#include <stdio.h>
std::vector<DecodeCol> createDecodingTable(std::vector<unsigned char> symbols, std::vector<int> frequencies)
{
@ -8,6 +9,7 @@ std::vector<DecodeCol> createDecodingTable(std::vector<unsigned char> symbols, s
{
for (size_t j = 0; j < frequencies[i]; j++)
{
//printf("%zu %zu\n", table.size(), currCol);
table[currCol].state = TANS_TABLE_SIZE + currCol;
table[currCol].symbol = symbols[i];
table[currCol].y = frequencies[i] + j;