// Mirror of https://github.com/GearsProgress/Poke_Transporter_GB.git
// Synced 2026-03-21 17:34:42 -05:00
// 475 lines, 13 KiB, C++
#include "zx0_decompressor.h"
|
|
#include <cstring>
|
|
|
|
// The following code is a custom implementation of the ZX0 decompression algorithm invented by Einar Saukas.
// The original implementation can be found here: https://github.com/einar-saukas/ZX0
// It uses classes, but keeps them completely hidden in this .cpp file inside an anonymous namespace (internal linkage).
// The header provides a C facade to access the relevant functions, so the rest of Poke Transporter GB
// doesn't need to be aware of any of the datatypes/classes defined here.
|
|
namespace
|
|
{
|
|
/**
 * Ring buffer on top of caller-provided storage. ZX0 decompression uses it
 * as the look-back window: every decompressed byte is appended, and copy
 * commands re-read bytes a given distance behind the write position.
 *
 * WARNING: bufferSize MUST be a power of 2!
 * Wraparound is implemented with a bitmask AND (bufferSize - 1) instead of a
 * modulo division, which only gives correct results for power-of-2 sizes.
 */
class RingBuffer
{
public:
    RingBuffer(uint8_t *buffer, const uint16_t bufferSize);

    /**
     * Returns the size of the underlying storage in bytes.
     */
    uint16_t getBufferSize() const;

    /**
     * Copies up to bytesToRead bytes, starting at the read position, into
     * outputBuffer and advances the read position.
     * Returns the number of bytes actually copied (0 when nothing is readable).
     */
    uint16_t read(uint8_t *outputBuffer, uint16_t bytesToRead);

    /**
     * Returns the byte at the read position and advances it.
     * Returns 0 when the read position has caught up with the write position.
     */
    uint8_t readByte();

    /**
     * Appends one byte at the write position. When the buffer is full the
     * oldest byte is dropped.
     */
    void writeByte(uint8_t value);

    /**
     * Moves the read position to <offset> bytes before the current
     * write position (the end of the buffered data).
     */
    void seekBackwardsFromBufferEnd(uint16_t offset);

    /**
     * Empties the ring by moving all positions back to 0.
     */
    void reset();

private:
    uint8_t *buffer_;       // caller-owned storage
    uint16_t ringStartPos_; // index of the oldest byte
    uint16_t ringReadPos_;  // index of the next byte to read
    uint16_t ringEndPos_;   // index where the next byte is written
    uint16_t bufferSize_;   // storage size in bytes (power of 2)
};
|
|
|
|
/**
 * Sequential reader over a byte buffer that hands out data one bit at a
 * time (most significant bit of each byte first).
 */
class BitReader
{
public:
    /**
     * Starts reading at the first bit of <buffer>.
     * Marked explicit so a raw pointer is never silently converted into a reader.
     */
    explicit BitReader(const uint8_t* buffer);

    /**
     * Returns the next bit (0 or 1), fetching a new byte when needed.
     */
    uint8_t readBit();

    /**
     * Reads <numBits> bits and returns them packed MSB-first.
     * The result is 8 bits wide, so numBits should not exceed 8.
     */
    uint8_t read(uint8_t numBits);

    /**
     * Reads the next 8 bits as one byte. When the reader sits exactly on a
     * byte boundary the byte is fetched directly.
     */
    uint8_t readByte();

private:
    const uint8_t* buffer_;    // start of the input
    const uint8_t* curBuffer_; // next byte to fetch
    uint8_t currentByte_;      // byte the bits are currently taken from
    uint8_t bitsLeft_;         // number of unread bits left in currentByte_
};
|
|
|
|
/**
 * Kind of command the decompressor is currently executing.
 */
enum class ZX0OperationType
{
    NONE = 0,             // no command has been read yet
    LITERAL_BLOCK = 1,    // bytes are taken verbatim from the compressed input
    COPY_LAST_OFFSET = 2, // bytes are copied from the window, reusing the previous offset
    COPY_NEW_OFFSET = 3   // bytes are copied from the window at a newly read offset
};
|
|
|
|
typedef struct ZX0Command
|
|
{
|
|
ZX0OperationType cmdType;
|
|
uint16_t length;
|
|
uint16_t offset;
|
|
uint16_t bytePos;
|
|
} ZX0Command;
|
|
|
|
/**
|
|
* @brief This class implements the actual ZX0 decompression.
|
|
*/
|
|
class ZX0Decompressor
|
|
{
|
|
public:
|
|
ZX0Decompressor(uint8_t* decompressionBuffer, uint16_t decompressionBufferSize);
|
|
|
|
/**
|
|
* @brief This function prepares the ZX0Decompressor instance
|
|
* for decompressing the specified inputData
|
|
* @param compressedData
|
|
*/
|
|
void setInput(const uint8_t *inputData);
|
|
|
|
/**
|
|
* @brief Retrieves the size of the data when it is fully decompressed
|
|
* This is read from the first 2 bytes of the inputData
|
|
*/
|
|
uint16_t getDecompressedSize() const;
|
|
|
|
/**
|
|
* @brief This function reads <numBytes> of data into <outputBuffer>
|
|
*/
|
|
uint16_t read(uint8_t *outputBuffer, uint16_t numBytes);
|
|
|
|
/**
|
|
* @brief This function uncompresses from the current point until the
|
|
* specified outputBytePos lies inside the decompressionBuffer
|
|
*/
|
|
void seek(uint16_t outputBytePos);
|
|
protected:
|
|
private:
|
|
void readNextCommand();
|
|
uint16_t copy_block(uint8_t *outputBuffer, uint16_t numBytes);
|
|
|
|
RingBuffer buffer_;
|
|
BitReader reader_;
|
|
ZX0Command cur_command_;
|
|
const uint8_t *inputData_;
|
|
uint16_t bytesDecompressed_;
|
|
uint16_t lastOffset_;
|
|
};
|
|
|
|
RingBuffer::RingBuffer(uint8_t *buffer, const uint16_t bufferSize)
|
|
: buffer_(buffer)
|
|
, ringStartPos_(0)
|
|
, ringReadPos_(0)
|
|
, ringEndPos_(0)
|
|
, bufferSize_(bufferSize)
|
|
{
|
|
}
|
|
|
|
__attribute__((unused))
|
|
uint16_t RingBuffer::getBufferSize() const
|
|
{
|
|
return bufferSize_;
|
|
}
|
|
|
|
__attribute__((unused))
|
|
uint16_t RingBuffer::read(uint8_t *outputBuffer, uint16_t bytesToRead)
|
|
{
|
|
if(ringReadPos_ == ringEndPos_) return 0; // Early exit if empty
|
|
|
|
const uint16_t wrapMask = bufferSize_ - 1;
|
|
uint16_t available;
|
|
uint16_t bytesRead;
|
|
uint16_t chunkSize;
|
|
|
|
available = (ringReadPos_ < ringEndPos_) ? (ringEndPos_ - ringReadPos_) : (bufferSize_ - ringReadPos_);
|
|
|
|
chunkSize = (bytesToRead < available) ? bytesToRead : available;
|
|
memcpy(outputBuffer, buffer_ + ringReadPos_, chunkSize);
|
|
bytesRead = chunkSize;
|
|
ringReadPos_ = (ringReadPos_ + chunkSize) & wrapMask; // wraparound done by bitmask
|
|
bytesToRead -= chunkSize;
|
|
|
|
if(bytesToRead > 0 && ringReadPos_ != ringEndPos_)
|
|
{
|
|
// we need more bytes from the start of the buffer
|
|
available = ringEndPos_;
|
|
chunkSize = (bytesToRead <= ringEndPos_) ? bytesToRead : available;
|
|
memcpy(outputBuffer + bytesRead, buffer_, chunkSize);
|
|
bytesRead += chunkSize;
|
|
ringReadPos_ = chunkSize & wrapMask; // Since we wrapped around to start
|
|
}
|
|
return bytesRead;
|
|
}
|
|
|
|
uint8_t RingBuffer::readByte()
|
|
{
|
|
uint8_t value;
|
|
if(ringReadPos_ == ringEndPos_) return 0; // Early exit if empty
|
|
|
|
value = buffer_[ringReadPos_];
|
|
ringReadPos_ = (ringReadPos_ + 1) & (bufferSize_ - 1);
|
|
return value;
|
|
}
|
|
|
|
void RingBuffer::writeByte(uint8_t value)
|
|
{
|
|
buffer_[ringEndPos_] = value;
|
|
ringEndPos_ = (ringEndPos_ + 1) & (bufferSize_ - 1); // wraparound done by bitmask
|
|
|
|
if(ringEndPos_ == ringStartPos_)
|
|
{
|
|
// buffer is full, overwrite oldest byte
|
|
ringStartPos_ = (ringStartPos_ + 1) & (bufferSize_ - 1); // wraparound done by bitmask
|
|
}
|
|
}
|
|
|
|
void RingBuffer::seekBackwardsFromBufferEnd(uint16_t offset)
|
|
{
|
|
ringReadPos_ = (ringEndPos_ - offset) & (bufferSize_ - 1);
|
|
}
|
|
|
|
void RingBuffer::reset()
|
|
{
|
|
ringStartPos_ = 0;
|
|
ringReadPos_ = 0;
|
|
ringEndPos_ = 0;
|
|
}
|
|
|
|
/**
 * Decodes one Elias-gamma coded number from the bit stream: a run of zero
 * bits announces how many value bits follow the terminating 1 bit.
 * The decoded number is returned zero-based (decoded value minus 1).
 *
 * NOTE(review): the result is 16 bits wide, so this presumably expects at
 * most 15 leading zero bits; longer runs are not guarded against here.
 */
static inline uint16_t read_elias_gamma(BitReader& reader)
{
    // Count the zero bits in front of the terminating 1 bit.
    uint16_t zeroCount = 0;
    for(; reader.readBit() == 0; ++zeroCount)
    {
    }

    // The terminating 1 bit is the most significant bit of the number.
    uint16_t decoded = 1 << zeroCount;

    // The remaining bits follow from high to low.
    for(uint16_t shift = zeroCount; shift-- > 0;)
    {
        decoded |= (reader.readBit() << shift);
    }

    // Adjust back to zero-based
    return decoded - 1;
}
|
|
|
|
/**
 * Reads a new copy offset from the bit stream into <offset>.
 * Layout: 1 flag bit, 7 low bits, and -only if the flag is set- an
 * Elias-gamma coded high part. The stored value is one less than the offset.
 */
static inline void read_new_offset(BitReader& reader, uint16_t& offset)
{
    const bool highPartPresent = (reader.readBit() != 0);
    const uint16_t lowBits = reader.read(7);

    uint16_t highBits = 0;
    if(highPartPresent)
    {
        highBits = read_elias_gamma(reader);
    }

    const uint16_t stored = (highBits << 7) | lowBits;
    offset = stored + 1;
}
|
|
|
|
BitReader::BitReader(const uint8_t* buffer)
|
|
: buffer_(buffer)
|
|
, curBuffer_(buffer)
|
|
, currentByte_(0)
|
|
, bitsLeft_(0)
|
|
{
|
|
}
|
|
|
|
uint8_t BitReader::readBit()
|
|
{
|
|
// Pre-decrement and check underflow
|
|
if (--bitsLeft_ == 0xFF)
|
|
{
|
|
currentByte_ = (*curBuffer_);
|
|
++curBuffer_;
|
|
bitsLeft_ = 7;
|
|
}
|
|
return (currentByte_ >> bitsLeft_) & 1;
|
|
}
|
|
|
|
uint8_t BitReader::read(uint8_t numBits)
|
|
{
|
|
uint8_t result = 0;
|
|
while (numBits--)
|
|
{
|
|
result = (result << 1) | readBit();
|
|
}
|
|
return result;
|
|
}
|
|
|
|
uint8_t BitReader::readByte()
|
|
{
|
|
if(bitsLeft_ == 0)
|
|
{
|
|
const uint8_t value = (*curBuffer_);
|
|
++curBuffer_;
|
|
return value;
|
|
}
|
|
// Handle byte reads across bit boundaries
|
|
return read(8);
|
|
}
|
|
|
|
ZX0Decompressor::ZX0Decompressor(uint8_t* decompressionBuffer, uint16_t decompressionBufferSize)
|
|
: buffer_(decompressionBuffer, decompressionBufferSize)
|
|
, reader_(nullptr)
|
|
, cur_command_({ZX0OperationType::NONE, 0, 0, 0})
|
|
, inputData_(nullptr)
|
|
, bytesDecompressed_(0)
|
|
, lastOffset_(UINT16_MAX)
|
|
{
|
|
}
|
|
|
|
void ZX0Decompressor::setInput(const uint8_t *inputData)
|
|
{
|
|
buffer_.reset();
|
|
reader_ = BitReader(inputData + 2);
|
|
cur_command_ = {ZX0OperationType::NONE, 0, 0, 0};
|
|
inputData_ = inputData;
|
|
bytesDecompressed_ = 0;
|
|
lastOffset_ = UINT16_MAX;
|
|
}
|
|
|
|
uint16_t ZX0Decompressor::getDecompressedSize() const
|
|
{
|
|
if(!inputData_)
|
|
{
|
|
return 0;
|
|
}
|
|
return *((uint16_t*)inputData_);
|
|
}
|
|
|
|
uint16_t ZX0Decompressor::read(uint8_t *outputBuffer, uint16_t numBytes)
|
|
{
|
|
const uint16_t decompressed_size = getDecompressedSize();
|
|
const uint16_t bytesDecompressedBefore = bytesDecompressed_;
|
|
uint16_t bytesRead;
|
|
|
|
while(numBytes)
|
|
{
|
|
if(bytesDecompressed_ == decompressed_size)
|
|
{
|
|
break;
|
|
}
|
|
|
|
// Check if we have finished processing the previous pending command
|
|
// if we have, we need to read a new operation
|
|
if(cur_command_.cmdType == ZX0OperationType::NONE || cur_command_.bytePos >= cur_command_.length)
|
|
{
|
|
readNextCommand();
|
|
}
|
|
|
|
bytesRead = copy_block(outputBuffer + bytesDecompressed_, numBytes);
|
|
numBytes -= bytesRead;
|
|
bytesDecompressed_ += bytesRead;
|
|
}
|
|
|
|
return bytesDecompressed_ - bytesDecompressedBefore;
|
|
}
|
|
|
|
void ZX0Decompressor::seek(uint16_t outputBytePos)
|
|
{
|
|
uint8_t read_buffer[32];
|
|
uint16_t bytesToRead;
|
|
uint16_t chunkSize;
|
|
|
|
// NOTE: outputBytePos denotes the index of the byte in the output (decompressed data) buffer!!
|
|
|
|
// ZX0 doesn't actually have random access
|
|
// so we'll have to simulate it.
|
|
// first check if the specified position is in the backwards direction.
|
|
if(outputBytePos < bytesDecompressed_)
|
|
{
|
|
// the specified position is in earlier decoded data.
|
|
// this will be expensive, because we will have to start decompression from the beginning
|
|
// until the specified position.
|
|
// reset the decoder
|
|
setInput(inputData_);
|
|
bytesToRead = outputBytePos;
|
|
}
|
|
else
|
|
{
|
|
// The specified position is in future -to-be-decoded- decompressed data.
|
|
// so we just need to keep decoding until we're there.
|
|
bytesToRead = outputBytePos - bytesDecompressed_;
|
|
}
|
|
|
|
// Start decompressing until we're at the desired point
|
|
do
|
|
{
|
|
chunkSize = (bytesToRead > sizeof(read_buffer)) ? sizeof(read_buffer) : bytesToRead;
|
|
bytesToRead -= read(read_buffer, chunkSize);
|
|
}
|
|
while(bytesToRead);
|
|
}
|
|
|
|
void ZX0Decompressor::readNextCommand()
|
|
{
|
|
const uint8_t cmdBit = reader_.readBit();
|
|
|
|
// the "COPY_NEW_OFFSET" command adds + 1 to the length, but the other commands don't.
|
|
// given that read_elias_gamma() function is marked "inline", the way I set the length
|
|
// is to avoid having multiple calls to it here. (for code size)
|
|
if(cmdBit)
|
|
{
|
|
read_new_offset(reader_, lastOffset_);
|
|
cur_command_.cmdType = ZX0OperationType::COPY_NEW_OFFSET;
|
|
cur_command_.length = 1;
|
|
cur_command_.offset = lastOffset_;
|
|
cur_command_.bytePos = 0;
|
|
}
|
|
else if(cur_command_.cmdType == ZX0OperationType::LITERAL_BLOCK)
|
|
{
|
|
cur_command_.cmdType = ZX0OperationType::COPY_LAST_OFFSET;
|
|
// copy from new offset and last offset differs in the sense that with the new offset the encoded length is reduced by one
|
|
// and for last offset it isn't. This is likely because you still need to be able to insert a dummy "copy-from-last-offset" operation.
|
|
cur_command_.length = 0;
|
|
cur_command_.offset = lastOffset_;
|
|
}
|
|
else
|
|
{
|
|
cur_command_.cmdType = ZX0OperationType::LITERAL_BLOCK;
|
|
cur_command_.length = 0;
|
|
}
|
|
cur_command_.length += read_elias_gamma(reader_);
|
|
cur_command_.bytePos = 0;
|
|
}
|
|
|
|
uint16_t ZX0Decompressor::copy_block(uint8_t *outputBuffer, uint16_t numBytes)
|
|
{
|
|
const uint16_t available = cur_command_.length - cur_command_.bytePos;
|
|
const uint16_t bytesToRead = (numBytes > available) ? available : numBytes;
|
|
uint16_t bytesRemaining = bytesToRead;
|
|
|
|
if(cur_command_.cmdType == ZX0OperationType::LITERAL_BLOCK)
|
|
{
|
|
// Literal copy
|
|
do
|
|
{
|
|
(*outputBuffer) = reader_.readByte();
|
|
buffer_.writeByte((*outputBuffer));
|
|
++outputBuffer;
|
|
}
|
|
while(--bytesRemaining);
|
|
}
|
|
else
|
|
{
|
|
if(!cur_command_.bytePos)
|
|
{
|
|
buffer_.seekBackwardsFromBufferEnd(cur_command_.offset);
|
|
}
|
|
|
|
do
|
|
{
|
|
*outputBuffer = buffer_.readByte();
|
|
buffer_.writeByte(*outputBuffer);
|
|
++outputBuffer;
|
|
}
|
|
while(--bytesRemaining);
|
|
}
|
|
|
|
cur_command_.bytePos += bytesToRead;
|
|
|
|
return bytesToRead;
|
|
}
|
|
|
|
}
|
|
|
|
// define a global 2 KB decompression buffer in IWRAM.
|
|
// IWRAM is much faster than EXRAM, so it's ideally suited for decompression.
|
|
// 2 KB is a modest/reasonable size to reserve.
|
|
// But this also means we can only have one instance of ZX0Decompressor.
|
|
// This is one of the reasons why it is implemented in the way that it is.
|
|
__attribute__((section(".iwram")))
|
|
static uint8_t decompression_buffer[2048];
|
|
__attribute__((section(".iwram")))
|
|
static ZX0Decompressor decompressor(decompression_buffer, sizeof(decompression_buffer));
|
|
|
|
extern "C"
|
|
{
|
|
void zx0_decompressor_set_input(const uint8_t *input_data)
|
|
{
|
|
decompressor.setInput(input_data);
|
|
}
|
|
|
|
uint16_t zx0_decompressor_get_decompressed_size()
|
|
{
|
|
return decompressor.getDecompressedSize();
|
|
}
|
|
|
|
void zx0_decompressor_seek(uint16_t output_byte_pos)
|
|
{
|
|
decompressor.seek(output_byte_pos);
|
|
}
|
|
|
|
uint16_t zx0_decompressor_read(uint8_t *output_buffer, uint16_t num_bytes)
|
|
{
|
|
return decompressor.read(output_buffer, num_bytes);
|
|
}
|
|
} |