poketcg/tools/compressed_data_extractor.py
2021-04-05 14:32:43 +01:00

169 lines
5.5 KiB
Python

import argparse
import mmap
from math import floor as floor
parser = argparse.ArgumentParser(description='Extracts compressed data.')
parser.add_argument('offsets', metavar='offsets', type=str, nargs='+',
help='start offset(s) of the compressed data')
parser.add_argument('-s', metavar='suffix', dest='suffix', type=str, nargs=1, default=[],
help='suffix for output file names')
parser.add_argument('-d', dest='decompress', action='store_true',
help='whether the output file should be decompressed')
args = parser.parse_args()
def getByteString(offset, len):
with open('baserom.gbc') as rom:
romMap = mmap.mmap(rom.fileno(), 0, access=mmap.ACCESS_READ)
return romMap[offset : offset + len]
def getByte(offset):
return getByteString(offset, 1)[0]
def getFormattedOffset(offset):
return '{:0x}'.format(offset)
def convertWordToInt(byteArr):
assert(len(byteArr) == 2)
return byteArr[1] * 0x100 + byteArr[0]
def getCompressedData(offset):
data = []
pos = offset # init current position
repeatToggle = False
lenByte = 0x00
size = convertWordToInt(getByteString(pos, 2))
#data.extend(getByteString(pos, 2)) don't append size bytes to output
pos += 2
while (size > 0):
cmdByte = getByte(pos)
pos += 1
data.append(cmdByte)
print('{:0x}'.format(cmdByte))
for bit in range(8):
if (cmdByte & (1 << (7 - bit)) != 0):
# copy one byte literally
data.append(getByte(pos))
pos += 1
size -= 1
else:
# copy previous sequence
data.append(getByte(pos))
pos += 1
repeatToggle = not repeatToggle
if (repeatToggle):
# sequence length
lenByte = getByte(pos)
data.append(lenByte)
pos += 1
size -= (lenByte >> 4) + 2
else:
# no sequence length byte if toggle is off
size -= (lenByte & 0x0f) + 2
assert(size >= 0)
# the decompression might finish while still
# reading command bits, so break early when this happens
if (size == 0):
if not repeatToggle:
# extra bytes to match source
data.append(getByte(pos))
data.append(getByte(pos + 1))
break
return bytes(data)
def decompressData(source):
buffer = [0x00 for i in range(0x100)]
sourcePos = 0
bufferPos = 0xef
lenByte = 0x00
repeatToggle = False
decompData = [] # final decompressed data that will be output
while True:
if sourcePos >= len(source):
# got to the end of the data
break
cmdByte = source[sourcePos]
sourcePos += 1
for cmdBit in range(8):
if sourcePos >= len(source):
# got to the end of the data
break
if (cmdByte & (1 << (7 - cmdBit)) != 0):
# copy one byte literally
byteToCopy = source[sourcePos]
decompData.append(byteToCopy)
buffer[bufferPos] = byteToCopy
sourcePos += 1
bufferPos = (bufferPos + 1) % 0x100
else:
# copy previous sequence
repeatToggle = not repeatToggle
if (repeatToggle):
# sequence length
offsetToCopy = source[sourcePos]
lenByte = source[sourcePos + 1]
bytesToCopy = []
curLen = (lenByte >> 4) + 2
for i in range(curLen):
buffer[bufferPos] = buffer[offsetToCopy]
bytesToCopy.append(buffer[offsetToCopy])
offsetToCopy = (offsetToCopy + 1) % 0x100
bufferPos = (bufferPos + 1) % 0x100
decompData.extend(bytesToCopy)
sourcePos += 2
else:
# no sequence length byte if toggle is off
offsetToCopy = source[sourcePos]
bytesToCopy = []
curLen = (lenByte & 0x0f) + 2
for i in range(curLen):
buffer[bufferPos] = buffer[offsetToCopy]
bytesToCopy.append(buffer[offsetToCopy])
offsetToCopy = (offsetToCopy + 1) % 0x100
bufferPos = (bufferPos + 1) % 0x100
decompData.extend(bytesToCopy)
sourcePos += 1
return bytes(decompData)
def outputData(offset, filename, decompress):
comprData = getCompressedData(offset)
data = []
if decompress:
data = decompressData(comprData)
else:
data = comprData
with open(filename + '.bin', 'wb') as outFile:
outFile.write(data)
n = 1
for offsetStr in args.offsets:
offset = int(offsetStr, 16)
filename = 'compressed_data_' + getFormattedOffset(offset)
if (args.suffix):
filename = args.suffix[0]
if (len(args.offsets) > 1):
filename = filename + str(n)
outputData(offset, filename, args.decompress)
n += 1