Merge pull request #86 from yenatch/master

Refactor the LZ tools and fix BSSReader's eval shenanigans.
This commit is contained in:
Bryan Bishop 2015-03-12 12:03:17 -05:00
commit 46492bd907
5 changed files with 876 additions and 488 deletions

View File

@ -370,7 +370,8 @@ class Sound:
self.asms = []
self.parse()
def parse(self):
def parse_header(self):
self.num_channels = (rom[self.address] >> 6) + 1
self.channels = []
for ch in xrange(self.num_channels):
@ -383,9 +384,9 @@ class Sound:
self.channels += [(current_channel, channel)]
self.labels += channel.labels
asms = []
asms += [generate_label_asm(self.base_label, self.start_address)]
def make_header(self):
asms = []
for i, (num, channel) in enumerate(self.channels):
channel_id = num - 1
@ -397,16 +398,27 @@ class Sound:
comment_text = '; %x\n' % self.address
asms += [(self.address, comment_text, self.address)]
return asms
def parse(self):
    """
    Parse the header, then collect this sound's asm into self.asms.

    The collected list holds the base label, the header produced by
    make_header() and every channel's output, sorted with sort_asms().
    self.last_address is taken from the third field of the last sorted
    entry, and a trailing '; <address>' comment entry is appended there.
    """
    self.parse_header()

    collected = [generate_label_asm(self.base_label, self.start_address)]
    collected.extend(self.make_header())
    for _num, channel in self.channels:
        collected.extend(channel.output)

    collected = sort_asms(collected)

    # Entries are 3-tuples; the last field is the address they end at.
    _, _, self.last_address = collected[-1]
    end = self.last_address
    collected.append((end, '; %x\n' % end, end))

    self.asms += collected
def to_asm(self, labels=[]):
"""insert outside labels here"""
asms = self.asms

View File

@ -913,7 +913,7 @@ class PointerLabelParam(MultiByteParam):
lo, hi = self.bytes[1:3]
else:
lo, hi = self.bytes[0:2]
pointer_part = "{0}{1:2x}{2:2x}".format(self.prefix, hi, lo)
pointer_part = "{0}{1:02x}{2:02x}".format(self.prefix, hi, lo)
# bank positioning matters!
if bank == True or bank == "reverse": # bank, pointer

View File

@ -5,17 +5,22 @@ import sys
import png
from math import sqrt, floor, ceil
import argparse
import yaml
import operator
import configuration
config = configuration.Config()
import pokemon_constants
from pokemon_constants import pokemon_constants
import trainers
import romstr
from lz import Compressed, Decompressed
def load_rom(filename=config.rom_path):
    """
    Load a rom with romstr.RomStr.load and return it as a bytearray.

    filename defaults to config.rom_path. The default is evaluated once,
    when this function is defined.
    """
    rom = romstr.RomStr.load(filename=filename)
    return bytearray(rom)
def rom_offset(bank, address):
@ -124,19 +129,31 @@ def deinterleave_tiles(image, width):
return connect(deinterleave(get_tiles(image), width))
def condense_tiles_to_map(image, pic=0):
    """
    Drop repeated tiles from an image and describe it with a tilemap.

    image: tile data accepted by get_tiles().
    pic:   number of leading tiles (the first frame of a pic) that are
           kept verbatim, duplicates included. 0 means nothing is kept
           verbatim and every tile is deduplicated.

    Returns (new_image, tilemap): the condensed tiles joined with
    connect(), and one index into the condensed tiles per original tile.
    """
    tiles = get_tiles(image)

    # Leave the first frame intact for pics.
    new_tiles = tiles[:pic]
    tilemap = list(range(pic))

    for i, tile in enumerate(tiles[pic:]):
        if tile not in new_tiles:
            new_tiles.append(tile)

        # Match the first frame where possible.
        # Without a first frame (pic == 0) there is nothing to match
        # against, and i % pic would raise ZeroDivisionError.
        if pic and tile == new_tiles[i % pic]:
            tilemap.append(i % pic)
        else:
            tilemap.append(new_tiles.index(tile))

    new_image = connect(new_tiles)
    return new_image, tilemap
def to_file(filename, data):
"""
Apparently open(filename, 'wb').write(bytearray(data)) won't work.
"""
file = open(filename, 'wb')
for byte in data:
file.write('%c' % byte)
@ -144,425 +161,6 @@ def to_file(filename, data):
"""
A rundown of Pokemon Crystal's compression scheme:
Control commands occupy bits 5-7.
Bits 0-4 serve as the first parameter <n> for each command.
"""
lz_commands = {
'literal': 0, # n values for n bytes
'iterate': 1, # one value for n bytes
'alternate': 2, # alternate two values for n bytes
'blank': 3, # zero for n bytes
}
"""
Repeater commands repeat any data that was just decompressed.
They take an additional signed parameter <s> to mark a relative starting point.
These wrap around (positive from the start, negative from the current position).
"""
lz_commands.update({
'repeat': 4, # n bytes starting from s
'flip': 5, # n bytes in reverse bit order starting from s
'reverse': 6, # n bytes backwards starting from s
})
"""
The long command is used when 5 bits aren't enough. Bits 2-4 contain a new control code.
Bits 0-1 are appended to a new byte as 8-9, allowing a 10-bit parameter.
"""
lz_commands.update({
'long': 7, # n is now 10 bits for a new control code
})
max_length = 1 << 10 # can't go higher than 10 bits
lowmax = 1 << 5 # standard 5-bit param
"""
If 0xff is encountered instead of a command, decompression ends.
"""
lz_end = 0xff
class Compressed:
def __init__(self, data=None, commands=lz_commands, debug=False):
self.data = list(bytearray(data))
self.commands = commands
self.debug = debug
self.compress()
def byte_at(self, address):
if address < len(self.data):
return self.data[address]
return None
def compress(self):
"""
This algorithm is greedy.
It aims to match the compressor it's based on as closely as possible.
It doesn't, but in the meantime the output is smaller.
"""
self.address = 0
self.end = len(self.data)
self.output = []
self.literal = []
while self.address < self.end:
# Tally up the number of bytes that can be compressed
# by a single command from the current address.
self.scores = {}
for method in self.commands.keys():
self.scores[method] = 0
# The most common byte by far is 0 (whitespace in
# images and padding in tilemaps and regular data).
address = self.address
while self.byte_at(address) == 0x00:
self.scores['blank'] += 1
address += 1
# In the same vein, see how long the same byte repeats for.
address = self.address
self.iter = self.byte_at(address)
while self.byte_at(address) == self.iter:
self.scores['iterate'] += 1
address += 1
# Do it again, but for alternating bytes.
address = self.address
self.alts = []
self.alts += [self.byte_at(address)]
self.alts += [self.byte_at(address + 1)]
while self.byte_at(address) == self.alts[(address - self.address) % 2]:
self.scores['alternate'] += 1
address += 1
# Check if we can repeat any data that the
# decompressor just output (here, the input data).
# TODO this includes the current command's output
self.matches = {}
last_matches = {}
address = self.address
min_length = 4 # minimum worthwhile length
max_length = 9 # any further and the time loss is too significant
for length in xrange(min_length, min(len(self.data) - address, max_length)):
keyword = self.data[address:address+length]
for offset, byte in enumerate(self.data[:address]):
# offset ranges are -0x80:-1 and 0:0x7fff
if offset > 0x7fff and offset < address - 0x80:
continue
if byte == keyword[0]:
# Straight repeat...
if self.data[offset:offset+length] == keyword:
if self.scores['repeat'] < length:
self.scores['repeat'] = length
self.matches['repeat'] = offset
# In reverse...
if self.data[offset-1:offset-length-1:-1] == keyword:
if self.scores['reverse'] < length:
self.scores['reverse'] = length
self.matches['reverse'] = offset
# Or bitflipped
if self.bit_flip([byte]) == self.bit_flip([keyword[0]]):
if self.bit_flip(self.data[offset:offset+length]) == self.bit_flip(keyword):
if self.scores['flip'] < length:
self.scores['flip'] = length
self.matches['flip'] = offset
if self.matches == last_matches:
break
last_matches = list(self.matches)
# If the scores are too low, try again from the next byte.
if not any(map(lambda x: {
'blank': 1,
'iterate': 2,
'alternate': 3,
'repeat': 3,
'reverse': 3,
'flip': 3,
}.get(x[0], 10000) < x[1], self.scores.items())):
self.literal += [self.data[self.address]]
self.address += 1
else: # payload
# bug: literal [00] is a byte longer than blank 1.
# this bug exists in the target compressor as well,
# so don't fix until we've given up on replicating it.
self.do_literal()
self.do_scored()
# unload any literals we're sitting on
self.do_literal()
self.output += [lz_end]
def bit_flip(self, data):
return [sum(((byte >> i) & 1) << (7 - i) for i in xrange(8)) for byte in data]
def do_literal(self):
if self.literal:
cmd = self.commands['literal']
length = len(self.literal)
self.do_cmd(cmd, length)
# self.address has already been
# incremented in the main loop
self.literal = []
def do_cmd(self, cmd, length):
if length > max_length:
length = max_length
cmd_length = length - 1
if length > lowmax:
output = [(self.commands['long'] << 5) + (cmd << 2) + (cmd_length >> 8)]
output += [cmd_length & 0xff]
else:
output = [(cmd << 5) + cmd_length]
if cmd == self.commands['literal']:
output += self.literal
elif cmd == self.commands['iterate']:
output += [self.iter]
elif cmd == self.commands['alternate']:
output += self.alts
else:
for command in ['repeat', 'reverse', 'flip']:
if cmd == self.commands[command]:
offset = self.matches[command]
# negative offsets are a byte shorter
if self.address - offset <= 0x80:
offset = self.address - offset + 0x80
if cmd == self.commands['repeat']:
offset -= 1 # this is a hack, but it seems to work
output += [offset]
else:
output += [offset / 0x100, offset % 0x100]
if self.debug:
print (
dict(map(reversed, self.commands.items()))[cmd],
length, '\t',
' '.join(map('{:02x}'.format, output))
)
self.output += output
return length
def do_scored(self):
# Which command did the best?
winner, score = sorted(
self.scores.items(),
key=lambda x:(-x[1], [
'blank',
'repeat',
'reverse',
'flip',
'iterate',
'alternate',
'literal',
'long', # hack
].index(x[0]))
)[0]
cmd = self.commands[winner]
length = self.do_cmd(cmd, score)
self.address += length
class Decompressed:
"""
Parse compressed data, usually 2bpp.
parameters:
[compressed data]
[tile arrangement] default: 'vert'
[size of pic] default: None
[start] (optional)
splits output into pic [size] and animation tiles if applicable
data can be fed in from rom if [start] is specified
"""
def __init__(self, lz=None, start=0, debug=False):
# todo: play nice with Compressed
assert lz, 'need something to decompress!'
self.lz = bytearray(lz)
self.byte = None
self.address = 0
self.start = start
self.output = []
self.decompress()
self.compressed_data = self.lz[self.start : self.start + self.address]
# print tuple containing start and end address
if debug: print '(' + hex(self.start) + ', ' + hex(self.start + self.address+1) + '),'
def command_list(self):
"""
Print a list of commands that were used. Useful for debugging.
"""
data = bytearray(self.lz)
address = self.address
while 1:
cmd_addr = address
byte = data[address]
address += 1
if byte == lz_end: break
cmd = (byte >> 5) & 0b111
if cmd == lz_commands['long']:
cmd = (byte >> 2) & 0b111
length = (byte & 0b11) << 8
length += data[address]
address += 1
else:
length = byte & 0b11111
length += 1
name = dict(map(reversed, lz_commands.items()))[cmd]
if name == 'iterate':
address += 1
elif name == 'alternate':
address += 2
elif name in ['repeat', 'reverse', 'flip']:
if data[address] < 0x80:
address += 2
else:
address += 1
elif name == 'literal':
address += length
print name, length, '\t', ' '.join(map('{:02x}'.format, list(data)[cmd_addr:address]))
def decompress(self):
"""
Replica of crystal's decompression.
"""
self.output = []
while True:
self.getCurByte()
if (self.byte == lz_end):
self.address += 1
break
self.cmd = (self.byte & 0b11100000) >> 5
if self.cmd == lz_commands['long']: # 10-bit param
self.cmd = (self.byte & 0b00011100) >> 2
self.length = (self.byte & 0b00000011) << 8
self.next()
self.length += self.byte + 1
else: # 5-bit param
self.length = (self.byte & 0b00011111) + 1
# literals
if self.cmd == lz_commands['literal']:
self.doLiteral()
elif self.cmd == lz_commands['iterate']:
self.doIter()
elif self.cmd == lz_commands['alternate']:
self.doAlt()
elif self.cmd == lz_commands['blank']:
self.doZeros()
else: # repeaters
self.next()
if self.byte > 0x7f: # negative
self.displacement = self.byte & 0x7f
self.displacement = len(self.output) - self.displacement - 1
else: # positive
self.displacement = self.byte * 0x100
self.next()
self.displacement += self.byte
if self.cmd == lz_commands['flip']:
self.doFlip()
elif self.cmd == lz_commands['reverse']:
self.doReverse()
else: # lz_commands['repeat']
self.doRepeat()
self.address += 1
#self.next() # somewhat of a hack
def getCurByte(self):
self.byte = self.lz[self.start+self.address]
def next(self):
self.address += 1
self.getCurByte()
def doLiteral(self):
"""
Copy data directly.
"""
for byte in range(self.length):
self.next()
self.output.append(self.byte)
def doIter(self):
"""
Write one byte repeatedly.
"""
self.next()
for byte in range(self.length):
self.output.append(self.byte)
def doAlt(self):
"""
Write alternating bytes.
"""
self.alts = []
self.next()
self.alts.append(self.byte)
self.next()
self.alts.append(self.byte)
for byte in range(self.length):
self.output.append(self.alts[byte&1])
def doZeros(self):
"""
Write zeros.
"""
for byte in range(self.length):
self.output.append(0x00)
def doFlip(self):
"""
Repeat flipped bytes from output.
eg 11100100 -> 00100111
quat 3 2 1 0 -> 0 2 1 3
"""
for byte in range(self.length):
flipped = sum(1<<(7-i) for i in range(8) if self.output[self.displacement+byte]>>i&1)
self.output.append(flipped)
def doReverse(self):
"""
Repeat reversed bytes from output.
"""
for byte in range(self.length):
self.output.append(self.output[self.displacement-byte])
def doRepeat(self):
"""
Repeat bytes from output.
"""
for byte in range(self.length):
self.output.append(self.output[self.displacement+byte])
sizes = [
@ -930,7 +528,7 @@ def dump_monster_pals():
pal_length = 0x4
for mon in range(251):
name = pokemon_constants.pokemon_constants[mon+1].title().replace('_','')
name = pokemon_constants[mon+1].title().replace('_','')
num = str(mon+1).zfill(3)
dir = 'gfx/pics/'+num+'/'
@ -1088,33 +686,80 @@ def png_to_rgb(palette):
return output
def read_filename_arguments(filename):
int_args = {
def read_yaml_arguments(filename, yaml_filename = os.path.join(config.path, 'gfx.yaml'), path_arguments = ['pal_file']):
# Gather arguments for `filename` from the mapping stored in yaml_filename.
# Returns a dict; it stays empty when yaml_filename does not exist.
# Keys listed in path_arguments have their values joined onto the directory of `filename`.
parsed_arguments = {}
# Read arguments from gfx.yaml if it exists.
if os.path.exists(yaml_filename):
# NOTE(review): yaml.load is called without an explicit safe loader, and the
# file object returned by open() is never closed explicitly. Presumably
# gfx.yaml is trusted project data -- confirm.
yargs = yaml.load(open(yaml_filename))
# Path components of filename with the last extension removed.
# One component is consumed per pass of the while loop below.
dirs = os.path.splitext(filename)[0].split('/')
current_path = os.path.dirname(filename)
# Components already descended into; used to rebuild candidate paths for each key.
path = []
while yargs:
for key, value in yargs.items():
# Follow directories to the bottom while picking up keys.
# Try not to mistake other files for keys.
parsed_path = os.path.join( * (path + [key]) )
# Candidates are the key's path as-is and with '.png' appended.
for guessed_path in map(parsed_path.__add__, ['', '.png']):
if os.path.exists(guessed_path) or '.' in key:
if guessed_path != filename:
# NOTE(review): indentation is not visible in this view. If this `continue`
# belongs to the `for guessed_path` loop, it does not skip the assignment
# below -- confirm the intended nesting.
continue
if key in path_arguments:
value = os.path.join(current_path, value)
parsed_arguments[key] = value
# Stop once every path component has been consumed.
if not dirs:
break
# Descend into the sub-mapping named after the next path component
# (an empty dict ends the while loop).
yargs = yargs.get(dirs[0], {})
path.append(dirs.pop(0))
return parsed_arguments
def read_filename_arguments(filename, yaml_filename = os.path.join(config.path, 'gfx.yaml'), path_arguments = ['pal_file']):
    """
    Infer graphics conversion arguments given a filename.

    If it exists, ./gfx.yaml is traversed for arguments.
    Then additional arguments within the filename (separated with ".") are grabbed.

    Recognized filename arguments:
        w<n>, h<n>, t<n>  -> 'width', 'height', 'tile_padding' as ints
        arrange           -> 'norepeat' and 'tilemap' set to True
        <w>x<h>           -> 'pic_dimensions' as an (int, int) tuple
        anything else     -> that argument set to True

    Returns a dict of the parsed arguments.
    """
    parsed_arguments = {}

    parsed_arguments.update(read_yaml_arguments(
        filename,
        yaml_filename = yaml_filename,
        path_arguments = path_arguments
    ))

    int_arguments = {
        'w': 'width',
        'h': 'height',
        't': 'tile_padding',
    }

    # Filename arguments override yaml.
    arguments = os.path.splitext(filename)[0].lstrip('.').split('.')[1:]
    for argument in arguments:

        # Two dots in a row ("foo..png") produce an empty argument.
        if not argument:
            continue

        # Check for integer arguments first (i.e. "w128").
        arg = argument[0]
        param = argument[1:]
        if param.isdigit():
            arg = int_arguments.get(arg, False)
            if arg:
                parsed_arguments[arg] = int(param)

        elif argument == 'arrange':
            parsed_arguments['norepeat'] = True
            parsed_arguments['tilemap'] = True

        # Pic dimensions (i.e. "6x6").
        elif 'x' in argument and any(char.isdigit() for char in argument):
            # Anything that is not exactly <digits>x<digits> is ignored,
            # including arguments with more than one "x".
            dimensions = argument.split('x')
            if len(dimensions) == 2 and all(part.isdigit() for part in dimensions):
                w, h = dimensions
                parsed_arguments['pic_dimensions'] = (int(w), int(h))

        else:
            parsed_arguments[argument] = True

    return parsed_arguments
@ -1249,24 +894,172 @@ def convert_2bpp_to_png(image, **kwargs):
return width, height, palette, greyscale, bitdepth, px_map
def export_png_to_2bpp(filein, fileout=None, palout=None, tile_padding=0, pic_dimensions=None):
def get_pic_animation(tmap, w, h):
    """
    Generate pic animation data from a combined tilemap of each frame.

    tmap is cut into frames of w * h entries with split(). The first
    frame is the base; every later frame is described relative to it.

    Returns (frame_text, bitmask_text):
        frame_text   - a 'dw .frameN' pointer per frame, then for each
                       frame its label, the index of its bitmask and the
                       tilemap entries that differ from the base frame.
        bitmask_text - each distinct bitmask as '%'-prefixed binary bytes,
                       first tile in the lowest bit.
    """
    frames = list(split(tmap, w * h))
    base = frames.pop(0)
    bitmasks = []

    frame_text = ''
    for i in range(len(frames)):
        frame_text += '\tdw .frame{}\n'.format(i + 1)

    for i, frame in enumerate(frames):
        # A bit is set wherever the frame replaces the base frame's tile.
        # Frames are expected to be as long as the base frame.
        bitmask = [tile != base_tile for tile, base_tile in zip(frame, base)]
        if bitmask not in bitmasks:
            bitmasks.append(bitmask)
        which_bitmask = bitmasks.index(bitmask)

        # Only the replaced tiles are stored with the frame.
        masked_frame = [tile for tile, changed in zip(frame, bitmask) if changed]

        frame_text += '.frame{}\n'.format(i + 1)
        frame_text += '\tdb ${:02x} ; bitmask\n'.format(which_bitmask)
        if masked_frame:
            frame_text += '\tdb {}\n'.format(', '.join(
                '${:02x}'.format(tile) for tile in masked_frame
            ))
        # Blank line after each frame.
        frame_text += '\n'

    bitmask_text = ''
    for i, bitmask in enumerate(bitmasks):
        bitmask_text += '; {}\n'.format(i)
        for bits in split(bitmask, 8):
            # The first tile of each group of eight is the lowest bit.
            byte = sum(int(bit) << position for position, bit in enumerate(bits))
            bitmask_text += '\tdb %{:08b}\n'.format(byte)

    return frame_text, bitmask_text
def dump_pic_animations(addresses={'bitmasks': 'BitmasksPointers', 'frames': 'FramesPointers'}, pokemon=pokemon_constants, rom=None):
    """
    The code to dump pic animations from rom is mysteriously absent.
    Here it is again, but now it dumps images instead of text.
    Said text can then be derived from the images.

    addresses: rom offsets of the 'bitmasks' and 'frames' pointer tables.
               A string is treated as a label and looked up in
               pokecrystal.sym. The dict passed in is not modified.
    pokemon:   mapping of 1-based index to name; 'unown' is skipped.
    rom:       rom contents; load_rom() is used when omitted.

    For each pokemon, front.<size>x<size>.2bpp.lz in gfx/pics/<name>/ is
    decompressed, its tiles are rearranged into one image holding every
    frame, and the result is written next to it and exported to png.
    """
    if rom is None: rom = load_rom()

    # Work on a copy: the default dict is shared between calls, and
    # resolving labels in place would leak one call's offsets into the next.
    addresses = dict(addresses)

    # Labels can be passed in instead of raw addresses.
    labels = [which for which, offset in addresses.items() if type(offset) is str]
    if labels:
        with open('pokecrystal.sym') as sym:
            sym_lines = sym.readlines()
        for which in labels:
            label = addresses[which]
            for line in sym_lines:
                if label in line.split():
                    # Lines start with "bank:address" in hex.
                    addresses[which] = rom_offset(*[int(x, 16) for x in line[:7].split(':')])
                    break

    for i, name in pokemon.items():
        if name.lower() == 'unown': continue

        i -= 1

        directory = os.path.join('gfx', 'pics', name.lower())
        size = sizes[i]

        if i > 151 - 1:
            bank = 0x36
        else:
            bank = 0x35
        address = addresses['frames'] + i * 2
        address = rom_offset(bank, rom[address] + rom[address + 1] * 0x100)

        # Read frame pointers until the pointer list runs into the
        # first address it points at.
        addrs = []
        while address not in addrs:
            addr = rom[address] + rom[address + 1] * 0x100
            addrs.append(rom_offset(bank, addr))
            address += 2
        num_frames = len(addrs)

        # To go any further, we need bitmasks.
        # Bitmasks need the number of frames, which we now have.

        bank = 0x34
        address = addresses['bitmasks'] + i * 2
        address = rom_offset(bank, rom[address] + rom[address + 1] * 0x100)
        length = size ** 2
        num_bytes = (length + 7) // 8
        bitmasks = []
        for _ in range(num_frames):
            bitmask = []
            for byte in rom[ address : address + num_bytes ]:
                # Lowest bit first.
                bitmask += [(byte >> bit) & 1 for bit in range(8)]
            bitmasks.append(bitmask)
            address += num_bytes

        # Back to frames:
        frames = []
        for addr in addrs:
            bitmask = bitmasks[rom[addr]]
            # One replacement tile is stored per set bit.
            num_tiles = sum(bitmask)
            frame = (rom[addr], rom[addr + 1 : addr + 1 + num_tiles])
            frames.append(frame)

        # Every frame starts out as the base frame, then has the tiles
        # flagged in its bitmask replaced.
        tmap = list(range(length)) * (len(frames) + 1)
        for frame_index, frame in enumerate(frames):
            bitmask = bitmasks[frame[0]]
            replacements = iter(frame[1])
            for j, bit in enumerate(bitmask):
                if bit:
                    tmap[(frame_index + 1) * length + j] = next(replacements)

        filename = os.path.join(directory, 'front.{0}x{0}.2bpp.lz'.format(size))
        with open(filename, 'rb') as lz_file:
            tiles = get_tiles(Decompressed(lz_file.read()).output)
        new_tiles = [tiles[index] for index in tmap]
        new_image = connect(new_tiles)
        filename = os.path.splitext(filename)[0]
        to_file(filename, new_image)
        export_2bpp_to_png(filename)
def export_png_to_2bpp(filein, fileout=None, palout=None, **kwargs):
arguments = {
'tile_padding': tile_padding,
'pic_dimensions': pic_dimensions,
'tile_padding': 0,
'pic_dimensions': None,
'animate': False,
'stupid_bitmask_hack': [],
}
arguments.update(kwargs)
arguments.update(read_filename_arguments(filein))
image, palette, tmap = png_to_2bpp(filein, **arguments)
image, arguments = png_to_2bpp(filein, **arguments)
if fileout == None:
fileout = os.path.splitext(filein)[0] + '.2bpp'
to_file(fileout, image)
if tmap != None:
mapout = os.path.splitext(fileout)[0] + '.tilemap'
to_file(mapout, tmap)
tmap = arguments.get('tmap')
if tmap != None and arguments['animate'] and arguments['pic_dimensions']:
# Generate pic animation data.
frame_text, bitmask_text = get_pic_animation(tmap, *arguments['pic_dimensions'])
frames_path = os.path.join(os.path.split(fileout)[0], 'frames.asm')
with open(frames_path, 'w') as out:
out.write(frame_text)
bitmask_path = os.path.join(os.path.split(fileout)[0], 'bitmask.asm')
# The following Pokemon have a bitmask dummied out.
for exception in arguments['stupid_bitmask_hack']:
if exception in bitmask_path:
bitmasks = bitmask_text.split(';')
bitmasks[-1] = bitmasks[-1].replace('1', '0')
bitmask_text = ';'.join(bitmasks)
with open(bitmask_path, 'w') as out:
out.write(bitmask_text)
elif tmap != None and arguments.get('tilemap', False):
tilemap_path = os.path.splitext(fileout)[0] + '.tilemap'
to_file(tilemap_path, tmap)
palette = arguments.get('palette')
if palout == None:
palout = os.path.splitext(fileout)[0] + '.pal'
export_palette(palette, palout)
@ -1299,55 +1092,58 @@ def png_to_2bpp(filein, **kwargs):
Convert a png image to planar 2bpp.
"""
tile_padding = kwargs.get('tile_padding', 0)
pic_dimensions = kwargs.get('pic_dimensions', None)
interleave = kwargs.get('interleave', False)
norepeat = kwargs.get('norepeat', False)
tilemap = kwargs.get('tilemap', False)
arguments = {
'tile_padding': 0,
'pic_dimensions': False,
'interleave': False,
'norepeat': False,
'tilemap': False,
}
arguments.update(kwargs)
with open(filein, 'rb') as data:
width, height, rgba, info = png.Reader(data).asRGBA8()
rgba = list(rgba)
greyscale = info['greyscale']
if type(filein) is str:
filein = open(filein)
assert type(filein) is file
width, height, rgba, info = png.Reader(filein).asRGBA8()
# png.Reader returns flat pixel data. Nested is easier to work with
len_px = 4 # rgba
len_px = len('rgba')
image = []
palette = []
for line in rgba:
newline = []
for px in xrange(0, len(line), len_px):
color = { 'r': line[px ],
'g': line[px+1],
'b': line[px+2],
'a': line[px+3], }
newline += [color]
color = dict(zip('rgba', line[px:px+len_px]))
if color not in palette:
palette += [color]
if len(palette) < 4:
palette += [color]
else:
# TODO Find the nearest match
print 'WARNING: %s: Color %s truncated to' % (filein, color),
color = sorted(palette, key=lambda x: sum(x.values()))[0]
print color
newline += [color]
image += [newline]
assert len(palette) <= 4, 'Palette should be 4 colors, is really %d' % len(palette)
assert len(palette) <= 4, '%s: palette should be 4 colors, is really %d (%s)' % (filein, len(palette), palette)
# Pad out smaller palettes with greyscale colors
hues = {
'white': { 'r': 0xff, 'g': 0xff, 'b': 0xff, 'a': 0xff },
greyscale = {
'black': { 'r': 0x00, 'g': 0x00, 'b': 0x00, 'a': 0xff },
'grey': { 'r': 0x55, 'g': 0x55, 'b': 0x55, 'a': 0xff },
'gray': { 'r': 0xaa, 'g': 0xaa, 'b': 0xaa, 'a': 0xff },
'white': { 'r': 0xff, 'g': 0xff, 'b': 0xff, 'a': 0xff },
}
for hue in hues.values():
preference = 'white', 'black', 'grey', 'gray'
for hue in map(greyscale.get, preference):
if len(palette) >= 4:
break
if hue not in palette:
palette += [hue]
# Sort palettes by luminance
def luminance(color):
rough = { 'r': 4.7,
'g': 1.4,
'b': 13.8, }
return sum(color[key] * rough[key] for key in rough.keys())
palette.sort(key=luminance)
palette.sort(key=lambda x: sum(x.values()))
# Game Boy palette order
palette.reverse()
@ -1391,8 +1187,16 @@ def png_to_2bpp(filein, **kwargs):
top += (quad /2 & 1) << (7 - bit)
image += [bottom, top]
if pic_dimensions:
w, h = pic_dimensions
dim = arguments['pic_dimensions']
if dim:
if type(dim) in (tuple, list):
w, h = dim
else:
# infer dimensions based on width.
w = width / tile_width
h = height / tile_height
if h % w == 0:
h = w
tiles = get_tiles(image)
pic_length = w * h
@ -1410,17 +1214,23 @@ def png_to_2bpp(filein, **kwargs):
image = connect(new_image)
# Remove any tile padding used to make the png rectangular.
image = image[:len(image) - tile_padding * 0x10]
image = image[:len(image) - arguments['tile_padding'] * 0x10]
if interleave:
tmap = None
if arguments['interleave']:
image = deinterleave_tiles(image, num_columns)
if norepeat:
if arguments['pic_dimensions']:
image, tmap = condense_tiles_to_map(image, w * h)
elif arguments['norepeat']:
image, tmap = condense_tiles_to_map(image)
if not tilemap:
tmap = None
if not arguments['tilemap']:
tmap = None
return image, palette, tmap
arguments.update({ 'palette': palette, 'tmap': tmap, })
return image, arguments
def export_palette(palette, filename):
@ -1510,7 +1320,7 @@ def export_png_to_1bpp(filename, fileout=None):
to_file(fileout, image)
def png_to_1bpp(filename, **kwargs):
    """
    Convert a png to 1bpp by way of png_to_2bpp.

    kwargs are passed through to png_to_2bpp. Only the image it returns
    is used; the arguments dict it returns alongside is discarded.
    """
    image, _ = png_to_2bpp(filename, **kwargs)
    return convert_2bpp_to_1bpp(image)

566
pokemontools/lz.py Normal file
View File

@ -0,0 +1,566 @@
# -*- coding: utf-8 -*-
"""
Pokemon Crystal data de/compression.
"""
"""
A rundown of Pokemon Crystal's compression scheme:
Control commands occupy bits 5-7.
Bits 0-4 serve as the first parameter <n> for each command.
"""
lz_commands = {
    'literal':   0, # n values for n bytes
    'iterate':   1, # one value for n bytes
    'alternate': 2, # alternate two values for n bytes
    'blank':     3, # zero for n bytes

    # Repeater commands repeat any data that was just decompressed.
    # They take an additional signed parameter <s> to mark a relative starting point.
    # These wrap around (positive from the start, negative from the current position).
    'repeat':    4, # n bytes starting from s
    'flip':      5, # n bytes in reverse bit order starting from s
    'reverse':   6, # n bytes backwards starting from s

    # The long command is used when 5 bits aren't enough. Bits 2-4 contain a new control code.
    # Bits 0-1 are appended to a new byte as 8-9, allowing a 10-bit parameter.
    'long':      7, # n is now 10 bits for a new control code
}

max_length = 1 << 10 # can't go higher than 10 bits
lowmax = 1 << 5 # standard 5-bit param

# If 0xff is encountered instead of a command, decompression ends.
lz_end = 0xff

# bit_flipped[byte] is byte with its eight bits in reverse order.
# Built with range() so the table is the same on Python 2 and Python 3.
bit_flipped = [
    sum(((byte >> i) & 1) << (7 - i) for i in range(8))
    for byte in range(0x100)
]
class Compressed:
"""
Usage:
lz = Compressed(data).output
or
lz = Compressed().compress(data)
or
c = Compressed()
c.data = data
lz = c.compress()
There are some issues with reproducing the target compressor.
Some notes are listed here:
- the criteria for detecting a lookback is inconsistent
- sometimes lookbacks that are mostly 0s are pruned, sometimes not
- target appears to skip ahead if it can use a lookback soon, stopping the current command short or in some cases truncating it with literals.
- this has been implemented, but the specifics are unknown
- self.min_scores: It's unknown if blank's minimum score should be 1 or 2. Most likely it's 1, with some other hack to account for edge cases.
- may be related to the above
- target does not appear to compress backwards
"""
def __init__(self, *args, **kwargs):
self.min_scores = {
'blank': 1,
'iterate': 2,
'alternate': 3,
'repeat': 3,
'reverse': 3,
'flip': 3,
}
self.preference = [
'repeat',
'blank',
'flip',
'reverse',
'iterate',
'alternate',
#'literal',
]
self.lookback_methods = 'repeat', 'reverse', 'flip'
self.__dict__.update({
'data': None,
'commands': lz_commands,
'debug': False,
'literal_only': False,
})
self.arg_names = 'data', 'commands', 'debug', 'literal_only'
self.__dict__.update(kwargs)
self.__dict__.update(dict(zip(self.arg_names, args)))
if self.data is not None:
self.compress()
def compress(self, data=None):
if data is not None:
self.data = data
self.data = list(bytearray(self.data))
self.indexes = {}
self.lookbacks = {}
for method in self.lookback_methods:
self.lookbacks[method] = {}
self.address = 0
self.end = len(self.data)
self.output = []
self.literal = None
while self.address < self.end:
if self.score():
self.do_literal()
self.do_winner()
else:
if self.literal == None:
self.literal = self.address
self.address += 1
self.do_literal()
self.output += [lz_end]
return self.output
def reset_scores(self):
self.scores = {}
self.offsets = {}
self.helpers = {}
for method in self.min_scores.iterkeys():
self.scores[method] = 0
def bit_flip(self, byte):
return bit_flipped[byte]
def do_literal(self):
if self.literal != None:
length = abs(self.address - self.literal)
start = min(self.literal, self.address + 1)
self.helpers['literal'] = self.data[start:start+length]
self.do_cmd('literal', length)
self.literal = None
def score(self):
self.reset_scores()
map(self.score_literal, ['iterate', 'alternate', 'blank'])
for method in self.lookback_methods:
self.scores[method], self.offsets[method] = self.find_lookback(method, self.address)
# Compatibility:
# If a lookback is close, reduce the scores of other commands
best_method, best_score = max(
self.scores.items(),
key = lambda x: (
x[1],
-self.preference.index(x[0])
)
)
for method in self.lookback_methods:
for address in xrange(self.address+1, self.address+min(best_score, 6)):
if self.find_lookback(method, address)[0] > max(self.min_scores[method], best_score):
# BUG: lookbacks can reduce themselves. This appears to be a bug in the target also.
for m, score in self.scores.items():
self.scores[m] = min(score, address - self.address)
return any(
score
> self.min_scores[method] + int(score > lowmax)
for method, score in self.scores.iteritems()
)
def read(self, address=None):
if address is None:
address = self.address
if 0 <= address < len(self.data):
return self.data[address]
return None
def find_all_lookbacks(self):
for method in self.lookback_methods:
for address, byte in enumerate(self.data):
self.find_lookback(method, address)
def find_lookback(self, method, address=None):
if address is None:
address = self.address
existing = self.lookbacks.get(method, {}).get(address)
if existing != None:
return existing
lookback = 0, None
# Better to not carelessly optimize at the moment.
"""
if address < 2:
return lookback
"""
byte = self.read(address)
if byte is None:
return lookback
direction, mutate = {
'repeat': ( 1, int),
'reverse': (-1, int),
'flip': ( 1, self.bit_flip),
}[method]
# Doesn't seem to help
"""
if mutate == self.bit_flip:
if byte == 0:
self.lookbacks[method][address] = lookback
return lookback
"""
data_len = len(self.data)
is_two_byte_index = lambda index: int(index < address - 0x7f)
for index in self.get_indexes(mutate(byte)):
if index >= address:
break
old_length, old_index = lookback
if direction == 1:
if old_length > data_len - index: break
else:
if old_length > index: continue
if self.read(index) in [None]: continue
length = 1 # we know there's at least one match, or we wouldn't be checking this index
while 1:
this_byte = self.read(address + length)
that_byte = self.read(index + length * direction)
if that_byte == None or this_byte != mutate(that_byte):
break
length += 1
"""
if direction == 1:
if not any(self.data[address+2:address+length]): continue
"""
if length - is_two_byte_index(index) >= old_length - is_two_byte_index(old_index): # XXX >?
# XXX maybe avoid two-byte indexes when possible
lookback = length, index
self.lookbacks[method][address] = lookback
return lookback
def get_indexes(self, byte):
if not self.indexes.has_key(byte):
self.indexes[byte] = []
index = -1
while 1:
try:
index = self.data.index(byte, index + 1)
except ValueError:
break
self.indexes[byte].append(index)
return self.indexes[byte]
def score_literal(self, method):
address = self.address
compare = {
'blank': [0],
'iterate': [self.read(address)],
'alternate': [self.read(address), self.read(address + 1)],
}[method]
# XXX may or may not be correct
if method == 'alternate' and compare[0] == 0:
return
length = 0
while self.read(address + length) == compare[length % len(compare)]:
length += 1
self.scores[method] = length
self.helpers[method] = compare
def do_winner(self):
winners = filter(
lambda (method, score):
score
> self.min_scores[method] + int(score > lowmax),
self.scores.iteritems()
)
winners.sort(
key = lambda (method, score): (
-(score - self.min_scores[method] - int(score > lowmax)),
self.preference.index(method)
)
)
winner, score = winners[0]
length = min(score, max_length)
self.do_cmd(winner, length)
self.address += length
def do_cmd(self, cmd, length):
    """
    Encode one command and append its bytes to self.output.

    cmd:    name of the command (a key of self.commands).
    length: number of bytes the command covers; it is stored as length - 1.

    Bytes emitted, in order:
      - the header: one byte (command << 5 | length - 1), or, when length
        is above lowmax, two bytes ('long' << 5 | command << 2 | high bits
        of length - 1, then the low 8 bits)
      - the helper bytes recorded for the command in self.helpers, if any
      - for commands in self.lookback_methods, the offset from self.offsets

    self.address is read but not modified here.
    """
    start_address = self.address
    cmd_length = length - 1

    if length > lowmax:
        output = [
            (self.commands['long'] << 5)
            + (self.commands[cmd] << 2)
            + (cmd_length >> 8),
            cmd_length & 0xff,
        ]
    else:
        output = [(self.commands[cmd] << 5) + cmd_length]

    # quick hack: a 'blank' command is followed by no operand bytes,
    # so drop whatever helper was recorded for it.
    self.helpers['blank'] = []
    output += self.helpers.get(cmd, [])

    is_lookback = cmd in self.lookback_methods
    if is_lookback:
        offset = self.offsets[cmd]
        distance = start_address - offset
        # Negative offsets are one byte.
        # Positive offsets are two.
        if distance <= 0x7f:
            # Stored as (distance - 1) with the high bit set; this is what
            # Decompressed.get_offset undoes with
            # len(output) - (byte & 0x7f) - 1.
            output.append(distance - 1 + 0x80)
        else:
            # big endian; `//` keeps the high byte an int under true division
            output += [offset // 0x100, offset % 0x100]

    if self.debug:
        print(' '.join(map(str, [
            cmd, length, '\t',
            ' '.join(map('{:02x}'.format, output)),
            self.data[start_address:start_address+length] if is_lookback else '',
        ])))

    self.output += output
class Decompressed:
    """
    Interpret and decompress lz-compressed data, usually 2bpp.

    Usage:
        data = Decompressed(lz).output
    or
        data = Decompressed().decompress(lz)
    or
        d = Decompressed()
        d.lz = lz
        data = d.decompress()

    To decompress from offset 0x80000 in a rom:
        data = Decompressed(rom, start=0x80000).output

    After decompression:
        output          -- list of decompressed byte values
        compressed_data -- the slice of lz that was consumed, terminator included
        used_commands   -- (name, attributes) for every command that was run
    """

    # Defaults; each can be overridden per instance through __init__.
    lz = None                # compressed input
    start = 0                # position in lz where decoding begins
    commands = lz_commands   # command name -> command number
    debug = False            # print the command listing after decompressing

    # Order in which positional constructor arguments are interpreted.
    arg_names = 'lz', 'start', 'commands', 'debug'

    def __init__(self, *args, **kwargs):
        """
        Store the given options and, if compressed data was supplied,
        decompress it right away.

        Positional arguments are matched against arg_names in order;
        keyword arguments are set as instance attributes as-is.
        """
        self.__dict__.update(dict(zip(self.arg_names, args)))
        self.__dict__.update(kwargs)

        # Reverse mapping: command number -> command name.
        self.command_names = dict(map(reversed, self.commands.items()))
        self.address = self.start

        if self.lz is not None:
            self.decompress()
            # The listing only exists once something has been decompressed.
            if self.debug:
                print(self.command_list())

    def command_list(self):
        """
        Return a text listing of the commands that were used, one per line.
        Useful for debugging.

        Each line holds the command name and length, the raw bytes of the
        command and, for repeat-style commands, the output bytes at the
        positions the command pointed to.
        """
        lines = []
        for name, attrs in self.used_commands:
            length = attrs['length']
            address = attrs['address']
            offset = attrs['offset']
            direction = attrs['direction']

            line = '{0}: {1}'.format(name, length)
            line += '\t' + ' '.join(
                '{:02x}'.format(int(byte))
                for byte in self.lz[ address : address + attrs['cmd_length'] ]
            )
            if offset is not None:
                # Index one byte at a time, as _repeat does. A slice with a
                # negative step comes out empty when a reversed repeat runs
                # all the way down to index 0 (the stop value becomes -1).
                repeated_data = [
                    self.output[offset + i * direction]
                    for i in range(length)
                ]
                line += ' [' + ' '.join(map('{:02x}'.format, repeated_data)) + ']'
            lines.append(line + '\n')
        return ''.join(lines)

    def decompress(self, lz=None):
        """
        Decompress self.lz (or `lz`, if given) and return the output.

        Decoding starts at self.address, which __init__ sets to self.start,
        and stops after the lz_end terminator byte has been consumed.

        Returns self.output, a list of byte values.
        """
        if lz is not None:
            self.lz = lz

        self.lz = bytearray(self.lz)

        self.used_commands = []
        self.output = []

        while True:
            cmd_address = self.address
            self.offset = None
            self.direction = None

            if self.byte == lz_end:
                self.next()
                break

            self.cmd = (self.byte & 0b11100000) >> 5

            if self.cmd_name == 'long':
                # 10-bit length; the real command sits in the next three bits
                self.cmd = (self.byte & 0b00011100) >> 2
                self.length = (self.next() & 0b00000011) * 0x100
                self.length += self.next() + 1
            else:
                # 5-bit length
                self.length = (self.next() & 0b00011111) + 1

            self._run_command()

            self.used_commands += [(
                self.cmd_name,
                {
                    'length': self.length,
                    'address': cmd_address,
                    'offset': self.offset,
                    'cmd_length': self.address - cmd_address,
                    'direction': self.direction,
                }
            )]

        # Keep track of the data we just decompressed.
        self.compressed_data = self.lz[self.start : self.address]

        return self.output

    def _run_command(self):
        """
        Call the method named after the current command.

        Looked up on the instance rather than in this class's __dict__, so
        that subclasses inherit the command handlers. Raises KeyError when
        the command has no handler.
        """
        name = self.cmd_name
        handler = getattr(self, name, None) if name is not None else None
        if not callable(handler):
            raise KeyError(name)
        handler()

    @property
    def byte(self):
        """The byte at the current read position, without consuming it."""
        return self.lz[ self.address ]

    def next(self):
        """Return the byte at the current read position and step past it."""
        byte = self.byte
        self.address += 1
        return byte

    @property
    def cmd_name(self):
        """Name of the current command, or None if its number is not in the table."""
        return self.command_names.get(self.cmd)

    def get_offset(self):
        """
        Read a repeat offset from the input and store it in self.offset
        as an index into self.output.
        """
        if self.byte >= 0x80:
            # negative: one byte, counted back from the end of the output so far
            offset = self.next() & 0x7f
            offset = len(self.output) - offset - 1
        else:
            # positive: two bytes, big endian
            offset = self.next() * 0x100
            offset += self.next()

        self.offset = offset

    def literal(self):
        """
        Copy data directly.
        """
        self.output += self.lz[ self.address : self.address + self.length ]
        self.address += self.length

    def iterate(self):
        """
        Write one byte repeatedly.
        """
        self.output += [self.next()] * self.length

    def alternate(self):
        """
        Write alternating bytes.
        """
        alts = [self.next(), self.next()]
        self.output += [ alts[x & 1] for x in range(self.length) ]

    def blank(self):
        """
        Write zeros.
        """
        self.output += [0] * self.length

    def flip(self):
        """
        Repeat flipped bytes from output.
        Example: 11100100 -> 00100111
        """
        self._repeat(table=bit_flipped)

    def reverse(self):
        """
        Repeat reversed bytes from output.
        """
        self._repeat(direction=-1)

    def repeat(self):
        """
        Repeat bytes from output.
        """
        self._repeat()

    def _repeat(self, direction=1, table=None):
        """
        Copy self.length bytes from earlier output, starting at the offset
        read from the input and stepping by `direction`. If `table` is
        given, each byte is passed through it before being written.
        """
        self.get_offset()
        self.direction = direction
        # Note: appends must be one at a time (this way, repeats can draw from themselves if required)
        for i in range(self.length):
            byte = self.output[ self.offset + i * direction ]
            self.output.append( table[byte] if table else byte )

View File

@ -58,7 +58,7 @@ class BSSReader:
if token in ['ds', 'db', 'dw']:
if any(params):
length = eval(rgbasm_to_py(params[0]), self.constants)
length = eval(rgbasm_to_py(params[0]), self.constants.copy())
else:
length = {'ds': 1, 'db': 1, 'dw': 2}[token]
self.address += length
@ -172,7 +172,7 @@ class BSSReader:
real = split_line[index]
name, value = map(' '.join, [split_line[:index], split_line[index+1:]])
value = rgbasm_to_py(value)
self.constants[name] = eval(value, self.constants)
self.constants[name] = eval(value, self.constants.copy())
else:
self.read_bss_line(line)
@ -195,7 +195,7 @@ def scrape_constants(text):
bss = BSSReader()
bss.read_bss_sections(text)
constants = bss.constants
return constants
return {v: k for k, v in constants.items()}
def read_constants(filepath):
"""