Python 3 compatibility, fix decode errors. Closes #5

This commit is contained in:
William Toohey 2017-06-26 19:39:53 +10:00
parent e61daef7a3
commit 01eee84a1c
5 changed files with 129 additions and 99 deletions

View File

@ -2,6 +2,12 @@
An encoder/decoder for Konami's binary XML format, used in some of their games. An encoder/decoder for Konami's binary XML format, used in some of their games.
### Setup:
`pip install bitarray`
Python 2 only:
`pip install future`
```python ```python
In [1]: from kbinxml import KBinXML In [1]: from kbinxml import KBinXML
In [2]: text = KBinXML('<?xml version="1.0"?><root __type="str">Hello, world!</root>') In [2]: text = KBinXML('<?xml version="1.0"?><root __type="str">Hello, world!</root>')

View File

@ -7,6 +7,8 @@ class ByteBuffer():
if isinstance(input, bytearray): if isinstance(input, bytearray):
self.data = input self.data = input
else: else:
if not isinstance(input, bytes):
input = input.encode('utf-8')
self.data = bytearray(input) self.data = bytearray(input)
self.endian = endian self.endian = endian
self.offset = offset self.offset = offset
@ -34,14 +36,14 @@ class ByteBuffer():
def append(self, data, type, count = None): def append(self, data, type, count = None):
fmt = self._format_type(type, count) fmt = self._format_type(type, count)
self.offset += calcsize(fmt) self.offset += calcsize(fmt)
if isinstance(data, list) or isinstance(data, bytes) and type != 's': if count and count > 1 or isinstance(data, list):
self.data.extend(pack(fmt, *data)) self.data.extend(pack(fmt, *data))
else: else:
self.data.extend(pack(fmt, data)) self.data.extend(pack(fmt, data))
def set(self, data, offset, type, count = None): def set(self, data, offset, type, count = None):
fmt = self._format_type(type, count) fmt = self._format_type(type, count)
if isinstance(data, list) or isinstance(data, bytes) and type != 's': if count and count > 1 or isinstance(data, list):
pack_into(fmt, self.data, offset, *data) pack_into(fmt, self.data, offset, *data)
else: else:
pack_into(fmt, self.data, offset, data) pack_into(fmt, self.data, offset, data)
@ -92,7 +94,7 @@ def _make_set(fmt):
return self.set(data, offset, fmt) return self.set(data, offset, fmt)
return _method return _method
for name, fmt in typeMap.iteritems(): for name, fmt in typeMap.items():
_get = _make_get(fmt) _get = _make_get(fmt)
_peek = _make_peek(fmt) _peek = _make_peek(fmt)
_append = _make_append(fmt) _append = _make_append(fmt)

View File

@ -1,68 +1,87 @@
from struct import pack, unpack
def parseIP(string):
    """Convert a dotted-quad IPv4 string into its 32-bit integer form.

    The four decimal octets are packed as individual bytes and then
    reinterpreted as one big-endian unsigned 32-bit integer, so
    ``'127.0.0.1'`` becomes ``0x7F000001``.

    Raises ``ValueError`` if an octet is not a decimal integer, and
    ``struct.error`` if there are not exactly four octets or one of them
    falls outside 0-255.
    """
    bunch = map(int, string.split('.'))
    # pack to bytes, one unsigned byte per octet
    p = pack('4B', *bunch)
    # unpack as a big-endian u32 (first octet is the most significant byte)
    return unpack('>I', p)[0]
def writeIP(raw):
    """Render a 32-bit integer IPv4 address as a dotted-quad string.

    ``raw`` is packed as a big-endian unsigned 32-bit integer and each of
    the resulting four bytes is printed in decimal, most significant
    first, so ``0x7F000001`` becomes ``'127.0.0.1'``.

    Raises ``struct.error`` if ``raw`` does not fit in an unsigned 32-bit
    integer.
    """
    # pack to bytes (big-endian, most significant octet first)
    p = pack('>I', raw)
    # unpack into the four individual octets and join them with dots
    return '.'.join(map(str, unpack('4B', p)))
def writeFloat(raw):
    """Format a number as text with exactly six digits after the decimal point.

    Used as the ``toStr`` converter for the float and double formats, e.g.
    ``1.5`` becomes ``'1.500000'``.
    """
    # this is just how floats get printed...
    return '{0:.6f}'.format(raw)
xml_formats = { xml_formats = {
1 : { 'type' : None, 'count' : None, 'pyType' : None, 'names' : ['void']}, 1 : { 'names' : ['void']},
2 : { 'type' : 'b', 'count' : 1, 'pyType' : int, 'names' : ['s8']}, 2 : { 'type' : 'b', 'count' : 1, 'names' : ['s8']},
3 : { 'type' : 'B', 'count' : 1, 'pyType' : int, 'names' : ['u8']}, 3 : { 'type' : 'B', 'count' : 1, 'names' : ['u8']},
4 : { 'type' : 'h', 'count' : 1, 'pyType' : int, 'names' : ['s16']}, 4 : { 'type' : 'h', 'count' : 1, 'names' : ['s16']},
5 : { 'type' : 'H', 'count' : 1, 'pyType' : int, 'names' : ['u16']}, 5 : { 'type' : 'H', 'count' : 1, 'names' : ['u16']},
6 : { 'type' : 'i', 'count' : 1, 'pyType' : int, 'names' : ['s32']}, 6 : { 'type' : 'i', 'count' : 1, 'names' : ['s32']},
7 : { 'type' : 'I', 'count' : 1, 'pyType' : int, 'names' : ['u32']}, 7 : { 'type' : 'I', 'count' : 1, 'names' : ['u32']},
8 : { 'type' : 'q', 'count' : 1, 'pyType' : int, 'names' : ['s64']}, 8 : { 'type' : 'q', 'count' : 1, 'names' : ['s64']},
9 : { 'type' : 'Q', 'count' : 1, 'pyType' : int, 'names' : ['u64']}, 9 : { 'type' : 'Q', 'count' : 1, 'names' : ['u64']},
10 : { 'type' : 'c', 'count' : -1, 'pyType' : None, 'names' : ['bin', 'binary'], 'delimiter' : ''}, 10 : { 'type' : 'B', 'count' : -1, 'names' : ['bin', 'binary'], 'fromStr' : None},
11 : { 'type' : 's', 'count' : -1, 'pyType' : None, 'names' : ['str', 'string'], 'delimiter' : ''}, 11 : { 'type' : 'B', 'count' : -1, 'names' : ['str', 'string'], 'fromStr' : None},
12 : { 'type' : 'B', 'count' : 4, 'pyType' : int, 'names' : ['ip4'], 'delimiter' : '.'}, 12 : { 'type' : 'I', 'count' : 1, 'names' : ['ip4'], 'fromStr' : parseIP, 'toStr' : writeIP},
13 : { 'type' : 'I', 'count' : 1, 'pyType' : int, 'names' : ['time']}, # todo: how to print 13 : { 'type' : 'I', 'count' : 1, 'names' : ['time']}, # unix timestamp
14 : { 'type' : 'f', 'count' : 1, 'pyType' : float, 'names' : ['float', 'f']}, 14 : { 'type' : 'f', 'count' : 1, 'names' : ['float', 'f'], 'fromStr' : float, 'toStr' : writeFloat},
15 : { 'type' : 'd', 'count' : 1, 'pyType' : float, 'names' : ['double', 'd']}, 15 : { 'type' : 'd', 'count' : 1, 'names' : ['double', 'd'], 'fromStr' : float, 'toStr' : writeFloat},
16 : { 'type' : 'b', 'count' : 2, 'pyType' : int, 'names' : ['2s8']}, 16 : { 'type' : 'b', 'count' : 2, 'names' : ['2s8']},
17 : { 'type' : 'B', 'count' : 2, 'pyType' : int, 'names' : ['2u8']}, 17 : { 'type' : 'B', 'count' : 2, 'names' : ['2u8']},
18 : { 'type' : 'h', 'count' : 2, 'pyType' : int, 'names' : ['2s16']}, 18 : { 'type' : 'h', 'count' : 2, 'names' : ['2s16']},
19 : { 'type' : 'H', 'count' : 2, 'pyType' : int, 'names' : ['2u16']}, 19 : { 'type' : 'H', 'count' : 2, 'names' : ['2u16']},
20 : { 'type' : 'i', 'count' : 2, 'pyType' : int, 'names' : ['2s32']}, 20 : { 'type' : 'i', 'count' : 2, 'names' : ['2s32']},
21 : { 'type' : 'I', 'count' : 2, 'pyType' : int, 'names' : ['2u32']}, 21 : { 'type' : 'I', 'count' : 2, 'names' : ['2u32']},
22 : { 'type' : 'q', 'count' : 2, 'pyType' : int, 'names' : ['2s64', 'vs64']}, 22 : { 'type' : 'q', 'count' : 2, 'names' : ['2s64', 'vs64']},
23 : { 'type' : 'Q', 'count' : 2, 'pyType' : int, 'names' : ['2u64', 'vu64']}, 23 : { 'type' : 'Q', 'count' : 2, 'names' : ['2u64', 'vu64']},
24 : { 'type' : 'f', 'count' : 2, 'pyType' : float, 'names' : ['2f']}, 24 : { 'type' : 'f', 'count' : 2, 'names' : ['2f'], 'fromStr' : float, 'toStr' : writeFloat},
25 : { 'type' : 'd', 'count' : 2, 'pyType' : float, 'names' : ['2d', 'vd']}, 25 : { 'type' : 'd', 'count' : 2, 'names' : ['2d', 'vd'], 'fromStr' : float, 'toStr' : writeFloat},
26 : { 'type' : 'b', 'count' : 3, 'pyType' : int, 'names' : ['3s8']}, 26 : { 'type' : 'b', 'count' : 3, 'names' : ['3s8']},
27 : { 'type' : 'B', 'count' : 3, 'pyType' : int, 'names' : ['3u8']}, 27 : { 'type' : 'B', 'count' : 3, 'names' : ['3u8']},
28 : { 'type' : 'h', 'count' : 3, 'pyType' : int, 'names' : ['3s16']}, 28 : { 'type' : 'h', 'count' : 3, 'names' : ['3s16']},
29 : { 'type' : 'H', 'count' : 3, 'pyType' : int, 'names' : ['3u16']}, 29 : { 'type' : 'H', 'count' : 3, 'names' : ['3u16']},
30 : { 'type' : 'i', 'count' : 3, 'pyType' : int, 'names' : ['3s32']}, 30 : { 'type' : 'i', 'count' : 3, 'names' : ['3s32']},
31 : { 'type' : 'I', 'count' : 3, 'pyType' : int, 'names' : ['3u32']}, 31 : { 'type' : 'I', 'count' : 3, 'names' : ['3u32']},
32 : { 'type' : 'q', 'count' : 3, 'pyType' : int, 'names' : ['3s64']}, 32 : { 'type' : 'q', 'count' : 3, 'names' : ['3s64']},
33 : { 'type' : 'Q', 'count' : 3, 'pyType' : int, 'names' : ['3u64']}, 33 : { 'type' : 'Q', 'count' : 3, 'names' : ['3u64']},
34 : { 'type' : 'f', 'count' : 3, 'pyType' : float, 'names' : ['3f']}, 34 : { 'type' : 'f', 'count' : 3, 'names' : ['3f'], 'fromStr' : float, 'toStr' : writeFloat},
35 : { 'type' : 'd', 'count' : 3, 'pyType' : float, 'names' : ['3d']}, 35 : { 'type' : 'd', 'count' : 3, 'names' : ['3d'], 'fromStr' : float, 'toStr' : writeFloat},
36 : { 'type' : 'b', 'count' : 4, 'pyType' : int, 'names' : ['4s8']}, 36 : { 'type' : 'b', 'count' : 4, 'names' : ['4s8']},
37 : { 'type' : 'B', 'count' : 4, 'pyType' : int, 'names' : ['4u8']}, 37 : { 'type' : 'B', 'count' : 4, 'names' : ['4u8']},
38 : { 'type' : 'h', 'count' : 4, 'pyType' : int, 'names' : ['4s16']}, 38 : { 'type' : 'h', 'count' : 4, 'names' : ['4s16']},
39 : { 'type' : 'H', 'count' : 4, 'pyType' : int, 'names' : ['4u16']}, 39 : { 'type' : 'H', 'count' : 4, 'names' : ['4u16']},
40 : { 'type' : 'i', 'count' : 4, 'pyType' : int, 'names' : ['4s32', 'vs32']}, 40 : { 'type' : 'i', 'count' : 4, 'names' : ['4s32', 'vs32']},
41 : { 'type' : 'I', 'count' : 4, 'pyType' : int, 'names' : ['4u32', 'vu32']}, 41 : { 'type' : 'I', 'count' : 4, 'names' : ['4u32', 'vu32']},
42 : { 'type' : 'q', 'count' : 4, 'pyType' : int, 'names' : ['4s64']}, 42 : { 'type' : 'q', 'count' : 4, 'names' : ['4s64']},
43 : { 'type' : 'Q', 'count' : 4, 'pyType' : int, 'names' : ['4u64']}, 43 : { 'type' : 'Q', 'count' : 4, 'names' : ['4u64']},
44 : { 'type' : 'f', 'count' : 4, 'pyType' : float, 'names' : ['4f', 'vf']}, 44 : { 'type' : 'f', 'count' : 4, 'names' : ['4f', 'vf'], 'fromStr' : float, 'toStr' : writeFloat},
45 : { 'type' : 'd', 'count' : 4, 'pyType' : float, 'names' : ['4d']}, 45 : { 'type' : 'd', 'count' : 4, 'names' : ['4d'], 'fromStr' : float, 'toStr' : writeFloat},
46 : { 'type' : None, 'count' : None, 'pyType' : None, 'names' : ['attr']}, 46 : { 'names' : ['attr']},
#47 : { 'type' : None, 'count' : None, 'pyType' : None, 'names' : ['array']}, #47 : { 'names' : ['array']}, # TODO: how does this work?
48 : { 'type' : 'b', 'count' : 16, 'pyType' : int, 'names' : ['vs8']}, 48 : { 'type' : 'b', 'count' : 16, 'names' : ['vs8']},
49 : { 'type' : 'B', 'count' : 16, 'pyType' : int, 'names' : ['vu8']}, 49 : { 'type' : 'B', 'count' : 16, 'names' : ['vu8']},
50 : { 'type' : 'h', 'count' : 8, 'pyType' : int, 'names' : ['vs16']}, 50 : { 'type' : 'h', 'count' : 8, 'names' : ['vs16']},
51 : { 'type' : 'H', 'count' : 8, 'pyType' : int, 'names' : ['vu16']}, 51 : { 'type' : 'H', 'count' : 8, 'names' : ['vu16']},
52 : { 'type' : 'b', 'count' : 1, 'pyType' : int, 'names' : ['bool', 'b']}, 52 : { 'type' : 'b', 'count' : 1, 'names' : ['bool', 'b']},
53 : { 'type' : 'b', 'count' : 2, 'pyType' : int, 'names' : ['2b']}, 53 : { 'type' : 'b', 'count' : 2, 'names' : ['2b']},
54 : { 'type' : 'b', 'count' : 3, 'pyType' : int, 'names' : ['3b']}, 54 : { 'type' : 'b', 'count' : 3, 'names' : ['3b']},
55 : { 'type' : 'b', 'count' : 4, 'pyType' : int, 'names' : ['4b']}, 55 : { 'type' : 'b', 'count' : 4, 'names' : ['4b']},
56 : { 'type' : 'b', 'count' : 16, 'pyType' : int, 'names' : ['vb']} 56 : { 'type' : 'b', 'count' : 16, 'names' : ['vb']}
} }
# little less boilerplate for writing # little less boilerplate for writing
for key, val in xml_formats.iteritems(): for key, val in xml_formats.items():
xml_formats[key]['name'] = xml_formats[key]['names'][0] xml_formats[key]['name'] = xml_formats[key]['names'][0]
xml_types = {} xml_types = {}
for key, val in xml_formats.iteritems(): for key, val in xml_formats.items():
for n in val['names']: for n in val['names']:
xml_types[n] = key xml_types[n] = key
xml_types['nodeStart'] = 1 xml_types['nodeStart'] = 1

View File

@ -1,3 +1,5 @@
# python 3 style, ints instead of b''
from builtins import bytes
from xml.dom import minidom from xml.dom import minidom
from struct import calcsize from struct import calcsize
import string import string
@ -8,6 +10,8 @@ from bytebuffer import ByteBuffer
from sixbit import pack_sixbit, unpack_sixbit from sixbit import pack_sixbit, unpack_sixbit
from format_ids import xml_formats, xml_types from format_ids import xml_formats, xml_types
stdout = getattr(sys.stdout, 'buffer', sys.stdout)
DEBUG_OFFSETS = False DEBUG_OFFSETS = False
DEBUG = False DEBUG = False
@ -32,7 +36,7 @@ encoding_vals = {val : key for key, val in encoding_strings.items()}
def debug_print(string): def debug_print(string):
if DEBUG: if DEBUG:
print string print(string)
class KBinXML(): class KBinXML():
@ -69,15 +73,10 @@ class KBinXML():
def data_grab_string(self): def data_grab_string(self):
data = self.data_grab_auto() data = self.data_grab_auto()
res = '' return bytes(data[:-1]).decode(self.encoding)
for b in data:
if b == 0:
break
res += chr(b)
return res.decode(self.encoding)
def data_append_string(self, string): def data_append_string(self, string):
string = string.encode(self.encoding) + '\0' string = bytes(string.encode(self.encoding) + b'\0')
self.data_append_auto(string) self.data_append_auto(string)
# has its own separate state and other assorted garbage # has its own separate state and other assorted garbage
@ -122,6 +121,8 @@ class KBinXML():
self.dataBuf.realign_writes() self.dataBuf.realign_writes()
def _node_to_binary(self, node): def _node_to_binary(self, node):
if node.nodeType == node.TEXT_NODE or node.nodeType == node.COMMENT_NODE:
return
nodeType = node.getAttribute('__type') nodeType = node.getAttribute('__type')
if not nodeType: if not nodeType:
nodeType = 'void' nodeType = 'void'
@ -145,10 +146,12 @@ class KBinXML():
if fmt['name'] == 'bin': if fmt['name'] == 'bin':
data = bytes(bytearray.fromhex(val)) data = bytes(bytearray.fromhex(val))
elif fmt['name'] == 'str': elif fmt['name'] == 'str':
data = val.encode(self.encoding) + '\0' data = bytes(val.encode(self.encoding) + b'\0')
else: else:
val = val.split(fmt.get('delimiter', ' ')) val = val.split(' ')
data = map(fmt['pyType'], val) data = list(map(fmt.get('fromStr', int), val))
if count and len(data) / fmt['count'] != count:
raise ValueError('Array length does not match __count attribute')
if isArray or fmt['count'] == -1: if isArray or fmt['count'] == -1:
self.dataBuf.append_u32(len(data) * calcsize(fmt['type'])) self.dataBuf.append_u32(len(data) * calcsize(fmt['type']))
@ -157,7 +160,7 @@ class KBinXML():
else: else:
self.data_append_aligned(data, fmt['type'], fmt['count']) self.data_append_aligned(data, fmt['type'], fmt['count'])
# for consistency and to be more faithful # for test consistency and to be more faithful, sort the attrs
sorted_attrs = sorted(node.attributes.items(), key=operator.itemgetter(0)) sorted_attrs = sorted(node.attributes.items(), key=operator.itemgetter(0))
for key, value in sorted_attrs: for key, value in sorted_attrs:
if key not in ['__type', '__size', '__count']: if key not in ['__type', '__size', '__count']:
@ -166,8 +169,7 @@ class KBinXML():
pack_sixbit(key, self.nodeBuf) pack_sixbit(key, self.nodeBuf)
for child in node.childNodes: for child in node.childNodes:
if child.nodeType != child.TEXT_NODE: self._node_to_binary(child)
self._node_to_binary(child)
# always has the isArray bit set # always has the isArray bit set
self.nodeBuf.append_u8(xml_types['nodeEnd'] | 64) self.nodeBuf.append_u8(xml_types['nodeEnd'] | 64)
@ -270,42 +272,42 @@ class KBinXML():
node.setAttribute('__type', nodeFormat['name']) node.setAttribute('__type', nodeFormat['name'])
if isArray:
arrayCount = self.dataBuf.get_u32() / calcsize(nodeFormat['type'])
node.setAttribute('__count', str(arrayCount))
else:
arrayCount = 1
varCount = nodeFormat['count'] varCount = nodeFormat['count']
if varCount == -1: arrayCount = 1
if varCount == -1: # the 2 cannot be combined
varCount = self.dataBuf.get_u32() varCount = self.dataBuf.get_u32()
isArray = True
elif isArray:
arrayCount = self.dataBuf.get_u32() // (calcsize(nodeFormat['type'] * varCount))
node.setAttribute('__count', str(arrayCount))
totalCount = arrayCount * varCount totalCount = arrayCount * varCount
delim = nodeFormat.get('delimiter', ' ') if isArray:
if isArray or nodeFormat['count'] == -1:
data = self.dataBuf.get(nodeFormat['type'], totalCount) data = self.dataBuf.get(nodeFormat['type'], totalCount)
self.dataBuf.realign_reads() self.dataBuf.realign_reads()
else: else:
data = self.data_grab_aligned(nodeFormat['type'], totalCount) data = self.data_grab_aligned(nodeFormat['type'], totalCount)
string = delim.join(map(str, data))
if nodeType == xml_types['binary']: if nodeType == xml_types['binary']:
node.setAttribute('__size', str(totalCount)) node.setAttribute('__size', str(totalCount))
string = ''.join(('{0:02x}'.format(ord(x)) for x in string)) string = ''.join(('{0:02x}'.format(x) for x in data))
elif nodeType == xml_types['string']: elif nodeType == xml_types['string']:
string = string[:-1].decode(self.encoding) string = bytes(data[:-1]).decode(self.encoding)
else:
string = ' '.join(map(nodeFormat.get('toStr', str), data))
node.appendChild(self.xml_doc.createTextNode(string)) node.appendChild(self.xml_doc.createTextNode(string))
if __name__ == '__main__': if __name__ == '__main__':
if len(sys.argv) != 2: if len(sys.argv) != 2:
print 'bin_xml.py file.[xml/bin]' print('bin_xml.py file.[xml/bin]')
exit()
with open(sys.argv[1:], 'rb') as f: with open(sys.argv[1], 'rb') as f:
input = f.read() input = f.read()
xml = KBinXML(input) xml = KBinXML(input)
if KBinXML.is_binary_xml(input): if KBinXML.is_binary_xml(input):
print xml.to_text() stdout.write(xml.to_text())
else: else:
print xml.to_binary() stdout.write(xml.to_binary())

View File

@ -1,27 +1,28 @@
# python 3 style, ints instead of b''
from builtins import bytes
from bitarray import bitarray from bitarray import bitarray
def pack_sixbit(string, byteBuf): def pack_sixbit(string, byteBuf):
chars = str_to_sixbit(string) chars = str_to_sixbit(string)
bits = bitarray(endian='big') bits = bitarray(endian='big')
for c in chars: for c in chars:
bits.frombytes(c) bits.frombytes(c.encode())
# leave only the 6 bits we care for # leave only the 6 bits we care for
del bits[-8:-6] del bits[-8:-6]
data = bits.tobytes() data = bytes(bits.tobytes())
byteBuf.append_u8(len(string)) byteBuf.append_u8(len(string))
byteBuf.append(data, 'c', len(data)) byteBuf.append(data, 'B', len(data))
def unpack_sixbit(byteBuf): def unpack_sixbit(byteBuf):
bitBuf = bitarray(endian='big')
bitBuf.frombytes(bytes(byteBuf.data))
length = byteBuf.get_u8() length = byteBuf.get_u8()
length_bytes = (length * 6 + 7) // 8
bitBuf = bitarray(endian='big')
bitBuf.frombytes(bytes(byteBuf.get('B', length_bytes)))
result = [] result = []
offset = byteBuf.offset * 8 offset = 0
for i in range(length): for i in range(length):
result.append(ord(bitBuf[offset:offset+6].tobytes()) >> (8 - 6)) result.append(ord(bitBuf[offset:offset+6].tobytes()) >> (8 - 6))
offset += 6 offset += 6
# padding
byteBuf.offset += (length * 6 + 7) // 8
return sixbit_to_str(result) return sixbit_to_str(result)
# 0-9 for numbers, 10 is ':', 11 to 36 for capitals, 37 for underscore, 38-63 for lowercase # 0-9 for numbers, 10 is ':', 11 to 36 for capitals, 37 for underscore, 38-63 for lowercase