Python 3 compatibility, fix decode errors. Closes #5

This commit is contained in:
William Toohey 2017-06-26 19:39:53 +10:00
parent e61daef7a3
commit 01eee84a1c
5 changed files with 129 additions and 99 deletions

View File

@ -2,6 +2,12 @@
An encoder/decoder for Konami's binary XML format, used in some of their games.
### Setup:
`pip install bitarray`
Python 2 only:
`pip install future`
```python
In [1]: from kbinxml import KBinXML
In [2]: text = KBinXML('<?xml version="1.0"?><root __type="str">Hello, world!</root>')

View File

@ -7,6 +7,8 @@ class ByteBuffer():
if isinstance(input, bytearray):
self.data = input
else:
if not isinstance(input, bytes):
input = input.encode('utf-8')
self.data = bytearray(input)
self.endian = endian
self.offset = offset
@ -34,14 +36,14 @@ class ByteBuffer():
def append(self, data, type, count = None):
fmt = self._format_type(type, count)
self.offset += calcsize(fmt)
if isinstance(data, list) or isinstance(data, bytes) and type != 's':
if count and count > 1 or isinstance(data, list):
self.data.extend(pack(fmt, *data))
else:
self.data.extend(pack(fmt, data))
def set(self, data, offset, type, count = None):
fmt = self._format_type(type, count)
if isinstance(data, list) or isinstance(data, bytes) and type != 's':
if count and count > 1 or isinstance(data, list):
pack_into(fmt, self.data, offset, *data)
else:
pack_into(fmt, self.data, offset, data)
@ -92,7 +94,7 @@ def _make_set(fmt):
return self.set(data, offset, fmt)
return _method
for name, fmt in typeMap.iteritems():
for name, fmt in typeMap.items():
_get = _make_get(fmt)
_peek = _make_peek(fmt)
_append = _make_append(fmt)

View File

@ -1,68 +1,87 @@
from struct import pack, unpack
def parseIP(string):
    """Convert a dotted-quad IPv4 string into a single unsigned integer.

    The text is split on '.', each piece is converted with ``int`` and the
    resulting values are packed as four unsigned bytes. Those bytes are then
    reinterpreted as one big-endian unsigned 32-bit integer, so the first
    octet ends up in the most significant byte.

    Errors from ``int`` (non-numeric piece) or from ``struct.pack`` (wrong
    number of pieces, value outside 0-255) propagate to the caller.
    """
    octets = [int(part) for part in string.split('.')]
    # Serialise the octets as raw bytes ...
    packed = pack('4B', *octets)
    # ... and read them back as one big-endian u32.
    (value,) = unpack('>I', packed)
    return value
def writeIP(raw):
    """Render an unsigned 32-bit integer as a dotted-quad IPv4 string.

    ``raw`` is packed as a big-endian unsigned 32-bit value, so the most
    significant byte becomes the first octet of the returned text.
    ``struct.pack`` raises if ``raw`` does not fit in 32 unsigned bits.
    """
    # Serialise big-endian so the octets come out in display order.
    packed = pack('>I', raw)
    octets = unpack('4B', packed)
    return '.'.join(str(octet) for octet in octets)
def writeFloat(raw):
    """Format a number as fixed-point text with exactly six decimals."""
    # Fixed six-decimal output is the textual form used for float values.
    template = '{0:.6f}'
    return template.format(raw)
xml_formats = {
1 : { 'type' : None, 'count' : None, 'pyType' : None, 'names' : ['void']},
2 : { 'type' : 'b', 'count' : 1, 'pyType' : int, 'names' : ['s8']},
3 : { 'type' : 'B', 'count' : 1, 'pyType' : int, 'names' : ['u8']},
4 : { 'type' : 'h', 'count' : 1, 'pyType' : int, 'names' : ['s16']},
5 : { 'type' : 'H', 'count' : 1, 'pyType' : int, 'names' : ['u16']},
6 : { 'type' : 'i', 'count' : 1, 'pyType' : int, 'names' : ['s32']},
7 : { 'type' : 'I', 'count' : 1, 'pyType' : int, 'names' : ['u32']},
8 : { 'type' : 'q', 'count' : 1, 'pyType' : int, 'names' : ['s64']},
9 : { 'type' : 'Q', 'count' : 1, 'pyType' : int, 'names' : ['u64']},
10 : { 'type' : 'c', 'count' : -1, 'pyType' : None, 'names' : ['bin', 'binary'], 'delimiter' : ''},
11 : { 'type' : 's', 'count' : -1, 'pyType' : None, 'names' : ['str', 'string'], 'delimiter' : ''},
12 : { 'type' : 'B', 'count' : 4, 'pyType' : int, 'names' : ['ip4'], 'delimiter' : '.'},
13 : { 'type' : 'I', 'count' : 1, 'pyType' : int, 'names' : ['time']}, # todo: how to print
14 : { 'type' : 'f', 'count' : 1, 'pyType' : float, 'names' : ['float', 'f']},
15 : { 'type' : 'd', 'count' : 1, 'pyType' : float, 'names' : ['double', 'd']},
16 : { 'type' : 'b', 'count' : 2, 'pyType' : int, 'names' : ['2s8']},
17 : { 'type' : 'B', 'count' : 2, 'pyType' : int, 'names' : ['2u8']},
18 : { 'type' : 'h', 'count' : 2, 'pyType' : int, 'names' : ['2s16']},
19 : { 'type' : 'H', 'count' : 2, 'pyType' : int, 'names' : ['2u16']},
20 : { 'type' : 'i', 'count' : 2, 'pyType' : int, 'names' : ['2s32']},
21 : { 'type' : 'I', 'count' : 2, 'pyType' : int, 'names' : ['2u32']},
22 : { 'type' : 'q', 'count' : 2, 'pyType' : int, 'names' : ['2s64', 'vs64']},
23 : { 'type' : 'Q', 'count' : 2, 'pyType' : int, 'names' : ['2u64', 'vu64']},
24 : { 'type' : 'f', 'count' : 2, 'pyType' : float, 'names' : ['2f']},
25 : { 'type' : 'd', 'count' : 2, 'pyType' : float, 'names' : ['2d', 'vd']},
26 : { 'type' : 'b', 'count' : 3, 'pyType' : int, 'names' : ['3s8']},
27 : { 'type' : 'B', 'count' : 3, 'pyType' : int, 'names' : ['3u8']},
28 : { 'type' : 'h', 'count' : 3, 'pyType' : int, 'names' : ['3s16']},
29 : { 'type' : 'H', 'count' : 3, 'pyType' : int, 'names' : ['3u16']},
30 : { 'type' : 'i', 'count' : 3, 'pyType' : int, 'names' : ['3s32']},
31 : { 'type' : 'I', 'count' : 3, 'pyType' : int, 'names' : ['3u32']},
32 : { 'type' : 'q', 'count' : 3, 'pyType' : int, 'names' : ['3s64']},
33 : { 'type' : 'Q', 'count' : 3, 'pyType' : int, 'names' : ['3u64']},
34 : { 'type' : 'f', 'count' : 3, 'pyType' : float, 'names' : ['3f']},
35 : { 'type' : 'd', 'count' : 3, 'pyType' : float, 'names' : ['3d']},
36 : { 'type' : 'b', 'count' : 4, 'pyType' : int, 'names' : ['4s8']},
37 : { 'type' : 'B', 'count' : 4, 'pyType' : int, 'names' : ['4u8']},
38 : { 'type' : 'h', 'count' : 4, 'pyType' : int, 'names' : ['4s16']},
39 : { 'type' : 'H', 'count' : 4, 'pyType' : int, 'names' : ['4u16']},
40 : { 'type' : 'i', 'count' : 4, 'pyType' : int, 'names' : ['4s32', 'vs32']},
41 : { 'type' : 'I', 'count' : 4, 'pyType' : int, 'names' : ['4u32', 'vu32']},
42 : { 'type' : 'q', 'count' : 4, 'pyType' : int, 'names' : ['4s64']},
43 : { 'type' : 'Q', 'count' : 4, 'pyType' : int, 'names' : ['4u64']},
44 : { 'type' : 'f', 'count' : 4, 'pyType' : float, 'names' : ['4f', 'vf']},
45 : { 'type' : 'd', 'count' : 4, 'pyType' : float, 'names' : ['4d']},
46 : { 'type' : None, 'count' : None, 'pyType' : None, 'names' : ['attr']},
#47 : { 'type' : None, 'count' : None, 'pyType' : None, 'names' : ['array']},
48 : { 'type' : 'b', 'count' : 16, 'pyType' : int, 'names' : ['vs8']},
49 : { 'type' : 'B', 'count' : 16, 'pyType' : int, 'names' : ['vu8']},
50 : { 'type' : 'h', 'count' : 8, 'pyType' : int, 'names' : ['vs16']},
51 : { 'type' : 'H', 'count' : 8, 'pyType' : int, 'names' : ['vu16']},
52 : { 'type' : 'b', 'count' : 1, 'pyType' : int, 'names' : ['bool', 'b']},
53 : { 'type' : 'b', 'count' : 2, 'pyType' : int, 'names' : ['2b']},
54 : { 'type' : 'b', 'count' : 3, 'pyType' : int, 'names' : ['3b']},
55 : { 'type' : 'b', 'count' : 4, 'pyType' : int, 'names' : ['4b']},
56 : { 'type' : 'b', 'count' : 16, 'pyType' : int, 'names' : ['vb']}
1 : { 'names' : ['void']},
2 : { 'type' : 'b', 'count' : 1, 'names' : ['s8']},
3 : { 'type' : 'B', 'count' : 1, 'names' : ['u8']},
4 : { 'type' : 'h', 'count' : 1, 'names' : ['s16']},
5 : { 'type' : 'H', 'count' : 1, 'names' : ['u16']},
6 : { 'type' : 'i', 'count' : 1, 'names' : ['s32']},
7 : { 'type' : 'I', 'count' : 1, 'names' : ['u32']},
8 : { 'type' : 'q', 'count' : 1, 'names' : ['s64']},
9 : { 'type' : 'Q', 'count' : 1, 'names' : ['u64']},
10 : { 'type' : 'B', 'count' : -1, 'names' : ['bin', 'binary'], 'fromStr' : None},
11 : { 'type' : 'B', 'count' : -1, 'names' : ['str', 'string'], 'fromStr' : None},
12 : { 'type' : 'I', 'count' : 1, 'names' : ['ip4'], 'fromStr' : parseIP, 'toStr' : writeIP},
13 : { 'type' : 'I', 'count' : 1, 'names' : ['time']}, # unix timestamp
14 : { 'type' : 'f', 'count' : 1, 'names' : ['float', 'f'], 'fromStr' : float, 'toStr' : writeFloat},
15 : { 'type' : 'd', 'count' : 1, 'names' : ['double', 'd'], 'fromStr' : float, 'toStr' : writeFloat},
16 : { 'type' : 'b', 'count' : 2, 'names' : ['2s8']},
17 : { 'type' : 'B', 'count' : 2, 'names' : ['2u8']},
18 : { 'type' : 'h', 'count' : 2, 'names' : ['2s16']},
19 : { 'type' : 'H', 'count' : 2, 'names' : ['2u16']},
20 : { 'type' : 'i', 'count' : 2, 'names' : ['2s32']},
21 : { 'type' : 'I', 'count' : 2, 'names' : ['2u32']},
22 : { 'type' : 'q', 'count' : 2, 'names' : ['2s64', 'vs64']},
23 : { 'type' : 'Q', 'count' : 2, 'names' : ['2u64', 'vu64']},
24 : { 'type' : 'f', 'count' : 2, 'names' : ['2f'], 'fromStr' : float, 'toStr' : writeFloat},
25 : { 'type' : 'd', 'count' : 2, 'names' : ['2d', 'vd'], 'fromStr' : float, 'toStr' : writeFloat},
26 : { 'type' : 'b', 'count' : 3, 'names' : ['3s8']},
27 : { 'type' : 'B', 'count' : 3, 'names' : ['3u8']},
28 : { 'type' : 'h', 'count' : 3, 'names' : ['3s16']},
29 : { 'type' : 'H', 'count' : 3, 'names' : ['3u16']},
30 : { 'type' : 'i', 'count' : 3, 'names' : ['3s32']},
31 : { 'type' : 'I', 'count' : 3, 'names' : ['3u32']},
32 : { 'type' : 'q', 'count' : 3, 'names' : ['3s64']},
33 : { 'type' : 'Q', 'count' : 3, 'names' : ['3u64']},
34 : { 'type' : 'f', 'count' : 3, 'names' : ['3f'], 'fromStr' : float, 'toStr' : writeFloat},
35 : { 'type' : 'd', 'count' : 3, 'names' : ['3d'], 'fromStr' : float, 'toStr' : writeFloat},
36 : { 'type' : 'b', 'count' : 4, 'names' : ['4s8']},
37 : { 'type' : 'B', 'count' : 4, 'names' : ['4u8']},
38 : { 'type' : 'h', 'count' : 4, 'names' : ['4s16']},
39 : { 'type' : 'H', 'count' : 4, 'names' : ['4u16']},
40 : { 'type' : 'i', 'count' : 4, 'names' : ['4s32', 'vs32']},
41 : { 'type' : 'I', 'count' : 4, 'names' : ['4u32', 'vu32']},
42 : { 'type' : 'q', 'count' : 4, 'names' : ['4s64']},
43 : { 'type' : 'Q', 'count' : 4, 'names' : ['4u64']},
44 : { 'type' : 'f', 'count' : 4, 'names' : ['4f', 'vf'], 'fromStr' : float, 'toStr' : writeFloat},
45 : { 'type' : 'd', 'count' : 4, 'names' : ['4d'], 'fromStr' : float, 'toStr' : writeFloat},
46 : { 'names' : ['attr']},
#47 : { 'names' : ['array']}, # TODO: how does this work?
48 : { 'type' : 'b', 'count' : 16, 'names' : ['vs8']},
49 : { 'type' : 'B', 'count' : 16, 'names' : ['vu8']},
50 : { 'type' : 'h', 'count' : 8, 'names' : ['vs16']},
51 : { 'type' : 'H', 'count' : 8, 'names' : ['vu16']},
52 : { 'type' : 'b', 'count' : 1, 'names' : ['bool', 'b']},
53 : { 'type' : 'b', 'count' : 2, 'names' : ['2b']},
54 : { 'type' : 'b', 'count' : 3, 'names' : ['3b']},
55 : { 'type' : 'b', 'count' : 4, 'names' : ['4b']},
56 : { 'type' : 'b', 'count' : 16, 'names' : ['vb']}
}
# little less boilerplate for writing
for key, val in xml_formats.iteritems():
for key, val in xml_formats.items():
xml_formats[key]['name'] = xml_formats[key]['names'][0]
xml_types = {}
for key, val in xml_formats.iteritems():
for key, val in xml_formats.items():
for n in val['names']:
xml_types[n] = key
xml_types['nodeStart'] = 1

View File

@ -1,3 +1,5 @@
# python 3 style, ints instead of b''
from builtins import bytes
from xml.dom import minidom
from struct import calcsize
import string
@ -8,6 +10,8 @@ from bytebuffer import ByteBuffer
from sixbit import pack_sixbit, unpack_sixbit
from format_ids import xml_formats, xml_types
stdout = getattr(sys.stdout, 'buffer', sys.stdout)
DEBUG_OFFSETS = False
DEBUG = False
@ -32,7 +36,7 @@ encoding_vals = {val : key for key, val in encoding_strings.items()}
def debug_print(string):
if DEBUG:
print string
print(string)
class KBinXML():
@ -69,15 +73,10 @@ class KBinXML():
def data_grab_string(self):
data = self.data_grab_auto()
res = ''
for b in data:
if b == 0:
break
res += chr(b)
return res.decode(self.encoding)
return bytes(data[:-1]).decode(self.encoding)
def data_append_string(self, string):
string = string.encode(self.encoding) + '\0'
string = bytes(string.encode(self.encoding) + b'\0')
self.data_append_auto(string)
# has its own separate state and other assorted garbage
@ -122,6 +121,8 @@ class KBinXML():
self.dataBuf.realign_writes()
def _node_to_binary(self, node):
if node.nodeType == node.TEXT_NODE or node.nodeType == node.COMMENT_NODE:
return
nodeType = node.getAttribute('__type')
if not nodeType:
nodeType = 'void'
@ -145,10 +146,12 @@ class KBinXML():
if fmt['name'] == 'bin':
data = bytes(bytearray.fromhex(val))
elif fmt['name'] == 'str':
data = val.encode(self.encoding) + '\0'
data = bytes(val.encode(self.encoding) + b'\0')
else:
val = val.split(fmt.get('delimiter', ' '))
data = map(fmt['pyType'], val)
val = val.split(' ')
data = list(map(fmt.get('fromStr', int), val))
if count and len(data) / fmt['count'] != count:
raise ValueError('Array length does not match __count attribute')
if isArray or fmt['count'] == -1:
self.dataBuf.append_u32(len(data) * calcsize(fmt['type']))
@ -157,7 +160,7 @@ class KBinXML():
else:
self.data_append_aligned(data, fmt['type'], fmt['count'])
# for consistency and to be more faithful
# for test consistency and to be more faithful, sort the attrs
sorted_attrs = sorted(node.attributes.items(), key=operator.itemgetter(0))
for key, value in sorted_attrs:
if key not in ['__type', '__size', '__count']:
@ -166,8 +169,7 @@ class KBinXML():
pack_sixbit(key, self.nodeBuf)
for child in node.childNodes:
if child.nodeType != child.TEXT_NODE:
self._node_to_binary(child)
self._node_to_binary(child)
# always has the isArray bit set
self.nodeBuf.append_u8(xml_types['nodeEnd'] | 64)
@ -270,42 +272,42 @@ class KBinXML():
node.setAttribute('__type', nodeFormat['name'])
if isArray:
arrayCount = self.dataBuf.get_u32() / calcsize(nodeFormat['type'])
node.setAttribute('__count', str(arrayCount))
else:
arrayCount = 1
varCount = nodeFormat['count']
if varCount == -1:
arrayCount = 1
if varCount == -1: # the 2 cannot be combined
varCount = self.dataBuf.get_u32()
isArray = True
elif isArray:
arrayCount = self.dataBuf.get_u32() // (calcsize(nodeFormat['type'] * varCount))
node.setAttribute('__count', str(arrayCount))
totalCount = arrayCount * varCount
delim = nodeFormat.get('delimiter', ' ')
if isArray or nodeFormat['count'] == -1:
if isArray:
data = self.dataBuf.get(nodeFormat['type'], totalCount)
self.dataBuf.realign_reads()
else:
data = self.data_grab_aligned(nodeFormat['type'], totalCount)
string = delim.join(map(str, data))
if nodeType == xml_types['binary']:
node.setAttribute('__size', str(totalCount))
string = ''.join(('{0:02x}'.format(ord(x)) for x in string))
string = ''.join(('{0:02x}'.format(x) for x in data))
elif nodeType == xml_types['string']:
string = string[:-1].decode(self.encoding)
string = bytes(data[:-1]).decode(self.encoding)
else:
string = ' '.join(map(nodeFormat.get('toStr', str), data))
node.appendChild(self.xml_doc.createTextNode(string))
if __name__ == '__main__':
if len(sys.argv) != 2:
print 'bin_xml.py file.[xml/bin]'
print('bin_xml.py file.[xml/bin]')
exit()
with open(sys.argv[1:], 'rb') as f:
with open(sys.argv[1], 'rb') as f:
input = f.read()
xml = KBinXML(input)
if KBinXML.is_binary_xml(input):
print xml.to_text()
stdout.write(xml.to_text())
else:
print xml.to_binary()
stdout.write(xml.to_binary())

View File

@ -1,27 +1,28 @@
# python 3 style, ints instead of b''
from builtins import bytes
from bitarray import bitarray
def pack_sixbit(string, byteBuf):
chars = str_to_sixbit(string)
bits = bitarray(endian='big')
for c in chars:
bits.frombytes(c)
bits.frombytes(c.encode())
# leave only the 6 bits we care for
del bits[-8:-6]
data = bits.tobytes()
data = bytes(bits.tobytes())
byteBuf.append_u8(len(string))
byteBuf.append(data, 'c', len(data))
byteBuf.append(data, 'B', len(data))
def unpack_sixbit(byteBuf):
bitBuf = bitarray(endian='big')
bitBuf.frombytes(bytes(byteBuf.data))
length = byteBuf.get_u8()
length_bytes = (length * 6 + 7) // 8
bitBuf = bitarray(endian='big')
bitBuf.frombytes(bytes(byteBuf.get('B', length_bytes)))
result = []
offset = byteBuf.offset * 8
offset = 0
for i in range(length):
result.append(ord(bitBuf[offset:offset+6].tobytes()) >> (8 - 6))
offset += 6
# padding
byteBuf.offset += (length * 6 + 7) // 8
return sixbit_to_str(result)
# 0-9 for numbers, 10 is ':', 11 to 36 for capitals, 37 for underscore, 38-63 for lowercase