Python 3 compatibility, fix decode errors. Closes #5

2026-04-26 02:01:45 -05:00 · 2017-06-26 19:39:53 +10:00 · 2017-06-26 19:39:53 +10:00 · 01eee84a1c
commit 01eee84a1c
parent e61daef7a3
5 changed files with 129 additions and 99 deletions
--- a/README.md
+++ b/README.md
@ -2,6 +2,12 @@
 An encoder/decoder for Konami's binary XML format, used in some of their games.
 ### Setup:
 `pip install bitarray`
 Python 2 only:  
 `pip install future`
 ```python
 In [1]: from kbinxml import KBinXML
 In [2]: text = KBinXML('<?xml version="1.0"?><root __type="str">Hello, world!</root>')
--- a/bytebuffer.py
+++ b/bytebuffer.py
@ -7,6 +7,8 @@ class ByteBuffer():
        if isinstance(input, bytearray):
            self.data = input
        else:
            if not isinstance(input, bytes):
                input = input.encode('utf-8')
            self.data = bytearray(input)
        self.endian = endian
        self.offset = offset
@ -34,14 +36,14 @@ class ByteBuffer():
    def append(self, data, type, count = None):
        fmt = self._format_type(type, count)
        self.offset += calcsize(fmt)
-        if isinstance(data, list) or isinstance(data, bytes) and type != 's':
+        if count and count > 1 or isinstance(data, list):
            self.data.extend(pack(fmt, *data))
        else:
            self.data.extend(pack(fmt, data))
    def set(self, data, offset, type, count = None):
        fmt = self._format_type(type, count)
-        if isinstance(data, list) or isinstance(data, bytes) and type != 's':
+        if count and count > 1 or isinstance(data, list):
            pack_into(fmt, self.data, offset, *data)
        else:
            pack_into(fmt, self.data, offset, data)
@ -92,7 +94,7 @@ def _make_set(fmt):
        return self.set(data, offset, fmt)
    return _method
-for name, fmt in typeMap.iteritems():
+for name, fmt in typeMap.items():
    _get = _make_get(fmt)
    _peek = _make_peek(fmt)
    _append = _make_append(fmt)
--- a/format_ids.py
+++ b/format_ids.py
@ -1,68 +1,87 @@
 from struct import pack, unpack
 def parseIP(string):
    """Convert a dotted IPv4 string (e.g. '1.2.3.4') into one unsigned 32-bit integer."""
    bunch = map(int, string.split('.'))
    # pack the four octets as unsigned bytes
    p = pack('4B', *bunch)
    # reinterpret those four bytes as a single big-endian u32
    return unpack('>I', p)[0]
 def writeIP(raw):
    """Render an unsigned 32-bit integer as a dotted IPv4 string."""
    # pack as a big-endian u32
    p = pack('>I', raw)
    # unpack into four unsigned bytes and join them with dots
    return '.'.join(map(str, unpack('4B', p)))
 def writeFloat(raw):
    """Format a number as fixed-point text with six digits after the decimal point."""
    # this is just how floats get printed...
    return '{0:.6f}'.format(raw)
 xml_formats = {
-    1  : { 'type' : None, 'count' : None, 'pyType' : None,  'names' : ['void']},
+    1  : { 'names' : ['void']},
-    2  : { 'type' : 'b',  'count' : 1,    'pyType' : int,   'names' : ['s8']},
+    2  : { 'type' : 'b',  'count' : 1,  'names' : ['s8']},
-    3  : { 'type' : 'B',  'count' : 1,    'pyType' : int,   'names' : ['u8']},
+    3  : { 'type' : 'B',  'count' : 1,  'names' : ['u8']},
-    4  : { 'type' : 'h',  'count' : 1,    'pyType' : int,   'names' : ['s16']},
+    4  : { 'type' : 'h',  'count' : 1,  'names' : ['s16']},
-    5  : { 'type' : 'H',  'count' : 1,    'pyType' : int,   'names' : ['u16']},
+    5  : { 'type' : 'H',  'count' : 1,  'names' : ['u16']},
-    6  : { 'type' : 'i',  'count' : 1,    'pyType' : int,   'names' : ['s32']},
+    6  : { 'type' : 'i',  'count' : 1,  'names' : ['s32']},
-    7  : { 'type' : 'I',  'count' : 1,    'pyType' : int,   'names' : ['u32']},
+    7  : { 'type' : 'I',  'count' : 1,  'names' : ['u32']},
-    8  : { 'type' : 'q',  'count' : 1,    'pyType' : int,   'names' : ['s64']},
+    8  : { 'type' : 'q',  'count' : 1,  'names' : ['s64']},
-    9  : { 'type' : 'Q',  'count' : 1,    'pyType' : int,   'names' : ['u64']},
+    9  : { 'type' : 'Q',  'count' : 1,  'names' : ['u64']},
-    10 : { 'type' : 'c',  'count' : -1,   'pyType' : None, 'names' : ['bin', 'binary'], 'delimiter' : ''},
+    10 : { 'type' : 'B',  'count' : -1, 'names' : ['bin', 'binary'], 'fromStr' : None},
-    11 : { 'type' : 's',  'count' : -1,   'pyType' : None, 'names' : ['str', 'string'], 'delimiter' : ''},
+    11 : { 'type' : 'B',  'count' : -1, 'names' : ['str', 'string'], 'fromStr' : None},
-    12 : { 'type' : 'B',  'count' : 4,    'pyType' : int,   'names' : ['ip4'], 'delimiter' : '.'},
+    12 : { 'type' : 'I',  'count' : 1,  'names' : ['ip4'], 'fromStr' : parseIP, 'toStr' : writeIP},
-    13 : { 'type' : 'I',  'count' : 1,    'pyType' : int,   'names' : ['time']}, # todo: how to print
+    13 : { 'type' : 'I',  'count' : 1,  'names' : ['time']}, # unix timestamp
-    14 : { 'type' : 'f',  'count' : 1,    'pyType' : float, 'names' : ['float', 'f']},
+    14 : { 'type' : 'f',  'count' : 1,  'names' : ['float', 'f'], 'fromStr' : float, 'toStr' : writeFloat},
-    15 : { 'type' : 'd',  'count' : 1,    'pyType' : float, 'names' : ['double', 'd']},
+    15 : { 'type' : 'd',  'count' : 1,  'names' : ['double', 'd'], 'fromStr' : float, 'toStr' : writeFloat},
-    16 : { 'type' : 'b',  'count' : 2,    'pyType' : int,   'names' : ['2s8']},
+    16 : { 'type' : 'b',  'count' : 2,  'names' : ['2s8']},
-    17 : { 'type' : 'B',  'count' : 2,    'pyType' : int,   'names' : ['2u8']},
+    17 : { 'type' : 'B',  'count' : 2,  'names' : ['2u8']},
-    18 : { 'type' : 'h',  'count' : 2,    'pyType' : int,   'names' : ['2s16']},
+    18 : { 'type' : 'h',  'count' : 2,  'names' : ['2s16']},
-    19 : { 'type' : 'H',  'count' : 2,    'pyType' : int,   'names' : ['2u16']},
+    19 : { 'type' : 'H',  'count' : 2,  'names' : ['2u16']},
-    20 : { 'type' : 'i',  'count' : 2,    'pyType' : int,   'names' : ['2s32']},
+    20 : { 'type' : 'i',  'count' : 2,  'names' : ['2s32']},
-    21 : { 'type' : 'I',  'count' : 2,    'pyType' : int,   'names' : ['2u32']},
+    21 : { 'type' : 'I',  'count' : 2,  'names' : ['2u32']},
-    22 : { 'type' : 'q',  'count' : 2,    'pyType' : int,   'names' : ['2s64', 'vs64']},
+    22 : { 'type' : 'q',  'count' : 2,  'names' : ['2s64', 'vs64']},
-    23 : { 'type' : 'Q',  'count' : 2,    'pyType' : int,   'names' : ['2u64', 'vu64']},
+    23 : { 'type' : 'Q',  'count' : 2,  'names' : ['2u64', 'vu64']},
-    24 : { 'type' : 'f',  'count' : 2,    'pyType' : float, 'names' : ['2f']},
+    24 : { 'type' : 'f',  'count' : 2,  'names' : ['2f'], 'fromStr' : float, 'toStr' : writeFloat},
-    25 : { 'type' : 'd',  'count' : 2,    'pyType' : float, 'names' : ['2d', 'vd']},
+    25 : { 'type' : 'd',  'count' : 2,  'names' : ['2d', 'vd'], 'fromStr' : float, 'toStr' : writeFloat},
-    26 : { 'type' : 'b',  'count' : 3,    'pyType' : int,   'names' : ['3s8']},
+    26 : { 'type' : 'b',  'count' : 3,  'names' : ['3s8']},
-    27 : { 'type' : 'B',  'count' : 3,    'pyType' : int,   'names' : ['3u8']},
+    27 : { 'type' : 'B',  'count' : 3,  'names' : ['3u8']},
-    28 : { 'type' : 'h',  'count' : 3,    'pyType' : int,   'names' : ['3s16']},
+    28 : { 'type' : 'h',  'count' : 3,  'names' : ['3s16']},
-    29 : { 'type' : 'H',  'count' : 3,    'pyType' : int,   'names' : ['3u16']},
+    29 : { 'type' : 'H',  'count' : 3,  'names' : ['3u16']},
-    30 : { 'type' : 'i',  'count' : 3,    'pyType' : int,   'names' : ['3s32']},
+    30 : { 'type' : 'i',  'count' : 3,  'names' : ['3s32']},
-    31 : { 'type' : 'I',  'count' : 3,    'pyType' : int,   'names' : ['3u32']},
+    31 : { 'type' : 'I',  'count' : 3,  'names' : ['3u32']},
-    32 : { 'type' : 'q',  'count' : 3,    'pyType' : int,   'names' : ['3s64']},
+    32 : { 'type' : 'q',  'count' : 3,  'names' : ['3s64']},
-    33 : { 'type' : 'Q',  'count' : 3,    'pyType' : int,   'names' : ['3u64']},
+    33 : { 'type' : 'Q',  'count' : 3,  'names' : ['3u64']},
-    34 : { 'type' : 'f',  'count' : 3,    'pyType' : float, 'names' : ['3f']},
+    34 : { 'type' : 'f',  'count' : 3,  'names' : ['3f'], 'fromStr' : float, 'toStr' : writeFloat},
-    35 : { 'type' : 'd',  'count' : 3,    'pyType' : float, 'names' : ['3d']},
+    35 : { 'type' : 'd',  'count' : 3,  'names' : ['3d'], 'fromStr' : float, 'toStr' : writeFloat},
-    36 : { 'type' : 'b',  'count' : 4,    'pyType' : int,   'names' : ['4s8']},
+    36 : { 'type' : 'b',  'count' : 4,  'names' : ['4s8']},
-    37 : { 'type' : 'B',  'count' : 4,    'pyType' : int,   'names' : ['4u8']},
+    37 : { 'type' : 'B',  'count' : 4,  'names' : ['4u8']},
-    38 : { 'type' : 'h',  'count' : 4,    'pyType' : int,   'names' : ['4s16']},
+    38 : { 'type' : 'h',  'count' : 4,  'names' : ['4s16']},
-    39 : { 'type' : 'H',  'count' : 4,    'pyType' : int,   'names' : ['4u16']},
+    39 : { 'type' : 'H',  'count' : 4,  'names' : ['4u16']},
-    40 : { 'type' : 'i',  'count' : 4,    'pyType' : int,   'names' : ['4s32', 'vs32']},
+    40 : { 'type' : 'i',  'count' : 4,  'names' : ['4s32', 'vs32']},
-    41 : { 'type' : 'I',  'count' : 4,    'pyType' : int,   'names' : ['4u32', 'vu32']},
+    41 : { 'type' : 'I',  'count' : 4,  'names' : ['4u32', 'vu32']},
-    42 : { 'type' : 'q',  'count' : 4,    'pyType' : int,   'names' : ['4s64']},
+    42 : { 'type' : 'q',  'count' : 4,  'names' : ['4s64']},
-    43 : { 'type' : 'Q',  'count' : 4,    'pyType' : int,   'names' : ['4u64']},
+    43 : { 'type' : 'Q',  'count' : 4,  'names' : ['4u64']},
-    44 : { 'type' : 'f',  'count' : 4,    'pyType' : float, 'names' : ['4f', 'vf']},
+    44 : { 'type' : 'f',  'count' : 4,  'names' : ['4f', 'vf'], 'fromStr' : float, 'toStr' : writeFloat},
-    45 : { 'type' : 'd',  'count' : 4,    'pyType' : float, 'names' : ['4d']},
+    45 : { 'type' : 'd',  'count' : 4,  'names' : ['4d'], 'fromStr' : float, 'toStr' : writeFloat},
-    46 : { 'type' : None, 'count' : None, 'pyType' : None,  'names' : ['attr']},
+    46 : { 'names' : ['attr']},
-    #47 : { 'type' : None, 'count' : None, 'pyType' : None,  'names' : ['array']},
+    #47 : { 'names' : ['array']}, # TODO: how does this work?
-    48 : { 'type' : 'b',  'count' : 16,   'pyType' : int,   'names' : ['vs8']},
+    48 : { 'type' : 'b',  'count' : 16, 'names' : ['vs8']},
-    49 : { 'type' : 'B',  'count' : 16,   'pyType' : int,   'names' : ['vu8']},
+    49 : { 'type' : 'B',  'count' : 16, 'names' : ['vu8']},
-    50 : { 'type' : 'h',  'count' : 8,    'pyType' : int,   'names' : ['vs16']},
+    50 : { 'type' : 'h',  'count' : 8,  'names' : ['vs16']},
-    51 : { 'type' : 'H',  'count' : 8,    'pyType' : int,   'names' : ['vu16']},
+    51 : { 'type' : 'H',  'count' : 8,  'names' : ['vu16']},
-    52 : { 'type' : 'b',  'count' : 1,    'pyType' : int,   'names' : ['bool', 'b']},
+    52 : { 'type' : 'b',  'count' : 1,  'names' : ['bool', 'b']},
-    53 : { 'type' : 'b',  'count' : 2,    'pyType' : int,   'names' : ['2b']},
+    53 : { 'type' : 'b',  'count' : 2,  'names' : ['2b']},
-    54 : { 'type' : 'b',  'count' : 3,    'pyType' : int,   'names' : ['3b']},
+    54 : { 'type' : 'b',  'count' : 3,  'names' : ['3b']},
-    55 : { 'type' : 'b',  'count' : 4,    'pyType' : int,   'names' : ['4b']},
+    55 : { 'type' : 'b',  'count' : 4,  'names' : ['4b']},
-    56 : { 'type' : 'b',  'count' : 16,   'pyType' : int,   'names' : ['vb']}
+    56 : { 'type' : 'b',  'count' : 16, 'names' : ['vb']}
 }
 # little less boilerplate for writing
-for key, val in xml_formats.iteritems():
+for key, val in xml_formats.items():
    xml_formats[key]['name'] = xml_formats[key]['names'][0]
 xml_types = {}
-for key, val in xml_formats.iteritems():
+for key, val in xml_formats.items():
    for n in val['names']:
        xml_types[n] = key
 xml_types['nodeStart'] = 1
--- a/kbinxml.py
+++ b/kbinxml.py
@ -1,3 +1,5 @@
 # Python 3-style bytes: indexing/iterating yields ints, not one-character strings
 from builtins import bytes
 from xml.dom import minidom
 from struct import calcsize
 import string
@ -8,6 +10,8 @@ from bytebuffer import ByteBuffer
 from sixbit import pack_sixbit, unpack_sixbit
 from format_ids import xml_formats, xml_types
 stdout = getattr(sys.stdout, 'buffer', sys.stdout)
 DEBUG_OFFSETS = False
 DEBUG = False
@ -32,7 +36,7 @@ encoding_vals = {val : key for key, val in encoding_strings.items()}
 def debug_print(string):
    if DEBUG:
-        print string
+        print(string)
 class KBinXML():
@ -69,15 +73,10 @@ class KBinXML():
    def data_grab_string(self):
        data = self.data_grab_auto()
-        res = ''
+        return bytes(data[:-1]).decode(self.encoding)
        for b in data:
            if b == 0:
                break
            res += chr(b)
        return res.decode(self.encoding)
    def data_append_string(self, string):
-        string = string.encode(self.encoding) + '\0'
+        string = bytes(string.encode(self.encoding) + b'\0')
        self.data_append_auto(string)
    # has its own separate state and other assorted garbage
@ -122,6 +121,8 @@ class KBinXML():
            self.dataBuf.realign_writes()
    def _node_to_binary(self, node):
        if node.nodeType == node.TEXT_NODE or node.nodeType == node.COMMENT_NODE:
            return
        nodeType = node.getAttribute('__type')
        if not nodeType:
            nodeType = 'void'
@ -145,10 +146,12 @@ class KBinXML():
            if fmt['name'] == 'bin':
                data = bytes(bytearray.fromhex(val))
            elif fmt['name'] == 'str':
-                data = val.encode(self.encoding) + '\0'
+                data = bytes(val.encode(self.encoding) + b'\0')
            else:
-                val = val.split(fmt.get('delimiter', ' '))
+                val = val.split(' ')
-                data = map(fmt['pyType'], val)
+                data = list(map(fmt.get('fromStr', int), val))
                if count and len(data) / fmt['count'] != count:
                    raise ValueError('Array length does not match __count attribute')
            if isArray or fmt['count'] == -1:
                self.dataBuf.append_u32(len(data) * calcsize(fmt['type']))
@ -157,7 +160,7 @@ class KBinXML():
            else:
                self.data_append_aligned(data, fmt['type'], fmt['count'])
-        # for consistency and to be more faithful
+        # for test consistency and to be more faithful, sort the attrs
        sorted_attrs = sorted(node.attributes.items(), key=operator.itemgetter(0))
        for key, value in sorted_attrs:
            if key not in ['__type', '__size', '__count']:
@ -166,8 +169,7 @@ class KBinXML():
                pack_sixbit(key, self.nodeBuf)
        for child in node.childNodes:
-            if child.nodeType != child.TEXT_NODE:
+            self._node_to_binary(child)
                self._node_to_binary(child)
        # always has the isArray bit set
        self.nodeBuf.append_u8(xml_types['nodeEnd'] | 64)
@ -270,42 +272,42 @@ class KBinXML():
            node.setAttribute('__type', nodeFormat['name'])
            if isArray:
                arrayCount = self.dataBuf.get_u32() / calcsize(nodeFormat['type'])
                node.setAttribute('__count', str(arrayCount))
            else:
                 arrayCount = 1
            varCount = nodeFormat['count']
-            if varCount == -1:
+            arrayCount = 1
            if varCount == -1: # the 2 cannot be combined
                varCount = self.dataBuf.get_u32()
                isArray = True
            elif isArray:
                arrayCount = self.dataBuf.get_u32() // (calcsize(nodeFormat['type'] * varCount))
                node.setAttribute('__count', str(arrayCount))
            totalCount = arrayCount * varCount
-            delim = nodeFormat.get('delimiter', ' ')
+            if isArray:
            if isArray or nodeFormat['count'] == -1:
                data = self.dataBuf.get(nodeFormat['type'], totalCount)
                self.dataBuf.realign_reads()
            else:
                data = self.data_grab_aligned(nodeFormat['type'], totalCount)
            string = delim.join(map(str, data))
            if nodeType == xml_types['binary']:
                node.setAttribute('__size', str(totalCount))
-                string = ''.join(('{0:02x}'.format(ord(x)) for x in string))
+                string = ''.join(('{0:02x}'.format(x) for x in data))
            elif nodeType == xml_types['string']:
-                string = string[:-1].decode(self.encoding)
+                string = bytes(data[:-1]).decode(self.encoding)
            else:
                string = ' '.join(map(nodeFormat.get('toStr', str), data))
            node.appendChild(self.xml_doc.createTextNode(string))
 if __name__ == '__main__':
    if len(sys.argv) != 2:
-        print 'bin_xml.py file.[xml/bin]'
+        print('bin_xml.py file.[xml/bin]')
        exit()
-    with open(sys.argv[1:], 'rb') as f:
+    with open(sys.argv[1], 'rb') as f:
        input = f.read()
    xml = KBinXML(input)
    if KBinXML.is_binary_xml(input):
-        print xml.to_text()
+        stdout.write(xml.to_text())
    else:
-        print xml.to_binary()
+        stdout.write(xml.to_binary())
--- a/sixbit.py
+++ b/sixbit.py
@ -1,27 +1,28 @@
 # Python 3-style bytes: indexing/iterating yields ints, not one-character strings
 from builtins import bytes
 from bitarray import bitarray
 def pack_sixbit(string, byteBuf):
    chars = str_to_sixbit(string)
    bits = bitarray(endian='big')
    for c in chars:
-        bits.frombytes(c)
+        bits.frombytes(c.encode())
        # leave only the 6 bits we care for
        del bits[-8:-6]
-    data = bits.tobytes()
+    data = bytes(bits.tobytes())
    byteBuf.append_u8(len(string))
-    byteBuf.append(data, 'c', len(data))
+    byteBuf.append(data, 'B', len(data))
 def unpack_sixbit(byteBuf):
    bitBuf = bitarray(endian='big')
    bitBuf.frombytes(bytes(byteBuf.data))
    length = byteBuf.get_u8()
    length_bytes = (length * 6 + 7) // 8
    bitBuf = bitarray(endian='big')
    bitBuf.frombytes(bytes(byteBuf.get('B', length_bytes)))
    result = []
-    offset = byteBuf.offset * 8
+    offset = 0
    for i in range(length):
        result.append(ord(bitBuf[offset:offset+6].tobytes()) >> (8 - 6))
        offset += 6
    # padding
    byteBuf.offset += (length * 6 + 7) // 8
    return sixbit_to_str(result)
 # 0-9 for numbers, 10 is ':', 11 to 36 for capitals, 37 for underscore, 38-63 for lowercase