From c39b50a2362cb104b9fbbec755d653d67aaea0a8 Mon Sep 17 00:00:00 2001
From: William Toohey <will@mon.im>
Date: Tue, 13 Jun 2017 23:52:50 +1000
Subject: [PATCH] Fix various issues, move to class structure

---
 bin_xml.py    | 593 ++++++++++++++++++++++++++------------------------
 bytebuffer.py |  42 +++-
 format_ids.py |   2 +-
 3 files changed, 344 insertions(+), 293 deletions(-)

diff --git a/bin_xml.py b/bin_xml.py
index 555d28a..61d2507 100644
--- a/bin_xml.py
+++ b/bin_xml.py
@@ -4,8 +4,7 @@ import string
 from bitarray import bitarray
 from bytebuffer import ByteBuffer
 from format_ids import xml_formats, xml_types
-
-import IPython
+import sys
 
 DEBUG_OFFSETS = False
 DEBUG = False
@@ -25,301 +24,331 @@ def debug_print(string):
     if DEBUG:
         print string
 
-def pack_bits(string, nodeBuf, bits = 6):
-    chars = str_to_sixbit(string)
-    bits = bitarray(endian='big')
-    for c in chars:
-        bits.frombytes(c)
-        del bits[-8:-6]
-    for c in bits.tobytes():
-        nodeBuf.append_u8(ord(c))
+class kbinxml():
 
-def unpack_bits(bitArray, byteBuf, length, bits = 6):
-    result = []
-    offset = byteBuf.offset * 8
-    for i in range(length):
-        result.append(ord(bitArray[offset:offset+bits].tobytes()) >> (8 - bits))
-        offset += bits
-    # padding
-    byteBuf.offset += (length * bits + 7) // 8
-    return sixbit_to_str(result)
-
-# 0-9 for numbers, 10 to 36 for capitals, 37 for underscore, 38-63 for lowercase
-def sixbit_to_str(decompressed):
-    string = ''
-    for d in decompressed:
-        if d <= 10:
-            d += ord('0')
-        elif d < 37:
-            d += 54
-        elif d == 37:
-            d += 58
+    def __init__(self, input):
+        if isinstance(input, minidom.Document):
+            self.xml_doc = input
+        elif self.is_binary_xml(input):
+            self.from_binary(input)
         else:
-            d += 59
-        string += chr(d)
-    return string
+            self.from_text(input)
 
-def str_to_sixbit(string):
-    compress = []
-    for c in string:
-        if c >= '0' and c <= '9':
-            compress.append(ord(c) - ord('0'))
-        elif c >= 'A' and c <= 'Z':
-            compress.append(ord(c) - 54)
-        elif c == '_':
-            compress.append(ord(c) - 58)
-        elif c >= 'a' and c <= 'z':
-            compress.append(ord(c) - 59)
-        else:
-            raise ValueError('Node name can only contain alphanumeric + underscore')
-    return ''.join(map(chr, compress))
+    def pack_bits(self, string, bits = 6):
+        chars = self.str_to_sixbit(string)
+        bits = bitarray(endian='big')
+        for c in chars:
+            bits.frombytes(c)
+            del bits[-8:-6]
+        for c in bits.tobytes():
+            self.nodeBuf.append_u8(ord(c))
 
-def data_grab_auto(dataBuf):
-    size = dataBuf.get_s32()
-    ret = [dataBuf.get_u8() for x in range(size)]
-    # padding
-    dataBuf.offset += 3
-    # round to dword
-    dataBuf.offset &= ~0b11
-    return ret
+    def unpack_bits(self, length, bits = 6):
+        result = []
+        offset = self.nodeBuf.offset * 8
+        for i in range(length):
+            result.append(ord(self.nodeBits[offset:offset+bits].tobytes()) >> (8 - bits))
+            offset += bits
+        # padding
+        self.nodeBuf.offset += (length * bits + 7) // 8
+        return self.sixbit_to_str(result)
 
-def data_append_auto(dataBuf, data):
-    dataBuf.append_s32(len(data))
-    dataBuf.append(data, 's', len(data))
-
-    # padding
-    while len(dataBuf) % 4:
-        dataBuf.append_u8(0)
-
-def data_append_string(dataBuf, string):
-    string = string.encode('shift_jisx0213')
-    data_append_auto(dataBuf, string)
-
-def data_grab_string(dataBuf):
-    data = data_grab_auto(dataBuf)
-    res = ''
-    for b in data:
-        if b == 0:
-            break
-        res += chr(b)
-    return res.decode('shift_jisx0213')
-
-# has its own separate state and other assorted garbage
-def data_grab_aligned(dataBuf, dataByteBuf, dataWordBuf, type, count):
-    if dataByteBuf.offset % 4 == 0:
-        dataByteBuf.offset = dataBuf.offset
-    if dataWordBuf.offset % 4 == 0:
-        dataWordBuf.offset = dataBuf.offset
-    # multiply by count since 2u2 reads from the 16 bit buffer, for example
-    size = calcsize(type) * count
-    if size == 1:
-        ret = dataByteBuf.get(type, count)
-    elif size == 2:
-        ret = dataWordBuf.get(type, count)
-    else:
-        ret = dataBuf.get(type, count)
-    trailing = max(dataByteBuf.offset, dataWordBuf.offset)
-    if dataBuf.offset < trailing:
-        dataBuf.offset = trailing + 3
-        dataBuf.offset &= ~0b11
-    return ret
-
-def is_binary_xml(input):
-    nodeBuf = ByteBuffer(input)
-    return nodeBuf.get_u16() == SIGNATURE
-
-def _xml_node_to_binary(node, nodeBuf, dataBuf):
-    nodeType = node.getAttribute('__type')
-    if not nodeType:
-        nodeType = 'void'
-    nodeId = xml_types[nodeType]
-
-    isArray = 0
-    count = node.getAttribute('__count')
-    if count:
-        count = int(count)
-        isArray = 64 # bit position for array flag
-
-    nodeBuf.append_u8(nodeId | isArray)
-
-    name = node.nodeName
-    nodeBuf.append_u8(len(name))
-    pack_bits(name, nodeBuf)
-
-    import operator
-    sorted_x = sorted(node.attributes.items(), key=operator.itemgetter(0))
-    for key, value in sorted_x:#node.attributes.items():
-        if key in ['__type', '__size', '__count']:
-            pass
-        else:
-            data_append_string(dataBuf, value)
-            nodeBuf.append_u8(xml_types['attr'])
-            nodeBuf.append_u8(len(key))
-            pack_bits(key, nodeBuf)
-
-    if nodeType != 'void':
-        nodeId = xml_types[nodeType]
-        fmt = xml_formats[nodeId]
-
-        data = map(fmt['pType'], node.firstChild.nodeValue.split(fmt.get('delimiter', ' ')))
-
-        if fmt['count'] == -1 or not isArray:
-            data = data[0]
-        if isArray or fmt['count'] == -1:
-            dataBuf.append_u32(len(data))
-            if isArray:
-                for d in data:
-                    dataBuf.append(d, fmt['type'])
+    # 0-9 for digits, 10 for ':', 11-36 for capitals, 37 for underscore, 38-63 for lowercase
+    def sixbit_to_str(self, decompressed):
+        string = ''
+        for d in decompressed:
+            if d <= 10:
+                d += ord('0')
+            elif d < 37:
+                d += 54
+            elif d == 37:
+                d += 58
             else:
-                dataBuf.append(data, fmt['type'])
+                d += 59
+            string += chr(d)
+        return string
+
+    def str_to_sixbit(self, string):
+        compress = []
+        for c in string:
+            if c >= '0' and c <= '9':
+                compress.append(ord(c) - ord('0'))
+            elif c >= 'A' and c <= 'Z':
+                compress.append(ord(c) - 54)
+            elif c == '_':
+                compress.append(ord(c) - 58)
+            elif c >= 'a' and c <= 'z':
+                compress.append(ord(c) - 59)
+            else:
+                raise ValueError('Node name can only contain alphanumeric + underscore')
+        return ''.join(map(chr, compress))
+
+    def data_grab_auto(self):
+        size = self.dataBuf.get_s32()
+        ret = [self.dataBuf.get_u8() for x in range(size)]
+        # padding
+        self.dataBuf.offset += 3
+        # round to dword
+        self.dataBuf.offset &= ~0b11
+        return ret
+
+    def data_append_auto(self, data):
+        self.dataBuf.append_s32(len(data))
+        self.dataBuf.append(data, 's', len(data))
+
+        # padding
+        while len(self.dataBuf) % 4:
+            self.dataBuf.append_u8(0)
+
+    def data_append_string(self, string):
+        string = string.encode('shift_jisx0213') + '\0'
+        self.data_append_auto(string)
+
+    def data_grab_string(self):
+        data = self.data_grab_auto()
+        res = ''
+        for b in data:
+            if b == 0:
+                break
+            res += chr(b)
+        return res.decode('shift_jisx0213')
+
+    # has its own separate state and other assorted garbage
+    def data_grab_aligned(self, type, count):
+        if self.dataByteBuf.offset % 4 == 0:
+            self.dataByteBuf.offset = self.dataBuf.offset
+        if self.dataWordBuf.offset % 4 == 0:
+            self.dataWordBuf.offset = self.dataBuf.offset
+        # multiply by count since the total size picks the buffer: e.g. two 1-byte values are read from the 16 bit buffer
+        size = calcsize(type) * count
+        if size == 1:
+            ret = self.dataByteBuf.get(type, count)
+        elif size == 2:
+            ret = self.dataWordBuf.get(type, count)
         else:
-            data_append_aligned(dataBuf, dataByteBuf, dataWordBuf, fmt['type'], fmt['count'])
+            ret = self.dataBuf.get(type, count)
+        trailing = max(self.dataByteBuf.offset, self.dataWordBuf.offset)
+        if self.dataBuf.offset < trailing:
+            self.dataBuf.offset = trailing + 3
+            self.dataBuf.offset &= ~0b11
+        return ret
 
-    for child in node.childNodes:
-        if child.nodeType != child.TEXT_NODE:
-            _xml_node_to_binary(child, nodeBuf, dataBuf)
-
-    nodeBuf.append_u8(xml_types['nodeEnd'] | 64)
-
-def xml_text_to_binary(input):
-    return xml_to_binary(minidom.parseString(input))
-
-def xml_to_binary(input):
-    header = ByteBuffer()
-    header.append_u16(SIGNATURE)
-    header.append_u8(4 << 5) # SHIFT-JIS TODO make encoding variable
-    header.append_u8(0x7F) # TODO what does this do as 7f or ff
-    nodeBuf = ByteBuffer()
-    dataBuf = ByteBuffer()
-
-    for child in input.childNodes:
-        _xml_node_to_binary(child, nodeBuf, dataBuf)
-
-    nodeBuf.append_u8(xml_types['endSection'] | 64)
-    while len(nodeBuf) % 4 != 0:
-        nodeBuf.append_u8(0)
-    header.append_u32(len(nodeBuf))
-    nodeBuf.append_u32(len(dataBuf))
-    return header.data + nodeBuf.data + dataBuf.data
-
-def binary_to_xml_text(input):
-    return binary_to_xml(input).toprettyxml(indent="    ", encoding='UTF-8')
-
-def binary_to_xml(input):
-    doc = minidom.Document()
-    node = doc
-
-    nodeBuf = ByteBuffer(input)
-    assert nodeBuf.get_u16() == SIGNATURE
-    encoding = encodings[(nodeBuf.get_u8() & 0xE0) >> 5]
-    unknown = nodeBuf.get_u8()
-
-    # creating bitarrays is slow, cache for speed
-    nodeBits = bitarray(endian='big')
-    nodeBits.frombytes(input)
-
-    nodeEnd = nodeBuf.get_u32() + 8
-    nodeBuf.end = nodeEnd
-
-    dataBuf = ByteBuffer(input, nodeEnd)
-    dataSize = dataBuf.get_u32()
-    # WHY MUST YOU DO THIS TO ME
-    dataByteBuf = ByteBuffer(input, nodeEnd)
-    dataWordBuf = ByteBuffer(input, nodeEnd)
-
-    nodesLeft = True
-    while nodesLeft and nodeBuf.hasData():
-        while nodeBuf.peek_u8() == 0:
-            debug_print("Skipping 0 node ID")
-            nodeBuf.get_u8()
-
-        nodeType = nodeBuf.get_u8()
-        isArray = nodeType & 64
-        nodeType &= ~64
-
-        nodeFormat = xml_formats.get(nodeType, {'name':'Unknown'})
-        debug_print('Node type is {} ({})'.format(nodeFormat['name'], nodeType))
-
-        # node name
-        name = ''
-        if nodeType != xml_types['nodeEnd'] and nodeType != xml_types['endSection']:
-            strLen = nodeBuf.get_u8()
-            name = unpack_bits(nodeBits, nodeBuf, strLen)
-            debug_print(name)
-
-        skip = True
-
-        if nodeType == xml_types['attr']:
-            value = data_grab_string(dataBuf)
-            node.setAttribute(name, value)
-        elif nodeType == xml_types['nodeEnd']:
-            if node.parentNode:
-                node = node.parentNode
-        elif nodeType == xml_types['endSection']:
-            nodesLeft = False
-        elif nodeType not in xml_formats:
-            raise NotImplementedError('Implement node {}'.format(nodeType))
-        else: # inner value to process
-            skip = False
-
-        if skip:
-            continue
-
-        child = doc.createElement(name)
-        node.appendChild(child)
-        node = child
-
-        if nodeType == xml_types['nodeStart']:
-            continue
-
-        node.setAttribute('__type', nodeFormat['name'])
-
-        if isArray:
-            arrayCount = dataBuf.get_u32()
-            node.setAttribute('__count', str(arrayCount))
+    def data_append_aligned(self, data, type, count):
+        if self.dataByteBuf.offset % 4 == 0:
+            self.dataByteBuf.offset = self.dataBuf.offset
+        if self.dataWordBuf.offset % 4 == 0:
+            self.dataWordBuf.offset = self.dataBuf.offset
+        # multiply by count since the total size picks the buffer: e.g. two 1-byte values are written via the 16 bit buffer
+        size = calcsize(type) * count
+        if size == 1:
+            # make room if fresh dword for our stuff
+            if self.dataByteBuf.offset % 4 == 0:
+                self.dataBuf.append_u32(0)
+            self.dataByteBuf.set(data, self.dataByteBuf.offset, type, count)
+        elif size == 2:
+            if self.dataWordBuf.offset % 4 == 0:
+                self.dataBuf.append_u32(0)
+            self.dataWordBuf.set(data, self.dataWordBuf.offset, type, count)
         else:
-             arrayCount = 1
-        varCount = nodeFormat['count']
-        if varCount == -1:
-            varCount = dataBuf.get_u32()
-        totalCount = arrayCount * varCount
+            self.dataBuf.append(data, type, count)
 
-        delim = nodeFormat.get('delimiter', ' ')
+    def is_binary_xml(self, input):
+        nodeBuf = ByteBuffer(input)
+        return nodeBuf.get_u16() == SIGNATURE
 
-        if isArray or nodeFormat['count'] == -1:
-            try:
-                data = dataBuf.get(nodeFormat['type'], totalCount)
-            except:
-                print doc.toprettyxml(indent="  ", encoding='UTF-8')
-                IPython.embed()
-            dataBuf.offset += 3 # padding
-            dataBuf.offset &= ~0b11 # align to dword
-        else:
-            data = data_grab_aligned(dataBuf, dataByteBuf, dataWordBuf, nodeFormat['type'], totalCount)
-        string = delim.join(map(str, data))
+    def _node_to_binary(self, node):
+        nodeType = node.getAttribute('__type')
+        if not nodeType:
+            nodeType = 'void'
+        nodeId = xml_types[nodeType]
 
-        if nodeType == xml_types['binary']:
-            node.setAttribute('__size', str(totalCount))
-            string = ''.join(('{0:02x}'.format(ord(x)) for x in string))
-        if nodeType == xml_types['string']:
-            string = string[:-1].decode('shift_jisx0213')
+        isArray = 0
+        count = node.getAttribute('__count')
+        if count:
+            count = int(count)
+            isArray = 64 # bit position for array flag
 
-        node.appendChild(doc.createTextNode(string))
+        self.nodeBuf.append_u8(nodeId | isArray)
 
-        #print doc.toprettyxml(indent="  ", encoding='UTF-8')
-    return doc
+        name = node.nodeName
+        self.nodeBuf.append_u8(len(name))
+        self.pack_bits(name)
 
+        import operator
+        sorted_x = sorted(node.attributes.items(), key=operator.itemgetter(0))
+        for key, value in sorted_x:#node.attributes.items():
+            if key in ['__type', '__size', '__count']:
+                pass
+            else:
+                self.data_append_string(value)
+                self.nodeBuf.append_u8(xml_types['attr'])
+                self.nodeBuf.append_u8(len(key))
+                self.pack_bits(key)
+
+        if nodeType != 'void':
+            fmt = xml_formats[nodeId]
+
+            val = node.firstChild.nodeValue
+            if fmt['count'] != -1:
+                val = val.split(fmt.get('delimiter', ' '))
+                data = map(fmt['pType'], val)
+            else:
+                data = fmt['pType'](val)
+
+            if isArray or fmt['count'] == -1:
+                self.dataBuf.append_u32(len(data) * calcsize(fmt['type']))
+                self.dataBuf.append(data, fmt['type'], len(data))
+                # padding
+                while len(self.dataBuf) % 4:
+                    self.dataBuf.append_u8(0)
+            else:
+                self.data_append_aligned(data, fmt['type'], fmt['count'])
+
+        for child in node.childNodes:
+            if child.nodeType != child.TEXT_NODE:
+                self._node_to_binary(child)
+
+        self.nodeBuf.append_u8(xml_types['nodeEnd'] | 64)
+
+    def from_text(self, input):
+        self.xml_doc = minidom.parseString(input)
+
+    def to_binary(self):
+        header = ByteBuffer()
+        header.append_u16(SIGNATURE)
+        header.append_u8(4 << 5) # SHIFT-JIS TODO make encoding variable
+        header.append_u8(0x7F) # TODO what does this do as 7f or ff
+        self.nodeBuf = ByteBuffer()
+        self.dataBuf = ByteBuffer()
+        self.dataByteBuf = ByteBuffer(self.dataBuf.data)
+        self.dataWordBuf = ByteBuffer(self.dataBuf.data)
+
+        for child in self.xml_doc.childNodes:
+            self._node_to_binary(child)
+
+        self.nodeBuf.append_u8(xml_types['endSection'] | 64)
+        while len(self.nodeBuf) % 4 != 0:
+            self.nodeBuf.append_u8(0)
+        header.append_u32(len(self.nodeBuf))
+        self.nodeBuf.append_u32(len(self.dataBuf))
+        return bytes(header.data + self.nodeBuf.data + self.dataBuf.data)
+
+    def to_text(self):
+        return self.xml_doc.toprettyxml(indent="    ", encoding='UTF-8')
+
+    def from_binary(self, input):
+        self.xml_doc = minidom.Document()
+        node = self.xml_doc
+
+        self.nodeBuf = ByteBuffer(input)
+        assert self.nodeBuf.get_u16() == SIGNATURE
+        encoding = encodings[(self.nodeBuf.get_u8() & 0xE0) >> 5]
+        unknown = self.nodeBuf.get_u8()
+
+        # creating bitarrays is slow, cache for speed
+        self.nodeBits = bitarray(endian='big')
+        self.nodeBits.frombytes(input)
+
+        nodeEnd = self.nodeBuf.get_u32() + 8
+        self.nodeBuf.end = nodeEnd
+
+        self.dataBuf = ByteBuffer(input, nodeEnd)
+        dataSize = self.dataBuf.get_u32()
+        # WHY MUST YOU DO THIS TO ME: values of total size 1 and 2 bytes get their own cursors into the data section
+        self.dataByteBuf = ByteBuffer(input, nodeEnd)
+        self.dataWordBuf = ByteBuffer(input, nodeEnd)
+
+        nodesLeft = True
+        while nodesLeft and self.nodeBuf.hasData():
+            while self.nodeBuf.peek_u8() == 0:
+                debug_print("Skipping 0 node ID")
+                self.nodeBuf.get_u8()
+
+            nodeType = self.nodeBuf.get_u8()
+            isArray = nodeType & 64
+            nodeType &= ~64
+
+            nodeFormat = xml_formats.get(nodeType, {'name':'Unknown'})
+            debug_print('Node type is {} ({})'.format(nodeFormat['name'], nodeType))
+
+            # node name
+            name = ''
+            if nodeType != xml_types['nodeEnd'] and nodeType != xml_types['endSection']:
+                strLen = self.nodeBuf.get_u8()
+                name = self.unpack_bits(strLen)
+                debug_print(name)
+
+            skip = True
+
+            if nodeType == xml_types['attr']:
+                value = self.data_grab_string()
+                node.setAttribute(name, value)
+            elif nodeType == xml_types['nodeEnd']:
+                if node.parentNode:
+                    node = node.parentNode
+            elif nodeType == xml_types['endSection']:
+                nodesLeft = False
+            elif nodeType not in xml_formats:
+                raise NotImplementedError('Implement node {}'.format(nodeType))
+            else: # inner value to process
+                skip = False
+
+            if skip:
+                continue
+
+            child = self.xml_doc.createElement(name)
+            node.appendChild(child)
+            node = child
+
+            if nodeType == xml_types['nodeStart']:
+                continue
+
+            node.setAttribute('__type', nodeFormat['name'])
+
+            if isArray:
+                arrayCount = self.dataBuf.get_u32() / calcsize(nodeFormat['type'])
+                node.setAttribute('__count', str(arrayCount))
+            else:
+                 arrayCount = 1
+            varCount = nodeFormat['count']
+            if varCount == -1:
+                varCount = self.dataBuf.get_u32()
+            totalCount = arrayCount * varCount
+
+            delim = nodeFormat.get('delimiter', ' ')
+
+            if isArray or nodeFormat['count'] == -1:
+                data = self.dataBuf.get(nodeFormat['type'], totalCount)
+                self.dataBuf.offset += 3 # padding
+                self.dataBuf.offset &= ~0b11 # align to dword
+            else:
+                data = self.data_grab_aligned(nodeFormat['type'], totalCount)
+            string = delim.join(map(str, data))
+
+            if nodeType == xml_types['binary']:
+                node.setAttribute('__size', str(totalCount))
+                string = ''.join(('{0:02x}'.format(ord(x)) for x in string))
+            if nodeType == xml_types['string']:
+                string = string[:-1].decode('shift_jisx0213')
+
+            node.appendChild(self.xml_doc.createTextNode(string))
+
+            #print self.xml_doc.toprettyxml(indent="  ", encoding='UTF-8')
 
 if __name__ == '__main__':
-    #input = open('./dump/_core_model=KFC_J_A_A_2016121200_module=package_method=list_out.raw','rb').read()
-    #input = open('./dump/KFCmodelKFCJAA2016121200modulegame3methodcommon.raw','rb').read()
-    input = open('test.raw', 'rb').read()
-    xml = binary_to_xml(input)
-    binary = xml_to_binary(xml)
-    with open('out.raw', 'wb') as f:
-        f.write(binary)
+    if len(sys.argv) < 2:
+        print 'bin_xml.py file1 [file2 ...]'
 
-    #print [ord(x) for x in input]
-    #print [ord(x) for x in binary]
-    #print binary_to_xml_text(input)
-    print binary_to_xml_text(binary)
+    # by default, confirm the implementation is correct
+    for f in sys.argv[1:]:
+        with open(f, 'rb') as f:
+            input = f.read()
+        xml = kbinxml(input)
+        print xml.to_text()
+        try:
+            # politely skip the 4 header bytes (signature, encoding, unknown byte) since we don't do encoding yet
+            assert xml.to_binary()[4:] == input[4:]
+        except AssertionError:
+            print 'Files do not match!'
+            with open('out.raw', 'wb') as f:
+                f.write(xml.to_binary())
diff --git a/bytebuffer.py b/bytebuffer.py
index 386c11c..9f1a9f0 100644
--- a/bytebuffer.py
+++ b/bytebuffer.py
@@ -2,11 +2,20 @@ from struct import *
 
 class ByteBuffer():
     def __init__(self, input = b'', offset = 0, endian = '>'):
-        self.data = input
+        if isinstance(input, bytearray):
+            self.data = input
+        else:
+            self.data = bytearray(input)
         self.endian = endian
         self.offset = offset
         self.end = len(self.data)
 
+    def _format_type(self, type, count):
+        if count is None:
+            return self.endian + type
+        else:
+            return self.endian + str(count) + type
+
     def get(self, type, count = None):
         ret = self.peek(type, count)
         size = calcsize(type)
@@ -16,19 +25,25 @@ class ByteBuffer():
         return ret
 
     def peek(self, type, count = None):
-        if count is None:
-            fmt = self.endian + type
-        else:
-            fmt = self.endian + str(count) + type
+        fmt = self._format_type(type, count)
         ret = unpack(fmt, self.data[self.offset:self.offset+calcsize(fmt)])
         return ret[0] if count is None else ret
 
-    def append(self, data, type, count = 1):
-        if count is None:
-            fmt = self.endian + type
+    def append(self, data, type, count = None):
+        fmt = self._format_type(type, count)
+        self.offset += calcsize(fmt)
+        if isinstance(data, list):
+            self.data.extend(pack(fmt, *data))
         else:
-            fmt = self.endian + str(count) + type
-        self.data += pack(fmt, data)
+            self.data.extend(pack(fmt, data))
+
+    def set(self, data, offset, type, count = None):
+        fmt = self._format_type(type, count)
+        if isinstance(data, list):
+            pack_into(fmt, self.data, offset, *data)
+        else:
+            pack_into(fmt, self.data, offset, data)
+        self.offset += calcsize(fmt)
 
     def hasData(self):
         return self.offset < self.end
@@ -62,10 +77,17 @@ def _make_append(fmt):
         return self.append(data, fmt)
     return _method
 
+def _make_set(fmt):
+    def _method(self, data, offset):
+        return self.set(data, offset, fmt)
+    return _method
+
 for name, fmt in typeMap.iteritems():
     _get = _make_get(fmt)
     _peek = _make_peek(fmt)
     _append = _make_append(fmt)
+    _set = _make_set(fmt)
     setattr(ByteBuffer, 'get_' + name, _get)
     setattr(ByteBuffer, 'peek_' + name, _peek)
     setattr(ByteBuffer, 'append_' + name, _append)
+    setattr(ByteBuffer, 'set_' + name, _set)
diff --git a/format_ids.py b/format_ids.py
index 4c14806..b3b2559 100644
--- a/format_ids.py
+++ b/format_ids.py
@@ -1,6 +1,6 @@
 
 def jisString(string):
-    return string.encode('shift_jisx0213')
+    return string.encode('shift_jisx0213') + '\0'
 
 xml_formats = {
     1  : { 'type' : None, 'count' : None, 'pType' : None,  'names' : ['void']},