From b1cb927fd2073d3f6a5622cf85e189fc04c561bc Mon Sep 17 00:00:00 2001 From: William Toohey Date: Thu, 26 Oct 2023 18:02:31 +1000 Subject: [PATCH] Remove py2 support, format code, add type annotations, convert illegal node names --- .vscode/settings.json | 3 + kbinxml/bytebuffer.py | 153 +++++++++++++++++--------- kbinxml/format_ids.py | 40 ++++--- kbinxml/kbinxml.py | 246 +++++++++++++++++++++++++----------------- kbinxml/sixbit.py | 36 ++----- kbinxml/test.py | 20 ++-- setup.py | 27 +++-- 7 files changed, 310 insertions(+), 215 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..4884f7b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "editor.formatOnSave": true +} diff --git a/kbinxml/bytebuffer.py b/kbinxml/bytebuffer.py index 500d4b4..fa2e07a 100644 --- a/kbinxml/bytebuffer.py +++ b/kbinxml/bytebuffer.py @@ -1,31 +1,33 @@ from struct import * +from typing import Any -class ByteBuffer(): - def __init__(self, input = b'', offset = 0, endian = '>'): + +class ByteBuffer: + def __init__(self, input: bytes | bytearray | str = b"", offset=0, endian=">"): # so multiple ByteBuffers can hold on to one set of underlying data # this is useful for writers in multiple locations if isinstance(input, bytearray): self.data = input else: if not isinstance(input, bytes): - input = input.encode('utf-8') + input = input.encode("utf-8") self.data = bytearray(input) self.endian = endian self.offset = offset self.end = len(self.data) - def _format_type(self, type, count): + def _format_type(self, type: str, count: int | None = None): if count is None: return self.endian + type else: return self.endian + str(count) + type - def get_bytes(self, count): + def get_bytes(self, count: int): start = self.offset self.offset += count - return self.data[start:self.offset] + return self.data[start : self.offset] - def get(self, type, count = None): + def get(self, type: str, count: int | None = None): ret = self.peek(type, count) size = calcsize(type) if count is not None: @@ -33,16 +35,16 @@ class ByteBuffer(): self.offset += size return ret - def peek(self, type, count = None): + def peek(self, type: str, count: int | None = None): fmt = self._format_type(type, count) ret = unpack_from(fmt, self.data, self.offset) return ret[0] if count is None else ret - def append_bytes(self, data): + def append_bytes(self, data: bytes): self.data.extend(data) self.offset += len(data) - def append(self, data, type, count = None): + def append(self, data: Any, type: str, count: int | None = None): fmt = self._format_type(type, count) self.offset += calcsize(fmt) try: @@ -50,7 +52,7 @@ class ByteBuffer(): except TypeError: self.data.extend(pack(fmt, data)) - def set(self, data, offset, type, count = None): + def set(self, data: Any, offset: int, type: str, count: int | None = None): fmt = self._format_type(type, count) try: pack_into(fmt, self.data, offset, *data) @@ -61,54 +63,109 @@ class ByteBuffer(): def hasData(self): return self.offset < self.end - def realign_writes(self, size = 4): + def realign_writes(self, size=4): while len(self) % size: self.append_u8(0) - def realign_reads(self, size = 4): + def realign_reads(self, size=4): while self.offset % size: self.offset += 1 def __len__(self): return len(self.data) -typeMap = { - 's8' : 'b', - 's16' : 'h', - 's32' : 'i', - 's64' : 'q', - 'u8' : 'B', - 'u16' : 'H', - 'u32' : 'I', - 'u64' : 'Q' -} + def get_s8(self) -> int: + return self.get("b") -def _make_get(fmt): - def _method(self): - return self.get(fmt) - return _method + def peek_s8(self) -> int: + return self.peek("b") -def _make_peek(fmt): - def _method(self): - return self.peek(fmt) - return _method + def append_s8(self, data: int): + return self.append(data, "b") -def _make_append(fmt): - def _method(self, data): - return self.append(data, fmt) - return _method + def set_s8(self, data: int, offset: int): + return self.set(data, offset, "b") -def _make_set(fmt): - def _method(self, data, offset): - return self.set(data, offset, fmt) - return _method + def get_s16(self) -> int: + return self.get("h") -for name, fmt in typeMap.items(): - _get = _make_get(fmt) - _peek = _make_peek(fmt) - _append = _make_append(fmt) - _set = _make_set(fmt) - setattr(ByteBuffer, 'get_' + name, _get) - setattr(ByteBuffer, 'peek_' + name, _peek) - setattr(ByteBuffer, 'append_' + name, _append) - setattr(ByteBuffer, 'set_' + name, _set) + def peek_s16(self) -> int: + return self.peek("h") + + def append_s16(self, data: int): + return self.append(data, "h") + + def set_s16(self, data: int, offset: int): + return self.set(data, offset, "h") + + def get_s32(self) -> int: + return self.get("i") + + def peek_s32(self) -> int: + return self.peek("i") + + def append_s32(self, data: int): + return self.append(data, "i") + + def set_s32(self, data: int, offset: int): + return self.set(data, offset, "i") + + def get_s64(self) -> int: + return self.get("q") + + def peek_s64(self) -> int: + return self.peek("q") + + def append_s64(self, data: int): + return self.append(data, "q") + + def set_s64(self, data: int, offset: int): + return self.set(data, offset, "q") + + def get_u8(self) -> int: + return self.get("B") + + def peek_u8(self) -> int: + return self.peek("B") + + def append_u8(self, data: int): + return self.append(data, "B") + + def set_u8(self, data: int, offset: int): + return self.set(data, offset, "B") + + def get_u16(self) -> int: + return self.get("H") + + def peek_u16(self) -> int: + return self.peek("H") + + def append_u16(self, data: int): + return self.append(data, "H") + + def set_u16(self, data: int, offset: int): + return self.set(data, offset, "H") + + def get_u32(self) -> int: + return self.get("I") + + def peek_u32(self) -> int: + return self.peek("I") + + def append_u32(self, data: int): + return self.append(data, "I") + + def set_u32(self, data: int, offset: int): + return self.set(data, offset, "I") + + def get_u64(self) -> int: + return self.get("Q") + + def peek_u64(self) -> int: + return self.peek("Q") + + def append_u64(self, data: int): + return self.append(data, "Q") + + def set_u64(self, data: int, offset: int): + return self.set(data, offset, "Q") diff --git a/kbinxml/format_ids.py b/kbinxml/format_ids.py index 9c501b7..2c6802e 100644 --- a/kbinxml/format_ids.py +++ b/kbinxml/format_ids.py @@ -1,22 +1,27 @@ from struct import pack, unpack -def parseIP(string): - bunch = map(int, string.split('.')) - # pack to bytes - p = pack('4B', *bunch) - # unpack as u16 - return unpack('>I', p)[0] -def writeIP(raw): +def parseIP(string: str) -> int: + bunch = map(int, string.split(".")) # pack to bytes - p = pack('>I', raw) + p = pack("4B", *bunch) + # unpack as u32 + return unpack(">I", p)[0] + + +def writeIP(raw: int): + # pack to bytes + p = pack(">I", raw) # unpack - return '.'.join(map(str, unpack('4B', p))) + return ".".join(map(str, unpack("4B", p))) -def writeFloat(raw): + +def writeFloat(raw: float): # this is just how floats get printed... - return '{0:.6f}'.format(raw) + return f"{raw:.6f}" + +# fmt: off xml_formats = { 1 : { 'names' : ['void']}, 2 : { 'type' : 'b', 'count' : 1, 'names' : ['s8']}, @@ -75,15 +80,16 @@ xml_formats = { 55 : { 'type' : 'b', 'count' : 4, 'names' : ['4b']}, 56 : { 'type' : 'b', 'count' : 16, 'names' : ['vb']} } +# fmt: on # little less boilerplate for writing for key, val in xml_formats.items(): - xml_formats[key]['name'] = xml_formats[key]['names'][0] + xml_formats[key]["name"] = xml_formats[key]["names"][0] -xml_types = {} +xml_types: dict[str, int] = {} for key, val in xml_formats.items(): - for n in val['names']: + for n in val["names"]: xml_types[n] = key -xml_types['nodeStart'] = 1 -xml_types['nodeEnd'] = 190 -xml_types['endSection'] = 191 +xml_types["nodeStart"] = 1 +xml_types["nodeEnd"] = 190 +xml_types["endSection"] = 191 diff --git a/kbinxml/kbinxml.py b/kbinxml/kbinxml.py index 1a28bff..37bde67 100644 --- a/kbinxml/kbinxml.py +++ b/kbinxml/kbinxml.py @@ -1,7 +1,5 @@ -# python 3 style, ints instead of b'' -from builtins import bytes +import argparse from struct import calcsize -import string import sys import operator from io import BytesIO @@ -12,8 +10,6 @@ from .bytebuffer import ByteBuffer from .sixbit import pack_sixbit, unpack_sixbit from .format_ids import xml_formats, xml_types -stdout = getattr(sys.stdout, 'buffer', sys.stdout) - DEBUG_OFFSETS = False DEBUG = False @@ -22,30 +18,42 @@ SIGNATURE = 0xA0 SIG_COMPRESSED = 0x42 SIG_UNCOMPRESSED = 0x45 -XML_ENCODING = 'UTF-8' -BIN_ENCODING = 'cp932' # windows shift-jis variant +XML_ENCODING = "UTF-8" +BIN_ENCODING = "cp932" # windows shift-jis variant # NOTE: all of these are their python codec names encoding_strings = { - 0x00: 'cp932', - 0x20: 'ASCII', - 0x40: 'ISO-8859-1', - 0x60: 'EUC_JP', - 0x80: 'cp932', - 0xA0: 'UTF-8' + 0x00: "cp932", + 0x20: "ASCII", + 0x40: "ISO-8859-1", + 0x60: "EUC_JP", + 0x80: "cp932", + 0xA0: "UTF-8", } -encoding_vals = {val : key for key, val in encoding_strings.items()} +encoding_vals = {val: key for key, val in encoding_strings.items()} # ensure that duplicated value from above is correct. Avoid exporting 0x00 type -encoding_vals['cp932'] = 0x80 +encoding_vals["cp932"] = 0x80 + def debug_print(string): if DEBUG: print(string) -class KBinXML(): - def __init__(self, input): +class KBinException(Exception): + pass + + +class KBinXML: + def __init__(self, input, convert_illegal_things=False): + """If `convert_illegal_things` is true, + - Any shift-jis string that cannot be decoded as shift-jis will + try to be decoded as utf-8 + - If a node name is invalid (for example, it starts with a number), + the name will be prefixed with an underscore + """ + self.convert_illegal_things = convert_illegal_things if isinstance(input, etree._Element): self.xml_doc = input elif isinstance(input, etree._ElementTree): @@ -55,10 +63,11 @@ class KBinXML(): else: self.from_text(input) - def to_text(self): + def to_text(self) -> str: # we decode again because I want unicode, dammit - return etree.tostring(self.xml_doc, pretty_print=True, - encoding=XML_ENCODING, xml_declaration=True).decode(XML_ENCODING) + return etree.tostring( + self.xml_doc, pretty_print=True, encoding=XML_ENCODING, xml_declaration=True + ).decode(XML_ENCODING) def from_text(self, input): self.xml_doc = etree.parse(BytesIO(input)).getroot() @@ -72,8 +81,10 @@ class KBinXML(): return False nodeBuf = ByteBuffer(input) - return (nodeBuf.get_u8() == SIGNATURE and - nodeBuf.get_u8() in (SIG_COMPRESSED, SIG_UNCOMPRESSED)) + return nodeBuf.get_u8() == SIGNATURE and nodeBuf.get_u8() in ( + SIG_COMPRESSED, + SIG_UNCOMPRESSED, + ) @property def _data_mem_size(self): @@ -81,25 +92,25 @@ class KBinXML(): data_len = 0 for e in self.xml_doc.iter(tag=etree.Element): - t = e.attrib.get('__type') + t = e.attrib.get("__type") if t is None: continue - count = e.attrib.get('__count', 1) - size = e.attrib.get('__size', 1) + count = e.attrib.get("__count", 1) + size = e.attrib.get("__size", 1) x = xml_formats[xml_types[t]] - if x['count'] > 0: - m = x['count'] * calcsize(x['type']) * count * size - elif x['name'] == 'bin': + if x["count"] > 0: + m = x["count"] * calcsize(x["type"]) * count * size + elif x["name"] == "bin": m = len(e.text) // 2 - else: # string + else: # string # null terminator space m = len(e.text.encode(self.encoding)) + 1 if m <= 4: continue - if x['name'] == 'bin': + if x["name"] == "bin": data_len += (m + 1) & ~1 else: data_len += (m + 3) & ~3 @@ -107,7 +118,7 @@ class KBinXML(): @property def mem_size(self): - '''used when allocating memory ingame''' + """used when allocating memory ingame""" data_len = self._data_mem_size node_count = len(list(self.xml_doc.iter(tag=etree.Element))) @@ -124,7 +135,7 @@ class KBinXML(): size = 56 * node_count + data_len + 630 + tags_len # debugging - #print('nodes:{} ({}) data:{} ({})'.format(node_count,hex(node_count), data_len, hex(data_len))) + # print('nodes:{} ({}) data:{} ({})'.format(node_count,hex(node_count), data_len, hex(data_len))) return (size + 8) & ~7 @@ -144,19 +155,27 @@ class KBinXML(): data = bytes(data[:-1]) try: return data.decode(self.encoding) - except UnicodeDecodeError: - if self.encoding == 'cp932': + except UnicodeDecodeError as e: + if self.encoding == "cp932": + if not self.convert_illegal_things: + raise KBinException( + f"Could not decode string. To force utf8 decode {convert_illegal_help}." + ) from e + # having to do this kinda sucks, but it's better than just giving up - print("KBinXML: Malformed Shift-JIS string found, attempting UTF-8 decode", file=sys.stderr) + print( + "KBinXML: Malformed Shift-JIS string found, attempting UTF-8 decode", + file=sys.stderr, + ) print("KBinXML: Raw string data:", data, file=sys.stderr) - return data.decode('utf8') + return data.decode("utf8") else: # in the unlikely event of malformed data that isn't shift-jis, # fix it later raise def data_append_string(self, string): - string = bytes(string.encode(self.encoding) + b'\0') + string = bytes(string.encode(self.encoding) + b"\0") self.data_append_auto(string) # has its own separate state and other assorted garbage @@ -209,8 +228,8 @@ class KBinXML(): self.nodeBuf.append_bytes(enc) def _add_namespace(self, node, name, value): - ''' Add a namespace (xmlns) to an existing node. Returns the new node to - work with ''' + """Add a namespace (xmlns) to an existing node. Returns the new node to + work with""" # I wish this worked, but we need to specifiy it in the constructor # node.nsmap[name] = value @@ -226,64 +245,64 @@ class KBinXML(): return node def _node_to_binary(self, node): - nodeType = node.attrib.get('__type') + nodeType = node.attrib.get("__type") if not nodeType: # typeless tags with text become string if node.text is not None and len(node.text.strip()) > 0: - nodeType = 'str' + nodeType = "str" else: - nodeType = 'void' + nodeType = "void" nodeId = xml_types[nodeType] isArray = 0 - count = node.attrib.get('__count') + count = node.attrib.get("__count") if count: count = int(count) - isArray = 64 # bit position for array flag + isArray = 64 # bit position for array flag self.nodeBuf.append_u8(nodeId | isArray) name = node.tag self.append_node_name(name) - if nodeType != 'void': + if nodeType != "void": fmt = xml_formats[nodeId] val = node.text - if fmt['name'] == 'bin': + if fmt["name"] == "bin": data = bytes(bytearray.fromhex(val)) - elif fmt['name'] == 'str': - if val is None: # empty string - val = '' - data = bytes(val.encode(self.encoding, 'replace') + b'\0') + elif fmt["name"] == "str": + if val is None: # empty string + val = "" + data = bytes(val.encode(self.encoding, "replace") + b"\0") else: - val = val.split(' ') - data = list(map(fmt.get('fromStr', int), val)) - if count and len(data) / fmt['count'] != count: - raise ValueError('Array length does not match __count attribute') + val = val.split(" ") + data = list(map(fmt.get("fromStr", int), val)) + if count and len(data) / fmt["count"] != count: + raise ValueError("Array length does not match __count attribute") - if isArray or fmt['count'] == -1: - self.dataBuf.append_u32(len(data) * calcsize(fmt['type'])) - self.dataBuf.append(data, fmt['type'], len(data)) + if isArray or fmt["count"] == -1: + self.dataBuf.append_u32(len(data) * calcsize(fmt["type"])) + self.dataBuf.append(data, fmt["type"], len(data)) self.dataBuf.realign_writes() else: - self.data_append_aligned(data, fmt['type'], fmt['count']) + self.data_append_aligned(data, fmt["type"], fmt["count"]) # for test consistency and to be more faithful, sort the attrs sorted_attrs = sorted(node.attrib.items(), key=operator.itemgetter(0)) for key, value in sorted_attrs: - if key not in ['__type', '__size', '__count']: + if key not in ["__type", "__size", "__count"]: self.data_append_string(value) - self.nodeBuf.append_u8(xml_types['attr']) + self.nodeBuf.append_u8(xml_types["attr"]) self.append_node_name(key) for child in node.iterchildren(tag=etree.Element): self._node_to_binary(child) # always has the isArray bit set - self.nodeBuf.append_u8(xml_types['nodeEnd'] | 64) + self.nodeBuf.append_u8(xml_types["nodeEnd"] | 64) - def to_binary(self, encoding = BIN_ENCODING, compressed = True): + def to_binary(self, encoding=BIN_ENCODING, compressed=True): self.encoding = encoding self.compressed = compressed @@ -304,7 +323,7 @@ class KBinXML(): self._node_to_binary(self.xml_doc) # always has the isArray bit set - self.nodeBuf.append_u8(xml_types['endSection'] | 64) + self.nodeBuf.append_u8(xml_types["endSection"] | 64) self.nodeBuf.realign_writes() header.append_u32(len(self.nodeBuf)) self.dataSize = len(self.dataBuf) @@ -312,7 +331,7 @@ class KBinXML(): return bytes(header.data + self.nodeBuf.data + self.dataBuf.data) def from_binary(self, input): - self.xml_doc = etree.Element('root') + self.xml_doc = etree.Element("root") node = self.xml_doc self.nodeBuf = ByteBuffer(input) @@ -345,12 +364,12 @@ class KBinXML(): isArray = nodeType & 64 nodeType &= ~64 - nodeFormat = xml_formats.get(nodeType, {'name':'Unknown'}) - debug_print('Node type is {} ({})'.format(nodeFormat['name'], nodeType)) + nodeFormat = xml_formats.get(nodeType, {"name": "Unknown"}) + debug_print("Node type is {} ({})".format(nodeFormat["name"], nodeType)) # node or attribute name - name = '' - if nodeType != xml_types['nodeEnd'] and nodeType != xml_types['endSection']: + name = "" + if nodeType != xml_types["nodeEnd"] and nodeType != xml_types["endSection"]: if self.compressed: name = unpack_sixbit(self.nodeBuf) else: @@ -361,83 +380,110 @@ class KBinXML(): skip = True - if nodeType == xml_types['attr']: + if nodeType == xml_types["attr"]: value = self.data_grab_string() # because someone thought it was a good idea to serialise namespaces - if name.startswith('xmlns:'): - _, name = name.split('xmlns:') + if name.startswith("xmlns:"): + _, name = name.split("xmlns:") node = self._add_namespace(node, name, value) - elif ':' in name: - prefix, name = name.split(':') + elif ":" in name: + prefix, name = name.split(":") # if this fails, the xml is invalid. Open an issue. node.set(etree.QName(node.nsmap[prefix], name), value) # this is the case you'll get in 99% of places else: node.attrib[name] = value - elif nodeType == xml_types['nodeEnd']: + elif nodeType == xml_types["nodeEnd"]: if node.getparent() is not None: node = node.getparent() - elif nodeType == xml_types['endSection']: + elif nodeType == xml_types["endSection"]: nodesLeft = False elif nodeType not in xml_formats: - raise NotImplementedError('Implement node {}'.format(nodeType)) - else: # inner value to process + raise NotImplementedError("Implement node {}".format(nodeType)) + else: # inner value to process skip = False if skip: continue - child = etree.SubElement(node, name) + try: + child = etree.SubElement(node, name) + except ValueError as e: + fixed_name = f"_{name}" + if self.convert_illegal_things: + # todo: there are other invalid node names. Fix them when you see them. + child = etree.SubElement(node, fixed_name) + else: + raise KBinException( + f'Could not create node with name "{name}". To rename it to "{fixed_name}", {convert_illegal_help}.' + ) from e node = child - if nodeType == xml_types['nodeStart']: + if nodeType == xml_types["nodeStart"]: continue - node.attrib['__type'] = nodeFormat['name'] + node.attrib["__type"] = nodeFormat["name"] - varCount = nodeFormat['count'] + varCount = nodeFormat["count"] arrayCount = 1 - if varCount == -1: # the 2 cannot be combined + if varCount == -1: # the 2 cannot be combined varCount = self.dataBuf.get_u32() isArray = True elif isArray: - arrayCount = self.dataBuf.get_u32() // (calcsize(nodeFormat['type'] * varCount)) - node.attrib['__count'] = str(arrayCount) + arrayCount = self.dataBuf.get_u32() // ( + calcsize(nodeFormat["type"] * varCount) + ) + node.attrib["__count"] = str(arrayCount) totalCount = arrayCount * varCount if isArray: - data = self.dataBuf.get(nodeFormat['type'], totalCount) + data = self.dataBuf.get(nodeFormat["type"], totalCount) self.dataBuf.realign_reads() else: - data = self.data_grab_aligned(nodeFormat['type'], totalCount) + data = self.data_grab_aligned(nodeFormat["type"], totalCount) - if nodeType == xml_types['binary']: - node.attrib['__size'] = str(totalCount) - string = ''.join(('{0:02x}'.format(x) for x in data)) - elif nodeType == xml_types['string']: + if nodeType == xml_types["binary"]: + node.attrib["__size"] = str(totalCount) + string = "".join(("{0:02x}".format(x) for x in data)) + elif nodeType == xml_types["string"]: string = bytes(data[:-1]).decode(self.encoding) else: - string = ' '.join(map(nodeFormat.get('toStr', str), data)) + string = " ".join(map(nodeFormat.get("toStr", str), data)) # some strings have extra NUL bytes, compatible behaviour is to strip - node.text = string.strip('\0') + node.text = string.strip("\0") # because we need the 'real' root self.xml_doc = self.xml_doc[0] -def main(): - if len(sys.argv) != 2: - print('bin_xml.py file.[xml/bin]') - exit() - with open(sys.argv[1], 'rb') as f: +convert_illegal_help = "set convert_illegal_things=True in the KBinXML constructor" + + +def main(): + # interestingly, this doesn't work if added inside the + # `if __name__ == "__main__"` branch + global convert_illegal_help + convert_illegal_help = "add the --convert-illegal flag" + + parser = argparse.ArgumentParser( + prog="kbinxml", description="Convert kbin to xml, or xml to kbin" + ) + parser.add_argument("filename", metavar="file.[xml/bin]") + parser.add_argument("--convert-illegal", action="store_true") + + args = parser.parse_args() + + with open(args.filename, "rb") as f: input = f.read() - xml = KBinXML(input) + xml = KBinXML(input, convert_illegal_things=args.convert_illegal) + stdout = getattr(sys.stdout, "buffer", sys.stdout) if KBinXML.is_binary_xml(input): - stdout.write(xml.to_text().encode('utf-8')) + stdout.write(xml.to_text().encode("utf-8")) else: stdout.write(xml.to_binary()) -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/kbinxml/sixbit.py b/kbinxml/sixbit.py index 28bce32..00b71f5 100644 --- a/kbinxml/sixbit.py +++ b/kbinxml/sixbit.py @@ -1,28 +1,13 @@ -# python 3 style, ints instead of b'' -from builtins import bytes as newbytes - -def py2_int_to_bytes(n, length): - h = '%x' % n - s = ('0'*(len(h) % 2) + h).zfill(length*2).decode('hex') - return newbytes(s) - -try: - # python 3 - int.from_bytes - int_from_bytes = lambda b : int.from_bytes(b, byteorder='big') - int_to_bytes = lambda i, length : i.to_bytes(length, byteorder='big') -except AttributeError: - # python 2 - int_from_bytes = lambda b : int(bytes(b).encode('hex'), 16) - int_to_bytes = py2_int_to_bytes +from kbinxml.bytebuffer import ByteBuffer -charmap = '0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz' -bytemap = {charmap[i] : i for i in range(len(charmap))} +charmap = "0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz" +bytemap = {c: i for i, c in enumerate(charmap)} -def pack_sixbit(string, byteBuf): + +def pack_sixbit(string: str, byteBuf: ByteBuffer): chars = [bytemap[x] for x in string] - padding = 8 - (len(string)*6 % 8) + padding = 8 - (len(string) * 6 % 8) if padding == 8: padding = 0 bits = 0 @@ -30,21 +15,22 @@ def pack_sixbit(string, byteBuf): bits <<= 6 bits |= c bits <<= padding - data = int_to_bytes(bits, (len(string)*6 + padding) // 8) + data = bits.to_bytes((len(string) * 6 + padding) // 8, byteorder="big") byteBuf.append_bytes((len(string),)) byteBuf.append_bytes(data) -def unpack_sixbit(byteBuf): + +def unpack_sixbit(byteBuf: ByteBuffer): length = byteBuf.get_u8() length_bits = length * 6 length_bytes = (length_bits + 7) // 8 padding = 8 - (length_bits % 8) if padding == 8: padding = 0 - bits = int_from_bytes(byteBuf.get_bytes(length_bytes)) + bits = int.from_bytes(byteBuf.get_bytes(length_bytes), byteorder="big") bits >>= padding result = [] for _ in range(length): result.append(bits & 0b111111) bits >>= 6 - return ''.join([charmap[x] for x in result[::-1]]) + return "".join([charmap[x] for x in result[::-1]]) diff --git a/kbinxml/test.py b/kbinxml/test.py index e5f13c1..d76ea9d 100644 --- a/kbinxml/test.py +++ b/kbinxml/test.py @@ -1,28 +1,26 @@ from .kbinxml import KBinXML -# python 2/3 cross compat -from io import open -with open('testcases.xml', 'rb') as f: +with open("testcases.xml", "rb") as f: xml_in = f.read() -with open('testcases_out.xml', 'r', encoding='UTF-8') as f: +with open("testcases_out.xml", "r", encoding="UTF-8") as f: expected_xml = f.read() -with open('testcases_out.kbin', 'rb') as f: +with open("testcases_out.kbin", "rb") as f: expected_bin = f.read() k = KBinXML(xml_in) kbin = k.to_binary() if kbin != expected_bin: - with open('failed_test.kbin', 'wb') as f: + with open("failed_test.kbin", "wb") as f: f.write(kbin) - raise AssertionError('Binary output does not match, check failed_test.kbin') + raise AssertionError("Binary output does not match, check failed_test.kbin") else: - print('XML -> Binary correct!') + print("XML -> Binary correct!") backwards = KBinXML(kbin) btext = backwards.to_text() if btext != expected_xml: - with open('failed_test.xml', 'w', encoding='UTF-8') as f: + with open("failed_test.xml", "w", encoding="UTF-8") as f: f.write(btext) - raise AssertionError('XML putput does not match, check failed_test.xml') + raise AssertionError("XML putput does not match, check failed_test.xml") else: - print('Binary -> XML correct!') + print("Binary -> XML correct!") diff --git a/setup.py b/setup.py index 92ec09c..a943199 100644 --- a/setup.py +++ b/setup.py @@ -1,26 +1,25 @@ from setuptools import setup -import sys requires = [ - 'lxml', + "lxml", ] -if sys.version_info < (3,0): - requires.append('future') -version = '1.7' +python_requires = ">=3.10" + +version = "2.0" setup( - name='kbinxml', + name="kbinxml", description="Decoder/encoder for Konami's binary XML format", long_description="See Github for up to date documentation", version=version, - entry_points = { - 'console_scripts': ['kbinxml=kbinxml:main'], + entry_points={ + "console_scripts": ["kbinxml=kbinxml:main"], }, - packages=['kbinxml'], - url='https://github.com/mon/kbinxml/', - download_url = 'https://github.com/mon/kbinxml/archive/{}.tar.gz'.format(version), - author='mon', - author_email='me@mon.im', - install_requires=requires + packages=["kbinxml"], + url="https://github.com/mon/kbinxml/", + download_url="https://github.com/mon/kbinxml/archive/{}.tar.gz".format(version), + author="mon", + author_email="me@mon.im", + install_requires=requires, )