mirror of
https://github.com/mon/kbinxml.git
synced 2026-03-22 10:25:31 -05:00
Compare commits
No commits in common. "master" and "1.6" have entirely different histories.
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
|
|
@ -1,3 +0,0 @@
|
|||
{
|
||||
"editor.formatOnSave": true
|
||||
}
|
||||
|
|
@ -1,33 +1,31 @@
|
|||
from struct import *
|
||||
from typing import Any
|
||||
|
||||
|
||||
class ByteBuffer:
|
||||
def __init__(self, input: bytes | bytearray | str = b"", offset=0, endian=">"):
|
||||
class ByteBuffer():
|
||||
def __init__(self, input = b'', offset = 0, endian = '>'):
|
||||
# so multiple ByteBuffers can hold on to one set of underlying data
|
||||
# this is useful for writers in multiple locations
|
||||
if isinstance(input, bytearray):
|
||||
self.data = input
|
||||
else:
|
||||
if not isinstance(input, bytes):
|
||||
input = input.encode("utf-8")
|
||||
input = input.encode('utf-8')
|
||||
self.data = bytearray(input)
|
||||
self.endian = endian
|
||||
self.offset = offset
|
||||
self.end = len(self.data)
|
||||
|
||||
def _format_type(self, type: str, count: int | None = None):
|
||||
def _format_type(self, type, count):
|
||||
if count is None:
|
||||
return self.endian + type
|
||||
else:
|
||||
return self.endian + str(count) + type
|
||||
|
||||
def get_bytes(self, count: int):
|
||||
def get_bytes(self, count):
|
||||
start = self.offset
|
||||
self.offset += count
|
||||
return self.data[start : self.offset]
|
||||
return self.data[start:self.offset]
|
||||
|
||||
def get(self, type: str, count: int | None = None):
|
||||
def get(self, type, count = None):
|
||||
ret = self.peek(type, count)
|
||||
size = calcsize(type)
|
||||
if count is not None:
|
||||
|
|
@ -35,16 +33,16 @@ class ByteBuffer:
|
|||
self.offset += size
|
||||
return ret
|
||||
|
||||
def peek(self, type: str, count: int | None = None):
|
||||
def peek(self, type, count = None):
|
||||
fmt = self._format_type(type, count)
|
||||
ret = unpack_from(fmt, self.data, self.offset)
|
||||
return ret[0] if count is None else ret
|
||||
|
||||
def append_bytes(self, data: bytes):
|
||||
def append_bytes(self, data):
|
||||
self.data.extend(data)
|
||||
self.offset += len(data)
|
||||
|
||||
def append(self, data: Any, type: str, count: int | None = None):
|
||||
def append(self, data, type, count = None):
|
||||
fmt = self._format_type(type, count)
|
||||
self.offset += calcsize(fmt)
|
||||
try:
|
||||
|
|
@ -52,7 +50,7 @@ class ByteBuffer:
|
|||
except TypeError:
|
||||
self.data.extend(pack(fmt, data))
|
||||
|
||||
def set(self, data: Any, offset: int, type: str, count: int | None = None):
|
||||
def set(self, data, offset, type, count = None):
|
||||
fmt = self._format_type(type, count)
|
||||
try:
|
||||
pack_into(fmt, self.data, offset, *data)
|
||||
|
|
@ -63,109 +61,54 @@ class ByteBuffer:
|
|||
def hasData(self):
|
||||
return self.offset < self.end
|
||||
|
||||
def realign_writes(self, size=4):
|
||||
def realign_writes(self, size = 4):
|
||||
while len(self) % size:
|
||||
self.append_u8(0)
|
||||
|
||||
def realign_reads(self, size=4):
|
||||
def realign_reads(self, size = 4):
|
||||
while self.offset % size:
|
||||
self.offset += 1
|
||||
|
||||
def __len__(self):
|
||||
return len(self.data)
|
||||
|
||||
def get_s8(self) -> int:
|
||||
return self.get("b")
|
||||
typeMap = {
|
||||
's8' : 'b',
|
||||
's16' : 'h',
|
||||
's32' : 'i',
|
||||
's64' : 'q',
|
||||
'u8' : 'B',
|
||||
'u16' : 'H',
|
||||
'u32' : 'I',
|
||||
'u64' : 'Q'
|
||||
}
|
||||
|
||||
def peek_s8(self) -> int:
|
||||
return self.peek("b")
|
||||
def _make_get(fmt):
|
||||
def _method(self):
|
||||
return self.get(fmt)
|
||||
return _method
|
||||
|
||||
def append_s8(self, data: int):
|
||||
return self.append(data, "b")
|
||||
def _make_peek(fmt):
|
||||
def _method(self):
|
||||
return self.peek(fmt)
|
||||
return _method
|
||||
|
||||
def set_s8(self, data: int, offset: int):
|
||||
return self.set(data, offset, "b")
|
||||
def _make_append(fmt):
|
||||
def _method(self, data):
|
||||
return self.append(data, fmt)
|
||||
return _method
|
||||
|
||||
def get_s16(self) -> int:
|
||||
return self.get("h")
|
||||
def _make_set(fmt):
|
||||
def _method(self, data, offset):
|
||||
return self.set(data, offset, fmt)
|
||||
return _method
|
||||
|
||||
def peek_s16(self) -> int:
|
||||
return self.peek("h")
|
||||
|
||||
def append_s16(self, data: int):
|
||||
return self.append(data, "h")
|
||||
|
||||
def set_s16(self, data: int, offset: int):
|
||||
return self.set(data, offset, "h")
|
||||
|
||||
def get_s32(self) -> int:
|
||||
return self.get("i")
|
||||
|
||||
def peek_s32(self) -> int:
|
||||
return self.peek("i")
|
||||
|
||||
def append_s32(self, data: int):
|
||||
return self.append(data, "i")
|
||||
|
||||
def set_s32(self, data: int, offset: int):
|
||||
return self.set(data, offset, "i")
|
||||
|
||||
def get_s64(self) -> int:
|
||||
return self.get("q")
|
||||
|
||||
def peek_s64(self) -> int:
|
||||
return self.peek("q")
|
||||
|
||||
def append_s64(self, data: int):
|
||||
return self.append(data, "q")
|
||||
|
||||
def set_s64(self, data: int, offset: int):
|
||||
return self.set(data, offset, "q")
|
||||
|
||||
def get_u8(self) -> int:
|
||||
return self.get("B")
|
||||
|
||||
def peek_u8(self) -> int:
|
||||
return self.peek("B")
|
||||
|
||||
def append_u8(self, data: int):
|
||||
return self.append(data, "B")
|
||||
|
||||
def set_u8(self, data: int, offset: int):
|
||||
return self.set(data, offset, "B")
|
||||
|
||||
def get_u16(self) -> int:
|
||||
return self.get("H")
|
||||
|
||||
def peek_u16(self) -> int:
|
||||
return self.peek("H")
|
||||
|
||||
def append_u16(self, data: int):
|
||||
return self.append(data, "H")
|
||||
|
||||
def set_u16(self, data: int, offset: int):
|
||||
return self.set(data, offset, "H")
|
||||
|
||||
def get_u32(self) -> int:
|
||||
return self.get("I")
|
||||
|
||||
def peek_u32(self) -> int:
|
||||
return self.peek("I")
|
||||
|
||||
def append_u32(self, data: int):
|
||||
return self.append(data, "I")
|
||||
|
||||
def set_u32(self, data: int, offset: int):
|
||||
return self.set(data, offset, "I")
|
||||
|
||||
def get_u64(self) -> int:
|
||||
return self.get("Q")
|
||||
|
||||
def peek_u64(self) -> int:
|
||||
return self.peek("Q")
|
||||
|
||||
def append_u64(self, data: int):
|
||||
return self.append(data, "Q")
|
||||
|
||||
def set_u64(self, data: int, offset: int):
|
||||
return self.set(data, offset, "Q")
|
||||
for name, fmt in typeMap.items():
|
||||
_get = _make_get(fmt)
|
||||
_peek = _make_peek(fmt)
|
||||
_append = _make_append(fmt)
|
||||
_set = _make_set(fmt)
|
||||
setattr(ByteBuffer, 'get_' + name, _get)
|
||||
setattr(ByteBuffer, 'peek_' + name, _peek)
|
||||
setattr(ByteBuffer, 'append_' + name, _append)
|
||||
setattr(ByteBuffer, 'set_' + name, _set)
|
||||
|
|
|
|||
|
|
@ -1,27 +1,22 @@
|
|||
from struct import pack, unpack
|
||||
|
||||
|
||||
def parseIP(string: str) -> int:
|
||||
bunch = map(int, string.split("."))
|
||||
def parseIP(string):
|
||||
bunch = map(int, string.split('.'))
|
||||
# pack to bytes
|
||||
p = pack("4B", *bunch)
|
||||
# unpack as u32
|
||||
return unpack(">I", p)[0]
|
||||
p = pack('4B', *bunch)
|
||||
# unpack as u16
|
||||
return unpack('>I', p)[0]
|
||||
|
||||
|
||||
def writeIP(raw: int):
|
||||
def writeIP(raw):
|
||||
# pack to bytes
|
||||
p = pack(">I", raw)
|
||||
p = pack('>I', raw)
|
||||
# unpack
|
||||
return ".".join(map(str, unpack("4B", p)))
|
||||
return '.'.join(map(str, unpack('4B', p)))
|
||||
|
||||
|
||||
def writeFloat(raw: float):
|
||||
def writeFloat(raw):
|
||||
# this is just how floats get printed...
|
||||
return f"{raw:.6f}"
|
||||
return '{0:.6f}'.format(raw)
|
||||
|
||||
|
||||
# fmt: off
|
||||
xml_formats = {
|
||||
1 : { 'names' : ['void']},
|
||||
2 : { 'type' : 'b', 'count' : 1, 'names' : ['s8']},
|
||||
|
|
@ -80,16 +75,15 @@ xml_formats = {
|
|||
55 : { 'type' : 'b', 'count' : 4, 'names' : ['4b']},
|
||||
56 : { 'type' : 'b', 'count' : 16, 'names' : ['vb']}
|
||||
}
|
||||
# fmt: on
|
||||
|
||||
# little less boilerplate for writing
|
||||
for key, val in xml_formats.items():
|
||||
xml_formats[key]["name"] = xml_formats[key]["names"][0]
|
||||
xml_formats[key]['name'] = xml_formats[key]['names'][0]
|
||||
|
||||
xml_types: dict[str, int] = {}
|
||||
xml_types = {}
|
||||
for key, val in xml_formats.items():
|
||||
for n in val["names"]:
|
||||
for n in val['names']:
|
||||
xml_types[n] = key
|
||||
xml_types["nodeStart"] = 1
|
||||
xml_types["nodeEnd"] = 190
|
||||
xml_types["endSection"] = 191
|
||||
xml_types['nodeStart'] = 1
|
||||
xml_types['nodeEnd'] = 190
|
||||
xml_types['endSection'] = 191
|
||||
|
|
|
|||
|
|
@ -1,14 +1,18 @@
|
|||
import argparse
|
||||
import operator
|
||||
import sys
|
||||
from io import BytesIO
|
||||
# python 3 style, ints instead of b''
|
||||
from builtins import bytes
|
||||
from struct import calcsize
|
||||
import string
|
||||
import sys
|
||||
import operator
|
||||
from io import BytesIO
|
||||
|
||||
import lxml.etree as etree
|
||||
|
||||
from .bytebuffer import ByteBuffer
|
||||
from .format_ids import xml_formats, xml_types
|
||||
from .sixbit import pack_sixbit, unpack_sixbit
|
||||
from .format_ids import xml_formats, xml_types
|
||||
|
||||
stdout = getattr(sys.stdout, 'buffer', sys.stdout)
|
||||
|
||||
DEBUG_OFFSETS = False
|
||||
DEBUG = False
|
||||
|
|
@ -18,42 +22,27 @@ SIGNATURE = 0xA0
|
|||
SIG_COMPRESSED = 0x42
|
||||
SIG_UNCOMPRESSED = 0x45
|
||||
|
||||
XML_ENCODING = "UTF-8"
|
||||
BIN_ENCODING = "cp932" # windows shift-jis variant
|
||||
XML_ENCODING = 'UTF-8'
|
||||
BIN_ENCODING = 'cp932' # windows shift-jis variant
|
||||
|
||||
# NOTE: all of these are their python codec names
|
||||
encoding_strings = {
|
||||
0x00: "cp932",
|
||||
0x20: "ASCII",
|
||||
0x40: "ISO-8859-1",
|
||||
0x60: "EUC_JP",
|
||||
0x80: "cp932",
|
||||
0xA0: "UTF-8",
|
||||
0x20: 'ASCII',
|
||||
0x00: 'ISO-8859-1',
|
||||
0x60: 'EUC_JP',
|
||||
0x80: 'cp932',
|
||||
0xA0: 'UTF-8'
|
||||
}
|
||||
|
||||
encoding_vals = {val: key for key, val in encoding_strings.items()}
|
||||
# ensure that duplicated value from above is correct. Avoid exporting 0x00 type
|
||||
encoding_vals["cp932"] = 0x80
|
||||
|
||||
encoding_vals = {val : key for key, val in encoding_strings.items()}
|
||||
|
||||
def debug_print(string):
|
||||
if DEBUG:
|
||||
print(string)
|
||||
|
||||
class KBinXML():
|
||||
|
||||
class KBinException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class KBinXML:
|
||||
def __init__(self, input, convert_illegal_things=False):
|
||||
"""If `convert_illegal_things` is true,
|
||||
- Any shift-jis string that cannot be decoded as shift-jis will
|
||||
try to be decoded as utf-8
|
||||
- If a node name is invalid (for example, it starts with a number),
|
||||
the name will be prefixed with an underscore
|
||||
"""
|
||||
self.convert_illegal_things = convert_illegal_things
|
||||
def __init__(self, input):
|
||||
if isinstance(input, etree._Element):
|
||||
self.xml_doc = input
|
||||
elif isinstance(input, etree._ElementTree):
|
||||
|
|
@ -63,11 +52,10 @@ class KBinXML:
|
|||
else:
|
||||
self.from_text(input)
|
||||
|
||||
def to_text(self) -> str:
|
||||
def to_text(self):
|
||||
# we decode again because I want unicode, dammit
|
||||
return etree.tostring(
|
||||
self.xml_doc, pretty_print=True, encoding=XML_ENCODING, xml_declaration=True
|
||||
).decode(XML_ENCODING)
|
||||
return etree.tostring(self.xml_doc, pretty_print=True,
|
||||
encoding=XML_ENCODING, xml_declaration=True).decode(XML_ENCODING)
|
||||
|
||||
def from_text(self, input):
|
||||
self.xml_doc = etree.parse(BytesIO(input)).getroot()
|
||||
|
|
@ -77,14 +65,9 @@ class KBinXML:
|
|||
|
||||
@staticmethod
|
||||
def is_binary_xml(input):
|
||||
if len(input) < 2:
|
||||
return False
|
||||
|
||||
nodeBuf = ByteBuffer(input)
|
||||
return nodeBuf.get_u8() == SIGNATURE and nodeBuf.get_u8() in (
|
||||
SIG_COMPRESSED,
|
||||
SIG_UNCOMPRESSED,
|
||||
)
|
||||
return (nodeBuf.get_u8() == SIGNATURE and
|
||||
nodeBuf.get_u8() in (SIG_COMPRESSED, SIG_UNCOMPRESSED))
|
||||
|
||||
@property
|
||||
def _data_mem_size(self):
|
||||
|
|
@ -92,25 +75,25 @@ class KBinXML:
|
|||
|
||||
data_len = 0
|
||||
for e in self.xml_doc.iter(tag=etree.Element):
|
||||
t = e.attrib.get("__type")
|
||||
t = e.attrib.get('__type')
|
||||
if t is None:
|
||||
continue
|
||||
|
||||
count = e.attrib.get("__count", 1)
|
||||
size = e.attrib.get("__size", 1)
|
||||
count = e.attrib.get('__count', 1)
|
||||
size = e.attrib.get('__size', 1)
|
||||
x = xml_formats[xml_types[t]]
|
||||
if x["count"] > 0:
|
||||
m = x["count"] * calcsize(x["type"]) * count * size
|
||||
elif x["name"] == "bin":
|
||||
if x['count'] > 0:
|
||||
m = x['count'] * calcsize(x['type']) * count * size
|
||||
elif x['name'] == 'bin':
|
||||
m = len(e.text) // 2
|
||||
else: # string
|
||||
else: # string
|
||||
# null terminator space
|
||||
m = len(e.text.encode(self.encoding)) + 1
|
||||
|
||||
if m <= 4:
|
||||
continue
|
||||
|
||||
if x["name"] == "bin":
|
||||
if x['name'] == 'bin':
|
||||
data_len += (m + 1) & ~1
|
||||
else:
|
||||
data_len += (m + 3) & ~3
|
||||
|
|
@ -118,7 +101,7 @@ class KBinXML:
|
|||
|
||||
@property
|
||||
def mem_size(self):
|
||||
"""used when allocating memory ingame"""
|
||||
'''used when allocating memory ingame'''
|
||||
|
||||
data_len = self._data_mem_size
|
||||
node_count = len(list(self.xml_doc.iter(tag=etree.Element)))
|
||||
|
|
@ -135,7 +118,7 @@ class KBinXML:
|
|||
size = 56 * node_count + data_len + 630 + tags_len
|
||||
|
||||
# debugging
|
||||
# print('nodes:{} ({}) data:{} ({})'.format(node_count,hex(node_count), data_len, hex(data_len)))
|
||||
#print('nodes:{} ({}) data:{} ({})'.format(node_count,hex(node_count), data_len, hex(data_len)))
|
||||
|
||||
return (size + 8) & ~7
|
||||
|
||||
|
|
@ -152,30 +135,10 @@ class KBinXML:
|
|||
|
||||
def data_grab_string(self):
|
||||
data = self.data_grab_auto()
|
||||
data = bytes(data[:-1])
|
||||
try:
|
||||
return data.decode(self.encoding)
|
||||
except UnicodeDecodeError as e:
|
||||
if self.encoding == "cp932":
|
||||
if not self.convert_illegal_things:
|
||||
raise KBinException(
|
||||
f"Could not decode string. To force utf8 decode {convert_illegal_help}."
|
||||
) from e
|
||||
|
||||
# having to do this kinda sucks, but it's better than just giving up
|
||||
print(
|
||||
"KBinXML: Malformed Shift-JIS string found, attempting UTF-8 decode",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print("KBinXML: Raw string data:", data, file=sys.stderr)
|
||||
return data.decode("utf8")
|
||||
else:
|
||||
# in the unlikely event of malformed data that isn't shift-jis,
|
||||
# fix it later
|
||||
raise
|
||||
return bytes(data[:-1]).decode(self.encoding)
|
||||
|
||||
def data_append_string(self, string):
|
||||
string = bytes(string.encode(self.encoding) + b"\0")
|
||||
string = bytes(string.encode(self.encoding) + b'\0')
|
||||
self.data_append_auto(string)
|
||||
|
||||
# has its own separate state and other assorted garbage
|
||||
|
|
@ -227,82 +190,65 @@ class KBinXML:
|
|||
self.nodeBuf.append_u8((len(enc) - 1) | 64)
|
||||
self.nodeBuf.append_bytes(enc)
|
||||
|
||||
def _add_namespace(self, node, name, value):
|
||||
"""Add a namespace (xmlns) to an existing node. Returns the new node to
|
||||
work with"""
|
||||
|
||||
# I wish this worked, but we need to specifiy it in the constructor
|
||||
# node.nsmap[name] = value
|
||||
ns = node.nsmap
|
||||
ns[name] = value
|
||||
old_node = node
|
||||
node = etree.Element(old_node.tag, nsmap=ns)
|
||||
node[:] = old_node[:]
|
||||
parent = old_node.getparent()
|
||||
if parent is not None:
|
||||
parent.remove(old_node)
|
||||
parent.append(node)
|
||||
return node
|
||||
|
||||
def _node_to_binary(self, node):
|
||||
nodeType = node.attrib.get("__type")
|
||||
nodeType = node.attrib.get('__type')
|
||||
if not nodeType:
|
||||
# typeless tags with text become string
|
||||
if node.text is not None and len(node.text.strip()) > 0:
|
||||
nodeType = "str"
|
||||
nodeType = 'str'
|
||||
else:
|
||||
nodeType = "void"
|
||||
nodeType = 'void'
|
||||
nodeId = xml_types[nodeType]
|
||||
|
||||
isArray = 0
|
||||
count = node.attrib.get("__count")
|
||||
count = node.attrib.get('__count')
|
||||
if count:
|
||||
count = int(count)
|
||||
isArray = 64 # bit position for array flag
|
||||
isArray = 64 # bit position for array flag
|
||||
|
||||
self.nodeBuf.append_u8(nodeId | isArray)
|
||||
|
||||
name = node.tag
|
||||
self.append_node_name(name)
|
||||
|
||||
if nodeType != "void":
|
||||
if nodeType != 'void':
|
||||
fmt = xml_formats[nodeId]
|
||||
|
||||
val = node.text
|
||||
if fmt["name"] == "bin":
|
||||
if fmt['name'] == 'bin':
|
||||
data = bytes(bytearray.fromhex(val))
|
||||
elif fmt["name"] == "str":
|
||||
if val is None: # empty string
|
||||
val = ""
|
||||
data = bytes(val.encode(self.encoding, "replace") + b"\0")
|
||||
elif fmt['name'] == 'str':
|
||||
if val is None: # empty string
|
||||
val = ''
|
||||
data = bytes(val.encode(self.encoding, 'replace') + b'\0')
|
||||
else:
|
||||
val = val.split(" ")
|
||||
data = list(map(fmt.get("fromStr", int), val))
|
||||
if count and len(data) / fmt["count"] != count:
|
||||
raise ValueError("Array length does not match __count attribute")
|
||||
val = val.split(' ')
|
||||
data = list(map(fmt.get('fromStr', int), val))
|
||||
if count and len(data) / fmt['count'] != count:
|
||||
raise ValueError('Array length does not match __count attribute')
|
||||
|
||||
if isArray or fmt["count"] == -1:
|
||||
self.dataBuf.append_u32(len(data) * calcsize(fmt["type"]))
|
||||
self.dataBuf.append(data, fmt["type"], len(data))
|
||||
if isArray or fmt['count'] == -1:
|
||||
self.dataBuf.append_u32(len(data) * calcsize(fmt['type']))
|
||||
self.dataBuf.append(data, fmt['type'], len(data))
|
||||
self.dataBuf.realign_writes()
|
||||
else:
|
||||
self.data_append_aligned(data, fmt["type"], fmt["count"])
|
||||
self.data_append_aligned(data, fmt['type'], fmt['count'])
|
||||
|
||||
# for test consistency and to be more faithful, sort the attrs
|
||||
sorted_attrs = sorted(node.attrib.items(), key=operator.itemgetter(0))
|
||||
for key, value in sorted_attrs:
|
||||
if key not in ["__type", "__size", "__count"]:
|
||||
if key not in ['__type', '__size', '__count']:
|
||||
self.data_append_string(value)
|
||||
self.nodeBuf.append_u8(xml_types["attr"])
|
||||
self.nodeBuf.append_u8(xml_types['attr'])
|
||||
self.append_node_name(key)
|
||||
|
||||
for child in node.iterchildren(tag=etree.Element):
|
||||
self._node_to_binary(child)
|
||||
|
||||
# always has the isArray bit set
|
||||
self.nodeBuf.append_u8(xml_types["nodeEnd"] | 64)
|
||||
self.nodeBuf.append_u8(xml_types['nodeEnd'] | 64)
|
||||
|
||||
def to_binary(self, encoding=BIN_ENCODING, compressed=True):
|
||||
def to_binary(self, encoding = BIN_ENCODING, compressed = True):
|
||||
self.encoding = encoding
|
||||
self.compressed = compressed
|
||||
|
||||
|
|
@ -323,7 +269,7 @@ class KBinXML:
|
|||
self._node_to_binary(self.xml_doc)
|
||||
|
||||
# always has the isArray bit set
|
||||
self.nodeBuf.append_u8(xml_types["endSection"] | 64)
|
||||
self.nodeBuf.append_u8(xml_types['endSection'] | 64)
|
||||
self.nodeBuf.realign_writes()
|
||||
header.append_u32(len(self.nodeBuf))
|
||||
self.dataSize = len(self.dataBuf)
|
||||
|
|
@ -331,7 +277,7 @@ class KBinXML:
|
|||
return bytes(header.data + self.nodeBuf.data + self.dataBuf.data)
|
||||
|
||||
def from_binary(self, input):
|
||||
self.xml_doc = etree.Element("root")
|
||||
self.xml_doc = etree.Element('root')
|
||||
node = self.xml_doc
|
||||
|
||||
self.nodeBuf = ByteBuffer(input)
|
||||
|
|
@ -364,12 +310,12 @@ class KBinXML:
|
|||
isArray = nodeType & 64
|
||||
nodeType &= ~64
|
||||
|
||||
nodeFormat = xml_formats.get(nodeType, {"name": "Unknown"})
|
||||
debug_print("Node type is {} ({})".format(nodeFormat["name"], nodeType))
|
||||
nodeFormat = xml_formats.get(nodeType, {'name':'Unknown'})
|
||||
debug_print('Node type is {} ({})'.format(nodeFormat['name'], nodeType))
|
||||
|
||||
# node or attribute name
|
||||
name = ""
|
||||
if nodeType != xml_types["nodeEnd"] and nodeType != xml_types["endSection"]:
|
||||
name = ''
|
||||
if nodeType != xml_types['nodeEnd'] and nodeType != xml_types['endSection']:
|
||||
if self.compressed:
|
||||
name = unpack_sixbit(self.nodeBuf)
|
||||
else:
|
||||
|
|
@ -380,114 +326,72 @@ class KBinXML:
|
|||
|
||||
skip = True
|
||||
|
||||
if nodeType == xml_types["attr"]:
|
||||
if nodeType == xml_types['attr']:
|
||||
value = self.data_grab_string()
|
||||
# because someone thought it was a good idea to serialise namespaces
|
||||
if name.startswith("xmlns:"):
|
||||
_, name = name.split("xmlns:")
|
||||
node = self._add_namespace(node, name, value)
|
||||
elif ":" in name:
|
||||
prefix, name = name.split(":")
|
||||
# if this fails, the xml is invalid. Open an issue.
|
||||
node.set(etree.QName(node.nsmap[prefix], name), value)
|
||||
# this is the case you'll get in 99% of places
|
||||
else:
|
||||
node.attrib[name] = value
|
||||
elif nodeType == xml_types["nodeEnd"]:
|
||||
node.attrib[name] = value
|
||||
elif nodeType == xml_types['nodeEnd']:
|
||||
if node.getparent() is not None:
|
||||
node = node.getparent()
|
||||
elif nodeType == xml_types["endSection"]:
|
||||
elif nodeType == xml_types['endSection']:
|
||||
nodesLeft = False
|
||||
elif nodeType not in xml_formats:
|
||||
raise NotImplementedError("Implement node {}".format(nodeType))
|
||||
else: # inner value to process
|
||||
raise NotImplementedError('Implement node {}'.format(nodeType))
|
||||
else: # inner value to process
|
||||
skip = False
|
||||
|
||||
if skip:
|
||||
continue
|
||||
|
||||
try:
|
||||
child = etree.SubElement(node, name)
|
||||
except ValueError as e:
|
||||
fixed_name = f"_{name}"
|
||||
if self.convert_illegal_things:
|
||||
# todo: there are other invalid node names. Fix them when you see them.
|
||||
child = etree.SubElement(node, fixed_name)
|
||||
else:
|
||||
raise KBinException(
|
||||
f'Could not create node with name "{name}". To rename it to "{fixed_name}", {convert_illegal_help}.'
|
||||
) from e
|
||||
child = etree.SubElement(node, name)
|
||||
node = child
|
||||
|
||||
if nodeType == xml_types["nodeStart"]:
|
||||
if nodeType == xml_types['nodeStart']:
|
||||
continue
|
||||
|
||||
node.attrib["__type"] = nodeFormat["name"]
|
||||
node.attrib['__type'] = nodeFormat['name']
|
||||
|
||||
varCount = nodeFormat["count"]
|
||||
varCount = nodeFormat['count']
|
||||
arrayCount = 1
|
||||
if varCount == -1: # the 2 cannot be combined
|
||||
if varCount == -1: # the 2 cannot be combined
|
||||
varCount = self.dataBuf.get_u32()
|
||||
isArray = True
|
||||
elif isArray:
|
||||
arrayCount = self.dataBuf.get_u32() // (
|
||||
calcsize(nodeFormat["type"] * varCount)
|
||||
)
|
||||
node.attrib["__count"] = str(arrayCount)
|
||||
arrayCount = self.dataBuf.get_u32() // (calcsize(nodeFormat['type'] * varCount))
|
||||
node.attrib['__count'] = str(arrayCount)
|
||||
totalCount = arrayCount * varCount
|
||||
|
||||
if isArray:
|
||||
data = self.dataBuf.get(nodeFormat["type"], totalCount)
|
||||
data = self.dataBuf.get(nodeFormat['type'], totalCount)
|
||||
self.dataBuf.realign_reads()
|
||||
else:
|
||||
data = self.data_grab_aligned(nodeFormat["type"], totalCount)
|
||||
data = self.data_grab_aligned(nodeFormat['type'], totalCount)
|
||||
|
||||
if nodeType == xml_types["binary"]:
|
||||
node.attrib["__size"] = str(totalCount)
|
||||
string = "".join(("{0:02x}".format(x) for x in data))
|
||||
elif nodeType == xml_types["string"]:
|
||||
if nodeType == xml_types['binary']:
|
||||
node.attrib['__size'] = str(totalCount)
|
||||
string = ''.join(('{0:02x}'.format(x) for x in data))
|
||||
elif nodeType == xml_types['string']:
|
||||
string = bytes(data[:-1]).decode(self.encoding)
|
||||
else:
|
||||
string = " ".join(map(nodeFormat.get("toStr", str), data))
|
||||
string = ' '.join(map(nodeFormat.get('toStr', str), data))
|
||||
|
||||
# some strings have extra NUL bytes, compatible behaviour is to strip
|
||||
node.text = string.strip("\0")
|
||||
node.text = string
|
||||
|
||||
# because we need the 'real' root
|
||||
self.xml_doc = self.xml_doc[0]
|
||||
|
||||
|
||||
convert_illegal_help = "set convert_illegal_things=True in the KBinXML constructor"
|
||||
|
||||
|
||||
def main():
|
||||
# interestingly, this doesn't work if added inside the
|
||||
# `if __name__ == "__main__"` branch
|
||||
global convert_illegal_help
|
||||
convert_illegal_help = "add the --convert-illegal flag"
|
||||
if len(sys.argv) != 2:
|
||||
print('bin_xml.py file.[xml/bin]')
|
||||
exit()
|
||||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="kbinxml", description="Convert kbin to xml, or xml to kbin"
|
||||
)
|
||||
parser.add_argument("filename", metavar="file.[xml/bin]")
|
||||
parser.add_argument("--convert-illegal", action="store_true")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
with open(args.filename, "rb") as f:
|
||||
with open(sys.argv[1], 'rb') as f:
|
||||
input = f.read()
|
||||
|
||||
xml = KBinXML(input, convert_illegal_things=args.convert_illegal)
|
||||
stdout = getattr(sys.stdout, "buffer", sys.stdout)
|
||||
try:
|
||||
if KBinXML.is_binary_xml(input):
|
||||
stdout.write(xml.to_text().encode("utf-8"))
|
||||
else:
|
||||
stdout.write(xml.to_binary())
|
||||
except BrokenPipeError:
|
||||
# allows kbinxml to be piped to `head` or similar
|
||||
sys.exit(141)
|
||||
xml = KBinXML(input)
|
||||
if KBinXML.is_binary_xml(input):
|
||||
stdout.write(xml.to_text().encode('utf-8'))
|
||||
else:
|
||||
stdout.write(xml.to_binary())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
|||
|
|
@ -1,13 +1,28 @@
|
|||
from kbinxml.bytebuffer import ByteBuffer
|
||||
# python 3 style, ints instead of b''
|
||||
from builtins import bytes as newbytes
|
||||
|
||||
def py2_int_to_bytes(n, length):
|
||||
h = '%x' % n
|
||||
s = ('0'*(len(h) % 2) + h).zfill(length*2).decode('hex')
|
||||
return newbytes(s)
|
||||
|
||||
try:
|
||||
# python 3
|
||||
int.from_bytes
|
||||
int_from_bytes = lambda b : int.from_bytes(b, byteorder='big')
|
||||
int_to_bytes = lambda i, length : i.to_bytes(length, byteorder='big')
|
||||
except AttributeError:
|
||||
# python 2
|
||||
int_from_bytes = lambda b : int(bytes(b).encode('hex'), 16)
|
||||
int_to_bytes = py2_int_to_bytes
|
||||
|
||||
|
||||
charmap = "0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"
|
||||
bytemap = {c: i for i, c in enumerate(charmap)}
|
||||
charmap = '0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'
|
||||
bytemap = {charmap[i] : i for i in range(len(charmap))}
|
||||
|
||||
|
||||
def pack_sixbit(string: str, byteBuf: ByteBuffer):
|
||||
def pack_sixbit(string, byteBuf):
|
||||
chars = [bytemap[x] for x in string]
|
||||
padding = 8 - (len(string) * 6 % 8)
|
||||
padding = 8 - (len(string)*6 % 8)
|
||||
if padding == 8:
|
||||
padding = 0
|
||||
bits = 0
|
||||
|
|
@ -15,22 +30,21 @@ def pack_sixbit(string: str, byteBuf: ByteBuffer):
|
|||
bits <<= 6
|
||||
bits |= c
|
||||
bits <<= padding
|
||||
data = bits.to_bytes((len(string) * 6 + padding) // 8, byteorder="big")
|
||||
data = int_to_bytes(bits, (len(string)*6 + padding) // 8)
|
||||
byteBuf.append_bytes((len(string),))
|
||||
byteBuf.append_bytes(data)
|
||||
|
||||
|
||||
def unpack_sixbit(byteBuf: ByteBuffer):
|
||||
def unpack_sixbit(byteBuf):
|
||||
length = byteBuf.get_u8()
|
||||
length_bits = length * 6
|
||||
length_bytes = (length_bits + 7) // 8
|
||||
padding = 8 - (length_bits % 8)
|
||||
if padding == 8:
|
||||
padding = 0
|
||||
bits = int.from_bytes(byteBuf.get_bytes(length_bytes), byteorder="big")
|
||||
bits = int_from_bytes(byteBuf.get_bytes(length_bytes))
|
||||
bits >>= padding
|
||||
result = []
|
||||
for _ in range(length):
|
||||
result.append(bits & 0b111111)
|
||||
bits >>= 6
|
||||
return "".join([charmap[x] for x in result[::-1]])
|
||||
return ''.join([charmap[x] for x in result[::-1]])
|
||||
|
|
|
|||
|
|
@ -1,26 +1,28 @@
|
|||
from .kbinxml import KBinXML
|
||||
# python 2/3 cross compat
|
||||
from io import open
|
||||
|
||||
with open("testcases.xml", "rb") as f:
|
||||
with open('testcases.xml', 'rb') as f:
|
||||
xml_in = f.read()
|
||||
with open("testcases_out.xml", "r", encoding="UTF-8") as f:
|
||||
with open('testcases_out.xml', 'r', encoding='UTF-8') as f:
|
||||
expected_xml = f.read()
|
||||
with open("testcases_out.kbin", "rb") as f:
|
||||
with open('testcases_out.kbin', 'rb') as f:
|
||||
expected_bin = f.read()
|
||||
|
||||
k = KBinXML(xml_in)
|
||||
kbin = k.to_binary()
|
||||
if kbin != expected_bin:
|
||||
with open("failed_test.kbin", "wb") as f:
|
||||
with open('failed_test.kbin', 'wb') as f:
|
||||
f.write(kbin)
|
||||
raise AssertionError("Binary output does not match, check failed_test.kbin")
|
||||
raise AssertionError('Binary output does not match, check failed_test.kbin')
|
||||
else:
|
||||
print("XML -> Binary correct!")
|
||||
print('XML -> Binary correct!')
|
||||
|
||||
backwards = KBinXML(kbin)
|
||||
btext = backwards.to_text()
|
||||
if btext != expected_xml:
|
||||
with open("failed_test.xml", "w", encoding="UTF-8") as f:
|
||||
with open('failed_test.xml', 'w', encoding='UTF-8') as f:
|
||||
f.write(btext)
|
||||
raise AssertionError("XML putput does not match, check failed_test.xml")
|
||||
raise AssertionError('XML putput does not match, check failed_test.xml')
|
||||
else:
|
||||
print("Binary -> XML correct!")
|
||||
print('Binary -> XML correct!')
|
||||
|
|
|
|||
28
setup.py
28
setup.py
|
|
@ -1,24 +1,26 @@
|
|||
from setuptools import setup
|
||||
import sys
|
||||
|
||||
|
||||
requires = [
|
||||
"lxml",
|
||||
'lxml',
|
||||
]
|
||||
if sys.version_info < (3,0):
|
||||
requires.append('future')
|
||||
|
||||
python_requires = ">=3.10"
|
||||
|
||||
version = "2.1"
|
||||
version = '1.6'
|
||||
setup(
|
||||
name="kbinxml",
|
||||
name='kbinxml',
|
||||
description="Decoder/encoder for Konami's binary XML format",
|
||||
long_description="See Github for up to date documentation",
|
||||
version=version,
|
||||
entry_points={
|
||||
"console_scripts": ["kbinxml=kbinxml:main"],
|
||||
entry_points = {
|
||||
'console_scripts': ['kbinxml=kbinxml:main'],
|
||||
},
|
||||
packages=["kbinxml"],
|
||||
url="https://github.com/mon/kbinxml/",
|
||||
download_url="https://github.com/mon/kbinxml/archive/{}.tar.gz".format(version),
|
||||
author="mon",
|
||||
author_email="me@mon.im",
|
||||
install_requires=requires,
|
||||
packages=['kbinxml'],
|
||||
url='https://github.com/mon/kbinxml/',
|
||||
download_url = 'https://github.com/mon/kbinxml/archive/{}.tar.gz'.format(version),
|
||||
author='mon',
|
||||
author_email='me@mon.im',
|
||||
install_requires=requires
|
||||
)
|
||||
|
|
|
|||
|
|
@ -8,11 +8,9 @@
|
|||
<!-- Testing encoding, plus __type-less should become string -->
|
||||
<superstar babe="ミツル">シ イス マイ ワイフ</superstar>
|
||||
<!-- Testing 6bit conversion -->
|
||||
<xXx_T4GG3R_xXx __type="2u8">8 9</xXx_T4GG3R_xXx>
|
||||
<!-- Shouldn't have alignment issues from the 2u8 -->
|
||||
<xXx_T4GG3R_xXx __type="3u8">8 9 10</xXx_T4GG3R_xXx>
|
||||
<!-- Shouldn't have alignment issues from the 3u8 -->
|
||||
<aligned __type="u8">12</aligned>
|
||||
<!-- Array contents are never packed -->
|
||||
<aligned_arr __type="u8" __count="1">13</aligned_arr>
|
||||
<!-- Binary parsing -->
|
||||
<entry __type="binary">DEADBEEF</entry>
|
||||
<!-- Lowercase, too -->
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -3,9 +3,8 @@
|
|||
<entry __type="ip4" __count="2">127.0.0.1 192.168.0.1</entry>
|
||||
<entry __type="str" attr="test" attr2="best">Hello, world!</entry>
|
||||
<superstar __type="str" babe="ミツル">シ イス マイ ワイフ</superstar>
|
||||
<xXx_T4GG3R_xXx __type="2u8">8 9</xXx_T4GG3R_xXx>
|
||||
<xXx_T4GG3R_xXx __type="3u8">8 9 10</xXx_T4GG3R_xXx>
|
||||
<aligned __type="u8">12</aligned>
|
||||
<aligned_arr __type="u8" __count="1">13</aligned_arr>
|
||||
<entry __type="bin" __size="4">deadbeef</entry>
|
||||
<entry __type="bin" __size="4">deadbe7a</entry>
|
||||
<entry __type="3u8" __count="2">1 2 3 1 2 3</entry>
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user