From f3a6fae0a1f4f964cc5b880b21e3be49d7ee46ff Mon Sep 17 00:00:00 2001 From: Matt Bilker Date: Thu, 31 May 2018 09:52:39 +0000 Subject: [PATCH] Initial commit --- .gitignore | 3 + Cargo.toml | 13 ++++ LICENSE | 19 ++++++ README.md | 7 ++ src/bin/kbinxml.rs | 35 ++++++++++ src/compression.rs | 22 +++++++ src/encoding_type.rs | 38 +++++++++++ src/lib.rs | 137 +++++++++++++++++++++++++++++++++++++++ src/node_types.rs | 149 +++++++++++++++++++++++++++++++++++++++++++ src/sixbit.rs | 80 +++++++++++++++++++++++ 10 files changed, 503 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 README.md create mode 100644 src/bin/kbinxml.rs create mode 100644 src/compression.rs create mode 100644 src/encoding_type.rs create mode 100644 src/lib.rs create mode 100644 src/node_types.rs create mode 100644 src/sixbit.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b1c7301 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/Cargo.lock +/target +**/*.rs.bk diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..44bb2d0 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "kbinxml" +version = "0.1.0" +authors = ["Matt Bilker "] + +[dependencies] +byteorder = "1.2.3" +lazy_static = "1.0.0" +log = "0.4.1" +minidom = "0.9.0" +num = "0.1.42" +pretty_env_logger = "0.2.3" +quick-xml = "0.12.1" diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..427611c --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright (c) 2018 Matt Bilker + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..eb151e0 --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# kbinxml + +An encoder/decoder for Konami's binary XML format, used in some of their games. + +### Setup + +To be written. diff --git a/src/bin/kbinxml.rs b/src/bin/kbinxml.rs new file mode 100644 index 0000000..49edee6 --- /dev/null +++ b/src/bin/kbinxml.rs @@ -0,0 +1,35 @@ +extern crate kbinxml; +extern crate pretty_env_logger; +extern crate quick_xml; + +use std::env; +use std::fs::File; +use std::io::{Cursor, Error as IoError, ErrorKind as IoErrorKind, Read, Write, stdout}; + +use kbinxml::KbinXml; +use quick_xml::Writer; + +fn main() -> std::io::Result<()> { + pretty_env_logger::init(); + + if let Some(file_name) = env::args().skip(1).next() { + println!("file_name: {}", file_name); + + let mut file = File::open(file_name)?; + let mut contents = Vec::new(); + file.read_to_end(&mut contents)?; + + let element = KbinXml::from_binary(&contents); + //println!("element: {:#?}", element); + + let inner = Cursor::new(Vec::new()); + let mut writer = Writer::new_with_indent(inner, b' ', 2); + element.to_writer(&mut writer).map_err(|e| IoError::new(IoErrorKind::Other, format!("{:?}", e)))?; + + let buf = writer.into_inner().into_inner(); + let stdout = stdout(); + stdout.lock().write_all(&buf)?; + println!(); + } + Ok(()) +} diff --git a/src/compression.rs b/src/compression.rs new file mode 100644 index 0000000..703f73b --- /dev/null +++ b/src/compression.rs @@ -0,0 +1,22 @@ +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum Compression { + Compressed, + Uncompressed, +} + +impl Compression { + pub fn from_byte(byte: u8) -> Option { + match byte { + 0x42 => Some(Compression::Compressed), + 0x45 => Some(Compression::Uncompressed), + _ => None, + } + } + + pub fn _to_byte(&self) -> u8 { + match *self { + Compression::Compressed => 0x42, + Compression::Uncompressed => 0x45, + } + } +} diff --git a/src/encoding_type.rs b/src/encoding_type.rs new file mode 100644 index 0000000..39e610f --- /dev/null +++ b/src/encoding_type.rs @@ -0,0 +1,38 @@ +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum EncodingType { + None, + ASCII, + ISO_8859_1, + EUC_JP, + SHIFT_JIS, + UTF_8, +} + +impl EncodingType { + #[allow(dead_code)] + pub fn to_byte(&self) -> u8 { + match *self { + EncodingType::None => 0x00, // 0x00 >> 5 = 0 + EncodingType::ASCII => 0x20, // 0x20 >> 5 = 1 + EncodingType::ISO_8859_1 => 0x40, // 0x40 >> 5 = 2 + EncodingType::EUC_JP => 0x60, // 0x60 >> 5 = 3 + EncodingType::SHIFT_JIS => 0x80, // 0x80 >> 5 = 4 + EncodingType::UTF_8 => 0xA0, // 0xA0 >> 5 = 5 + } + } + + pub fn from_byte(byte: u8) -> Option { + let val = match byte { + 0x00 => EncodingType::None, + 0x20 => EncodingType::ASCII, + 0x40 => EncodingType::ISO_8859_1, + 0x60 => EncodingType::EUC_JP, + 0x80 => EncodingType::SHIFT_JIS, + 0xA0 => EncodingType::UTF_8, + _ => return None, + }; + + Some(val) + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..4b39232 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,137 @@ +#![feature(int_to_from_bytes)] + +extern crate byteorder; +extern crate minidom; +extern crate num; + +#[macro_use] extern crate lazy_static; +#[macro_use] extern crate log; + +use byteorder::{BigEndian, ReadBytesExt}; +use std::io::Cursor; + +use minidom::Element; + +mod compression; +mod encoding_type; +mod node_types; +mod sixbit; + +use compression::Compression; +use encoding_type::EncodingType; +use node_types::KbinType; +use sixbit::unpack_sixbit; + +const SIGNATURE: u8 = 0xA0; + +const SIG_COMPRESSED: u8 = 0x42; + +pub struct KbinXml { +} + +impl KbinXml { + pub fn new() -> Self { + Self { + } + } + + pub fn from_binary(input: &[u8]) -> Element { + // Node buffer starts from the beginning. + // Data buffer starts later after reading `len_data`. + let mut node_buf = Cursor::new(&input[..]); + + let signature = node_buf.read_u8().expect("Unable to read signature byte"); + assert_eq!(signature, SIGNATURE); + + // TODO: support uncompressed + let compress_byte = node_buf.read_u8().expect("Unable to read compression byte"); + assert_eq!(compress_byte, SIG_COMPRESSED); + + let compressed = Compression::from_byte(compress_byte); + assert!(compressed.is_some()); + + let encoding_byte = node_buf.read_u8().expect("Unable to read encoding byte"); + let encoding = EncodingType::from_byte(encoding_byte); + assert!(encoding.is_some()); + + let encoding_negation = node_buf.read_u8().expect("Unable to read encoding negation byte"); + assert_eq!(encoding_negation, 0xFF ^ encoding_byte); + + println!("signature: 0x{:x}", signature); + println!("compression: 0x{:x} ({:?})", compress_byte, compressed); + println!("encoding: 0x{:x} ({:?})", encoding_byte, encoding); + + let len_node = node_buf.read_u32::().expect("Unable to read len_node"); + println!("len_node: {} (0x{:x})", len_node, len_node); + + // We have read 8 bytes so far, so offset the start of the data buffer from + // our current position. + let data_buf_start = len_node + 8; + let mut data_buf = Cursor::new(&input[(data_buf_start as usize)..]); + + let len_data = data_buf.read_u32::().expect("Unable to read len_data"); + println!("len_data: {} (0x{:x})", len_data, len_data); + + let root = Element::bare("root"); + let mut stack = vec![root]; + { + let mut nodes_left = true; + let node_buf_end = data_buf_start.into(); + while nodes_left && node_buf.position() < node_buf_end { + let raw_node_type = node_buf.read_u8().expect("Unable to read node type"); + let is_array = raw_node_type & 64 == 64; + let node_type = raw_node_type & !64; + + let xml_type = KbinType::from_u8(node_type); + println!("raw_node_type: {}, node_type: {:?} ({}), is_array: {}", raw_node_type, xml_type, node_type, is_array); + + match xml_type { + KbinType::NodeEnd => { + if stack.len() > 1 { + let node = stack.pop().expect("Stack must have last node"); + if let Some(to) = stack.last_mut() { + to.append_child(node); + } + } + continue; + }, + KbinType::FileEnd => { + if stack.len() > 1 { + let node = stack.pop().expect("Stack must have last node"); + if let Some(to) = stack.last_mut() { + to.append_child(node); + } + } + //nodes_left = false; + break; + }, + _ => {}, + }; + + let name = unpack_sixbit(&mut node_buf); + if xml_type == KbinType::NodeStart { + stack.push(Element::bare(name)); + } else { + if xml_type != KbinType::Attribute { + stack.push(Element::bare(name.clone())); + } + if let Some(to) = stack.last_mut() { + match xml_type { + KbinType::Attribute => { + to.set_attr(name, ""); + }, + _ => { + to.set_attr("__type", xml_type.name()); + }, + }; + } + } + } + } + if stack.len() > 1 { + println!("stack: {:#?}", stack); + } + stack.truncate(1); + stack.pop().expect("Stack must have root node") + } +} diff --git a/src/node_types.rs b/src/node_types.rs new file mode 100644 index 0000000..1714006 --- /dev/null +++ b/src/node_types.rs @@ -0,0 +1,149 @@ +//use std::collections::HashMap; + +#[derive(Clone, Copy, Debug)] +pub struct NodeType { + pub id: u8, + pub name: &'static str, + pub alt_name: Option<&'static str>, + pub size: i32, + pub count: i32, +} + +impl NodeType { + fn new( + id: u8, + name: &'static str, + alt_name: Option<&'static str>, + size: i32, + count: i32, + ) -> Self { + Self { id, name, alt_name, size, count } + } +} + +macro_rules! construct_types { + ( + $( + ($id:expr, $konst:ident, $name:expr, $alt_name:expr, $size:expr, $count:expr, $handler:tt); + )+ + ) => { + #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] + pub enum KbinType { + $( + $konst, + )+ + } + + impl KbinType { + pub fn from_u8(input: u8) -> KbinType { + match input { + $( + $id => KbinType::$konst, + )+ + _ => panic!("Node type {} not implemented", input), + } + } + + pub fn name(&self) -> &'static str { + match *self { + $( + KbinType::$konst => $name, + )+ + } + } + + #[allow(dead_code)] + pub fn as_node_type(&self) -> NodeType { + match *self { + $( + KbinType::$konst => NodeType::new($id, $name, $alt_name, $size, $count), + )+ + } + } + } + + /* + lazy_static! { + pub static ref BYTE_XML_MAPPING: HashMap = { + let mut map = HashMap::new(); + $( + map.insert($id, KbinType::$konst); + )+ + map + }; + + pub static ref XML_TYPES: HashMap<&'static str, KbinType> = { + let mut map = HashMap::new(); + $( + map.insert($name, KbinType::$konst); + )+ + map + }; + } + */ + } +} + +construct_types! { + ( 2, S8, "s8", None, 1, 1, s8); + ( 3, U8, "u8", None, 1, 1, u8); + ( 4, S16, "s16", None, 2, 1, s16); + ( 5, U16, "u16", None, 2, 1, u16); + ( 6, S32, "s32", None, 4, 1, s32); + ( 7, U32, "u32", None, 4, 1, u32); + ( 8, S64, "s64", None, 8, 1, s64); + ( 9, U64, "u64", None, 8, 1, u64); + (10, Binary, "bin", Some("binary"), 1, 0, special); + (11, String, "str", Some("string"), 1, 0, special); + (12, Ip4, "ip4", None, 1, 4, special); + (13, Time, "time", None, 4, 1, u32); + (14, Float, "float", Some("f"), 4, 1, f32); + (15, Double, "double", Some("d"), 8, 1, f64); + (16, S8_2, "2s8", None, 1, 2, s8); + (17, U8_2, "2u8", None, 1, 2, u8); + (18, S16_2, "2s16", None, 2, 2, s16); + (19, U16_2, "2u16", None, 2, 2, u16); + (20, S32_2, "2s32", None, 4, 2, s32); + (21, U32_2, "2u32", None, 4, 2, u32); + (22, S64_2, "2s64", Some("vs64"), 8, 2, s64); + (23, U64_2, "2u64", Some("vu64"), 8, 2, u64); + (24, Float2, "2f", None, 4, 2, f32); + (25, Double2, "2d", Some("vd"), 8, 2, f64); + (26, S8_3, "3s8", None, 1, 3, s8); + (27, U8_3, "3u8", None, 1, 3, u8); + (28, S16_3, "3s16", None, 2, 3, s16); + (29, U16_3, "3u16", None, 2, 3, u16); + (30, S32_3, "3s32", None, 4, 3, s32); + (31, U32_3, "3u32", None, 4, 3, u32); + (32, S64_3, "3s64", None, 8, 3, s64); + (33, U64_3, "3u64", None, 8, 3, u64); + (34, Float3, "3f", None, 4, 3, f32); + (35, Double3, "3d", None, 8, 3, f64); + (36, S8_4, "4s8", None, 1, 4, s8); + (37, U8_4, "4u8", None, 1, 4, u8); + (38, S16_4, "4s16", None, 2, 4, s16); + (39, U16_4, "4u16", None, 2, 4, u16); + (40, S32_4, "4s32", Some("vs32"), 4, 4, s32); + (41, U32_4, "4u32", Some("vu32"), 4, 4, u32); + (42, S64_4, "4s64", None, 8, 4, s64); + (43, U64_4, "4u64", None, 8, 4, u64); + (44, Float4, "4f", Some("vf"), 4, 4, f32); + (45, Double4, "4d", None, 8, 4, f64); + // 46 = Attribute + // no 47 + (48, Vs8, "vs8", None, 1, 16, s8); + (49, Vu8, "vu8", None, 1, 16, u8); + (50, Vs16, "vs16", None, 1, 8, s16); + (51, Vu16, "vu16", None, 1, 8, u16); + (52, Boolean, "bool", Some("b"), 1, 1, bool); + (53, Boolean2, "2b", None, 1, 2, bool); + (54, Boolean3, "3b", None, 1, 3, bool); + (55, Boolean4, "4b", None, 1, 4, bool); + (56, Vb, "vb", None, 1, 16, bool); + + ( 1, NodeStart, "void", None, 0, 0, invalid); + (46, Attribute, "attr", None, 0, 0, invalid); + + (190, NodeEnd, "nodeEnd", None, 0, 0, invalid); + (191, FileEnd, "fileEnd", None, 0, 0, invalid); +} diff --git a/src/sixbit.rs b/src/sixbit.rs new file mode 100644 index 0000000..f6bf65b --- /dev/null +++ b/src/sixbit.rs @@ -0,0 +1,80 @@ +use std::collections::HashMap; +use std::io::{Read, Write}; + +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use num::{BigUint, FromPrimitive, ToPrimitive}; + +static CHAR_MAP: &'static [u8] = b"0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"; + +lazy_static! { + static ref BYTE_MAP: HashMap = { + CHAR_MAP + .iter() + .enumerate() + .map(|(i, value)| { + (*value, i as u8) + }) + .collect() + }; +} + +#[allow(dead_code)] +pub fn pack_sixbit(writer: &mut T, input: &str) + where T: Write +{ + let sixbit_chars = input + .bytes() + .map(|ch| { + *BYTE_MAP.get(&ch).expect("Character must be a valid sixbit character") + }); + let padding = 8 - input.len() * 6 % 8; + let padding = if padding == 8 { 0 } else { padding }; + + let mut bits = 0; + for ch in sixbit_chars { + bits <<= 6; + bits |= ch as u64; + } + bits <<= padding; + + let len = input.len() as u8; + writer.write_u8(len).expect("Unable to write sixbit string length"); + writer.write_uint::(bits, (input.len() * 6 + padding) / 8).expect("Unable to write sixbit contents"); +} + +pub fn unpack_sixbit(reader: &mut T) -> String + where T: Read +{ + let len = reader.read_u8().expect("Unable to read sixbit string length"); + let real_len = (f32::from(len * 6) / 8f32).ceil(); + let real_len = (real_len as u32) as usize; + let padding = (8 - ((len * 6) % 8)) as usize; + let padding = if padding == 8 { 0 } else { padding }; + debug!("sixbit_len: {}, real_len: {}, padding: {}", len, real_len, padding); + + let mut buf = vec![0; real_len]; + reader.read_exact(&mut buf).expect("Unable to read sixbit string content"); + + let bits = BigUint::from_bytes_be(&buf); + let bits = bits >> padding; + debug!("bits: 0b{:b}", bits); + + let mask = BigUint::from_u8(0b111111).unwrap(); + let result = (1..=len).map(|i| { + // Get the current sixbit part starting from the the left most bit in + // big endian order + let shift = ((len - i) * 6) as usize; + let bits = bits.clone(); + let mask = mask.clone(); + let current = (bits >> shift) & mask; + //println!("current: 0b{:b} ({})", current, current); + + let entry = CHAR_MAP[current.to_usize().unwrap()]; + //println!("entry: {} ({})", entry, entry as char); + + entry as char + }).collect(); + + debug!("result: {}", result); + result +}