diff --git a/Cargo.toml b/Cargo.toml index 44bb2d0..6ee57ca 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ authors = ["Matt Bilker "] [dependencies] byteorder = "1.2.3" +encoding = "0.2" lazy_static = "1.0.0" log = "0.4.1" minidom = "0.9.0" diff --git a/src/encoding_type.rs b/src/encoding_type.rs index 39e610f..94571cd 100644 --- a/src/encoding_type.rs +++ b/src/encoding_type.rs @@ -1,3 +1,6 @@ +use encoding::{DecoderTrap, Encoding}; +use encoding::all::{ASCII, EUC_JP, ISO_8859_1, WINDOWS_31J}; + #[allow(non_camel_case_types)] #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum EncodingType { @@ -35,4 +38,24 @@ impl EncodingType { Some(val) } + + /// Decode bytes using the encoding definition from the `encoding` crate. + /// + /// A `Some` value indicates an encoding should be used from the `encoding` + /// crate. A `None` value indicates Rust's own UTF-8 handling should be used. + pub fn decode_bytes(&self, input: Vec) -> String { + const DECODER_FAIL: &str = "Unable to interpret string as alternate encoding"; + + match *self { + EncodingType::None | + EncodingType::UTF_8 => { + String::from_utf8(input).expect("Unable to interpret string as UTF-8") + }, + + EncodingType::ASCII => ASCII.decode(&input, DecoderTrap::Strict).expect(DECODER_FAIL), + EncodingType::ISO_8859_1 => ISO_8859_1.decode(&input, DecoderTrap::Strict).expect(DECODER_FAIL), + EncodingType::EUC_JP => EUC_JP.decode(&input, DecoderTrap::Strict).expect(DECODER_FAIL), + EncodingType::SHIFT_JIS => WINDOWS_31J.decode(&input, DecoderTrap::Strict).expect(DECODER_FAIL), + } + } } diff --git a/src/lib.rs b/src/lib.rs index 960d614..813aa28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,18 @@ #![feature(int_to_from_bytes)] extern crate byteorder; +extern crate encoding; extern crate minidom; extern crate num; #[macro_use] extern crate lazy_static; #[macro_use] extern crate log; -use byteorder::{BigEndian, ReadBytesExt}; -use std::io::Cursor; +use std::cmp::max; +use std::fmt::Write; +use 
std::io::{Cursor, Read, Seek, SeekFrom}; +use byteorder::{BigEndian, ReadBytesExt}; use minidom::Element; mod compression; @@ -27,15 +30,125 @@ const SIGNATURE: u8 = 0xA0; const SIG_COMPRESSED: u8 = 0x42; pub struct KbinXml { + offset_1: u64, + offset_2: u64, } impl KbinXml { pub fn new() -> Self { Self { + offset_1: 0, + offset_2: 0, } } - pub fn from_binary(input: &[u8]) -> Element { + #[inline] + fn data_buf_offset(&self, data_buf: &Cursor<&[u8]>) -> u64 { + // Position is not the index of the previously read byte, it is the current + // index (offset). + // + // This is so much fun to debug. + //data_buf.position() - 1 + data_buf.position() + } + + fn data_buf_read(&mut self, data_buf: &mut Cursor<&[u8]>) -> Vec { + let size = data_buf.read_i32::().expect("Unable to read data size"); + let mut data = vec![0; size as usize]; + data_buf.read_exact(&mut data).expect("Unable to read data"); + println!("data_buf_read => size: {}, data: 0x{:02x?}", data.len(), data); + + self.data_buf_realign(data_buf, None); + + data + } + + fn data_buf_read_str(&mut self, data_buf: &mut Cursor<&[u8]>, encoding: EncodingType) -> String { + let mut data = self.data_buf_read(data_buf); + + // Remove trailing null bytes + let mut index = data.len() - 1; + while data[index] == 0x00 { + index -= 1; + } + data.truncate(index + 1); + println!("data_buf_read_str => size: {}, data: 0x{:02x?}", data.len(), data); + + //String::from_utf8(data).expect("Unable to interpret string node as UTF-8") + encoding.decode_bytes(data) + } + + fn data_buf_get(&mut self, data_buf: &mut Cursor<&[u8]>, size: u32) -> Vec { + let mut data = vec![0; size as usize]; + data_buf.read_exact(&mut data).expect("Unable to read data"); + + data + } + + fn data_buf_get_aligned(&mut self, data_buf: &mut Cursor<&[u8]>, data_type: KbinType) -> Vec { + if self.offset_1 % 4 == 0 { + self.offset_1 = self.data_buf_offset(data_buf); + } + if self.offset_2 % 4 == 0 { + self.offset_2 = self.data_buf_offset(data_buf); + } + + let 
old_pos = self.data_buf_offset(data_buf); + let size = data_type.size() * data_type.count(); + println!("data_buf_get_aligned => old_pos: {}, size: {}", old_pos, size); + let (check_old, data) = match size { + 1 => { + data_buf.seek(SeekFrom::Start(self.offset_1)).expect("Unable to seek data buffer"); + + let data = data_buf.read_u8().expect("Unable to read 1 byte data"); + self.offset_1 += 1; + + (true, vec![data]) + }, + 2 => { + data_buf.seek(SeekFrom::Start(self.offset_2)).expect("Unable to seek data buffer"); + + let mut data = vec![0; 2]; + data_buf.read_exact(&mut data).expect("Unable to read 2 byte data"); + self.offset_2 += 2; + + (true, data) + }, + size => { + let mut data = vec![0; size as usize]; + data_buf.read_exact(&mut data).expect("Unable to read aligned data from data buffer"); + self.data_buf_realign(data_buf, None); + + (false, data) + }, + }; + + + if check_old { + data_buf.seek(SeekFrom::Start(old_pos)).expect("Unable to seek data buffer"); + + let trailing = max(self.offset_1, self.offset_2); + println!("old_pos: {}, trailing: {}", old_pos, trailing); + if old_pos < trailing { + data_buf.seek(SeekFrom::Start(trailing)).expect("Unable to seek data buffer"); + self.data_buf_realign(data_buf, None); + } + } + + data + } + + fn data_buf_realign(&mut self, data_buf: &mut Cursor<&[u8]>, size: Option) { + let size = size.unwrap_or(4); + println!("data_buf_realign => position: {}, size: {}", data_buf.position(), size); + + while data_buf.position() % size > 0 { + data_buf.seek(SeekFrom::Current(1)).expect("Unable to seek data buffer"); + } + println!("data_buf_realign => realigned to: {}", data_buf.position()); + } + + fn from_binary_internal(&mut self, input: &[u8]) -> Element { // Node buffer starts from the beginning. // Data buffer starts later after reading `len_data`. 
let mut node_buf = Cursor::new(&input[..]); @@ -47,12 +160,10 @@ impl KbinXml { let compress_byte = node_buf.read_u8().expect("Unable to read compression byte"); assert_eq!(compress_byte, SIG_COMPRESSED); - let compressed = Compression::from_byte(compress_byte); - assert!(compressed.is_some()); + let compressed = Compression::from_byte(compress_byte).expect("Unknown compression value"); let encoding_byte = node_buf.read_u8().expect("Unable to read encoding byte"); - let encoding = EncodingType::from_byte(encoding_byte); - assert!(encoding.is_some()); + let encoding = EncodingType::from_byte(encoding_byte).expect("Unknown encoding"); let encoding_negation = node_buf.read_u8().expect("Unable to read encoding negation byte"); assert_eq!(encoding_negation, 0xFF ^ encoding_byte); @@ -69,6 +180,13 @@ impl KbinXml { let data_buf_start = len_node + 8; let mut data_buf = Cursor::new(&input[(data_buf_start as usize)..]); + { + let pos = data_buf.position(); + self.offset_1 = pos; + self.offset_2 = pos; + println!("offset_1: {}, offset_2: {}", self.offset_1, self.offset_2); + } + let len_data = data_buf.read_u32::().expect("Unable to read len_data"); println!("len_data: {} (0x{:x})", len_data, len_data); @@ -116,10 +234,62 @@ impl KbinXml { if let Some(to) = stack.last_mut() { match xml_type { KbinType::Attribute => { - to.set_attr(name, ""); + let val = self.data_buf_read_str(&mut data_buf, encoding); + println!("attr name: {}, val: {}", name, val); + to.set_attr(name, val); }, _ => { to.set_attr("__type", xml_type.name()); + + let type_size = xml_type.size(); + let type_count = xml_type.count(); + let (is_array, size) = if xml_type.count() == -1 { + println!("xml_type.count() == -1"); + (true, data_buf.read_u32::().expect("Unable to read binary/string byte length")) + } else if is_array { + let node_size = type_size * type_count; + let arr_count = data_buf.read_u32::().expect("Unable to read array node length") / node_size as u32; + to.set_attr("__count", arr_count); + + 
let size = (node_size as u32) * arr_count; + (true, size) + } else { + (false, 1) + }; + + println!("type: {:?}, type_size: {}, type_count: {}, is_array: {}, size: {}", + xml_type, + type_size, + type_count, + is_array, + size); + + let data = if is_array { + let data = self.data_buf_get(&mut data_buf, size); + self.data_buf_realign(&mut data_buf, None); + + data + } else { + self.data_buf_get_aligned(&mut data_buf, xml_type) + }; + println!("data: 0x{:02x?}", data); + if xml_type == KbinType::String { + let val = encoding.decode_bytes(data); + println!("name: {}, string: {}", name, val); + to.append_text_node(val); + } else if xml_type == KbinType::Binary { + let len = data.len() * 2; + let val = data.into_iter().fold(String::with_capacity(len), |mut val, x| { + write!(val, "{:02x}", x).expect("Failed to append hex char"); + val + }); + println!("name: {}, string: {}", name, val); + to.append_text_node(val); + } else { + let inner_value = xml_type.parse_bytes(&data); + println!("name: {}, string: {}", name, inner_value); + to.append_text_node(inner_value); + } }, }; } @@ -132,4 +302,9 @@ impl KbinXml { stack.truncate(1); stack.pop().expect("Stack must have root node") } + + pub fn from_binary(input: &[u8]) -> Element { + let mut kbinxml = KbinXml::new(); + kbinxml.from_binary_internal(input) + } } diff --git a/src/node_types.rs b/src/node_types.rs index 1714006..9b12010 100644 --- a/src/node_types.rs +++ b/src/node_types.rs @@ -1,30 +1,75 @@ -//use std::collections::HashMap; - -#[derive(Clone, Copy, Debug)] -pub struct NodeType { - pub id: u8, - pub name: &'static str, - pub alt_name: Option<&'static str>, - pub size: i32, - pub count: i32, +trait KbinWrapperType { + fn from_kbin_bytes(input: &[u8]) -> String; } -impl NodeType { - fn new( - id: u8, - name: &'static str, - alt_name: Option<&'static str>, - size: i32, - count: i32, - ) -> Self { - Self { id, name, alt_name, size, count } +macro_rules! 
/// Converts the raw bytes of a single kbin value into its textual form.
///
/// The type parameter only tags the implementation; each converter type
/// implements the trait for itself.
trait KbinWrapperType<T> {
    /// Renders `input` as the string stored in the XML text node.
    fn from_kbin_bytes(input: &[u8]) -> String;
}

/// Implements `KbinWrapperType` for primitive numbers stored big-endian.
///
/// * `integer; a, b, ...` — integers, printed with `Display`.
/// * `float; bits => float, ...` — floats, rebuilt from the same-width
///   unsigned integer `bits` and printed with six decimal places.
///
/// The generated functions panic if `input` is not exactly as long as the
/// target type.
macro_rules! number_impl {
    (integer; $($inner_type:ident),*) => {
        $(
            impl KbinWrapperType<$inner_type> for $inner_type {
                fn from_kbin_bytes(input: &[u8]) -> String {
                    println!("KbinWrapperType<{}> => input: {:02x?}", stringify!($inner_type), input);

                    let mut data = [0u8; ::std::mem::size_of::<$inner_type>()];
                    data.copy_from_slice(input);
                    // `from_be_bytes` is the stable way to read a big-endian integer.
                    $inner_type::from_be_bytes(data).to_string()
                }
            }
        )*
    };
    (float; $($intermediate:ident => $inner_type:ident),*) => {
        $(
            impl KbinWrapperType<$inner_type> for $inner_type {
                fn from_kbin_bytes(input: &[u8]) -> String {
                    println!("KbinWrapperType<{}> => input: {:02x?}", stringify!($inner_type), input);

                    let mut data = [0u8; ::std::mem::size_of::<$inner_type>()];
                    data.copy_from_slice(input);
                    let bits = $intermediate::from_be_bytes(data);

                    format!("{:.6}", $inner_type::from_bits(bits))
                }
            }
        )*
    };
}

number_impl!(integer; i8, u8, i16, u16, i32, u32, i64, u64);
number_impl!(float; u32 => f32, u64 => f64);

impl KbinWrapperType<bool> for bool {
    /// Renders the first byte as `"0"` or `"1"`; any other value panics.
    fn from_kbin_bytes(input: &[u8]) -> String {
        println!("KbinWrapperType => input: {:02x?}", input);

        let value = match input[0] {
            0x00 => "0",
            0x01 => "1",
            v => panic!("Unsupported value for boolean: {}", v),
        };
        String::from(value)
    }
}

/// Placeholder converter that ignores its input and yields an empty string.
struct DummyConverter;
impl KbinWrapperType<DummyConverter> for DummyConverter {
    fn from_kbin_bytes(_input: &[u8]) -> String {
        String::from("")
    }
}

/// Converter for node types that carry no value; calling it always panics.
struct InvalidConverter;
impl KbinWrapperType<InvalidConverter> for InvalidConverter {
    fn from_kbin_bytes(input: &[u8]) -> String {
        panic!("Invalid kbin type converter called for input: {:02x?}", input);
    }
}
construct_types { ( $( - ($id:expr, $konst:ident, $name:expr, $alt_name:expr, $size:expr, $count:expr, $handler:tt); + ($id:expr, $konst:ident, $name:expr, $alt_name:expr, $size:expr, $count:expr, $inner_type:ident); )+ ) => { #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] @@ -53,97 +98,113 @@ macro_rules! construct_types { } #[allow(dead_code)] - pub fn as_node_type(&self) -> NodeType { + pub fn alt_name(&self) -> Option<&'static str> { match *self { $( - KbinType::$konst => NodeType::new($id, $name, $alt_name, $size, $count), + KbinType::$konst => $alt_name, + )+ + } + } + + pub fn size(&self) -> i8 { + match *self { + $( + KbinType::$konst => $size, + )+ + } + } + + pub fn count(&self) -> i8 { + match *self { + $( + KbinType::$konst => $count, + )+ + } + } + + pub fn parse_bytes(&self, input: &[u8]) -> String { + match *self { + $( + KbinType::$konst => { + if $count == -1 { + panic!("Tried to parse special type: {}", self.name()); + } else if $count == 0 { + String::new() + } else if $count == 1 { + $inner_type::from_kbin_bytes(input) + } else if $count > 1 { + String::new() + } else { + unimplemented!(); + } + }, )+ } } } - - /* - lazy_static! { - pub static ref BYTE_XML_MAPPING: HashMap = { - let mut map = HashMap::new(); - $( - map.insert($id, KbinType::$konst); - )+ - map - }; - - pub static ref XML_TYPES: HashMap<&'static str, KbinType> = { - let mut map = HashMap::new(); - $( - map.insert($name, KbinType::$konst); - )+ - map - }; - } - */ } } construct_types! 
{ - ( 2, S8, "s8", None, 1, 1, s8); + ( 2, S8, "s8", None, 1, 1, i8); ( 3, U8, "u8", None, 1, 1, u8); - ( 4, S16, "s16", None, 2, 1, s16); + ( 4, S16, "s16", None, 2, 1, i16); ( 5, U16, "u16", None, 2, 1, u16); - ( 6, S32, "s32", None, 4, 1, s32); + ( 6, S32, "s32", None, 4, 1, i32); ( 7, U32, "u32", None, 4, 1, u32); - ( 8, S64, "s64", None, 8, 1, s64); + ( 8, S64, "s64", None, 8, 1, i64); ( 9, U64, "u64", None, 8, 1, u64); - (10, Binary, "bin", Some("binary"), 1, 0, special); - (11, String, "str", Some("string"), 1, 0, special); - (12, Ip4, "ip4", None, 1, 4, special); + (10, Binary, "bin", Some("binary"), 1, -1, DummyConverter); + (11, String, "str", Some("string"), 1, -1, DummyConverter); + (12, Ip4, "ip4", None, 1, 4, DummyConverter); // TODO: implement IP address parsing (13, Time, "time", None, 4, 1, u32); (14, Float, "float", Some("f"), 4, 1, f32); - (15, Double, "double", Some("d"), 8, 1, f64); - (16, S8_2, "2s8", None, 1, 2, s8); - (17, U8_2, "2u8", None, 1, 2, u8); - (18, S16_2, "2s16", None, 2, 2, s16); - (19, U16_2, "2u16", None, 2, 2, u16); - (20, S32_2, "2s32", None, 4, 2, s32); - (21, U32_2, "2u32", None, 4, 2, u32); - (22, S64_2, "2s64", Some("vs64"), 8, 2, s64); - (23, U64_2, "2u64", Some("vu64"), 8, 2, u64); - (24, Float2, "2f", None, 4, 2, f32); - (25, Double2, "2d", Some("vd"), 8, 2, f64); - (26, S8_3, "3s8", None, 1, 3, s8); - (27, U8_3, "3u8", None, 1, 3, u8); - (28, S16_3, "3s16", None, 2, 3, s16); - (29, U16_3, "3u16", None, 2, 3, u16); - (30, S32_3, "3s32", None, 4, 3, s32); - (31, U32_3, "3u32", None, 4, 3, u32); - (32, S64_3, "3s64", None, 8, 3, s64); - (33, U64_3, "3u64", None, 8, 3, u64); - (34, Float3, "3f", None, 4, 3, f32); - (35, Double3, "3d", None, 8, 3, f64); - (36, S8_4, "4s8", None, 1, 4, s8); - (37, U8_4, "4u8", None, 1, 4, u8); - (38, S16_4, "4s16", None, 2, 4, s16); - (39, U16_4, "4u16", None, 2, 4, u16); - (40, S32_4, "4s32", Some("vs32"), 4, 4, s32); - (41, U32_4, "4u32", Some("vu32"), 4, 4, u32); - (42, S64_4, "4s64", 
None, 8, 4, s64); - (43, U64_4, "4u64", None, 8, 4, u64); - (44, Float4, "4f", Some("vf"), 4, 4, f32); - (45, Double4, "4d", None, 8, 4, f64); + (15, Double, "double", Some("d"), 8, 1, f64); + (16, S8_2, "2s8", None, 1, 2, i8); + (17, U8_2, "2u8", None, 1, 2, u8); + (18, S16_2, "2s16", None, 2, 2, i16); + (19, U16_2, "2u16", None, 2, 2, u16); + (20, S32_2, "2s32", None, 4, 2, i32); + (21, U32_2, "2u32", None, 4, 2, u32); + (22, S64_2, "2s64", Some("vs64"), 8, 2, i64); + (23, U64_2, "2u64", Some("vu64"), 8, 2, u64); + (24, Float2, "2f", None, 4, 2, f32); + (25, Double2, "2d", Some("vd"), 8, 2, f64); + (26, S8_3, "3s8", None, 1, 3, i8); + (27, U8_3, "3u8", None, 1, 3, u8); + (28, S16_3, "3s16", None, 2, 3, i16); + (29, U16_3, "3u16", None, 2, 3, u16); + (30, S32_3, "3s32", None, 4, 3, i32); + (31, U32_3, "3u32", None, 4, 3, u32); + (32, S64_3, "3s64", None, 8, 3, i64); + (33, U64_3, "3u64", None, 8, 3, u64); + (34, Float3, "3f", None, 4, 3, f32); + (35, Double3, "3d", None, 8, 3, f64); + (36, S8_4, "4s8", None, 1, 4, i8); + (37, U8_4, "4u8", None, 1, 4, u8); + (38, S16_4, "4s16", None, 2, 4, i16); + (39, U16_4, "4u16", None, 2, 4, u16); + (40, S32_4, "4s32", Some("vs32"), 4, 4, i32); + (41, U32_4, "4u32", Some("vu32"), 4, 4, u32); + (42, S64_4, "4s64", None, 8, 4, i64); + (43, U64_4, "4u64", None, 8, 4, u64); + (44, Float4, "4f", Some("vf"), 4, 4, f32); + (45, Double4, "4d", None, 8, 4, f64); // 46 = Attribute // no 47 - (48, Vs8, "vs8", None, 1, 16, s8); - (49, Vu8, "vu8", None, 1, 16, u8); - (50, Vs16, "vs16", None, 1, 8, s16); - (51, Vu16, "vu16", None, 1, 8, u16); - (52, Boolean, "bool", Some("b"), 1, 1, bool); - (53, Boolean2, "2b", None, 1, 2, bool); - (54, Boolean3, "3b", None, 1, 3, bool); - (55, Boolean4, "4b", None, 1, 4, bool); - (56, Vb, "vb", None, 1, 16, bool); + (48, Vs8, "vs8", None, 1, 16, i8); + (49, Vu8, "vu8", None, 1, 16, u8); + (50, Vs16, "vs16", None, 1, 8, i16); + (51, Vu16, "vu16", None, 1, 8, u16); + (52, Boolean, "bool", Some("b"), 1, 1, 
bool); + (53, Boolean2, "2b", None, 1, 2, bool); + (54, Boolean3, "3b", None, 1, 3, bool); + (55, Boolean4, "4b", None, 1, 4, bool); + (56, Vb, "vb", None, 1, 16, bool); - ( 1, NodeStart, "void", None, 0, 0, invalid); - (46, Attribute, "attr", None, 0, 0, invalid); + ( 1, NodeStart, "void", None, 0, 0, InvalidConverter); + (46, Attribute, "attr", None, 0, 0, InvalidConverter); - (190, NodeEnd, "nodeEnd", None, 0, 0, invalid); - (191, FileEnd, "fileEnd", None, 0, 0, invalid); + (190, NodeEnd, "nodeEnd", None, 0, 0, InvalidConverter); + (191, FileEnd, "fileEnd", None, 0, 0, InvalidConverter); } diff --git a/testcases_out.kbin b/testcases_out.kbin new file mode 100644 index 0000000..f258d5c Binary files /dev/null and b/testcases_out.kbin differ