import json
import pathlib
import subprocess

from collections.abc import MutableMapping, MutableSequence, Mapping, Sequence
from typing import Any, Dict, List, Optional, Tuple, Type, Union

from argparse import ArgumentParser
from enum import Enum, Flag, auto
from types import FunctionType, LambdaType


ARGPARSER = ArgumentParser(
    prog='json2bin.py',
    description='Tool for converting a collection of JSON documents into\na NARC via a constructed parsing schema'
)
ARGPARSER.add_argument('--knarc', required=True,
                       help='Path to the knarc executable')
ARGPARSER.add_argument('--source-dir', required=True,
                       help='Source directory with subdirs for each data element')
ARGPARSER.add_argument('--private-dir', required=True,
                       help='Directory where intermediate files will be written')
ARGPARSER.add_argument('--output-dir', required=True,
                       help='Output directory where generated files will be written')


class OptionalBehavior(Enum):
    '''
    What Parser.parse does when a registered field is absent from the input.
    '''
    DISALLOW = 0    # dump the input document and raise KeyError
    SKIP = auto()   # emit nothing for the field
    PAD = auto()    # emit zero bytes in place of the field


class OutputMode(Enum):
    '''
    How json2bin lays out the intermediate binaries before packing.
    '''
    SINGLE_FILE = 0      # concatenate every binary, ordered by index, into one file
    MULTI_FILE = auto()  # write one file per input document


class Parser():
    '''
    Ordered schema describing how a JSON document is serialised to bytes.

    Entries are stored in a single registry dict and replayed in insertion
    order by parse(). The registration methods return the parser itself so
    that a schema can be built as one chained expression.
    '''
    __slots__ = ('registry', 'padding_index', 'field_index', 'alignment_index')

    def __init__(self):
        self.registry = {}
        self.alignment_index = 0
        self.padding_index = 0
        self.field_index = 0

    def register_name(self, func: Optional[Union[FunctionType, LambdaType]]) -> 'Parser':
        '''
        Register a function for processing the name key within the JSON
        structure.

        This differs from the standard function registration in that the
        process is much simpler; names are not stored directly within the
        data binaries, so there is no need to specify a size value, nor is
        the name related to any constants definition.

        A falsy func registers an identity function instead.
        '''
        self.registry['name'] = func if func else (lambda s: s)
        return self

    def register(self,
                 field_name: str,
                 size: Union[int, Tuple[int, int]],
                 func: Union[FunctionType, LambdaType],
                 const_type: Optional[Type[Enum]] = None,
                 optional: OptionalBehavior = OptionalBehavior.DISALLOW) -> 'Parser':
        '''
        Register a function for processing a given key within the JSON
        structure, along with a size of the field in bytes and any constants
        definition needed to process the field into an integral value.

        field_name may be a dotted path ('a.b.0.c'); each segment is used as
        a dict key or, when the current value is a list, as an integer index.
        '''
        # The zero-padded index keeps keys unique even if the same field
        # name is registered more than once.
        self.registry[f'{self.field_index:04}_{field_name}'] = (func, size, const_type, optional)
        self.field_index += 1
        return self

    def align(self, alignment: int, value: int = 0) -> 'Parser':
        '''
        Specify an instance of padding to a given alignment.
        '''
        self.registry[f'align_{self.alignment_index}'] = (alignment, value)
        self.alignment_index += 1
        return self

    def pad(self, size: int, value: int = 0) -> 'Parser':
        '''
        Specify an instance of padding of a given size.
        '''
        self.registry[f'padding_{self.padding_index}'] = (size, value)
        self.padding_index += 1
        return self

    def _walk(self, data: dict, key_seq: Sequence[str]) -> Any:
        '''
        Follow key_seq through nested dicts and lists starting at data.

        Returns the value found, or {} when the path cannot be followed:
        either a key is missing, or an intermediate value is neither a list
        nor a mapping (for example a JSON null or a scalar). A list index
        that is out of range or not an integer still raises.
        '''
        data_val = data
        for step in key_seq:
            if isinstance(data_val, list):
                data_val = data_val[int(step)]
            elif isinstance(data_val, Mapping):
                # A missing key yields {}, and every later step on {} also
                # yields {}, so the miss propagates to the end of the walk.
                data_val = data_val.get(step, {})
            else:
                # Cannot descend into None or a scalar; report as missing so
                # the field's OptionalBehavior decides what happens.
                return {}

        return data_val

    def parse(self, data: Mapping) -> bytes:
        '''
        Parse the given JSON structure according to the currently-defined
        data schema.

        Returns the accumulated bytes (as a bytearray). Raises KeyError when
        a field registered with OptionalBehavior.DISALLOW is missing, empty
        ({}) or null; the offending document is printed first.
        '''
        binary = bytearray()
        for key, entry in self.registry.items():
            if key == 'name':
                # The name contributes no bytes; the callback runs only for
                # whatever effect the caller gave it.
                entry(data[key])
            elif key.startswith('padding_'):
                size, val = entry
                binary.extend(bytearray([val] * size))
            elif key.startswith('align_'):
                alignment, val = entry
                offset = len(binary) % alignment
                if offset != 0:
                    binary.extend(bytearray([val] * (alignment - offset)))
            else:
                parse_func, size, const_type, optional = entry
                # Field keys are '<index>_<field_name>'; drop the index
                # prefix regardless of how many digits it has.
                data_key = key.partition('_')[2]
                data_val = self._walk(data, data_key.split('.'))
                if data_val == {} or data_val is None:
                    if optional == OptionalBehavior.DISALLOW:
                        print(json.dumps(data, indent=4))
                        raise KeyError(data_key)
                    elif optional == OptionalBehavior.SKIP:
                        continue
                    elif optional == OptionalBehavior.PAD:
                        binary.extend((0).to_bytes(size, 'little'))
                        continue

                binary.extend(parse_func(data_val, size, const_type))

        return binary


def pack_flags(flags: Sequence[str], size: int, consts: Type[Flag]) -> bytes:
    '''
    Pack a list of flag constants into a bitmask. Flag values are defined by
    the specified consts type, which must descend from the enum.Flag type.

    The mask is returned as size little-endian bytes.
    '''
    result = consts(0)
    for flag_name in flags:
        result = result | consts[flag_name]
    return result.value.to_bytes(size, 'little')


def parse_const(val: str, size: int, consts: Type[Enum]) -> bytes:
    '''
    Simple parse wrapper for a value belonging to a set of constants,
    represented in JSON as a raw string name.
    '''
    return consts[val].value.to_bytes(size, 'little')


def parse_int(val: int, size: int, _consts: Type[Enum] = None) -> bytes:
    '''
    Simple parse wrapper for an integer. Encodes val as size little-endian
    bytes; _consts is accepted only for signature parity and is ignored.
    '''
    return val.to_bytes(size, 'little')


def parse_sint(val: int, size: int, _consts: Type[Enum] = None) -> bytes:
    '''
    Simple parse wrapper for a signed integer. Negative values are encoded
    as two's complement in size little-endian bytes; _consts is ignored.
    '''
    if val < 0:
        # Shift into the unsigned range of a size-byte integer.
        val = val + (1 << (size * 8))
    return val.to_bytes(size, 'little')


def _parse(fname_in: str, schema: Parser) -> bytes:
    '''
    Load the JSON document at fname_in and serialise it with schema.
    '''
    with open(fname_in, 'r', encoding='utf8') as input_file:
        input_json = json.load(input_file)
    return schema.parse(input_json)


def _write(output_bin: bytes, output_idx: int, output_dir: Optional[str]):
    '''
    Write output_bin to '<output_idx:04>.bin', inside output_dir when one is
    given and relative to the current directory otherwise.
    '''
    output_fname = f'{output_idx:04}.bin'
    if output_dir:
        output_fname = pathlib.Path(output_dir) / output_fname
    with open(output_fname, 'wb') as output_file:
        output_file.write(output_bin)


def _process(fname_in: str, schema: Parser, index_func: FunctionType) -> Tuple[Any, bytes]:
    '''
    Serialise one input document and compute its output index.

    index_func receives the input path as a pathlib.Path. Returns the pair
    (index, binary).
    '''
    fname_in_path = pathlib.Path(fname_in)
    output_bin = _parse(fname_in, schema)
    output_idx = index_func(fname_in_path)
    return (output_idx, output_bin)


def json2bin(target: str,
             schema: Parser,
             private_dir: Optional[str],
             output_dir: Optional[str],
             index_func: FunctionType,
             glob_pattern: str = '*.json',
             narc_name: Optional[str] = None,
             narc_packer: Optional[str] = None,
             output_mode: OutputMode = OutputMode.MULTI_FILE,
             skip_stems: Sequence[str] = ()):
    '''
    Convert every JSON document matching glob_pattern under target into
    binary form and pack the results into '<narc_name>.narc' in output_dir.

    Intermediate binaries are written to private_dir (created if needed):
    one '<index:04>.bin' per document in MULTI_FILE mode, or a single
    '0000.bin' holding all binaries concatenated in ascending index order
    in SINGLE_FILE mode. index_func maps an input pathlib.Path to its index.
    A document is skipped when the stem of its parent or grandparent
    directory is listed in skip_stems.

    Raises RuntimeError if narc_name or narc_packer is missing, and
    subprocess.CalledProcessError if the packer exits with a non-zero
    status.
    '''
    private_dir = pathlib.Path(private_dir)
    output_dir = pathlib.Path(output_dir)

    if not narc_name or not narc_packer:
        raise RuntimeError('Missing narc_name or narc_packer input in batch mode; halting')

    private_dir.mkdir(exist_ok=True, parents=True)

    binaries = {}
    for fname_in in pathlib.Path(target).glob(glob_pattern):
        if fname_in.parent.stem in skip_stems or fname_in.parent.parent.stem in skip_stems:
            continue

        (output_idx, output_bin) = _process(fname_in, schema, index_func)
        if output_mode == OutputMode.SINGLE_FILE:
            binaries[output_idx] = output_bin
        elif output_mode == OutputMode.MULTI_FILE:
            _write(output_bin, output_idx, private_dir)

    if output_mode == OutputMode.SINGLE_FILE:
        merged = bytearray()
        for idx in sorted(binaries):
            merged.extend(binaries[idx])
        _write(merged, 0, private_dir)

    # check=True: a failed pack must fail the conversion rather than leave a
    # missing or stale archive behind unnoticed.
    subprocess.run([
        pathlib.Path(narc_packer),
        '-d', private_dir,
        '-p', output_dir / f'{narc_name}.narc'
    ], check=True)