pokeplatinum/tools/json2bin/json2bin.py

import json
import pathlib
import subprocess

from argparse import ArgumentParser
from enum import Enum, Flag, auto
from types import FunctionType, LambdaType


ARGPARSER = ArgumentParser(
    prog='json2bin.py',
    description='Tool for converting a collection of JSON documents into\na NARC via a constructed parsing schema'
)
ARGPARSER.add_argument('--knarc', required=True,
                       help='Path to the knarc executable')
ARGPARSER.add_argument('--source-dir', required=True,
                       help='Source directory with subdirs for each data element')
ARGPARSER.add_argument('--private-dir', required=True,
                       help='Directory where intermediate files will be written')
ARGPARSER.add_argument('--output-dir', required=True,
                       help='Output directory where generated files will be written')


class OptionalBehavior(Enum):
    DISALLOW = 0
    SKIP = auto()
    PAD = auto()


class Parser():
    __slots__ = ('registry', 'padding_index', 'field_index', 'alignment_index')


    def __init__(self):
        self.registry = {}
        self.alignment_index = 0
        self.padding_index = 0
        self.field_index = 0


    def register_name(self, func: FunctionType | LambdaType | None) -> 'Parser':
        '''
            Register a function for processing the name key within the JSON
            structure. This differs from the standard function registration in
            that the process is much simpler; names are not stored directly
            within the data binaries, so there is no need to specify a size
            value, nor is the name related to any constants definition.
        '''
        if not func:
            self.registry['name'] = lambda s: s
        else:
            self.registry['name'] = func
        return self


    def register(self,
                 field_name: str,
                 size: int | tuple[int,int],
                 func: FunctionType | LambdaType,
                 const_type: type[Enum] | None = None,
                 optional: OptionalBehavior = OptionalBehavior.DISALLOW) -> 'Parser':
        '''
            Register a function for processing a given key within the JSON
            structure, along with a size of the field in bytes and any
            constants definition needed to process the field into an integral
            value.
        '''
        self.registry[f'{self.field_index:04}_{field_name}'] = (func, size, const_type, optional)
        self.field_index += 1
        return self


    def align(self, alignment: int, value: int = 0) -> 'Parser':
        '''
            Specify an instance of padding to a given alignment.
        '''
        self.registry[f'align_{self.alignment_index}'] = (alignment, value)
        self.alignment_index += 1
        return self


    def pad(self, size: int, value: int = 0) -> 'Parser':
        '''
            Specify an instance of padding of a given size.
        '''
        self.registry[f'padding_{self.padding_index}'] = (size, value)
        self.padding_index += 1
        return self


    def _walk(self, data: dict, key_seq: list[str]) -> any:
        data_val = data
        for step in key_seq:
            if type(data_val) == list:
                data_val = data_val[int(step)]
            else:
                data_val = data_val.get(step, {}) # All future walks will return {}
        return data_val


    def parse(self, data: dict) -> bytes:
        '''
            Parse the given JSON structure according to the currently-defined
            data schema.
        '''
        binary = bytearray([])
        for key in self.registry:
            if key == 'name':
                self.registry[key](data[key])
            elif key.startswith('padding_'):
                size, val = self.registry[key]
                binary.extend(bytearray([val] * size))
            elif key.startswith('align_'):
                alignment, val = self.registry[key]
                offset = len(binary) % alignment
                if offset != 0:
                    binary.extend(bytearray([val] * (alignment - offset)))
            else:
                parse_func, size, const_type, optional = self.registry[key]
                data_key = key[5:] # first 4 characters are a key-prefix
                data_val = self._walk(data, data_key.split('.'))
                if data_val == {} or data_val is None:
                    if optional == OptionalBehavior.DISALLOW:
                        print(json.dumps(data, indent=4))
                        raise KeyError(data_key)
                    elif optional == OptionalBehavior.SKIP:
                        continue
                    elif optional == OptionalBehavior.PAD:
                        binary.extend((0).to_bytes(size, 'little'))
                        continue

                binary.extend(parse_func(data_val, size, const_type))
        return binary


def pack_flags(flags: list[str], size: int, consts: type[Flag]) -> bytes:
    '''
        Pack a list of flag constants into a bitmask. Flag values are defined
        by the specified consts type, which must descend from the enum.Flag
        type.
    '''
    result = consts(0)
    for flag_name in flags:
        result = result | consts[flag_name]
    return result.value.to_bytes(size, 'little')


def parse_const(val: str, size: int, consts: type[Enum]) -> bytes:
    '''
        Simple parse wrapper for a value belonging to a set of constants,
        represented in JSON as a raw string name.
    '''
    return consts[val].value.to_bytes(size, 'little')


def parse_int(val: int, size: int, _consts: type[Enum] = None) -> bytes:
    '''
        Simple parse wrapper for an integer.
    '''
    return val.to_bytes(size, 'little')


def parse_sint(val: int, size: int, _consts: type[Enum] = None) -> bytes:
    '''
        Simple parse wrapper for a signed integer.
    '''
    if val < 0:
        val = val + (1 << (size * 8))
    return val.to_bytes(size, 'little')


def parse(fname_in: str, schema: Parser) -> bytes:
    with open(fname_in, 'r', encoding='utf8') as input_file:
        input_json = json.load(input_file)
        return schema.parse(input_json)


def write(output_bin: bytes, output_idx: int, output_dir: str | None):
    output_fname = f'{output_idx:04}.bin'
    if output_dir:
        output_fname = pathlib.Path(output_dir) / output_fname
    with open(output_fname, 'wb+') as output_file:
        output_file.write(output_bin)


def process(fname_in: str,
            schema: Parser,
            output_dir: str | None,
            index_func: FunctionType):
    fname_in_path = pathlib.Path(fname_in)
    output_bin = parse(fname_in, schema)
    output_idx = index_func(fname_in_path)
    write(output_bin, output_idx, output_dir)


def json2bin(target: str,
             schema: Parser,
             private_dir: str | None,
             output_dir: str | None,
             index_func: FunctionType,
             glob_pattern: str='*.json',
             narc_name: str | None=None,
             narc_packer: str | None=None):
    private_dir = pathlib.Path(private_dir)
    output_dir = pathlib.Path(output_dir)

    if not narc_name or not narc_packer:
        raise RuntimeError('Missing narc_name or narc_packer input in batch mode; halting')

    private_dir.mkdir(exist_ok=True, parents=True)
    for fname_in in pathlib.Path(target).glob(glob_pattern):
        process(fname_in, schema, private_dir, index_func)

    subprocess.run([
        pathlib.Path(narc_packer),
        '-d', private_dir,
        '-p', output_dir / f'{narc_name}.narc'
    ])