From 225400a290296dcaa0f8d5071c8c3301c26f7958 Mon Sep 17 00:00:00 2001 From: Will Toohey Date: Mon, 12 Feb 2018 22:54:17 +0000 Subject: [PATCH] Don't load entire blob into memory - faster, less crashing on biiig files --- ifstools/handlers/GenericFile.py | 4 +-- ifstools/ifs.py | 43 +++++++++++++++++++++----------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/ifstools/handlers/GenericFile.py b/ifstools/handlers/GenericFile.py index a14b783..7a210e4 100644 --- a/ifstools/handlers/GenericFile.py +++ b/ifstools/handlers/GenericFile.py @@ -27,9 +27,7 @@ class GenericFile(Node): return self._load_from_filesystem() def _load_from_ifs(self, convert_kbin = True): - end = self.start + self.size - assert self.start <= len(self.ifs_data) and end <= len(self.ifs_data) - data = self.ifs_data[self.start:end] + data = self.ifs_data.get(self.start, self.size) if convert_kbin and self.name.endswith('.xml') and KBinXML.is_binary_xml(data): data = KBinXML(data).to_text().encode('utf8') diff --git a/ifstools/ifs.py b/ifstools/ifs.py index 2f56758..f0f3357 100644 --- a/ifstools/ifs.py +++ b/ifstools/ifs.py @@ -31,6 +31,16 @@ def _load(args): f.preload(use_cache) return f.full_path +class FileBlob(object): + ''' a basic wrapper around a file to deal with IFS data offset ''' + def __init__(self, file, offset): + self.file = file + self.offset = offset + + def get(self, offset, size): + self.file.seek(offset + self.offset) + return self.file.read(size) + class IFS: def __init__(self, path): if isfile(path): @@ -38,7 +48,7 @@ class IFS: elif isdir(path): self.load_dir(path) else: - raise IOError('Input path does not exist') + raise IOError('Input path {} does not exist'.format(path)) def load_ifs(self, path): self.is_file = True @@ -48,25 +58,26 @@ class IFS: self.folder_out = splitext(name)[0] + '_ifs' self.default_out = self.folder_out - with open(path, 'rb') as f: - file = ByteBuffer(f.read()) + self.file = open(path, 'rb') + header = ByteBuffer(self.file.read(36)) - signature = file.get_u32() + signature = header.get_u32() if signature != SIGNATURE: raise IOError('Given file was not an IFS file!') - self.file_version = file.get_u16() + self.file_version = header.get_u16() # next u16 is just NOT(version) - assert file.get_u16() ^ self.file_version == 0xFFFF - self.time = file.get_u32() - ifs_tree_size = file.get_u32() - manifest_end = file.get_u32() - self.data_blob = bytes(file.data[manifest_end:]) + assert header.get_u16() ^ self.file_version == 0xFFFF + self.time = header.get_u32() + ifs_tree_size = header.get_u32() + manifest_end = header.get_u32() + self.data_blob = FileBlob(self.file, manifest_end) if self.file_version > 1: # md5 of manifest, unchecked - file.offset += 16 + header.offset += 16 - self.manifest = KBinXML(file.data[file.offset:]) + self.file.seek(header.offset) + self.manifest = KBinXML(self.file.read(manifest_end-header.offset)) self.tree = GenericFolder(self.data_blob, self.manifest.xml_doc) # IFS files repacked with other tools usually have wrong values - don't validate this @@ -110,6 +121,10 @@ class IFS: return tree + def close(self): + if self.file: + self.file.close() + def __str__(self): return str(self.tree) @@ -147,9 +162,7 @@ class IFS: i.extract(progress=progress, use_cache=use_cache, recurse=recurse, tex_only=tex_only, extract_manifest=extract_manifest, path=rpath.replace('.ifs','_ifs')) - ''' If you can get shared memory for IFS.data_blob working, this will - be a lot faster. As it is, it gets pickled for every file, and - is 3x slower than the serial implementation even with image extraction + ''' Todo: reimplement this since we're using file objects now ''' # extract the tree '''p = Pool()