Vastly improved IFS extraction which names afp/bsi/geo files correctly.

2026-07-02 00:11:48 -05:00 · 2021-04-14 01:08:13 +00:00 · 2021-04-14 01:08:13 +00:00 · 520181785d
commit 520181785d
parent d486cfa54e
3 changed files with 99 additions and 54 deletions
--- a/bemani/format/afp/swf.py
+++ b/bemani/format/afp/swf.py
@ -1,4 +1,3 @@
-from hashlib import md5
 import os
 import struct
 import sys
@ -703,7 +702,7 @@ class SWF(TrackedCoverage, VerboseOutput):
            # I'm not sure what the unknown value is. It doesn't seem to be parsed by either BishiBashi or Jubeat
            # when I've looked, but it does appear to be non-zero sometimes in Pop'n Music animations.
            shape_reference = f"{self.exported_name}_shape{shape_id}"
-            self.vprint(f"{prefix}    Tag ID: {shape_id}, AFP Reference: {shape_reference}, IFS GEO Filename: {md5(shape_reference.encode('utf-8')).hexdigest()} Unknown: {unknown}")
+            self.vprint(f"{prefix}    Tag ID: {shape_id}, AFP Reference: {shape_reference}, Unknown: {unknown}")

            return AP2ShapeTag(shape_id, shape_reference)
        elif tagid == AP2Tag.AP2_DEFINE_SPRITE:
--- a/bemani/format/ifs.py
+++ b/bemani/format/ifs.py
@ -3,7 +3,7 @@ import io
 import os
 import struct
 from PIL import Image  # type: ignore
-from typing import Dict, List, Tuple, Optional
+from typing import Dict, List, Tuple

 from bemani.protocol.binary import BinaryEncoding
 from bemani.protocol.xml import XmlEncoding
@ -20,8 +20,8 @@ class IFS:

    def __init__(self, data: bytes, decode_binxml: bool=False, decode_textures: bool=False) -> None:
        self.__files: Dict[str, bytes] = {}
-        self.__texdata: Dict[str, Node] = {}
-        self.__mappings: Dict[str, str] = {}
+        self.__formats: Dict[str, str] = {}
+        self.__compressed: Dict[str, bool] = {}
        self.__sizes: Dict[str, Tuple[int, int]] = {}
        self.__decode_binxml = decode_binxml
        self.__decode_textures = decode_textures
@ -81,6 +81,7 @@ class IFS:
                for subchild in node.children:
                    get_children(os.path.join(parent, f"{real_name}/"), subchild)

+        # Recursively walk the entire filesystem extracting files and their locations.
        get_children("/", header)

        for fn in files:
@ -88,71 +89,114 @@ class IFS:
            filedata = data[start:(start + size)]
            self.__files[fn] = filedata

-        if self.__decode_textures:
-            # We must fix up the name of the textures since we're decoding them
-            def fix_name(hashname: str) -> str:
-                path = os.path.dirname(hashname)
-                filename = os.path.basename(hashname)
+        # Now, find all of the index files that are available.
+        for filename in list(self.__files.keys()):
+            abs_filename = ("/" if filename.startswith("/") else "") + filename

-                texlist = self.__get_texlist_for_file(hashname)
+            if abs_filename.endswith("/texturelist.xml"):
+                # This is a texture index.
+                texdir = os.path.dirname(filename)

-                if texlist is not None and texlist.name == 'texturelist':
-                    for child in texlist.children:
-                        if child.name != 'texture':
+                benc = BinaryEncoding()
+                texdata = benc.decode(self.__files[filename])
+
+                if texdata.name != 'texturelist':
+                    raise Exception(f"Unexpected name {texdata.name} in texture list!")
+                if texdata.attribute('compress') == 'avslz':
+                    compressed = True
+                else:
+                    compressed = False
+
+                for child in texdata.children:
+                    if child.name != 'texture':
+                        continue
+
+                    textfmt = child.attribute('format')
+
+                    for subchild in child.children:
+                        if subchild.name != 'image':
                            continue
+                        md5sum = hashlib.md5(subchild.attribute('name').encode(benc.encoding)).hexdigest()
+                        oldname = os.path.join(texdir, md5sum)
+                        newname = os.path.join(texdir, subchild.attribute('name'))

-                        textfmt = child.attribute('format')
+                        if oldname in self.__files:
+                            supported = False
+                            if self.__decode_textures:
+                                if textfmt in ["argb8888rev"]:
+                                    # This is a supported file to decode
+                                    newname += ".png"
+                                    supported = True

-                        for subchild in child.children:
-                            if subchild.name != 'image':
-                                continue
-                            md5sum = hashlib.md5(subchild.attribute('name').encode(benc.encoding)).hexdigest()
+                            # Remove old index, update file to new index.
+                            self.__files[newname] = self.__files[oldname]
+                            del self.__files[oldname]

-                            if md5sum == filename:
-                                if textfmt == "argb8888rev":
-                                    name = f'{subchild.attribute("name")}.png'
-                                else:
-                                    name = subchild.attribute('name')
-                                newpath = os.path.join(path, name)
+                            # Remember the attributes for this file so we can extract it later.
+                            self.__compressed[newname] = compressed
+
+                            if supported:
+                                # Only record the format and sizes if we support decoding this texture.
+                                self.__formats[newname] = textfmt

                                rect = subchild.child_value('imgrect')
                                if rect is not None:
-                                    self.__mappings[newpath] = textfmt
-                                    self.__sizes[newpath] = (
+                                    self.__sizes[newname] = (
                                        (rect[1] - rect[0]) // 2,
                                        (rect[3] - rect[2]) // 2,
                                    )
+            elif abs_filename.endswith("/afplist.xml"):
+                # This is an AFP index.
+                afpdir = os.path.dirname(filename)
+                bsidir = os.path.join(afpdir, "bsi")
+                geodir = os.path.join(os.path.dirname(afpdir), "geo")

-                                return newpath
+                benc = BinaryEncoding()
+                afpdata = benc.decode(self.__files[filename])

-                return hashname
+                if afpdata.name != 'afplist':
+                    raise Exception(f"Unexpected name {afpdata.name} in afp list!")

-            self.__files = {fix_name(fn): self.__files[fn] for fn in self.__files}
+                for child in afpdata.children:
+                    if child.name != 'afp':
+                        continue
+
+                    # First, fix up the afp files themselves.
+                    name = child.attribute('name')
+                    md5sum = hashlib.md5(name.encode(benc.encoding)).hexdigest()
+
+                    for fixdir in [afpdir, bsidir]:
+                        oldname = os.path.join(fixdir, md5sum)
+                        newname = os.path.join(fixdir, name)
+
+                        if oldname in self.__files:
+                            # Remove old index, update file to new index.
+                            self.__files[newname] = self.__files[oldname]
+                            del self.__files[oldname]
+
+                    # Now, fix up the shape files as well.
+                    geodata = child.child_value("geo")
+                    if geodata is not None:
+                        for geoid in geodata:
+                            geoname = f"{name}_shape{geoid}"
+                            md5sum = hashlib.md5(geoname.encode(benc.encoding)).hexdigest()
+
+                            oldname = os.path.join(geodir, md5sum)
+                            newname = os.path.join(geodir, geoname)
+
+                            if oldname in self.__files:
+                                # Remove old index, update file to new index.
+                                self.__files[newname] = self.__files[oldname]
+                                del self.__files[oldname]

    @property
    def filenames(self) -> List[str]:
        return [f for f in self.__files]

-    def __get_texlist_for_file(self, filename: str) -> Optional[Node]:
-        texlist = os.path.join(os.path.dirname(filename), 'texturelist.xml')
-        if texlist != filename and texlist in self.__files:
-            if texlist not in self.__texdata and texlist in self.__files:
-                benc = BinaryEncoding()
-                self.__texdata[texlist] = benc.decode(self.__files[texlist])
-
-            return self.__texdata.get(texlist)
-        return None
-
    def read_file(self, filename: str) -> bytes:
-        # If this is a texture folder, first we need to grab the texturelist.xml file
-        # to figure out if this is compressed or not.
-        decompress = False
-        texlist = self.__get_texlist_for_file(filename)
-        if texlist is not None and texlist.name == 'texturelist':
-            if texlist.attribute('compress') == 'avslz':
-                # We should decompress!
-                decompress = True
-
+        # First, figure out if this file is stored compressed or not. If it is, decompress
+        # it so that we have the raw data available to us.
+        decompress = self.__compressed.get(filename, False)
        filedata = self.__files[filename]
        if decompress:
            uncompressed_size, compressed_size = struct.unpack('>II', filedata[0:8])
@ -168,9 +212,11 @@ class IFS:
            if filexml is not None:
                filedata = str(filexml).encode('utf-8')

-        if self.__decode_textures and filename in self.__mappings and filename in self.__sizes:
-            fmt = self.__mappings.get(filename)
-            wh = self.__sizes.get(filename)
+        if self.__decode_textures and filename in self.__formats and filename in self.__sizes:
+            fmt = self.__formats[filename]
+            wh = self.__sizes[filename]
+
+            # Decode the image data itself.
            if fmt == "argb8888rev":
                if len(filedata) < (wh[0] * wh[1] * 4):
                    left = (wh[0] * wh[1] * 4) - len(filedata)
--- a/bemani/utils/ifsutils.py
+++ b/bemani/utils/ifsutils.py
@ -19,12 +19,12 @@ def main() -> None:
    )
    parser.add_argument(
        "--convert-xml-files",
-        help="Convert xml files that are in binary.",
+        help="Convert xml files that are in binary to readable text.",
        action="store_true",
    )
    parser.add_argument(
        "--convert-texture-files",
-        help="Convert texture files that are in game-format.",
+        help="Convert texture files that are in game-format to PNG files.",
        action="store_true",
    )
    args = parser.parse_args()