Youjose · mos9527 · Aug 12, 2025
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,7 @@
+*.pyc
+*.pyd
+*.temp
+build/
+*.egg-info
+temp_*
+*.aprof
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,43 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Test: ACBEdit",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "Tests.test_ACBEdit",
+            "justMyCode": false
+        },    
+        {
+            "name": "Test: USMBuild",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "Tests.test_USMBuild",
+            "justMyCode": false
+        },    
+        {
+            "name": "Test: USMDecode",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "Tests.test_USMDecode",
+            "justMyCode": false
+        },
+        {
+            "name": "Test: CPK Unpack",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "Tests.test_CPKUnpack",
+            "justMyCode": false
+        },
+        {
+            "name": "Test: CPK Build",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "Tests.test_CPKBuild",
+            "justMyCode": false
+        },        
+    ]
+}
diff --git a/CriCodecs/hca.h b/CriCodecs/hca.h
@@ -78,7 +78,7 @@ void clHCA_ReadSamples16(clHCA *, signed short *outSamples);
 /* Sets a 64 bit encryption key, to properly decode blocks. This may be called
  * multiple times to change the key, before or after clHCA_DecodeHeader.
  * Key is ignored if the file is not encrypted. */
-void setkey(clHCA *, unsigned long long keycode);
+void clHCA_SetKey(clHCA *, unsigned long long keycode);
 
 /* Tests a single frame for validity, mainly to test if current key is correct.
  * Returns <0 on incorrect block (wrong key), 0 on silent block (not useful to determine)
@@ -95,4 +95,4 @@ void clHCA_DecodeReset(clHCA * hca);
 }
 #endif
 
-#endif
+#endif
diff --git a/PyCriCodecs/__init__.py b/PyCriCodecs/__init__.py
@@ -3,7 +3,6 @@
 from .chunk import *
 from .cpk import CPK, CPKBuilder
 from .usm import USM, USMBuilder
-from .utf import UTF, UTFBuilder
+from .utf import UTF, UTFBuilder, UTFViewer
 from .acb import ACB, ACBBuilder
 from .awb import AWB, AWBBuilder
-from .ivf import IVF
diff --git a/PyCriCodecs/acb.py b/PyCriCodecs/acb.py
@@ -1,180 +1,100 @@
 from struct import iter_unpack
+from typing import BinaryIO, List
+from io import BytesIO
 from .chunk import *
-from .utf import UTF, UTFBuilder
+from .utf import UTF, UTFBuilder, UTFViewer
 from .awb import AWB, AWBBuilder
 from .hca import HCA
+from copy import deepcopy
 import os
 
-# TODO revamp the whole ACB class. ACB is a lot more complex with those @UTF tables.
-class ACB(UTF):
-    """ An ACB is basically a giant @UTF table. Use this class to extract any ACB. """
-    __slots__ = ["filename", "payload", "filename", "awb"]
-    payload: list
-    filename: str
-    awb: AWB
+# Credit:
+# - github.com/vgmstream/vgmstream which is why this is possible at all
+# - Original work by https://github.com/Youjose/PyCriCodecs
+# See Research/ACBSchema.py for more details.
+
+class CueNameTable(UTFViewer):
+    CueIndex: int
+    CueName: str
+
+
+class CueTable(UTFViewer):
+    CueId: int
+    ReferenceIndex: int
+    ReferenceType: int
+
+
+class SequenceTable(UTFViewer):
+    TrackIndex: bytes
+    Type: int
+
+
+class SynthTable(UTFViewer):
+    ReferenceItems: bytes
+
+
+class TrackEventTable(UTFViewer):
+    Command: bytes
+
+
+class TrackTable(UTFViewer):
+    EventIndex: int
+
 
+class WaveformTable(UTFViewer):
+    EncodeType: int
+    MemoryAwbId: int
+    NumChannels: int
+    NumSamples: int
+    SamplingRate: int
+    Streaming: int
+
+
+class ACBTable(UTFViewer):
+    AcbGuid: bytes
+    Name: str
+    Version: int
+    VersionString: str
+
+    AwbFile: bytes
+    CueNameTable: List[CueNameTable]
+    CueTable: List[CueTable]
+    SequenceTable: List[SequenceTable]
+    SynthTable: List[SynthTable]
+    TrackEventTable: List[TrackEventTable]
+    TrackTable: List[TrackTable]
+    WaveformTable: List[WaveformTable]
+
+
+class ACB(UTF):
+    """An ACB is basically a giant @UTF table. Use this class to extract any ACB, and potentially modify it in place."""
     def __init__(self, filename) -> None:
-        self.payload = UTF(filename).get_payload()
-        self.filename = filename
-        self.acbparse(self.payload)
-        # TODO check on ACB version.
-
-    def acbparse(self, payload: list) -> None:
-        """ Recursively parse the payload. """
-        for dict in range(len(payload)):
-            for k, v in payload[dict].items():
-                if v[0] == UTFTypeValues.bytes:
-                    if v[1].startswith(UTFType.UTF.value): #or v[1].startswith(UTFType.EUTF.value): # ACB's never gets encrypted? 
-                        par = UTF(v[1]).get_payload()
-                        payload[dict][k] = par
-                        self.acbparse(par)
-        self.load_awb()
-
-    def load_awb(self) -> None:
-        # There are two types of ACB's, one that has an AWB file inside it,
-        # and one with an AWB pair.
-        if self.payload[0]['AwbFile'][1] == b'':
-            if type(self.filename) == str:
-                awbObj = AWB(os.path.join(os.path.dirname(self.filename), self.payload[0]['Name'][1]+".awb"))
-            else:
-                awbObj = AWB(self.payload[0]['Name'][1]+".awb")
-        else:
-            awbObj = AWB(self.payload[0]['AwbFile'][1])
-        self.awb = awbObj
-
-    # revamping...
-    def exp_extract(self, decode: bool = False, key = 0):
-        # There are two types of ACB's, one that has an AWB file inside it,
-        # and one with an AWB pair. Or multiple AWB's.
-
-        # TODO Add multiple AWB loading.
-        if self.payload[0]['AwbFile'][1] == b'':
-            if type(self.filename) == str:
-                awbObj = AWB(os.path.join(os.path.dirname(self.filename), self.payload[0]['Name'][1]+".awb"))
-            else:
-                awbObj = AWB(self.payload[0]['Name'][1]+".awb")
-        else:
-            awbObj = AWB(self.payload[0]['AwbFile'][1])
-
-        pl = self.payload[0]
-        names = [] # Where all filenames will end up in.
-        # cuename > cue > block > sequence > track > track_event > command > synth > waveform
-        # seems to be the general way to do it, some may repeat, and some may go back to other tables.
-        # I will try to make this code go through all of them in advance. 
-
-        """ Load Cue names and indexes. """
-        cue_names_and_indexes: list = []
-        for i in pl["CueNameTable"]:
-            cue_names_and_indexes.append((i["CueIndex"], i["CueName"]))
-        srt_names = sorted(cue_names_and_indexes, key=lambda x: x[0])
-
-        """ Go through all cues and match wavforms or names. """
-        for i in cue_names_and_indexes:
-
-            cue_Info = pl["CueTable"][i[0]]
-            ref_type = cue_Info["ReferenceType"][1]
-            wavform = pl["WaveformTable"][i[0]]
-
-            if ref_type == 1:
-                usememory: bool = wavform['Streaming'][1] == 0
-
-                if "Id" in wavform:
-                    wavform["MemoryAwbId"] = wavform["Id"] # Old ACB's use "Id", so we default it to the new MemoryAwbId slot.
-
-                if usememory:
-                    assert len(wavform['MemoryAwbId']) == len(srt_names) # Will error if not so. TODO add extracting without filenames references.
-                    names = [y[1][1] for _,y in sorted(zip([x[1] for x in pl["WaveformTable"]], srt_names), key=lambda z: z[0])]
-                    break # We break, since we did everything in the line above. I don't think ref_type changes between cues.
-
-                else:
-                    # TODO
-                    raise NotImplementedError("ACB needs multiple AWB's, not unsupported yet.")
-
-            elif ref_type == 2:
-                # TODO
-                raise NotImplementedError("Unsupported ReferenceType.")
-
-            elif ref_type == 3:
-                sequence = pl['SequenceTable'][i[0]]
-                track_type = sequence['Type'][1] # Unused but will leave it here if needed.
-                for tr_idx in iter_unpack(">H", sequence['TrackIndex'][1]):
-                    # TODO I am here currently.
-                    pass
-
-            elif ref_type == 8:
-                # TODO
-                raise NotImplementedError("Unsupported ReferenceType.")
-
-            else:
-                raise NotImplementedError("Unknown ReferenceType inside ACB.")
-
-    def parse_type1(self):
-        pass
-
-    def parse_type2(self):
-        pass
-
-    def parse_type3(self):
-        pass
-
-    def parse_type8(self):
-        pass
-
-    def parse_cues(self):
-        pass
-
-    def parse_synth(self):
-        pass
-
-    def parse_wavform(self):
-        pass
-
-    def parse_tracktable(self):
-        pass
-
-    def parse_commands(self):
-        pass
-
-    def parse_sequence(self):
-        pass
-
-    def extract(self, decode: bool = False, key: int = 0, dirname: str = ""):
-        """ Extracts audio files in an AWB/ACB without preserving filenames. """
-        if dirname:
-            os.makedirs(dirname, exist_ok=True)
-        filename = 0
-        for i in self.awb.getfiles():
-            Extension: str = self.get_extension(self.payload[0]['WaveformTable'][filename]['EncodeType'][1])
-            if decode and Extension == ".hca":
-                    hca = HCA(i, key=key, subkey=self.awb.subkey).decode()
-                    open(os.path.join(dirname, str(filename)+".wav"), "wb").write(hca)
-                    filename += 1
-            else:
-                open(os.path.join(dirname, f"{filename}{Extension}"), "wb").write(i)
-                filename += 1
-
-    def get_extension(self, EncodeType: int) -> str:
-        if EncodeType == 0 or EncodeType == 3:
-            return ".adx" # Maybe 0 is ahx?
-        elif EncodeType == 2 or EncodeType == 6:
-            return ".hca"
-        elif EncodeType == 7 or EncodeType == 10:
-            return ".vag"
-        elif EncodeType == 8:
-            return ".at3"
-        elif EncodeType == 9:
-            return ".bcwav"
-        elif EncodeType == 11 or EncodeType == 18:
-            return ".at9"
-        elif EncodeType == 12:
-            return ".xma"
-        elif EncodeType == 13 or EncodeType == 4 or EncodeType == 5:
-            return ".dsp"
-        elif EncodeType == 19:
-            return ".m4a"
-        else:
-            return ""
-
-# TODO Have to finish correct ACB extracting first.
-class ACBBuilder(UTFBuilder):
-    pass
+        super().__init__(filename,recursive=True)
+
+    @property
+    def payload(self) -> dict:
+        """Retrieves the only top-level UTF table dict within the ACB file."""
+        return self.dictarray[0]
+
+    @property
+    def view(self) -> ACBTable:
+        """Returns a view of the ACB file, with all known tables mapped to their respective classes."""
+        return ACBTable(self.payload)
+
+    # TODO: Extraction routines
+    # See Research/ACBSchema.py. vgmstream presented 4 possible permutations of subsong retrieval.
+
+class ACBBuilder:
+    acb: ACB
+
+    def __init__(self, acb: ACB) -> None:
+        self.acb = acb
+
+    def build(self) -> bytes:
+        """Builds an ACB binary blob from the current ACB object.
+
+        The object may be modified in place before building, which will be reflected in the output binary.
+        """
+        payload = deepcopy(self.acb.dictarray)
+        binary = UTFBuilder(payload, encoding=self.acb.encoding, table_name=self.acb.table_name)
+        return binary.bytes()
diff --git a/PyCriCodecs/adx.py b/PyCriCodecs/adx.py
@@ -4,11 +4,13 @@ class ADX:
     """ADX Module for decoding and encoding ADX files, pass the either `adx file` or `wav file` in bytes to either `decode` or `encode` respectively."""  
 
     # Decodes ADX to WAV.
+    @staticmethod
     def decode(data: bytes) -> bytes:
         """ Decodes ADX to WAV. """
         return CriCodecs.AdxDecode(bytes(data))
 
     # Encodes WAV to ADX.
+    @staticmethod
     def encode(data: bytes, BitDepth = 0x4, Blocksize = 0x12, Encoding = 3, AdxVersion = 0x4, Highpass_Frequency = 0x1F4, Filter = 0, force_not_looping = False) -> bytes:
         """ Encodes WAV to ADX. """
         return CriCodecs.AdxEncode(bytes(data), BitDepth, Blocksize, Encoding, Highpass_Frequency, Filter, AdxVersion, force_not_looping)