Enable E2E testing for the numerical discrepancy framework for XNNPACK. (#12723)

Juntian777 · facebook-github-bot · commit 25c5e2468c63 · 2025-07-22T15:47:08.000-07:00
Summary:

This PR introduces an end-to-end test framework for ExecuTorch's XNNPACK backend. It adds utilities to generate ETRecord and ETDump files with debug buffers for models, enabling numerical gap checks between runtime and AOT outputs. The PR also includes a test for the Vision Transformer (ViT) model to verify numeric gap thresholds. Additionally, it adds necessary build targets and runtime support for the new event tracer feature. This improves testing and debugging capabilities for ExecuTorch's XNNPACK backend.

Differential Revision: D78380933
diff --git a/devtools/inspector/_inspector.py b/devtools/inspector/_inspector.py
@@ -1401,6 +1401,13 @@ def calculate_numeric_gap(self, distance: str = "MSE"):
         runtime_intermediate_outputs, runtime_debug_handle_to_op_names = (
             self._get_runtime_intermediate_outputs_and_op_names()
         )
+        if (
+            len(aot_intermediate_outputs) == 0
+            or len(runtime_debug_handle_to_op_names) == 0
+        ):
+            raise ValueError(
+                "Inspector Events' debug_data is not populated properly which is required for calculating numerical gap"
+            )
         mapping = map_runtime_aot_intermediate_outputs(
             aot_intermediate_outputs, runtime_intermediate_outputs
         )
diff --git a/devtools/tests/xnnpack/TARGETS b/devtools/tests/xnnpack/TARGETS
@@ -0,0 +1,42 @@
+load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
+load("@fbcode_macros//build_defs:python_library.bzl", "python_library")
+load("@fbsource//tools/target_determinator/macros:ci.bzl", "ci")
+
+oncall("executorch")
+
+# Library target wrapping the shared helpers in xnnpack_test_utils.py; it is
+# referenced as //executorch/devtools/tests/xnnpack:xnnpack_test_utils by the
+# torchvision_vit_test target in this file.
+# NOTE(review): xnnpack_test_utils.py also imports pandas, which is not listed
+# here; presumably it arrives transitively (e.g. via //executorch/devtools:lib)
+# -- confirm.
+python_library(
+    name = "xnnpack_test_utils",
+    srcs = [
+        "xnnpack_test_utils.py",
+    ],
+    deps = [
+        "//caffe2:torch",
+        "//executorch/devtools/bundled_program:config",
+        "//executorch/devtools/bundled_program:core",
+        "//executorch/devtools/bundled_program/serialize:lib",
+        "//executorch/devtools:lib",
+        "//executorch/exir:lib",
+        "//executorch/backends/xnnpack/partition:xnnpack_partitioner",
+        "//executorch/backends/xnnpack/utils:xnnpack_utils",
+        "//executorch/extension/pybindings:portable_lib",
+    ],
+)
+
+
+# Unittest target for torchvision_vit_test.py, built on top of the
+# xnnpack_test_utils library declared in this file.
+python_unittest(
+    name = "torchvision_vit_test",
+    srcs = [
+        "torchvision_vit_test.py",
+    ],
+    # The CI label below requests the `executorch.event_tracer_enabled`
+    # buckconfig set to "true" for CI runs of this target.
+    # You still need to pass `-c executorch.event_tracer_enabled:true`
+    # if you want to manually invoke buck.
+    labels = ci.labels(
+        ci.buckconfig("executorch.event_tracer_enabled", "true"),
+    ),
+    deps = [
+        "//executorch/devtools/tests/xnnpack:xnnpack_test_utils",
+        "//executorch/exir/fb:bento_deps",
+        "//executorch/extension/fb/ptez:lib",
+        "//fair_infra/data/iopath/iopath:iopath",
+    ],
+)
diff --git a/devtools/tests/xnnpack/torchvision_vit_test.py b/devtools/tests/xnnpack/torchvision_vit_test.py
@@ -0,0 +1,81 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import os
+import unittest
+
+import torch
+
+from executorch.devtools.tests.xnnpack.xnnpack_test_utils import (
+    check_disturbance,
+    check_numeric_gap,
+    generate_etrecord_and_etdump,
+)
+
+from torchvision import models
+
+
+class TestViTModel(unittest.TestCase):
+    def setUp(self):
+        vit = models.vision_transformer.vit_b_16(weights="IMAGENET1K_V1")
+        self.model = vit.eval()
+        self.model_inputs = (torch.randn(1, 3, 224, 224),)
+
+    def test_numeric_gap(self):
+        etrecord_path, etdump_path, debug_buffer_path = generate_etrecord_and_etdump(
+            self.model,
+            self.model_inputs,
+        )
+
+        # Check if the output files exist
+        self.assertTrue(
+            os.path.exists(etrecord_path), f"ETRecord not found: {etrecord_path}"
+        )
+        self.assertTrue(os.path.exists(etdump_path), f"ETDump not found: {etdump_path}")
+        self.assertTrue(
+            os.path.exists(debug_buffer_path),
+            f"Debug buffer not found: {debug_buffer_path}",
+        )
+
+        metric = "MSE"
+        max_allowed_gap = 1e-6
+        is_within_threshold, max_gap = check_numeric_gap(
+            etdump_path,
+            etrecord_path,
+            debug_buffer_path,
+            metric=metric,
+            max_allowed_gap=max_allowed_gap,
+        )
+
+        # Check if the numeric gap is within threshold
+        self.assertTrue(
+            is_within_threshold,
+            f"Numeric gap {max_gap} exceeds allowed threshold {max_allowed_gap}",
+        )
+
+    def test_numeric_gap_with_disturbance(self):
+        # Check if we can detect the first numeric gap directly affected by the disturbance
+        etrecord_path, etdump_path, debug_buffer_path = generate_etrecord_and_etdump(
+            self.model,
+            self.model_inputs,
+            disturb=True,
+        )
+
+        metric = "MSE"
+        max_allowed_gap = 1e-6
+        disturbance_threshold = 1e-3
+        is_within_thresholds = check_disturbance(
+            etdump_path,
+            etrecord_path,
+            debug_buffer_path,
+            metric=metric,
+            row=1,
+            max_allowed_gap=max_allowed_gap,
+            disturbance_threshold=disturbance_threshold,
+        )
+
+        self.assertTrue(is_within_thresholds)
diff --git a/devtools/tests/xnnpack/xnnpack_test_utils.py b/devtools/tests/xnnpack/xnnpack_test_utils.py
@@ -0,0 +1,195 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+import copy
+import os
+import tempfile
+
+import uuid
+
+from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
+from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config
+
+from executorch.devtools import BundledProgram, generate_etrecord
+from executorch.devtools.bundled_program.config import MethodTestCase, MethodTestSuite
+from executorch.exir import to_edge
+
+from executorch.extension.pybindings.portable_lib import (
+    _load_for_executorch_from_buffer,  # @manual
+)
+from torch.export import export
+
+
+def _generate_new_paths():
+    temp_dir = tempfile.mkdtemp()
+
+    # Use uuid to generate unique filenames
+    etrecord_filename = f"etrecord_{uuid.uuid4().hex}.bin"
+    etdump_filename = f"etdump_{uuid.uuid4().hex}.etdp"
+    debug_buffer_filename = f"debug_buffer_{uuid.uuid4().hex}.bin"
+    etrecord_path = os.path.join(temp_dir, etrecord_filename)
+    etdump_path = os.path.join(temp_dir, etdump_filename)
+    debug_buffer_path = os.path.join(temp_dir, debug_buffer_filename)
+    return etrecord_path, etdump_path, debug_buffer_path
+
+
+def generate_etrecord_and_etdump(
+    model,
+    model_inputs,
+    debug_buffer_size=1024 * 1024 * 1024,
+    method_name="forward",
+    num_test_cases=2,
+    disturb=False,
+):
+    """
+    Helper to generate ETRecord and ETDump (with debug buffer) for a model.
+
+    Returns:
+        Tuple of (etrecord_path, etdump_path, debug_buffer_path)
+    """
+
+    etrecord_path, etdump_path, debug_buffer_path = _generate_new_paths()
+
+    aten_model = export(model, model_inputs, strict=True)
+
+    edge_compile_config = get_xnnpack_edge_compile_config()
+
+    edge_program_manager = to_edge(aten_model, compile_config=edge_compile_config)
+
+    edge_program_manager_copy = copy.deepcopy(edge_program_manager)
+
+    # Apply the disturbance if the flag is set
+    if disturb:
+        import torch
+
+        for _, exported_program in edge_program_manager_copy._edge_programs.items():
+            for module in exported_program.graph_module.modules():
+                if not isinstance(module, torch.fx.GraphModule):
+                    continue
+                for node in module.graph.nodes:
+                    if node.op == "call_function" and node.name == "aten_add_tensor":
+                        node.target = torch.ops.aten.sub.Tensor
+                module.recompile()
+                module.graph.eliminate_dead_code()
+
+    edge_program_manager = edge_program_manager.to_backend(XnnpackPartitioner())
+
+    et_program_manager = edge_program_manager.to_executorch()
+
+    method_graphs = {method_name: export(model, model_inputs, strict=True)}
+    inputs = [list(model_inputs) for _ in range(num_test_cases)]
+    method_test_suites = [
+        MethodTestSuite(
+            method_name=method_name,
+            test_cases=[
+                MethodTestCase(
+                    inputs=inp, expected_outputs=getattr(model, method_name)(*inp)
+                )
+                for inp in inputs
+            ],
+        )
+    ]
+    executorch_program = (
+        to_edge(method_graphs, compile_config=edge_compile_config)
+        .to_backend(XnnpackPartitioner())
+        .to_executorch()
+    )
+    bundled_program = BundledProgram(executorch_program, method_test_suites)
+
+    # Generate ETRecord
+    generate_etrecord(etrecord_path, edge_program_manager_copy, bundled_program)
+
+    # Generate ETDump and debug buffer
+    buff = et_program_manager.buffer
+    executorch_module = _load_for_executorch_from_buffer(
+        buff,
+        enable_etdump=True,
+        debug_buffer_size=debug_buffer_size,
+    )
+    executorch_module.run_method(method_name, tuple(model_inputs))
+    executorch_module.write_etdump_result_to_file(etdump_path, debug_buffer_path)
+
+    return etrecord_path, etdump_path, debug_buffer_path
+
+
+from typing import Tuple
+
+import pandas as pd
+from executorch.devtools import Inspector
+
+
+def check_numeric_gap(
+    etdump_path: str,
+    etrecord_path: str,
+    debug_buffer_path: str,
+    metric: str,
+    max_allowed_gap: float,
+) -> Tuple[bool, float]:
+    """
+    Create an Inspector and check if the maximum numeric gap for a given metric is less than the allowed threshold.
+    Args:
+        etdump_path: Path to the ETDump file.
+        etrecord_path: Path to the ETRecord file.
+        debug_buffer_path: Path to the debug buffer file.
+        metric: The metric name to calculate the numeric gap for (e.g., "MSE").
+        max_allowed_gap: The maximum allowed gap threshold.
+    Returns:
+        A tuple (is_within_threshold, max_gap) where:
+        - is_within_threshold (bool): True if max gap < max_allowed_gap, else False.
+        - max_gap (float): The maximum gap value found.
+    """
+    inspector = Inspector(
+        etdump_path=etdump_path,
+        etrecord=etrecord_path,
+        debug_buffer_path=debug_buffer_path,
+    )
+    df: pd.DataFrame = inspector.calculate_numeric_gap(metric)
+    max_gap = df["gap"].apply(lambda x: max(x) if isinstance(x, list) else x).max()
+    is_within_threshold = max_gap < max_allowed_gap
+    return is_within_threshold, max_gap
+
+
+def check_disturbance(
+    etdump_path: str,
+    etrecord_path: str,
+    debug_buffer_path: str,
+    metric: str,
+    row: int,
+    max_allowed_gap: float,
+    disturbance_threshold: float,
+) -> bool:
+    """
+    Check if the given row in the DataFrame has a gap greater than the disturbance threshold.
+
+    Args:
+        etdump_path: Path to the ETDump file.
+        etrecord_path: Path to the ETRecord file.
+        debug_buffer_path: Path to the debug buffer file.
+        metric: The metric name to calculate the numeric gap for (e.g., "MSE").
+        disturbance_threshold: The threshold to detect a disturbance.
+        max_allowed_gap: The maximum allowed gap threshold before the disturbance(row).
+        row: The row number to check for a disturbance.
+    """
+    inspector = Inspector(
+        etdump_path=etdump_path,
+        etrecord=etrecord_path,
+        debug_buffer_path=debug_buffer_path,
+    )
+    df: pd.DataFrame = inspector.calculate_numeric_gap(metric)
+
+    # Get the maximum gap for the given row
+    disturbance_row_gap = max(df.loc[row, "gap"])
+    # Get the maximum gap for the rows before the given row
+    if row > 0:
+        before_disturbance_row_gap = max(df.loc[: row - 1, "gap"].apply(max))
+    else:
+        before_disturbance_row_gap = 0
+
+    return (
+        disturbance_row_gap > disturbance_threshold
+        and before_disturbance_row_gap < max_allowed_gap
+    )

Original file line number	Diff line number	Diff line change
`@@ -1401,6 +1401,13 @@ def calculate_numeric_gap(self, distance: str = "MSE"):`
`1401`	`1401`	`runtime_intermediate_outputs, runtime_debug_handle_to_op_names = (`
`1402`	`1402`	`self._get_runtime_intermediate_outputs_and_op_names()`
`1403`	`1403`	`)`
	`1404`	`+ if (`
	`1405`	`+ len(aot_intermediate_outputs) == 0`
	`1406`	`+ or len(runtime_debug_handle_to_op_names) == 0`
	`1407`	`+ ):`
	`1408`	`+ raise ValueError(`
	`1409`	`+ "Inspector Events' debug_data is not populated properly which is required for calculating numerical gap"`
	`1410`	`+ )`
`1404`	`1411`	`mapping = map_runtime_aot_intermediate_outputs(`
`1405`	`1412`	`aot_intermediate_outputs, runtime_intermediate_outputs`
`1406`	`1413`	`)`