
Commit 7ea96a7

peterbell10 authored and pytorchmergebot committed
[quant][fx] Don't assume bias is a keyword-argument (pytorch#71426)
Summary:
Pull Request resolved: pytorch#71426

dbr quantization makes faulty assumptions about which arguments are passed as keyword arguments and which are passed as positional arguments. This happens to work currently due to a quirk of how `__torch_function__` is implemented in Python functions, but it will break when the operators are moved to C++.

Test Plan: Imported from OSS

Reviewed By: george-qi

Differential Revision: D33754262

Pulled By: albanD

fbshipit-source-id: 63515d7a166449726e1beaba6659443b6261742d
(cherry picked from commit f7b1884)
1 parent a5e27c4 commit 7ea96a7
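
A concrete illustration of the assumption the summary describes: both spellings of F.linear below are legal, but only the second one places `bias` into the `**kwargs` seen by a `__torch_function__` handler, which is why code like `del kwargs['bias']` is fragile. (A minimal sketch; the tensors are illustrative, not from the patch.)

    import torch
    import torch.nn.functional as F

    x, w, b = torch.randn(4, 3), torch.randn(2, 3), torch.randn(2)

    # Equivalent calls, but only the second spells bias as a keyword;
    # a handler that does del kwargs['bias'] breaks on the first one
    # once __torch_function__ stops normalizing arguments.
    out1 = F.linear(x, w, b)        # bias positional
    out2 = F.linear(x, w, bias=b)   # bias keyword
    assert torch.allclose(out1, out2)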

File tree

4 files changed (+43, -22 lines)


torch/ao/quantization/_dbr/auto_trace_rewriter.py

+6 -5

@@ -147,11 +147,12 @@ def create_node(self, kind, target, args, kwargs, name=None, type_expr=None):

         # TODO move op-specific logic out of here
         if target is torch.ops.quantized.linear:
-            new_args = [*args]
-            new_args.append(additional_kwargs['scale'])
-            new_args.append(additional_kwargs['zero_point'])
-            args = tuple(new_args)
-            del kwargs['bias']
+            def linear_rewrite_args(input, weight, bias=None):
+                return (input, weight,
+                        additional_kwargs['scale'],
+                        additional_kwargs['zero_point'])
+            args = linear_rewrite_args(*args, **kwargs)
+            kwargs = {}
         elif old_target != F.conv2d or target is F.conv2d:
             kwargs.update(**additional_kwargs)
         else:
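
The fix binds whatever mix of positional and keyword arguments the caller used against a local function whose signature mirrors F.linear, so `bias` is consumed whether it arrived positionally or by keyword. A minimal standalone sketch of the idiom (the wrapper name and tensors are illustrative, not from the patch):

    import torch

    def normalize_linear_args(*args, **kwargs):
        # Binding through a function with F.linear's signature accepts
        # bias both positionally and as a keyword argument.
        def _bind(input, weight, bias=None):
            return input, weight, bias
        return _bind(*args, **kwargs)

    x, w, b = torch.randn(4, 3), torch.randn(2, 3), torch.randn(2)
    a = normalize_linear_args(x, w, b)        # bias passed positionally
    k = normalize_linear_args(x, w, bias=b)   # bias passed as a keyword
    assert a[2] is k[2] is b                  # same bias either way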

torch/ao/quantization/_dbr/model_utils.py

+13 -4

@@ -11,6 +11,7 @@
     ObserverBase,
     FakeQuantizeBase,
 )
+from typing import Optional

 def pack_weights_for_functionals(
     module: torch.nn.Module,
@@ -66,10 +67,18 @@ def pack_weights_for_functionals(

         elif seen_op_info.type == F.linear:
             # fetch all the info needed for packed params
-            assert seen_op_info.packable_tensor_idx_to_name[1] is not None
-            weight = getattr(module, seen_op_info.packable_tensor_idx_to_name[1])
-            bias_name = seen_op_info.packable_tensor_kwarg_name_to_name['bias']
-            bias = getattr(module, bias_name) if bias_name else None
+            def get_tensor_param_name(idx: int, name: str) -> Optional[str]:
+                param_name = seen_op_info.packable_tensor_idx_to_name.get(idx, None)
+                if param_name is not None:
+                    return param_name
+                return seen_op_info.packable_tensor_kwarg_name_to_name.get(name, None)
+
+            weight_name = get_tensor_param_name(1, 'weight')
+            assert weight_name is not None
+            weight = getattr(module, weight_name)
+
+            bias_name = get_tensor_param_name(2, 'bias')
+            bias = getattr(module, bias_name) if bias_name is not None else None

             # quantize the weight
             # TODO: create weight observers from qconfig.weight
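
The new helper resolves a packable parameter by its positional index first and falls back to its keyword name, so packing works however the traced call spelled its arguments. A sketch of just the lookup order, with hypothetical stand-in dicts for the two seen_op_info mappings:

    from typing import Optional

    idx_to_name = {1: 'w0'}          # stand-in for packable_tensor_idx_to_name
    kwarg_to_name = {'bias': 'b0'}   # stand-in for packable_tensor_kwarg_name_to_name

    def lookup(idx: int, name: str) -> Optional[str]:
        # A positional record wins; the keyword record is the fallback.
        found = idx_to_name.get(idx)
        if found is not None:
            return found
        return kwarg_to_name.get(name)

    assert lookup(1, 'weight') == 'w0'   # recorded positionally
    assert lookup(2, 'bias') == 'b0'     # recorded as a keyword
    assert lookup(3, 'stride') is None   # not recorded at all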

torch/ao/quantization/_dbr/quantization_state.py

+6 -2

@@ -441,7 +441,7 @@ def op_convert_before_hook(

         # TODO move op-specific logic out of here
         if op is torch.ops.quantized.linear:
-            del kwargs['bias']
+            kwargs.pop('bias', None)

         return op, tuple(new_args), kwargs

@@ -666,7 +666,7 @@ def _first_call_op_prepare_before_hook_create_subgraphs(
         of this op in `self`.
         """
         op_packing_only_uses_module_attributes = \
-            get_op_packing_only_uses_module_attributes(op, args, root_module)
+            get_op_packing_only_uses_module_attributes(op, args, kwargs, root_module)
         arg_tensor_infos: List[Optional[QTensorInfo]] = []
         for arg in args:
             if isinstance(arg, (list, tuple)):
@@ -684,6 +684,8 @@ def _first_call_op_prepare_before_hook_create_subgraphs(
         packable_tensor_arg_idxs = get_packable_tensor_arg_idxs(op)
         if packable_tensor_arg_idxs is not None:
             for arg_idx in packable_tensor_arg_idxs:
+                if arg_idx >= len(args):
+                    continue
                 arg = args[arg_idx]
                 param_name = get_param_name(root_module, arg)
                 packable_tensor_idx_to_name[arg_idx] = param_name
@@ -697,6 +699,8 @@ def _first_call_op_prepare_before_hook_create_subgraphs(
             get_packable_tensor_kwarg_names(op)
         if packable_tensor_kwarg_names is not None:
             for kwarg_name in packable_tensor_kwarg_names:
+                if kwarg_name not in kwargs:
+                    continue
                 kwarg = kwargs[kwarg_name]
                 kwarg_name_on_module = get_param_name(root_module, kwarg)
                 packable_tensor_kwarg_name_to_name[kwarg_name] = \
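
The replaced `del kwargs['bias']` assumed the key was always present; `dict.pop` with a default is a no-op when it is not, which is exactly the case when bias was passed positionally. A two-line illustration with a made-up kwargs dict:

    kwargs = {'weight': 'w'}   # bias arrived positionally, so there is no key
    kwargs.pop('bias', None)   # safe: returns the default, mutates nothing
    # del kwargs['bias']       # would raise KeyError here

The two loops above get the same defensive shape: indices beyond len(args) and names missing from kwargs are simply skipped instead of raising.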

torch/ao/quantization/_dbr/utils.py

+18 -11

@@ -301,9 +301,15 @@ def get_func_output_dtype_type(

     return FuncOutputDTypeType.DTYPE_DEPENDS_ON_QCONFIG

+def get_weight_argument_info(op: Callable) -> Optional[Tuple[int, str]]:
+    if op in (F.linear, F.conv2d):
+        return (1, 'weight')
+    return None
+
 def get_op_packing_only_uses_module_attributes(
     op: Callable,
     args: Tuple[Any, ...],
+    kwargs: Dict[str, Any],
     module: torch.nn.Module,
 ) -> bool:
     """
@@ -316,12 +322,13 @@ def get_op_packing_only_uses_module_attributes(
     """
     # check for ops which need packed weights but the weights are
     # coming from another function
-    packable_tensor_arg_idxs = get_packable_tensor_arg_idxs(op)
-    if packable_tensor_arg_idxs is not None:
-        for arg_idx in packable_tensor_arg_idxs:
-            arg_name_in_root = get_param_name(module, args[arg_idx])
-            if arg_name_in_root is None:
-                return False
+    info = get_weight_argument_info(op)
+    if info is not None:
+        idx, name = info
+        param_name = args[idx] if idx < len(args) else kwargs[name]
+        arg_name_in_root = get_param_name(module, param_name)
+        if arg_name_in_root is None:
+            return False
     return True

 def get_quantized_op(
@@ -372,16 +379,16 @@ def get_packable_tensor_arg_idxs(op: Callable) -> Optional[List[int]]:
     if op == F.conv2d:
         return [1, 2]
     elif op == F.linear:
-        return [1]
+        return [1, 2]
     return None

 def get_packable_tensor_kwarg_names(op: Callable) -> Optional[List[str]]:
     """
     Returns tensor kwarg names which correspond to parameters which will
     need to be packed.
     """
-    if op == F.linear:
-        return ['bias']
+    if op in (F.conv2d, F.linear):
+        return ['weight', 'bias']
     return None

 def get_param_name(module: torch.nn.Module, arg: Any) -> Optional[str]:
@@ -409,8 +416,8 @@ def get_packable_arg_idxs(op: Callable) -> Optional[List[int]]:
         # weight, bias, stride, padding, dilation, groups
         return [1, 2, 3, 4, 5, 6]
     elif op == F.linear:
-        # weight
-        return [1]
+        # weight, bias
+        return [1, 2]
     return None

 def get_weight_arg_idx(op: Callable) -> Optional[int]:
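
The recurring pattern in this file is to treat every packable parameter as positional-or-keyword: consult args by index, then kwargs by name. A standalone sketch of that lookup (the get_arg helper and its call sites are hypothetical, not part of the patch):

    from typing import Any, Dict, Tuple

    def get_arg(args: Tuple[Any, ...], kwargs: Dict[str, Any],
                idx: int, name: str, default: Any = None) -> Any:
        # A positional value at idx takes precedence; otherwise try the
        # keyword spelling; otherwise fall back to the default.
        if idx < len(args):
            return args[idx]
        return kwargs.get(name, default)

    # For F.linear(input, weight, bias=None), bias is index 2 / name 'bias'.
    assert get_arg(('x', 'w', 'b'), {}, 2, 'bias') == 'b'
    assert get_arg(('x', 'w'), {'bias': 'b'}, 2, 'bias') == 'b'
    assert get_arg(('x', 'w'), {}, 2, 'bias') is None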
