yangbo93
diff --git a/research/object_detection/core/model.py b/research/object_detection/core/model.py
Lines changed: 36 additions & 1 deletion
diff --git a/research/object_detection/core/model_test.py b/research/object_detection/core/model_test.py
Lines changed: 3 additions & 0 deletions
diff --git a/research/object_detection/core/standard_fields.py b/research/object_detection/core/standard_fields.py
Lines changed: 8 additions & 0 deletions
diff --git a/research/object_detection/data_decoders/tf_example_decoder.py b/research/object_detection/data_decoders/tf_example_decoder.py
Lines changed: 131 additions & 1 deletion
diff --git a/research/object_detection/data_decoders/tf_example_decoder_test.py b/research/object_detection/data_decoders/tf_example_decoder_test.py
Lines changed: 91 additions & 6 deletions
diff --git a/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py b/research/object_detection/dataset_tools/context_rcnn/generate_detection_data_tf1_test.py
Lines changed: 3 additions & 0 deletions
@@ -391,7 +391,9 @@ def regularization_losses(self):
     pass
 
   @abc.abstractmethod
-  def restore_map(self, fine_tune_checkpoint_type='detection'):
+  def restore_map(self,
+                  fine_tune_checkpoint_type='detection',
+                  load_all_detection_checkpoint_vars=False):
     """Returns a map of variables to load from a foreign checkpoint.
 
     Returns a map of variable names to load from a checkpoint to variables in
@@ -407,13 +409,46 @@ def restore_map(self, fine_tune_checkpoint_type='detection'):
         checkpoint (with compatible variable names) or to restore from a
         classification checkpoint for initialization prior to training.
         Valid values: `detection`, `classification`. Default 'detection'.
+      load_all_detection_checkpoint_vars: whether to load all variables (when
+         `fine_tune_checkpoint_type` is `detection`). If False, only variables
+         within the feature extractor scope are included. Default False.
 
     Returns:
       A dict mapping variable names (to load from a checkpoint) to variables in
       the model graph.
     """
     pass
 
+  @abc.abstractmethod
+  def restore_from_objects(self, fine_tune_checkpoint_type='detection'):
+    """Returns a map of Trackable objects to load from a foreign checkpoint.
+
+    Returns a dictionary of TensorFlow 2 Trackable objects (e.g. tf.Module
+    or Checkpoint). This enables the model to initialize based on weights from
+    another task. For example, the feature extractor variables from a
+    classification model can be used to bootstrap training of an object
+    detector. When loading from an object detection model, the checkpoint model
+    should have the same parameters as this detection model with the exception
+    of the num_classes parameter.
+
+    Note that this function is intended to be used to restore Keras-based
+    models when running TensorFlow 2, whereas restore_map (above) is intended
+    to be used to restore Slim-based models when running TensorFlow 1.x.
+
+    TODO(jonathanhuang,rathodv): Check tf_version and raise unimplemented
+    error for both restore_map and restore_from_objects depending on version.
+
+    Args:
+      fine_tune_checkpoint_type: whether to restore from a full detection
+        checkpoint (with compatible variable names) or to restore from a
+        classification checkpoint for initialization prior to training.
+        Valid values: `detection`, `classification`. Default 'detection'.
+
+    Returns:
+      A dict mapping keys to Trackable objects (tf.Module or Checkpoint).
+    """
+    pass
+
   @abc.abstractmethod
   def updates(self):
     """Returns a list of update operators for this model.
 
@@ -57,6 +57,9 @@ def updates(self):
   def restore_map(self):
     return {}
 
+  def restore_from_objects(self, fine_tune_checkpoint_type):
+    pass  # No-op test stub; returns None.
+
   def regularization_losses(self):
     return []
 
 
@@ -66,6 +66,11 @@ class InputDataFields(object):
     groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
     groundtruth_label_weights: groundtruth label weights.
     groundtruth_weights: groundtruth weight factor for bounding boxes.
+    groundtruth_dp_num_points: The number of DensePose sampled points for each
+      instance.
+    groundtruth_dp_part_ids: Part indices for DensePose points.
+    groundtruth_dp_surface_coords: Image locations and UV coordinates for
+      DensePose points.
     num_groundtruth_boxes: number of groundtruth boxes.
     is_annotated: whether an image has been labeled or not.
     true_image_shapes: true shapes of images in the resized images, as resized
@@ -108,6 +113,9 @@ class InputDataFields(object):
   groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
   groundtruth_label_weights = 'groundtruth_label_weights'
   groundtruth_weights = 'groundtruth_weights'
+  groundtruth_dp_num_points = 'groundtruth_dp_num_points'
+  groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'
+  groundtruth_dp_surface_coords = 'groundtruth_dp_surface_coords'
   num_groundtruth_boxes = 'num_groundtruth_boxes'
   is_annotated = 'is_annotated'
   true_image_shape = 'true_image_shape'
 
@@ -30,6 +30,7 @@
 from object_detection.core import standard_fields as fields
 from object_detection.protos import input_reader_pb2
 from object_detection.utils import label_map_util
+from object_detection.utils import shape_utils
 
 # pylint: disable=g-import-not-at-top
 try:
@@ -170,7 +171,8 @@ def __init__(self,
                num_additional_channels=0,
                load_multiclass_scores=False,
                load_context_features=False,
-               expand_hierarchy_labels=False):
+               expand_hierarchy_labels=False,
+               load_dense_pose=False):
     """Constructor sets keys_to_features and items_to_handlers.
 
     Args:
@@ -201,6 +203,7 @@ def __init__(self,
         account the provided hierarchy in the label_map_proto_file. For positive
         classes, the labels are extended to ancestor. For negative classes,
         the labels are expanded to descendants.
+      load_dense_pose: Whether to load DensePose annotations.
 
     Raises:
       ValueError: If `instance_mask_type` option is not one of
@@ -371,6 +374,34 @@ def __init__(self,
                     self._decode_png_instance_masks))
       else:
         raise ValueError('Did not recognize the `instance_mask_type` option.')
+    if load_dense_pose:
+      self.keys_to_features['image/object/densepose/num'] = (
+          tf.VarLenFeature(tf.int64))
+      self.keys_to_features['image/object/densepose/part_index'] = (
+          tf.VarLenFeature(tf.int64))
+      self.keys_to_features['image/object/densepose/x'] = (
+          tf.VarLenFeature(tf.float32))
+      self.keys_to_features['image/object/densepose/y'] = (
+          tf.VarLenFeature(tf.float32))
+      self.keys_to_features['image/object/densepose/u'] = (
+          tf.VarLenFeature(tf.float32))
+      self.keys_to_features['image/object/densepose/v'] = (
+          tf.VarLenFeature(tf.float32))
+      self.items_to_handlers[
+          fields.InputDataFields.groundtruth_dp_num_points] = (
+              slim_example_decoder.Tensor('image/object/densepose/num'))
+      self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = (
+          slim_example_decoder.ItemHandlerCallback(
+              ['image/object/densepose/part_index',
+               'image/object/densepose/num'], self._dense_pose_part_indices))
+      self.items_to_handlers[
+          fields.InputDataFields.groundtruth_dp_surface_coords] = (
+              slim_example_decoder.ItemHandlerCallback(
+                  ['image/object/densepose/x', 'image/object/densepose/y',
+                   'image/object/densepose/u', 'image/object/densepose/v',
+                   'image/object/densepose/num'],
+                  self._dense_pose_surface_coordinates))
+
     if label_map_proto_file:
       # If the label_map_proto is provided, try to use it in conjunction with
       # the class text, and fall back to a materialized ID.
@@ -547,6 +578,14 @@ def expand_field(field_name):
       group_of = fields.InputDataFields.groundtruth_group_of
       tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)
 
+    if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
+      tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
+          tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
+          dtype=tf.int32)
+      tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
+          tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
+          dtype=tf.int32)
+
     return tensor_dict
 
   def _reshape_keypoints(self, keys_to_tensors):
@@ -697,6 +736,97 @@ def decode_png_mask(image_buffer):
         lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
         lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32)))
 
+  def _dense_pose_part_indices(self, keys_to_tensors):
+    """Creates a tensor that contains part indices for each DensePose point.
+
+    Args:
+      keys_to_tensors: a dictionary from keys to tensors.
+
+    Returns:
+      A 2-D int32 tensor of shape [num_instances, num_points] where each element
+      contains the DensePose part index (0-23). `num_points` is the maximum
+      number of sampled points across all instances in the image; instances
+      with fewer sampled points are zero-padded in the last dimension.
+    """
+    num_points_per_instances = keys_to_tensors['image/object/densepose/num']
+    part_index = keys_to_tensors['image/object/densepose/part_index']
+    if isinstance(num_points_per_instances, tf.SparseTensor):
+      num_points_per_instances = tf.sparse_tensor_to_dense(
+          num_points_per_instances)
+    if isinstance(part_index, tf.SparseTensor):
+      part_index = tf.sparse_tensor_to_dense(part_index)
+    part_index = tf.cast(part_index, dtype=tf.int32)
+    max_points_per_instance = tf.cast(
+        tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
+    num_points_cumulative = tf.concat([
+        [0], tf.math.cumsum(num_points_per_instances)], axis=0)
+
+    def pad_parts_tensor(instance_ind):
+      """Pads DensePose part indices for each instance."""
+      points_range_start = num_points_cumulative[instance_ind]
+      points_range_end = num_points_cumulative[instance_ind + 1]
+      part_inds = part_index[points_range_start:points_range_end]
+      return shape_utils.pad_or_clip_nd(part_inds,
+                                        output_shape=[max_points_per_instance])
+
+    return tf.map_fn(pad_parts_tensor,
+                     tf.range(tf.size(num_points_per_instances)),
+                     dtype=tf.int32)
+
+  def _dense_pose_surface_coordinates(self, keys_to_tensors):
+    """Creates a tensor that contains surface coords for each DensePose point.
+
+    Args:
+      keys_to_tensors: a dictionary from keys to tensors.
+
+    Returns:
+      A 3-D float32 tensor of shape [num_instances, num_points, 4] where each
+      point contains (y, x, v, u) data for each sampled DensePose point. The
+      (y, x) coordinate has normalized image locations for the point, and (v, u)
+      contains the surface coordinate (also normalized) for the part. The value
+      `num_points` corresponds to the maximum number of sampled points across
+      all instances in the image. Note that instances with fewer sampled points
+      will be padded with zeros in dim=1.
+    """
+    num_points_per_instances = keys_to_tensors['image/object/densepose/num']
+    dp_y = keys_to_tensors['image/object/densepose/y']
+    dp_x = keys_to_tensors['image/object/densepose/x']
+    dp_v = keys_to_tensors['image/object/densepose/v']
+    dp_u = keys_to_tensors['image/object/densepose/u']
+    if isinstance(num_points_per_instances, tf.SparseTensor):
+      num_points_per_instances = tf.sparse_tensor_to_dense(
+          num_points_per_instances)
+    if isinstance(dp_y, tf.SparseTensor):
+      dp_y = tf.sparse_tensor_to_dense(dp_y)
+    if isinstance(dp_x, tf.SparseTensor):
+      dp_x = tf.sparse_tensor_to_dense(dp_x)
+    if isinstance(dp_v, tf.SparseTensor):
+      dp_v = tf.sparse_tensor_to_dense(dp_v)
+    if isinstance(dp_u, tf.SparseTensor):
+      dp_u = tf.sparse_tensor_to_dense(dp_u)
+    max_points_per_instance = tf.cast(
+        tf.math.reduce_max(num_points_per_instances), dtype=tf.int32)
+    num_points_cumulative = tf.concat([
+        [0], tf.math.cumsum(num_points_per_instances)], axis=0)
+
+    def pad_surface_coordinates_tensor(instance_ind):
+      """Pads DensePose surface coordinates for each instance."""
+      points_range_start = num_points_cumulative[instance_ind]
+      points_range_end = num_points_cumulative[instance_ind + 1]
+      y = dp_y[points_range_start:points_range_end]
+      x = dp_x[points_range_start:points_range_end]
+      v = dp_v[points_range_start:points_range_end]
+      u = dp_u[points_range_start:points_range_end]
+      # Create [num_points_i, 4] tensor with columns (y, x, v, u), where
+      # num_points_i is the number of sampled points for instance i.
+      unpadded_tensor = tf.stack([y, x, v, u], axis=1)
+      return shape_utils.pad_or_clip_nd(
+          unpadded_tensor, output_shape=[max_points_per_instance, 4])
+
+    return tf.map_fn(pad_surface_coordinates_tensor,
+                     tf.range(tf.size(num_points_per_instances)),
+                     dtype=tf.float32)
+
   def _expand_image_label_hierarchy(self, image_classes, image_confidences):
     """Expand image level labels according to the hierarchy.
 
 
@@ -1096,8 +1096,8 @@ def graph_fn():
       return example_decoder.decode(tf.convert_to_tensor(example))
 
     tensor_dict = self.execute_cpu(graph_fn, [])
-    self.assertTrue(
-        fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
+    self.assertNotIn(fields.InputDataFields.groundtruth_instance_masks,
+                     tensor_dict)
 
   def testDecodeImageLabels(self):
     image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
@@ -1116,8 +1116,7 @@ def graph_fn_1():
       return example_decoder.decode(tf.convert_to_tensor(example))
 
     tensor_dict = self.execute_cpu(graph_fn_1, [])
-    self.assertTrue(
-        fields.InputDataFields.groundtruth_image_classes in tensor_dict)
+    self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
     self.assertAllEqual(
         tensor_dict[fields.InputDataFields.groundtruth_image_classes],
         np.array([1, 2]))
@@ -1152,8 +1151,7 @@ def graph_fn_2():
       return example_decoder.decode(tf.convert_to_tensor(example))
 
     tensor_dict = self.execute_cpu(graph_fn_2, [])
-    self.assertTrue(
-        fields.InputDataFields.groundtruth_image_classes in tensor_dict)
+    self.assertIn(fields.InputDataFields.groundtruth_image_classes, tensor_dict)
     self.assertAllEqual(
         tensor_dict[fields.InputDataFields.groundtruth_image_classes],
         np.array([1, 3]))
@@ -1345,6 +1343,93 @@ def graph_fn():
         expected_image_confidence,
         tensor_dict[fields.InputDataFields.groundtruth_image_confidences])
 
+  def testDecodeDensePose(self):
+    image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+    encoded_jpeg, _ = self._create_encoded_and_decoded_data(
+        image_tensor, 'jpeg')
+    bbox_ymins = [0.0, 4.0, 2.0]
+    bbox_xmins = [1.0, 5.0, 8.0]
+    bbox_ymaxs = [2.0, 6.0, 1.0]
+    bbox_xmaxs = [3.0, 7.0, 3.3]
+    densepose_num = [0, 4, 2]
+    densepose_part_index = [2, 2, 3, 4, 2, 9]
+    densepose_x = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]
+    densepose_y = [0.9, 0.8, 0.7, 0.6, 0.5, 0.4]
+    densepose_u = [0.01, 0.02, 0.03, 0.04, 0.05, 0.06]
+    densepose_v = [0.99, 0.98, 0.97, 0.96, 0.95, 0.94]
+
+    def graph_fn():
+      example = tf.train.Example(
+          features=tf.train.Features(
+              feature={
+                  'image/encoded':
+                      dataset_util.bytes_feature(encoded_jpeg),
+                  'image/format':
+                      dataset_util.bytes_feature(six.b('jpeg')),
+                  'image/object/bbox/ymin':
+                      dataset_util.float_list_feature(bbox_ymins),
+                  'image/object/bbox/xmin':
+                      dataset_util.float_list_feature(bbox_xmins),
+                  'image/object/bbox/ymax':
+                      dataset_util.float_list_feature(bbox_ymaxs),
+                  'image/object/bbox/xmax':
+                      dataset_util.float_list_feature(bbox_xmaxs),
+                  'image/object/densepose/num':
+                      dataset_util.int64_list_feature(densepose_num),
+                  'image/object/densepose/part_index':
+                      dataset_util.int64_list_feature(densepose_part_index),
+                  'image/object/densepose/x':
+                      dataset_util.float_list_feature(densepose_x),
+                  'image/object/densepose/y':
+                      dataset_util.float_list_feature(densepose_y),
+                  'image/object/densepose/u':
+                      dataset_util.float_list_feature(densepose_u),
+                  'image/object/densepose/v':
+                      dataset_util.float_list_feature(densepose_v),
+
+              })).SerializeToString()
+
+      example_decoder = tf_example_decoder.TfExampleDecoder(
+          load_dense_pose=True)
+      output = example_decoder.decode(tf.convert_to_tensor(example))
+      dp_num_points = output[fields.InputDataFields.groundtruth_dp_num_points]
+      dp_part_ids = output[fields.InputDataFields.groundtruth_dp_part_ids]
+      dp_surface_coords = output[
+          fields.InputDataFields.groundtruth_dp_surface_coords]
+      return dp_num_points, dp_part_ids, dp_surface_coords
+
+    dp_num_points, dp_part_ids, dp_surface_coords = self.execute_cpu(
+        graph_fn, [])
+
+    expected_dp_num_points = [0, 4, 2]
+    expected_dp_part_ids = [
+        [0, 0, 0, 0],
+        [2, 2, 3, 4],
+        [2, 9, 0, 0]
+    ]
+    expected_dp_surface_coords = np.array(
+        [  # Each point row is (y, x, v, u).
+            # Instance 0 (no points): all rows are zero padding.
+            [[0., 0., 0., 0.],
+             [0., 0., 0., 0.],
+             [0., 0., 0., 0.],
+             [0., 0., 0., 0.]],
+            # Instance 1 (4 points).
+            [[0.9, 0.1, 0.99, 0.01],
+             [0.8, 0.2, 0.98, 0.02],
+             [0.7, 0.3, 0.97, 0.03],
+             [0.6, 0.4, 0.96, 0.04]],
+            # Instance 2 (2 points, followed by 2 rows of zero padding).
+            [[0.5, 0.5, 0.95, 0.05],
+             [0.4, 0.6, 0.94, 0.06],
+             [0., 0., 0., 0.],
+             [0., 0., 0., 0.]],
+        ], dtype=np.float32)
+
+    self.assertAllEqual(dp_num_points, expected_dp_num_points)
+    self.assertAllEqual(dp_part_ids, expected_dp_part_ids)
+    self.assertAllClose(dp_surface_coords, expected_dp_surface_coords)
+
 
 if __name__ == '__main__':
   tf.test.main()
@@ -67,6 +67,9 @@ def postprocess(self, prediction_dict, true_image_shapes):
   def restore_map(self, checkpoint_path, fine_tune_checkpoint_type):
     pass
 
+  def restore_from_objects(self, fine_tune_checkpoint_type):
+    pass  # No-op test stub; returns None.
+
   def loss(self, prediction_dict, true_image_shapes):
     pass