|
30 | 30 | from object_detection.core import standard_fields as fields
|
31 | 31 | from object_detection.protos import input_reader_pb2
|
32 | 32 | from object_detection.utils import label_map_util
|
| 33 | +from object_detection.utils import shape_utils |
33 | 34 |
|
34 | 35 | # pylint: disable=g-import-not-at-top
|
35 | 36 | try:
|
@@ -170,7 +171,8 @@ def __init__(self,
|
170 | 171 | num_additional_channels=0,
|
171 | 172 | load_multiclass_scores=False,
|
172 | 173 | load_context_features=False,
|
173 |
| - expand_hierarchy_labels=False): |
| 174 | + expand_hierarchy_labels=False, |
| 175 | + load_dense_pose=False): |
174 | 176 | """Constructor sets keys_to_features and items_to_handlers.
|
175 | 177 |
|
176 | 178 | Args:
|
@@ -201,6 +203,7 @@ def __init__(self,
|
201 | 203 | account the provided hierarchy in the label_map_proto_file. For positive
|
202 | 204 | classes, the labels are extended to ancestor. For negative classes,
|
203 | 205 | the labels are expanded to descendants.
|
| 206 | + load_dense_pose: Whether to load DensePose annotations. |
204 | 207 |
|
205 | 208 | Raises:
|
206 | 209 | ValueError: If `instance_mask_type` option is not one of
|
@@ -371,6 +374,34 @@ def __init__(self,
|
371 | 374 | self._decode_png_instance_masks))
|
372 | 375 | else:
|
373 | 376 | raise ValueError('Did not recognize the `instance_mask_type` option.')
|
| 377 | + if load_dense_pose: |
| 378 | + self.keys_to_features['image/object/densepose/num'] = ( |
| 379 | + tf.VarLenFeature(tf.int64)) |
| 380 | + self.keys_to_features['image/object/densepose/part_index'] = ( |
| 381 | + tf.VarLenFeature(tf.int64)) |
| 382 | + self.keys_to_features['image/object/densepose/x'] = ( |
| 383 | + tf.VarLenFeature(tf.float32)) |
| 384 | + self.keys_to_features['image/object/densepose/y'] = ( |
| 385 | + tf.VarLenFeature(tf.float32)) |
| 386 | + self.keys_to_features['image/object/densepose/u'] = ( |
| 387 | + tf.VarLenFeature(tf.float32)) |
| 388 | + self.keys_to_features['image/object/densepose/v'] = ( |
| 389 | + tf.VarLenFeature(tf.float32)) |
| 390 | + self.items_to_handlers[ |
| 391 | + fields.InputDataFields.groundtruth_dp_num_points] = ( |
| 392 | + slim_example_decoder.Tensor('image/object/densepose/num')) |
| 393 | + self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = ( |
| 394 | + slim_example_decoder.ItemHandlerCallback( |
| 395 | + ['image/object/densepose/part_index', |
| 396 | + 'image/object/densepose/num'], self._dense_pose_part_indices)) |
| 397 | + self.items_to_handlers[ |
| 398 | + fields.InputDataFields.groundtruth_dp_surface_coords] = ( |
| 399 | + slim_example_decoder.ItemHandlerCallback( |
| 400 | + ['image/object/densepose/x', 'image/object/densepose/y', |
| 401 | + 'image/object/densepose/u', 'image/object/densepose/v', |
| 402 | + 'image/object/densepose/num'], |
| 403 | + self._dense_pose_surface_coordinates)) |
| 404 | + |
374 | 405 | if label_map_proto_file:
|
375 | 406 | # If the label_map_proto is provided, try to use it in conjunction with
|
376 | 407 | # the class text, and fall back to a materialized ID.
|
@@ -547,6 +578,14 @@ def expand_field(field_name):
|
547 | 578 | group_of = fields.InputDataFields.groundtruth_group_of
|
548 | 579 | tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)
|
549 | 580 |
|
| 581 | + if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict: |
| 582 | + tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast( |
| 583 | + tensor_dict[fields.InputDataFields.groundtruth_dp_num_points], |
| 584 | + dtype=tf.int32) |
| 585 | + tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast( |
| 586 | + tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids], |
| 587 | + dtype=tf.int32) |
| 588 | + |
550 | 589 | return tensor_dict
|
551 | 590 |
|
552 | 591 | def _reshape_keypoints(self, keys_to_tensors):
|
@@ -697,6 +736,97 @@ def decode_png_mask(image_buffer):
|
697 | 736 | lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
|
698 | 737 | lambda: tf.zeros(tf.cast(tf.stack([0, height, width]), dtype=tf.int32)))
|
699 | 738 |
|
def _dense_pose_part_indices(self, keys_to_tensors):
  """Builds the per-instance, zero-padded DensePose part-index tensor.

  The part indices of all instances arrive as one flat list, accompanied by
  the number of sampled points per instance. This handler cuts the flat list
  back into one row per instance.

  Args:
    keys_to_tensors: a dictionary from keys to tensors. Must contain
      'image/object/densepose/num' and 'image/object/densepose/part_index'.

  Returns:
    A 2-D int32 tensor of shape [num_instances, num_points] holding the
    DensePose part index (0-23) of every sampled point. `num_points` is the
    largest number of sampled points of any instance in the image; rows of
    instances with fewer points are padded with zeros along the last
    dimension.
  """

  def densify(key):
    """Looks up `key` and converts the value to dense if it is sparse."""
    value = keys_to_tensors[key]
    if isinstance(value, tf.SparseTensor):
      value = tf.sparse_tensor_to_dense(value)
    return value

  points_per_instance = densify('image/object/densepose/num')
  flat_part_ids = tf.cast(
      densify('image/object/densepose/part_index'), dtype=tf.int32)

  # Every output row is as wide as the instance with the most points.
  largest_count = tf.math.reduce_max(points_per_instance)
  row_width = tf.cast(largest_count, dtype=tf.int32)

  # offsets[i]:offsets[i + 1] is the slice of the flat list that belongs to
  # instance i.
  running_total = tf.math.cumsum(points_per_instance)
  offsets = tf.concat([[0], running_total], axis=0)

  def padded_row(row):
    """Extracts the part indices of instance `row` and pads them."""
    begin, end = offsets[row], offsets[row + 1]
    return shape_utils.pad_or_clip_nd(
        flat_part_ids[begin:end], output_shape=[row_width])

  instance_ids = tf.range(tf.size(points_per_instance))
  return tf.map_fn(padded_row, instance_ids, dtype=tf.int32)
| 775 | + |
def _dense_pose_surface_coordinates(self, keys_to_tensors):
  """Builds the per-instance, zero-padded DensePose surface-coordinate tensor.

  The y, x, v and u values of all instances arrive as four flat lists,
  accompanied by the number of sampled points per instance. This handler cuts
  the flat lists back into one [num_points, 4] slab per instance.

  Args:
    keys_to_tensors: a dictionary from keys to tensors. Must contain
      'image/object/densepose/num' and the 'image/object/densepose/{y,x,v,u}'
      entries.

  Returns:
    A 3-D float32 tensor of shape [num_instances, num_points, 4] where the
    last dimension holds (y, x, v, u) for each sampled DensePose point. The
    (y, x) pair is the normalized image location of the point and (v, u) is
    the (also normalized) surface coordinate on the part. `num_points` is the
    largest number of sampled points of any instance in the image; instances
    with fewer points are padded with zeros along dim=1.
  """

  def densify(key):
    """Looks up `key` and converts the value to dense if it is sparse."""
    value = keys_to_tensors[key]
    if isinstance(value, tf.SparseTensor):
      value = tf.sparse_tensor_to_dense(value)
    return value

  points_per_instance = densify('image/object/densepose/num')
  # Kept in (y, x, v, u) order: this is the column order of the output.
  flat_channels = [
      densify('image/object/densepose/y'),
      densify('image/object/densepose/x'),
      densify('image/object/densepose/v'),
      densify('image/object/densepose/u'),
  ]

  # Every output slab is as long as the instance with the most points.
  largest_count = tf.math.reduce_max(points_per_instance)
  slab_length = tf.cast(largest_count, dtype=tf.int32)

  # offsets[i]:offsets[i + 1] is the slice of each flat list that belongs to
  # instance i.
  running_total = tf.math.cumsum(points_per_instance)
  offsets = tf.concat([[0], running_total], axis=0)

  def padded_slab(row):
    """Stacks the (y, x, v, u) values of instance `row` and pads them."""
    begin, end = offsets[row], offsets[row + 1]
    # Shape [num_points_i, 4], with num_points_i the number of points sampled
    # for this instance.
    coords = tf.stack(
        [channel[begin:end] for channel in flat_channels], axis=1)
    return shape_utils.pad_or_clip_nd(
        coords, output_shape=[slab_length, 4])

  instance_ids = tf.range(tf.size(points_per_instance))
  return tf.map_fn(padded_slab, instance_ids, dtype=tf.float32)
| 829 | + |
700 | 830 | def _expand_image_label_hierarchy(self, image_classes, image_confidences):
|
701 | 831 | """Expand image level labels according to the hierarchy.
|
702 | 832 |
|
|
0 commit comments