Skip to content

Commit 4437d7b

Browse files
Jonathan Huang, TF Object Detection Team
Jonathan Huang
authored and
TF Object Detection Team
committed
Add support for LVIS metrics.
PiperOrigin-RevId: 339190667
1 parent b1809d9 commit 4437d7b

10 files changed

+1085
-9
lines changed

research/object_detection/core/standard_fields.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,9 @@ class InputDataFields(object):
7070
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
7171
groundtruth_keypoint_weights: groundtruth weight factor for keypoints.
7272
groundtruth_label_weights: groundtruth label weights.
73+
groundtruth_verified_neg_classes: groundtruth verified negative classes.
74+
groundtruth_not_exhaustive_classes: groundtruth not-exhaustively labeled
75+
classes.
7376
groundtruth_weights: groundtruth weight factor for bounding boxes.
7477
groundtruth_dp_num_points: The number of DensePose sampled points for each
7578
instance.
@@ -120,6 +123,8 @@ class InputDataFields(object):
120123
groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
121124
groundtruth_keypoint_weights = 'groundtruth_keypoint_weights'
122125
groundtruth_label_weights = 'groundtruth_label_weights'
126+
groundtruth_verified_neg_classes = 'groundtruth_verified_neg_classes'
127+
groundtruth_not_exhaustive_classes = 'groundtruth_not_exhaustive_classes'
123128
groundtruth_weights = 'groundtruth_weights'
124129
groundtruth_dp_num_points = 'groundtruth_dp_num_points'
125130
groundtruth_dp_part_ids = 'groundtruth_dp_part_ids'

research/object_detection/eval_util_test.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ def _make_evaluation_dict(self,
8585
groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])
8686
groundtruth_classes = tf.constant([1])
8787
groundtruth_instance_masks = tf.ones(shape=[1, 20, 20], dtype=tf.uint8)
88+
original_image_spatial_shapes = tf.constant([[20, 20]], dtype=tf.int32)
89+
8890
groundtruth_keypoints = tf.constant([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]])
8991
if resized_groundtruth_masks:
9092
groundtruth_instance_masks = tf.ones(shape=[1, 10, 10], dtype=tf.uint8)
@@ -100,6 +102,8 @@ def _make_evaluation_dict(self,
100102
groundtruth_keypoints = tf.tile(
101103
tf.expand_dims(groundtruth_keypoints, 0),
102104
multiples=[batch_size, 1, 1])
105+
original_image_spatial_shapes = tf.tile(original_image_spatial_shapes,
106+
multiples=[batch_size, 1])
103107

104108
detections = {
105109
detection_fields.detection_boxes: detection_boxes,
@@ -112,7 +116,10 @@ def _make_evaluation_dict(self,
112116
input_data_fields.groundtruth_boxes: groundtruth_boxes,
113117
input_data_fields.groundtruth_classes: groundtruth_classes,
114118
input_data_fields.groundtruth_keypoints: groundtruth_keypoints,
115-
input_data_fields.groundtruth_instance_masks: groundtruth_instance_masks
119+
input_data_fields.groundtruth_instance_masks:
120+
groundtruth_instance_masks,
121+
input_data_fields.original_image_spatial_shape:
122+
original_image_spatial_shapes
116123
}
117124
if batch_size > 1:
118125
return eval_util.result_dict_for_batched_example(

research/object_detection/metrics/coco_evaluation.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,40 +1191,46 @@ def update_op(image_id_batched, groundtruth_boxes_batched,
11911191
groundtruth_instance_masks_batched,
11921192
groundtruth_is_crowd_batched, num_gt_boxes_per_image,
11931193
detection_scores_batched, detection_classes_batched,
1194-
detection_masks_batched, num_det_boxes_per_image):
1194+
detection_masks_batched, num_det_boxes_per_image,
1195+
original_image_spatial_shape):
11951196
"""Update op for metrics."""
11961197

11971198
for (image_id, groundtruth_boxes, groundtruth_classes,
11981199
groundtruth_instance_masks, groundtruth_is_crowd, num_gt_box,
11991200
detection_scores, detection_classes,
1200-
detection_masks, num_det_box) in zip(
1201+
detection_masks, num_det_box, original_image_shape) in zip(
12011202
image_id_batched, groundtruth_boxes_batched,
12021203
groundtruth_classes_batched, groundtruth_instance_masks_batched,
12031204
groundtruth_is_crowd_batched, num_gt_boxes_per_image,
12041205
detection_scores_batched, detection_classes_batched,
1205-
detection_masks_batched, num_det_boxes_per_image):
1206+
detection_masks_batched, num_det_boxes_per_image,
1207+
original_image_spatial_shape):
12061208
self.add_single_ground_truth_image_info(
12071209
image_id, {
12081210
'groundtruth_boxes':
12091211
groundtruth_boxes[:num_gt_box],
12101212
'groundtruth_classes':
12111213
groundtruth_classes[:num_gt_box],
12121214
'groundtruth_instance_masks':
1213-
groundtruth_instance_masks[:num_gt_box],
1215+
groundtruth_instance_masks[:num_gt_box][
1216+
:original_image_shape[0], :original_image_shape[1]],
12141217
'groundtruth_is_crowd':
12151218
groundtruth_is_crowd[:num_gt_box]
12161219
})
12171220
self.add_single_detected_image_info(
12181221
image_id, {
12191222
'detection_scores': detection_scores[:num_det_box],
12201223
'detection_classes': detection_classes[:num_det_box],
1221-
'detection_masks': detection_masks[:num_det_box]
1224+
'detection_masks': detection_masks[:num_det_box][
1225+
:original_image_shape[0], :original_image_shape[1]]
12221226
})
12231227

12241228
# Unpack items from the evaluation dictionary.
12251229
input_data_fields = standard_fields.InputDataFields
12261230
detection_fields = standard_fields.DetectionResultFields
12271231
image_id = eval_dict[input_data_fields.key]
1232+
original_image_spatial_shape = eval_dict[
1233+
input_data_fields.original_image_spatial_shape]
12281234
groundtruth_boxes = eval_dict[input_data_fields.groundtruth_boxes]
12291235
groundtruth_classes = eval_dict[input_data_fields.groundtruth_classes]
12301236
groundtruth_instance_masks = eval_dict[
@@ -1276,7 +1282,7 @@ def update_op(image_id_batched, groundtruth_boxes_batched,
12761282
image_id, groundtruth_boxes, groundtruth_classes,
12771283
groundtruth_instance_masks, groundtruth_is_crowd,
12781284
num_gt_boxes_per_image, detection_scores, detection_classes,
1279-
detection_masks, num_det_boxes_per_image
1285+
detection_masks, num_det_boxes_per_image, original_image_spatial_shape
12801286
], [])
12811287

12821288
def get_estimator_eval_metric_ops(self, eval_dict):

research/object_detection/metrics/coco_evaluation_test.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1601,6 +1601,7 @@ def testAddEvalDict(self):
16011601
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
16021602
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
16031603
groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
1604+
original_image_spatial_shape = tf.placeholder(tf.int32, shape=(None, 2))
16041605
detection_scores = tf.placeholder(tf.float32, shape=(None))
16051606
detection_classes = tf.placeholder(tf.float32, shape=(None))
16061607
detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
@@ -1612,6 +1613,8 @@ def testAddEvalDict(self):
16121613
input_data_fields.groundtruth_boxes: groundtruth_boxes,
16131614
input_data_fields.groundtruth_classes: groundtruth_classes,
16141615
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
1616+
input_data_fields.original_image_spatial_shape:
1617+
original_image_spatial_shape,
16151618
detection_fields.detection_scores: detection_scores,
16161619
detection_fields.detection_classes: detection_classes,
16171620
detection_fields.detection_masks: detection_masks,
@@ -1637,6 +1640,7 @@ def testAddEvalDict(self):
16371640
np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
16381641
mode='constant')
16391642
]),
1643+
original_image_spatial_shape: np.array([[120, 120]]),
16401644
detection_scores:
16411645
np.array([.9, .8]),
16421646
detection_classes:
@@ -1661,6 +1665,7 @@ def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
16611665
groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
16621666
groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
16631667
groundtruth_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
1668+
original_image_spatial_shape = tf.placeholder(tf.int32, shape=(None, 2))
16641669
detection_scores = tf.placeholder(tf.float32, shape=(None))
16651670
detection_classes = tf.placeholder(tf.float32, shape=(None))
16661671
detection_masks = tf.placeholder(tf.uint8, shape=(None, None, None))
@@ -1672,6 +1677,8 @@ def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
16721677
input_data_fields.groundtruth_boxes: groundtruth_boxes,
16731678
input_data_fields.groundtruth_classes: groundtruth_classes,
16741679
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
1680+
input_data_fields.original_image_spatial_shape:
1681+
original_image_spatial_shape,
16751682
detection_fields.detection_scores: detection_scores,
16761683
detection_fields.detection_classes: detection_classes,
16771684
detection_fields.detection_masks: detection_masks,
@@ -1701,6 +1708,7 @@ def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
17011708
np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
17021709
mode='constant')
17031710
]),
1711+
original_image_spatial_shape: np.array([[120, 120], [120, 120]]),
17041712
detection_scores:
17051713
np.array([.9, .8]),
17061714
detection_classes:
@@ -1725,6 +1733,7 @@ def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
17251733
dtype=np.uint8),
17261734
((0, 0), (10, 10), (10, 10)),
17271735
mode='constant'),
1736+
original_image_spatial_shape: np.array([[70, 70]]),
17281737
detection_scores: np.array([.8]),
17291738
detection_classes: np.array([1]),
17301739
detection_masks: np.pad(np.ones([1, 50, 50], dtype=np.uint8),
@@ -1740,6 +1749,7 @@ def testGetOneMAPWithMatchingGroundtruthAndDetections(self):
17401749
dtype=np.uint8),
17411750
((0, 0), (10, 10), (10, 10)),
17421751
mode='constant'),
1752+
original_image_spatial_shape: np.array([[45, 45]]),
17431753
detection_scores: np.array([.8]),
17441754
detection_classes: np.array([1]),
17451755
detection_masks: np.pad(np.ones([1, 25, 25],
@@ -1778,6 +1788,7 @@ def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self):
17781788
groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
17791789
groundtruth_masks = tf.placeholder(
17801790
tf.uint8, shape=(batch_size, None, None, None))
1791+
original_image_spatial_shape = tf.placeholder(tf.int32, shape=(None, 2))
17811792
detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None))
17821793
detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
17831794
detection_masks = tf.placeholder(
@@ -1790,6 +1801,8 @@ def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self):
17901801
input_data_fields.groundtruth_boxes: groundtruth_boxes,
17911802
input_data_fields.groundtruth_classes: groundtruth_classes,
17921803
input_data_fields.groundtruth_instance_masks: groundtruth_masks,
1804+
input_data_fields.original_image_spatial_shape:
1805+
original_image_spatial_shape,
17931806
detection_fields.detection_scores: detection_scores,
17941807
detection_fields.detection_classes: detection_classes,
17951808
detection_fields.detection_masks: detection_masks,
@@ -1826,6 +1839,8 @@ def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self):
18261839
mode='constant')
18271840
],
18281841
axis=0),
1842+
original_image_spatial_shape: np.array(
1843+
[[100, 100], [100, 100], [100, 100]]),
18291844
detection_scores:
18301845
np.array([[.8], [.8], [.8]]),
18311846
detection_classes:

0 commit comments

Comments (0)