Skip to content

Commit e6328af

Browse files
authored
[Validate] Add 3D Cuboid Metrics as well as Metadata and Attribute filters to Metrics (#269)
1 parent 496d961 commit e6328af

22 files changed

+2403
-245
lines changed

.circleci/config.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,11 @@ jobs:
1717
- run:
1818
name: Install Environment Dependencies
1919
command: | # install dependencies
20-
apt-get -y install curl
21-
pip install --upgrade pip
20+
apt-get update
21+
apt-get -y install curl libgeos-dev
22+
pip install --upgrade pip
2223
pip install poetry
23-
poetry install
24+
poetry install -E shapely
2425
2526
- run:
2627
name: Black Formatting Check # Only validation, without re-formatting

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,15 @@ All notable changes to the [Nucleus Python Client](https://github.com/scaleapi/n
44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [0.9.0](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.9.0) - 2022-04-07
8+
9+
### Added
10+
11+
- Validate metrics support metadata and field filtering on input annotations and predictions
12+
- 3D/Cuboid metrics: Recall, Precision, 3D IOU and bird's-eye 2D IOU
13+
- Shapely can be used for metric development if the optional scale-nucleus[shapely] is installed
14+
- Full support for passing parameters to evaluation configurations
15+
716
## [0.8.4](https://github.com/scaleapi/nucleus-python-client/releases/tag/v0.8.4) - 2022-04-06
817
- Changing `camera_params` of dataset items can now be done through the dataset method `update_items_metadata`
918

README.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,3 +179,24 @@ cd docs
179179
sphinx-autobuild . ./_build/html --watch ../nucleus
180180
```
181181
`sphinx-autobuild` will spin up a server on localhost (port 8000 by default) that will watch for and automatically rebuild a version of the API reference based on your local docstring changes.
182+
183+
184+
## Custom Metrics using Shapely in scale-validate
185+
186+
Certain metrics use `shapely` which is added as an optional dependency.
187+
```bash
188+
pip install scale-nucleus[shapely]
189+
```
190+
191+
Note that you might need to install a local GEOS package since Shapely doesn't provide binaries bundled with GEOS for every platform.
192+
193+
```bash
194+
# macOS
195+
brew install geos
196+
# Ubuntu/Debian flavors
197+
apt-get install libgeos-dev
198+
```
199+
200+
To develop locally with Shapely support, use:
201+
202+
`poetry install --extras shapely`

nucleus/annotation.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,7 @@ class SegmentationAnnotation(Annotation):
557557
annotations: List[Segment]
558558
reference_id: str
559559
annotation_id: Optional[str] = None
560+
# metadata: Optional[dict] = None # TODO(sc: 422637)
560561

561562
def __post_init__(self):
562563
if not self.mask_url:
@@ -574,6 +575,7 @@ def from_json(cls, payload: dict):
574575
],
575576
reference_id=payload[REFERENCE_ID_KEY],
576577
annotation_id=payload.get(ANNOTATION_ID_KEY, None),
578+
# metadata=payload.get(METADATA_KEY, None), # TODO(sc: 422637)
577579
)
578580

579581
def to_payload(self) -> dict:
@@ -582,6 +584,7 @@ def to_payload(self) -> dict:
582584
MASK_URL_KEY: self.mask_url,
583585
ANNOTATIONS_KEY: [ann.to_payload() for ann in self.annotations],
584586
ANNOTATION_ID_KEY: self.annotation_id,
587+
# METADATA_KEY: self.metadata, # TODO(sc: 422637)
585588
}
586589

587590
payload[REFERENCE_ID_KEY] = self.reference_id

nucleus/metrics/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
from .base import Metric, ScalarResult
22
from .categorization_metrics import CategorizationF1
3+
from .cuboid_metrics import CuboidIOU, CuboidPrecision, CuboidRecall
4+
from .filtering import (
5+
FieldFilter,
6+
ListOfOrAndFilters,
7+
MetadataFilter,
8+
apply_filters,
9+
)
310
from .polygon_metrics import (
411
PolygonAveragePrecision,
512
PolygonIOU,

nucleus/metrics/base.py

Lines changed: 102 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
import sys
22
from abc import ABC, abstractmethod
33
from dataclasses import dataclass
4-
from typing import Iterable, List
4+
from typing import Iterable, List, Optional, Union
55

66
from nucleus.annotation import AnnotationList
7+
from nucleus.metrics.filtering import (
8+
ListOfAndFilters,
9+
ListOfOrAndFilters,
10+
apply_filters,
11+
)
712
from nucleus.prediction import PredictionList
813

914

@@ -86,12 +91,107 @@ def __call__(
8691
metric(annotations, predictions)
8792
"""
8893

94+
def __init__(
95+
self,
96+
annotation_filters: Optional[
97+
Union[ListOfOrAndFilters, ListOfAndFilters]
98+
] = None,
99+
prediction_filters: Optional[
100+
Union[ListOfOrAndFilters, ListOfAndFilters]
101+
] = None,
102+
):
103+
"""
104+
Args:
105+
annotation_filters: Filter predicates. Allowed formats are:
106+
ListOfAndFilters where each Filter forms a chain of AND predicates.
107+
or
108+
ListOfOrAndFilters where Filters are expressed in disjunctive normal form (DNF), like
109+
[[MetadataFilter("short_haired", "==", True), FieldFilter("label", "in", ["cat", "dog"])], ...].
110+
DNF allows arbitrary boolean logical combinations of single field predicates. The innermost structures
111+
each describe a single column predicate. The list of inner predicates is interpreted as a conjunction
112+
(AND), forming a more selective `and` multiple field predicate.
113+
Finally, the outermost list combines these filters as a disjunction (OR).
114+
prediction_filters: Filter predicates. Allowed formats are:
115+
ListOfAndFilters where each Filter forms a chain of AND predicates.
116+
or
117+
ListOfOrAndFilters where Filters are expressed in disjunctive normal form (DNF), like
118+
[[MetadataFilter("short_haired", "==", True), FieldFilter("label", "in", ["cat", "dog"])], ...].
119+
DNF allows arbitrary boolean logical combinations of single field predicates. The innermost structures
120+
each describe a single column predicate. The list of inner predicates is interpreted as a conjunction
121+
(AND), forming a more selective `and` multiple field predicate.
122+
Finally, the outermost list combines these filters as a disjunction (OR).
123+
"""
124+
self.annotation_filters = annotation_filters
125+
self.prediction_filters = prediction_filters
126+
89127
@abstractmethod
90-
def __call__(
128+
def call_metric(
91129
self, annotations: AnnotationList, predictions: PredictionList
92130
) -> MetricResult:
93131
"""A metric must override this method and return a metric result, given annotations and predictions."""
94132

133+
def __call__(
134+
self, annotations: AnnotationList, predictions: PredictionList
135+
) -> MetricResult:
136+
annotations = self._filter_annotations(annotations)
137+
predictions = self._filter_predictions(predictions)
138+
return self.call_metric(annotations, predictions)
139+
140+
def _filter_annotations(self, annotations: AnnotationList):
141+
if (
142+
self.annotation_filters is None
143+
or len(self.annotation_filters) == 0
144+
):
145+
return annotations
146+
annotations.box_annotations = apply_filters(
147+
annotations.box_annotations, self.annotation_filters
148+
)
149+
annotations.line_annotations = apply_filters(
150+
annotations.line_annotations, self.annotation_filters
151+
)
152+
annotations.polygon_annotations = apply_filters(
153+
annotations.polygon_annotations, self.annotation_filters
154+
)
155+
annotations.cuboid_annotations = apply_filters(
156+
annotations.cuboid_annotations, self.annotation_filters
157+
)
158+
annotations.category_annotations = apply_filters(
159+
annotations.category_annotations, self.annotation_filters
160+
)
161+
annotations.multi_category_annotations = apply_filters(
162+
annotations.multi_category_annotations, self.annotation_filters
163+
)
164+
annotations.segmentation_annotations = apply_filters(
165+
annotations.segmentation_annotations, self.annotation_filters
166+
)
167+
return annotations
168+
169+
def _filter_predictions(self, predictions: PredictionList):
170+
if (
171+
self.prediction_filters is None
172+
or len(self.prediction_filters) == 0
173+
):
174+
return predictions
175+
predictions.box_predictions = apply_filters(
176+
predictions.box_predictions, self.prediction_filters
177+
)
178+
predictions.line_predictions = apply_filters(
179+
predictions.line_predictions, self.prediction_filters
180+
)
181+
predictions.polygon_predictions = apply_filters(
182+
predictions.polygon_predictions, self.prediction_filters
183+
)
184+
predictions.cuboid_predictions = apply_filters(
185+
predictions.cuboid_predictions, self.prediction_filters
186+
)
187+
predictions.category_predictions = apply_filters(
188+
predictions.category_predictions, self.prediction_filters
189+
)
190+
predictions.segmentation_predictions = apply_filters(
191+
predictions.segmentation_predictions, self.prediction_filters
192+
)
193+
return predictions
194+
95195
@abstractmethod
96196
def aggregate_score(self, results: List[MetricResult]) -> ScalarResult:
97197
"""A metric must define how to aggregate results from single items to a single ScalarResult.

nucleus/metrics/categorization_metrics.py

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
from abc import abstractmethod
22
from dataclasses import dataclass
3-
from typing import List, Set, Tuple, Union
3+
from typing import List, Optional, Set, Tuple, Union
44

55
from sklearn.metrics import f1_score
66

77
from nucleus.annotation import AnnotationList, CategoryAnnotation
88
from nucleus.metrics.base import Metric, MetricResult, ScalarResult
9+
from nucleus.metrics.filtering import ListOfAndFilters, ListOfOrAndFilters
910
from nucleus.metrics.filters import confidence_filter
1011
from nucleus.prediction import CategoryPrediction, PredictionList
1112

@@ -56,12 +57,37 @@ class CategorizationMetric(Metric):
5657
def __init__(
5758
self,
5859
confidence_threshold: float = 0.0,
60+
annotation_filters: Optional[
61+
Union[ListOfOrAndFilters, ListOfAndFilters]
62+
] = None,
63+
prediction_filters: Optional[
64+
Union[ListOfOrAndFilters, ListOfAndFilters]
65+
] = None,
5966
):
6067
"""Initializes CategorizationMetric abstract object.
6168
6269
Args:
6370
confidence_threshold: minimum confidence threshold for predictions to be taken into account for evaluation. Must be in [0, 1]. Default 0.0
71+
annotation_filters: Filter predicates. Allowed formats are:
72+
ListOfAndFilters where each Filter forms a chain of AND predicates.
73+
or
74+
ListOfOrAndFilters where Filters are expressed in disjunctive normal form (DNF), like
75+
[[MetadataFilter("short_haired", "==", True), FieldFilter("label", "in", ["cat", "dog"])], ...].
76+
DNF allows arbitrary boolean logical combinations of single field predicates. The innermost structures
77+
each describe a single column predicate. The list of inner predicates is interpreted as a conjunction
78+
(AND), forming a more selective `and` multiple field predicate.
79+
Finally, the outermost list combines these filters as a disjunction (OR).
80+
prediction_filters: Filter predicates. Allowed formats are:
81+
ListOfAndFilters where each Filter forms a chain of AND predicates.
82+
or
83+
ListOfOrAndFilters where Filters are expressed in disjunctive normal form (DNF), like
84+
[[MetadataFilter("short_haired", "==", True), FieldFilter("label", "in", ["cat", "dog"])], ...].
85+
DNF allows arbitrary boolean logical combinations of single field predicates. The innermost structures
86+
each describe a single column predicate. The list of inner predicates is interpreted as a conjunction
87+
(AND), forming a more selective `and` multiple field predicate.
88+
Finally, the outermost list combines these filters as a disjunction (OR).
6489
"""
90+
super().__init__(annotation_filters, prediction_filters)
6591
assert 0 <= confidence_threshold <= 1
6692
self.confidence_threshold = confidence_threshold
6793

@@ -83,7 +109,7 @@ def eval(
83109
def aggregate_score(self, results: List[CategorizationResult]) -> ScalarResult: # type: ignore[override]
84110
pass
85111

86-
def __call__(
112+
def call_metric(
87113
self, annotations: AnnotationList, predictions: PredictionList
88114
) -> CategorizationResult:
89115
if self.confidence_threshold > 0:
@@ -139,7 +165,15 @@ class CategorizationF1(CategorizationMetric):
139165
"""Evaluation method that matches categories and returns a CategorizationF1Result that aggregates to the F1 score"""
140166

141167
def __init__(
142-
self, confidence_threshold: float = 0.0, f1_method: str = "macro"
168+
self,
169+
confidence_threshold: float = 0.0,
170+
f1_method: str = "macro",
171+
annotation_filters: Optional[
172+
Union[ListOfOrAndFilters, ListOfAndFilters]
173+
] = None,
174+
prediction_filters: Optional[
175+
Union[ListOfOrAndFilters, ListOfAndFilters]
176+
] = None,
143177
):
144178
"""
145179
Args:
@@ -169,8 +203,28 @@ def __init__(
169203
Calculate metrics for each instance, and find their average (only
170204
meaningful for multilabel classification where this differs from
171205
:func:`accuracy_score`).
206+
annotation_filters: Filter predicates. Allowed formats are:
207+
ListOfAndFilters where each Filter forms a chain of AND predicates.
208+
or
209+
ListOfOrAndFilters where Filters are expressed in disjunctive normal form (DNF), like
210+
[[MetadataFilter("short_haired", "==", True), FieldFilter("label", "in", ["cat", "dog"])], ...].
211+
DNF allows arbitrary boolean logical combinations of single field predicates. The innermost structures
212+
each describe a single column predicate. The list of inner predicates is interpreted as a conjunction
213+
(AND), forming a more selective `and` multiple field predicate.
214+
Finally, the outermost list combines these filters as a disjunction (OR).
215+
prediction_filters: Filter predicates. Allowed formats are:
216+
ListOfAndFilters where each Filter forms a chain of AND predicates.
217+
or
218+
ListOfOrAndFilters where Filters are expressed in disjunctive normal form (DNF), like
219+
[[MetadataFilter("short_haired", "==", True), FieldFilter("label", "in", ["cat", "dog"])], ...].
220+
DNF allows arbitrary boolean logical combinations of single field predicates. The innermost structures
221+
each describe a single column predicate. The list of inner predicates is interpreted as a conjunction
222+
(AND), forming a more selective `and` multiple field predicate.
223+
Finally, the outermost list combines these filters as a disjunction (OR).
172224
"""
173-
super().__init__(confidence_threshold)
225+
super().__init__(
226+
confidence_threshold, annotation_filters, prediction_filters
227+
)
174228
assert (
175229
f1_method in F1_METHODS
176230
), f"Invalid f1_method {f1_method}, expected one of {F1_METHODS}"

0 commit comments

Comments
 (0)