taosdata · zitsen · May 7, 2026 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
@@ -388,11 +388,8 @@ def _validate_params(parsed_input):
     has_threshold, top_n = _validate_result_constraints(result_obj, algo_type)
     source_arr, source_ts_window = _parse_source_data(parsed_input["source_data"])
 
-    exclude_contained = _validate_bool_field(result_obj, "exclude_contained")
-    if algo_type != "dtw" and "exclude_contained" in result_obj:
-        raise ValueError('"result.exclude_contained" can only be set for dtw algorithm')
-
     exclude_source = _validate_bool_field(result_obj, "exclude_source")
+    exclude_overlap = _validate_bool_field(result_obj, "exclude_overlap")
 
     min_window, max_window = _validate_min_max_window(
         algo_params.get("min_window", None),
@@ -434,8 +431,8 @@ def _validate_params(parsed_input):
         "max_window": max_window,
         "window_size_step": window_size_step,
         "window_sliding_step": window_sliding_step,
-        "exclude_contained": exclude_contained,
         "exclude_source": exclude_source,
+        "exclude_overlap": exclude_overlap,
         "is_profile_list": is_profile_list
     }
 
@@ -461,42 +458,44 @@ def _validate_possible_candidates(source_arr, data_list_size, min_window, max_wi
             )
 
 
-def _is_interval_contained(inner_window, outer_window):
-    """Return whether ``outer_window`` strictly contains ``inner_window``.
+def _is_interval_overlapping(window_a, window_b):
+    """Return whether ``window_a`` and ``window_b`` overlap.
 
-    "Strict" means ``outer_window`` fully covers ``inner_window`` and the two
-    windows are not identical. At least one outer bound must extend beyond the
-    corresponding inner bound, so equal start/end bounds do not count as
-    containment.
+    Windows that only share an endpoint (e.g. [1,5] and [5,8], or [5,5] and [5,8])
+    are treated as adjacent, not overlapping. Two single-point windows [t,t]
+    overlap only when they have the same timestamp.
     """
-    return (outer_window[0] <= inner_window[0]
-            and outer_window[1] >= inner_window[1]
-            and (outer_window[0] < inner_window[0] or outer_window[1] > inner_window[1]))
+    # Two single-point windows overlap only if equal; the strict "<" test below would miss that case.
+    if window_a[0] == window_a[1] and window_b[0] == window_b[1]:
+        return window_a[0] == window_b[0]
+    return window_a[0] < window_b[1] and window_b[0] < window_a[1]
+
 
+def _filter_exclude_overlap(matches, limit=None):
+    """Greedily keep matches whose ts_window does not overlap with any already-kept match.
 
-def _filter_exclude_contained(matches, limit=None):
+    ``matches`` must be sorted best-first. A candidate is discarded when its
+    ts_window overlaps the ts_window of an already-kept match (adjacent windows
+    sharing only an endpoint are not considered overlapping).
+    """
     if len(matches) <= 1:
         return matches
 
-    # matches are expected to be sorted by criteria ascending (best first for DTW).
-    # We greedily keep each match unless it is in a strict containment relationship
-    # (either direction) with an already-kept match.  Because we process best-first,
-    # every already-kept match has a better (or equal) criteria value, so the current
-    # match is always the worse one in any containment pair and should be discarded.
     kept = []  # list of (ts_window, original_index)
 
     for idx, match in enumerate(matches):
         ts_window = match.get("ts_window")
         if not isinstance(ts_window, (list, tuple)) or len(ts_window) != 2:
             raise ValueError(f'matches[{idx}].ts_window must be a [start_ts, end_ts] pair')
+        if ts_window[0] > ts_window[1]:
+            raise ValueError(f'matches[{idx}].ts_window must satisfy start_ts <= end_ts')
 
-        in_containment = any(
-            _is_interval_contained(ts_window, k_window)
-            or _is_interval_contained(k_window, ts_window)
+        has_overlap = any(
+            _is_interval_overlapping(ts_window, k_window)
             for k_window, _ in kept
         )
 
-        if not in_containment:
+        if not has_overlap:
             kept.append((ts_window, idx))
 
             if limit is not None and len(kept) >= limit:
@@ -506,9 +505,9 @@ def _filter_exclude_contained(matches, limit=None):
     return [m for i, m in enumerate(matches) if i in kept_indices]
 
 
-# When exclude_contained is active, the heap is oversampled by this factor so
-# that containment filtering still yields target_rows results in most cases.
-_CONTAINMENT_OVERSAMPLE = 8
+# When exclusion filters are active, the heap is oversampled by this factor so
+# that filtering still yields target_rows results in most cases.
+_EXCLUSION_OVERSAMPLE = 8
 
 def _heap_key(algo_type, criteria_val, seq_idx):
     # Higher heap key means a better candidate after normalization of the metric:
@@ -536,14 +535,14 @@ def do_profile_search_impl(req_json):
     max_window = parsed["max_window"]
     window_size_step = parsed["window_size_step"]
     window_sliding_step = parsed["window_sliding_step"]
-    exclude_contained = parsed["exclude_contained"]
     exclude_source = parsed["exclude_source"]
+    exclude_overlap = parsed["exclude_overlap"]
 
     source_norm = _normalize_series(source_arr, norm_type)
     metric_type = "dtw_distance" if algo_type == "dtw" else "cosine_similarity"
     threshold = float(result_obj["threshold"]) if has_threshold else None
     target_rows = ProfileSearchLimits.MAX_PROFILE_SEARCH_RESULTS if top_n is None else top_n
-    need_exclusion_filter = (algo_type == "dtw" and exclude_contained)
+    need_exclusion_filter = exclude_overlap
 
     def _build_candidates():
         if parsed["is_profile_list"]:
@@ -558,10 +557,10 @@ def _build_candidates():
         )
 
     # Score all candidates once.
-    # - Without exclude_contained: stream results directly into a fixed-size heap,
+    # - Without exclude_overlap: stream results directly into a fixed-size heap,
     #   discarding weaker candidates on the fly.  No retry is needed so there is no
     #   reason to accumulate a separate all_passed list.
-    # - With exclude_contained: every passing result is saved in all_passed so that
+    # - With exclude_overlap: every passing result is saved in all_passed so that
     #   the retry loop can rebuild the heap with a larger limit without recomputing
     #   any distances.
     all_passed = [] if need_exclusion_filter else None
@@ -611,9 +610,9 @@ def _build_candidates():
             top_heap.sort(key=lambda x: (-x[2]["criteria"], x[1]))
         matches = [x[2] for x in top_heap]
     else:
-        # exclude_contained is active: rebuild the heap from all_passed with a
-        # progressively larger heap_limit until containment filtering yields enough results.
-        oversample = _CONTAINMENT_OVERSAMPLE
+        # Exclusion filters are active: rebuild the heap from all_passed with a
+        # progressively larger heap_limit until filtering yields enough results.
+        oversample = _EXCLUSION_OVERSAMPLE
         matches = []
         total_passed = len(all_passed)
 
@@ -628,21 +627,20 @@ def _build_candidates():
                 elif key > top_heap[0][0]:
                     heapq.heapreplace(top_heap, heap_item)
 
-            if algo_type != "dtw":
-                raise RuntimeError('exclude_contained logic requires algo_type to be "dtw"')
-
-            top_heap.sort(key=lambda x: (x[2]["criteria"], x[1]))
+            if algo_type == "dtw":
+                top_heap.sort(key=lambda x: (x[2]["criteria"], x[1]))
+            else:
+                top_heap.sort(key=lambda x: (-x[2]["criteria"], x[1]))
 
             matches = [x[2] for x in top_heap]
 
-            matches = _filter_exclude_contained(matches, limit=target_rows)
-            matches = matches[:target_rows]
+            matches = _filter_exclude_overlap(matches, limit=target_rows)
 
             # Got enough results, or all passing candidates already fit in the heap.
             if len(matches) >= target_rows or total_passed <= heap_limit:
                 break
 
-            # The heap was saturated and containment filtering removed too many entries.
+            # The heap was saturated and filtering removed too many entries.
             # Double the oversample factor and rebuild from the cached scored list.
             oversample *= 2
 

@@ -310,8 +310,8 @@ def do_profile_search(request, api_version):
     - Or return all profiles with distance below the threshold when using dtw.
     - Or return all profiles with similarity above the threshold when using cosine similarity.
     - "num" and "threshold" cannot be set at the same time.
-    - "exclude_contained" is only applicable for dtw and means whether to exclude the worse matched profile in a strict-containment pair, keeping the better one (the match with the smaller distance). For example, if there are two matched profiles with ts window [1, 5] and [2, 4], and one strictly contains the other, the worse match will be excluded if "exclude_contained" is set to true. 
     - "exclude_source" is applicable for all algorithms and means whether to exclude the matched profile that contains the source profile. For example, if the source profile has ts window [2, 4], the matched profile with ts window [2, 4] will be excluded if "exclude_source" is set to true.
+    - "exclude_overlap" is applicable for all algorithms and controls whether a matched profile whose ts window overlaps a better-ranked result is excluded. For example, given two matched profiles with ts windows [1, 5] and [4, 6] where [1, 5] ranks better, the profile [4, 6] is excluded when "exclude_overlap" is set to true. Windows that only touch at an endpoint, such as [1, 5] and [5, 9], are treated as adjacent and are not excluded by "exclude_overlap".
     - Threshold-based results are capped at 500 matches.
     target_data.ts may be either:
     - a unix timestamp list, such as [1, 2, 3, 4, 5, 6]
@@ -330,8 +330,8 @@ def do_profile_search(request, api_version):
         },
         "result": {
             "num": 3,
-            "exclude_contained": true,
-            "exclude_source": true
+            "exclude_source": true,
+            "exclude_overlap": true
         },
         "source_data": {
             "ts": [1000, 2000, 3000, 4000, 5000],