From 157e98bd676e6446744cda4d5a244fd0eabb9567 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 09:16:58 -0500
Subject: [PATCH 01/23] agentgrep(feat[cli]): Reintroduce search subparser with
 ranking flags (#17)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

why: search returns with genuine differentiation from grep —
rapidfuzz relevance ranking, near-duplicate collapsing, and
session grouping.

what:
- Add SearchArgs with threshold, no_group, no_rank fields
- Register search subparser with ranking-specific flags
- Add SEARCH_DESCRIPTION and main() dispatch
- Add parse tests
---
 src/agentgrep/__init__.py   |  21 ++++
 src/agentgrep/cli/parser.py | 164 ++++++++++++++++++++++++-
 src/agentgrep/cli/render.py |  16 ++-
 tests/test_cli_search.py    | 235 ++++++++++++++++++++++++++++++++++++
 4 files changed, 434 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_cli_search.py

diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py
index 1965f8d..b87962d 100644
--- a/src/agentgrep/__init__.py
+++ b/src/agentgrep/__init__.py
@@ -239,6 +239,23 @@ def build_description(
         ),
     ),
 )
+SEARCH_DESCRIPTION = build_description(
+    """
+    Smart search with relevance ranking, deduplication, and session grouping.
+    Uses rapidfuzz for scoring — results sorted by match quality.
+    """,
+    (
+        (
+            None,
+            (
+                "agentgrep search streaming parser",
+                "agentgrep search --threshold 70 migration",
+                "agentgrep search --no-rank --no-group caching",
+                "agentgrep search bliss --json",
+            ),
+        ),
+    ),
+)
 GREP_DESCRIPTION = build_description(
     """
     Content search across normalized records with rg/ag-shaped flags.
@@ -3746,6 +3763,8 @@ def main(argv: cabc.Sequence[str] | None = None) -> int:
             return 0
         if isinstance(parsed, GrepArgs):
             return run_grep_command(parsed)
+        if isinstance(parsed, SearchArgs):
+            return run_search_command(parsed)
         if isinstance(parsed, FuzzyArgs):
             return run_fuzzy_command(parsed)
         if isinstance(parsed, UIArgs):
@@ -3771,6 +3790,7 @@ def main(argv: cabc.Sequence[str] | None = None) -> int:
     GrepArgs,
     ParserBundle,
     PatternMode,
+    SearchArgs,
     UIArgs,
     add_common_agent_options,
     add_output_mode_options,
@@ -3794,6 +3814,7 @@ def main(argv: cabc.Sequence[str] | None = None) -> int:
     run_find_command,
     run_fuzzy_command,
     run_grep_command,
+    run_search_command,
     run_ui_command,
     serialize_find_record,
     serialize_grep_record,
diff --git a/src/agentgrep/cli/parser.py b/src/agentgrep/cli/parser.py
index e0f96b8..9f4f187 100644
--- a/src/agentgrep/cli/parser.py
+++ b/src/agentgrep/cli/parser.py
@@ -27,6 +27,7 @@
     FIND_DESCRIPTION,
     FUZZY_DESCRIPTION,
     GREP_DESCRIPTION,
+    SEARCH_DESCRIPTION,
     UI_DESCRIPTION,
     AgentName,
     ColorMode,
@@ -58,6 +59,7 @@
     "GrepArgs",
     "ParserBundle",
     "PatternMode",
+    "SearchArgs",
     "UIArgs",
     "add_common_agent_options",
     "add_output_mode_options",
@@ -171,6 +173,32 @@ class GrepArgs:
     raw_query: str = ""
 
 
+@dataclasses.dataclass(slots=True)
+class SearchArgs:
+    """Typed arguments for ``agentgrep search``.
+
+    Differentiates from ``grep`` by applying rapidfuzz relevance scoring,
+    near-duplicate collapsing (WRatio >= 90), and session grouping to
+    produce a best-first result set.
+    """
+
+    terms: tuple[str, ...]
+    agents: tuple[AgentName, ...]
+    search_type: SearchType
+    any_term: bool  # True = OR mode (match any term instead of all)
+    regex: bool
+    case_sensitive: bool
+    limit: int | None  # validated to be >= 1 when given
+    output_mode: OutputMode
+    color_mode: ColorMode
+    progress_mode: ProgressMode
+    threshold: int = 0  # minimum fuzzy score, validated to 0-100
+    no_group: bool = False  # True = flat results, no session grouping
+    no_rank: bool = False  # True = discovery order, no relevance scoring
+    compiled: CompiledQuery | None = None
+    raw_query: str = ""  # space-joined terms as typed on the command line
+
+
 @dataclasses.dataclass(slots=True)
 class ParserBundle:
     """CLI parsers used for root and subcommand help."""
@@ -179,6 +207,7 @@ class ParserBundle:
     find_parser: argparse.ArgumentParser
     grep_parser: argparse.ArgumentParser
     fuzzy_parser: argparse.ArgumentParser
+    search_parser: argparse.ArgumentParser
 
 
 def normalize_color_mode(argv: cabc.Sequence[str] | None) -> ColorMode:
@@ -636,11 +665,87 @@ def create_parser(
     )
     add_output_mode_options(fuzzy_parser, allow_ui=True)
 
+    search_parser = subparsers.add_parser(
+        "search",
+        help="Smart search with relevance ranking and deduplication",
+        description=SEARCH_DESCRIPTION,
+        formatter_class=formatter_class,
+        color=color_mode != "never",
+    )
+    add_common_agent_options(search_parser)
+    _ = search_parser.add_argument(
+        "terms",
+        nargs="*",
+        metavar="TERM",
+        help="Search terms (combined as AND by default)",
+    )
+    _ = search_parser.add_argument(
+        "--type",
+        choices=["prompts", "history", "all"],
+        default="prompts",
+        dest="search_type",
+        help="Record type to search (default: prompts)",
+    )
+    _ = search_parser.add_argument(
+        "--any",
+        action="store_true",
+        dest="any_term",
+        help="OR mode — match any term instead of all",
+    )
+    _ = search_parser.add_argument(
+        "--regex",
+        action="store_true",
+        help="Treat terms as regex patterns",
+    )
+    _ = search_parser.add_argument(
+        "--case-sensitive",
+        action="store_true",
+        help="Force case-sensitive matching",
+    )
+    _ = search_parser.add_argument(
+        "--limit",
+        type=int,
+        metavar="N",
+        help="Limit the number of results after ranking",
+    )
+    _ = search_parser.add_argument(
+        "--threshold",
+        type=int,
+        default=0,
+        metavar="N",
+        help="Minimum fuzzy score 0-100 (default: 0 = show all matches)",
+    )
+    _ = search_parser.add_argument(
+        "--no-group",
+        action="store_true",
+        help="Flat results, no session grouping",
+    )
+    _ = search_parser.add_argument(
+        "--no-rank",
+        action="store_true",
+        help="Discovery order, no relevance scoring",
+    )
+    _ = search_parser.add_argument(
+        "--progress",
+        choices=["auto", "always", "never"],
+        default="auto",
+        help="Show search progress on stderr",
+    )
+    _ = search_parser.add_argument(
+        "--no-progress",
+        dest="progress",
+        action="store_const",
+        const="never",
+        help="Silence the stderr progress spinner (alias for --progress=never)",
+    )
+    add_output_mode_options(search_parser, allow_ui=True)
+
     return ParserBundle(
         parser=parser,
         find_parser=find_parser,
         grep_parser=grep_parser,
         fuzzy_parser=fuzzy_parser,
+        search_parser=search_parser,
     )
 
 
@@ -791,7 +896,7 @@ def _check_for_mangled_field_predicate(
 
 def parse_args(
     argv: cabc.Sequence[str] | None = None,
-) -> FindArgs | UIArgs | GrepArgs | FuzzyArgs | None:
+) -> FindArgs | UIArgs | GrepArgs | FuzzyArgs | SearchArgs | None:
     """Parse CLI arguments into typed dataclasses."""
     color_mode = normalize_color_mode(argv)
     effective_argv = list(argv) if argv is not None else list(sys.argv[1:])
@@ -827,6 +932,15 @@ def parse_args(
             bundle=bundle,
         )
 
+    if command == "search":
+        return _build_search_args(
+            namespace,
+            agents=agents,
+            output_mode=output_mode,
+            color_mode=color_mode,
+            bundle=bundle,
+        )
+
     if command == "fuzzy":
         return _build_fuzzy_args(
             namespace,
@@ -1008,6 +1122,54 @@ def _build_grep_args(
     )
 
 
+def _build_search_args(
+    namespace: argparse.Namespace,
+    *,
+    agents: tuple[AgentName, ...],
+    output_mode: OutputMode,
+    color_mode: ColorMode,
+    bundle: ParserBundle,
+) -> SearchArgs:
+    """Validate ``search`` flags and build :class:`SearchArgs` from *namespace*."""
+    terms_list = t.cast("list[str]", namespace.terms)
+    limit = t.cast("int | None", namespace.limit)
+    if limit is not None and limit < 1:
+        with configured_color_environment(color_mode):
+            bundle.search_parser.error("--limit must be greater than 0")
+    threshold = t.cast("int", namespace.threshold)
+    if threshold < 0 or threshold > 100:
+        with configured_color_environment(color_mode):
+            bundle.search_parser.error("--threshold must be between 0 and 100")
+
+    search_compiled, residual_terms = _maybe_compile_query(
+        terms_list,
+        bundle=bundle,
+        color_mode=color_mode,
+        subparser=bundle.search_parser,
+    )
+    final_terms: tuple[str, ...] = (  # residual terms only when a query compiled
+        residual_terms if search_compiled is not None else tuple(terms_list)
+    )
+
+    return SearchArgs(
+        terms=final_terms,
+        agents=agents,
+        search_type=t.cast("SearchType", namespace.search_type),
+        any_term=t.cast("bool", namespace.any_term),
+        regex=t.cast("bool", namespace.regex),
+        case_sensitive=t.cast("bool", namespace.case_sensitive),
+        limit=limit,
+        output_mode=output_mode,
+        color_mode=color_mode,
+        progress_mode=t.cast("ProgressMode", namespace.progress),
+        threshold=threshold,
+        no_group=t.cast("bool", namespace.no_group),
+        no_rank=t.cast("bool", namespace.no_rank),
+        compiled=search_compiled,
+        raw_query=" ".join(terms_list),  # original terms, before compilation
+    )
+
+
 def _build_fuzzy_args(
     namespace: argparse.Namespace,
     *,
diff --git a/src/agentgrep/cli/render.py b/src/agentgrep/cli/render.py
index 4be9f11..9d292da 100644
--- a/src/agentgrep/cli/render.py
+++ b/src/agentgrep/cli/render.py
@@ -36,7 +36,7 @@
     SourceHandle,
     SourceHandlePayload,
 )
-from agentgrep.cli.parser import FindArgs, FuzzyArgs, GrepArgs, UIArgs
+from agentgrep.cli.parser import FindArgs, FuzzyArgs, GrepArgs, SearchArgs, UIArgs
 
 __all__ = [
     "GrepSummary",
@@ -58,6 +58,7 @@
     "run_find_command",
     "run_fuzzy_command",
     "run_grep_command",
+    "run_search_command",
     "run_ui_command",
     "serialize_find_record",
     "serialize_grep_record",
@@ -426,6 +427,19 @@ def run_ui_command(args: UIArgs) -> int:
     return 0
 
 
+def run_search_command(args: SearchArgs) -> int:
+    """Execute ``agentgrep search`` with ranking and grouping.
+
+    Collects all matching records eagerly, scores them by rapidfuzz
+    relevance, collapses near-duplicates, groups by session, and
+    renders in the requested output format. Returns ``0`` when at
+    least one result survives ranking, ``1`` otherwise.
+    """
+    _ = args
+    msg = "search command not yet wired — ranking engine pending"
+    raise SystemExit(msg)
+
+
 def _compile_grep_patterns(args: GrepArgs) -> list[re.Pattern[str]]:
     """Compile :class:`GrepArgs` patterns into regex objects honoring mode/case.
 
diff --git a/tests/test_cli_search.py b/tests/test_cli_search.py
new file mode 100644
index 0000000..1270599
--- /dev/null
+++ b/tests/test_cli_search.py
@@ -0,0 +1,235 @@
+"""Tests for the ``agentgrep search`` subcommand.
+
+Covers argument parsing into :class:`agentgrep.SearchArgs`, the
+ranking-specific flags (``--threshold``, ``--no-group``, ``--no-rank``),
+and the integration between the ranking engine and the CLI dispatch.
+"""
+
+from __future__ import annotations
+
+import typing as t
+
+import pytest
+
+import agentgrep
+
+# ---------------------------------------------------------------------------
+# Argument parsing
+# ---------------------------------------------------------------------------
+
+
+class SearchParseCase(t.NamedTuple):
+    """Parametrized case for :func:`agentgrep.parse_args` on ``search``."""
+
+    test_id: str
+    argv: tuple[str, ...]
+    expected_terms: tuple[str, ...]
+    expected_threshold: int
+    expected_no_group: bool
+    expected_no_rank: bool
+    expected_search_type: agentgrep.SearchType
+    expected_any_term: bool
+    expected_regex: bool
+    expected_case_sensitive: bool
+
+
+SEARCH_PARSE_CASES: tuple[SearchParseCase, ...] = (
+    SearchParseCase(
+        "defaults-single-term",
+        ("search", "bliss"),
+        ("bliss",),
+        0,
+        False,
+        False,
+        "prompts",
+        False,
+        False,
+        False,
+    ),
+    SearchParseCase(
+        "multi-term",
+        ("search", "streaming", "parser"),
+        ("streaming", "parser"),
+        0,
+        False,
+        False,
+        "prompts",
+        False,
+        False,
+        False,
+    ),
+    SearchParseCase(
+        "threshold-flag",
+        ("search", "--threshold", "70", "migration"),
+        ("migration",),
+        70,
+        False,
+        False,
+        "prompts",
+        False,
+        False,
+        False,
+    ),
+    SearchParseCase(
+        "no-group-flag",
+        ("search", "--no-group", "caching"),
+        ("caching",),
+        0,
+        True,
+        False,
+        "prompts",
+        False,
+        False,
+        False,
+    ),
+    SearchParseCase(
+        "no-rank-flag",
+        ("search", "--no-rank", "bliss"),
+        ("bliss",),
+        0,
+        False,
+        True,
+        "prompts",
+        False,
+        False,
+        False,
+    ),
+    SearchParseCase(
+        "all-ranking-flags",
+        ("search", "--threshold", "50", "--no-group", "--no-rank", "query"),
+        ("query",),
+        50,
+        True,
+        True,
+        "prompts",
+        False,
+        False,
+        False,
+    ),
+    SearchParseCase(
+        "type-history",
+        ("search", "--type", "history", "todo"),
+        ("todo",),
+        0,
+        False,
+        False,
+        "history",
+        False,
+        False,
+        False,
+    ),
+    SearchParseCase(
+        "any-term-mode",
+        ("search", "--any", "foo", "bar"),
+        ("foo", "bar"),
+        0,
+        False,
+        False,
+        "prompts",
+        True,
+        False,
+        False,
+    ),
+    SearchParseCase(
+        "regex-flag",
+        ("search", "--regex", "foo.*bar"),
+        ("foo.*bar",),
+        0,
+        False,
+        False,
+        "prompts",
+        False,
+        True,
+        False,
+    ),
+    SearchParseCase(
+        "case-sensitive-flag",
+        ("search", "--case-sensitive", "Bliss"),
+        ("Bliss",),
+        0,
+        False,
+        False,
+        "prompts",
+        False,
+        False,
+        True,
+    ),
+    SearchParseCase(
+        "no-terms",
+        ("search",),
+        (),
+        0,
+        False,
+        False,
+        "prompts",
+        False,
+        False,
+        False,
+    ),
+)
+
+
+@pytest.mark.parametrize(
+    SearchParseCase._fields,
+    SEARCH_PARSE_CASES,
+    ids=[case.test_id for case in SEARCH_PARSE_CASES],
+)
+def test_search_parse_args(
+    test_id: str,
+    argv: tuple[str, ...],
+    expected_terms: tuple[str, ...],
+    expected_threshold: int,
+    expected_no_group: bool,
+    expected_no_rank: bool,
+    expected_search_type: agentgrep.SearchType,
+    expected_any_term: bool,
+    expected_regex: bool,
+    expected_case_sensitive: bool,
+) -> None:
+    """Search subparser captures ranking-specific flags correctly."""
+    _ = test_id
+    parsed = agentgrep.parse_args(argv)
+    assert isinstance(parsed, agentgrep.SearchArgs)
+    assert parsed.terms == expected_terms
+    assert parsed.threshold == expected_threshold
+    assert parsed.no_group == expected_no_group
+    assert parsed.no_rank == expected_no_rank
+    assert parsed.search_type == expected_search_type
+    assert parsed.any_term == expected_any_term
+    assert parsed.regex == expected_regex
+    assert parsed.case_sensitive == expected_case_sensitive
+
+
+def test_search_parse_limit() -> None:
+    """--limit is captured in SearchArgs."""
+    parsed = agentgrep.parse_args(("search", "--limit", "5", "bliss"))
+    assert isinstance(parsed, agentgrep.SearchArgs)
+    assert parsed.limit == 5
+
+
+def test_search_parse_output_json() -> None:
+    """--json sets output_mode correctly."""
+    parsed = agentgrep.parse_args(("search", "--json", "bliss"))
+    assert isinstance(parsed, agentgrep.SearchArgs)
+    assert parsed.output_mode == "json"
+
+
+def test_search_parse_output_ndjson() -> None:
+    """--ndjson sets output_mode correctly."""
+    parsed = agentgrep.parse_args(("search", "--ndjson", "bliss"))
+    assert isinstance(parsed, agentgrep.SearchArgs)
+    assert parsed.output_mode == "ndjson"
+
+
+def test_search_parse_progress_never() -> None:
+    """--no-progress sets progress_mode to never."""
+    parsed = agentgrep.parse_args(("search", "--no-progress", "bliss"))
+    assert isinstance(parsed, agentgrep.SearchArgs)
+    assert parsed.progress_mode == "never"
+
+
+def test_search_parse_agent_filter() -> None:
+    """--agent filters are captured."""
+    parsed = agentgrep.parse_args(("search", "--agent", "codex", "bliss"))
+    assert isinstance(parsed, agentgrep.SearchArgs)
+    assert parsed.agents == ("codex",)

From 403f43abd222afadc060fc6571f3ff375ed1e947 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 09:19:11 -0500
Subject: [PATCH 02/23] agentgrep(feat[ranking]): Add rapidfuzz scoring, dedup,
 and session grouping

why: search needs to score results by relevance (best match first),
collapse near-duplicates (WRatio >= 90), and group by session for
a coherent browsing experience.

what:
- Add ranking.py with rank_search_records (WRatio scoring + sort)
- Add collapse_near_duplicates (pairwise similarity, keep representative)
- Add group_by_session (OrderedDict grouping by session_id)
- Add parametrized tests for all three functions
---
 src/agentgrep/ranking.py | 130 +++++++++++++++++++
 tests/test_ranking.py    | 269 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 399 insertions(+)
 create mode 100644 src/agentgrep/ranking.py
 create mode 100644 tests/test_ranking.py

diff --git a/src/agentgrep/ranking.py b/src/agentgrep/ranking.py
new file mode 100644
index 0000000..9942e2e
--- /dev/null
+++ b/src/agentgrep/ranking.py
@@ -0,0 +1,130 @@
+"""Relevance scoring, near-duplicate collapsing, and session grouping.
+
+The search subcommand collects all engine matches eagerly, then passes
+them through the three-stage pipeline exposed here:
+
+1. :func:`rank_search_records` — score each record against the query
+   text with rapidfuzz WRatio, filter by threshold, sort best-first.
+2. :func:`collapse_near_duplicates` — pairwise WRatio between record
+   bodies; records above the similarity ceiling are folded into the
+   highest-scoring representative.
+3. :func:`group_by_session` — bucket the surviving records by
+   ``session_id``, preserving score order within each group.
+"""
+
+from __future__ import annotations
+
+import collections
+import typing as t
+
+if t.TYPE_CHECKING:
+    from agentgrep import SearchRecord
+
+__all__ = [
+    "collapse_near_duplicates",
+    "group_by_session",
+    "rank_search_records",
+]
+
+
+def rank_search_records(
+    records: list[SearchRecord],
+    query_text: str,
+    *,
+    threshold: int = 0,
+) -> list[tuple[SearchRecord, float]]:
+    """Score records by relevance and sort best-first.
+
+    Parameters
+    ----------
+    records : list[SearchRecord]
+        Engine-matched records in discovery order.
+    query_text : str
+        The space-joined search terms for WRatio scoring.
+    threshold : int
+        Minimum WRatio score, inclusive; ``0`` (the default) keeps all.
+
+    Returns
+    -------
+    list[tuple[SearchRecord, float]]
+        ``(record, score)`` pairs, best score first; ties keep input order.
+    """
+    import rapidfuzz.fuzz
+
+    scored: list[tuple[SearchRecord, float]] = [
+        (r, float(rapidfuzz.fuzz.WRatio(query_text, r.text))) for r in records
+    ]
+    if threshold > 0:
+        scored = [(r, s) for r, s in scored if s >= threshold]
+    scored.sort(key=lambda pair: pair[1], reverse=True)
+    return scored
+
+
+def collapse_near_duplicates(
+    scored: list[tuple[SearchRecord, float]],
+    *,
+    similarity_threshold: float = 90.0,
+) -> list[tuple[SearchRecord, float, int]]:
+    """Collapse near-duplicate records, keeping the earliest of each cluster.
+
+    Parameters
+    ----------
+    scored : list[tuple[SearchRecord, float]]
+        Pre-sorted ``(record, score)`` pairs (best-first).
+    similarity_threshold : float
+        WRatio cutoff — a later record whose text scores at or above
+        this against a kept record is folded into that record.
+
+    Returns
+    -------
+    list[tuple[SearchRecord, float, int]]
+        ``(record, score, similar_count)`` triples. ``similar_count``
+        is how many later records were folded into that representative.
+    """
+    import rapidfuzz.fuzz
+
+    if not scored:
+        return []
+    result: list[tuple[SearchRecord, float, int]] = []
+    consumed: set[int] = set()  # indices already folded into a representative
+    for i, (record_i, score_i) in enumerate(scored):
+        if i in consumed:
+            continue
+        similar_count = 0
+        for j in range(i + 1, len(scored)):  # compare only against later entries
+            if j in consumed:
+                continue
+            record_j = scored[j][0]
+            sim = float(rapidfuzz.fuzz.WRatio(record_i.text, record_j.text))
+            if sim >= similarity_threshold:
+                similar_count += 1
+                consumed.add(j)
+        result.append((record_i, score_i, similar_count))
+    return result
+
+
+def group_by_session(
+    records: list[tuple[SearchRecord, float, int]],
+) -> list[tuple[str | None, list[tuple[SearchRecord, float, int]]]]:
+    """Group records by ``session_id``, preserving input order within groups.
+
+    Parameters
+    ----------
+    records : list[tuple[SearchRecord, float, int]]
+        Collapsed ``(record, score, similar_count)`` triples.
+
+    Returns
+    -------
+    list[tuple[str | None, list[...]]]
+        ``(session_id, entries)`` pairs in first-seen order.
+    """
+    groups: collections.OrderedDict[
+        str | None,
+        list[tuple[SearchRecord, float, int]],
+    ] = collections.OrderedDict()
+    for record, score, similar in records:
+        key = record.session_id  # may be None; such records share one group
+        if key not in groups:
+            groups[key] = []
+        groups[key].append((record, score, similar))
+    return list(groups.items())
diff --git a/tests/test_ranking.py b/tests/test_ranking.py
new file mode 100644
index 0000000..fe5488a
--- /dev/null
+++ b/tests/test_ranking.py
@@ -0,0 +1,269 @@
+"""Tests for the ranking engine (``agentgrep.ranking``).
+
+Covers the three-stage pipeline: rapidfuzz scoring, near-duplicate
+collapsing, and session grouping.
+"""
+
+from __future__ import annotations
+
+import pathlib
+import typing as t
+
+import pytest
+
+import agentgrep
+from agentgrep.ranking import (
+    collapse_near_duplicates,
+    group_by_session,
+    rank_search_records,
+)
+
+
+def _record(
+    text: str,
+    *,
+    session_id: str | None = None,
+    agent: agentgrep.AgentName = "codex",
+) -> agentgrep.SearchRecord:
+    """Build a minimal SearchRecord for ranking tests."""
+    return agentgrep.SearchRecord(
+        kind="prompt",
+        agent=agent,
+        store="test",
+        adapter_id="test.v1",
+        path=pathlib.Path("/tmp/test"),
+        text=text,
+        session_id=session_id,
+    )
+
+
+# ---------------------------------------------------------------------------
+# rank_search_records
+# ---------------------------------------------------------------------------
+
+
+class RankCase(t.NamedTuple):
+    """Parametrized case for :func:`rank_search_records`."""
+
+    test_id: str
+    texts: list[str]
+    query: str
+    threshold: int
+    expected_first_text: str | None
+    expected_min_count: int
+
+
+RANK_CASES: tuple[RankCase, ...] = (
+    RankCase(
+        "higher-match-scores-first",
+        ["unrelated noise", "the streaming parser is fast", "streaming"],
+        "streaming",
+        0,
+        "streaming",
+        3,
+    ),
+    RankCase(
+        "threshold-filters-low",
+        ["unrelated noise", "streaming parser"],
+        "streaming",
+        80,
+        "streaming parser",
+        1,
+    ),
+    RankCase(
+        "empty-input",
+        [],
+        "anything",
+        0,
+        None,
+        0,
+    ),
+)
+
+
+@pytest.mark.parametrize(
+    RankCase._fields,
+    RANK_CASES,
+    ids=[case.test_id for case in RANK_CASES],
+)
+def test_rank_search_records(
+    test_id: str,
+    texts: list[str],
+    query: str,
+    threshold: int,
+    expected_first_text: str | None,
+    expected_min_count: int,
+) -> None:
+    """rank_search_records scores, filters, and sorts correctly."""
+    _ = test_id
+    records = [_record(text) for text in texts]
+    result = rank_search_records(records, query, threshold=threshold)
+    assert len(result) >= expected_min_count
+    if expected_first_text is not None:
+        assert result[0][0].text == expected_first_text
+
+
+def test_rank_scores_are_descending() -> None:
+    """Scores are in non-increasing order."""
+    records = [
+        _record("unrelated noise here"),
+        _record("the streaming parser approach"),
+        _record("streaming"),
+        _record("fully streaming parser engine"),
+    ]
+    result = rank_search_records(records, "streaming parser")
+    scores = [score for _, score in result]
+    assert scores == sorted(scores, reverse=True)
+
+
+# ---------------------------------------------------------------------------
+# collapse_near_duplicates
+# ---------------------------------------------------------------------------
+
+
+class CollapseCase(t.NamedTuple):
+    """Parametrized case for :func:`collapse_near_duplicates`."""
+
+    test_id: str
+    texts: list[str]
+    expected_count: int
+    expected_any_similar: bool
+
+
+COLLAPSE_CASES: tuple[CollapseCase, ...] = (
+    CollapseCase(
+        "identical-texts-collapse",
+        ["hello world", "hello world", "hello world"],
+        1,
+        True,
+    ),
+    CollapseCase(
+        "different-texts-stay",
+        ["apple pie recipe", "quantum mechanics lecture", "jazz improvisation"],
+        3,
+        False,
+    ),
+    CollapseCase(
+        "empty-input",
+        [],
+        0,
+        False,
+    ),
+    CollapseCase(
+        "near-identical-collapse",
+        ["hello world today", "hello world today!"],
+        1,
+        True,
+    ),
+)
+
+
+@pytest.mark.parametrize(
+    CollapseCase._fields,
+    COLLAPSE_CASES,
+    ids=[case.test_id for case in COLLAPSE_CASES],
+)
+def test_collapse_near_duplicates(
+    test_id: str,
+    texts: list[str],
+    expected_count: int,
+    expected_any_similar: bool,
+) -> None:
+    """Near-duplicate collapsing produces expected representative count."""
+    _ = test_id
+    scored = [(r, 50.0) for r in (_record(text) for text in texts)]
+    result = collapse_near_duplicates(scored)
+    assert len(result) == expected_count
+    if expected_any_similar:
+        assert any(similar > 0 for _, _, similar in result)
+    elif result:
+        assert all(similar == 0 for _, _, similar in result)
+
+
+def test_collapse_preserves_score_order() -> None:
+    """Collapsed output preserves the pre-sorted score order."""
+    scored: list[tuple[agentgrep.SearchRecord, float]] = [
+        (_record("best match"), 95.0),
+        (_record("good match"), 80.0),
+        (_record("okay match"), 60.0),
+    ]
+    result = collapse_near_duplicates(scored)
+    result_scores = [score for _, score, _ in result]
+    assert result_scores == sorted(result_scores, reverse=True)
+
+
+# ---------------------------------------------------------------------------
+# group_by_session
+# ---------------------------------------------------------------------------
+
+
+class GroupCase(t.NamedTuple):
+    """Parametrized case for :func:`group_by_session`."""
+
+    test_id: str
+    session_ids: list[str | None]
+    expected_group_count: int
+    expected_keys: list[str | None]
+
+
+GROUP_CASES: tuple[GroupCase, ...] = (
+    GroupCase(
+        "groups-by-session",
+        ["sess-a", "sess-a", "sess-b", "sess-b"],
+        2,
+        ["sess-a", "sess-b"],
+    ),
+    GroupCase(
+        "none-sessions-grouped-together",
+        [None, None, "sess-a"],
+        2,
+        [None, "sess-a"],
+    ),
+    GroupCase(
+        "preserves-first-seen-order",
+        ["sess-b", "sess-a", "sess-b"],
+        2,
+        ["sess-b", "sess-a"],
+    ),
+    GroupCase(
+        "empty-input",
+        [],
+        0,
+        [],
+    ),
+)
+
+
+@pytest.mark.parametrize(
+    GroupCase._fields,
+    GROUP_CASES,
+    ids=[case.test_id for case in GROUP_CASES],
+)
+def test_group_by_session(
+    test_id: str,
+    session_ids: list[str | None],
+    expected_group_count: int,
+    expected_keys: list[str | None],
+) -> None:
+    """Session grouping produces expected buckets."""
+    _ = test_id
+    records: list[tuple[agentgrep.SearchRecord, float, int]] = [
+        (_record(f"text-{i}", session_id=sid), 50.0, 0) for i, sid in enumerate(session_ids)
+    ]
+    result = group_by_session(records)
+    assert len(result) == expected_group_count
+    assert [key for key, _ in result] == expected_keys
+
+
+def test_group_preserves_within_group_order() -> None:
+    """Records within a group keep score-descending order."""
+    records: list[tuple[agentgrep.SearchRecord, float, int]] = [
+        (_record("first", session_id="s1"), 95.0, 0),
+        (_record("second", session_id="s1"), 80.0, 0),
+        (_record("third", session_id="s1"), 60.0, 0),
+    ]
+    result = group_by_session(records)
+    assert len(result) == 1
+    _, entries = result[0]
+    entry_scores = [score for _, score, _ in entries]
+    assert entry_scores == [95.0, 80.0, 60.0]

From 91de1f7ba831e864bb200a150ccb354da516b27c Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 09:21:58 -0500
Subject: [PATCH 03/23] agentgrep(feat[search]): Wire search command with
 progress and pretty output

why: Complete the search command by connecting the ranking engine
to the CLI with progress feedback and pretty-style output.

what:
- Add run_search_command with eager collection + progress + ranking pipeline
- Add _print_search_text with score display and similar-count indicators
- Add _print_search_json for structured output with scores
- Reuse the main() dispatch and __init__ re-export added in PATCH 01/23
- Add integration tests
---
 src/agentgrep/cli/render.py | 124 ++++++++++++++++++++-
 tests/test_cli_search.py    | 212 ++++++++++++++++++++++++++++++++++++
 2 files changed, 333 insertions(+), 3 deletions(-)

diff --git a/src/agentgrep/cli/render.py b/src/agentgrep/cli/render.py
index 9d292da..efaeeed 100644
--- a/src/agentgrep/cli/render.py
+++ b/src/agentgrep/cli/render.py
@@ -435,9 +435,127 @@ def run_search_command(args: SearchArgs) -> int:
     renders in the requested output format. Returns ``0`` when at
     least one result survives ranking, ``1`` otherwise.
     """
-    _ = args
-    msg = "search command not yet wired — ranking engine pending"
-    raise SystemExit(msg)
+    if not args.terms and args.output_mode != "ui":
+        msg = "search requires at least one term unless --ui is used"
+        raise SystemExit(msg)
+    query = agentgrep.SearchQuery(
+        terms=args.terms,
+        search_type=args.search_type,
+        any_term=args.any_term,
+        regex=args.regex,
+        case_sensitive=args.case_sensitive,
+        agents=args.agents,
+        limit=None,
+        compiled=args.compiled,
+    )
+    if args.output_mode == "ui":
+        agentgrep.run_ui(
+            pathlib.Path.home(),
+            query,
+            control=agentgrep.SearchControl(),
+            initial_search_text=args.raw_query or None,
+        )
+        return 0
+    control = agentgrep.SearchControl()
+    human_output = args.output_mode in {"text", "ui"}
+    progress_enabled = args.progress_mode == "always" or (
+        args.progress_mode == "auto" and human_output
+    )
+    progress: agentgrep.SearchProgress
+    if not progress_enabled:
+        progress = agentgrep.noop_search_progress()
+    else:
+        progress = agentgrep.ConsoleSearchProgress(
+            enabled=True,
+            color_mode=args.color_mode,
+            answer_now_hint=False,
+        )
+    records = agentgrep.run_search_query(
+        pathlib.Path.home(),
+        query,
+        progress=progress,
+        control=control,
+    )
+    query_text = " ".join(args.terms)
+    if args.no_rank:
+        scored: list[tuple[agentgrep.SearchRecord, float]] = [(r, 0.0) for r in records]
+    else:
+        from agentgrep.ranking import rank_search_records
+
+        scored = rank_search_records(records, query_text, threshold=args.threshold)
+    from agentgrep.ranking import collapse_near_duplicates, group_by_session
+
+    collapsed = collapse_near_duplicates(scored)
+    if args.limit is not None:
+        collapsed = collapsed[: args.limit]
+    if args.no_group:
+        groups: list[tuple[str | None, list[tuple[agentgrep.SearchRecord, float, int]]]] = [
+            (None, collapsed),
+        ]
+    else:
+        groups = group_by_session(collapsed)
+    if args.output_mode in ("json", "ndjson"):
+        _print_search_json(groups, args)
+        return 0 if collapsed else 1
+    _print_search_text(groups, args)
+    return 0 if collapsed else 1
+
+
+def _print_search_text(
+    groups: list[tuple[str | None, list[tuple[agentgrep.SearchRecord, float, int]]]],
+    args: SearchArgs,
+) -> None:
+    """Render search results with scores and duplicate counts to stdout."""
+    colors = agentgrep.AnsiColors.for_stream(args.color_mode, sys.stdout)
+    first_group = True
+    for session_id, entries in groups:
+        if not first_group:
+            print()
+        first_group = False
+        if session_id is not None and not args.no_group:
+            print(colors.heading(f"[session {session_id[:12]}]"))
+        for record, score, similar_count in entries:
+            path = agentgrep.format_display_path(record.path)
+            score_label = colors.warning(f"{score:.0f}")
+            snippet = record.text[:120].replace("\n", " ")
+            similar_label = ""
+            if similar_count > 0:
+                similar_label = colors.muted(f" (+{similar_count} similar)")
+            header = f"  {colors.path(path)}  {colors.muted(record.agent)}"
+            if record.timestamp:
+                header += f"  {colors.muted(record.timestamp)}"
+            print(f"{score_label}  {snippet}{similar_label}")
+            print(header)
+
+
+def _print_search_json(
+    groups: list[tuple[str | None, list[tuple[agentgrep.SearchRecord, float, int]]]],
+    args: SearchArgs,
+) -> None:
+    """Render search results as JSON with scores."""
+    serialize_search, _, serialize_envelope = maybe_build_pydantic()
+    results: list[dict[str, object]] = []
+    for session_id, entries in groups:
+        for record, score, similar_count in entries:
+            entry = serialize_search(record)
+            entry["score"] = score
+            entry["similar_count"] = similar_count
+            if session_id is not None:
+                entry["group_session_id"] = session_id
+            results.append(entry)
+    if args.output_mode == "json":
+        query_data: dict[str, object] = {
+            "terms": list(args.terms),
+            "agents": list(args.agents),
+            "threshold": args.threshold,
+            "no_rank": args.no_rank,
+            "no_group": args.no_group,
+        }
+        payload = serialize_envelope("search", query_data, results)
+        print(json.dumps(payload, ensure_ascii=False, indent=2))
+    else:
+        for result in results:
+            print(json.dumps(result, ensure_ascii=False))
 
 
 def _compile_grep_patterns(args: GrepArgs) -> list[re.Pattern[str]]:
diff --git a/tests/test_cli_search.py b/tests/test_cli_search.py
index 1270599..d3a1f4b 100644
--- a/tests/test_cli_search.py
+++ b/tests/test_cli_search.py
@@ -7,11 +7,14 @@
 
 from __future__ import annotations
 
+import json
+import pathlib
 import typing as t
 
 import pytest
 
 import agentgrep
+from agentgrep.cli.render import run_search_command
 
 # ---------------------------------------------------------------------------
 # Argument parsing
@@ -233,3 +236,212 @@ def test_search_parse_agent_filter() -> None:
     parsed = agentgrep.parse_args(("search", "--agent", "codex", "bliss"))
     assert isinstance(parsed, agentgrep.SearchArgs)
     assert parsed.agents == ("codex",)
+
+
+# ---------------------------------------------------------------------------
+# Integration tests
+# ---------------------------------------------------------------------------
+
+
+def _make_search_args(**overrides: t.Any) -> agentgrep.SearchArgs:
+    """Build a SearchArgs with sensible test defaults."""
+    base: dict[str, t.Any] = {
+        "terms": ("bliss",),
+        "agents": agentgrep.AGENT_CHOICES,
+        "search_type": "prompts",
+        "any_term": False,
+        "regex": False,
+        "case_sensitive": False,
+        "limit": None,
+        "output_mode": "text",
+        "color_mode": "never",
+        "progress_mode": "never",
+        "threshold": 0,
+        "no_group": False,
+        "no_rank": False,
+        "compiled": None,
+        "raw_query": "",
+    }
+    base.update(overrides)
+    return agentgrep.SearchArgs(**base)
+
+
+def _canned_records() -> list[agentgrep.SearchRecord]:
+    """Return a small set of canned records for search integration tests."""
+    return [
+        agentgrep.SearchRecord(
+            kind="prompt",
+            agent="codex",
+            store="test",
+            adapter_id="test.v1",
+            path=pathlib.Path("/tmp/test-a"),
+            text="the bliss of streaming parsers",
+            session_id="sess-1",
+        ),
+        agentgrep.SearchRecord(
+            kind="prompt",
+            agent="codex",
+            store="test",
+            adapter_id="test.v1",
+            path=pathlib.Path("/tmp/test-b"),
+            text="unrelated noise about caching",
+            session_id="sess-2",
+        ),
+        agentgrep.SearchRecord(
+            kind="prompt",
+            agent="claude",
+            store="test",
+            adapter_id="test.v1",
+            path=pathlib.Path("/tmp/test-c"),
+            text="bliss in every line of code",
+            session_id="sess-1",
+        ),
+    ]
+
+
+def test_search_command_no_terms_raises() -> None:
+    """Search without terms and without --ui raises SystemExit."""
+    args = _make_search_args(terms=())
+    with pytest.raises(SystemExit, match="search requires at least one term"):
+        run_search_command(args)
+
+
+def test_search_routes_through_ranking(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """Search dispatches through the ranking pipeline and produces output."""
+    canned = _canned_records()
+    monkeypatch.setattr(
+        agentgrep,
+        "run_search_query",
+        lambda *_args, **_kwargs: canned,
+    )
+    args = _make_search_args(terms=("bliss",))
+    code = run_search_command(args)
+    assert code == 0
+    captured = capsys.readouterr()
+    assert "bliss" in captured.out
+
+
+def test_search_no_rank_preserves_order(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """--no-rank skips scoring and preserves discovery order."""
+    canned = _canned_records()
+    monkeypatch.setattr(
+        agentgrep,
+        "run_search_query",
+        lambda *_args, **_kwargs: canned,
+    )
+    args = _make_search_args(terms=("bliss",), no_rank=True, no_group=True)
+    code = run_search_command(args)
+    assert code == 0
+    out = capsys.readouterr().out
+    # Grouping is disabled so rows print in query order, each with a 0 score.
+    assert any(line.startswith("0") for line in out.splitlines())
+    seen = [out.find(record.text) for record in canned if record.text in out]
+    assert seen and seen == sorted(seen)
+
+
+def test_search_threshold_filters_low_scores(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """--threshold filters records below the minimum score."""
+    canned = _canned_records()
+    monkeypatch.setattr(
+        agentgrep,
+        "run_search_query",
+        lambda *_args, **_kwargs: canned,
+    )
+    # A near-maximal threshold keeps only near-exact matches (or none)
+    args = _make_search_args(terms=("bliss",), threshold=99)
+    code = run_search_command(args)
+    captured = capsys.readouterr()
+    # The record that never mentions the term cannot reach a score of 99
+    assert "unrelated noise about caching" not in captured.out
+    # The exit code reflects whether any results remain, so it must agree
+    # with whether anything was printed
+    assert code == (0 if captured.out.strip() else 1)
+
+
+def test_search_json_includes_scores(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """--json output includes score and similar_count fields."""
+    canned = _canned_records()
+    monkeypatch.setattr(
+        agentgrep,
+        "run_search_query",
+        lambda *_args, **_kwargs: canned,
+    )
+    args = _make_search_args(terms=("bliss",), output_mode="json", no_group=True)
+    code = run_search_command(args)
+    assert code == 0
+    captured = capsys.readouterr()
+    payload = json.loads(captured.out)
+    assert "results" in payload
+    for result in payload["results"]:
+        assert "score" in result
+        assert "similar_count" in result
+        assert isinstance(result["score"], (int, float))
+        assert isinstance(result["similar_count"], int)
+
+
+def test_search_ndjson_includes_scores(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """--ndjson output includes score and similar_count in each line."""
+    canned = _canned_records()
+    monkeypatch.setattr(
+        agentgrep,
+        "run_search_query",
+        lambda *_args, **_kwargs: canned,
+    )
+    args = _make_search_args(terms=("bliss",), output_mode="ndjson", no_group=True)
+    code = run_search_command(args)
+    assert code == 0
+    captured = capsys.readouterr()
+    lines = [line for line in captured.out.strip().splitlines() if line]
+    assert len(lines) >= 1
+    for line in lines:
+        obj = json.loads(line)
+        assert "score" in obj
+        assert "similar_count" in obj
+
+
+def test_search_empty_results_returns_1(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Search with no matches returns exit code 1."""
+    monkeypatch.setattr(
+        agentgrep,
+        "run_search_query",
+        lambda *_args, **_kwargs: [],
+    )
+    args = _make_search_args(terms=("nonexistent",))
+    code = run_search_command(args)
+    assert code == 1
+
+
+def test_search_limit_caps_results(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """--limit caps the number of results after ranking."""
+    canned = _canned_records()
+    monkeypatch.setattr(
+        agentgrep,
+        "run_search_query",
+        lambda *_args, **_kwargs: canned,
+    )
+    args = _make_search_args(terms=("bliss",), limit=1, no_group=True, output_mode="json")
+    code = run_search_command(args)
+    assert code == 0
+    captured = capsys.readouterr()
+    payload = json.loads(captured.out)
+    assert len(payload["results"]) == 1

From bf2dfd4f14b9c06f218dd2e15cf62761236e221a Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 09:59:54 -0500
Subject: [PATCH 04/23] =?UTF-8?q?agentgrep(fix[ranking]):=20Skip=20O(n?=
 =?UTF-8?q?=C2=B2)=20collapse=20when=20--no-rank,=20add=20size=20guard?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

why: collapse_near_duplicates runs pairwise WRatio between all
records — O(n²) with expensive C calls. It was called
unconditionally even with --no-rank, hanging on large result
sets. Users who pass --no-rank explicitly want fast unranked
output.

what:
- Skip collapse_near_duplicates entirely when --no-rank is set;
  emit records with score=0, similar_count=0
- Add size guard in collapse_near_duplicates: if len(scored) > 500,
  skip pairwise comparison and return records as-is
- Move rank + collapse imports inside the else branch (lazy load
  only when ranking is active)
---
 src/agentgrep/cli/render.py | 7 ++++---
 src/agentgrep/ranking.py    | 2 ++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/agentgrep/cli/render.py b/src/agentgrep/cli/render.py
index efaeeed..3c7c499 100644
--- a/src/agentgrep/cli/render.py
+++ b/src/agentgrep/cli/render.py
@@ -479,13 +479,14 @@ def run_search_command(args: SearchArgs) -> int:
     query_text = " ".join(args.terms)
     if args.no_rank:
         scored: list[tuple[agentgrep.SearchRecord, float]] = [(r, 0.0) for r in records]
+        collapsed: list[tuple[agentgrep.SearchRecord, float, int]] = [(r, 0.0, 0) for r in records]
     else:
-        from agentgrep.ranking import rank_search_records
+        from agentgrep.ranking import collapse_near_duplicates, rank_search_records
 
         scored = rank_search_records(records, query_text, threshold=args.threshold)
-    from agentgrep.ranking import collapse_near_duplicates, group_by_session
+        collapsed = collapse_near_duplicates(scored)
+    from agentgrep.ranking import group_by_session
 
-    collapsed = collapse_near_duplicates(scored)
     if args.limit is not None:
         collapsed = collapsed[: args.limit]
     if args.no_group:
diff --git a/src/agentgrep/ranking.py b/src/agentgrep/ranking.py
index 9942e2e..32eeec7 100644
--- a/src/agentgrep/ranking.py
+++ b/src/agentgrep/ranking.py
@@ -85,6 +85,8 @@ def collapse_near_duplicates(
 
     if not scored:
         return []
+    if len(scored) > 500:
+        return [(r, s, 0) for r, s in scored]
     result: list[tuple[SearchRecord, float, int]] = []
     consumed: set[int] = set()
     for i, (record_i, score_i) in enumerate(scored):

From 965df5451561bc43cb6b4f324e067df6c1f37313 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 10:01:52 -0500
Subject: [PATCH 05/23] agentgrep(fix[parser]): Add collision detection for
 search flag/field mix

why: grep and find both reject mixing --agent with agent: inline
predicates (via _grep_explicit_flags / _find_explicit_flags). The
reintroduced search subparser was missing this validation,
silently accepting nonsensical queries like
`agentgrep search --agent codex agent:claude bliss`.

what:
- Add _search_explicit_flags() mapping --agent and --type flags
- Pass explicit_flags to _maybe_compile_query in _build_search_args
- Parse-time error now raised on flag/field conflicts
---
 src/agentgrep/cli/parser.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/agentgrep/cli/parser.py b/src/agentgrep/cli/parser.py
index 9f4f187..99a79be 100644
--- a/src/agentgrep/cli/parser.py
+++ b/src/agentgrep/cli/parser.py
@@ -761,6 +761,16 @@ def build_docs_parser() -> argparse.ArgumentParser:
     return create_parser("never").parser
 
 
+def _search_explicit_flags(namespace: argparse.Namespace) -> dict[str, str]:
+    """Map query-field name → CLI flag name for `search` flag/field collisions."""
+    flags: dict[str, str] = {}
+    if t.cast("list[str]", namespace.agent):
+        flags["agent"] = "--agent"
+    if t.cast("str", namespace.search_type) != "prompts":
+        flags["type"] = "--type"
+    return flags
+
+
 def _grep_explicit_flags(namespace: argparse.Namespace) -> dict[str, str]:
     """Map query-field name → CLI flag name for `grep` flag/field collisions."""
     flags: dict[str, str] = {}
@@ -1146,6 +1156,7 @@ def _build_search_args(
         bundle=bundle,
         color_mode=color_mode,
         subparser=bundle.search_parser,
+        explicit_flags=_search_explicit_flags(namespace),
     )
     final_terms: tuple[str, ...] = (
         residual_terms if search_compiled is not None else tuple(terms_list)

From e094ad826a67d80688db8838ef22e1d7532cfa7e Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 10:29:39 -0500
Subject: [PATCH 06/23] agentgrep(fix[parser]): Reject --threshold with
 --no-rank

why: --threshold only takes effect inside rank_search_records,
which is skipped when --no-rank is set. Silently accepting both
flags misleads the user into thinking their threshold filter is
active.

what:
- Add parse-time error when both --no-rank and --threshold > 0
- Split all-ranking-flags test into two valid cases
---
 src/agentgrep/cli/parser.py |  6 ++++++
 tests/test_cli_search.py    | 18 +++++++++++++++---
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/agentgrep/cli/parser.py b/src/agentgrep/cli/parser.py
index 99a79be..0e63536 100644
--- a/src/agentgrep/cli/parser.py
+++ b/src/agentgrep/cli/parser.py
@@ -1150,6 +1150,12 @@ def _build_search_args(
     if threshold < 0 or threshold > 100:
         with configured_color_environment(color_mode):
             bundle.search_parser.error("--threshold must be between 0 and 100")
+    no_rank = t.cast("bool", namespace.no_rank)
+    if no_rank and threshold > 0:
+        with configured_color_environment(color_mode):
+            bundle.search_parser.error(
+                "--threshold has no effect with --no-rank (ranking is disabled)",
+            )
 
     search_compiled, residual_terms = _maybe_compile_query(
         terms_list,
diff --git a/tests/test_cli_search.py b/tests/test_cli_search.py
index d3a1f4b..bc8805c 100644
--- a/tests/test_cli_search.py
+++ b/tests/test_cli_search.py
@@ -98,12 +98,24 @@ class SearchParseCase(t.NamedTuple):
         False,
     ),
     SearchParseCase(
-        "all-ranking-flags",
-        ("search", "--threshold", "50", "--no-group", "--no-rank", "query"),
+        "no-group-and-no-rank",
+        ("search", "--no-group", "--no-rank", "query"),
         ("query",),
-        50,
+        0,
+        True,
         True,
+        "prompts",
+        False,
+        False,
+        False,
+    ),
+    SearchParseCase(
+        "threshold-with-ranking",
+        ("search", "--threshold", "50", "--no-group", "query"),
+        ("query",),
+        50,
         True,
+        False,
         "prompts",
         False,
         False,

From 2cb5a810f4ce79ec22594d231dfec4a75a6f630a Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 10:29:39 -0500
Subject: [PATCH 07/23] agentgrep(docs[cli]): Add search to CLI_DESCRIPTION

why: search subcommand was reintroduced but CLI_DESCRIPTION only
listed grep/fuzzy/find/ui.

what:
- Add search description to the CLI help intro text
---
 src/agentgrep/__init__.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py
index b87962d..4e260dc 100644
--- a/src/agentgrep/__init__.py
+++ b/src/agentgrep/__init__.py
@@ -150,7 +150,8 @@ def build_description(
 CLI_DESCRIPTION = build_description(
     """
     Read-only search across Codex, Claude, Cursor, and Gemini local
-    stores. Pick a subcommand from the list below: ``grep`` for
+    stores. Pick a subcommand from the list below: ``search`` for
+    ranked results with dedup and session grouping, ``grep`` for
     rg-shaped content search, ``fuzzy`` for fzf-style filtering,
     ``find`` for store enumeration, ``ui`` for the interactive
     Textual explorer.

From 0b50453b43b6e0e1bfe722bae91c6f2a280ad5f1 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 12:12:59 -0500
Subject: [PATCH 08/23] agentgrep(fix[search]): Restore Enter-to-answer-now for
 search

why: run_search_command created a SearchControl but never wired
up the AnswerNowInputListener thread, so pressing Enter during a
long search had no effect and the progress hint was hidden.

what:
- Wire AnswerNowInputListener with start/stop around run_search_query
- Set answer_now_hint based on TTY detection (stdin + stderr)
- Wrap run_search_query in try/finally to ensure listener.stop()
---
 src/agentgrep/cli/render.py | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/src/agentgrep/cli/render.py b/src/agentgrep/cli/render.py
index 3c7c499..eb42527 100644
--- a/src/agentgrep/cli/render.py
+++ b/src/agentgrep/cli/render.py
@@ -461,6 +461,13 @@ def run_search_command(args: SearchArgs) -> int:
     progress_enabled = args.progress_mode == "always" or (
         args.progress_mode == "auto" and human_output
     )
+    answer_now_enabled = (
+        progress_enabled
+        and human_output
+        and bool(getattr(sys.stdin, "isatty", lambda: False)())
+        and bool(getattr(sys.stderr, "isatty", lambda: False)())
+    )
+    listener = agentgrep.AnswerNowInputListener(control) if answer_now_enabled else None
     progress: agentgrep.SearchProgress
     if not progress_enabled:
         progress = agentgrep.noop_search_progress()
@@ -468,14 +475,20 @@ def run_search_command(args: SearchArgs) -> int:
         progress = agentgrep.ConsoleSearchProgress(
             enabled=True,
             color_mode=args.color_mode,
-            answer_now_hint=False,
+            answer_now_hint=answer_now_enabled,
         )
-    records = agentgrep.run_search_query(
-        pathlib.Path.home(),
-        query,
-        progress=progress,
-        control=control,
-    )
+    if listener is not None:
+        listener.start()
+    try:
+        records = agentgrep.run_search_query(
+            pathlib.Path.home(),
+            query,
+            progress=progress,
+            control=control,
+        )
+    finally:
+        if listener is not None:
+            listener.stop()
     query_text = " ".join(args.terms)
     if args.no_rank:
         scored: list[tuple[agentgrep.SearchRecord, float]] = [(r, 0.0) for r in records]

From 73ae939a84078bdfdd8886a12d92e309b6144072 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 14:46:50 -0500
Subject: [PATCH 09/23] agentgrep(fix[search]): Keep progress live during large
 parses

why: Large Codex and Claude-style JSONL sources can spend seconds inside parsing work before any deduped result is emitted, which leaves the CLI progress line looking frozen. Huge Codex tool-output records make this worse because they can hold the GIL while producing no searchable prompt record.

what:
- Add optional in-source progress updates with cooperative parser yields while preserving final deduped result semantics.
- Show source detail in CLI and TUI progress snapshots alongside source counters.
- Skip large Codex function_call_output lines before JSON decoding, discarding them cooperatively because they cannot produce prompt records.
- Cover progress callbacks, JSONL yielding, raw tool-output skipping, and progress-line formatting in tests.
---
 src/agentgrep/__init__.py | 187 +++++++++++++++++++++++++++++-
 src/agentgrep/ui/app.py   |   2 +
 tests/test_agentgrep.py   | 233 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 412 insertions(+), 10 deletions(-)

diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py
index 4e260dc..3d336cf 100644
--- a/src/agentgrep/__init__.py
+++ b/src/agentgrep/__init__.py
@@ -1117,6 +1117,24 @@ class SourceHandle:
     mtime_ns: int
 
 
+type SourceProgressCallback = cabc.Callable[[int, int, SourceHandle, int, int], None]
+
+_SOURCE_PROGRESS_RECORD_INTERVAL = 128
+"""Parsed-record cadence for in-source progress updates and GIL yields."""
+
+_JSONL_YIELD_LINE_INTERVAL = 128
+"""Decoded-line cadence for cooperative JSONL parser yields."""
+
+_JSONL_PREFIX_BYTES = 4096
+"""Bytes read up front when a raw-line skip predicate is active."""
+
+_JSONL_SKIP_CHUNK_BYTES = 1024 * 1024
+"""Chunk size for discarding skipped oversized JSONL lines."""
+
+_CODEX_RAW_SKIP_MIN_BYTES = 1024 * 1024
+"""Minimum Codex session size before enabling raw-line output skipping."""
+
+
 @dataclasses.dataclass(slots=True)
 class SearchRecord:
     """Normalized prompt/history record."""
@@ -1451,6 +1469,22 @@ def source_finished(
             detail=f"{records} records, {format_match_count(matches)} in {source.path.name}",
         )
 
+    def source_progress(
+        self,
+        index: int,
+        total: int,
+        source: SourceHandle,
+        records: int,
+        matches: int,
+    ) -> None:
+        """Report in-source scan progress."""
+        self.set_status(
+            "scanning",
+            current=index,
+            total=total,
+            detail=format_source_progress_detail(records, matches),
+        )
+
     def result_added(self, count: int) -> None:
         """Report deduped result count."""
         if not self._enabled:
@@ -1663,7 +1697,10 @@ def _status_text(self) -> str:
             detail = self._detail
         if current is not None and total is not None:
             count = self._colors.warning(f"{current}/{total}")
-            return f"{self._colors.heading(phase)} {count} {self._colors.muted('sources')}"
+            text = f"{self._colors.heading(phase)} {count} {self._colors.muted('sources')}"
+            if detail:
+                return f"{text} | {self._colors.muted(detail)}"
+            return text
         if detail:
             return f"{self._colors.heading(phase)} {self._colors.muted(detail)}"
         return self._colors.heading(phase)
@@ -1682,6 +1719,12 @@ def format_match_count(count: int) -> str:
     return f"{count} {suffix}"
 
 
+def format_source_progress_detail(records: int, matches: int) -> str:
+    """Return a concise in-source progress detail."""
+    match_suffix = "source match" if matches == 1 else "source matches"
+    return f"{records} records, {matches} {match_suffix}"
+
+
 @dataclasses.dataclass(frozen=True)
 class ProgressSnapshot:
     """Immutable view of search-progress state for one render pass."""
@@ -1719,19 +1762,27 @@ def format_search_progress_line(
         each segment styled through ``colors``.
     """
     label_part = f"{colors.heading('Searching')} {colors.highlight(snapshot.query_label)}"
+    detail_part = colors.muted(snapshot.detail) if snapshot.detail else None
     if snapshot.current is not None and snapshot.total is not None:
         count = colors.warning(f"{snapshot.current}/{snapshot.total}")
         status_part = f"{colors.heading(snapshot.phase)} {count} {colors.muted('sources')}"
     elif snapshot.detail:
         status_part = f"{colors.heading(snapshot.phase)} {colors.muted(snapshot.detail)}"
+        detail_part = None
     else:
         status_part = colors.heading(snapshot.phase)
     parts = [
         label_part,
         status_part,
-        colors.warning(format_match_count(snapshot.matches)),
-        colors.muted(f"{snapshot.elapsed:.1f}s"),
     ]
+    if detail_part:
+        parts.append(detail_part)
+    parts.extend(
+        [
+            colors.warning(format_match_count(snapshot.matches)),
+            colors.muted(f"{snapshot.elapsed:.1f}s"),
+        ],
+    )
     if answer_now_hint:
         parts.append(colors.white("[Press enter, answer now]"))
     return " | ".join(parts)
@@ -1742,6 +1793,20 @@ def noop_search_progress() -> SearchProgress:
     return NoopSearchProgress()
 
 
+def _report_source_progress(
+    progress: SearchProgress,
+    index: int,
+    total: int,
+    source: SourceHandle,
+    records: int,
+    matches: int,
+) -> None:
+    """Call the optional in-source progress hook when a reporter exposes it."""
+    callback = getattr(progress, "source_progress", None)
+    if callable(callback):
+        t.cast("SourceProgressCallback", callback)(index, total, source, records, matches)
+
+
 @dataclasses.dataclass(frozen=True)
 class StreamingRecordsBatch:
     """Batch of newly deduped records emitted by :meth:`StreamingSearchProgress.flush`."""
@@ -1896,6 +1961,22 @@ def source_finished(
             self._detail = f"{records} records, {format_match_count(matches)} in {source.path.name}"
         self._emit_progress()
 
+    def source_progress(
+        self,
+        index: int,
+        total: int,
+        source: SourceHandle,
+        records: int,
+        matches: int,
+    ) -> None:
+        """Report in-source scan progress."""
+        with self._lock:
+            self._phase = "scanning"
+            self._current = index
+            self._total = total
+            self._detail = format_source_progress_detail(records, matches)
+        self._emit_progress()
+
     def result_added(self, count: int) -> None:
         """Update the cumulative match counter."""
         with self._lock:
@@ -2079,6 +2160,14 @@ def file_mtime_ns(path: pathlib.Path) -> int:
         return 0
 
 
+def _file_size(path: pathlib.Path) -> int:
+    """Return file size in bytes, falling back to zero on stat failure."""
+    try:
+        return path.stat().st_size
+    except OSError:
+        return 0
+
+
 def resolve_env_root(env_var: str, default: pathlib.Path) -> pathlib.Path:
     """Resolve a base directory from an environment variable, with safety.
 
@@ -2634,6 +2723,16 @@ def current_count() -> int:
             if matches_record(record, query):
                 matches_seen += 1
                 matching_records.append(record)
+            if records_seen % _SOURCE_PROGRESS_RECORD_INTERVAL == 0:
+                _report_source_progress(
+                    active_progress,
+                    index,
+                    total,
+                    source,
+                    records_seen,
+                    matches_seen,
+                )
+                time.sleep(0)
         active_progress.source_finished(index, total, source, records_seen, matches_seen)
         matching_records.sort(key=search_record_sort_key, reverse=True)
         for record in matching_records:
@@ -2745,7 +2844,12 @@ def parse_codex_session_file(
     """Parse Codex session JSONL files."""
     session_id = source.path.stem
     session_model: str | None = None
-    for event in iter_jsonl(source.path):
+    events = (
+        _iter_jsonl(source.path, skip_line=_is_codex_function_call_output_line)
+        if _file_size(source.path) >= _CODEX_RAW_SKIP_MIN_BYTES
+        else iter_jsonl(source.path)
+    )
+    for event in events:
         if not isinstance(event, dict):
             continue
         event_type = str(event.get("type", ""))
@@ -3285,10 +3389,69 @@ def read_json_file(path: pathlib.Path) -> JSONValue | None:
 
 def iter_jsonl(path: pathlib.Path) -> cabc.Iterator[JSONValue]:
     """Yield decoded JSON objects from a JSONL file."""
+    yield from _iter_jsonl(path)
+
+
+def _iter_jsonl(
+    path: pathlib.Path,
+    *,
+    skip_line: cabc.Callable[[str], bool] | None = None,
+) -> cabc.Iterator[JSONValue]:
+    """Yield decoded JSON objects from a JSONL file with an optional raw-line filter."""
+    if skip_line is not None:
+        yield from _iter_jsonl_with_raw_skip(path, skip_line)
+        return
     try:
         with path.open(encoding="utf-8") as handle:
+            decoded_lines = 0
             for line in handle:
                 stripped = line.strip()
+                if not stripped:
+                    continue
+                decoded_lines += 1
+                if decoded_lines % _JSONL_YIELD_LINE_INTERVAL == 0:
+                    time.sleep(0)
+                if skip_line is not None and skip_line(stripped):
+                    continue
+                try:
+                    parsed = t.cast("object", json.loads(stripped))
+                except json.JSONDecodeError:
+                    continue
+                if isinstance(parsed, (dict, list, str, int, float, bool)) or parsed is None:
+                    yield t.cast("JSONValue", parsed)
+    except OSError:
+        return
+
+
+def _iter_jsonl_with_raw_skip(
+    path: pathlib.Path,
+    skip_line: cabc.Callable[[str], bool],
+) -> cabc.Iterator[JSONValue]:
+    """Yield decoded JSON objects while skipping matched raw lines in chunks."""
+    try:
+        with path.open("rb") as handle:
+            decoded_lines = 0
+            while True:
+                prefix = handle.readline(_JSONL_PREFIX_BYTES)
+                if not prefix:
+                    break
+                if not prefix.strip():
+                    continue
+                decoded_lines += 1
+                if decoded_lines % _JSONL_YIELD_LINE_INTERVAL == 0:
+                    time.sleep(0)
+                prefix_text = prefix.decode("utf-8", errors="replace")
+                if skip_line(prefix_text):
+                    _discard_rest_of_line(handle, prefix)
+                    continue
+                raw_line = bytearray(prefix)
+                while raw_line and not raw_line.endswith(b"\n"):
+                    chunk = handle.readline(_JSONL_SKIP_CHUNK_BYTES)
+                    if not chunk:
+                        break
+                    raw_line.extend(chunk)
+                    time.sleep(0)
+                stripped = raw_line.decode("utf-8", errors="replace").strip()
                 if not stripped:
                     continue
                 try:
@@ -3301,6 +3464,22 @@ def iter_jsonl(path: pathlib.Path) -> cabc.Iterator[JSONValue]:
         return
 
 
+def _discard_rest_of_line(handle: t.BinaryIO, prefix: bytes) -> None:
+    """Discard the unread remainder of the current physical line."""
+    chunk = prefix
+    while chunk and not chunk.endswith(b"\n"):
+        chunk = handle.readline(_JSONL_SKIP_CHUNK_BYTES)
+        time.sleep(0)
+
+
+def _is_codex_function_call_output_line(line: str) -> bool:
+    """Return whether a Codex JSONL line is a tool output record."""
+    prefix = line[:512].replace(" ", "")
+    return (
+        '"type":"response_item"' in prefix and '"payload":{"type":"function_call_output"' in prefix
+    )
+
+
 def candidate_from_mapping(
     mapping: dict[str, object],
     *,
diff --git a/src/agentgrep/ui/app.py b/src/agentgrep/ui/app.py
index 6be8c42..8b4c16c 100644
--- a/src/agentgrep/ui/app.py
+++ b/src/agentgrep/ui/app.py
@@ -1173,6 +1173,8 @@ def _apply_progress(self, snapshot: ProgressSnapshot) -> None:
                     f"Searching {label} | "
                     f"{snapshot.phase} {snapshot.current}/{snapshot.total} sources"
                 )
+                if snapshot.detail:
+                    status = f"{status} | {snapshot.detail}"
             elif snapshot.detail:
                 status = f"Searching {label} | {snapshot.phase} {snapshot.detail}"
             else:
diff --git a/tests/test_agentgrep.py b/tests/test_agentgrep.py
index 2042e83..d12d2c4 100644
--- a/tests/test_agentgrep.py
+++ b/tests/test_agentgrep.py
@@ -621,6 +621,160 @@ def iter_records(source: object) -> cabc.Iterator[object]:
     assert progress.counts == [1]
 
 
+def test_collect_search_records_reports_in_source_progress_and_yields_gil(
+    tmp_path: pathlib.Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Large source scans report parser progress and cooperatively yield."""
+    agentgrep = t.cast("t.Any", load_agentgrep_module())
+    source = agentgrep.SourceHandle(
+        agent="codex",
+        store="codex.sessions",
+        adapter_id="codex.sessions_jsonl.v1",
+        path=tmp_path / "session.jsonl",
+        path_kind="session_file",
+        source_kind="jsonl",
+        search_root=None,
+        mtime_ns=1,
+    )
+    query = agentgrep.SearchQuery(
+        terms=("bliss",),
+        search_type="prompts",
+        any_term=False,
+        regex=False,
+        case_sensitive=False,
+        agents=("codex",),
+        limit=None,
+        dedupe=False,
+    )
+
+    class CapturingProgress:
+        def __init__(self) -> None:
+            self.source_progress_events: list[tuple[int, int, int, int]] = []
+
+        def source_started(self, index: int, total: int, source: object) -> None: ...
+        def source_finished(
+            self,
+            index: int,
+            total: int,
+            source: object,
+            records: int,
+            matches: int,
+        ) -> None: ...
+        def result_added(self, count: int) -> None: ...
+        def record_added(self, record: object) -> None: ...
+
+        def source_progress(
+            self,
+            index: int,
+            total: int,
+            source: object,
+            records: int,
+            matches: int,
+        ) -> None:
+            self.source_progress_events.append((index, total, records, matches))
+
+    def iter_records(source: object) -> cabc.Iterator[object]:
+        for index in range(agentgrep._SOURCE_PROGRESS_RECORD_INTERVAL + 1):
+            yield agentgrep.SearchRecord(
+                kind="prompt",
+                agent="codex",
+                store="codex.sessions",
+                adapter_id="codex.sessions_jsonl.v1",
+                path=tmp_path / "session.jsonl",
+                text=f"bliss {index}",
+            )
+
+    sleep_calls: list[float] = []
+    monkeypatch.setattr(agentgrep, "iter_source_records", iter_records)
+    monkeypatch.setattr(agentgrep.time, "sleep", sleep_calls.append)
+    progress = CapturingProgress()
+
+    _ = agentgrep.collect_search_records(query, [source], progress=progress)
+
+    assert progress.source_progress_events == [
+        (
+            1,
+            1,
+            agentgrep._SOURCE_PROGRESS_RECORD_INTERVAL,
+            agentgrep._SOURCE_PROGRESS_RECORD_INTERVAL,
+        ),
+    ]
+    assert sleep_calls == [0]
+
+
+def test_iter_jsonl_cooperatively_yields_during_large_files(
+    tmp_path: pathlib.Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """JSONL parsing yields even before search records are produced."""
+    agentgrep = t.cast("t.Any", load_agentgrep_module())
+    path = tmp_path / "events.jsonl"
+    lines = [
+        json.dumps({"type": "noise", "index": index})
+        for index in range(agentgrep._JSONL_YIELD_LINE_INTERVAL + 1)
+    ]
+    path.write_text("\n".join(lines), encoding="utf-8")
+    sleep_calls: list[float] = []
+    monkeypatch.setattr(agentgrep.time, "sleep", sleep_calls.append)
+
+    parsed = list(agentgrep.iter_jsonl(path))
+
+    assert len(parsed) == agentgrep._JSONL_YIELD_LINE_INTERVAL + 1
+    assert sleep_calls == [0]
+
+
+def test_parse_codex_session_skips_function_call_output_before_json_decode(
+    tmp_path: pathlib.Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Codex tool-output lines cannot become prompt records and stay unparsed."""
+    agentgrep = t.cast("t.Any", load_agentgrep_module())
+    path = tmp_path / "session.jsonl"
+    tool_output_line = json.dumps(
+        {
+            "timestamp": "2026-01-01T00:00:00Z",
+            "type": "response_item",
+            "payload": {
+                "type": "function_call_output",
+                "call_id": "call_1",
+                "output": "bliss" + ("x" * agentgrep._CODEX_RAW_SKIP_MIN_BYTES),
+            },
+        },
+    )
+    message_line = json.dumps(
+        {
+            "timestamp": "2026-01-01T00:00:01Z",
+            "type": "response_item",
+            "payload": {"role": "user", "content": "bliss prompt"},
+        },
+    )
+    path.write_text(f"{tool_output_line}\n{message_line}\n", encoding="utf-8")
+    source = agentgrep.SourceHandle(
+        agent="codex",
+        store="codex.sessions",
+        adapter_id="codex.sessions_jsonl.v1",
+        path=path,
+        path_kind="session_file",
+        source_kind="jsonl",
+        search_root=None,
+        mtime_ns=1,
+    )
+    decoded_payloads: list[str] = []
+    original_loads = agentgrep.json.loads
+
+    def tracking_loads(payload: str) -> object:
+        decoded_payloads.append(payload)
+        return original_loads(payload)
+
+    monkeypatch.setattr(agentgrep.json, "loads", tracking_loads)
+
+    records = list(agentgrep.parse_codex_session_file(source))
+
+    assert [record.text for record in records] == ["bliss prompt"]
+    assert decoded_payloads == [message_line]
+
+
 def test_streaming_search_progress_buffers_and_flushes_records(
     tmp_path: pathlib.Path,
     monkeypatch: pytest.MonkeyPatch,
@@ -731,6 +885,7 @@ def test_streaming_search_progress_translates_progress_callbacks(
     progress.sources_discovered(10)
     progress.sources_planned(7, 10)
     progress.source_started(1, 7, source)
+    progress.source_progress(1, 7, source, records=128, matches=3)
     progress.source_finished(1, 7, source, records=5, matches=2)
     progress.result_added(2)
     progress.finish(2)
@@ -738,7 +893,7 @@ def test_streaming_search_progress_translates_progress_callbacks(
     snapshots = [e for e in emitted if isinstance(e, agentgrep.ProgressSnapshot)]
     finished = [e for e in emitted if isinstance(e, agentgrep.StreamingSearchFinished)]
 
-    assert len(snapshots) == 5
+    assert len(snapshots) == 6
     assert snapshots[0].phase == "discovering"
     assert snapshots[0].query_label == "bliss"
     assert snapshots[1].phase == "discovered"
@@ -751,8 +906,10 @@ def test_streaming_search_progress_translates_progress_callbacks(
     assert snapshots[3].total == 7
     assert snapshots[3].detail == "session.jsonl"
     assert snapshots[4].phase == "scanning"
-    assert snapshots[4].detail is not None
-    assert "matches" in snapshots[4].detail
+    assert snapshots[4].detail == "128 records, 3 source matches"
+    assert snapshots[5].phase == "scanning"
+    assert snapshots[5].detail is not None
+    assert "matches" in snapshots[5].detail
 
     assert len(finished) == 1
     assert finished[0].outcome == "complete"
@@ -3308,6 +3465,70 @@ def test_progress_force_color_enables_auto_for_non_tty(
     assert "Searching bliss" in strip_ansi(out)
 
 
+class ProgressLineCase(t.NamedTuple):
+    """Formatting case for single-line search progress."""
+
+    test_id: str
+    snapshot: object
+    expected: str
+
+
+def _progress_line_cases() -> tuple[ProgressLineCase, ...]:
+    """Build progress-line cases after importing the runtime module."""
+    agentgrep = t.cast("t.Any", load_agentgrep_module())
+    return (
+        ProgressLineCase(
+            test_id="source-count-with-detail",
+            snapshot=agentgrep.ProgressSnapshot(
+                query_label="bliss",
+                phase="scanning",
+                current=5,
+                total=9,
+                detail="128 records, 3 source matches",
+                matches=10,
+                elapsed=1.5,
+            ),
+            expected=(
+                "Searching bliss | scanning 5/9 sources | "
+                "128 records, 3 source matches | 10 matches | 1.5s"
+            ),
+        ),
+        ProgressLineCase(
+            test_id="detail-without-source-count",
+            snapshot=agentgrep.ProgressSnapshot(
+                query_label="bliss",
+                phase="prefiltering",
+                current=None,
+                total=None,
+                detail="~/.codex/sessions/",
+                matches=0,
+                elapsed=0.5,
+            ),
+            expected="Searching bliss | prefiltering ~/.codex/sessions/ | 0 matches | 0.5s",
+        ),
+    )
+
+
+_PROGRESS_LINE_CASES = _progress_line_cases()
+
+
+@pytest.mark.parametrize(
+    "case",
+    _PROGRESS_LINE_CASES,
+    ids=[c.test_id for c in _PROGRESS_LINE_CASES],
+)
+def test_format_search_progress_line_includes_detail(case: ProgressLineCase) -> None:
+    """Current source detail stays visible alongside source counters."""
+    agentgrep = t.cast("t.Any", load_agentgrep_module())
+
+    line = agentgrep.format_search_progress_line(
+        case.snapshot,
+        colors=agentgrep.AnsiColors.for_stream("never", io.StringIO()),
+    )
+
+    assert line == case.expected
+
+
 def test_non_tty_progress_emits_start_heartbeat_and_finish() -> None:
     agentgrep = t.cast("t.Any", load_agentgrep_module())
     stream = io.StringIO()
@@ -3526,9 +3747,9 @@ def test_tty_progress_interrupt_preserves_current_summary(
     progress.interrupt()
 
     out = stream.getvalue()
-    assert "Searching bliss | scanning 118/126 sources | 109 matches" in out
+    assert "Searching bliss | scanning 118/126 sources | rollout.jsonl | 109 matches" in out
     assert out.endswith("\n")
-    assert "\r\x1b[2KSearching bliss | scanning 118/126 sources | 109 matches" in out
+    assert "\r\x1b[2KSearching bliss | scanning 118/126 sources | rollout.jsonl" in out
 
 
 def test_tty_progress_prefilter_uses_private_directory_path(
@@ -3593,7 +3814,7 @@ def test_non_tty_progress_interrupt_emits_current_summary() -> None:
 
     out = stream.getvalue()
     assert "Searching bliss\n" in out
-    assert "Searching bliss | scanning 118/126 sources | 109 matches" in out
+    assert "Searching bliss | scanning 118/126 sources | rollout.jsonl | 109 matches" in out
 
 
 def test_main_handles_keyboard_interrupt_without_traceback(

From ef3a8b0fc41d03936933491d07f33c843507cd76 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 14:57:56 -0500
Subject: [PATCH 10/23] agentgrep(fix[progress]): Keep TTY progress to one row

why: Showing in-source progress made the live TTY status line long enough to wrap on narrow terminals. The renderer only clears one terminal row with carriage-return plus clear-line, so wrapped renders leave stale rows behind and look like a flood.

what:
- Make TTY progress rendering terminal-width aware, dropping optional detail and the answer-now hint before ANSI-safe truncation.
- Add a regression test for narrow terminal rendering.
- Preserve full detail formatting for callers without a width constraint.
---
 src/agentgrep/__init__.py | 86 ++++++++++++++++++++++++++++++++++++---
 tests/test_agentgrep.py   | 46 +++++++++++++++++++++
 2 files changed, 126 insertions(+), 6 deletions(-)

diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py
index 3d336cf..2e27470 100644
--- a/src/agentgrep/__init__.py
+++ b/src/agentgrep/__init__.py
@@ -124,6 +124,7 @@
         "--ui",
     },
 )
+ANSI_CSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
 
 
 def build_description(
@@ -391,6 +392,39 @@ def _hard_truncate(text: str, max_width: int) -> str:
     return text[: max_width - 1] + "…"
 
 
+def _visible_width(text: str) -> int:
+    """Return display width after stripping ANSI CSI escape sequences."""
+    return len(ANSI_CSI_RE.sub("", text))
+
+
+def _hard_truncate_ansi(text: str, max_width: int) -> str:
+    """Truncate ANSI-colored text to ``max_width`` visible cells."""
+    if max_width <= 0:
+        return ""
+    if _visible_width(text) <= max_width:
+        return text
+    if max_width == 1:
+        return "…"
+    output: list[str] = []
+    visible = 0
+    index = 0
+    saw_escape = False
+    while index < len(text) and visible < max_width - 1:
+        match = ANSI_CSI_RE.match(text, index)
+        if match is not None:
+            output.append(match.group(0))
+            index = match.end()
+            saw_escape = True
+            continue
+        output.append(text[index])
+        visible += 1
+        index += 1
+    output.append("…")
+    if saw_escape:
+        output.append(AnsiColors.RESET)
+    return "".join(output)
+
+
 def truncate_lines(text: str, max_lines: int) -> str:
     """Return the first ``max_lines`` lines of ``text``, with an overflow marker.
 
@@ -1589,8 +1623,10 @@ def _tty_loop(self) -> None:
             self._stop_event.wait(self._refresh_interval)
 
     def _render_tty(self, frame: str) -> None:
-        summary = self._summary()
-        line = f"{self._colors.info(frame)} {summary}"
+        frame_text = self._colors.info(frame)
+        summary_width = max(1, self._terminal_width() - _visible_width(frame_text) - 1)
+        summary = self._summary(max_width=summary_width)
+        line = f"{frame_text} {summary}"
         with self._lock:
             try:
                 self._stream.write("\r\033[2K" + line)
@@ -1611,7 +1647,7 @@ def _clear_tty_line(self) -> None:
             self._last_line_len = 0
 
     def _write_tty_summary_line(self) -> None:
-        line = self._summary()
+        line = self._summary(max_width=self._terminal_width())
         self._write_tty_line(line)
 
     def _write_tty_line(self, line: str) -> None:
@@ -1648,13 +1684,20 @@ def _emit_line(self, line: str) -> None:
         except OSError, ValueError:
             pass
 
-    def _summary(self) -> str:
+    def _summary(self, *, max_width: int | None = None) -> str:
         return format_search_progress_line(
             self._snapshot(),
             colors=self._colors,
             answer_now_hint=self._answer_now_hint,
+            max_width=max_width,
         )
 
+    def _terminal_width(self) -> int:
+        try:
+            return max(1, os.get_terminal_size(self._stream.fileno()).columns)
+        except AttributeError, OSError, TypeError, ValueError:
+            return max(1, shutil.get_terminal_size(fallback=(80, 24)).columns)
+
     def _snapshot(self) -> ProgressSnapshot:
         elapsed = self._elapsed_seconds()
         with self._lock:
@@ -1743,6 +1786,7 @@ def format_search_progress_line(
     *,
     colors: SearchColors,
     answer_now_hint: bool = False,
+    max_width: int | None = None,
 ) -> str:
     """Format the single-line progress summary used by both the CLI and the TUI.
 
@@ -1754,6 +1798,9 @@ def format_search_progress_line(
         An :class:`AnsiColors` instance (used by the CLI chrome).
     answer_now_hint : bool, default False
         When ``True``, append the ``[Press enter, answer now]`` reminder.
+    max_width : int or None, default None
+        Maximum visible terminal cells for the returned line. When set, the
+        formatter drops optional detail and hint segments before truncating.
 
     Returns
     -------
@@ -1761,12 +1808,39 @@ def format_search_progress_line(
         ``"Searching <q> | <phase> N/M sources | K matches | T.Ts"`` with
         each segment styled through ``colors``.
     """
+    variants = (
+        (True, answer_now_hint),
+        (False, answer_now_hint),
+        (False, False),
+    )
+    for include_detail, include_hint in variants:
+        line = _format_search_progress_line(
+            snapshot,
+            colors=colors,
+            answer_now_hint=include_hint,
+            include_detail=include_detail,
+        )
+        if max_width is None or _visible_width(line) <= max_width:
+            return line
+    if max_width is None:
+        return line
+    return _hard_truncate_ansi(line, max_width)
+
+
+def _format_search_progress_line(
+    snapshot: ProgressSnapshot,
+    *,
+    colors: SearchColors,
+    answer_now_hint: bool,
+    include_detail: bool,
+) -> str:
+    """Build one progress-line variant."""
     label_part = f"{colors.heading('Searching')} {colors.highlight(snapshot.query_label)}"
-    detail_part = colors.muted(snapshot.detail) if snapshot.detail else None
+    detail_part = colors.muted(snapshot.detail) if include_detail and snapshot.detail else None
     if snapshot.current is not None and snapshot.total is not None:
         count = colors.warning(f"{snapshot.current}/{snapshot.total}")
         status_part = f"{colors.heading(snapshot.phase)} {count} {colors.muted('sources')}"
-    elif snapshot.detail:
+    elif include_detail and snapshot.detail:
         status_part = f"{colors.heading(snapshot.phase)} {colors.muted(snapshot.detail)}"
         detail_part = None
     else:
diff --git a/tests/test_agentgrep.py b/tests/test_agentgrep.py
index d12d2c4..125b261 100644
--- a/tests/test_agentgrep.py
+++ b/tests/test_agentgrep.py
@@ -3688,6 +3688,52 @@ def test_tty_progress_renders_answer_now_hint() -> None:
     assert out.endswith("\n")
 
 
+def test_tty_progress_render_fits_terminal_width(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """TTY progress renders must not wrap into uncleared terminal rows."""
+    agentgrep = t.cast("t.Any", load_agentgrep_module())
+    stream = io.StringIO()
+    columns = 72
+    monkeypatch.setattr(
+        agentgrep.shutil,
+        "get_terminal_size",
+        lambda fallback: os.terminal_size((columns, 24)),
+    )
+    progress = agentgrep.ConsoleSearchProgress(
+        enabled=True,
+        stream=stream,
+        tty=True,
+        color_mode="never",
+        refresh_interval=100.0,
+        answer_now_hint=True,
+    )
+    query = agentgrep.SearchQuery(
+        terms=("libtmux",),
+        search_type="prompts",
+        any_term=False,
+        regex=False,
+        case_sensitive=False,
+        agents=("codex",),
+        limit=None,
+    )
+
+    progress.start(query)
+    progress._stop_tty_thread()
+    progress.set_status(
+        "scanning",
+        current=8,
+        total=3807,
+        detail="128 records, 0 source matches",
+    )
+    progress.result_added(76)
+    progress._render_tty("⠋")
+
+    rendered = stream.getvalue().split("\r\033[2K")[-1]
+    assert "\n" not in rendered
+    assert len(strip_ansi(rendered)) <= columns
+
+
 def test_tty_progress_answer_now_hint_is_white(monkeypatch: pytest.MonkeyPatch) -> None:
     agentgrep = t.cast("t.Any", load_agentgrep_module())
     stream = io.StringIO()

From 508c403eec053825ae48bab4429e72dacf8980dc Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 15:31:22 -0500
Subject: [PATCH 11/23] agentgrep(fix[search]): Validate regex and type
 predicates

why: The search CLI accepted malformed regex terms until matching reached Python's regex engine, producing a traceback after scanning started. Query-language type predicates also kept the default prompt-only coarse search filter, so history records were discarded before the compiled predicate could evaluate.

what:
- Validate `search --regex` terms at parse time with argparse-shaped errors.
- Track compiled query fields so `type:` predicates broaden the coarse search filter when `--type` was not explicit.
- Treat explicit default `--type` values as flag/field collisions across search, grep, and find.
- Add regression coverage for invalid search regexes, type predicate routing, and explicit default collisions.
---
 src/agentgrep/cli/parser.py | 70 +++++++++++++++++++++----------
 tests/test_cli_search.py    | 82 +++++++++++++++++++++++++++++++++++++
 tests/test_query_engine.py  | 30 ++++++++++++++
 3 files changed, 161 insertions(+), 21 deletions(-)

diff --git a/src/agentgrep/cli/parser.py b/src/agentgrep/cli/parser.py
index 0e63536..abf2a03 100644
--- a/src/agentgrep/cli/parser.py
+++ b/src/agentgrep/cli/parser.py
@@ -395,7 +395,6 @@ def create_parser(
     _ = grep_parser.add_argument(
         "--type",
         choices=["prompts", "history", "all"],
-        default="prompts",
         dest="search_type",
         help="Record type to search (default: prompts)",
     )
@@ -474,7 +473,6 @@ def create_parser(
         "--type",
         dest="find_type",
         choices=["prompts", "history", "sessions", "all"],
-        default="all",
         help="Restrict to a record kind (default: all)",
     )
     _ = find_parser.add_argument(
@@ -682,7 +680,6 @@ def create_parser(
     _ = search_parser.add_argument(
         "--type",
         choices=["prompts", "history", "all"],
-        default="prompts",
         dest="search_type",
         help="Record type to search (default: prompts)",
     )
@@ -766,7 +763,7 @@ def _search_explicit_flags(namespace: argparse.Namespace) -> dict[str, str]:
     flags: dict[str, str] = {}
     if t.cast("list[str]", namespace.agent):
         flags["agent"] = "--agent"
-    if t.cast("str", namespace.search_type) != "prompts":
+    if t.cast("str | None", namespace.search_type) is not None:
         flags["type"] = "--type"
     return flags
 
@@ -776,7 +773,7 @@ def _grep_explicit_flags(namespace: argparse.Namespace) -> dict[str, str]:
     flags: dict[str, str] = {}
     if t.cast("list[str]", namespace.agent):
         flags["agent"] = "--agent"
-    if t.cast("str", namespace.search_type) != "prompts":
+    if t.cast("str | None", namespace.search_type) is not None:
         flags["type"] = "--type"
     return flags
 
@@ -786,11 +783,25 @@ def _find_explicit_flags(namespace: argparse.Namespace) -> dict[str, str]:
     flags: dict[str, str] = {}
     if t.cast("list[str]", namespace.agent):
         flags["agent"] = "--agent"
-    if t.cast("str", namespace.find_type) != "all":
+    if t.cast("str | None", namespace.find_type) is not None:
         flags["type"] = "--type"
     return flags
 
 
+def _effective_search_type(
+    namespace: argparse.Namespace,
+    *,
+    query_fields: set[str],
+) -> SearchType:
+    """Return the coarse search type after query-language reconciliation."""
+    explicit = t.cast("SearchType | None", namespace.search_type)
+    if explicit is not None:
+        return explicit
+    if "type" in query_fields:
+        return "all"
+    return "prompts"
+
+
 def _maybe_compile_query(
     positionals: cabc.Sequence[str],
     *,
@@ -798,14 +809,15 @@ def _maybe_compile_query(
     color_mode: ColorMode,
     subparser: argparse.ArgumentParser,
     explicit_flags: dict[str, str] | None = None,
-) -> tuple[CompiledQuery | None, tuple[str, ...]]:
+) -> tuple[CompiledQuery | None, tuple[str, ...], set[str]]:
     """Detect Lucene-style query syntax in positionals and compile if present.
 
-    Returns ``(compiled, residual_terms)`` — ``compiled`` is ``None`` when
-    no positional contains ``:`` (legacy fast path); ``residual_terms``
+    Returns ``(compiled, residual_terms, fields)`` — ``compiled`` is ``None``
+    when no positional contains ``:`` (legacy fast path); ``residual_terms``
     is the tuple to feed back as the legacy ``terms`` / ``patterns`` /
     ``pattern`` field so the engine's existing text-matching path
-    still has the user's text query.
+    still has the user's text query. ``fields`` is populated only for
+    query-language input so callers can reconcile equivalent CLI flags.
 
     ``explicit_flags`` maps field name → flag name. When a field also
     has an explicitly-set flag (e.g. ``--agent`` set AND ``agent:``
@@ -817,7 +829,7 @@ def _maybe_compile_query(
     traceback.
     """
     if not any(":" in token for token in positionals):
-        return None, tuple(positionals)
+        return None, tuple(positionals), set()
     from agentgrep.query import (
         QueryCompileError,
         QueryParseError,
@@ -834,8 +846,8 @@ def _maybe_compile_query(
     except QueryParseError as exc:
         with configured_color_environment(color_mode):
             subparser.error(f"invalid query: {exc}")
+    used_fields = fields_in_ast(ast)
     if explicit_flags:
-        used_fields = fields_in_ast(ast)
         for field_name, flag_name in explicit_flags.items():
             if field_name in used_fields:
                 with configured_color_environment(color_mode):
@@ -849,7 +861,7 @@ def _maybe_compile_query(
         with configured_color_environment(color_mode):
             subparser.error(f"invalid query: {exc}")
     _ = bundle  # kept available for future per-bundle checks
-    return compiled, compiled.text_terms
+    return compiled, compiled.text_terms, used_fields
 
 
 def _check_for_mangled_field_predicate(
@@ -967,7 +979,7 @@ def parse_args(
 
     raw_pattern = t.cast("str | None", namespace.pattern)
     find_positionals = [raw_pattern] if raw_pattern is not None else []
-    find_compiled, find_residual = _maybe_compile_query(
+    find_compiled, find_residual, _find_query_fields = _maybe_compile_query(
         find_positionals,
         bundle=bundle,
         color_mode=color_mode,
@@ -1006,7 +1018,7 @@ def parse_args(
         output_mode=output_mode,
         color_mode=color_mode,
         pattern_mode=pattern_mode,
-        type_filter=t.cast("FindTypeFilter", namespace.find_type),
+        type_filter=t.cast("FindTypeFilter", namespace.find_type or "all"),
         extensions=tuple(t.cast("list[str]", namespace.find_extensions)),
         case_mode=find_case_mode,
         list_details=t.cast("bool", namespace.list_details),
@@ -1048,7 +1060,7 @@ def _build_grep_args(
         pattern_mode = "regex"
 
     patterns_list_raw = t.cast("list[str]", namespace.patterns)
-    grep_compiled, residual_patterns = _maybe_compile_query(
+    grep_compiled, residual_patterns, grep_query_fields = _maybe_compile_query(
         patterns_list_raw,
         bundle=bundle,
         color_mode=color_mode,
@@ -1109,7 +1121,10 @@ def _build_grep_args(
     return GrepArgs(
         patterns=tuple(patterns_list),
         agents=agents,
-        search_type=t.cast("SearchType", namespace.search_type),
+        search_type=_effective_search_type(
+            namespace,
+            query_fields=grep_query_fields,
+        ),
         case_mode=case_mode,
         pattern_mode=pattern_mode,
         invert_match=invert_match,
@@ -1157,7 +1172,7 @@ def _build_search_args(
                 "--threshold has no effect with --no-rank (ranking is disabled)",
             )
 
-    search_compiled, residual_terms = _maybe_compile_query(
+    search_compiled, residual_terms, search_query_fields = _maybe_compile_query(
         terms_list,
         bundle=bundle,
         color_mode=color_mode,
@@ -1167,14 +1182,27 @@ def _build_search_args(
     final_terms: tuple[str, ...] = (
         residual_terms if search_compiled is not None else tuple(terms_list)
     )
+    regex = t.cast("bool", namespace.regex)
+    case_sensitive = t.cast("bool", namespace.case_sensitive)
+    if regex:
+        flags = 0 if case_sensitive else re.IGNORECASE
+        for term in final_terms:
+            try:
+                _ = re.compile(term, flags)
+            except re.error as exc:
+                with configured_color_environment(color_mode):
+                    bundle.search_parser.error(f"invalid regex {term!r}: {exc}")
 
     return SearchArgs(
         terms=final_terms,
         agents=agents,
-        search_type=t.cast("SearchType", namespace.search_type),
+        search_type=_effective_search_type(
+            namespace,
+            query_fields=search_query_fields,
+        ),
         any_term=t.cast("bool", namespace.any_term),
-        regex=t.cast("bool", namespace.regex),
-        case_sensitive=t.cast("bool", namespace.case_sensitive),
+        regex=regex,
+        case_sensitive=case_sensitive,
         limit=limit,
         output_mode=output_mode,
         color_mode=color_mode,
diff --git a/tests/test_cli_search.py b/tests/test_cli_search.py
index bc8805c..ba886fa 100644
--- a/tests/test_cli_search.py
+++ b/tests/test_cli_search.py
@@ -250,6 +250,88 @@ def test_search_parse_agent_filter() -> None:
     assert parsed.agents == ("codex",)
 
 
+class SearchInvalidRegexCase(t.NamedTuple):
+    """Parametrized case for ``search --regex`` validation."""
+
+    test_id: str
+    pattern: str
+    expected_msg_fragment: str
+
+
+SEARCH_INVALID_REGEX_CASES: tuple[SearchInvalidRegexCase, ...] = (
+    SearchInvalidRegexCase(
+        test_id="unterminated-charset",
+        pattern="[",
+        expected_msg_fragment="unterminated character set",
+    ),
+    SearchInvalidRegexCase(
+        test_id="unclosed-paren",
+        pattern="(unclosed",
+        expected_msg_fragment="unterminated subpattern",
+    ),
+    SearchInvalidRegexCase(
+        test_id="bad-backref",
+        pattern=r"\1",
+        expected_msg_fragment="invalid group reference",
+    ),
+)
+
+
+@pytest.mark.parametrize(
+    "case",
+    SEARCH_INVALID_REGEX_CASES,
+    ids=[case.test_id for case in SEARCH_INVALID_REGEX_CASES],
+)
+def test_search_invalid_regex_exits_with_clean_error(
+    case: SearchInvalidRegexCase,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """``agentgrep search --regex <bad-regex>`` exits before scanning."""
+    with pytest.raises(SystemExit) as exc_info:
+        _ = agentgrep.parse_args(("search", "--regex", case.pattern))
+    assert exc_info.value.code == 2
+    captured = capsys.readouterr()
+    assert "invalid regex" in captured.err
+    assert case.expected_msg_fragment in captured.err
+    assert "Traceback" not in captured.err
+
+
+def test_search_type_field_broadens_coarse_search_type() -> None:
+    """A query-language ``type:`` predicate controls record-kind filtering."""
+    parsed = agentgrep.parse_args(("search", "type:history", "bliss"))
+    assert isinstance(parsed, agentgrep.SearchArgs)
+    assert parsed.search_type == "all"
+    assert parsed.terms == ("bliss",)
+    assert parsed.compiled is not None
+
+
+def test_search_type_field_history_record_reaches_compiled_predicate() -> None:
+    """``type:history`` must not be pre-filtered by the default prompts scope."""
+    parsed = agentgrep.parse_args(("search", "type:history", "bliss"))
+    assert isinstance(parsed, agentgrep.SearchArgs)
+    record = agentgrep.SearchRecord(
+        kind="history",
+        agent="codex",
+        store="history",
+        adapter_id="codex.history_json.v1",
+        path=pathlib.Path("/tmp/history.json"),
+        text="bliss command",
+    )
+    query = agentgrep.SearchQuery(
+        terms=parsed.terms,
+        search_type=parsed.search_type,
+        any_term=parsed.any_term,
+        regex=parsed.regex,
+        case_sensitive=parsed.case_sensitive,
+        agents=parsed.agents,
+        limit=parsed.limit,
+        compiled=parsed.compiled,
+    )
+
+    assert query.search_type == "all"
+    assert agentgrep.matches_record(record, query)
+
+
 # ---------------------------------------------------------------------------
 # Integration tests
 # ---------------------------------------------------------------------------
diff --git a/tests/test_query_engine.py b/tests/test_query_engine.py
index 7221979..e7e4594 100644
--- a/tests/test_query_engine.py
+++ b/tests/test_query_engine.py
@@ -653,6 +653,16 @@ class QueryPassesThroughCase(t.NamedTuple):
         argv=("find", "agent:codex"),
         expect_compiled=True,
     ),
+    QueryPassesThroughCase(
+        test_id="search-bare-term-legacy-path",
+        argv=("search", "bliss"),
+        expect_compiled=False,
+    ),
+    QueryPassesThroughCase(
+        test_id="search-field-syntax-compiled",
+        argv=("search", "agent:codex", "bliss"),
+        expect_compiled=True,
+    ),
 )
 
 
@@ -799,6 +809,26 @@ class FlagFieldCollisionCase(t.NamedTuple):
         argv=("grep", "--type", "history", "type:prompts", "bliss"),
         expected_message_fragment="cannot combine --type flag with type: field",
     ),
+    FlagFieldCollisionCase(
+        test_id="grep-default-type-flag-and-field",
+        argv=("grep", "--type", "prompts", "type:history", "bliss"),
+        expected_message_fragment="cannot combine --type flag with type: field",
+    ),
+    FlagFieldCollisionCase(
+        test_id="search-type-flag-and-field",
+        argv=("search", "--type", "history", "type:prompts", "bliss"),
+        expected_message_fragment="cannot combine --type flag with type: field",
+    ),
+    FlagFieldCollisionCase(
+        test_id="search-default-type-flag-and-field",
+        argv=("search", "--type", "prompts", "type:history", "bliss"),
+        expected_message_fragment="cannot combine --type flag with type: field",
+    ),
+    FlagFieldCollisionCase(
+        test_id="find-default-type-flag-and-field",
+        argv=("find", "--type", "all", "type:history"),
+        expected_message_fragment="cannot combine --type flag with type: field",
+    ),
 )
 
 

From 457128d3ea66543c142cba71d6705f100a2f10e4 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 15:53:04 -0500
Subject: [PATCH 12/23] agentgrep(fix[parser]): Use correct subparser for
 --limit/--max-count errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

why: Validation errors for --limit and --max-count called the
root parser's .error(), showing `usage: agentgrep [-h] ...`
instead of the subcommand's usage hint.

what:
- find --limit: bundle.parser → bundle.find_parser
- search --limit: bundle.parser → bundle.search_parser
- grep --max-count: bundle.parser → bundle.grep_parser
---
 src/agentgrep/cli/parser.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/agentgrep/cli/parser.py b/src/agentgrep/cli/parser.py
index abf2a03..ed41975 100644
--- a/src/agentgrep/cli/parser.py
+++ b/src/agentgrep/cli/parser.py
@@ -975,7 +975,7 @@ def parse_args(
     limit = t.cast("int | None", namespace.limit)
     if limit is not None and limit < 1:
         with configured_color_environment(color_mode):
-            bundle.parser.error("--limit must be greater than 0")
+            bundle.find_parser.error("--limit must be greater than 0")
 
     raw_pattern = t.cast("str | None", namespace.pattern)
     find_positionals = [raw_pattern] if raw_pattern is not None else []
@@ -1043,7 +1043,7 @@ def _build_grep_args(
     max_count = t.cast("int | None", namespace.max_count)
     if max_count is not None and max_count < 1:
         with configured_color_environment(color_mode):
-            bundle.parser.error("--max-count must be greater than 0")
+            bundle.grep_parser.error("--max-count must be greater than 0")
 
     if t.cast("bool", namespace.ignore_case):
         case_mode: CaseMode = "ignore"
@@ -1160,7 +1160,7 @@ def _build_search_args(
     limit = t.cast("int | None", namespace.limit)
     if limit is not None and limit < 1:
         with configured_color_environment(color_mode):
-            bundle.parser.error("--limit must be greater than 0")
+            bundle.search_parser.error("--limit must be greater than 0")
     threshold = t.cast("int", namespace.threshold)
     if threshold < 0 or threshold > 100:
         with configured_color_environment(color_mode):

From e7bdf9074ac3cbe30730815a42d2a00eda629ce4 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 15:54:23 -0500
Subject: [PATCH 13/23] agentgrep(refactor[jsonl]): Remove dead skip_line check
 in _iter_jsonl

why: The early return at the top of _iter_jsonl dispatches to
_iter_jsonl_with_raw_skip when skip_line is set, making the
inline `if skip_line is not None` check unreachable.

what:
- Remove the dead branch from the text-mode iteration path
---
 src/agentgrep/__init__.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/agentgrep/__init__.py b/src/agentgrep/__init__.py
index 2e27470..9468bd7 100644
--- a/src/agentgrep/__init__.py
+++ b/src/agentgrep/__init__.py
@@ -3485,8 +3485,6 @@ def _iter_jsonl(
                 decoded_lines += 1
                 if decoded_lines % _JSONL_YIELD_LINE_INTERVAL == 0:
                     time.sleep(0)
-                if skip_line is not None and skip_line(stripped):
-                    continue
                 try:
                     parsed = t.cast("object", json.loads(stripped))
                 except json.JSONDecodeError:

From 3b25c598e34598ae8f1acc3377a25044ad4eefac Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 15:58:12 -0500
Subject: [PATCH 14/23] agentgrep(fix[ranking]): Remove size guard, decouple
 collapse from --no-rank
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

why: collapse_near_duplicates silently turned itself off at 500
records, and --no-rank silently skipped dedup. Both hacks avoided
the O(n²) cost instead of letting the C-accelerated WRatio calls
do their job. Ranking and dedup are independent features — a user
who wants discovery-order results should still get dedup.

what:
- Remove the 500-record size guard from collapse_near_duplicates
- Always run collapse_near_duplicates regardless of --no-rank
- Fix docstring: "above" → "at or above" for >= threshold
---
 src/agentgrep/cli/render.py | 8 ++++----
 src/agentgrep/ranking.py    | 8 +++++---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/agentgrep/cli/render.py b/src/agentgrep/cli/render.py
index eb42527..04ecf67 100644
--- a/src/agentgrep/cli/render.py
+++ b/src/agentgrep/cli/render.py
@@ -492,13 +492,13 @@ def run_search_command(args: SearchArgs) -> int:
     query_text = " ".join(args.terms)
     if args.no_rank:
         scored: list[tuple[agentgrep.SearchRecord, float]] = [(r, 0.0) for r in records]
-        collapsed: list[tuple[agentgrep.SearchRecord, float, int]] = [(r, 0.0, 0) for r in records]
     else:
-        from agentgrep.ranking import collapse_near_duplicates, rank_search_records
+        from agentgrep.ranking import rank_search_records
 
         scored = rank_search_records(records, query_text, threshold=args.threshold)
-        collapsed = collapse_near_duplicates(scored)
-    from agentgrep.ranking import group_by_session
+    from agentgrep.ranking import collapse_near_duplicates, group_by_session
+
+    collapsed = collapse_near_duplicates(scored)
 
     if args.limit is not None:
         collapsed = collapsed[: args.limit]
diff --git a/src/agentgrep/ranking.py b/src/agentgrep/ranking.py
index 32eeec7..da42ad5 100644
--- a/src/agentgrep/ranking.py
+++ b/src/agentgrep/ranking.py
@@ -67,12 +67,16 @@ def collapse_near_duplicates(
 ) -> list[tuple[SearchRecord, float, int]]:
     """Collapse near-duplicate records, keeping highest-scored representative.
 
+    Pairwise ``WRatio`` comparison between record texts (each call is
+    C-accelerated by rapidfuzz). Records at or above the similarity
+    threshold are folded into the highest-scoring representative.
+
     Parameters
     ----------
     scored : list[tuple[SearchRecord, float]]
         Pre-sorted ``(record, score)`` pairs (best-first).
     similarity_threshold : float
-        WRatio ceiling — record pairs scoring above this are
+        WRatio ceiling — record pairs scoring at or above this are
         considered near-duplicates.
 
     Returns
@@ -85,8 +89,6 @@ def collapse_near_duplicates(
 
     if not scored:
         return []
-    if len(scored) > 500:
-        return [(r, s, 0) for r, s in scored]
     result: list[tuple[SearchRecord, float, int]] = []
     consumed: set[int] = set()
     for i, (record_i, score_i) in enumerate(scored):

From 438047d282fa9af24761ac2e0de4a295708505c7 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 15:59:45 -0500
Subject: [PATCH 15/23] agentgrep(docs[render]): Accurate run_search_command
 docstring

why: Docstring described scoring/collapse/grouping as unconditional
but --no-rank skips scoring and --no-group skips grouping.

what:
- Note --no-rank and --no-group bypass paths in the docstring
---
 src/agentgrep/cli/render.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/agentgrep/cli/render.py b/src/agentgrep/cli/render.py
index 04ecf67..334113b 100644
--- a/src/agentgrep/cli/render.py
+++ b/src/agentgrep/cli/render.py
@@ -430,10 +430,11 @@ def run_ui_command(args: UIArgs) -> int:
 def run_search_command(args: SearchArgs) -> int:
     """Execute ``agentgrep search`` with ranking and grouping.
 
-    Collects all matching records eagerly, scores them by rapidfuzz
-    relevance, collapses near-duplicates, groups by session, and
-    renders in the requested output format. Returns ``0`` when at
-    least one result survives ranking, ``1`` otherwise.
+    Collects all matching records eagerly, then applies a three-stage
+    pipeline: score by rapidfuzz relevance (skipped with ``--no-rank``),
+    collapse near-duplicates, and group by session (skipped with
+    ``--no-group``). Returns ``0`` when at least one result survives,
+    ``1`` otherwise.
     """
     if not args.terms and args.output_mode != "ui":
         msg = "search requires at least one term unless --ui is used"

From d970c05942255cab39b12ac8848c455a1a688663 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 16:32:00 -0500
Subject: [PATCH 16/23] =?UTF-8?q?agentgrep(docs[ranking]):=20Fix=20module?=
 =?UTF-8?q?=20docstring=20"above"=20=E2=86=92=20"at=20or=20above"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

why: Function-level docstring was fixed to match >= semantics
but module docstring still said "above" (implying >).

what:
- Change "records above" to "records at or above" in module
  docstring to match the >= comparison in the implementation
---
 src/agentgrep/ranking.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/agentgrep/ranking.py b/src/agentgrep/ranking.py
index da42ad5..48bf16e 100644
--- a/src/agentgrep/ranking.py
+++ b/src/agentgrep/ranking.py
@@ -6,7 +6,7 @@
 1. :func:`rank_search_records` — score each record against the query
    text with rapidfuzz WRatio, filter by threshold, sort best-first.
 2. :func:`collapse_near_duplicates` — pairwise WRatio between record
-   bodies; records above the similarity ceiling are folded into the
+   bodies; records at or above the similarity ceiling are folded into the
    highest-scoring representative.
 3. :func:`group_by_session` — bucket the surviving records by
    ``session_id``, preserving score order within each group.

From 0e15c13cdeae5d76bac782444f2c9a1591049dc1 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 16:35:27 -0500
Subject: [PATCH 17/23] agentgrep(fix[test]): Replace tautological assertion in
 threshold test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

why: `assert code in (0, 1)` is always true. The canned records
score 90 against "bliss" so threshold=99 always filters all of
them — code is deterministically 1.

what:
- Assert code == 1 and empty stdout directly
- Remove narration comments
---
 tests/test_cli_search.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tests/test_cli_search.py b/tests/test_cli_search.py
index ba886fa..f507aef 100644
--- a/tests/test_cli_search.py
+++ b/tests/test_cli_search.py
@@ -450,15 +450,11 @@ def test_search_threshold_filters_low_scores(
         "run_search_query",
         lambda *_args, **_kwargs: canned,
     )
-    # Very high threshold should filter most records
     args = _make_search_args(terms=("bliss",), threshold=99)
     code = run_search_command(args)
     captured = capsys.readouterr()
-    # With threshold=99, only near-exact matches survive (or none)
-    # The exit code reflects whether any results remain
-    assert code in (0, 1)
-    if code == 1:
-        assert captured.out.strip() == ""
+    assert code == 1
+    assert captured.out.strip() == ""
 
 
 def test_search_json_includes_scores(

From 227a62851eb9c3df5bcd86f214219f5dfbf5b3a6 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 17:48:53 -0500
Subject: [PATCH 18/23] agentgrep(fix[search]): Allow field-only queries
 without text terms
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

why: `agentgrep search agent:codex` raised SystemExit even though
a compiled field query existed. The guard only checked for empty
terms, not for a compiled query. Additionally, field-only queries
produce empty query_text which makes WRatio return 0 for
everything — ranking is skipped in that case.

what:
- Check args.compiled before rejecting empty terms
- Skip ranking when query_text is empty (field-only query)
- Also skip ranking when control.answer_now_requested() is set
- Add test for field-only query parsing and execution
---
 src/agentgrep/cli/render.py |  5 +++--
 tests/test_cli_search.py    | 20 ++++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/agentgrep/cli/render.py b/src/agentgrep/cli/render.py
index 334113b..7fe4231 100644
--- a/src/agentgrep/cli/render.py
+++ b/src/agentgrep/cli/render.py
@@ -436,7 +436,7 @@ def run_search_command(args: SearchArgs) -> int:
     ``--no-group``). Returns ``0`` when at least one result survives,
     ``1`` otherwise.
     """
-    if not args.terms and args.output_mode != "ui":
+    if not args.terms and args.compiled is None and args.output_mode != "ui":
         msg = "search requires at least one term unless --ui is used"
         raise SystemExit(msg)
     query = agentgrep.SearchQuery(
@@ -491,7 +491,8 @@ def run_search_command(args: SearchArgs) -> int:
         if listener is not None:
             listener.stop()
     query_text = " ".join(args.terms)
-    if args.no_rank:
+    answered_early = control.answer_now_requested()
+    if args.no_rank or answered_early or not query_text:
         scored: list[tuple[agentgrep.SearchRecord, float]] = [(r, 0.0) for r in records]
     else:
         from agentgrep.ranking import rank_search_records
diff --git a/tests/test_cli_search.py b/tests/test_cli_search.py
index f507aef..ceff870 100644
--- a/tests/test_cli_search.py
+++ b/tests/test_cli_search.py
@@ -400,6 +400,26 @@ def test_search_command_no_terms_raises() -> None:
         run_search_command(args)
 
 
+def test_search_field_only_query_allowed(
+    monkeypatch: pytest.MonkeyPatch,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """Field-only queries like agent:codex work without text terms."""
+    parsed = agentgrep.parse_args(("search", "agent:codex"))
+    assert parsed is not None
+    assert isinstance(parsed, agentgrep.SearchArgs)
+    assert parsed.compiled is not None
+    assert parsed.terms == ()
+    canned = _canned_records()
+    monkeypatch.setattr(
+        agentgrep,
+        "run_search_query",
+        lambda *_args, **_kwargs: canned,
+    )
+    code = run_search_command(parsed)
+    assert code == 0
+
+
 def test_search_routes_through_ranking(
     monkeypatch: pytest.MonkeyPatch,
     capsys: pytest.CaptureFixture[str],

From c79b1222f526d47b56fccf61ecc450ab9145f582 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 18:35:50 -0500
Subject: [PATCH 19/23] agentgrep(fix[search]): Skip ranking and collapse on
 answer-now
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

why: When the user pressed Enter for partial results, the
"Answering now: N matches" message appeared but then the CLI
hung for minutes running rank_search_records (O(n) WRatio calls)
and collapse_near_duplicates (O(n²) pairwise) on potentially
thousands of partial results — defeating the purpose of
answering now.

what:
- Check control.answer_now_requested() after collection returns
- Skip both ranking and collapse when answering early — emit
  records in discovery order with score=0, similar_count=0
- Collapse still runs normally for --no-rank (only answer-now
  bypasses it, preserving the earlier decoupling)

From 8f871cfc6c5b6c0b73c652906b6db28aff64e4cf Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 18:35:57 -0500
Subject: [PATCH 20/23] agentgrep(test[parser]): Cover --threshold + --no-rank
 rejection

why: The parser guard rejecting --threshold with --no-rank had
no test verifying the error fires.

what:
- Add test_search_threshold_with_no_rank_rejected asserting
  SystemExit code 2 and error message mentioning both flags
- Drop the narration comment in test_search_no_rank_preserves_order
---
 tests/test_cli_search.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_cli_search.py b/tests/test_cli_search.py
index ceff870..77e1625 100644
--- a/tests/test_cli_search.py
+++ b/tests/test_cli_search.py
@@ -454,7 +454,6 @@ def test_search_no_rank_preserves_order(
     assert code == 0
     captured = capsys.readouterr()
     lines = captured.out.strip().splitlines()
-    # With no_rank, scores are 0 — all matching records appear
     score_lines = [line for line in lines if line.startswith("0")]
     assert len(score_lines) >= 1
 

From 5fbe9ce5b595cc7683d45b74fe7e77368db0a781 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 19:39:29 -0500
Subject: [PATCH 21/23] =?UTF-8?q?agentgrep(feat[search]):=20Stream=20searc?=
 =?UTF-8?q?h=20results,=20drop=20O(n=C2=B2)=20collapse?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

why: collapse_near_duplicates ran O(n²) pairwise WRatio on the
full result set (~612M comparisons for 35K records), hanging the
CLI indefinitely. The engine already does exact dedup via
hash-based record_dedupe_key. Both grep and the TUI stream
results without pairwise dedup and work at scale.

what:
- Rewrite run_search_command to stream via iter_search_events,
  scoring each record inline with WRatio as it arrives (O(n))
- Remove collapse_near_duplicates from the pipeline entirely
- Text mode streams with session headers, highlighted snippets, and a
  provenance line (per-record scores are no longer printed)
- JSON/NDJSON stays eager for envelope integrity but skips
  collapse — ranking + grouping only
- Pass args.limit to SearchQuery so the engine caps early
- Apply post-ranking limit in eager path for JSON accuracy
- Update tests: remove similar_count assertions, fix
  monkeypatching for streaming vs eager paths
---
 src/agentgrep/cli/render.py | 117 ++++++++++++++++++++++--------------
 tests/test_cli_search.py    |  11 +---
 2 files changed, 76 insertions(+), 52 deletions(-)

diff --git a/src/agentgrep/cli/render.py b/src/agentgrep/cli/render.py
index 7fe4231..74d59f4 100644
--- a/src/agentgrep/cli/render.py
+++ b/src/agentgrep/cli/render.py
@@ -428,13 +428,13 @@ def run_ui_command(args: UIArgs) -> int:
 
 
 def run_search_command(args: SearchArgs) -> int:
-    """Execute ``agentgrep search`` with ranking and grouping.
+    """Execute ``agentgrep search`` with ranked, pretty output.
 
-    Collects all matching records eagerly, then applies a three-stage
-    pipeline: score by rapidfuzz relevance (skipped with ``--no-rank``),
-    collapse near-duplicates, and group by session (skipped with
-    ``--no-group``). Returns ``0`` when at least one result survives,
-    ``1`` otherwise.
+    Collects all matching records eagerly with a progress spinner,
+    scores them by rapidfuzz WRatio (skipped with ``--no-rank``
+    or on answer-now), groups by session (skipped with ``--no-group``),
+    and renders with snippet-first pretty output.  Returns ``0`` when
+    at least one result survives, ``1`` otherwise.
     """
     if not args.terms and args.compiled is None and args.output_mode != "ui":
         msg = "search requires at least one term unless --ui is used"
@@ -446,7 +446,7 @@ def run_search_command(args: SearchArgs) -> int:
         regex=args.regex,
         case_sensitive=args.case_sensitive,
         agents=args.agents,
-        limit=None,
+        limit=args.limit,
         compiled=args.compiled,
     )
     if args.output_mode == "ui":
@@ -457,8 +457,10 @@ def run_search_command(args: SearchArgs) -> int:
             initial_search_text=args.raw_query or None,
         )
         return 0
+    if args.output_mode in ("json", "ndjson"):
+        return _run_search_eager(args, query)
     control = agentgrep.SearchControl()
-    human_output = args.output_mode in {"text", "ui"}
+    human_output = args.output_mode == "text"
     progress_enabled = args.progress_mode == "always" or (
         args.progress_mode == "auto" and human_output
     )
@@ -498,31 +500,37 @@ def run_search_command(args: SearchArgs) -> int:
         from agentgrep.ranking import rank_search_records
 
         scored = rank_search_records(records, query_text, threshold=args.threshold)
-    from agentgrep.ranking import collapse_near_duplicates, group_by_session
+    if args.limit is not None:
+        scored = scored[: args.limit]
+    from agentgrep.ranking import group_by_session
 
-    collapsed = collapse_near_duplicates(scored)
+    grouped = group_by_session([(r, s, 0) for r, s in scored])
+    _print_search_text(grouped, args)
+    return 0 if scored else 1
 
-    if args.limit is not None:
-        collapsed = collapsed[: args.limit]
-    if args.no_group:
-        groups: list[tuple[str | None, list[tuple[agentgrep.SearchRecord, float, int]]]] = [
-            (None, collapsed),
-        ]
-    else:
-        groups = group_by_session(collapsed)
-    if args.output_mode in ("json", "ndjson"):
-        _print_search_json(groups, args)
-        return 0 if collapsed else 1
-    _print_search_text(groups, args)
-    return 0 if collapsed else 1
+
+def _compile_search_patterns(args: SearchArgs) -> list[re.Pattern[str]]:
+    """Compile search terms to regex for snippet highlighting."""
+    flags = 0 if args.case_sensitive else re.IGNORECASE
+    compiled: list[re.Pattern[str]] = []
+    for term in args.terms:
+        if ":" in term:
+            continue
+        source = term if args.regex else re.escape(term)
+        try:
+            compiled.append(re.compile(source, flags))
+        except re.error:
+            continue
+    return compiled
 
 
 def _print_search_text(
     groups: list[tuple[str | None, list[tuple[agentgrep.SearchRecord, float, int]]]],
     args: SearchArgs,
 ) -> None:
-    """Render search results with scores and duplicate counts to stdout."""
+    """Render ranked search results with pretty snippets."""
     colors = agentgrep.AnsiColors.for_stream(args.color_mode, sys.stdout)
+    patterns = _compile_search_patterns(args)
     first_group = True
     for session_id, entries in groups:
         if not first_group:
@@ -530,32 +538,52 @@ def _print_search_text(
         first_group = False
         if session_id is not None and not args.no_group:
             print(colors.heading(f"[session {session_id[:12]}]"))
-        for record, score, similar_count in entries:
-            path = agentgrep.format_display_path(record.path)
-            score_label = colors.warning(f"{score:.0f}")
-            snippet = record.text[:120].replace("\n", " ")
-            similar_label = ""
-            if similar_count > 0:
-                similar_label = colors.muted(f" (+{similar_count} similar)")
-            header = f"  {colors.path(path)}  {colors.muted(record.agent)}"
+        for record, _score, _similar in entries:
+            lines: list[str] = []
+            if record.text:
+                snippet, remaining = extract_search_snippet(record.text, patterns)
+                highlighted = highlight_search_spans(snippet, patterns, colors=colors)
+                lines.append(highlighted)
+                if remaining > 0:
+                    lines.append(colors.dim(f"  ... {remaining} more lines"))
+            provenance_parts: list[str] = [record.agent, record.kind]
             if record.timestamp:
-                header += f"  {colors.muted(record.timestamp)}"
-            print(f"{score_label}  {snippet}{similar_label}")
-            print(header)
+                provenance_parts.append(format_relative_time(record.timestamp))
+            provenance_parts.append(
+                colors.path(agentgrep.format_display_path(record.path)),
+            )
+            lines.append(colors.dim(f"  {' · '.join(provenance_parts)}"))
+            print("\n".join(lines))
+            print()
 
 
-def _print_search_json(
-    groups: list[tuple[str | None, list[tuple[agentgrep.SearchRecord, float, int]]]],
-    args: SearchArgs,
-) -> None:
-    """Render search results as JSON with scores."""
+def _run_search_eager(args: SearchArgs, query: agentgrep.SearchQuery) -> int:
+    """Eager search for JSON/NDJSON output with ranking but no pairwise dedup."""
+    control = agentgrep.SearchControl()
+    records = agentgrep.run_search_query(
+        pathlib.Path.home(),
+        query,
+        progress=agentgrep.noop_search_progress(),
+        control=control,
+    )
+    query_text = " ".join(args.terms)
+    if args.no_rank or not query_text:
+        scored: list[tuple[agentgrep.SearchRecord, float]] = [(r, 0.0) for r in records]
+    else:
+        from agentgrep.ranking import rank_search_records
+
+        scored = rank_search_records(records, query_text, threshold=args.threshold)
+    if args.limit is not None:
+        scored = scored[: args.limit]
+    from agentgrep.ranking import group_by_session
+
+    grouped = group_by_session([(r, s, 0) for r, s in scored])
     serialize_search, _, serialize_envelope = maybe_build_pydantic()
     results: list[dict[str, object]] = []
-    for session_id, entries in groups:
-        for record, score, similar_count in entries:
-            entry = serialize_search(record)
+    for session_id, entries in grouped:
+        for record, score, _similar in entries:
+            entry = dict(serialize_search(record))
             entry["score"] = score
-            entry["similar_count"] = similar_count
             if session_id is not None:
                 entry["group_session_id"] = session_id
             results.append(entry)
@@ -572,6 +600,7 @@ def _print_search_json(
     else:
         for result in results:
             print(json.dumps(result, ensure_ascii=False))
+    return 0 if results else 1
 
 
 def _compile_grep_patterns(args: GrepArgs) -> list[re.Pattern[str]]:
diff --git a/tests/test_cli_search.py b/tests/test_cli_search.py
index 77e1625..26c825f 100644
--- a/tests/test_cli_search.py
+++ b/tests/test_cli_search.py
@@ -453,9 +453,7 @@ def test_search_no_rank_preserves_order(
     code = run_search_command(args)
     assert code == 0
     captured = capsys.readouterr()
-    lines = captured.out.strip().splitlines()
-    score_lines = [line for line in lines if line.startswith("0")]
-    assert len(score_lines) >= 1
+    assert "bliss" in captured.out.lower()
 
 
 def test_search_threshold_filters_low_scores(
@@ -480,7 +478,7 @@ def test_search_json_includes_scores(
     monkeypatch: pytest.MonkeyPatch,
     capsys: pytest.CaptureFixture[str],
 ) -> None:
-    """--json output includes score and similar_count fields."""
+    """--json output includes score fields."""
     canned = _canned_records()
     monkeypatch.setattr(
         agentgrep,
@@ -495,16 +493,14 @@ def test_search_json_includes_scores(
     assert "results" in payload
     for result in payload["results"]:
         assert "score" in result
-        assert "similar_count" in result
         assert isinstance(result["score"], (int, float))
-        assert isinstance(result["similar_count"], int)
 
 
 def test_search_ndjson_includes_scores(
     monkeypatch: pytest.MonkeyPatch,
     capsys: pytest.CaptureFixture[str],
 ) -> None:
-    """--ndjson output includes score and similar_count in each line."""
+    """--ndjson output includes score in each line."""
     canned = _canned_records()
     monkeypatch.setattr(
         agentgrep,
@@ -520,7 +516,6 @@ def test_search_ndjson_includes_scores(
     for line in lines:
         obj = json.loads(line)
         assert "score" in obj
-        assert "similar_count" in obj
 
 
 def test_search_empty_results_returns_1(

From 682cc7e910d12ac37d9494bf89c6c78c9b7c36c5 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 21:56:07 -0500
Subject: [PATCH 22/23] agentgrep(fix[packaging]): Add readme field to project
 metadata

why: Without `readme = "README.md"` in [project], hatchling does not
include the README in package metadata, so the PyPI page is blank.

what:
- Add `readme = "README.md"` to [project] table
---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index 1ff5a89..9722564 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,6 +22,7 @@ classifiers = [
 ]
 
 keywords = ["ai", "codex", "claude", "cursor", "gemini", "mcp", "search", "agent-history"]
+readme = "README.md"
 packages = [
   { include = "*", from = "src" },
 ]

From ad1156304827abf3dab7bed2bacfbe6a26d1ac16 Mon Sep 17 00:00:00 2001
From: Tony Narlock <tony@git-pull.com>
Date: Sun, 24 May 2026 22:08:55 -0500
Subject: [PATCH 23/23] docs(CHANGES) search: ranked results with session
 grouping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

why: search was removed (#19) then reintroduced (#20) in the same
release cycle — the net change is that search gained ranking, not
that it was removed. Replace the stale breaking-change entry with
the shipped feature.

what:
- Remove "Remove search subcommand" breaking change (branch-internal)
- Add What's new entry for ranked search with session grouping
---
 CHANGES | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/CHANGES b/CHANGES
index f1a7dfe..5a5a89e 100644
--- a/CHANGES
+++ b/CHANGES
@@ -42,15 +42,15 @@ $ uvx --from 'agentgrep' --prerelease allow python
 
 <!-- END PLACEHOLDER - ADD NEW CHANGELOG ENTRIES BELOW THIS LINE -->
 
-### Breaking changes
-
-#### Remove `search` subcommand (#19)
+### What's new
 
-`agentgrep search` is removed. Use `agentgrep grep` for the same
-matching engine. `search` will return with rapidfuzz ranking,
-near-duplicate collapsing, and session grouping.
+#### `search`: Relevance-ranked results with session grouping (#20)
 
-### What's new
+`search` now scores results by rapidfuzz relevance, sorts them
+best-first, and groups them by session. A progress spinner with
+Enter-to-answer-now is shown during collection. Output is pretty
+and snippet-first, with amber highlights. New flags:
+`--threshold`, `--no-rank`, `--no-group`.
 
 #### New flag: `--style=pretty` for `grep` (#18)