reddit2/cli.py at main · Esashiero/reddit2 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#!/usr/bin/env python3
"""
Reddit AI Curator CLI
Command-line interface for the intelligent search engine.
"""

import os
import sys
import json
import argparse
import time
from datetime import datetime
from typing import Optional, List

# Add project root to path
sys.path.append(os.getcwd())

from dotenv import load_dotenv
from app.services.search_engine import intent_search


from app.core import get_container
from app.services.llm_base import LLMProviderFactory


def setup_env():
    """Load the ``.env`` file and register LLM providers on the service container.

    Prints an error to stderr and exits with status 1 when
    ``REDDIT_CLIENT_ID`` is not present in the environment after loading.
    """
    load_dotenv()

    reddit_client_id = os.getenv("REDDIT_CLIENT_ID")
    if not reddit_client_id:
        print("Error: Environment variables not set. Please check your .env file.", file=sys.stderr)
        sys.exit(1)

    def mistral_factory():
        # Resolved lazily: the provider is only looked up when the factory is called.
        return LLMProviderFactory.get_provider("mistral")

    container = get_container()

    # "mistral" is the named provider; "llm_provider" is the default slot,
    # which currently points at the same mistral factory.
    for service_name in ("mistral", "llm_provider"):
        container.register(service_name, mistral_factory)


def get_relative_time(timestamp: float) -> str:
    """Convert a UTC epoch timestamp to a short relative-time string.

    Args:
        timestamp: Seconds since the Unix epoch.

    Returns:
        A string such as ``"45s ago"``, ``"3h ago"`` or ``"2y ago"``.
        Months are approximated as 30 days and years as 365 days.
        Timestamps that lie in the future are reported as ``"0s ago"``.
    """
    # Clamp at zero: clock skew between this machine and the data source can
    # put a timestamp slightly in the future, which used to print "-5s ago".
    elapsed = max(0.0, time.time() - timestamp)

    # (exclusive upper bound in seconds, seconds per unit, suffix)
    units = (
        (60, 1, "s"),
        (3600, 60, "m"),
        (86400, 3600, "h"),
        (2592000, 86400, "d"),
        (31536000, 2592000, "mo"),
    )
    for upper_bound, unit_seconds, suffix in units:
        if elapsed < upper_bound:
            return f"{int(elapsed // unit_seconds)}{suffix} ago"

    return f"{int(elapsed // 31536000)}y ago"


def format_text_output(result, verbose: bool = False):
    """Print search results to stdout as human-readable text.

    Args:
        result: Search result object. The attributes read here are
            ``query_variants``, ``total_found``, ``total_scanned``,
            ``duration_seconds`` and ``posts`` (an iterable of dict-like
            posts accessed with ``.get``).
        verbose: When true, also print the AI reasoning and the matched
            core/bonus criteria for every post that carries a ``reasoning``
            value.
    """
    divider = "=" * 60
    print(f"\n{divider}")
    print("🚀 SEARCH COMPLETE")
    print(divider)

    print(f"\n📋 Query: {result.query_variants[0] if result.query_variants else 'N/A'}")
    print(f"📊 Found {result.total_found} relevant posts (scanned {result.total_scanned})")
    print(f"⏱️  Duration: {result.duration_seconds:.2f}s")

    if not result.posts:
        print("\n❌ No results found matching your criteria.")
        return

    print("\n🏆 Top Results:")
    print("-" * 60)

    preview_len = 300

    for i, post in enumerate(result.posts, 1):
        # A key that is present but None must not crash int().
        score = post.get("score") or 0
        title = post.get("title", "")
        subreddit = post.get("subreddit", "unknown")
        url = post.get("url", "")
        reasoning = post.get("reasoning", "")
        # Fall back to selftext when "content" is missing OR empty/None;
        # a nested .get() default only covers the "missing" case.
        content = post.get("content") or post.get("selftext") or ""
        created_utc = post.get("created_utc")
        author = post.get("author", "anonymous")

        date_str = get_relative_time(created_utc) if created_utc else "unknown date"

        print(f"\n{i}. [{int(score)}] {title}")
        print(f"   🕒 {date_str} | 👤 u/{author} | 📁 r/{subreddit}")
        print(f"   🔗 {url}")

        # Show content preview (or full content if short)
        if content:
            preview = content[:preview_len].replace("\n", " ").strip()
            if len(content) > preview_len:
                preview += "..."
            print(f"\n   \"{preview}\"")

        if verbose and reasoning:
            print(f"\n   💡 AI Reasoning: {reasoning}")

            # Print detailed matches if available
            breakdown = post.get("scoring_breakdown") or {}
            core = _matched_criteria(breakdown.get("core_matches"))
            bonus = _matched_criteria(breakdown.get("bonus_matches"))
            if core:
                print(f"   ✅ Core Match: {', '.join(core)}")
            if bonus:
                print(f"   🌟 Bonus Match: {', '.join(bonus)}")


def _matched_criteria(entries):
    """Return the criterion names of the entries flagged as matched.

    Tolerates a missing/None list and entries lacking the ``matched`` or
    ``criterion`` keys, so a partial breakdown never aborts the output.
    """
    return [
        str(entry["criterion"])
        for entry in entries or []
        if entry.get("matched") and entry.get("criterion")
    ]


def run_search(args):
    """Execute the ``search`` command.

    Progress and configuration messages are written to stderr; the results
    are written to stdout, as JSON when ``args.json`` is set and as formatted
    text otherwise.

    Args:
        args: Parsed argparse namespace produced by the ``search`` sub-parser.

    Exits the process with status 1 when the search raises, and with status 0
    when the user interrupts it. Returns without searching when the
    description is empty.
    """
    if not args.description:
        print("Error: Description is required.", file=sys.stderr)
        return

    try:
        # Clarification is on by default. --no-clarify turns it off, and so
        # does --json, because JSON output must stay non-interactive.
        clarify = not (args.no_clarify or args.json)

        if not args.json:
            print(f"🔍 Starting search for: '{args.description}'", file=sys.stderr, flush=True)
            if clarify:
                print("ℹ️  Interactive mode enabled. The AI may ask clarifying questions.", file=sys.stderr, flush=True)
            else:
                print("⚡ Quick mode enabled. Skipping interactive clarification.", file=sys.stderr, flush=True)

            # Print configuration summary
            print("\n⚙️  Configuration:", file=sys.stderr)
            print(f"   • Sort: {args.sort}", file=sys.stderr)
            print(f"   • Time: {args.time_filter}", file=sys.stderr)
            print(f"   • Candidate Pool: {args.pool_size}", file=sys.stderr)
            print(f"   • Batch Size: {args.batch_size}", file=sys.stderr)
            print(f"   • Target Results: {args.limit}", file=sys.stderr)
            print(f"   • Discovery: {'Enabled' if args.discover else 'Disabled'}", file=sys.stderr)
            print(f"   • Deep Scan: {'Enabled' if args.deep_scan else 'Disabled'}\n", file=sys.stderr, flush=True)

        # "a, b,,c" -> ["a", "b", "c"]: trim whitespace around each name and
        # drop empty entries so stray spaces or commas don't become bogus
        # subreddit names. An effectively empty list means "not specified".
        subreddits = None
        if args.subreddits:
            names = (name.strip() for name in args.subreddits.split(","))
            subreddits = [name for name in names if name] or None

        # Execute search
        result = intent_search(
            description=args.description,
            clarify=clarify,
            target_posts=args.limit,
            provider=args.provider,
            subreddits=subreddits,
            discover=args.discover,
            sort=args.sort,
            time_filter=args.time_filter,
            pool_size=args.pool_size,
            batch_size=args.batch_size,
            deep_scan=args.deep_scan,
        )

        if args.json:
            output = {
                "session_id": result.session_id,
                "total_found": result.total_found,
                "total_scanned": result.total_scanned,
                "duration": result.duration_seconds,
                "posts": result.posts,
            }
            print(json.dumps(output, indent=2))
        else:
            format_text_output(result, verbose=args.verbose)

    except KeyboardInterrupt:
        # Status messages belong on stderr so that stdout stays clean for
        # consumers of the --json output.
        print("\n\n⚠️  Search cancelled by user.", file=sys.stderr)
        sys.exit(0)
    except Exception as e:
        print(f"\n❌ Error: {str(e)}", file=sys.stderr)
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)


def main():
    """CLI entry point: parse the command line and dispatch to the chosen command.

    With no sub-command the help text is printed and nothing else happens.
    Otherwise the environment is set up first and the command is run.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.command:
        setup_env()
        if args.command == "search":
            run_search(args)
    else:
        parser.print_help()


def _build_parser():
    """Create the top-level argument parser with its ``search`` sub-command."""
    parser = argparse.ArgumentParser(description="Reddit AI Curator CLI")
    commands = parser.add_subparsers(dest="command", help="Command to run")

    search = commands.add_parser("search", help="Intelligent intent-based search")

    # (flags, add_argument keyword options); order determines the help layout.
    search_arguments = (
        (("description",), {"help": "Natural language description of what you're looking for"}),
        (("--no-clarify",), {"action": "store_true", "help": "Skip interactive clarification questions"}),
        (("--limit",), {"type": int, "default": 10, "help": "Maximum number of posts to return (default: 10)"}),
        (("--json",), {"action": "store_true", "help": "Output results as JSON (implicitly disables clarification)"}),
        (("--provider",), {"default": "mistral", "help": "LLM provider to use (default: mistral)"}),
        (("--verbose", "-v"), {"action": "store_true", "help": "Show detailed debug info"}),
        (("--discover",), {"action": "store_true", "help": "Auto-discover relevant subreddits using AI agent"}),
        (("--subreddits",), {"help": "Comma-separated list of subreddits to search manually"}),
        (
            ("--sort",),
            {
                "default": "relevance",
                "choices": ["relevance", "hot", "top", "new"],
                "help": "Sort order (default: relevance)",
            },
        ),
        (
            ("--time-filter",),
            {
                "default": "month",
                "choices": ["all", "year", "month", "week", "day", "hour"],
                "help": "Time filter (default: month)",
            },
        ),
        (("--pool-size",), {"type": int, "default": 100, "help": "Number of candidates to fetch per query (default: 100)"}),
        (("--batch-size",), {"type": int, "default": 10, "help": "Number of posts to analyze per batch (default: 10)"}),
        (("--deep-scan",), {"action": "store_true", "help": "Enable deep scanning with fallback queries for maximum recall"}),
    )
    for flags, options in search_arguments:
        search.add_argument(*flags, **options)

    return parser


# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()