diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml new file mode 100644 index 0000000000..2e0612bb7a --- /dev/null +++ b/.github/workflows/deploy.yml @@ -0,0 +1,22 @@ +name: Deploy AUGUR +on: + push: + branches: + - main +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - name: Deploy via SSH + uses: appleboy/ssh-action@v1.0.3 + with: + host: ${{ secrets.VPS_HOST }} + username: ${{ secrets.VPS_USER }} + key: ${{ secrets.VPS_KEY }} + port: ${{ secrets.VPS_PORT }} + script: | + cd /opt/mirofish + git fetch origin + git reset --hard origin/main + docker compose restart mirofish + echo "✅ Deploy AUGUR concluído: $(date)" diff --git a/Dockerfile b/Dockerfile index e656468603..5be8e8cc97 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,29 +1,34 @@ FROM python:3.11 -# 安装 Node.js (满足 >=18)及必要工具 +# Instalar Node.js e ferramentas necessarias RUN apt-get update \ && apt-get install -y --no-install-recommends nodejs npm \ && rm -rf /var/lib/apt/lists/* -# 从 uv 官方镜像复制 uv +# uv para gerenciar deps Python COPY --from=ghcr.io/astral-sh/uv:0.9.26 /uv /uvx /bin/ WORKDIR /app -# 先复制依赖描述文件以利用缓存 +# Deps Node (cache layer) COPY package.json package-lock.json ./ COPY frontend/package.json frontend/package-lock.json ./frontend/ + +# Deps Python (cache layer) COPY backend/pyproject.toml backend/uv.lock ./backend/ -# 安装依赖(Node + Python) +# Instalar todas as dependencias RUN npm ci \ && npm ci --prefix frontend \ && cd backend && uv sync --frozen -# 复制项目源码 +# Instalar extras que nao estao no uv.lock ainda +RUN pip install fpdf2 flask-jwt-extended --break-system-packages --quiet 2>/dev/null || true + +# Copiar codigo fonte COPY . . 
EXPOSE 3000 5001 -# 同时启动前后端(开发模式) -CMD ["npm", "run", "dev"] \ No newline at end of file +# Iniciar frontend + backend +CMD ["npm", "run", "dev"] diff --git a/backend/app/__init__.py b/backend/app/__init__.py index aba624bba9..c09edd63e6 100644 --- a/backend/app/__init__.py +++ b/backend/app/__init__.py @@ -1,12 +1,10 @@ """ -MiroFish Backend - Flask应用工厂 +AUGUR Backend - Flask """ import os import warnings -# 抑制 multiprocessing resource_tracker 的警告(来自第三方库如 transformers) -# 需要在所有其他导入之前设置 warnings.filterwarnings("ignore", message=".*resource_tracker.*") from flask import Flask, request @@ -17,64 +15,85 @@ def create_app(config_class=Config): - """Flask应用工厂函数""" + """Flask app factory""" app = Flask(__name__) app.config.from_object(config_class) - # 设置JSON编码:确保中文直接显示(而不是 \uXXXX 格式) - # Flask >= 2.3 使用 app.json.ensure_ascii,旧版本使用 JSON_AS_ASCII 配置 if hasattr(app, 'json') and hasattr(app.json, 'ensure_ascii'): app.json.ensure_ascii = False - # 设置日志 logger = setup_logger('mirofish') - # 只在 reloader 子进程中打印启动信息(避免 debug 模式下打印两次) is_reloader_process = os.environ.get('WERKZEUG_RUN_MAIN') == 'true' debug_mode = app.config.get('DEBUG', False) should_log_startup = not debug_mode or is_reloader_process if should_log_startup: logger.info("=" * 50) - logger.info("MiroFish Backend 启动中...") + logger.info("MiroFish Backend ...") logger.info("=" * 50) - # 启用CORS - CORS(app, resources={r"/api/*": {"origins": "*"}}) + # CORS + cors_origins = os.environ.get('CORS_ORIGINS', '*').split(',') + CORS(app, resources={r"/api/*": {"origins": cors_origins}}) - # 注册模拟进程清理函数(确保服务器关闭时终止所有模拟进程) + # JWT Auth (opcional) + try: + from .auth import init_jwt, auth_bp + jwt = init_jwt(app) + app.register_blueprint(auth_bp) + if should_log_startup: + if jwt: + logger.info("JWT Auth configurado") + else: + logger.info("JWT Auth nao disponivel (flask-jwt-extended nao instalado)") + except Exception as e: + if should_log_startup: + logger.warning(f"Auth nao carregado: {e}") + + # Simulation Runner from 
.services.simulation_runner import SimulationRunner SimulationRunner.register_cleanup() if should_log_startup: - logger.info("已注册模拟进程清理函数") + logger.info("Simulacao") - # 请求日志中间件 @app.before_request def log_request(): logger = get_logger('mirofish.request') - logger.debug(f"请求: {request.method} {request.path}") + logger.debug(f": {request.method} {request.path}") if request.content_type and 'json' in request.content_type: - logger.debug(f"请求体: {request.get_json(silent=True)}") + logger.debug(f": {request.get_json(silent=True)}") @app.after_request def log_response(response): logger = get_logger('mirofish.request') - logger.debug(f"响应: {response.status_code}") + logger.debug(f": {response.status_code}") return response - # 注册蓝图 - from .api import graph_bp, simulation_bp, report_bp + # Blueprints principais + from .api import graph_bp, simulation_bp, report_bp, analytics_bp + from .api.public import public_bp app.register_blueprint(graph_bp, url_prefix='/api/graph') app.register_blueprint(simulation_bp, url_prefix='/api/simulation') app.register_blueprint(report_bp, url_prefix='/api/report') + app.register_blueprint(analytics_bp, url_prefix='/api/analytics') + app.register_blueprint(public_bp, url_prefix='/api/public') + + # Share API (opcional) + try: + from .api.share import share_bp + app.register_blueprint(share_bp) + if should_log_startup: + logger.info("Share API disponivel") + except Exception as e: + if should_log_startup: + logger.warning(f"Share API nao disponivel: {e}") - # 健康检查 @app.route('/health') def health(): return {'status': 'ok', 'service': 'MiroFish Backend'} if should_log_startup: - logger.info("MiroFish Backend 启动完成") + logger.info("MiroFish Backend pronto") return app - diff --git a/backend/app/api/__init__.py b/backend/app/api/__init__.py index ffda743a31..e081cc5882 100644 --- a/backend/app/api/__init__.py +++ b/backend/app/api/__init__.py @@ -1,14 +1,15 @@ """ -API路由模块 +API """ from flask import Blueprint -graph_bp = Blueprint('graph', 
__name__) +graph_bp = Blueprint('graph', __name__) simulation_bp = Blueprint('simulation', __name__) -report_bp = Blueprint('report', __name__) +report_bp = Blueprint('report', __name__) +analytics_bp = Blueprint('analytics', __name__) -from . import graph # noqa: E402, F401 +from . import graph # noqa: E402, F401 from . import simulation # noqa: E402, F401 -from . import report # noqa: E402, F401 - +from . import report # noqa: E402, F401 +from . import analytics # noqa: E402, F401 diff --git a/backend/app/api/agent_preview_endpoints.py b/backend/app/api/agent_preview_endpoints.py new file mode 100644 index 0000000000..7f182b90ee --- /dev/null +++ b/backend/app/api/agent_preview_endpoints.py @@ -0,0 +1,292 @@ +""" +AUGUR Agent Preview — Endpoints para preview e customização de agentes. + +Adicionar ao final de backend/app/api/simulation.py + +Endpoints: + POST /api/simulation/preview-agents — Gera preview dos agentes para aprovação + POST /api/simulation/custom-agent — Gera perfil completo a partir de descrição livre + POST /api/simulation/approve-agents — Salva lista aprovada de agentes +""" + +# ============================================================ +# ADICIONAR AO FINAL DE backend/app/api/simulation.py +# ============================================================ + +# --- Endpoint 1: Preview de agentes --- + +""" +@simulation_bp.route('/preview-agents', methods=['POST']) +def preview_agents(): + ''' + Gera preview dos agentes para o usuário revisar antes de iniciar. + Usa o mesmo pipeline de generate-profiles, mas retorna para aprovação. 
+ + JSON: + { + "graph_id": "mirofish_xxxx", + "entity_types": null, // null = todos + "num_agents": 20 + } + + Retorna: + { + "success": true, + "data": { + "agents": [ + { + "id": "agent_001", + "name": "Maria da Silva", + "username": "maria_silva", + "bio": "Dona de casa, 52 anos...", + "persona": "Texto completo da persona...", + "age": 52, + "gender": "female", + "mbti": "ISFJ", + "profession": "Dona de casa", + "source_entity_type": "Consumer", + "tipo": "Neutro", + "_custom": false + } + ], + "count": 20, + "distribution": {"Apoiador": 6, "Neutro": 7, "Resistente": 4, "Cauteloso": 3} + } + } + ''' + try: + data = request.get_json() or {} + graph_id = data.get('graph_id') + + if not graph_id: + return jsonify({"success": False, "error": "graph_id obrigatório"}), 400 + + entity_types = data.get('entity_types') + num_agents = data.get('num_agents', 20) + + # Ler entidades do grafo + reader = ZepEntityReader() + filtered = reader.filter_defined_entities( + graph_id=graph_id, + defined_entity_types=entity_types, + enrich_with_edges=True + ) + + if filtered.filtered_count == 0: + return jsonify({"success": False, "error": "Nenhuma entidade encontrada no grafo"}), 400 + + # Limitar ao número solicitado + entities = filtered.entities[:num_agents] + + # Gerar perfis + generator = OasisProfileGenerator(graph_id=graph_id) + profiles = generator.generate_profiles_from_entities( + entities=entities, + use_llm=True, + graph_id=graph_id + ) + + # Formatar para preview + agents = [] + for i, p in enumerate(profiles): + agent = p.to_dict() + agent['id'] = f'agent_{i:03d}' + agent['_custom'] = False + agents.append(agent) + + # Calcular distribuição + distribution = {} + for a in agents: + tipo = a.get('source_entity_type', 'Outro') + distribution[tipo] = distribution.get(tipo, 0) + 1 + + return jsonify({ + "success": True, + "data": { + "agents": agents, + "count": len(agents), + "distribution": distribution + } + }) + + except Exception as e: + logger.error(f"Preview agents 
falhou: {e}") + return jsonify({"success": False, "error": str(e)}), 500 +""" + + +# --- Endpoint 2: Gerar agente customizado --- + +""" +@simulation_bp.route('/custom-agent', methods=['POST']) +def create_custom_agent(): + ''' + Gera um perfil completo a partir de uma descrição em texto livre. + + JSON: + { + "description": "Marcia, 35 anos, compra tenis pela Netshoes, nunca entra em loja fisica", + "simulation_requirement": "abertura de loja de calçados em Pádua", + "graph_id": "mirofish_xxxx" + } + + Retorna: + { + "success": true, + "data": { + "id": "custom_001", + "name": "Márcia Oliveira", + "bio": "Compradora online, 35 anos...", + "persona": "...", + "age": 35, + "profession": "Analista de Marketing", + "source_entity_type": "Consumer", + "_custom": true + } + } + ''' + try: + data = request.get_json() or {} + description = data.get('description', '') + + if not description or len(description) < 5: + return jsonify({"success": False, "error": "Descrição muito curta"}), 400 + + simulation_requirement = data.get('simulation_requirement', '') + + # Usar LLM para gerar perfil completo + from ..utils.llm_client import LLMClient + from ..config import Config + + llm = LLMClient() + + prompt = f"""Gere um perfil de agente para simulação de opinião pública em redes sociais. + +DESCRIÇÃO DO USUÁRIO: {description} +CONTEXTO DA SIMULAÇÃO: {simulation_requirement} + +Gere um JSON com: +- "name": Nome completo fictício brasileiro +- "username": Username para redes sociais (snake_case) +- "bio": Biografia curta (máx 200 chars) em PT-BR +- "persona": Descrição detalhada (máx 1500 chars) incluindo personalidade, comportamento, motivações +- "age": Idade (número inteiro) +- "gender": "male" ou "female" +- "mbti": Tipo MBTI +- "profession": Profissão em PT-BR +- "interested_topics": Lista de 3-5 tópicos de interesse +- "source_entity_type": Tipo mais próximo (Consumer, Influencer, Competitor, Professional, Person) + +Retorne APENAS JSON válido. 
Tudo em PT-BR exceto gender e mbti.""" + + result = llm.chat_json( + messages=[ + {"role": "system", "content": "Você gera perfis de agentes para simulação. Retorne apenas JSON válido."}, + {"role": "user", "content": prompt} + ], + temperature=0.7, + max_tokens=1000 + ) + + # Adicionar marcadores + result['id'] = f'custom_{int(time.time())}' + result['_custom'] = True + + return jsonify({ + "success": True, + "data": result + }) + + except Exception as e: + logger.error(f"Custom agent falhou: {e}") + return jsonify({"success": False, "error": str(e)}), 500 +""" + + +# --- Endpoint 3: Aprovar lista de agentes --- + +""" +@simulation_bp.route('/approve-agents', methods=['POST']) +def approve_agents(): + ''' + Salva a lista aprovada de agentes para uso na simulação. + Chamado após o usuário revisar o preview. + + JSON: + { + "simulation_id": "sim_xxxx", + "agents": [...] // Lista completa de agentes aprovados + } + + Salva em: data/simulations/{sim_id}/approved_profiles.json + ''' + try: + data = request.get_json() or {} + simulation_id = data.get('simulation_id') + agents = data.get('agents', []) + + if not simulation_id: + return jsonify({"success": False, "error": "simulation_id obrigatório"}), 400 + + if not agents: + return jsonify({"success": False, "error": "Lista de agentes vazia"}), 400 + + # Salvar como reddit_profiles.json (formato que a simulação espera) + sim_dir = os.path.join(Config.OASIS_SIMULATION_DATA_DIR, simulation_id) + os.makedirs(sim_dir, exist_ok=True) + + profiles_path = os.path.join(sim_dir, "reddit_profiles.json") + + # Converter para formato Reddit se necessário + reddit_profiles = [] + for agent in agents: + profile = { + "username": agent.get("username", agent.get("name", "agent").replace(" ", "_").lower()), + "name": agent.get("name", ""), + "bio": agent.get("bio", ""), + "persona": agent.get("persona", agent.get("bio", "")), + "age": agent.get("age", 30), + "gender": agent.get("gender", "female"), + "mbti": agent.get("mbti", "INFP"), + 
"country": agent.get("country", "Brasil"), + "profession": agent.get("profession", ""), + "interested_topics": agent.get("interested_topics", []), + } + reddit_profiles.append(profile) + + with open(profiles_path, 'w', encoding='utf-8') as f: + json.dump(reddit_profiles, f, ensure_ascii=False, indent=2) + + logger.info(f"Approved {len(reddit_profiles)} agents for simulation {simulation_id}") + + return jsonify({ + "success": True, + "data": { + "simulation_id": simulation_id, + "agents_count": len(reddit_profiles), + "profiles_path": profiles_path + } + }) + + except Exception as e: + logger.error(f"Approve agents falhou: {e}") + return jsonify({"success": False, "error": str(e)}), 500 +""" + +# ============================================================ +# NOTA DE INTEGRAÇÃO +# ============================================================ +# +# Para ativar estes endpoints: +# 1. Copiar o código dos 3 endpoints (sem as aspas triplas) +# para o final de backend/app/api/simulation.py +# 2. Importar time no topo do arquivo: import time +# +# Para conectar ao frontend: +# 1. Copiar AgentPreview.vue para frontend/src/components/ +# 2. No SimulationView.vue, adicionar tela 'preview' entre +# 'agents' e 'pipeline' +# 3. Após gerar agentes, mostrar preview com AgentPreview +# 4. No confirmarAgentes(), chamar preview-agents e mostrar +# +# ============================================================ diff --git a/backend/app/api/analytics.py b/backend/app/api/analytics.py new file mode 100644 index 0000000000..f3d06a2287 --- /dev/null +++ b/backend/app/api/analytics.py @@ -0,0 +1,246 @@ +""" +AUGUR Analytics API +Extrai dados ricos das simulações para visualização no ReportView. 
+ +Endpoint: GET /api/analytics/ +""" + +import json +import os +import sqlite3 +import traceback +from flask import Blueprint, jsonify +from ..utils.locale import t +import logging + +logger = logging.getLogger(__name__) + +analytics_bp = Blueprint('analytics', __name__) + +# Usar Config para consistência com simulation.py +from ..config import Config + + +def _sim_dir(simulation_id: str) -> str: + primary = os.path.join(Config.OASIS_SIMULATION_DATA_DIR, simulation_id) + if os.path.exists(primary): + return primary + for alt in [f'/app/uploads/simulations/{simulation_id}', + os.path.join(os.path.dirname(__file__), '..', '..', 'uploads', 'simulations', simulation_id)]: + if os.path.exists(alt): + return alt + return primary + + +def _query_db(db_path: str, query: str, params=()): + """Executa query no SQLite e retorna lista de dicts.""" + if not os.path.exists(db_path): + return [] + try: + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + cur = conn.execute(query, params) + rows = [dict(r) for r in cur.fetchall()] + conn.close() + return rows + except Exception as e: + logger.warning(f"DB query error ({db_path}): {e}") + return [] + + +def _read_actions(jsonl_path: str) -> list: + """Lê o arquivo actions.jsonl e retorna lista de eventos.""" + if not os.path.exists(jsonl_path): + return [] + events = [] + try: + with open(jsonl_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + if line: + try: + events.append(json.loads(line)) + except json.JSONDecodeError: + pass + except Exception as e: + logger.warning(f"Actions read error: {e}") + return events + + +def _extract_rounds(events: list) -> list: + """Extrai dados por rodada dos eventos.""" + rounds = {} + for ev in events: + r = ev.get('round') + if r is None: + continue + if r not in rounds: + rounds[r] = {'round': r, 'actions': 0, 'timestamp': None} + if ev.get('event_type') == 'round_end': + rounds[r]['actions'] = ev.get('actions_count', 0) + rounds[r]['timestamp'] = 
ev.get('timestamp') + return sorted(rounds.values(), key=lambda x: x['round']) + + +@analytics_bp.route('/', methods=['GET']) +def get_analytics(simulation_id: str): + """ + Retorna dados analíticos ricos de uma simulação. + + Returns: + { + "success": true, + "data": { + "simulation_id": "sim_xxx", + "twitter": { rounds, totals, top_posts, top_agents }, + "reddit": { rounds, totals, top_posts, top_agents }, + "combined": { total_interactions, peak_round, most_active_agent } + } + } + """ + try: + sim_dir = _sim_dir(simulation_id) + if not os.path.exists(sim_dir): + # Retornar dados vazios em vez de 404 para não quebrar o frontend + return jsonify({"success": True, "data": { + "simulation_id": simulation_id, + "twitter": {"rounds": [], "totals": {"posts": 0, "comments": 0, "likes": 0, "follows": 0}, "top_posts": [], "top_agents": [], "engagement": []}, + "reddit": {"rounds": [], "totals": {"posts": 0, "comments": 0, "likes": 0}, "top_posts": [], "top_agents": []}, + "combined": {"total_interactions": 0, "rounds": []} + }}) + + # ─── Twitter ────────────────────────────────────────── + tw_db = os.path.join(sim_dir, 'twitter_simulation.db') + tw_jsonl = os.path.join(sim_dir, 'twitter', 'actions.jsonl') + + tw_events = _read_actions(tw_jsonl) + tw_rounds = _extract_rounds(tw_events) + + tw_totals = { + 'posts': _query_db(tw_db, 'SELECT COUNT(*) as n FROM post')[0].get('n', 0) if _query_db(tw_db, 'SELECT COUNT(*) as n FROM post') else 0, + 'comments': _query_db(tw_db, 'SELECT COUNT(*) as n FROM comment')[0].get('n', 0) if _query_db(tw_db, 'SELECT COUNT(*) as n FROM comment') else 0, + 'likes': _query_db(tw_db, 'SELECT COUNT(*) as n FROM "like"')[0].get('n', 0) if _query_db(tw_db, 'SELECT COUNT(*) as n FROM "like"') else 0, + 'follows': _query_db(tw_db, 'SELECT COUNT(*) as n FROM follow')[0].get('n', 0) if _query_db(tw_db, 'SELECT COUNT(*) as n FROM follow') else 0, + } + + tw_top_posts = _query_db(tw_db, ''' + SELECT p.post_id, p.content, p.num_likes, p.num_dislikes, 
p.num_reports, + u.name, u.user_name + FROM post p LEFT JOIN user u ON p.user_id = u.user_id + ORDER BY p.num_likes DESC LIMIT 10 + ''') + + tw_top_agents = _query_db(tw_db, ''' + SELECT u.user_id, u.name, u.user_name, u.bio, + u.num_followers, u.num_followings, + COUNT(p.post_id) as posts_count, + COALESCE(SUM(p.num_likes), 0) as total_likes_received + FROM user u LEFT JOIN post p ON u.user_id = p.user_id + GROUP BY u.user_id + ORDER BY total_likes_received DESC LIMIT 10 + ''') + + # Engajamento por agente (posts + likes recebidos) + tw_engagement = _query_db(tw_db, ''' + SELECT u.name, + COUNT(DISTINCT p.post_id) as posts, + COALESCE(SUM(p.num_likes), 0) as likes_received, + COALESCE(SUM(p.num_dislikes), 0) as dislikes_received + FROM user u LEFT JOIN post p ON u.user_id = p.user_id + WHERE u.name IS NOT NULL AND u.name != '' + GROUP BY u.user_id + HAVING posts > 0 + ORDER BY posts DESC LIMIT 15 + ''') + + # ─── Reddit ─────────────────────────────────────────── + rd_db = os.path.join(sim_dir, 'reddit_simulation.db') + rd_jsonl = os.path.join(sim_dir, 'reddit', 'actions.jsonl') + + rd_events = _read_actions(rd_jsonl) + rd_rounds = _extract_rounds(rd_events) + + rd_totals = { + 'posts': _query_db(rd_db, 'SELECT COUNT(*) as n FROM post')[0].get('n', 0) if _query_db(rd_db, 'SELECT COUNT(*) as n FROM post') else 0, + 'comments': _query_db(rd_db, 'SELECT COUNT(*) as n FROM comment')[0].get('n', 0) if _query_db(rd_db, 'SELECT COUNT(*) as n FROM comment') else 0, + 'likes': _query_db(rd_db, 'SELECT COUNT(*) as n FROM "like"')[0].get('n', 0) if _query_db(rd_db, 'SELECT COUNT(*) as n FROM "like"') else 0, + 'follows': _query_db(rd_db, 'SELECT COUNT(*) as n FROM follow')[0].get('n', 0) if _query_db(rd_db, 'SELECT COUNT(*) as n FROM follow') else 0, + } + + rd_top_posts = _query_db(rd_db, ''' + SELECT p.post_id, p.content, p.num_likes, p.num_dislikes, p.num_reports, + u.name, u.user_name + FROM post p LEFT JOIN user u ON p.user_id = u.user_id + ORDER BY p.num_likes DESC 
LIMIT 10 + ''') + + rd_top_agents = _query_db(rd_db, ''' + SELECT u.user_id, u.name, u.user_name, u.bio, + u.num_followers, u.num_followings, + COUNT(p.post_id) as posts_count, + COALESCE(SUM(p.num_likes), 0) as total_likes_received + FROM user u LEFT JOIN post p ON u.user_id = p.user_id + GROUP BY u.user_id + ORDER BY total_likes_received DESC LIMIT 10 + ''') + + # ─── Combined metrics ───────────────────────────────── + all_rounds = sorted( + set([r['round'] for r in tw_rounds] + [r['round'] for r in rd_rounds]) + ) + tw_by_round = {r['round']: r['actions'] for r in tw_rounds} + rd_by_round = {r['round']: r['actions'] for r in rd_rounds} + + combined_rounds = [ + { + 'round': r, + 'twitter': tw_by_round.get(r, 0), + 'reddit': rd_by_round.get(r, 0), + 'total': tw_by_round.get(r, 0) + rd_by_round.get(r, 0), + } + for r in all_rounds + ] + + total_interactions = tw_totals['posts'] + rd_totals['posts'] + tw_totals['comments'] + rd_totals['comments'] + peak_round = max(combined_rounds, key=lambda x: x['total'], default={'round': 0, 'total': 0}) + + # Simulation start/end timestamps + sim_start = next((e.get('timestamp') for e in tw_events if e.get('event_type') == 'simulation_start'), None) + sim_end = next((e.get('timestamp') for e in tw_events if e.get('event_type') == 'simulation_end'), None) + + return jsonify({ + "success": True, + "data": { + "simulation_id": simulation_id, + "twitter": { + "rounds": tw_rounds, + "totals": tw_totals, + "top_posts": tw_top_posts, + "top_agents": tw_top_agents, + "engagement": tw_engagement, + }, + "reddit": { + "rounds": rd_rounds, + "totals": rd_totals, + "top_posts": rd_top_posts, + "top_agents": rd_top_agents, + }, + "combined": { + "rounds": combined_rounds, + "total_interactions": total_interactions, + "peak_round": peak_round, + "simulation_start": sim_start, + "simulation_end": sim_end, + "total_rounds": len(all_rounds), + } + } + }) + + except Exception as e: + logger.error(f"Analytics error for {simulation_id}: 
{e}\n{traceback.format_exc()}") + return jsonify({"success": True, "data": { + "simulation_id": simulation_id, + "twitter": {"rounds": [], "totals": {"posts": 0, "comments": 0, "likes": 0, "follows": 0}, "top_posts": [], "top_agents": [], "engagement": []}, + "reddit": {"rounds": [], "totals": {"posts": 0, "comments": 0, "likes": 0}, "top_posts": [], "top_agents": []}, + "combined": {"total_interactions": 0, "rounds": []} + }}) diff --git a/backend/app/api/graph.py b/backend/app/api/graph.py index 759ff48b0e..41ed7777df 100644 --- a/backend/app/api/graph.py +++ b/backend/app/api/graph.py @@ -1,6 +1,5 @@ """ -图谱相关API路由 -采用项目上下文机制,服务端持久化状态 +GrafoAPI """ import os @@ -13,30 +12,35 @@ from ..services.ontology_generator import OntologyGenerator from ..services.graph_builder import GraphBuilderService from ..services.text_processor import TextProcessor +from ..services.ontology_prompts_v2 import detect_sector_and_decision + +try: + from ..services.market_research import MarketResearcher, build_market_context_section + HAS_MARKET_RESEARCH = True +except ImportError: + HAS_MARKET_RESEARCH = False from ..utils.file_parser import FileParser from ..utils.logger import get_logger from ..utils.locale import t, get_locale, set_locale from ..models.task import TaskManager, TaskStatus from ..models.project import ProjectManager, ProjectStatus -# 获取日志器 logger = get_logger('mirofish.api') def allowed_file(filename: str) -> bool: - """检查文件扩展名是否允许""" + """""" if not filename or '.' 
not in filename: return False ext = os.path.splitext(filename)[1].lower().lstrip('.') return ext in Config.ALLOWED_EXTENSIONS -# ============== 项目管理接口 ============== +# ============== ============== @graph_bp.route('/project/', methods=['GET']) def get_project(project_id: str): """ - 获取项目详情 """ project = ProjectManager.get_project(project_id) @@ -55,7 +59,6 @@ def get_project(project_id: str): @graph_bp.route('/project/list', methods=['GET']) def list_projects(): """ - 列出所有项目 """ limit = request.args.get('limit', 50, type=int) projects = ProjectManager.list_projects(limit=limit) @@ -70,7 +73,6 @@ def list_projects(): @graph_bp.route('/project/', methods=['DELETE']) def delete_project(project_id: str): """ - 删除项目 """ success = ProjectManager.delete_project(project_id) @@ -89,7 +91,7 @@ def delete_project(project_id: str): @graph_bp.route('/project//reset', methods=['POST']) def reset_project(project_id: str): """ - 重置项目状态(用于重新构建图谱) + Grafo """ project = ProjectManager.get_project(project_id) @@ -99,7 +101,7 @@ def reset_project(project_id: str): "error": t('api.projectNotFound', id=project_id) }), 404 - # 重置到本体已生成状态 + # Gerar if project.ontology: project.status = ProjectStatus.ONTOLOGY_GENERATED else: @@ -117,22 +119,20 @@ def reset_project(project_id: str): }) -# ============== 接口1:上传文件并生成本体 ============== +# ============== 1Gerar ============== @graph_bp.route('/ontology/generate', methods=['POST']) def generate_ontology(): """ - 接口1:上传文件,分析生成本体定义 + 1AnáliseGerar - 请求方式:multipart/form-data + multipart/form-data - 参数: - files: 上传的文件(PDF/MD/TXT),可多个 - simulation_requirement: 模拟需求描述(必填) - project_name: 项目名称(可选) - additional_context: 额外说明(可选) + files: PDF/MD/TXT + simulation_requirement: Descrição dos requisitos da simulação + project_name: + additional_context: - 返回: { "success": true, "data": { @@ -148,15 +148,14 @@ def generate_ontology(): } """ try: - logger.info("=== 开始生成本体定义 ===") + logger.info("=== Gerar ===") - # 获取参数 simulation_requirement = 
request.form.get('simulation_requirement', '') project_name = request.form.get('project_name', 'Unnamed Project') additional_context = request.form.get('additional_context', '') - logger.debug(f"项目名称: {project_name}") - logger.debug(f"模拟需求: {simulation_requirement[:100]}...") + logger.debug(f": {project_name}") + logger.debug(f"Simulação: {simulation_requirement[:100]}...") if not simulation_requirement: return jsonify({ @@ -164,7 +163,6 @@ def generate_ontology(): "error": t('api.requireSimulationRequirement') }), 400 - # 获取上传的文件 uploaded_files = request.files.getlist('files') if not uploaded_files or all(not f.filename for f in uploaded_files): return jsonify({ @@ -172,18 +170,15 @@ def generate_ontology(): "error": t('api.requireFileUpload') }), 400 - # 创建项目 project = ProjectManager.create_project(name=project_name) project.simulation_requirement = simulation_requirement - logger.info(f"创建项目: {project.project_id}") + logger.info(f": {project.project_id}") - # 保存文件并提取文本 document_texts = [] all_text = "" for file in uploaded_files: if file and file.filename and allowed_file(file.filename): - # 保存文件到项目目录 file_info = ProjectManager.save_file_to_project( project.project_id, file, @@ -194,7 +189,6 @@ def generate_ontology(): "size": file_info["size"] }) - # 提取文本 text = FileParser.extract_text(file_info["path"]) text = TextProcessor.preprocess_text(text) document_texts.append(text) @@ -207,13 +201,35 @@ def generate_ontology(): "error": t('api.noDocProcessed') }), 400 - # 保存提取的文本 project.total_text_length = len(all_text) ProjectManager.save_extracted_text(project.project_id, all_text) - logger.info(f"文本提取完成,共 {len(all_text)} 字符") + logger.info(f" {len(all_text)} ") - # 生成本体 - logger.info("调用 LLM 生成本体定义...") + # AUGUR v2: Pesquisa de mercado via Perplexity (antes da ontologia) + market_data = None + if HAS_MARKET_RESEARCH: + try: + researcher = MarketResearcher() + if researcher.is_available: + sector, decision = detect_sector_and_decision(simulation_requirement) + 
logger.info(f"Market research: setor={sector}, decisao={decision}") + market_data = researcher.research( + simulation_requirement=simulation_requirement, + sector=sector, + decision=decision, + ) + # Injetar dados reais como contexto adicional + if market_data.get("contexto_formatado"): + if additional_context: + additional_context += "\n\n" + market_data["contexto_formatado"] + else: + additional_context = market_data["contexto_formatado"] + logger.info(f"Market research: {market_data['queries_executadas']} queries, {len(market_data.get('fontes_unicas',[]))} fontes, {market_data['tempo_segundos']}s") + except Exception as e: + logger.warning(f"Market research falhou (non-fatal): {e}") + + # Gerar ontologia + logger.info(" LLM Gerar...") generator = OntologyGenerator() ontology = generator.generate( document_texts=document_texts, @@ -221,19 +237,21 @@ def generate_ontology(): additional_context=additional_context if additional_context else None ) - # 保存本体到项目 entity_count = len(ontology.get("entity_types", [])) edge_count = len(ontology.get("edge_types", [])) - logger.info(f"本体生成完成: {entity_count} 个实体类型, {edge_count} 个关系类型") + logger.info(f"Gerar: {entity_count} Entidade, {edge_count} Relacionamento") project.ontology = { "entity_types": ontology.get("entity_types", []), "edge_types": ontology.get("edge_types", []) } project.analysis_summary = ontology.get("analysis_summary", "") + # AUGUR v2: salvar dados de mercado para uso no relatório + if market_data and market_data.get("dados_mercado"): + project.market_research = market_data project.status = ProjectStatus.ONTOLOGY_GENERATED ProjectManager.save_project(project) - logger.info(f"=== 本体生成完成 === 项目ID: {project.project_id}") + logger.info(f"=== Gerar === ID: {project.project_id}") return jsonify({ "success": True, @@ -255,49 +273,47 @@ def generate_ontology(): }), 500 -# ============== 接口2:构建图谱 ============== +# ============== 2Grafo ============== @graph_bp.route('/build', methods=['POST']) def build_graph(): """ 
- 接口2:根据project_id构建图谱 + 2project_idGrafo - 请求(JSON): + JSON { - "project_id": "proj_xxxx", // 必填,来自接口1 - "graph_name": "图谱名称", // 可选 - "chunk_size": 500, // 可选,默认500 - "chunk_overlap": 50 // 可选,默认50 + "project_id": "proj_xxxx", // 1 + "graph_name": "Grafo", // + "chunk_size": 500, // 500 + "chunk_overlap": 50 // 50 } - 返回: { "success": true, "data": { "project_id": "proj_xxxx", "task_id": "task_xxxx", - "message": "图谱构建任务已启动" + "message": "Grafo" } } """ try: - logger.info("=== 开始构建图谱 ===") + logger.info("=== Grafo ===") - # 检查配置 + # Configuração errors = [] if not Config.ZEP_API_KEY: errors.append(t('api.zepApiKeyMissing')) if errors: - logger.error(f"配置错误: {errors}") + logger.error(f"Configuração: {errors}") return jsonify({ "success": False, "error": t('api.configError', details="; ".join(errors)) }), 500 - # 解析请求 data = request.get_json() or {} project_id = data.get('project_id') - logger.debug(f"请求参数: project_id={project_id}") + logger.debug(f": project_id={project_id}") if not project_id: return jsonify({ @@ -305,7 +321,6 @@ def build_graph(): "error": t('api.requireProjectId') }), 400 - # 获取项目 project = ProjectManager.get_project(project_id) if not project: return jsonify({ @@ -313,8 +328,7 @@ def build_graph(): "error": t('api.projectNotFound', id=project_id) }), 404 - # 检查项目状态 - force = data.get('force', False) # 强制重新构建 + force = data.get('force', False) # if project.status == ProjectStatus.CREATED: return jsonify({ @@ -329,23 +343,21 @@ def build_graph(): "task_id": project.graph_build_task_id }), 400 - # 如果强制重建,重置状态 if force and project.status in [ProjectStatus.GRAPH_BUILDING, ProjectStatus.FAILED, ProjectStatus.GRAPH_COMPLETED]: project.status = ProjectStatus.ONTOLOGY_GENERATED project.graph_id = None project.graph_build_task_id = None project.error = None - # 获取配置 + # Configuração graph_name = data.get('graph_name', project.name or 'MiroFish Graph') chunk_size = data.get('chunk_size', project.chunk_size or Config.DEFAULT_CHUNK_SIZE) chunk_overlap = 
data.get('chunk_overlap', project.chunk_overlap or Config.DEFAULT_CHUNK_OVERLAP) - # 更新项目配置 + # Configuração project.chunk_size = chunk_size project.chunk_overlap = chunk_overlap - # 获取提取的文本 text = ProjectManager.get_extracted_text(project_id) if not text: return jsonify({ @@ -353,7 +365,6 @@ def build_graph(): "error": t('api.textNotFound') }), 400 - # 获取本体 ontology = project.ontology if not ontology: return jsonify({ @@ -361,12 +372,10 @@ def build_graph(): "error": t('api.ontologyNotFound') }), 400 - # 创建异步任务 task_manager = TaskManager() - task_id = task_manager.create_task(f"构建图谱: {graph_name}") - logger.info(f"创建图谱构建任务: task_id={task_id}, project_id={project_id}") + task_id = task_manager.create_task(f"Grafo: {graph_name}") + logger.info(f"Grafo: task_id={task_id}, project_id={project_id}") - # 更新项目状态 project.status = ProjectStatus.GRAPH_BUILDING project.graph_build_task_id = task_id ProjectManager.save_project(project) @@ -374,22 +383,20 @@ def build_graph(): # Capture locale before spawning background thread current_locale = get_locale() - # 启动后台任务 def build_task(): set_locale(current_locale) build_logger = get_logger('mirofish.build') try: - build_logger.info(f"[{task_id}] 开始构建图谱...") + build_logger.info(f"[{task_id}] Grafo...") task_manager.update_task( task_id, status=TaskStatus.PROCESSING, message=t('progress.initGraphService') ) - # 创建图谱构建服务 + # Grafo builder = GraphBuilderService(api_key=Config.ZEP_API_KEY) - # 分块 task_manager.update_task( task_id, message=t('progress.textChunking'), @@ -402,7 +409,7 @@ def build_task(): ) total_chunks = len(chunks) - # 创建图谱 + # Grafo task_manager.update_task( task_id, message=t('progress.creatingZepGraph'), @@ -410,11 +417,10 @@ def build_task(): ) graph_id = builder.create_graph(name=graph_name) - # 更新项目的graph_id + # graph_id project.graph_id = graph_id ProjectManager.save_project(project) - # 设置本体 task_manager.update_task( task_id, message=t('progress.settingOntology'), @@ -422,7 +428,7 @@ def build_task(): ) 
builder.set_ontology(graph_id, ontology) - # 添加文本(progress_callback 签名是 (msg, progress_ratio)) + # progress_callback (msg, progress_ratio) def add_progress_callback(msg, progress_ratio): progress = 15 + int(progress_ratio * 40) # 15% - 55% task_manager.update_task( @@ -444,7 +450,7 @@ def add_progress_callback(msg, progress_ratio): progress_callback=add_progress_callback ) - # 等待Zep处理完成(查询每个episode的processed状态) + # Zepepisodeprocessed task_manager.update_task( task_id, message=t('progress.waitingZepProcess'), @@ -461,7 +467,7 @@ def wait_progress_callback(msg, progress_ratio): builder._wait_for_episodes(episode_uuids, wait_progress_callback) - # 获取图谱数据 + # Grafo task_manager.update_task( task_id, message=t('progress.fetchingGraphData'), @@ -469,15 +475,13 @@ def wait_progress_callback(msg, progress_ratio): ) graph_data = builder.get_graph_data(graph_id) - # 更新项目状态 project.status = ProjectStatus.GRAPH_COMPLETED ProjectManager.save_project(project) node_count = graph_data.get("node_count", 0) edge_count = graph_data.get("edge_count", 0) - build_logger.info(f"[{task_id}] 图谱构建完成: graph_id={graph_id}, 节点={node_count}, 边={edge_count}") + build_logger.info(f"[{task_id}] Grafo: graph_id={graph_id}, ={node_count}, ={edge_count}") - # 完成 task_manager.update_task( task_id, status=TaskStatus.COMPLETED, @@ -493,8 +497,8 @@ def wait_progress_callback(msg, progress_ratio): ) except Exception as e: - # 更新项目状态为失败 - build_logger.error(f"[{task_id}] 图谱构建失败: {str(e)}") + # Falhou + build_logger.error(f"[{task_id}] GrafoFalhou: {str(e)}") build_logger.debug(traceback.format_exc()) project.status = ProjectStatus.FAILED @@ -508,7 +512,6 @@ def wait_progress_callback(msg, progress_ratio): error=traceback.format_exc() ) - # 启动后台线程 thread = threading.Thread(target=build_task, daemon=True) thread.start() @@ -529,12 +532,11 @@ def wait_progress_callback(msg, progress_ratio): }), 500 -# ============== 任务查询接口 ============== +# ============== ============== @graph_bp.route('/task/', 
methods=['GET']) def get_task(task_id: str): """ - 查询任务状态 """ task = TaskManager().get_task(task_id) @@ -553,7 +555,6 @@ def get_task(task_id: str): @graph_bp.route('/tasks', methods=['GET']) def list_tasks(): """ - 列出所有任务 """ tasks = TaskManager().list_tasks() @@ -564,12 +565,12 @@ def list_tasks(): }) -# ============== 图谱数据接口 ============== +# ============== Grafo ============== @graph_bp.route('/data/', methods=['GET']) def get_graph_data(graph_id: str): """ - 获取图谱数据(节点和边) + Grafo """ try: if not Config.ZEP_API_KEY: @@ -597,7 +598,7 @@ def get_graph_data(graph_id: str): @graph_bp.route('/delete/', methods=['DELETE']) def delete_graph(graph_id: str): """ - 删除Zep图谱 + ZepGrafo """ try: if not Config.ZEP_API_KEY: diff --git a/backend/app/api/public.py b/backend/app/api/public.py new file mode 100644 index 0000000000..4093fcb6c5 --- /dev/null +++ b/backend/app/api/public.py @@ -0,0 +1,151 @@ +""" +AUGUR Public API +Endpoints públicos para compartilhar relatórios via token. +Não requer autenticação. 
+""" + +import uuid +import json +import os +import logging +from flask import Blueprint, jsonify, request +from ..services.report_agent import ReportManager +from ..config import Config + +logger = logging.getLogger(__name__) + +public_bp = Blueprint('public', __name__) + + +def _get_report_by_token(token: str): + """Busca relatório pelo public_token.""" + reports_dir = os.path.join(Config.UPLOAD_FOLDER, 'reports') + if not os.path.exists(reports_dir): + return None + + for report_id in os.listdir(reports_dir): + meta_path = os.path.join(reports_dir, report_id, 'meta.json') + if not os.path.exists(meta_path): + # Formato antigo + meta_path = os.path.join(reports_dir, f"{report_id}.json") + if not os.path.exists(meta_path): + continue + + try: + with open(meta_path, 'r', encoding='utf-8') as f: + data = json.load(f) + if data.get('public_token') == token: + return ReportManager.get_report(report_id) + except Exception: + continue + + return None + + +@public_bp.route('/report/', methods=['GET']) +def get_public_report(token: str): + """Obter relatório público via token.""" + report = _get_report_by_token(token) + if not report: + return jsonify({"success": False, "error": "Relatório não encontrado ou link expirado"}), 404 + + data = report.to_dict() + # Remover campos sensíveis + data.pop('graph_id', None) + + return jsonify({"success": True, "data": data}) + + +@public_bp.route('/report//chat', methods=['POST']) +def public_chat(token: str): + """Chat com ReportAgent via link público.""" + report = _get_report_by_token(token) + if not report: + return jsonify({"success": False, "error": "Relatório não encontrado"}), 404 + + body = request.get_json() or {} + message = body.get('message', '') + chat_history = body.get('chat_history', []) + + if not message: + return jsonify({"success": False, "error": "Mensagem obrigatória"}), 400 + + # Proxy para o chat existente + from ..api.report import _do_chat + try: + response = _do_chat(report.simulation_id, message, 
chat_history) + return jsonify({"success": True, "data": {"response": response}}) + except Exception as e: + return jsonify({"success": False, "error": str(e)}), 500 + + +@public_bp.route('/report//interview', methods=['POST']) +def public_interview(token: str): + """Entrevista com agente via link público.""" + report = _get_report_by_token(token) + if not report: + return jsonify({"success": False, "error": "Relatório não encontrado"}), 404 + + body = request.get_json() or {} + + # Proxy para interview existente + from ..api.simulation import _do_interview + try: + response = _do_interview( + report.simulation_id, + body.get('agent_id'), + body.get('prompt', ''), + body.get('platform', 'twitter') + ) + return jsonify({"success": True, "data": response}) + except Exception as e: + return jsonify({"success": False, "error": str(e)}), 500 + + +@public_bp.route('/report//analytics', methods=['GET']) +def public_analytics(token: str): + """Analytics da simulação via link público.""" + report = _get_report_by_token(token) + if not report: + return jsonify({"success": False, "error": "Relatório não encontrado"}), 404 + + from ..api.analytics import get_analytics + # Inject simulation_id into the request context + try: + from flask import g + return get_analytics(report.simulation_id) + except Exception as e: + return jsonify({"success": False, "error": str(e)}), 500 + + +# ═══ Utility: gerar token para relatório ═══ + +@public_bp.route('/report//share', methods=['POST']) +def generate_share_link(report_id: str): + """Gerar link público para um relatório.""" + report = ReportManager.get_report(report_id) + if not report: + return jsonify({"success": False, "error": "Relatório não encontrado"}), 404 + + # Verificar se já tem token + meta_path = os.path.join(Config.UPLOAD_FOLDER, 'reports', report_id, 'meta.json') + if not os.path.exists(meta_path): + meta_path = os.path.join(Config.UPLOAD_FOLDER, 'reports', f"{report_id}.json") + + with open(meta_path, 'r', 
encoding='utf-8') as f: + data = json.load(f) + + if not data.get('public_token'): + data['public_token'] = str(uuid.uuid4())[:12] + with open(meta_path, 'w', encoding='utf-8') as f: + json.dump(data, f, ensure_ascii=False, indent=2) + + token = data['public_token'] + + return jsonify({ + "success": True, + "data": { + "token": token, + "url": f"/r/{token}" + } + }) diff --git a/backend/app/api/report.py b/backend/app/api/report.py index d7f2a4d03a..110141057f 100644 --- a/backend/app/api/report.py +++ b/backend/app/api/report.py @@ -1,10 +1,15 @@ """ -Report API路由 -提供模拟报告生成、获取、对话等接口 +Report API +SimulaçãoRelatórioGerar """ import os import traceback + +try: + from ..services.pdf_generator import PDFGenerator, HAS_FPDF +except ImportError: + HAS_FPDF = False import threading from flask import request, jsonify, send_file @@ -20,30 +25,29 @@ logger = get_logger('mirofish.api.report') -# ============== 报告生成接口 ============== +# ============== RelatórioGerar ============== @report_bp.route('/generate', methods=['POST']) def generate_report(): """ - 生成模拟分析报告(异步任务) + GerarSimulaçãoAnáliseRelatório - 这是一个耗时操作,接口会立即返回task_id, - 使用 GET /api/report/generate/status 查询进度 + task_id + GET /api/report/generate/status - 请求(JSON): + JSON { - "simulation_id": "sim_xxxx", // 必填,模拟ID - "force_regenerate": false // 可选,强制重新生成 + "simulation_id": "sim_xxxx", // Obrigatório, ID da simulação + "force_regenerate": false // Gerar } - 返回: { "success": true, "data": { "simulation_id": "sim_xxxx", "task_id": "task_xxxx", "status": "generating", - "message": "报告生成任务已启动" + "message": "RelatórioGerar" } } """ @@ -59,7 +63,7 @@ def generate_report(): force_regenerate = data.get('force_regenerate', False) - # 获取模拟信息 + # Simulação manager = SimulationManager() state = manager.get_simulation(simulation_id) @@ -69,7 +73,7 @@ def generate_report(): "error": t('api.simulationNotFound', id=simulation_id) }), 404 - # 检查是否已有报告 + # Relatório if not force_regenerate: existing_report = 
ReportManager.get_report_by_simulation(simulation_id) if existing_report and existing_report.status == ReportStatus.COMPLETED: @@ -84,7 +88,6 @@ def generate_report(): } }) - # 获取项目信息 project = ProjectManager.get_project(state.project_id) if not project: return jsonify({ @@ -106,11 +109,10 @@ def generate_report(): "error": t('api.missingSimRequirement') }), 400 - # 提前生成 report_id,以便立即返回给前端 + # Gerar report_id import uuid report_id = f"report_{uuid.uuid4().hex[:12]}" - # 创建异步任务 task_manager = TaskManager() task_id = task_manager.create_task( task_type="report_generate", @@ -124,7 +126,6 @@ def generate_report(): # Capture locale before spawning background thread current_locale = get_locale() - # 定义后台任务 def run_generate(): set_locale(current_locale) try: @@ -135,14 +136,14 @@ def run_generate(): message=t('api.initReportAgent') ) - # 创建Report Agent + # Report Agent agent = ReportAgent( graph_id=graph_id, simulation_id=simulation_id, simulation_requirement=simulation_requirement ) - # 进度回调 + # Callback de progresso def progress_callback(stage, progress, message): task_manager.update_task( task_id, @@ -150,13 +151,13 @@ def progress_callback(stage, progress, message): message=f"[{stage}] {message}" ) - # 生成报告(传入预先生成的 report_id) + # GerarRelatórioGerar report_id report = agent.generate_report( progress_callback=progress_callback, report_id=report_id ) - # 保存报告 + # Relatório ReportManager.save_report(report) if report.status == ReportStatus.COMPLETED: @@ -172,10 +173,9 @@ def progress_callback(stage, progress, message): task_manager.fail_task(task_id, report.error or t('api.reportGenerateFailed')) except Exception as e: - logger.error(f"报告生成失败: {str(e)}") + logger.error(f"RelatórioGerarFalhou: {str(e)}") task_manager.fail_task(task_id, str(e)) - # 启动后台线程 thread = threading.Thread(target=run_generate, daemon=True) thread.start() @@ -192,7 +192,7 @@ def progress_callback(stage, progress, message): }) except Exception as e: - logger.error(f"启动报告生成任务失败: {str(e)}") + 
logger.error(f"RelatórioGerarFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -203,15 +203,14 @@ def progress_callback(stage, progress, message): @report_bp.route('/generate/status', methods=['POST']) def get_generate_status(): """ - 查询报告生成任务进度 + RelatórioGerar - 请求(JSON): + JSON { - "task_id": "task_xxxx", // 可选,generate返回的task_id - "simulation_id": "sim_xxxx" // 可选,模拟ID + "task_id": "task_xxxx", // generatetask_id + "simulation_id": "sim_xxxx" // ID da simulação } - 返回: { "success": true, "data": { @@ -228,7 +227,7 @@ def get_generate_status(): task_id = data.get('task_id') simulation_id = data.get('simulation_id') - # 如果提供了simulation_id,先检查是否已有完成的报告 + # simulation_idRelatório if simulation_id: existing_report = ReportManager.get_report_by_simulation(simulation_id) if existing_report and existing_report.status == ReportStatus.COMPLETED: @@ -265,21 +264,20 @@ def get_generate_status(): }) except Exception as e: - logger.error(f"查询任务状态失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e) }), 500 -# ============== 报告获取接口 ============== +# ============== Relatório ============== @report_bp.route('/', methods=['GET']) def get_report(report_id: str): """ - 获取报告详情 + Obter relatório - 返回: { "success": true, "data": { @@ -302,13 +300,36 @@ def get_report(report_id: str): "error": t('api.reportNotFound', id=report_id) }), 404 + report_dict = report.to_dict() + + # Adicionar project_id e project_name via simulation + if report_dict.get('simulation_id') and not report_dict.get('project_id'): + try: + from ..services.simulation_manager import SimulationManager + manager = SimulationManager() + sims = manager.list_simulations() + sim = next((s for s in sims if s.simulation_id == report_dict['simulation_id']), None) + if sim: + report_dict['project_id'] = sim.project_id + except Exception: + pass + + # Enriquecer com nome do projeto e cliente + if report_dict.get('project_id'): + try: + proj = 
ProjectManager.get_project(report_dict['project_id']) + if proj: + report_dict['project_name'] = proj.name + except Exception: + pass + return jsonify({ "success": True, - "data": report.to_dict() + "data": report_dict }) except Exception as e: - logger.error(f"获取报告失败: {str(e)}") + logger.error(f"Obter relatórioFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -319,9 +340,8 @@ def get_report(report_id: str): @report_bp.route('/by-simulation/', methods=['GET']) def get_report_by_simulation(simulation_id: str): """ - 根据模拟ID获取报告 + ID da simulaçãoObter relatório - 返回: { "success": true, "data": { @@ -347,7 +367,7 @@ def get_report_by_simulation(simulation_id: str): }) except Exception as e: - logger.error(f"获取报告失败: {str(e)}") + logger.error(f"Obter relatórioFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -358,13 +378,12 @@ def get_report_by_simulation(simulation_id: str): @report_bp.route('/list', methods=['GET']) def list_reports(): """ - 列出所有报告 + Relatório - Query参数: - simulation_id: 按模拟ID过滤(可选) - limit: 返回数量限制(默认50) + Query + simulation_id: ID da simulação + limit: 50 - 返回: { "success": true, "data": [...], @@ -387,7 +406,7 @@ def list_reports(): }) except Exception as e: - logger.error(f"列出报告失败: {str(e)}") + logger.error(f"Listar relatóriosFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -398,9 +417,11 @@ def list_reports(): @report_bp.route('//download', methods=['GET']) def download_report(report_id: str): """ - 下载报告(Markdown格式) - - 返回Markdown文件 + Gera e retorna PDF profissional do relatório. + Fallback para Markdown se fpdf2 não estiver instalado. 
+ Query params: + - format: 'pdf' (default) ou 'md' + - client: nome do cliente para personalização """ try: report = ReportManager.get_report(report_id) @@ -411,29 +432,81 @@ def download_report(report_id: str): "error": t('api.reportNotFound', id=report_id) }), 404 + fmt = request.args.get('format', 'pdf').lower() + client_name = request.args.get('client', '') + + # Montar dados do relatório + report_data = { + "outline": { + "title": report.outline.title if report.outline else "Relatório", + "summary": report.outline.summary if report.outline else "", + "sections": [ + {"title": s.title, "content": s.content or "", "key": getattr(s, 'key', '')} + for s in (report.outline.sections if report.outline else []) + ] + }, + "completed_at": getattr(report, 'completed_at', None), + "created_at": getattr(report, 'created_at', None), + "structured": getattr(report, 'structured', None), + } + + # Enriquecer structured.meta com nome do projeto + if report_data.get("structured") and report.simulation_id: + try: + from ..services.simulation_manager import SimulationManager + manager = SimulationManager() + sims = manager.list_simulations() + sim = next((s for s in sims if s.simulation_id == report.simulation_id), None) + if sim and sim.project_id: + proj = ProjectManager.get_project(sim.project_id) + if proj and report_data["structured"].get("meta"): + report_data["structured"]["meta"]["projeto"] = proj.name + report_data["project_name"] = proj.name + except Exception: + pass + + # ═══ PDF ═══ + if fmt == 'pdf' and HAS_FPDF: + report_folder = ReportManager._ensure_report_folder(report_id) + pdf_path = os.path.join(report_folder, f"{report_id}.pdf") + + PDFGenerator.generate(report_data, pdf_path, client_name=client_name) + + return send_file( + pdf_path, + as_attachment=True, + download_name=f"AUGUR-{report_id[:8]}.pdf", + mimetype='application/pdf' + ) + + # ═══ Fallback: Markdown ═══ md_path = ReportManager._get_report_markdown_path(report_id) if not os.path.exists(md_path): 
- # 如果MD文件不存在,生成一个临时文件 import tempfile - with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f: - f.write(report.markdown_content) + md_content = f"# {report_data['outline']['title']}\n\n" + md_content += f"{report_data['outline']['summary']}\n\n" + for s in report_data['outline']['sections']: + md_content += f"## {s['title']}\n\n{s['content']}\n\n" + + with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False, encoding='utf-8') as f: + f.write(md_content) temp_path = f.name return send_file( temp_path, as_attachment=True, - download_name=f"{report_id}.md" + download_name=f"AUGUR-{report_id[:8]}.md" ) return send_file( md_path, as_attachment=True, - download_name=f"{report_id}.md" + download_name=f"AUGUR-{report_id[:8]}.md" ) except Exception as e: - logger.error(f"下载报告失败: {str(e)}") + logger.error(f"Download falhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -443,7 +516,7 @@ def download_report(report_id: str): @report_bp.route('/', methods=['DELETE']) def delete_report(report_id: str): - """删除报告""" + """Relatório""" try: success = ReportManager.delete_report(report_id) @@ -459,7 +532,7 @@ def delete_report(report_id: str): }) except Exception as e: - logger.error(f"删除报告失败: {str(e)}") + logger.error(f"RelatórioFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -467,32 +540,31 @@ def delete_report(report_id: str): }), 500 -# ============== Report Agent对话接口 ============== +# ============== Report Agent ============== @report_bp.route('/chat', methods=['POST']) def chat_with_report_agent(): """ - 与Report Agent对话 + Dialogar com o Report Agent - Report Agent可以在对话中自主调用检索工具来回答问题 + O Report Agent pode chamar ferramentas de busca autonomamente durante o diálogo - 请求(JSON): + JSON { - "simulation_id": "sim_xxxx", // 必填,模拟ID - "message": "请解释一下舆情走向", // 必填,用户消息 - "chat_history": [ // 可选,对话历史 + "simulation_id": "sim_xxxx", // Obrigatório, ID da simulação + "message": "", // Obrigatório, mensagem 
do usuário + "chat_history": [ // Opcional, histórico do diálogo {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."} ] } - 返回: { "success": true, "data": { - "response": "Agent回复...", - "tool_calls": [调用的工具列表], - "sources": [信息来源] + "response": "Agent...", + "tool_calls": [Lista de ferramentas chamadas], + "sources": [Fontes de informação] } } """ @@ -515,7 +587,7 @@ def chat_with_report_agent(): "error": t('api.requireMessage') }), 400 - # 获取模拟和项目信息 + # Simulação manager = SimulationManager() state = manager.get_simulation(simulation_id) @@ -541,7 +613,7 @@ def chat_with_report_agent(): simulation_requirement = project.simulation_requirement or "" - # 创建Agent并进行对话 + # Agent agent = ReportAgent( graph_id=graph_id, simulation_id=simulation_id, @@ -556,7 +628,7 @@ def chat_with_report_agent(): }) except Exception as e: - logger.error(f"对话失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -564,22 +636,21 @@ def chat_with_report_agent(): }), 500 -# ============== 报告进度与分章节接口 ============== +# ============== RelatórioSeção ============== @report_bp.route('//progress', methods=['GET']) def get_report_progress(report_id: str): """ - 获取报告生成进度(实时) + Obter relatórioGerar - 返回: { "success": true, "data": { "status": "generating", "progress": 45, - "message": "正在生成章节: 关键发现", - "current_section": "关键发现", - "completed_sections": ["执行摘要", "模拟背景"], + "message": "GerarSeção: ", + "current_section": "", + "completed_sections": ["", "Simulação"], "updated_at": "2025-12-09T..." 
} } @@ -599,7 +670,7 @@ def get_report_progress(report_id: str): }) except Exception as e: - logger.error(f"获取报告进度失败: {str(e)}") + logger.error(f"Obter relatórioFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -610,11 +681,10 @@ def get_report_progress(report_id: str): @report_bp.route('//sections', methods=['GET']) def get_report_sections(report_id: str): """ - 获取已生成的章节列表(分章节输出) + GerarSeçãoSeção - 前端可以轮询此接口获取已生成的章节内容,无需等待整个报告完成 + GerarSeçãoConteúdoRelatório - 返回: { "success": true, "data": { @@ -623,7 +693,7 @@ def get_report_sections(report_id: str): { "filename": "section_01.md", "section_index": 1, - "content": "## 执行摘要\\n\\n..." + "content": "## \\n\\n..." }, ... ], @@ -635,7 +705,7 @@ def get_report_sections(report_id: str): try: sections = ReportManager.get_generated_sections(report_id) - # 获取报告状态 + # Obter relatório report = ReportManager.get_report(report_id) is_complete = report is not None and report.status == ReportStatus.COMPLETED @@ -650,7 +720,7 @@ def get_report_sections(report_id: str): }) except Exception as e: - logger.error(f"获取章节列表失败: {str(e)}") + logger.error(f"SeçãoFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -661,14 +731,13 @@ def get_report_sections(report_id: str): @report_bp.route('//section/', methods=['GET']) def get_single_section(report_id: str, section_index: int): """ - 获取单个章节内容 + SeçãoConteúdo - 返回: { "success": true, "data": { "filename": "section_01.md", - "content": "## 执行摘要\\n\\n..." + "content": "## \\n\\n..." 
} } """ @@ -694,7 +763,7 @@ def get_single_section(report_id: str, section_index: int): }) except Exception as e: - logger.error(f"获取章节内容失败: {str(e)}") + logger.error(f"SeçãoConteúdoFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -702,16 +771,15 @@ def get_single_section(report_id: str, section_index: int): }), 500 -# ============== 报告状态检查接口 ============== +# ============== Status do relatório ============== @report_bp.route('/check/', methods=['GET']) def check_report_status(simulation_id: str): """ - 检查模拟是否有报告,以及报告状态 + SimulaçãoRelatórioStatus do relatório - 用于前端判断是否解锁Interview功能 + Interview - 返回: { "success": true, "data": { @@ -730,7 +798,7 @@ def check_report_status(simulation_id: str): report_status = report.status.value if report else None report_id = report.report_id if report else None - # 只有报告完成后才解锁interview + # Relatóriointerview interview_unlocked = has_report and report.status == ReportStatus.COMPLETED return jsonify({ @@ -745,7 +813,7 @@ def check_report_status(simulation_id: str): }) except Exception as e: - logger.error(f"检查报告状态失败: {str(e)}") + logger.error(f"Status do relatórioFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -753,22 +821,21 @@ def check_report_status(simulation_id: str): }), 500 -# ============== Agent 日志接口 ============== +# ============== Agent ============== @report_bp.route('//agent-log', methods=['GET']) def get_agent_log(report_id: str): """ - 获取 Report Agent 的详细执行日志 + Report Agent - 实时获取报告生成过程中的每一步动作,包括: - - 报告开始、规划开始/完成 - - 每个章节的开始、工具调用、LLM响应、完成 - - 报告完成或失败 + Obter relatórioGerar + - Relatório/ + - SeçãoFerramentaLLM + - RelatórioFalhou - Query参数: - from_line: 从第几行开始读取(可选,默认0,用于增量获取) + Query + from_line: 0 - 返回: { "success": true, "data": { @@ -779,7 +846,7 @@ def get_agent_log(report_id: str): "report_id": "report_xxxx", "action": "tool_call", "stage": "generating", - "section_title": "执行摘要", + "section_title": "", "section_index": 1, "details": { "tool_name": 
"insight_forge", @@ -806,7 +873,7 @@ def get_agent_log(report_id: str): }) except Exception as e: - logger.error(f"获取Agent日志失败: {str(e)}") + logger.error(f"AgentFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -817,9 +884,8 @@ def get_agent_log(report_id: str): @report_bp.route('//agent-log/stream', methods=['GET']) def stream_agent_log(report_id: str): """ - 获取完整的 Agent 日志(一次性获取全部) + Agent - 返回: { "success": true, "data": { @@ -840,7 +906,7 @@ def stream_agent_log(report_id: str): }) except Exception as e: - logger.error(f"获取Agent日志失败: {str(e)}") + logger.error(f"AgentFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -848,27 +914,25 @@ def stream_agent_log(report_id: str): }), 500 -# ============== 控制台日志接口 ============== +# ============== ============== @report_bp.route('//console-log', methods=['GET']) def get_console_log(report_id: str): """ - 获取 Report Agent 的控制台输出日志 + Report Agent - 实时获取报告生成过程中的控制台输出(INFO、WARNING等), - 这与 agent-log 接口返回的结构化 JSON 日志不同, - 是纯文本格式的控制台风格日志。 + Obter relatórioGerarINFOWARNING + agent-log JSON - Query参数: - from_line: 从第几行开始读取(可选,默认0,用于增量获取) + Query + from_line: 0 - 返回: { "success": true, "data": { "logs": [ - "[19:46:14] INFO: 搜索完成: 找到 15 条相关事实", - "[19:46:14] INFO: 图谱搜索: graph_id=xxx, query=...", + "[19:46:14] INFO: : 15 ", + "[19:46:14] INFO: Grafo: graph_id=xxx, query=...", ... 
], "total_lines": 100, @@ -888,7 +952,7 @@ def get_console_log(report_id: str): }) except Exception as e: - logger.error(f"获取控制台日志失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -899,9 +963,7 @@ def get_console_log(report_id: str): @report_bp.route('//console-log/stream', methods=['GET']) def stream_console_log(report_id: str): """ - 获取完整的控制台日志(一次性获取全部) - 返回: { "success": true, "data": { @@ -922,7 +984,7 @@ def stream_console_log(report_id: str): }) except Exception as e: - logger.error(f"获取控制台日志失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -930,17 +992,17 @@ def stream_console_log(report_id: str): }), 500 -# ============== 工具调用接口(供调试使用)============== +# ============== Ferramenta============== @report_bp.route('/tools/search', methods=['POST']) def search_graph_tool(): """ - 图谱搜索工具接口(供调试使用) + GrafoFerramenta - 请求(JSON): + JSON { "graph_id": "mirofish_xxxx", - "query": "搜索查询", + "query": "", "limit": 10 } """ @@ -972,7 +1034,7 @@ def search_graph_tool(): }) except Exception as e: - logger.error(f"图谱搜索失败: {str(e)}") + logger.error(f"GrafoFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -983,9 +1045,9 @@ def search_graph_tool(): @report_bp.route('/tools/statistics', methods=['POST']) def get_graph_statistics_tool(): """ - 图谱统计工具接口(供调试使用) + GrafoFerramenta - 请求(JSON): + JSON { "graph_id": "mirofish_xxxx" } @@ -1012,7 +1074,7 @@ def get_graph_statistics_tool(): }) except Exception as e: - logger.error(f"获取图谱统计失败: {str(e)}") + logger.error(f"GrafoFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), diff --git a/backend/app/api/share.py b/backend/app/api/share.py new file mode 100644 index 0000000000..47df6e08ed --- /dev/null +++ b/backend/app/api/share.py @@ -0,0 +1,130 @@ +""" +AUGUR — Link público para clientes +Gera código de acesso e serve relatório público sem login. 
+""" +import os +import json +import uuid +import hashlib +import logging +from datetime import datetime +from flask import Blueprint, jsonify, request + +logger = logging.getLogger(__name__) + +share_bp = Blueprint('share', __name__, url_prefix='/api/share') + +SHARE_DIR = os.environ.get('SHARE_DATA_DIR', '/app/data/shares') + + +def _ensure_dir(): + os.makedirs(SHARE_DIR, exist_ok=True) + + +def _share_path(code): + return os.path.join(SHARE_DIR, f"{code}.json") + + +@share_bp.route('/create', methods=['POST']) +def create_share(): + """ + Gerar link público para um relatório. + Body: { "report_id": "xxx", "client_name": "Empresa ABC" } + Returns: { "code": "abc123", "url": "/relatorio-publico/abc123" } + """ + _ensure_dir() + data = request.get_json() or {} + report_id = data.get('report_id', '') + client_name = data.get('client_name', '') + + if not report_id: + return jsonify({"success": False, "error": "report_id obrigatório"}), 400 + + # Gerar código curto (6 chars) + raw = f"{report_id}-{uuid.uuid4().hex[:8]}" + code = hashlib.sha256(raw.encode()).hexdigest()[:8].upper() + + share_data = { + "code": code, + "report_id": report_id, + "client_name": client_name, + "created_at": datetime.utcnow().isoformat(), + "views": 0, + "active": True + } + + with open(_share_path(code), 'w') as f: + json.dump(share_data, f, indent=2) + + logger.info(f"Share created: {code} for report {report_id}") + + return jsonify({ + "success": True, + "data": { + "code": code, + "url": f"/relatorio-publico/{code}", + "report_id": report_id, + "client_name": client_name + } + }) + + +@share_bp.route('/', methods=['GET']) +def get_shared_report(code): + """ + Acessar relatório público pelo código. + Retorna os mesmos dados de /api/report/:id mas sem autenticação. 
+ """ + _ensure_dir() + spath = _share_path(code.upper()) + + if not os.path.exists(spath): + return jsonify({"success": False, "error": "Código inválido ou expirado"}), 404 + + with open(spath) as f: + share_data = json.load(f) + + if not share_data.get('active', True): + return jsonify({"success": False, "error": "Link desativado"}), 403 + + # Incrementar views + share_data['views'] = share_data.get('views', 0) + 1 + share_data['last_viewed'] = datetime.utcnow().isoformat() + with open(spath, 'w') as f: + json.dump(share_data, f, indent=2) + + # Carregar relatório real + report_id = share_data.get('report_id', '') + try: + from ..services.report_agent import ReportManager + report = ReportManager.get_report(report_id) + if not report: + return jsonify({"success": False, "error": "Relatório não encontrado"}), 404 + + report_dict = report.to_dict() + report_dict['client_name'] = share_data.get('client_name', '') + report_dict['share_code'] = code + report_dict['share_views'] = share_data['views'] + + return jsonify({"success": True, "data": report_dict}) + + except Exception as e: + logger.error(f"Share access error: {e}") + return jsonify({"success": False, "error": str(e)}), 500 + + +@share_bp.route('//deactivate', methods=['POST']) +def deactivate_share(code): + """Desativar link público.""" + _ensure_dir() + spath = _share_path(code.upper()) + if not os.path.exists(spath): + return jsonify({"success": False, "error": "Não encontrado"}), 404 + + with open(spath) as f: + share_data = json.load(f) + share_data['active'] = False + with open(spath, 'w') as f: + json.dump(share_data, f, indent=2) + + return jsonify({"success": True, "message": "Link desativado"}) diff --git a/backend/app/api/simulation.py b/backend/app/api/simulation.py index 3a8e1e3fc8..fac2aa8b69 100644 --- a/backend/app/api/simulation.py +++ b/backend/app/api/simulation.py @@ -1,9 +1,11 @@ """ -模拟相关API路由 -Step2: Zep实体读取与过滤、OASIS模拟准备与运行(全程自动化) +SimulaçãoAPI +Step2: ZepEntidadeOASISSimulação """ 
import os +import json +import time import traceback from flask import request, jsonify, send_file @@ -20,41 +22,39 @@ logger = get_logger('mirofish.api.simulation') -# Interview prompt 优化前缀 -# 添加此前缀可以避免Agent调用工具,直接用文本回复 -INTERVIEW_PROMPT_PREFIX = "结合你的人设、所有的过往记忆与行动,不调用任何工具直接用文本回复我:" +# Interview prompt +# AgentFerramenta +INTERVIEW_PROMPT_PREFIX = "Ferramenta" def optimize_interview_prompt(prompt: str) -> str: """ - 优化Interview提问,添加前缀避免Agent调用工具 + InterviewAgentFerramenta Args: - prompt: 原始提问 + prompt: Returns: - 优化后的提问 """ if not prompt: return prompt - # 避免重复添加前缀 if prompt.startswith(INTERVIEW_PROMPT_PREFIX): return prompt return f"{INTERVIEW_PROMPT_PREFIX}{prompt}" -# ============== 实体读取接口 ============== +# ============== Entidade ============== @simulation_bp.route('/entities/', methods=['GET']) def get_graph_entities(graph_id: str): """ - 获取图谱中的所有实体(已过滤) + GrafoEntidade - 只返回符合预定义实体类型的节点(Labels不只是Entity的节点) + EntidadeLabelsEntity - Query参数: - entity_types: 逗号分隔的实体类型列表(可选,用于进一步过滤) - enrich: 是否获取相关边信息(默认true) + Query + entity_types: Entidade + enrich: true """ try: if not Config.ZEP_API_KEY: @@ -67,7 +67,7 @@ def get_graph_entities(graph_id: str): entity_types = [t.strip() for t in entity_types_str.split(',') if t.strip()] if entity_types_str else None enrich = request.args.get('enrich', 'true').lower() == 'true' - logger.info(f"获取图谱实体: graph_id={graph_id}, entity_types={entity_types}, enrich={enrich}") + logger.info(f"GrafoEntidade: graph_id={graph_id}, entity_types={entity_types}, enrich={enrich}") reader = ZepEntityReader() result = reader.filter_defined_entities( @@ -82,7 +82,7 @@ def get_graph_entities(graph_id: str): }) except Exception as e: - logger.error(f"获取图谱实体失败: {str(e)}") + logger.error(f"GrafoEntidadeFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -92,7 +92,7 @@ def get_graph_entities(graph_id: str): @simulation_bp.route('/entities//', methods=['GET']) def get_entity_detail(graph_id: str, entity_uuid: str): - 
"""获取单个实体的详细信息""" + """Entidade""" try: if not Config.ZEP_API_KEY: return jsonify({ @@ -115,7 +115,7 @@ def get_entity_detail(graph_id: str, entity_uuid: str): }) except Exception as e: - logger.error(f"获取实体详情失败: {str(e)}") + logger.error(f"EntidadeFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -125,7 +125,7 @@ def get_entity_detail(graph_id: str, entity_uuid: str): @simulation_bp.route('/entities//by-type/', methods=['GET']) def get_entities_by_type(graph_id: str, entity_type: str): - """获取指定类型的所有实体""" + """Entidade""" try: if not Config.ZEP_API_KEY: return jsonify({ @@ -152,7 +152,7 @@ def get_entities_by_type(graph_id: str, entity_type: str): }) except Exception as e: - logger.error(f"获取实体失败: {str(e)}") + logger.error(f"EntidadeFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -160,24 +160,23 @@ def get_entities_by_type(graph_id: str, entity_type: str): }), 500 -# ============== 模拟管理接口 ============== +# ============== Simulação ============== @simulation_bp.route('/create', methods=['POST']) def create_simulation(): """ - 创建新的模拟 + Simulação - 注意:max_rounds等参数由LLM智能生成,无需手动设置 + max_roundsLLMGerar - 请求(JSON): + JSON { - "project_id": "proj_xxxx", // 必填 - "graph_id": "mirofish_xxxx", // 可选,如不提供则从project获取 - "enable_twitter": true, // 可选,默认true - "enable_reddit": true // 可选,默认true + "project_id": "proj_xxxx", // + "graph_id": "mirofish_xxxx", // project + "enable_twitter": true, // true + "enable_reddit": true // true } - 返回: { "success": true, "data": { @@ -229,7 +228,7 @@ def create_simulation(): }) except Exception as e: - logger.error(f"创建模拟失败: {str(e)}") + logger.error(f"SimulaçãoFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -239,16 +238,15 @@ def create_simulation(): def _check_simulation_prepared(simulation_id: str) -> tuple: """ - 检查模拟是否已经准备完成 + Simulação - 检查条件: - 1. state.json 存在且 status 为 "ready" - 2. 必要文件存在:reddit_profiles.json, twitter_profiles.csv, simulation_config.json + 1. 
state.json status "ready" + 2. reddit_profiles.json, twitter_profiles.csv, simulation_config.json - 注意:运行脚本(run_*.py)保留在 backend/scripts/ 目录,不再复制到模拟目录 + (run_*.py) backend/scripts/ Simulação Args: - simulation_id: 模拟ID + simulation_id: ID da simulação Returns: (is_prepared: bool, info: dict) @@ -258,11 +256,10 @@ def _check_simulation_prepared(simulation_id: str) -> tuple: simulation_dir = os.path.join(Config.OASIS_SIMULATION_DATA_DIR, simulation_id) - # 检查目录是否存在 if not os.path.exists(simulation_dir): - return False, {"reason": "模拟目录不存在"} + return False, {"reason": "Simulação"} - # 必要文件列表(不包括脚本,脚本位于 backend/scripts/) + # backend/scripts/ required_files = [ "state.json", "simulation_config.json", @@ -270,7 +267,6 @@ def _check_simulation_prepared(simulation_id: str) -> tuple: "twitter_profiles.csv" ] - # 检查文件是否存在 existing_files = [] missing_files = [] for f in required_files: @@ -282,12 +278,12 @@ def _check_simulation_prepared(simulation_id: str) -> tuple: if missing_files: return False, { - "reason": "缺少必要文件", + "reason": "", "missing_files": missing_files, "existing_files": existing_files } - # 检查state.json中的状态 + # state.json state_file = os.path.join(simulation_dir, "state.json") try: import json @@ -297,20 +293,18 @@ def _check_simulation_prepared(simulation_id: str) -> tuple: status = state_data.get("status", "") config_generated = state_data.get("config_generated", False) - # 详细日志 - logger.debug(f"检测模拟准备状态: {simulation_id}, status={status}, config_generated={config_generated}") - - # 如果 config_generated=True 且文件存在,认为准备完成 - # 以下状态都说明准备工作已完成: - # - ready: 准备完成,可以运行 - # - preparing: 如果 config_generated=True 说明已完成 - # - running: 正在运行,说明准备早就完成了 - # - completed: 运行完成,说明准备早就完成了 - # - stopped: 已停止,说明准备早就完成了 - # - failed: 运行失败(但准备是完成的) + logger.debug(f"Simulação: {simulation_id}, status={status}, config_generated={config_generated}") + + # config_generated=True + # Concluído + # - ready: + # - preparing: config_generated=True Concluído + # - running: + # - completed: 
+ # - stopped: + # - failed: Falhou prepared_statuses = ["ready", "preparing", "running", "completed", "stopped", "failed"] if status in prepared_statuses and config_generated: - # 获取文件统计信息 profiles_file = os.path.join(simulation_dir, "reddit_profiles.json") config_file = os.path.join(simulation_dir, "simulation_config.json") @@ -320,7 +314,7 @@ def _check_simulation_prepared(simulation_id: str) -> tuple: profiles_data = json.load(f) profiles_count = len(profiles_data) if isinstance(profiles_data, list) else 0 - # 如果状态是preparing但文件已完成,自动更新状态为ready + # preparingConcluídoready if status == "preparing": try: state_data["status"] = "ready" @@ -328,12 +322,12 @@ def _check_simulation_prepared(simulation_id: str) -> tuple: state_data["updated_at"] = datetime.now().isoformat() with open(state_file, 'w', encoding='utf-8') as f: json.dump(state_data, f, ensure_ascii=False, indent=2) - logger.info(f"自动更新模拟状态: {simulation_id} preparing -> ready") + logger.info(f"Simulação: {simulation_id} preparing -> ready") status = "ready" except Exception as e: - logger.warning(f"自动更新状态失败: {e}") + logger.warning(f"Falhou: {e}") - logger.info(f"模拟 {simulation_id} 检测结果: 已准备完成 (status={status}, config_generated={config_generated})") + logger.info(f"Simulação {simulation_id} Resultado: (status={status}, config_generated={config_generated})") return True, { "status": status, "entities_count": state_data.get("entities_count", 0), @@ -345,55 +339,52 @@ def _check_simulation_prepared(simulation_id: str) -> tuple: "existing_files": existing_files } else: - logger.warning(f"模拟 {simulation_id} 检测结果: 未准备完成 (status={status}, config_generated={config_generated})") + logger.warning(f"Simulação {simulation_id} Resultado: (status={status}, config_generated={config_generated})") return False, { - "reason": f"状态不在已准备列表中或config_generated为false: status={status}, config_generated={config_generated}", + "reason": f"config_generatedfalse: status={status}, config_generated={config_generated}", "status": status, 
"config_generated": config_generated } except Exception as e: - return False, {"reason": f"读取状态文件失败: {str(e)}"} + return False, {"reason": f"Falhou: {str(e)}"} @simulation_bp.route('/prepare', methods=['POST']) def prepare_simulation(): """ - 准备模拟环境(异步任务,LLM智能生成所有参数) + SimulaçãoLLMGerar - 这是一个耗时操作,接口会立即返回task_id, - 使用 GET /api/simulation/prepare/status 查询进度 + task_id + GET /api/simulation/prepare/status - 特性: - - 自动检测已完成的准备工作,避免重复生成 - - 如果已准备完成,直接返回已有结果 - - 支持强制重新生成(force_regenerate=true) + - ConcluídoGerar + - Resultado + - Gerarforce_regenerate=true - 步骤: - 1. 检查是否已有完成的准备工作 - 2. 从Zep图谱读取并过滤实体 - 3. 为每个实体生成OASIS Agent Profile(带重试机制) - 4. LLM智能生成模拟配置(带重试机制) - 5. 保存配置文件和预设脚本 + 1. + 2. ZepGrafoEntidade + 3. EntidadeGerarOASIS Agent Profile + 4. LLMGerarConfiguração da simulação + 5. Configuração - 请求(JSON): + JSON { - "simulation_id": "sim_xxxx", // 必填,模拟ID - "entity_types": ["Student", "PublicFigure"], // 可选,指定实体类型 - "use_llm_for_profiles": true, // 可选,是否用LLM生成人设 - "parallel_profile_count": 5, // 可选,并行生成人设数量,默认5 - "force_regenerate": false // 可选,强制重新生成,默认false + "simulation_id": "sim_xxxx", // Obrigatório, ID da simulação + "entity_types": ["Student", "PublicFigure"], // Entidade + "use_llm_for_profiles": true, // LLMGerar + "parallel_profile_count": 5, // Gerar5 + "force_regenerate": false // Gerarfalse } - 返回: { "success": true, "data": { "simulation_id": "sim_xxxx", - "task_id": "task_xxxx", // 新任务时返回 + "task_id": "task_xxxx", // "status": "preparing|ready", - "message": "准备任务已启动|已有完成的准备工作", - "already_prepared": true|false // 是否已准备完成 + "message": "|", + "already_prepared": true|false // } } """ @@ -421,17 +412,17 @@ def prepare_simulation(): "error": t('api.simulationNotFound', id=simulation_id) }), 404 - # 检查是否强制重新生成 + # Gerar force_regenerate = data.get('force_regenerate', False) - logger.info(f"开始处理 /prepare 请求: simulation_id={simulation_id}, force_regenerate={force_regenerate}") + logger.info(f" /prepare : simulation_id={simulation_id}, 
force_regenerate={force_regenerate}") - # 检查是否已经准备完成(避免重复生成) + # Gerar if not force_regenerate: - logger.debug(f"检查模拟 {simulation_id} 是否已准备完成...") + logger.debug(f"Simulação {simulation_id} ...") is_prepared, prepare_info = _check_simulation_prepared(simulation_id) - logger.debug(f"检查结果: is_prepared={is_prepared}, prepare_info={prepare_info}") + logger.debug(f"Resultado: is_prepared={is_prepared}, prepare_info={prepare_info}") if is_prepared: - logger.info(f"模拟 {simulation_id} 已准备完成,跳过重复生成") + logger.info(f"Simulação {simulation_id} Gerar") return jsonify({ "success": True, "data": { @@ -443,9 +434,8 @@ def prepare_simulation(): } }) else: - logger.info(f"模拟 {simulation_id} 未准备完成,将启动准备任务") + logger.info(f"Simulação {simulation_id} ") - # 从项目获取必要信息 project = ProjectManager.get_project(state.project_id) if not project: return jsonify({ @@ -453,7 +443,7 @@ def prepare_simulation(): "error": t('api.projectNotFound', id=state.project_id) }), 404 - # 获取模拟需求 + # Simulação simulation_requirement = project.simulation_requirement or "" if not simulation_requirement: return jsonify({ @@ -461,33 +451,31 @@ def prepare_simulation(): "error": t('api.projectMissingRequirement') }), 400 - # 获取文档文本 document_text = ProjectManager.get_extracted_text(state.project_id) or "" entity_types_list = data.get('entity_types') use_llm_for_profiles = data.get('use_llm_for_profiles', True) parallel_profile_count = data.get('parallel_profile_count', 5) - # ========== 同步获取实体数量(在后台任务启动前) ========== - # 这样前端在调用prepare后立即就能获取到预期Agent总数 + # ========== Entidade ========== + # prepareAgent try: - logger.info(f"同步获取实体数量: graph_id={state.graph_id}") + logger.info(f"Entidade: graph_id={state.graph_id}") reader = ZepEntityReader() - # 快速读取实体(不需要边信息,只统计数量) + # Entidade filtered_preview = reader.filter_defined_entities( graph_id=state.graph_id, defined_entity_types=entity_types_list, - enrich_with_edges=False # 不获取边信息,加快速度 + enrich_with_edges=False # ) - # 保存实体数量到状态(供前端立即获取) + # Entidade state.entities_count 
= filtered_preview.filtered_count state.entity_types = list(filtered_preview.entity_types) - logger.info(f"预期实体数量: {filtered_preview.filtered_count}, 类型: {filtered_preview.entity_types}") + logger.info(f"Entidade: {filtered_preview.filtered_count}, : {filtered_preview.entity_types}") except Exception as e: - logger.warning(f"同步获取实体数量失败(将在后台任务中重试): {e}") - # 失败不影响后续流程,后台任务会重新获取 + logger.warning(f"EntidadeFalhou: {e}") + # Falhou - # 创建异步任务 task_manager = TaskManager() task_id = task_manager.create_task( task_type="simulation_prepare", @@ -497,14 +485,13 @@ def prepare_simulation(): } ) - # 更新模拟状态(包含预先获取的实体数量) + # SimulaçãoEntidade state.status = SimulationStatus.PREPARING manager._save_simulation_state(state) # Capture locale before spawning background thread current_locale = get_locale() - # 定义后台任务 def run_prepare(): set_locale(current_locale) try: @@ -515,12 +502,10 @@ def run_prepare(): message=t('progress.startPreparingEnv') ) - # 准备模拟(带进度回调) - # 存储阶段进度详情 + # SimulaçãoCallback de progresso stage_details = {} def progress_callback(stage, progress, message, **kwargs): - # 计算总进度 stage_weights = { "reading": (0, 20), # 0-20% "generating_profiles": (20, 70), # 20-70% @@ -531,7 +516,6 @@ def progress_callback(stage, progress, message, **kwargs): start, end = stage_weights.get(stage, (0, 100)) current_progress = int(start + (end - start) * progress / 100) - # 构建详细进度信息 stage_names = { "reading": t('progress.readingGraphEntities'), "generating_profiles": t('progress.generatingProfiles'), @@ -542,7 +526,6 @@ def progress_callback(stage, progress, message, **kwargs): stage_index = list(stage_weights.keys()).index(stage) + 1 if stage in stage_weights else 1 total_stages = len(stage_weights) - # 更新阶段详情 stage_details[stage] = { "stage_name": stage_names.get(stage, stage), "stage_progress": progress, @@ -551,7 +534,6 @@ def progress_callback(stage, progress, message, **kwargs): "item_name": kwargs.get("item_name", "") } - # 构建详细进度信息 detail = stage_details[stage] 
progress_detail_data = { "current_stage": stage, @@ -564,7 +546,6 @@ def progress_callback(stage, progress, message, **kwargs): "item_description": message } - # 构建简洁消息 if detail["total"] > 0: detailed_message = ( f"[{stage_index}/{total_stages}] {stage_names.get(stage, stage)}: " @@ -590,24 +571,22 @@ def progress_callback(stage, progress, message, **kwargs): parallel_profile_count=parallel_profile_count ) - # 任务完成 task_manager.complete_task( task_id, result=result_state.to_simple_dict() ) except Exception as e: - logger.error(f"准备模拟失败: {str(e)}") + logger.error(f"SimulaçãoFalhou: {str(e)}") task_manager.fail_task(task_id, str(e)) - # 更新模拟状态为失败 + # SimulaçãoFalhou state = manager.get_simulation(simulation_id) if state: state.status = SimulationStatus.FAILED state.error = str(e) manager._save_simulation_state(state) - # 启动后台线程 thread = threading.Thread(target=run_prepare, daemon=True) thread.start() @@ -619,8 +598,8 @@ def progress_callback(stage, progress, message, **kwargs): "status": "preparing", "message": t('api.prepareStarted'), "already_prepared": False, - "expected_entities_count": state.entities_count, # 预期的Agent总数 - "entity_types": state.entity_types # 实体类型列表 + "expected_entities_count": state.entities_count, # Agent + "entity_types": state.entity_types # Entidade } }) @@ -631,7 +610,7 @@ def progress_callback(stage, progress, message, **kwargs): }), 404 except Exception as e: - logger.error(f"启动准备任务失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -642,19 +621,16 @@ def progress_callback(stage, progress, message, **kwargs): @simulation_bp.route('/prepare/status', methods=['POST']) def get_prepare_status(): """ - 查询准备任务进度 - 支持两种查询方式: - 1. 通过task_id查询正在进行的任务进度 - 2. 通过simulation_id检查是否已有完成的准备工作 + 1. task_id + 2. 
simulation_id - 请求(JSON): + JSON { - "task_id": "task_xxxx", // 可选,prepare返回的task_id - "simulation_id": "sim_xxxx" // 可选,模拟ID(用于检查已完成的准备) + "task_id": "task_xxxx", // preparetask_id + "simulation_id": "sim_xxxx" // ID da simulaçãoConcluído } - 返回: { "success": true, "data": { @@ -662,8 +638,8 @@ def get_prepare_status(): "status": "processing|completed|ready", "progress": 45, "message": "...", - "already_prepared": true|false, // 是否已有完成的准备 - "prepare_info": {...} // 已准备完成时的详细信息 + "already_prepared": true|false, // + "prepare_info": {...} // } } """ @@ -675,7 +651,7 @@ def get_prepare_status(): task_id = data.get('task_id') simulation_id = data.get('simulation_id') - # 如果提供了simulation_id,先检查是否已准备完成 + # simulation_id if simulation_id: is_prepared, prepare_info = _check_simulation_prepared(simulation_id) if is_prepared: @@ -691,10 +667,10 @@ def get_prepare_status(): } }) - # 如果没有task_id,返回错误 + # task_id if not task_id: if simulation_id: - # 有simulation_id但未准备完成 + # simulation_id return jsonify({ "success": True, "data": { @@ -714,7 +690,7 @@ def get_prepare_status(): task = task_manager.get_task(task_id) if not task: - # 任务不存在,但如果有simulation_id,检查是否已准备完成 + # simulation_id if simulation_id: is_prepared, prepare_info = _check_simulation_prepared(simulation_id) if is_prepared: @@ -745,7 +721,7 @@ def get_prepare_status(): }) except Exception as e: - logger.error(f"查询任务状态失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e) @@ -754,7 +730,7 @@ def get_prepare_status(): @simulation_bp.route('/', methods=['GET']) def get_simulation(simulation_id: str): - """获取模拟状态""" + """Simulação""" try: manager = SimulationManager() state = manager.get_simulation(simulation_id) @@ -767,7 +743,7 @@ def get_simulation(simulation_id: str): result = state.to_dict() - # 如果模拟已准备好,附加运行说明 + # Simulação if state.status == SimulationStatus.READY: result["run_instructions"] = manager.get_run_instructions(simulation_id) @@ -777,7 +753,7 @@ def 
get_simulation(simulation_id: str): }) except Exception as e: - logger.error(f"获取模拟状态失败: {str(e)}") + logger.error(f"SimulaçãoFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -788,10 +764,10 @@ def get_simulation(simulation_id: str): @simulation_bp.route('/list', methods=['GET']) def list_simulations(): """ - 列出所有模拟 + Simulaçãoenriquecido com runner_status, current_round, total_rounds e report_id) - Query参数: - project_id: 按项目ID过滤(可选) + Query + project_id: ID """ try: project_id = request.args.get('project_id') @@ -799,14 +775,49 @@ def list_simulations(): manager = SimulationManager() simulations = manager.list_simulations(project_id=project_id) + enriched = [] + for sim in simulations: + sim_dict = sim.to_dict() + + # Enriquecer com simulation_config (simulation_requirement, rodadas) + config = manager.get_simulation_config(sim.simulation_id) + if config: + sim_dict['simulation_requirement'] = config.get('simulation_requirement', '') + time_cfg = config.get('time_config', {}) + recommended_rounds = int( + time_cfg.get('total_simulation_hours', 0) * 60 / + max(time_cfg.get('minutes_per_round', 60), 1) + ) + else: + sim_dict.setdefault('simulation_requirement', '') + recommended_rounds = 0 + + # Enriquecer com run_state (runner_status, current_round, total_rounds) + run_state = SimulationRunner.get_run_state(sim.simulation_id) + if run_state: + sim_dict['runner_status'] = run_state.runner_status.value + sim_dict['current_round'] = run_state.current_round + sim_dict['total_rounds'] = run_state.total_rounds if run_state.total_rounds > 0 else recommended_rounds + sim_dict['progress_percent'] = run_state.progress_percent if hasattr(run_state, 'progress_percent') else 0 + else: + sim_dict.setdefault('runner_status', 'idle') + sim_dict.setdefault('current_round', 0) + sim_dict.setdefault('total_rounds', recommended_rounds) + sim_dict.setdefault('progress_percent', 0) + + # Enriquecer com report_id + sim_dict['report_id'] = 
_get_report_id_for_simulation(sim.simulation_id) + + enriched.append(sim_dict) + return jsonify({ "success": True, - "data": [s.to_dict() for s in simulations], - "count": len(simulations) + "data": enriched, + "count": len(enriched) }) except Exception as e: - logger.error(f"列出模拟失败: {str(e)}") + logger.error(f"SimulaçãoFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -816,22 +827,22 @@ def list_simulations(): def _get_report_id_for_simulation(simulation_id: str) -> str: """ - 获取 simulation 对应的最新 report_id + simulation report_id - 遍历 reports 目录,找出 simulation_id 匹配的 report, - 如果有多个则返回最新的(按 created_at 排序) + reports simulation_id report + created_at Args: - simulation_id: 模拟ID + simulation_id: ID da simulação Returns: - report_id 或 None + report_id None """ import json from datetime import datetime - # reports 目录路径:backend/uploads/reports - # __file__ 是 app/api/simulation.py,需要向上两级到 backend/ + # reports backend/uploads/reports + # __file__ app/api/simulation.py backend/ reports_dir = os.path.join(os.path.dirname(__file__), '../../uploads/reports') if not os.path.exists(reports_dir): return None @@ -864,34 +875,32 @@ def _get_report_id_for_simulation(simulation_id: str) -> str: if not matching_reports: return None - # 按创建时间倒序排序,返回最新的 matching_reports.sort(key=lambda x: x.get("created_at", ""), reverse=True) return matching_reports[0].get("report_id") except Exception as e: - logger.warning(f"查找 simulation {simulation_id} 的 report 失败: {e}") + logger.warning(f" simulation {simulation_id} report Falhou: {e}") return None @simulation_bp.route('/history', methods=['GET']) def get_simulation_history(): """ - 获取历史模拟列表(带项目详情) + Simulação - 用于首页历史项目展示,返回包含项目名称、描述等丰富信息的模拟列表 + Simulação - Query参数: - limit: 返回数量限制(默认20) + Query + limit: 20 - 返回: { "success": true, "data": [ { "simulation_id": "sim_xxxx", "project_id": "proj_xxxx", - "project_name": "武大舆情分析", - "simulation_requirement": "如果武汉大学发布...", + "project_name": "Análise", + "simulation_requirement": 
"...", "status": "completed", "entities_count": 68, "profiles_count": 68, @@ -914,18 +923,17 @@ def get_simulation_history(): manager = SimulationManager() simulations = manager.list_simulations()[:limit] - # 增强模拟数据,只从 Simulation 文件读取 + # Simulação Simulation enriched_simulations = [] for sim in simulations: sim_dict = sim.to_dict() - # 获取模拟配置信息(从 simulation_config.json 读取 simulation_requirement) + # Configuração da simulação simulation_config.json simulation_requirement config = manager.get_simulation_config(sim.simulation_id) if config: sim_dict["simulation_requirement"] = config.get("simulation_requirement", "") time_config = config.get("time_config", {}) sim_dict["total_simulation_hours"] = time_config.get("total_simulation_hours", 0) - # 推荐轮数(后备值) recommended_rounds = int( time_config.get("total_simulation_hours", 0) * 60 / max(time_config.get("minutes_per_round", 60), 1) @@ -935,35 +943,32 @@ def get_simulation_history(): sim_dict["total_simulation_hours"] = 0 recommended_rounds = 0 - # 获取运行状态(从 run_state.json 读取用户设置的实际轮数) + # run_state.json run_state = SimulationRunner.get_run_state(sim.simulation_id) if run_state: sim_dict["current_round"] = run_state.current_round sim_dict["runner_status"] = run_state.runner_status.value - # 使用用户设置的 total_rounds,若无则使用推荐轮数 + # total_rounds sim_dict["total_rounds"] = run_state.total_rounds if run_state.total_rounds > 0 else recommended_rounds else: sim_dict["current_round"] = 0 sim_dict["runner_status"] = "idle" sim_dict["total_rounds"] = recommended_rounds - # 获取关联项目的文件列表(最多3个) project = ProjectManager.get_project(sim.project_id) if project and hasattr(project, 'files') and project.files: sim_dict["files"] = [ - {"filename": f.get("filename", "未知文件")} + {"filename": f.get("filename", "")} for f in project.files[:3] ] else: sim_dict["files"] = [] - # 获取关联的 report_id(查找该 simulation 最新的 report) + # report_id simulation report sim_dict["report_id"] = _get_report_id_for_simulation(sim.simulation_id) - # 添加版本号 sim_dict["version"] 
= "v1.0.2" - # 格式化日期 try: created_date = sim_dict.get("created_at", "")[:10] sim_dict["created_date"] = created_date @@ -979,7 +984,7 @@ def get_simulation_history(): }) except Exception as e: - logger.error(f"获取历史模拟失败: {str(e)}") + logger.error(f"SimulaçãoFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -990,10 +995,10 @@ def get_simulation_history(): @simulation_bp.route('//profiles', methods=['GET']) def get_simulation_profiles(simulation_id: str): """ - 获取模拟的Agent Profile + SimulaçãoAgent Profile - Query参数: - platform: 平台类型(reddit/twitter,默认reddit) + Query + platform: reddit/twitterreddit """ try: platform = request.args.get('platform', 'reddit') @@ -1017,7 +1022,7 @@ def get_simulation_profiles(simulation_id: str): }), 404 except Exception as e: - logger.error(f"获取Profile失败: {str(e)}") + logger.error(f"ProfileFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -1028,25 +1033,24 @@ def get_simulation_profiles(simulation_id: str): @simulation_bp.route('//profiles/realtime', methods=['GET']) def get_simulation_profiles_realtime(simulation_id: str): """ - 实时获取模拟的Agent Profile(用于在生成过程中实时查看进度) + SimulaçãoAgent ProfileGerar - 与 /profiles 接口的区别: - - 直接读取文件,不经过 SimulationManager - - 适用于生成过程中的实时查看 - - 返回额外的元数据(如文件修改时间、是否正在生成等) + /profiles + - SimulationManager + - Gerar + - Gerar - Query参数: - platform: 平台类型(reddit/twitter,默认reddit) + Query + platform: reddit/twitterreddit - 返回: { "success": true, "data": { "simulation_id": "sim_xxxx", "platform": "reddit", "count": 15, - "total_expected": 93, // 预期总数(如果有) - "is_generating": true, // 是否正在生成 + "total_expected": 93, // + "is_generating": true, // Gerar "file_exists": true, "file_modified_at": "2025-12-04T18:20:00", "profiles": [...] 
@@ -1060,7 +1064,7 @@ def get_simulation_profiles_realtime(simulation_id: str): try: platform = request.args.get('platform', 'reddit') - # 获取模拟目录 + # Simulação sim_dir = os.path.join(Config.OASIS_SIMULATION_DATA_DIR, simulation_id) if not os.path.exists(sim_dir): @@ -1069,19 +1073,16 @@ def get_simulation_profiles_realtime(simulation_id: str): "error": t('api.simulationNotFound', id=simulation_id) }), 404 - # 确定文件路径 if platform == "reddit": profiles_file = os.path.join(sim_dir, "reddit_profiles.json") else: profiles_file = os.path.join(sim_dir, "twitter_profiles.csv") - # 检查文件是否存在 file_exists = os.path.exists(profiles_file) profiles = [] file_modified_at = None if file_exists: - # 获取文件修改时间 file_stat = os.stat(profiles_file) file_modified_at = datetime.fromtimestamp(file_stat.st_mtime).isoformat() @@ -1094,10 +1095,10 @@ def get_simulation_profiles_realtime(simulation_id: str): reader = csv.DictReader(f) profiles = list(reader) except (json.JSONDecodeError, Exception) as e: - logger.warning(f"读取 profiles 文件失败(可能正在写入中): {e}") + logger.warning(f" profiles Falhou: {e}") profiles = [] - # 检查是否正在生成(通过 state.json 判断) + # Gerar state.json is_generating = False total_expected = None @@ -1127,7 +1128,7 @@ def get_simulation_profiles_realtime(simulation_id: str): }) except Exception as e: - logger.error(f"实时获取Profile失败: {str(e)}") + logger.error(f"ProfileFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -1138,24 +1139,23 @@ def get_simulation_profiles_realtime(simulation_id: str): @simulation_bp.route('//config/realtime', methods=['GET']) def get_simulation_config_realtime(simulation_id: str): """ - 实时获取模拟配置(用于在生成过程中实时查看进度) + Configuração da simulaçãoGerar - 与 /config 接口的区别: - - 直接读取文件,不经过 SimulationManager - - 适用于生成过程中的实时查看 - - 返回额外的元数据(如文件修改时间、是否正在生成等) - - 即使配置还没生成完也能返回部分信息 + /config + - SimulationManager + - Gerar + - Gerar + - ConfiguraçãoGerar - 返回: { "success": true, "data": { "simulation_id": "sim_xxxx", "file_exists": true, "file_modified_at": 
"2025-12-04T18:20:00", - "is_generating": true, // 是否正在生成 - "generation_stage": "generating_config", // 当前生成阶段 - "config": {...} // 配置内容(如果存在) + "is_generating": true, // Gerar + "generation_stage": "generating_config", // Gerar + "config": {...} // ConfiguraçãoConteúdo } } """ @@ -1163,7 +1163,7 @@ def get_simulation_config_realtime(simulation_id: str): from datetime import datetime try: - # 获取模拟目录 + # Simulação sim_dir = os.path.join(Config.OASIS_SIMULATION_DATA_DIR, simulation_id) if not os.path.exists(sim_dir): @@ -1172,16 +1172,14 @@ def get_simulation_config_realtime(simulation_id: str): "error": t('api.simulationNotFound', id=simulation_id) }), 404 - # 配置文件路径 + # Configuração config_file = os.path.join(sim_dir, "simulation_config.json") - # 检查文件是否存在 file_exists = os.path.exists(config_file) config = None file_modified_at = None if file_exists: - # 获取文件修改时间 file_stat = os.stat(config_file) file_modified_at = datetime.fromtimestamp(file_stat.st_mtime).isoformat() @@ -1189,10 +1187,10 @@ def get_simulation_config_realtime(simulation_id: str): with open(config_file, 'r', encoding='utf-8') as f: config = json.load(f) except (json.JSONDecodeError, Exception) as e: - logger.warning(f"读取 config 文件失败(可能正在写入中): {e}") + logger.warning(f" config Falhou: {e}") config = None - # 检查是否正在生成(通过 state.json 判断) + # Gerar state.json is_generating = False generation_stage = None config_generated = False @@ -1206,7 +1204,6 @@ def get_simulation_config_realtime(simulation_id: str): is_generating = status == "preparing" config_generated = state_data.get("config_generated", False) - # 判断当前阶段 if is_generating: if state_data.get("profiles_generated", False): generation_stage = "generating_config" @@ -1217,7 +1214,6 @@ def get_simulation_config_realtime(simulation_id: str): except Exception: pass - # 构建返回数据 response_data = { "simulation_id": simulation_id, "file_exists": file_exists, @@ -1228,7 +1224,7 @@ def get_simulation_config_realtime(simulation_id: str): "config": config } - # 
如果配置存在,提取一些关键统计信息 + # Configuração if config: response_data["summary"] = { "total_agents": len(config.get("agent_configs", [])), @@ -1247,7 +1243,7 @@ def get_simulation_config_realtime(simulation_id: str): }) except Exception as e: - logger.error(f"实时获取Config失败: {str(e)}") + logger.error(f"ConfigFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -1258,14 +1254,13 @@ def get_simulation_config_realtime(simulation_id: str): @simulation_bp.route('//config', methods=['GET']) def get_simulation_config(simulation_id: str): """ - 获取模拟配置(LLM智能生成的完整配置) - - 返回包含: - - time_config: 时间配置(模拟时长、轮次、高峰/低谷时段) - - agent_configs: 每个Agent的活动配置(活跃度、发言频率、立场等) - - event_config: 事件配置(初始帖子、热点话题) - - platform_configs: 平台配置 - - generation_reasoning: LLM的配置推理说明 + Configuração da simulaçãoLLMGerarConfiguração + + - time_config: ConfiguraçãoSimulação/ + - agent_configs: AgentConfiguração + - event_config: Configuração + - platform_configs: Configuração + - generation_reasoning: LLMConfiguração """ try: manager = SimulationManager() @@ -1283,7 +1278,7 @@ def get_simulation_config(simulation_id: str): }) except Exception as e: - logger.error(f"获取配置失败: {str(e)}") + logger.error(f"ConfiguraçãoFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -1293,7 +1288,7 @@ def get_simulation_config(simulation_id: str): @simulation_bp.route('//config/download', methods=['GET']) def download_simulation_config(simulation_id: str): - """下载模拟配置文件""" + """Configuração da simulação""" try: manager = SimulationManager() sim_dir = manager._get_simulation_dir(simulation_id) @@ -1312,7 +1307,7 @@ def download_simulation_config(simulation_id: str): ) except Exception as e: - logger.error(f"下载配置失败: {str(e)}") + logger.error(f"ConfiguraçãoFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -1323,19 +1318,18 @@ def download_simulation_config(simulation_id: str): @simulation_bp.route('/script//download', methods=['GET']) def 
download_simulation_script(script_name: str): """ - 下载模拟运行脚本文件(通用脚本,位于 backend/scripts/) + Simulação backend/scripts/ - script_name可选值: + script_name - run_twitter_simulation.py - run_reddit_simulation.py - run_parallel_simulation.py - action_logger.py """ try: - # 脚本位于 backend/scripts/ 目录 + # backend/scripts/ scripts_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../scripts')) - # 验证脚本名称 allowed_scripts = [ "run_twitter_simulation.py", "run_reddit_simulation.py", @@ -1364,7 +1358,7 @@ def download_simulation_script(script_name: str): ) except Exception as e: - logger.error(f"下载脚本失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -1372,19 +1366,19 @@ def download_simulation_script(script_name: str): }), 500 -# ============== Profile生成接口(独立使用) ============== +# ============== ProfileGerar ============== @simulation_bp.route('/generate-profiles', methods=['POST']) def generate_profiles(): """ - 直接从图谱生成OASIS Agent Profile(不创建模拟) + GrafoGerarOASIS Agent ProfileSimulação - 请求(JSON): + JSON { - "graph_id": "mirofish_xxxx", // 必填 - "entity_types": ["Student"], // 可选 - "use_llm": true, // 可选 - "platform": "reddit" // 可选 + "graph_id": "mirofish_xxxx", // + "entity_types": ["Student"], // + "use_llm": true, // + "platform": "reddit" // } """ try: @@ -1438,7 +1432,7 @@ def generate_profiles(): }) except Exception as e: - logger.error(f"生成Profile失败: {str(e)}") + logger.error(f"GerarProfileFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -1446,35 +1440,34 @@ def generate_profiles(): }), 500 -# ============== 模拟运行控制接口 ============== +# ============== Simulação ============== @simulation_bp.route('/start', methods=['POST']) def start_simulation(): """ - 开始运行模拟 + Simulação - 请求(JSON): + JSON { - "simulation_id": "sim_xxxx", // 必填,模拟ID - "platform": "parallel", // 可选: twitter / reddit / parallel (默认) - "max_rounds": 100, // 可选: 最大模拟轮数,用于截断过长的模拟 - "enable_graph_memory_update": false, 
// 可选: 是否将Agent活动动态更新到Zep图谱记忆 - "force": false // 可选: 强制重新开始(会停止运行中的模拟并清理日志) + "simulation_id": "sim_xxxx", // Obrigatório, ID da simulação + "platform": "parallel", // : twitter / reddit / parallel () + "max_rounds": 100, // : SimulaçãoSimulação + "enable_graph_memory_update": false, // : AgentZepGrafo + "force": false // : Simulação } - 关于 force 参数: - - 启用后,如果模拟正在运行或已完成,会先停止并清理运行日志 - - 清理的内容包括:run_state.json, actions.jsonl, simulation.log 等 - - 不会清理配置文件(simulation_config.json)和 profile 文件 - - 适用于需要重新运行模拟的场景 + force + - SimulaçãoConcluído + - Conteúdorun_state.json, actions.jsonl, simulation.log + - Configuraçãosimulation_config.json profile + - Simulação - 关于 enable_graph_memory_update: - - 启用后,模拟中所有Agent的活动(发帖、评论、点赞等)都会实时更新到Zep图谱 - - 这可以让图谱"记住"模拟过程,用于后续分析或AI对话 - - 需要模拟关联的项目有有效的 graph_id - - 采用批量更新机制,减少API调用次数 + enable_graph_memory_update + - SimulaçãoAgentZepGrafo + - Grafo""SimulaçãoAnáliseAI + - Simulação graph_id + - API - 返回: { "success": true, "data": { @@ -1484,8 +1477,8 @@ def start_simulation(): "twitter_running": true, "reddit_running": true, "started_at": "2025-12-01T10:00:00", - "graph_memory_update_enabled": true, // 是否启用了图谱记忆更新 - "force_restarted": true // 是否是强制重新开始 + "graph_memory_update_enabled": true, // Grafo + "force_restarted": true // } } """ @@ -1500,11 +1493,11 @@ def start_simulation(): }), 400 platform = data.get('platform', 'parallel') - max_rounds = data.get('max_rounds') # 可选:最大模拟轮数 - enable_graph_memory_update = data.get('enable_graph_memory_update', False) # 可选:是否启用图谱记忆更新 - force = data.get('force', False) # 可选:强制重新开始 + max_rounds = data.get('max_rounds') # Simulação + enable_graph_memory_update = data.get('enable_graph_memory_update', False) # Grafo + force = data.get('force', False) # - # 验证 max_rounds 参数 + # max_rounds if max_rounds is not None: try: max_rounds = int(max_rounds) @@ -1525,7 +1518,7 @@ def start_simulation(): "error": t('api.invalidPlatform', platform=platform) }), 400 - # 检查模拟是否已准备好 + # Simulação manager = 
SimulationManager() state = manager.get_simulation(simulation_id) @@ -1537,57 +1530,53 @@ def start_simulation(): force_restarted = False - # 智能处理状态:如果准备工作已完成,允许重新启动 + # Concluído if state.status != SimulationStatus.READY: - # 检查准备工作是否已完成 + # Concluído is_prepared, prepare_info = _check_simulation_prepared(simulation_id) if is_prepared: - # 准备工作已完成,检查是否有正在运行的进程 + # Concluído if state.status == SimulationStatus.RUNNING: - # 检查模拟进程是否真的在运行 + # Simulação run_state = SimulationRunner.get_run_state(simulation_id) if run_state and run_state.runner_status.value == "running": - # 进程确实在运行 if force: - # 强制模式:停止运行中的模拟 - logger.info(f"强制模式:停止运行中的模拟 {simulation_id}") + # Simulação + logger.info(f"Simulação {simulation_id}") try: SimulationRunner.stop_simulation(simulation_id) except Exception as e: - logger.warning(f"停止模拟时出现警告: {str(e)}") + logger.warning(f"Simulação: {str(e)}") else: return jsonify({ "success": False, "error": t('api.simRunningForceHint') }), 400 - # 如果是强制模式,清理运行日志 if force: - logger.info(f"强制模式:清理模拟日志 {simulation_id}") + logger.info(f"Simulação {simulation_id}") cleanup_result = SimulationRunner.cleanup_simulation_logs(simulation_id) if not cleanup_result.get("success"): - logger.warning(f"清理日志时出现警告: {cleanup_result.get('errors')}") + logger.warning(f": {cleanup_result.get('errors')}") force_restarted = True - # 进程不存在或已结束,重置状态为 ready - logger.info(f"模拟 {simulation_id} 准备工作已完成,重置状态为 ready(原状态: {state.status.value})") + # ready + logger.info(f"Simulação {simulation_id} Concluído ready: {state.status.value}") state.status = SimulationStatus.READY manager._save_simulation_state(state) else: - # 准备工作未完成 return jsonify({ "success": False, "error": t('api.simNotReady', status=state.status.value) }), 400 - # 获取图谱ID(用于图谱记忆更新) + # ID do grafoGrafo graph_id = None if enable_graph_memory_update: - # 从模拟状态或项目中获取 graph_id + # Simulação graph_id graph_id = state.graph_id if not graph_id: - # 尝试从项目中获取 project = ProjectManager.get_project(state.project_id) if project: graph_id 
= project.graph_id @@ -1598,9 +1587,9 @@ def start_simulation(): "error": t('api.graphIdRequiredForMemory') }), 400 - logger.info(f"启用图谱记忆更新: simulation_id={simulation_id}, graph_id={graph_id}") + logger.info(f"Grafo: simulation_id={simulation_id}, graph_id={graph_id}") - # 启动模拟 + # Simulação run_state = SimulationRunner.start_simulation( simulation_id=simulation_id, platform=platform, @@ -1609,7 +1598,7 @@ def start_simulation(): graph_id=graph_id ) - # 更新模拟状态 + # Simulação state.status = SimulationStatus.RUNNING manager._save_simulation_state(state) @@ -1633,7 +1622,7 @@ def start_simulation(): }), 400 except Exception as e: - logger.error(f"启动模拟失败: {str(e)}") + logger.error(f"SimulaçãoFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -1644,14 +1633,13 @@ def start_simulation(): @simulation_bp.route('/stop', methods=['POST']) def stop_simulation(): """ - 停止模拟 + Simulação - 请求(JSON): + JSON { - "simulation_id": "sim_xxxx" // 必填,模拟ID + "simulation_id": "sim_xxxx" // Obrigatório, ID da simulação } - 返回: { "success": true, "data": { @@ -1673,7 +1661,7 @@ def stop_simulation(): run_state = SimulationRunner.stop_simulation(simulation_id) - # 更新模拟状态 + # Simulação manager = SimulationManager() state = manager.get_simulation(simulation_id) if state: @@ -1692,7 +1680,7 @@ def stop_simulation(): }), 400 except Exception as e: - logger.error(f"停止模拟失败: {str(e)}") + logger.error(f"SimulaçãoFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -1700,14 +1688,13 @@ def stop_simulation(): }), 500 -# ============== 实时状态监控接口 ============== +# ============== ============== @simulation_bp.route('//run-status', methods=['GET']) def get_run_status(simulation_id: str): """ - 获取模拟运行实时状态(用于前端轮询) + Simulação - 返回: { "success": true, "data": { @@ -1752,7 +1739,7 @@ def get_run_status(simulation_id: str): }) except Exception as e: - logger.error(f"获取运行状态失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": 
str(e), @@ -1763,14 +1750,12 @@ def get_run_status(simulation_id: str): @simulation_bp.route('//run-status/detail', methods=['GET']) def get_run_status_detail(simulation_id: str): """ - 获取模拟运行详细状态(包含所有动作) + Simulação - 用于前端展示实时动态 - Query参数: - platform: 过滤平台(twitter/reddit,可选) + Query + platform: twitter/reddit - 返回: { "success": true, "data": { @@ -1792,8 +1777,8 @@ def get_run_status_detail(simulation_id: str): }, ... ], - "twitter_actions": [...], # Twitter 平台的所有动作 - "reddit_actions": [...] # Reddit 平台的所有动作 + "twitter_actions": [...], # Twitter + "reddit_actions": [...] # Reddit } } """ @@ -1813,13 +1798,11 @@ def get_run_status_detail(simulation_id: str): } }) - # 获取完整的动作列表 all_actions = SimulationRunner.get_all_actions( simulation_id=simulation_id, platform=platform_filter ) - # 分平台获取动作 twitter_actions = SimulationRunner.get_all_actions( simulation_id=simulation_id, platform="twitter" @@ -1830,7 +1813,7 @@ def get_run_status_detail(simulation_id: str): platform="reddit" ) if not platform_filter or platform_filter == "reddit" else [] - # 获取当前轮次的动作(recent_actions 只展示最新一轮) + # recent_actions current_round = run_state.current_round recent_actions = SimulationRunner.get_all_actions( simulation_id=simulation_id, @@ -1838,13 +1821,12 @@ def get_run_status_detail(simulation_id: str): round_num=current_round ) if current_round > 0 else [] - # 获取基础状态信息 result = run_state.to_dict() result["all_actions"] = [a.to_dict() for a in all_actions] result["twitter_actions"] = [a.to_dict() for a in twitter_actions] result["reddit_actions"] = [a.to_dict() for a in reddit_actions] result["rounds_count"] = len(run_state.rounds) - # recent_actions 只展示当前最新一轮两个平台的内容 + # recent_actions Conteúdo result["recent_actions"] = [a.to_dict() for a in recent_actions] return jsonify({ @@ -1853,7 +1835,7 @@ def get_run_status_detail(simulation_id: str): }) except Exception as e: - logger.error(f"获取详细状态失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": 
str(e), @@ -1864,16 +1846,15 @@ def get_run_status_detail(simulation_id: str): @simulation_bp.route('//actions', methods=['GET']) def get_simulation_actions(simulation_id: str): """ - 获取模拟中的Agent动作历史 + SimulaçãoAgent - Query参数: - limit: 返回数量(默认100) - offset: 偏移量(默认0) - platform: 过滤平台(twitter/reddit) - agent_id: 过滤Agent ID - round_num: 过滤轮次 + Query + limit: 100 + offset: 0 + platform: twitter/reddit + agent_id: Agent ID + round_num: - 返回: { "success": true, "data": { @@ -1907,7 +1888,7 @@ def get_simulation_actions(simulation_id: str): }) except Exception as e: - logger.error(f"获取动作历史失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -1918,15 +1899,13 @@ def get_simulation_actions(simulation_id: str): @simulation_bp.route('//timeline', methods=['GET']) def get_simulation_timeline(simulation_id: str): """ - 获取模拟时间线(按轮次汇总) + Simulação - 用于前端展示进度条和时间线视图 - Query参数: - start_round: 起始轮次(默认0) - end_round: 结束轮次(默认全部) + Query + start_round: 0 + end_round: - 返回每轮的汇总信息 """ try: start_round = request.args.get('start_round', 0, type=int) @@ -1947,7 +1926,7 @@ def get_simulation_timeline(simulation_id: str): }) except Exception as e: - logger.error(f"获取时间线失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -1958,9 +1937,9 @@ def get_simulation_timeline(simulation_id: str): @simulation_bp.route('//agent-stats', methods=['GET']) def get_agent_stats(simulation_id: str): """ - 获取每个Agent的统计信息 + Agent - 用于前端展示Agent活跃度排行、动作分布等 + Agent """ try: stats = SimulationRunner.get_agent_stats(simulation_id) @@ -1974,7 +1953,7 @@ def get_agent_stats(simulation_id: str): }) except Exception as e: - logger.error(f"获取Agent统计失败: {str(e)}") + logger.error(f"AgentFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -1982,19 +1961,19 @@ def get_agent_stats(simulation_id: str): }), 500 -# ============== 数据库查询接口 ============== +# ============== ============== 
@simulation_bp.route('//posts', methods=['GET']) def get_simulation_posts(simulation_id: str): """ - 获取模拟中的帖子 + Simulação - Query参数: - platform: 平台类型(twitter/reddit) - limit: 返回数量(默认50) - offset: 偏移量 + Query + platform: twitter/reddit + limit: 50 + offset: - 返回帖子列表(从SQLite数据库读取) + SQLite """ try: platform = request.args.get('platform', 'reddit') @@ -2027,8 +2006,10 @@ def get_simulation_posts(simulation_id: str): try: cursor.execute(""" - SELECT * FROM post - ORDER BY created_at DESC + SELECT p.*, u.name as author, u.user_name as username + FROM post p + LEFT JOIN user u ON p.user_id = u.user_id + ORDER BY p.created_at DESC LIMIT ? OFFSET ? """, (limit, offset)) @@ -2054,7 +2035,7 @@ def get_simulation_posts(simulation_id: str): }) except Exception as e: - logger.error(f"获取帖子失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -2065,12 +2046,12 @@ def get_simulation_posts(simulation_id: str): @simulation_bp.route('//comments', methods=['GET']) def get_simulation_comments(simulation_id: str): """ - 获取模拟中的评论(仅Reddit) + SimulaçãoReddit - Query参数: - post_id: 过滤帖子ID(可选) - limit: 返回数量 - offset: 偏移量 + Query + post_id: ID + limit: + offset: """ try: post_id = request.args.get('post_id') @@ -2129,7 +2110,7 @@ def get_simulation_comments(simulation_id: str): }) except Exception as e: - logger.error(f"获取评论失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -2137,31 +2118,31 @@ def get_simulation_comments(simulation_id: str): }), 500 -# ============== Interview 采访接口 ============== +# ============== Interview ============== @simulation_bp.route('/interview', methods=['POST']) def interview_agent(): """ - 采访单个Agent + Agent - 注意:此功能需要模拟环境处于运行状态(完成模拟循环后进入等待命令模式) + SimulaçãoSimulação - 请求(JSON): + JSON { - "simulation_id": "sim_xxxx", // 必填,模拟ID - "agent_id": 0, // 必填,Agent ID - "prompt": "你对这件事有什么看法?", // 必填,采访问题 - "platform": "twitter", // 可选,指定平台(twitter/reddit) - // 
不指定时:双平台模拟同时采访两个平台 - "timeout": 60 // 可选,超时时间(秒),默认60 + "simulation_id": "sim_xxxx", // Obrigatório, ID da simulação + "agent_id": 0, // Agent ID + "prompt": "", // + "platform": "twitter", // twitter/reddit + // Simulação + "timeout": 60 // 60 } - 返回(不指定platform,双平台模式): + platform { "success": true, "data": { "agent_id": 0, - "prompt": "你对这件事有什么看法?", + "prompt": "", "result": { "agent_id": 0, "prompt": "...", @@ -2174,15 +2155,15 @@ def interview_agent(): } } - 返回(指定platform): + platform { "success": true, "data": { "agent_id": 0, - "prompt": "你对这件事有什么看法?", + "prompt": "", "result": { "agent_id": 0, - "response": "我认为...", + "response": "...", "platform": "twitter", "timestamp": "2025-12-08T10:00:00" }, @@ -2196,7 +2177,7 @@ def interview_agent(): simulation_id = data.get('simulation_id') agent_id = data.get('agent_id') prompt = data.get('prompt') - platform = data.get('platform') # 可选:twitter/reddit/None + platform = data.get('platform') # twitter/reddit/None timeout = data.get('timeout', 60) if not simulation_id: @@ -2217,35 +2198,152 @@ def interview_agent(): "error": t('api.requirePrompt') }), 400 - # 验证platform参数 + # platform if platform and platform not in ("twitter", "reddit"): return jsonify({ "success": False, "error": t('api.invalidInterviewPlatform') }), 400 - # 检查环境状态 - if not SimulationRunner.check_env_alive(simulation_id): + # Verificar se simulacao ja concluiu (nao adianta IPC) + sim_completed = False + try: + run_state = SimulationRunner.get_run_state(simulation_id) + if run_state: + rs = run_state.runner_status.value if hasattr(run_state.runner_status, 'value') else str(run_state.runner_status) + sim_completed = rs.lower() in ('completed','finished','stopped','failed','idle') + except: + pass + + # Se simulacao esta viva E nao concluiu, usar IPC + if not sim_completed and SimulationRunner.check_env_alive(simulation_id): + optimized_prompt = optimize_interview_prompt(prompt) + result = SimulationRunner.interview_agent( + 
simulation_id=simulation_id, + agent_id=agent_id, + prompt=optimized_prompt, + platform=platform, + timeout=timeout + ) return jsonify({ - "success": False, - "error": t('api.envNotRunning') - }), 400 - - # 优化prompt,添加前缀避免Agent调用工具 - optimized_prompt = optimize_interview_prompt(prompt) + "success": result.get("success", False), + "data": result + }) - result = SimulationRunner.interview_agent( - simulation_id=simulation_id, - agent_id=agent_id, - prompt=optimized_prompt, - platform=platform, - timeout=timeout - ) - - return jsonify({ - "success": result.get("success", False), - "data": result - }) + # Simulacao concluida — usar LLM direto com perfil do agente + logger.info(f"Interview offline: sim={simulation_id}, agent={agent_id}") + try: + from ..utils.llm_client import LLMClient + from datetime import datetime + + # Carregar perfil do agente + manager = SimulationManager() + profiles = manager.get_profiles(simulation_id, platform=platform or 'reddit') + agent_profile = None + if isinstance(profiles, list) and len(profiles) > int(agent_id or 0): + agent_profile = profiles[int(agent_id)] + + profile_text = "" + if agent_profile: + name = agent_profile.get('name', agent_profile.get('user_name', f'Agente {agent_id}')) + bio = agent_profile.get('bio', agent_profile.get('description', '')) + personality = agent_profile.get('personality', agent_profile.get('mbti', '')) + profile_text = f"Nome: {name}\nBio: {bio}\nPersonalidade: {personality}" + else: + profile_text = f"Agente {agent_id} da simulacao" + + # Carregar contexto do relatorio + report_context = "" + report_obj = None + try: + from ..services.report_agent import ReportManager + report_obj = ReportManager.get_report_by_simulation(simulation_id) + if report_obj and report_obj.outline: + report_context = report_obj.outline.summary or "" + except: + pass + + # Carregar posts/acoes deste agente na simulacao + agent_posts = "" + try: + import json, os + sim_dir = os.path.join(SimulationRunner.RUN_STATE_DIR, 
simulation_id) + for fname in ['twitter/actions.jsonl', 'reddit/actions.jsonl']: + fpath = os.path.join(sim_dir, fname) + if os.path.exists(fpath): + with open(fpath, 'r') as af: + for line in af: + try: + ev = json.loads(line.strip()) + if str(ev.get('agent_id','')) == str(agent_id) and ev.get('content'): + agent_posts += f"- {ev.get('content','')[:200]}\n" + except: + pass + if agent_posts: + agent_posts = agent_posts[:2000] # Limitar tamanho + except: + pass + + # Carregar secoes-chave do relatorio para contexto completo + report_sections = "" + try: + if report_obj and report_obj.outline and report_obj.outline.sections: + for sec in report_obj.outline.sections: + report_sections += f"\n## {sec.title}\n{(sec.content or '')[:1200]}\n" + report_sections = report_sections[:8000] + except: + pass + + llm = LLMClient(model='gpt-5.4-mini') # mini para chat rapido + messages = [ + {"role": "system", "content": f"""Voce e uma PESSOA REAL chamada que vive nessa cidade e conhece esse mercado. Voce nao e um analista, voce e alguem que VIVE isso no dia a dia. + +Seu perfil: +{profile_text} + +O que voce sabe sobre essa situacao (voce viu, ouviu, vivenciou): +{report_context[:1500]} + +Dados e fatos que voce conhece: +{report_sections} + +Coisas que voce ja disse sobre isso: +{agent_posts if agent_posts else "(voce ainda nao comentou publicamente)"} + +COMO VOCE FALA: +- Fale como gente normal, nao como consultor. 
Use "eu acho", "olha", "na real", "tipo assim" +- Cite numeros e fatos CONCRETOS que voce viu acima (ex: "vi que a Pecanha tem 3000 clientes cadastrados") +- Tenha OPINIAO forte — voce nao e neutro, voce tem vivencia +- Fale em primeira pessoa, conte experiencias, de exemplos do cotidiano +- Seja direto, sem enrolacao academica +- Se nao sabe algo, diga "isso eu nao sei te dizer" em vez de inventar +- NAO use bullet points nem listas — fale como numa conversa +- MAXIMO 150 palavras"""}, + {"role": "user", "content": prompt} + ] + + response = llm.chat(messages=messages, temperature=0.7, max_tokens=1500) + + return jsonify({ + "success": True, + "data": { + "agent_id": agent_id, + "prompt": prompt, + "result": { + "agent_id": agent_id, + "response": response, + "platform": platform or "twitter", + "offline": True + }, + "timestamp": datetime.utcnow().isoformat() + } + }) + except Exception as e2: + logger.error(f"Interview offline falhou: {e2}") + return jsonify({ + "success": False, + "error": f"Simulacao concluida e entrevista offline falhou: {str(e2)}" + }), 500 except ValueError as e: return jsonify({ @@ -2260,7 +2358,7 @@ def interview_agent(): }), 504 except Exception as e: - logger.error(f"Interview失败: {str(e)}") + logger.error(f"InterviewFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -2271,30 +2369,29 @@ def interview_agent(): @simulation_bp.route('/interview/batch', methods=['POST']) def interview_agents_batch(): """ - 批量采访多个Agent + Agent - 注意:此功能需要模拟环境处于运行状态 + Simulação - 请求(JSON): + JSON { - "simulation_id": "sim_xxxx", // 必填,模拟ID - "interviews": [ // 必填,采访列表 + "simulation_id": "sim_xxxx", // Obrigatório, ID da simulação + "interviews": [ // { "agent_id": 0, - "prompt": "你对A有什么看法?", - "platform": "twitter" // 可选,指定该Agent的采访平台 + "prompt": "A", + "platform": "twitter" // Agent }, { "agent_id": 1, - "prompt": "你对B有什么看法?" 
// 不指定platform则使用默认值 + "prompt": "B" // platform } ], - "platform": "reddit", // 可选,默认平台(被每项的platform覆盖) - // 不指定时:双平台模拟每个Agent同时采访两个平台 - "timeout": 120 // 可选,超时时间(秒),默认120 + "platform": "reddit", // platform + // SimulaçãoAgent + "timeout": 120 // 120 } - 返回: { "success": true, "data": { @@ -2317,7 +2414,7 @@ def interview_agents_batch(): simulation_id = data.get('simulation_id') interviews = data.get('interviews') - platform = data.get('platform') # 可选:twitter/reddit/None + platform = data.get('platform') # twitter/reddit/None timeout = data.get('timeout', 120) if not simulation_id: @@ -2332,14 +2429,13 @@ def interview_agents_batch(): "error": t('api.requireInterviews') }), 400 - # 验证platform参数 + # platform if platform and platform not in ("twitter", "reddit"): return jsonify({ "success": False, "error": t('api.invalidInterviewPlatform') }), 400 - # 验证每个采访项 for i, interview in enumerate(interviews): if 'agent_id' not in interview: return jsonify({ @@ -2351,7 +2447,7 @@ def interview_agents_batch(): "success": False, "error": t('api.interviewListMissingPrompt', index=i+1) }), 400 - # 验证每项的platform(如果有) + # platform item_platform = interview.get('platform') if item_platform and item_platform not in ("twitter", "reddit"): return jsonify({ @@ -2359,14 +2455,13 @@ def interview_agents_batch(): "error": t('api.interviewListInvalidPlatform', index=i+1) }), 400 - # 检查环境状态 if not SimulationRunner.check_env_alive(simulation_id): return jsonify({ "success": False, "error": t('api.envNotRunning') }), 400 - # 优化每个采访项的prompt,添加前缀避免Agent调用工具 + # promptAgentFerramenta optimized_interviews = [] for interview in interviews: optimized_interview = interview.copy() @@ -2398,7 +2493,7 @@ def interview_agents_batch(): }), 504 except Exception as e: - logger.error(f"批量Interview失败: {str(e)}") + logger.error(f"InterviewFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -2409,20 +2504,19 @@ def interview_agents_batch(): @simulation_bp.route('/interview/all', 
methods=['POST']) def interview_all_agents(): """ - 全局采访 - 使用相同问题采访所有Agent + - Agent - 注意:此功能需要模拟环境处于运行状态 + Simulação - 请求(JSON): + JSON { - "simulation_id": "sim_xxxx", // 必填,模拟ID - "prompt": "你对这件事整体有什么看法?", // 必填,采访问题(所有Agent使用相同问题) - "platform": "reddit", // 可选,指定平台(twitter/reddit) - // 不指定时:双平台模拟每个Agent同时采访两个平台 - "timeout": 180 // 可选,超时时间(秒),默认180 + "simulation_id": "sim_xxxx", // Obrigatório, ID da simulação + "prompt": "", // Agent + "platform": "reddit", // twitter/reddit + // SimulaçãoAgent + "timeout": 180 // 180 } - 返回: { "success": true, "data": { @@ -2444,7 +2538,7 @@ def interview_all_agents(): simulation_id = data.get('simulation_id') prompt = data.get('prompt') - platform = data.get('platform') # 可选:twitter/reddit/None + platform = data.get('platform') # twitter/reddit/None timeout = data.get('timeout', 180) if not simulation_id: @@ -2459,21 +2553,20 @@ def interview_all_agents(): "error": t('api.requirePrompt') }), 400 - # 验证platform参数 + # platform if platform and platform not in ("twitter", "reddit"): return jsonify({ "success": False, "error": t('api.invalidInterviewPlatform') }), 400 - # 检查环境状态 if not SimulationRunner.check_env_alive(simulation_id): return jsonify({ "success": False, "error": t('api.envNotRunning') }), 400 - # 优化prompt,添加前缀避免Agent调用工具 + # promptAgentFerramenta optimized_prompt = optimize_interview_prompt(prompt) result = SimulationRunner.interview_all_agents( @@ -2501,7 +2594,7 @@ def interview_all_agents(): }), 504 except Exception as e: - logger.error(f"全局Interview失败: {str(e)}") + logger.error(f"InterviewFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -2512,20 +2605,19 @@ def interview_all_agents(): @simulation_bp.route('/interview/history', methods=['POST']) def get_interview_history(): """ - 获取Interview历史记录 + Interview - 从模拟数据库中读取所有Interview记录 + SimulaçãoInterview - 请求(JSON): + JSON { - "simulation_id": "sim_xxxx", // 必填,模拟ID - "platform": "reddit", // 可选,平台类型(reddit/twitter) - // 不指定则返回两个平台的所有历史 - 
"agent_id": 0, // 可选,只获取该Agent的采访历史 - "limit": 100 // 可选,返回数量,默认100 + "simulation_id": "sim_xxxx", // Obrigatório, ID da simulação + "platform": "reddit", // reddit/twitter + // + "agent_id": 0, // Agent + "limit": 100 // 100 } - 返回: { "success": true, "data": { @@ -2533,8 +2625,8 @@ def get_interview_history(): "history": [ { "agent_id": 0, - "response": "我认为...", - "prompt": "你对这件事有什么看法?", + "response": "...", + "prompt": "", "timestamp": "2025-12-08T10:00:00", "platform": "reddit" }, @@ -2547,7 +2639,7 @@ def get_interview_history(): data = request.get_json() or {} simulation_id = data.get('simulation_id') - platform = data.get('platform') # 不指定则返回两个平台的历史 + platform = data.get('platform') # agent_id = data.get('agent_id') limit = data.get('limit', 100) @@ -2573,7 +2665,7 @@ def get_interview_history(): }) except Exception as e: - logger.error(f"获取Interview历史失败: {str(e)}") + logger.error(f"InterviewFalhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -2584,16 +2676,15 @@ def get_interview_history(): @simulation_bp.route('/env-status', methods=['POST']) def get_env_status(): """ - 获取模拟环境状态 + Simulação - 检查模拟环境是否存活(可以接收Interview命令) + SimulaçãoInterview - 请求(JSON): + JSON { - "simulation_id": "sim_xxxx" // 必填,模拟ID + "simulation_id": "sim_xxxx" // Obrigatório, ID da simulação } - 返回: { "success": true, "data": { @@ -2601,7 +2692,7 @@ def get_env_status(): "env_alive": true, "twitter_available": true, "reddit_available": true, - "message": "环境正在运行,可以接收Interview命令" + "message": "Interview" } } """ @@ -2618,7 +2709,6 @@ def get_env_status(): env_alive = SimulationRunner.check_env_alive(simulation_id) - # 获取更详细的状态信息 env_status = SimulationRunner.get_env_status_detail(simulation_id) if env_alive: @@ -2638,7 +2728,7 @@ def get_env_status(): }) except Exception as e: - logger.error(f"获取环境状态失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e), @@ -2649,24 +2739,23 @@ def get_env_status(): 
@simulation_bp.route('/close-env', methods=['POST']) def close_simulation_env(): """ - 关闭模拟环境 + Simulação - 向模拟发送关闭环境命令,使其优雅退出等待命令模式。 + Simulação - 注意:这不同于 /stop 接口,/stop 会强制终止进程, - 而此接口会让模拟优雅地关闭环境并退出。 + /stop /stop + Simulação - 请求(JSON): + JSON { - "simulation_id": "sim_xxxx", // 必填,模拟ID - "timeout": 30 // 可选,超时时间(秒),默认30 + "simulation_id": "sim_xxxx", // Obrigatório, ID da simulação + "timeout": 30 // 30 } - 返回: { "success": true, "data": { - "message": "环境关闭命令已发送", + "message": "", "result": {...}, "timestamp": "2025-12-08T10:00:01" } @@ -2689,7 +2778,7 @@ def close_simulation_env(): timeout=timeout ) - # 更新模拟状态 + # Simulação manager = SimulationManager() state = manager.get_simulation(simulation_id) if state: @@ -2708,9 +2797,181 @@ def close_simulation_env(): }), 400 except Exception as e: - logger.error(f"关闭环境失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return jsonify({ "success": False, "error": str(e), "traceback": traceback.format_exc() }), 500 + + +# ============================================================ +# AGENT PREVIEW — Preview, customização e aprovação de agentes +# ============================================================ + +@simulation_bp.route('/preview-agents', methods=['POST']) +def preview_agents(): + """Gera preview dos agentes para revisão antes de iniciar simulação.""" + try: + data = request.get_json() or {} + graph_id = data.get('graph_id') + + if not graph_id: + return jsonify({"success": False, "error": "graph_id obrigatório"}), 400 + + entity_types = data.get('entity_types') + num_agents = data.get('num_agents', 20) + + reader = ZepEntityReader() + filtered = reader.filter_defined_entities( + graph_id=graph_id, + defined_entity_types=entity_types, + enrich_with_edges=True + ) + + if filtered.filtered_count == 0: + return jsonify({"success": False, "error": "Nenhuma entidade encontrada no grafo"}), 400 + + entities = filtered.entities[:num_agents] + + generator = OasisProfileGenerator(graph_id=graph_id) + profiles = 
generator.generate_profiles_from_entities( + entities=entities, + use_llm=True, + graph_id=graph_id + ) + + agents = [] + for i, p in enumerate(profiles): + agent = p.to_dict() + agent['id'] = f'agent_{i:03d}' + agent['_custom'] = False + agents.append(agent) + + distribution = {} + for a in agents: + tipo = a.get('source_entity_type', 'Outro') + distribution[tipo] = distribution.get(tipo, 0) + 1 + + return jsonify({ + "success": True, + "data": { + "agents": agents, + "count": len(agents), + "distribution": distribution + } + }) + + except Exception as e: + logger.error(f"Preview agents falhou: {e}") + return jsonify({"success": False, "error": str(e)}), 500 + + +@simulation_bp.route('/custom-agent', methods=['POST']) +def create_custom_agent(): + """Gera perfil completo de agente a partir de descrição em texto livre.""" + try: + data = request.get_json() or {} + description = data.get('description', '') + + if not description or len(description) < 5: + return jsonify({"success": False, "error": "Descrição muito curta"}), 400 + + simulation_requirement = data.get('simulation_requirement', '') + + from ..utils.llm_client import LLMClient + + llm = LLMClient() + + prompt = f"""Gere um perfil de agente para simulação de opinião pública em redes sociais. + +DESCRIÇÃO DO USUÁRIO: {description} +CONTEXTO DA SIMULAÇÃO: {simulation_requirement} + +Gere um JSON com: +- "name": Nome completo fictício brasileiro +- "username": Username para redes sociais (snake_case) +- "bio": Biografia curta (máx 200 chars) em PT-BR +- "persona": Descrição detalhada (máx 1500 chars) incluindo personalidade, comportamento, motivações +- "age": Idade (número inteiro) +- "gender": "male" ou "female" +- "mbti": Tipo MBTI +- "profession": Profissão em PT-BR +- "interested_topics": Lista de 3-5 tópicos de interesse +- "source_entity_type": Tipo mais próximo (Consumer, Influencer, Competitor, Professional, Person) + +Retorne APENAS JSON válido. 
Tudo em PT-BR exceto gender e mbti.""" + + result = llm.chat_json( + messages=[ + {"role": "system", "content": "Você gera perfis de agentes para simulação. Retorne apenas JSON válido."}, + {"role": "user", "content": prompt} + ], + temperature=0.7, + max_tokens=1000 + ) + + import time as _time + result['id'] = f'custom_{int(_time.time())}' + result['_custom'] = True + + return jsonify({"success": True, "data": result}) + + except Exception as e: + logger.error(f"Custom agent falhou: {e}") + return jsonify({"success": False, "error": str(e)}), 500 + + +@simulation_bp.route('/approve-agents', methods=['POST']) +def approve_agents(): + """Salva lista aprovada de agentes para uso na simulação.""" + try: + import json as _json + + data = request.get_json() or {} + simulation_id = data.get('simulation_id') + agents = data.get('agents', []) + + if not simulation_id: + return jsonify({"success": False, "error": "simulation_id obrigatório"}), 400 + + if not agents: + return jsonify({"success": False, "error": "Lista de agentes vazia"}), 400 + + sim_dir = os.path.join(Config.OASIS_SIMULATION_DATA_DIR, simulation_id) + os.makedirs(sim_dir, exist_ok=True) + + profiles_path = os.path.join(sim_dir, "reddit_profiles.json") + + reddit_profiles = [] + for agent in agents: + profile = { + "username": agent.get("username", agent.get("name", "agent").replace(" ", "_").lower()), + "name": agent.get("name", ""), + "bio": agent.get("bio", ""), + "persona": agent.get("persona", agent.get("bio", "")), + "age": agent.get("age", 30), + "gender": agent.get("gender", "female"), + "mbti": agent.get("mbti", "INFP"), + "country": agent.get("country", "Brasil"), + "profession": agent.get("profession", ""), + "interested_topics": agent.get("interested_topics", []), + } + reddit_profiles.append(profile) + + with open(profiles_path, 'w', encoding='utf-8') as f: + _json.dump(reddit_profiles, f, ensure_ascii=False, indent=2) + + logger.info(f"Approved {len(reddit_profiles)} agents for simulation 
{simulation_id}") + + return jsonify({ + "success": True, + "data": { + "simulation_id": simulation_id, + "agents_count": len(reddit_profiles), + } + }) + + except Exception as e: + logger.error(f"Approve agents falhou: {e}") + return jsonify({"success": False, "error": str(e)}), 500 diff --git a/backend/app/auth.py b/backend/app/auth.py new file mode 100644 index 0000000000..09248c96fc --- /dev/null +++ b/backend/app/auth.py @@ -0,0 +1,197 @@ +""" +AUGUR — Autenticação JWT +Requer: pip install flask-jwt-extended --break-system-packages + +Uso: + 1. POST /api/auth/register → {email, password, name} + 2. POST /api/auth/login → {email, password} → retorna {token} + 3. Rotas protegidas: adicionar @jwt_required() no endpoint +""" +import os +import json +import hashlib +import secrets +import logging +from datetime import timedelta +from functools import wraps + +from flask import Blueprint, request, jsonify + +logger = logging.getLogger(__name__) + +try: + from flask_jwt_extended import ( + JWTManager, create_access_token, jwt_required, + get_jwt_identity, get_jwt + ) + HAS_JWT = True +except ImportError: + HAS_JWT = False + logger.warning("flask-jwt-extended não instalado. 
Auth desabilitado.") + # Stubs para não quebrar imports + def jwt_required(optional=False): + def decorator(fn): + @wraps(fn) + def wrapper(*args, **kwargs): + return fn(*args, **kwargs) + return wrapper + return decorator + def get_jwt_identity(): + return None + +auth_bp = Blueprint('auth', __name__, url_prefix='/api/auth') + +# ═══ Storage simples (JSON file) — substituir por DB real em produção ═══ +USERS_FILE = os.path.join(os.path.dirname(__file__), '..', '..', 'data', 'users.json') + + +def _ensure_data_dir(): + os.makedirs(os.path.dirname(USERS_FILE), exist_ok=True) + if not os.path.exists(USERS_FILE): + with open(USERS_FILE, 'w') as f: + json.dump([], f) + + +def _load_users() -> list: + _ensure_data_dir() + with open(USERS_FILE, 'r') as f: + return json.load(f) + + +def _save_users(users: list): + _ensure_data_dir() + with open(USERS_FILE, 'w') as f: + json.dump(users, f, ensure_ascii=False, indent=2) + + +def _hash_password(password: str, salt: str = None) -> tuple: + if salt is None: + salt = secrets.token_hex(16) + hashed = hashlib.pbkdf2_hmac('sha256', password.encode(), salt.encode(), 100000).hex() + return hashed, salt + + +def _verify_password(password: str, hashed: str, salt: str) -> bool: + check, _ = _hash_password(password, salt) + return check == hashed + + +def init_jwt(app): + """Inicializa JWT no app Flask.""" + if not HAS_JWT: + logger.warning("JWT não configurado — flask-jwt-extended não instalado") + return None + + app.config['JWT_SECRET_KEY'] = os.environ.get('JWT_SECRET_KEY', secrets.token_hex(32)) + app.config['JWT_ACCESS_TOKEN_EXPIRES'] = timedelta(days=7) + app.config['JWT_TOKEN_LOCATION'] = ['headers'] + + jwt = JWTManager(app) + + @jwt.expired_token_loader + def expired_token_callback(jwt_header, jwt_payload): + return jsonify({"success": False, "error": "Token expirado. 
Faça login novamente."}), 401 + + @jwt.invalid_token_loader + def invalid_token_callback(error): + return jsonify({"success": False, "error": "Token inválido."}), 401 + + @jwt.unauthorized_loader + def missing_token_callback(error): + return jsonify({"success": False, "error": "Token não fornecido. Faça login."}), 401 + + logger.info("JWT configurado com sucesso") + return jwt + + +# ═══ ENDPOINTS ═══ + +@auth_bp.route('/register', methods=['POST']) +def register(): + """Registrar novo usuário.""" + data = request.get_json() + email = (data.get('email') or '').strip().lower() + password = data.get('password', '') + name = data.get('name', '').strip() + + if not email or not password: + return jsonify({"success": False, "error": "Email e senha são obrigatórios."}), 400 + + if len(password) < 6: + return jsonify({"success": False, "error": "Senha deve ter no mínimo 6 caracteres."}), 400 + + users = _load_users() + if any(u['email'] == email for u in users): + return jsonify({"success": False, "error": "Email já registrado."}), 409 + + hashed, salt = _hash_password(password) + user = { + "id": secrets.token_hex(8), + "email": email, + "name": name or email.split("@")[0], + "password_hash": hashed, + "password_salt": salt, + "created_at": __import__('datetime').datetime.utcnow().isoformat() + } + users.append(user) + _save_users(users) + + if HAS_JWT: + token = create_access_token(identity=user['id'], additional_claims={"email": email, "name": user['name']}) + else: + token = "jwt-not-configured" + + return jsonify({ + "success": True, + "data": { + "token": token, + "user": {"id": user['id'], "email": email, "name": user['name']} + } + }), 201 + + +@auth_bp.route('/login', methods=['POST']) +def login(): + """Login.""" + data = request.get_json() + email = (data.get('email') or '').strip().lower() + password = data.get('password', '') + + if not email or not password: + return jsonify({"success": False, "error": "Email e senha são obrigatórios."}), 400 + + users = 
_load_users() + user = next((u for u in users if u['email'] == email), None) + + if not user or not _verify_password(password, user['password_hash'], user['password_salt']): + return jsonify({"success": False, "error": "Email ou senha inválidos."}), 401 + + if HAS_JWT: + token = create_access_token(identity=user['id'], additional_claims={"email": email, "name": user['name']}) + else: + token = "jwt-not-configured" + + return jsonify({ + "success": True, + "data": { + "token": token, + "user": {"id": user['id'], "email": email, "name": user['name']} + } + }) + + +@auth_bp.route('/me', methods=['GET']) +@jwt_required() +def me(): + """Dados do usuário logado.""" + user_id = get_jwt_identity() + users = _load_users() + user = next((u for u in users if u['id'] == user_id), None) + + if not user: + return jsonify({"success": False, "error": "Usuário não encontrado."}), 404 + + return jsonify({ + "success": True, + "data": {"id": user['id'], "email": user['email'], "name": user['name']} + }) diff --git a/backend/app/config.py b/backend/app/config.py index 953dfa50a2..f8c02ea13b 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,54 +1,57 @@ """ -配置管理 -统一从项目根目录的 .env 文件加载配置 +Configuração + .env Configuração """ import os from dotenv import load_dotenv -# 加载项目根目录的 .env 文件 -# 路径: MiroFish/.env (相对于 backend/app/config.py) +# .env +# : MiroFish/.env ( backend/app/config.py) project_root_env = os.path.join(os.path.dirname(__file__), '../../.env') if os.path.exists(project_root_env): load_dotenv(project_root_env, override=True) else: - # 如果根目录没有 .env,尝试加载环境变量(用于生产环境) + # .env load_dotenv(override=True) class Config: - """Flask配置类""" + """FlaskConfiguração""" - # Flask配置 + # FlaskConfiguração SECRET_KEY = os.environ.get('SECRET_KEY', 'mirofish-secret-key') DEBUG = os.environ.get('FLASK_DEBUG', 'True').lower() == 'true' - # JSON配置 - 禁用ASCII转义,让中文直接显示(而不是 \uXXXX 格式) + # JSONConfiguração - ASCII \uXXXX JSON_AS_ASCII = False - # LLM配置(统一使用OpenAI格式) + # 
LLMConfiguraçãoOpenAI LLM_API_KEY = os.environ.get('LLM_API_KEY') LLM_BASE_URL = os.environ.get('LLM_BASE_URL', 'https://api.openai.com/v1') - LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-4o-mini') + LLM_MODEL_NAME = os.environ.get('LLM_MODEL_NAME', 'gpt-5.4-mini') - # Zep配置 + # ZepConfiguração ZEP_API_KEY = os.environ.get('ZEP_API_KEY') - # 文件上传配置 + # Perplexity — pesquisa de mercado (opcional) + PERPLEXITY_API_KEY = os.environ.get('PERPLEXITY_API_KEY', '') + + # Configuração MAX_CONTENT_LENGTH = 50 * 1024 * 1024 # 50MB UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), '../uploads') ALLOWED_EXTENSIONS = {'pdf', 'md', 'txt', 'markdown'} - # 文本处理配置 - DEFAULT_CHUNK_SIZE = 500 # 默认切块大小 - DEFAULT_CHUNK_OVERLAP = 50 # 默认重叠大小 + # Configuração + DEFAULT_CHUNK_SIZE = 500 # + DEFAULT_CHUNK_OVERLAP = 50 # - # OASIS模拟配置 + # OASISConfiguração da simulação OASIS_DEFAULT_MAX_ROUNDS = int(os.environ.get('OASIS_DEFAULT_MAX_ROUNDS', '10')) OASIS_SIMULATION_DATA_DIR = os.path.join(os.path.dirname(__file__), '../uploads/simulations') - # OASIS平台可用动作配置 + # OASISConfiguração OASIS_TWITTER_ACTIONS = [ 'CREATE_POST', 'LIKE_POST', 'REPOST', 'FOLLOW', 'DO_NOTHING', 'QUOTE_POST' ] @@ -58,18 +61,17 @@ class Config: 'TREND', 'REFRESH', 'DO_NOTHING', 'FOLLOW', 'MUTE' ] - # Report Agent配置 + # Report AgentConfiguração REPORT_AGENT_MAX_TOOL_CALLS = int(os.environ.get('REPORT_AGENT_MAX_TOOL_CALLS', '5')) REPORT_AGENT_MAX_REFLECTION_ROUNDS = int(os.environ.get('REPORT_AGENT_MAX_REFLECTION_ROUNDS', '2')) REPORT_AGENT_TEMPERATURE = float(os.environ.get('REPORT_AGENT_TEMPERATURE', '0.5')) @classmethod def validate(cls): - """验证必要配置""" + """Configuração""" errors = [] if not cls.LLM_API_KEY: - errors.append("LLM_API_KEY 未配置") + errors.append("LLM_API_KEY not configured") if not cls.ZEP_API_KEY: - errors.append("ZEP_API_KEY 未配置") + errors.append("ZEP_API_KEY not configured") return errors - diff --git a/backend/app/models/__init__.py b/backend/app/models/__init__.py index 
55bec61951..0855c7cb53 100644 --- a/backend/app/models/__init__.py +++ b/backend/app/models/__init__.py @@ -1,5 +1,4 @@ """ -数据模型模块 """ from .task import TaskManager, TaskStatus diff --git a/backend/app/models/project.py b/backend/app/models/project.py index 089789374e..b85ee5ce38 100644 --- a/backend/app/models/project.py +++ b/backend/app/models/project.py @@ -1,6 +1,4 @@ """ -项目上下文管理 -用于在服务端持久化项目状态,避免前端在接口间传递大量数据 """ import os @@ -15,45 +13,43 @@ class ProjectStatus(str, Enum): - """项目状态""" - CREATED = "created" # 刚创建,文件已上传 - ONTOLOGY_GENERATED = "ontology_generated" # 本体已生成 - GRAPH_BUILDING = "graph_building" # 图谱构建中 - GRAPH_COMPLETED = "graph_completed" # 图谱构建完成 - FAILED = "failed" # 失败 + """""" + CREATED = "created" # + ONTOLOGY_GENERATED = "ontology_generated" # Gerar + GRAPH_BUILDING = "graph_building" # Grafo + GRAPH_COMPLETED = "graph_completed" # Grafo + FAILED = "failed" # Falhou @dataclass class Project: - """项目数据模型""" + """""" project_id: str name: str status: ProjectStatus created_at: str updated_at: str - # 文件信息 files: List[Dict[str, str]] = field(default_factory=list) # [{filename, path, size}] total_text_length: int = 0 - # 本体信息(接口1生成后填充) + # 1Gerar ontology: Optional[Dict[str, Any]] = None analysis_summary: Optional[str] = None - # 图谱信息(接口2完成后填充) + # Grafo2 graph_id: Optional[str] = None graph_build_task_id: Optional[str] = None - # 配置 + # Configuração simulation_requirement: Optional[str] = None chunk_size: int = 500 chunk_overlap: int = 50 - # 错误信息 error: Optional[str] = None def to_dict(self) -> Dict[str, Any]: - """转换为字典""" + """""" return { "project_id": self.project_id, "name": self.name, @@ -74,7 +70,7 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, data: Dict[str, Any]) -> 'Project': - """从字典创建""" + """""" status = data.get('status', 'created') if isinstance(status, str): status = ProjectStatus(status) @@ -99,46 +95,44 @@ def from_dict(cls, data: Dict[str, Any]) -> 'Project': class ProjectManager: - """项目管理器 - 
负责项目的持久化存储和检索""" + """ - Busca""" - # 项目存储根目录 PROJECTS_DIR = os.path.join(Config.UPLOAD_FOLDER, 'projects') @classmethod def _ensure_projects_dir(cls): - """确保项目目录存在""" + """""" os.makedirs(cls.PROJECTS_DIR, exist_ok=True) @classmethod def _get_project_dir(cls, project_id: str) -> str: - """获取项目目录路径""" + """""" return os.path.join(cls.PROJECTS_DIR, project_id) @classmethod def _get_project_meta_path(cls, project_id: str) -> str: - """获取项目元数据文件路径""" + """""" return os.path.join(cls._get_project_dir(project_id), 'project.json') @classmethod def _get_project_files_dir(cls, project_id: str) -> str: - """获取项目文件存储目录""" + """""" return os.path.join(cls._get_project_dir(project_id), 'files') @classmethod def _get_project_text_path(cls, project_id: str) -> str: - """获取项目提取文本存储路径""" + """""" return os.path.join(cls._get_project_dir(project_id), 'extracted_text.txt') @classmethod def create_project(cls, name: str = "Unnamed Project") -> Project: """ - 创建新项目 Args: - name: 项目名称 + name: Returns: - 新创建的Project对象 + Project """ cls._ensure_projects_dir() @@ -153,20 +147,18 @@ def create_project(cls, name: str = "Unnamed Project") -> Project: updated_at=now ) - # 创建项目目录结构 project_dir = cls._get_project_dir(project_id) files_dir = cls._get_project_files_dir(project_id) os.makedirs(project_dir, exist_ok=True) os.makedirs(files_dir, exist_ok=True) - # 保存项目元数据 cls.save_project(project) return project @classmethod def save_project(cls, project: Project) -> None: - """保存项目元数据""" + """""" project.updated_at = datetime.now().isoformat() meta_path = cls._get_project_meta_path(project.project_id) @@ -176,13 +168,12 @@ def save_project(cls, project: Project) -> None: @classmethod def get_project(cls, project_id: str) -> Optional[Project]: """ - 获取项目 Args: - project_id: 项目ID + project_id: ID Returns: - Project对象,如果不存在返回None + ProjectNone """ meta_path = cls._get_project_meta_path(project_id) @@ -197,13 +188,11 @@ def get_project(cls, project_id: str) -> Optional[Project]: @classmethod def 
list_projects(cls, limit: int = 50) -> List[Project]: """ - 列出所有项目 Args: - limit: 返回数量限制 + limit: Returns: - 项目列表,按创建时间倒序 """ cls._ensure_projects_dir() @@ -213,7 +202,6 @@ def list_projects(cls, limit: int = 50) -> List[Project]: if project: projects.append(project) - # 按创建时间倒序排序 projects.sort(key=lambda p: p.created_at, reverse=True) return projects[:limit] @@ -221,13 +209,11 @@ def list_projects(cls, limit: int = 50) -> List[Project]: @classmethod def delete_project(cls, project_id: str) -> bool: """ - 删除项目及其所有文件 Args: - project_id: 项目ID + project_id: ID Returns: - 是否删除成功 """ project_dir = cls._get_project_dir(project_id) @@ -240,28 +226,25 @@ def delete_project(cls, project_id: str) -> bool: @classmethod def save_file_to_project(cls, project_id: str, file_storage, original_filename: str) -> Dict[str, str]: """ - 保存上传的文件到项目目录 Args: - project_id: 项目ID - file_storage: Flask的FileStorage对象 - original_filename: 原始文件名 + project_id: ID + file_storage: FlaskFileStorage + original_filename: Returns: - 文件信息字典 {filename, path, size} + {filename, path, size} """ files_dir = cls._get_project_files_dir(project_id) os.makedirs(files_dir, exist_ok=True) - # 生成安全的文件名 + # Gerar ext = os.path.splitext(original_filename)[1].lower() safe_filename = f"{uuid.uuid4().hex[:8]}{ext}" file_path = os.path.join(files_dir, safe_filename) - # 保存文件 file_storage.save(file_path) - # 获取文件大小 file_size = os.path.getsize(file_path) return { @@ -273,14 +256,14 @@ def save_file_to_project(cls, project_id: str, file_storage, original_filename: @classmethod def save_extracted_text(cls, project_id: str, text: str) -> None: - """保存提取的文本""" + """""" text_path = cls._get_project_text_path(project_id) with open(text_path, 'w', encoding='utf-8') as f: f.write(text) @classmethod def get_extracted_text(cls, project_id: str) -> Optional[str]: - """获取提取的文本""" + """""" text_path = cls._get_project_text_path(project_id) if not os.path.exists(text_path): @@ -291,7 +274,7 @@ def get_extracted_text(cls, project_id: 
str) -> Optional[str]: @classmethod def get_project_files(cls, project_id: str) -> List[str]: - """获取项目的所有文件路径""" + """""" files_dir = cls._get_project_files_dir(project_id) if not os.path.exists(files_dir): diff --git a/backend/app/models/task.py b/backend/app/models/task.py index dfebed23ba..505c2c975b 100644 --- a/backend/app/models/task.py +++ b/backend/app/models/task.py @@ -1,6 +1,5 @@ """ -任务状态管理 -用于跟踪长时间运行的任务(如图谱构建) +Grafo """ import uuid @@ -14,30 +13,30 @@ class TaskStatus(str, Enum): - """任务状态枚举""" - PENDING = "pending" # 等待中 - PROCESSING = "processing" # 处理中 - COMPLETED = "completed" # 已完成 - FAILED = "failed" # 失败 + """""" + PENDING = "pending" # Aguardando + PROCESSING = "processing" # + COMPLETED = "completed" # Concluído + FAILED = "failed" # Falhou @dataclass class Task: - """任务数据类""" + """""" task_id: str task_type: str status: TaskStatus created_at: datetime updated_at: datetime - progress: int = 0 # 总进度百分比 0-100 - message: str = "" # 状态消息 - result: Optional[Dict] = None # 任务结果 - error: Optional[str] = None # 错误信息 - metadata: Dict = field(default_factory=dict) # 额外元数据 - progress_detail: Dict = field(default_factory=dict) # 详细进度信息 + progress: int = 0 # 0-100 + message: str = "" # + result: Optional[Dict] = None # Resultado + error: Optional[str] = None # + metadata: Dict = field(default_factory=dict) # + progress_detail: Dict = field(default_factory=dict) # def to_dict(self) -> Dict[str, Any]: - """转换为字典""" + """""" return { "task_id": self.task_id, "task_type": self.task_type, @@ -55,15 +54,13 @@ def to_dict(self) -> Dict[str, Any]: class TaskManager: """ - 任务管理器 - 线程安全的任务状态管理 """ _instance = None _lock = threading.Lock() def __new__(cls): - """单例模式""" + """""" if cls._instance is None: with cls._lock: if cls._instance is None: @@ -74,14 +71,13 @@ def __new__(cls): def create_task(self, task_type: str, metadata: Optional[Dict] = None) -> str: """ - 创建新任务 Args: - task_type: 任务类型 - metadata: 额外元数据 + task_type: + metadata: Returns: - 任务ID + ID """ 
task_id = str(uuid.uuid4()) now = datetime.now() @@ -101,7 +97,7 @@ def create_task(self, task_type: str, metadata: Optional[Dict] = None) -> str: return task_id def get_task(self, task_id: str) -> Optional[Task]: - """获取任务""" + """""" with self._task_lock: return self._tasks.get(task_id) @@ -116,16 +112,15 @@ def update_task( progress_detail: Optional[Dict] = None ): """ - 更新任务状态 Args: - task_id: 任务ID - status: 新状态 - progress: 进度 - message: 消息 - result: 结果 - error: 错误信息 - progress_detail: 详细进度信息 + task_id: ID + status: + progress: + message: + result: Resultado + error: + progress_detail: """ with self._task_lock: task = self._tasks.get(task_id) @@ -145,7 +140,7 @@ def update_task( task.progress_detail = progress_detail def complete_task(self, task_id: str, result: Dict): - """标记任务完成""" + """""" self.update_task( task_id, status=TaskStatus.COMPLETED, @@ -155,7 +150,7 @@ def complete_task(self, task_id: str, result: Dict): ) def fail_task(self, task_id: str, error: str): - """标记任务失败""" + """Falhou""" self.update_task( task_id, status=TaskStatus.FAILED, @@ -164,7 +159,7 @@ def fail_task(self, task_id: str, error: str): ) def list_tasks(self, task_type: Optional[str] = None) -> list: - """列出任务""" + """""" with self._task_lock: tasks = list(self._tasks.values()) if task_type: @@ -172,7 +167,7 @@ def list_tasks(self, task_type: Optional[str] = None) -> list: return [t.to_dict() for t in sorted(tasks, key=lambda x: x.created_at, reverse=True)] def cleanup_old_tasks(self, max_age_hours: int = 24): - """清理旧任务""" + """""" from datetime import timedelta cutoff = datetime.now() - timedelta(hours=max_age_hours) diff --git a/backend/app/schemas/__init__.py b/backend/app/schemas/__init__.py new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/backend/app/schemas/__init__.py @@ -0,0 +1 @@ + diff --git a/backend/app/schemas/report_schema.py b/backend/app/schemas/report_schema.py new file mode 100644 index 0000000000..c84f69d555 --- /dev/null +++ 
b/backend/app/schemas/report_schema.py @@ -0,0 +1,375 @@ +""" +AUGUR — Contrato de Dados do Relatório v1.1 + +Define o schema completo que todas as camadas do pipeline usam: + Zep (ontologia) → OpenAI (análise) → PDF/Web (renderização) + +Regra: se o campo não existe no JSON, a seção não renderiza. +Se existe, renderiza exatamente como definido. Zero surpresas. + +Caminho: backend/app/schemas/report_schema.py +""" + +from typing import TypedDict, Optional +from enum import Enum +import json +import re + + +# ============================================================ +# ENUMS +# ============================================================ + +class Veredicto(str, Enum): + GO = "GO" + NO_GO = "NO-GO" + AJUSTAR = "AJUSTAR" + +class Impacto(str, Enum): + ALTO = "Alto" + MEDIO = "Medio" + BAIXO = "Baixo" + +class Urgencia(str, Enum): + URGENTE = "Urgente" + ALTA = "Alta" + MEDIA = "Media" + BAIXA = "Baixa" + +class TipoAgente(str, Enum): + APOIADOR = "Apoiador" + NEUTRO = "Neutro" + RESISTENTE = "Resistente" + CAUTELOSO = "Cauteloso" + +class SetorNegocio(str, Enum): + VAREJO_LOCAL = "varejo_local" + SAAS_B2B = "saas_b2b" + INDUSTRIA_FMCG = "industria_fmcg" + TELECOM_ISP = "telecom_isp" + ENERGIA_TECH = "energia_tech" + ALIMENTACAO = "alimentacao" + MARKETPLACE_APP = "marketplace_app" + SERVICOS = "servicos" + FRANQUIA = "franquia" + +class TipoDecisao(str, Enum): + NOVO_NEGOCIO = "novo_negocio" + NOVO_PRODUTO = "novo_produto" + PROMOCAO_CAMPANHA = "promocao_campanha" + EXPANSAO_GEOGRAFICA = "expansao_geografica" + EXPANSAO_FRANQUIA = "expansao_franquia" + PRECIFICACAO = "precificacao" + + +# ============================================================ +# SEÇÕES DO RELATÓRIO — TypeDicts +# ============================================================ + +class MetaAnalise(TypedDict): + projeto: str + setor: str # SetorNegocio + tipo_decisao: str # TipoDecisao + data_geracao: str # ISO 8601 + modelo_ia: str + num_agentes: int + num_rodadas: int + periodo_simulado_meses: 
int + + +class VeredictoPrincipal(TypedDict): + tipo: str # GO | NO-GO | AJUSTAR + score_viabilidade: int # 0-100 + frase_chave: str # max 200 chars + resumo_executivo: str + leitura_para_decisao: str + top5_fatos: list[dict] # [{titulo, descricao}] + + +class DashboardKPIs(TypedDict): + ticket_medio: str + volume_breakeven: str + margem_bruta_alvo: str + capital_giro_necessario: str + recompra_alvo: str + vendas_por_indicacao: str + erosao_margem_sazonal: str + breakeven_cenario1: str + contatos_mes_inicial: str + conversao_inicial: str + faturamento_maduro: str + prob_sobrevivencia_24m: str + investimento_total_estimado: str + composicao_investimento: list[dict] # [{item, valor}] + sinais_consolidacao: list[str] + sinais_alerta: list[str] + sinais_risco_critico: list[str] + + +class Cenario(TypedDict): + nome: str + probabilidade: int # 0-100, soma dos 3 = 100 + impacto_financeiro: str + breakeven: str + faturamento_m24: str + margem_bruta: str + recompra: str + risco_central: str + capital_giro: str + descricao: str + citacao_agente: str + projecao_faturamento_24m: list[float] # 25 valores (M0-M24) + + +class CenariosFuturos(TypedDict): + cenarios: list[Cenario] # EXATAMENTE 3 + ponto_bifurcacao: str + + +class FatorRisco(TypedDict): + numero: int + titulo: str # NUNCA truncar + probabilidade: int + impacto: str # Alto | Medio | Baixo + descricao: str + citacao_agente: str + + +class FatoresRisco(TypedDict): + texto_introducao: str + riscos: list[FatorRisco] # 5-7 riscos + + +class DistribuicaoEmocional(TypedDict): + emocoes: list[dict] # [{nome, percentual}] + saldo_positivo_vs_negativo: str + texto_confianca: str + citacao_confianca: str + texto_ceticismo: str + citacao_ceticismo: str + texto_empolgacao: str + texto_medo: str + evolucao_24m: dict[str, list[float]] + + +class PerfilAgente(TypedDict): + nome: str + descricao: str + tipo: str # Apoiador | Neutro | Resistente | Cauteloso + posicao_espectro: float # 0.0 a 1.0 + citacao_chave: str + 
papel_na_dinamica: str + + +class BlocoForca(TypedDict): + nome: str + base_clientes: str + descricao: str + poder_relativo: int # 1-10 + citacao: Optional[str] + + +class MapaForcas(TypedDict): + blocos: list[BlocoForca] + hierarquia_poder: str + coalizao_entrante: str + + +class FaseCronologia(TypedDict): + nome: str + periodo: str + mes_inicio: int + mes_fim: int + descricao: str + citacao: str + marcos: list[str] + + +class Cronologia(TypedDict): + fases: list[FaseCronologia] # 4 fases + + +class PadraoEmergente(TypedDict): + numero: int + titulo: str + descricao: str + + +class Recomendacao(TypedDict): + rank: int + titulo: str + descricao: str + citacao: Optional[str] + impacto_relativo: int # 0-100 + + +class ItemChecklist(TypedDict): + titulo: str + timing: str + justificativa: str + condicao_mensuravel: str + prioridade: str # Urgencia + + +class Previsao(TypedDict): + periodo: str + titulo: str + probabilidade: int + margem_erro: int + descricao: str + + +class Posicionamento(TypedDict): + percebido_descricao: str + percebido_citacao: str + desejado_descricao: str + desejado_citacao: str + rotulos_a_evitar: list[str] + posicionamento_vencedor: str + players: list[dict] # [{nome, x, y, papel}] + + +class ROIAnalise(TypedDict): + riscos_evitados: list[dict] # [{titulo, valor_risco, solucao}] + custo_analise: str + risco_total_evitado: str + roi_multiplicador: str + citacoes: list[str] + + +class DadoMercado(TypedDict): + titulo: str + conteudo: str + fontes: list[str] + + +class ContextoMercado(TypedDict): + """Seção NOVA: Dados de mercado verificados via Perplexity.""" + localizacao: str + setor_detalhe: str + dados: list[DadoMercado] + fontes_unicas: list[str] + total_queries: int + disclaimer: str + + +class SinteseFinal(TypedDict): + scores: dict # {viabilidade_financeira: 67, demanda: 81, ...} + veredicto_final: str + cenario_mais_provavel: str + risco_principal: str + direcionamento: list[str] + sinais_consolidacao: list[str] + sinais_alerta: 
list[str] + sinais_risco: list[str] + + +# ============================================================ +# SCHEMA COMPLETO +# ============================================================ + +class AugurReportSchema(TypedDict): + meta: MetaAnalise + veredicto: VeredictoPrincipal + dashboard: DashboardKPIs + contexto_mercado: Optional[ContextoMercado] # Perplexity — dados verificados + cenarios: CenariosFuturos + riscos: FatoresRisco + emocional: DistribuicaoEmocional + agentes: list[PerfilAgente] + forcas: MapaForcas + cronologia: Cronologia + padroes: list[PadraoEmergente] + recomendacoes: list[Recomendacao] + checklist: list[ItemChecklist] + previsoes: list[Previsao] + posicionamento: Posicionamento + roi: ROIAnalise + sintese: SinteseFinal + + +# ============================================================ +# MAPEAMENTO CAMPO → GRÁFICO +# ============================================================ + +GRAFICOS_POR_SECAO = { + "veredicto": {"gauge_semicircular": {"dados": "veredicto.tipo"}}, + "dashboard": { + "kpi_grid_3x4": {"dados": "dashboard.*"}, + "semaforo_3col": {"dados": "dashboard.sinais_*"}, + }, + "contexto_mercado": { + "cards_dados_verificados": {"dados": "contexto_mercado.dados[]"}, + }, + "cenarios": { + "barras_horizontais": {"dados": "cenarios[].probabilidade"}, + "area_chart_24m": {"dados": "cenarios[].projecao_faturamento_24m"}, + "tabela_comparativa": {"dados": "cenarios[]"}, + }, + "riscos": {"scatter_prob_x_impacto": {"dados": "riscos[].probabilidade+impacto"}}, + "emocional": { + "radar_6_eixos": {"dados": "emocional.emocoes[]"}, + "barras_horizontais": {"dados": "emocional.emocoes[]"}, + "linhas_evolucao_24m": {"dados": "emocional.evolucao_24m"}, + }, + "agentes": {"espectro_horizontal": {"dados": "agentes[].posicao_espectro"}}, + "forcas": {"grafo_rede": {"dados": "forcas.blocos"}}, + "cronologia": {"timeline_horizontal": {"dados": "cronologia.fases[]"}}, + "recomendacoes": {"stack_ranking_barras": {"dados": 
"recomendacoes[].impacto_relativo"}}, + "previsoes": {"barras_com_error_bars": {"dados": "previsoes[].probabilidade+margem_erro"}}, + "posicionamento": {"scatter_2d": {"dados": "posicionamento.players[]"}}, + "roi": {"barras_comparativas": {"dados": "roi.riscos_evitados[]"}}, + "sintese": {"radar_5_eixos": {"dados": "sintese.scores"}}, +} + + +# ============================================================ +# VALIDAÇÃO +# ============================================================ + +def validar_report_json(data: dict) -> list[str]: + erros = [] + + campos = [ + 'meta', 'veredicto', 'dashboard', 'cenarios', 'riscos', + 'emocional', 'agentes', 'forcas', 'cronologia', 'padroes', + 'recomendacoes', 'checklist', 'previsoes', 'posicionamento', + 'roi', 'sintese' + ] + for c in campos: + if c not in data: + erros.append(f"Campo obrigatorio ausente: {c}") + + if 'cenarios' in data: + cens = data['cenarios'].get('cenarios', []) + if len(cens) != 3: + erros.append(f"Deve ter 3 cenarios, tem {len(cens)}") + probs = sum(c.get('probabilidade', 0) for c in cens) + if probs != 100: + erros.append(f"Probabilidades somam {probs}, devem somar 100") + + if 'riscos' in data: + n = len(data['riscos'].get('riscos', [])) + if n < 3 or n > 7: + erros.append(f"Deve ter 3-7 riscos, tem {n}") + + if 'agentes' in data and len(data['agentes']) < 3: + erros.append("Deve ter pelo menos 3 agentes") + + if 'recomendacoes' in data: + n = len(data['recomendacoes']) + if n < 3 or n > 5: + erros.append(f"Deve ter 3-5 recomendacoes, tem {n}") + + if 'veredicto' in data: + v = data['veredicto'] + if v.get('tipo') not in ['GO', 'NO-GO', 'AJUSTAR']: + erros.append(f"Veredicto invalido: {v.get('tipo')}") + + # Chinês + json_str = json.dumps(data, ensure_ascii=False) + chinese = re.findall(r'[\u4e00-\u9fff]', json_str) + if chinese: + erros.append(f"CRITICO: {len(chinese)} caracteres chineses no JSON") + + return erros diff --git a/backend/app/services/__init__.py b/backend/app/services/__init__.py index 
8db85d86f3..28f7a2b2c8 100644 --- a/backend/app/services/__init__.py +++ b/backend/app/services/__init__.py @@ -1,5 +1,4 @@ """ -业务服务模块 """ from .ontology_generator import OntologyGenerator diff --git a/backend/app/services/graph_builder.py b/backend/app/services/graph_builder.py index 37c9969c79..7b9ad54e54 100644 --- a/backend/app/services/graph_builder.py +++ b/backend/app/services/graph_builder.py @@ -1,6 +1,6 @@ """ -图谱构建服务 -接口2:使用Zep API构建Standalone Graph +Grafo +2Zep APIStandalone Graph """ import os @@ -22,7 +22,7 @@ @dataclass class GraphInfo: - """图谱信息""" + """Grafo""" graph_id: str node_count: int edge_count: int @@ -39,14 +39,14 @@ def to_dict(self) -> Dict[str, Any]: class GraphBuilderService: """ - 图谱构建服务 - 负责调用Zep API构建知识图谱 + Grafo + Zep APIGrafo """ def __init__(self, api_key: Optional[str] = None): self.api_key = api_key or Config.ZEP_API_KEY if not self.api_key: - raise ValueError("ZEP_API_KEY 未配置") + raise ValueError("ZEP_API_KEY Configuração") self.client = Zep(api_key=self.api_key) self.task_manager = TaskManager() @@ -61,20 +61,19 @@ def build_graph_async( batch_size: int = 3 ) -> str: """ - 异步构建图谱 + Grafo Args: - text: 输入文本 - ontology: 本体定义(来自接口1的输出) - graph_name: 图谱名称 - chunk_size: 文本块大小 - chunk_overlap: 块重叠大小 - batch_size: 每批发送的块数量 + text: + ontology: 1 + graph_name: Grafo + chunk_size: + chunk_overlap: + batch_size: Returns: - 任务ID + ID """ - # 创建任务 task_id = self.task_manager.create_task( task_type="graph_build", metadata={ @@ -87,7 +86,6 @@ def build_graph_async( # Capture locale before spawning background thread current_locale = get_locale() - # 在后台线程中执行构建 thread = threading.Thread( target=self._build_graph_worker, args=(task_id, text, ontology, graph_name, chunk_size, chunk_overlap, batch_size, current_locale) @@ -106,9 +104,9 @@ def _build_graph_worker( chunk_size: int, chunk_overlap: int, batch_size: int, - locale: str = 'zh' + locale: str = 'pt' ): - """图谱构建工作线程""" + """Grafo""" set_locale(locale) try: self.task_manager.update_task( 
@@ -118,7 +116,7 @@ def _build_graph_worker( message=t('progress.startBuildingGraph') ) - # 1. 创建图谱 + # 1. Grafo graph_id = self.create_graph(graph_name) self.task_manager.update_task( task_id, @@ -126,7 +124,6 @@ def _build_graph_worker( message=t('progress.graphCreated', graphId=graph_id) ) - # 2. 设置本体 self.set_ontology(graph_id, ontology) self.task_manager.update_task( task_id, @@ -134,8 +131,15 @@ def _build_graph_worker( message=t('progress.ontologySet') ) - # 3. 文本分块 chunks = TextProcessor.split_text(text, chunk_size, chunk_overlap) + + # Prefixar chunks com instrução PT-BR para influenciar extração do Zep + _CHUNK_LANG_PREFIX = ( + "[IDIOMA: Português do Brasil] " + "[INSTRUÇÃO: Extraia entidades, fatos e relacionamentos em português do Brasil. " + "Traduza qualquer conteúdo em outro idioma para português.] \n\n" + ) + chunks = [f"{_CHUNK_LANG_PREFIX}{chunk}" for chunk in chunks] total_chunks = len(chunks) self.task_manager.update_task( task_id, @@ -143,7 +147,6 @@ def _build_graph_worker( message=t('progress.textSplit', count=total_chunks) ) - # 4. 分批发送数据 episode_uuids = self.add_text_batches( graph_id, chunks, batch_size, lambda msg, prog: self.task_manager.update_task( @@ -153,7 +156,7 @@ def _build_graph_worker( ) ) - # 5. 等待Zep处理完成 + # 5. Zep self.task_manager.update_task( task_id, progress=60, @@ -169,7 +172,7 @@ def _build_graph_worker( ) ) - # 6. 获取图谱信息 + # 6. 
Grafo self.task_manager.update_task( task_id, progress=90, @@ -178,7 +181,6 @@ def _build_graph_worker( graph_info = self._get_graph_info(graph_id) - # 完成 self.task_manager.complete_task(task_id, { "graph_id": graph_id, "graph_info": graph_info.to_dict(), @@ -191,7 +193,7 @@ def _build_graph_worker( self.task_manager.fail_task(task_id, error_msg) def create_graph(self, name: str) -> str: - """创建Zep图谱(公开方法)""" + """ZepGrafo""" graph_id = f"mirofish_{uuid.uuid4().hex[:16]}" self.client.graph.create( @@ -203,74 +205,69 @@ def create_graph(self, name: str) -> str: return graph_id def set_ontology(self, graph_id: str, ontology: Dict[str, Any]): - """设置图谱本体(公开方法)""" + """Grafo""" import warnings from typing import Optional from pydantic import Field from zep_cloud.external_clients.ontology import EntityModel, EntityText, EdgeModel - # 抑制 Pydantic v2 关于 Field(default=None) 的警告 - # 这是 Zep SDK 要求的用法,警告来自动态类创建,可以安全忽略 + # Pydantic v2 Field(default=None) + # Zep SDK warnings.filterwarnings('ignore', category=UserWarning, module='pydantic') - # Zep 保留名称,不能作为属性名 RESERVED_NAMES = {'uuid', 'name', 'group_id', 'name_embedding', 'summary', 'created_at'} def safe_attr_name(attr_name: str) -> str: - """将保留名称转换为安全名称""" + """""" if attr_name.lower() in RESERVED_NAMES: return f"entity_{attr_name}" return attr_name - # 动态创建实体类型 + # Entidade entity_types = {} for entity_def in ontology.get("entity_types", []): name = entity_def["name"] description = entity_def.get("description", f"A {name} entity.") - # 创建属性字典和类型注解(Pydantic v2 需要) + # Pydantic v2 attrs = {"__doc__": description} annotations = {} for attr_def in entity_def.get("attributes", []): - attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称 + attr_name = safe_attr_name(attr_def["name"]) # attr_desc = attr_def.get("description", attr_name) - # Zep API 需要 Field 的 description,这是必需的 + # Zep API Field description attrs[attr_name] = Field(description=attr_desc, default=None) - annotations[attr_name] = Optional[EntityText] # 类型注解 + 
annotations[attr_name] = Optional[EntityText] # attrs["__annotations__"] = annotations - # 动态创建类 entity_class = type(name, (EntityModel,), attrs) entity_class.__doc__ = description entity_types[name] = entity_class - # 动态创建边类型 edge_definitions = {} for edge_def in ontology.get("edge_types", []): name = edge_def["name"] description = edge_def.get("description", f"A {name} relationship.") - # 创建属性字典和类型注解 attrs = {"__doc__": description} annotations = {} for attr_def in edge_def.get("attributes", []): - attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称 + attr_name = safe_attr_name(attr_def["name"]) # attr_desc = attr_def.get("description", attr_name) - # Zep API 需要 Field 的 description,这是必需的 + # Zep API Field description attrs[attr_name] = Field(description=attr_desc, default=None) - annotations[attr_name] = Optional[str] # 边属性用str类型 + annotations[attr_name] = Optional[str] # str attrs["__annotations__"] = annotations - # 动态创建类 class_name = ''.join(word.capitalize() for word in name.split('_')) edge_class = type(class_name, (EdgeModel,), attrs) edge_class.__doc__ = description - # 构建source_targets + # source_targets source_targets = [] for st in edge_def.get("source_targets", []): source_targets.append( @@ -283,7 +280,7 @@ def safe_attr_name(attr_name: str) -> str: if source_targets: edge_definitions[name] = (edge_class, source_targets) - # 调用Zep API设置本体 + # Zep API if entity_types or edge_definitions: self.client.graph.set_ontology( graph_ids=[graph_id], @@ -298,7 +295,7 @@ def add_text_batches( batch_size: int = 3, progress_callback: Optional[Callable] = None ) -> List[str]: - """分批添加文本到图谱,返回所有 episode 的 uuid 列表""" + """Grafo episode uuid """ episode_uuids = [] total_chunks = len(chunks) @@ -314,27 +311,25 @@ def add_text_batches( progress ) - # 构建episode数据 + # episode episodes = [ EpisodeData(data=chunk, type="text") for chunk in batch_chunks ] - # 发送到Zep try: batch_result = self.client.graph.add_batch( graph_id=graph_id, episodes=episodes ) - # 收集返回的 episode 
uuid + # episode uuid if batch_result and isinstance(batch_result, list): for ep in batch_result: ep_uuid = getattr(ep, 'uuid_', None) or getattr(ep, 'uuid', None) if ep_uuid: episode_uuids.append(ep_uuid) - # 避免请求过快 time.sleep(1) except Exception as e: @@ -350,7 +345,7 @@ def _wait_for_episodes( progress_callback: Optional[Callable] = None, timeout: int = 600 ): - """等待所有 episode 处理完成(通过查询每个 episode 的 processed 状态)""" + """ episode episode processed """ if not episode_uuids: if progress_callback: progress_callback(t('progress.noEpisodesWait'), 1.0) @@ -373,7 +368,7 @@ def _wait_for_episodes( ) break - # 检查每个 episode 的处理状态 + # episode for ep_uuid in list(pending_episodes): try: episode = self.client.graph.episode.get(uuid_=ep_uuid) @@ -384,7 +379,6 @@ def _wait_for_episodes( completed_count += 1 except Exception as e: - # 忽略单个查询错误,继续 pass elapsed = int(time.time() - start_time) @@ -395,20 +389,18 @@ def _wait_for_episodes( ) if pending_episodes: - time.sleep(3) # 每3秒检查一次 + time.sleep(3) # 3 if progress_callback: progress_callback(t('progress.processingComplete', completed=completed_count, total=total_episodes), 1.0) def _get_graph_info(self, graph_id: str) -> GraphInfo: - """获取图谱信息""" - # 获取节点(分页) + """Grafo""" nodes = fetch_all_nodes(self.client, graph_id) - # 获取边(分页) edges = fetch_all_edges(self.client, graph_id) - # 统计实体类型 + # Entidade entity_types = set() for node in nodes: if node.labels: @@ -425,25 +417,23 @@ def _get_graph_info(self, graph_id: str) -> GraphInfo: def get_graph_data(self, graph_id: str) -> Dict[str, Any]: """ - 获取完整图谱数据(包含详细信息) + Grafo Args: - graph_id: 图谱ID + graph_id: ID do grafo Returns: - 包含nodes和edges的字典,包括时间信息、属性等详细数据 + nodesedges """ nodes = fetch_all_nodes(self.client, graph_id) edges = fetch_all_edges(self.client, graph_id) - # 创建节点映射用于获取节点名称 node_map = {} for node in nodes: node_map[node.uuid_] = node.name or "" nodes_data = [] for node in nodes: - # 获取创建时间 created_at = getattr(node, 'created_at', None) if created_at: created_at = 
str(created_at) @@ -459,20 +449,19 @@ def get_graph_data(self, graph_id: str) -> Dict[str, Any]: edges_data = [] for edge in edges: - # 获取时间信息 created_at = getattr(edge, 'created_at', None) valid_at = getattr(edge, 'valid_at', None) invalid_at = getattr(edge, 'invalid_at', None) expired_at = getattr(edge, 'expired_at', None) - # 获取 episodes + # episodes episodes = getattr(edge, 'episodes', None) or getattr(edge, 'episode_ids', None) if episodes and not isinstance(episodes, list): episodes = [str(episodes)] elif episodes: episodes = [str(e) for e in episodes] - # 获取 fact_type + # fact_type fact_type = getattr(edge, 'fact_type', None) or edge.name or "" edges_data.append({ @@ -501,6 +490,5 @@ def get_graph_data(self, graph_id: str) -> Dict[str, Any]: } def delete_graph(self, graph_id: str): - """删除图谱""" + """Grafo""" self.client.graph.delete(graph_id=graph_id) - diff --git a/backend/app/services/market_research.py b/backend/app/services/market_research.py new file mode 100644 index 0000000000..55e1342434 --- /dev/null +++ b/backend/app/services/market_research.py @@ -0,0 +1,356 @@ +""" +AUGUR Market Research — Pesquisa de mercado via Perplexity API. + +Executa ANTES da simulação para calibrar o grafo Zep com dados reais. +Cada nicho (setor × tipo_decisao) gera queries específicas. 
+ +Custo: ~R$0,25-0,50 por simulação (8-12 queries) +Tempo: +20-40 segundos (queries paralelas) + +Caminho: backend/app/services/market_research.py +""" + +import os +import json +import logging +import re +import time +from typing import Optional +from concurrent.futures import ThreadPoolExecutor, as_completed + +logger = logging.getLogger(__name__) + +PERPLEXITY_API_KEY = os.environ.get('PERPLEXITY_API_KEY', '') +PERPLEXITY_URL = "https://api.perplexity.ai/chat/completions" +PERPLEXITY_MODEL = "sonar" + + +# ============================================================ +# TEMPLATES DE QUERIES POR NICHO +# ============================================================ + +QUERY_TEMPLATES = { + "varejo_local": { + "novo_negocio": [ + "população {location} IBGE dados demográficos renda média", + "lojas de {setor_detalhe} em {location} concorrentes principais", + "preço médio {setor_detalhe} varejo {location} região", + "aluguel comercial {location} centro preço metro quadrado", + "comércio {location} economia local crescimento", + "marketplaces digitais mais usados em cidades pequenas interior Brasil 2025 2026", + "sazonalidade vendas {setor_detalhe} Brasil meses fortes", + "taxa inadimplência crediário varejo Brasil 2025 2026", + ], + "novo_produto": [ + "mercado {setor_detalhe} Brasil tamanho market share 2025 2026", + "concorrentes {setor_detalhe} principais marcas Brasil", + "preço médio {setor_detalhe} varejo atacado", + "tendências consumo {setor_detalhe} Brasil 2025 2026", + "canais distribuição {setor_detalhe} Brasil", + ], + "promocao_campanha": [ + "promoções mais eficazes varejo brasileiro 2025 2026", + "ROI promoções sorteio premiação varejo Brasil", + "engajamento redes sociais promoção varejo Brasil", + "regulamentação promoção sorteio SEAE Caixa Econômica", + ], + }, + "saas_b2b": { + "novo_negocio": [ + "mercado SaaS Brasil tamanho TAM SAM 2025 2026", + "concorrentes {setor_detalhe} SaaS Brasil pricing", + "CAC médio SaaS B2B Brasil 2025 2026", + "churn 
rate SaaS B2B Brasil benchmark", + "investimento seed SaaS Brasil valores médios 2025 2026", + "canais aquisição SaaS B2B Brasil mais eficientes", + ], + "novo_produto": [ + "mercado {setor_detalhe} Brasil concorrentes funcionalidades", + "pricing SaaS {setor_detalhe} Brasil modelos", + "demanda {setor_detalhe} PME Brasil", + "integrações mais pedidas {setor_detalhe} SaaS", + ], + }, + "industria_fmcg": { + "novo_produto": [ + "mercado {setor_detalhe} Brasil tamanho market share 2025 2026", + "concorrentes {setor_detalhe} principais marcas participação mercado", + "preço {setor_detalhe} atacado varejo margem distribuição", + "canais distribuição {setor_detalhe} Brasil supermercados atacado", + "regulamentação {setor_detalhe} ANVISA INMETRO requisitos", + "tendências consumo {setor_detalhe} Brasil sustentabilidade", + "custo produção {setor_detalhe} matéria prima Brasil", + ], + }, + "alimentacao": { + "novo_negocio": [ + "restaurantes {setor_detalhe} em {location} concorrentes avaliações", + "preço médio refeição {setor_detalhe} {location}", + "custo aluguel ponto comercial {location} alimentação", + "taxa iFood Rappi restaurante 2025 2026 comissão", + "vigilância sanitária {location} requisitos restaurante", + "população {location} hábitos alimentares", + ], + "promocao_campanha": [ + "promoções delivery app resultados ROI 2025 2026", + "engajamento promoção restaurante redes sociais Brasil", + "custo aquisição cliente delivery app Brasil", + "promoções Copa do Mundo restaurantes delivery resultados", + ], + }, + "marketplace_app": { + "promocao_campanha": [ + "promoções apps delivery Brasil ROI resultados 2025 2026", + "custo aquisição usuário app delivery Brasil", + "retenção usuário após promoção app delivery", + "regulamentação promoção sorteio app digital SEAE", + "promoções Copa do Mundo apps resultados engajamento", + "concorrentes {setor_detalhe} promoções recentes", + ], + "expansao_franquia": [ + "{setor_detalhe} franquia modelo custos taxa", + "mercado 
delivery {location} concorrentes", + "população {location} dados demográficos delivery", + "custo operação franquia delivery {location}", + ], + }, + "telecom_isp": { + "expansao_geografica": [ + "provedores internet {location} concorrentes preços", + "cobertura fibra óptica {location} infraestrutura", + "população {location} domicílios conectados internet", + "Anatel dados telecomunicações {location} região", + "custo implantação fibra óptica cidade pequena Brasil", + "ticket médio internet banda larga {location} região", + ], + }, + "energia_tech": { + "novo_produto": [ + "mercado veículos elétricos Brasil 2025 2026 crescimento", + "estações carregamento elétrico Brasil quantidade localização", + "preço carregador veículo elétrico Brasil concorrentes", + "regulamentação ANEEL estação carregamento elétrico", + "demanda carregamento elétrico rodovias Brasil", + "incentivos governo veículos elétricos Brasil 2025 2026", + ], + }, + "servicos": { + "novo_negocio": [ + "{setor_detalhe} em {location} concorrentes preços", + "demanda {setor_detalhe} {location} região", + "preço médio {setor_detalhe} Brasil 2025 2026", + "regulamentação {setor_detalhe} conselho classe requisitos", + "canais aquisição clientes {setor_detalhe} Brasil", + ], + }, +} + +GENERIC_QUERIES = [ + "{requirement} mercado Brasil dados 2025 2026", + "{requirement} concorrentes principais", + "{requirement} preço custo investimento", + "{requirement} tendências oportunidades riscos", + "população economia {location} dados recentes", +] + + +# ============================================================ +# HELPERS +# ============================================================ + +def extract_location(text: str) -> str: + """Extrai cidade/região do texto da simulação.""" + patterns = [ + r'em\s+([A-Z][a-zà-ú]+(?:\s+(?:de|do|da|dos|das)\s+)?(?:[A-Z][a-zà-ú]+)?(?:\s+(?:de|do|da|dos|das)\s+)?(?:[A-Z][a-zà-ú]+)?)', + r'(?:para|em|de)\s+([A-Z][a-zà-ú]+(?:\s+[A-Z][a-zà-ú]+)*)', + ] + stopwords = {'Brasil', 
'Brasileiro', 'Nacional', 'Empresa', 'Loja', 'App', 'Novo', 'Nova'} + for pattern in patterns: + match = re.search(pattern, text) + if match: + loc = match.group(1).strip() + if loc not in stopwords and len(loc) > 3: + return loc + return "Brasil" + + +def extract_sector_detail(text: str) -> str: + """Extrai detalhe do setor (ex: 'calçados', 'papel higiênico').""" + cleaned = re.sub(r'^(abertura de|lancamento de|lançar|abrir|criar|montar)\s+', '', text.lower()) + cleaned = re.sub(r'\s+(em|para|de|do|da)\s+.*$', '', cleaned) + words = cleaned.split()[:4] + return ' '.join(words) if words else text[:30] + + +# ============================================================ +# SERVIÇO PRINCIPAL +# ============================================================ + +class MarketResearcher: + + def __init__(self, api_key: str = None): + self.api_key = api_key or PERPLEXITY_API_KEY + if not self.api_key: + logger.warning("PERPLEXITY_API_KEY não configurada. Market research desabilitado.") + + @property + def is_available(self) -> bool: + return bool(self.api_key) + + def research(self, simulation_requirement: str, sector: str, decision: str, max_queries: int = 8) -> dict: + if not self.is_available: + return self._empty_result() + + start = time.time() + location = extract_location(simulation_requirement) + setor_detalhe = extract_sector_detail(simulation_requirement) + + logger.info(f"Market research: setor={sector}, decisao={decision}, local={location}, detalhe={setor_detalhe}") + + queries = self._get_queries(sector, decision, simulation_requirement, location, setor_detalhe, max_queries) + results = self._execute_parallel(queries) + + fontes_unicas = list(set(f for r in results for f in r.get("fontes", []))) + contexto = self._format_context(results, location, setor_detalhe) + elapsed = time.time() - start + custo = len(results) * 0.005 + + logger.info(f"Market research: {len(results)} queries, {len(fontes_unicas)} fontes, {elapsed:.1f}s, ~${custo:.3f}") + + return { + 
"dados_mercado": results, + "fontes_unicas": fontes_unicas, + "contexto_formatado": contexto, + "custo_estimado": custo, + "queries_executadas": len(results), + "tempo_segundos": round(elapsed, 1), + "localizacao_detectada": location, + "setor_detalhe": setor_detalhe, + } + + def _get_queries(self, sector, decision, requirement, location, setor_detalhe, max_queries) -> list[str]: + sector_templates = QUERY_TEMPLATES.get(sector, {}) + templates = sector_templates.get(decision, GENERIC_QUERIES) + + queries = [] + for t in templates[:max_queries]: + q = t.format( + requirement=requirement[:80], + location=location, + setor_detalhe=setor_detalhe, + ) + queries.append(q) + return queries + + def _execute_parallel(self, queries: list[str], max_workers: int = 4) -> list[dict]: + results = [] + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = {executor.submit(self._query_perplexity, q): q for q in queries} + for future in as_completed(futures): + query = futures[future] + try: + results.append(future.result()) + except Exception as e: + logger.warning(f"Query falhou: {query[:50]}... - {e}") + results.append({"query": query, "resposta": f"Erro: {e}", "fontes": []}) + return results + + def _query_perplexity(self, query: str) -> dict: + import requests + + headers = { + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + } + payload = { + "model": PERPLEXITY_MODEL, + "messages": [ + { + "role": "system", + "content": ( + "Você é um pesquisador de mercado brasileiro. " + "Responda de forma concisa e factual com dados numéricos quando possível. " + "Cite fontes. Responda em português do Brasil. " + "Foque em dados de 2024-2026." 
+ ), + }, + {"role": "user", "content": query}, + ], + "max_tokens": 500, + "temperature": 0.1, + "return_citations": True, + } + + response = requests.post(PERPLEXITY_URL, headers=headers, json=payload, timeout=30) + response.raise_for_status() + data = response.json() + + answer = data.get("choices", [{}])[0].get("message", {}).get("content", "") + citations = data.get("citations", []) + + return { + "query": query, + "resposta": answer, + "fontes": citations if isinstance(citations, list) else [], + } + + def _format_context(self, results: list[dict], location: str, setor_detalhe: str) -> str: + parts = [ + f"## DADOS DE MERCADO VERIFICADOS (Pesquisa Perplexity — {len(results)} consultas)", + f"## Localização: {location}", + f"## Setor: {setor_detalhe}", + "", + "Use estes dados para CALIBRAR as entidades e atributos da ontologia.", + "", + ] + for i, r in enumerate(results, 1): + if r.get("resposta") and "Erro" not in r["resposta"]: + parts.append(f"### Dado {i}: {r['query'][:60]}") + parts.append(r["resposta"]) + if r.get("fontes"): + parts.append(f"Fontes: {', '.join(r['fontes'][:3])}") + parts.append("") + return "\n".join(parts) + + def _empty_result(self) -> dict: + return { + "dados_mercado": [], "fontes_unicas": [], "contexto_formatado": "", + "custo_estimado": 0, "queries_executadas": 0, "tempo_segundos": 0, + "localizacao_detectada": "", "setor_detalhe": "", + } + + +# ============================================================ +# SEÇÃO DO RELATÓRIO: Contexto de Mercado +# ============================================================ + +def build_market_context_section(research_data: dict) -> dict: + """Monta seção 'Contexto de Mercado' para o AugurReportSchema.""" + if not research_data or not research_data.get("dados_mercado"): + return {} + + dados = research_data["dados_mercado"] + dados_validos = [d for d in dados if d.get("resposta") and "Erro" not in d["resposta"] and len(d["resposta"]) > 20] + + if not dados_validos: + return {} + + return { + 
"localizacao": research_data.get("localizacao_detectada", ""), + "setor_detalhe": research_data.get("setor_detalhe", ""), + "dados": [ + { + "titulo": d["query"][:80], + "conteudo": d["resposta"], + "fontes": d.get("fontes", [])[:3], + } + for d in dados_validos + ], + "fontes_unicas": research_data.get("fontes_unicas", [])[:15], + "total_queries": research_data.get("queries_executadas", 0), + "disclaimer": ( + f"Dados obtidos de fontes públicas via Perplexity AI em " + f"{research_data.get('tempo_segundos', 0)}s. " + "Verifique informações críticas antes de tomar decisões." + ), + } diff --git a/backend/app/services/oasis_profile_generator.py b/backend/app/services/oasis_profile_generator.py index 7704a627eb..ccd4e529a5 100644 --- a/backend/app/services/oasis_profile_generator.py +++ b/backend/app/services/oasis_profile_generator.py @@ -1,11 +1,10 @@ """ -OASIS Agent Profile生成器 -将Zep图谱中的实体转换为OASIS模拟平台所需的Agent Profile格式 +OASIS Agent ProfileGerar +ZepGrafoEntidadeOASISSimulaçãoAgent Profile -优化改进: -1. 调用Zep检索功能二次丰富节点信息 -2. 优化提示词生成非常详细的人设 -3. 区分个人实体和抽象群体实体 +1. ZepBusca +2. Gerar +3. 
EntidadeEntidade """ import json @@ -28,23 +27,21 @@ @dataclass class OasisAgentProfile: - """OASIS Agent Profile数据结构""" - # 通用字段 + """OASIS Agent Profile""" user_id: int user_name: str name: str bio: str persona: str - # 可选字段 - Reddit风格 + # - Reddit karma: int = 1000 - # 可选字段 - Twitter风格 + # - Twitter friend_count: int = 100 follower_count: int = 150 statuses_count: int = 500 - # 额外人设信息 age: Optional[int] = None gender: Optional[str] = None mbti: Optional[str] = None @@ -52,17 +49,17 @@ class OasisAgentProfile: profession: Optional[str] = None interested_topics: List[str] = field(default_factory=list) - # 来源实体信息 + # Entidade source_entity_uuid: Optional[str] = None source_entity_type: Optional[str] = None created_at: str = field(default_factory=lambda: datetime.now().strftime("%Y-%m-%d")) def to_reddit_format(self) -> Dict[str, Any]: - """转换为Reddit平台格式""" + """Reddit""" profile = { "user_id": self.user_id, - "username": self.user_name, # OASIS 库要求字段名为 username(无下划线) + "username": self.user_name, # OASIS username "name": self.name, "bio": self.bio, "persona": self.persona, @@ -70,7 +67,6 @@ def to_reddit_format(self) -> Dict[str, Any]: "created_at": self.created_at, } - # 添加额外人设信息(如果有) if self.age: profile["age"] = self.age if self.gender: @@ -87,10 +83,10 @@ def to_reddit_format(self) -> Dict[str, Any]: return profile def to_twitter_format(self) -> Dict[str, Any]: - """转换为Twitter平台格式""" + """Twitter""" profile = { "user_id": self.user_id, - "username": self.user_name, # OASIS 库要求字段名为 username(无下划线) + "username": self.user_name, # OASIS username "name": self.name, "bio": self.bio, "persona": self.persona, @@ -100,7 +96,6 @@ def to_twitter_format(self) -> Dict[str, Any]: "created_at": self.created_at, } - # 添加额外人设信息 if self.age: profile["age"] = self.age if self.gender: @@ -117,7 +112,7 @@ def to_twitter_format(self) -> Dict[str, Any]: return profile def to_dict(self) -> Dict[str, Any]: - """转换为完整字典格式""" + """""" return { "user_id": self.user_id, "user_name": 
self.user_name, @@ -142,17 +137,16 @@ def to_dict(self) -> Dict[str, Any]: class OasisProfileGenerator: """ - OASIS Profile生成器 + OASIS ProfileGerar - 将Zep图谱中的实体转换为OASIS模拟所需的Agent Profile + ZepGrafoEntidadeOASISSimulaçãoAgent Profile - 优化特性: - 1. 调用Zep图谱检索功能获取更丰富的上下文 - 2. 生成非常详细的人设(包括基本信息、职业经历、性格特征、社交媒体行为等) - 3. 区分个人实体和抽象群体实体 + 1. ZepGrafoBusca + 2. Gerar + 3. EntidadeEntidade """ - # MBTI类型列表 + # MBTI MBTI_TYPES = [ "INTJ", "INTP", "ENTJ", "ENTP", "INFJ", "INFP", "ENFJ", "ENFP", @@ -160,19 +154,18 @@ class OasisProfileGenerator: "ISTP", "ISFP", "ESTP", "ESFP" ] - # 常见国家列表 COUNTRIES = [ "China", "US", "UK", "Japan", "Germany", "France", "Canada", "Australia", "Brazil", "India", "South Korea" ] - # 个人类型实体(需要生成具体人设) + # EntidadeGerar INDIVIDUAL_ENTITY_TYPES = [ "student", "alumni", "professor", "person", "publicfigure", "expert", "faculty", "official", "journalist", "activist" ] - # 群体/机构类型实体(需要生成群体代表人设) + # /EntidadeGerar GROUP_ENTITY_TYPES = [ "university", "governmentagency", "organization", "ngo", "mediaoutlet", "company", "institution", "group", "community" @@ -191,14 +184,14 @@ def __init__( self.model_name = model_name or Config.LLM_MODEL_NAME if not self.api_key: - raise ValueError("LLM_API_KEY 未配置") + raise ValueError("LLM_API_KEY Configuração") self.client = OpenAI( api_key=self.api_key, base_url=self.base_url ) - # Zep客户端用于检索丰富上下文 + # ZepBusca self.zep_api_key = zep_api_key or Config.ZEP_API_KEY self.zep_client = None self.graph_id = graph_id @@ -207,7 +200,7 @@ def __init__( try: self.zep_client = Zep(api_key=self.zep_api_key) except Exception as e: - logger.warning(f"Zep客户端初始化失败: {e}") + logger.warning(f"ZepFalhou: {e}") def generate_profile_from_entity( self, @@ -216,27 +209,25 @@ def generate_profile_from_entity( use_llm: bool = True ) -> OasisAgentProfile: """ - 从Zep实体生成OASIS Agent Profile + ZepEntidadeGerarOASIS Agent Profile Args: - entity: Zep实体节点 - user_id: 用户ID(用于OASIS) - use_llm: 是否使用LLM生成详细人设 + entity: ZepEntidade + user_id: IDOASIS + use_llm: 
LLMGerar Returns: OasisAgentProfile """ entity_type = entity.get_entity_type() or "Entity" - # 基础信息 name = entity.name user_name = self._generate_username(name) - # 构建上下文信息 context = self._build_entity_context(entity) if use_llm: - # 使用LLM生成详细人设 + # LLMGerar profile_data = self._generate_profile_with_llm( entity_name=name, entity_type=entity_type, @@ -245,7 +236,7 @@ def generate_profile_from_entity( context=context ) else: - # 使用规则生成基础人设 + # Gerar profile_data = self._generate_profile_rule_based( entity_name=name, entity_type=entity_type, @@ -274,27 +265,24 @@ def generate_profile_from_entity( ) def _generate_username(self, name: str) -> str: - """生成用户名""" - # 移除特殊字符,转换为小写 + """Gerar""" username = name.lower().replace(" ", "_") username = ''.join(c for c in username if c.isalnum() or c == '_') - # 添加随机后缀避免重复 suffix = random.randint(100, 999) return f"{username}_{suffix}" def _search_zep_for_entity(self, entity: EntityNode) -> Dict[str, Any]: """ - 使用Zep图谱混合搜索功能获取实体相关的丰富信息 + ZepGrafoEntidade - Zep没有内置混合搜索接口,需要分别搜索edges和nodes然后合并结果。 - 使用并行请求同时搜索,提高效率。 + ZepedgesnodesResultado Args: - entity: 实体节点对象 + entity: Entidade Returns: - 包含facts, node_summaries, context的字典 + facts, node_summaries, context """ import concurrent.futures @@ -309,15 +297,15 @@ def _search_zep_for_entity(self, entity: EntityNode) -> Dict[str, Any]: "context": "" } - # 必须有graph_id才能进行搜索 + # graph_id if not self.graph_id: - logger.debug(f"跳过Zep检索:未设置graph_id") + logger.debug(f"ZepBuscagraph_id") return results comprehensive_query = t('progress.zepSearchQuery', name=entity_name) def search_edges(): - """搜索边(事实/关系)- 带重试机制""" + """/Relacionamento- """ max_retries = 3 last_exception = None delay = 2.0 @@ -334,15 +322,15 @@ def search_edges(): except Exception as e: last_exception = e if attempt < max_retries - 1: - logger.debug(f"Zep边搜索第 {attempt + 1} 次失败: {str(e)[:80]}, 重试中...") + logger.debug(f"Zep {attempt + 1} Falhou: {str(e)[:80]}, ...") time.sleep(delay) delay *= 2 else: - logger.debug(f"Zep边搜索在 
{max_retries} 次尝试后仍失败: {e}") + logger.debug(f"Zep {max_retries} Falhou: {e}") return None def search_nodes(): - """搜索节点(实体摘要)- 带重试机制""" + """Entidade- """ max_retries = 3 last_exception = None delay = 2.0 @@ -359,24 +347,24 @@ def search_nodes(): except Exception as e: last_exception = e if attempt < max_retries - 1: - logger.debug(f"Zep节点搜索第 {attempt + 1} 次失败: {str(e)[:80]}, 重试中...") + logger.debug(f"Zep {attempt + 1} Falhou: {str(e)[:80]}, ...") time.sleep(delay) delay *= 2 else: - logger.debug(f"Zep节点搜索在 {max_retries} 次尝试后仍失败: {e}") + logger.debug(f"Zep {max_retries} Falhou: {e}") return None try: - # 并行执行edges和nodes搜索 + # edgesnodes with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: edge_future = executor.submit(search_edges) node_future = executor.submit(search_nodes) - # 获取结果 + # Resultado edge_result = edge_future.result(timeout=30) node_result = node_future.result(timeout=30) - # 处理边搜索结果 + # Resultado all_facts = set() if edge_result and hasattr(edge_result, 'edges') and edge_result.edges: for edge in edge_result.edges: @@ -384,58 +372,56 @@ def search_nodes(): all_facts.add(edge.fact) results["facts"] = list(all_facts) - # 处理节点搜索结果 + # Resultado all_summaries = set() if node_result and hasattr(node_result, 'nodes') and node_result.nodes: for node in node_result.nodes: if hasattr(node, 'summary') and node.summary: all_summaries.add(node.summary) if hasattr(node, 'name') and node.name and node.name != entity_name: - all_summaries.add(f"相关实体: {node.name}") + all_summaries.add(f"Entidade: {node.name}") results["node_summaries"] = list(all_summaries) - # 构建综合上下文 context_parts = [] if results["facts"]: - context_parts.append("事实信息:\n" + "\n".join(f"- {f}" for f in results["facts"][:20])) + context_parts.append(":\n" + "\n".join(f"- {f}" for f in results["facts"][:20])) if results["node_summaries"]: - context_parts.append("相关实体:\n" + "\n".join(f"- {s}" for s in results["node_summaries"][:10])) + context_parts.append("Entidade:\n" + 
"\n".join(f"- {s}" for s in results["node_summaries"][:10])) results["context"] = "\n\n".join(context_parts) - logger.info(f"Zep混合检索完成: {entity_name}, 获取 {len(results['facts'])} 条事实, {len(results['node_summaries'])} 个相关节点") + logger.info(f"ZepBusca: {entity_name}, {len(results['facts'])} , {len(results['node_summaries'])} ") except concurrent.futures.TimeoutError: - logger.warning(f"Zep检索超时 ({entity_name})") + logger.warning(f"ZepBusca ({entity_name})") except Exception as e: - logger.warning(f"Zep检索失败 ({entity_name}): {e}") + logger.warning(f"ZepBuscaFalhou ({entity_name}): {e}") return results def _build_entity_context(self, entity: EntityNode) -> str: """ - 构建实体的完整上下文信息 + Entidade - 包括: - 1. 实体本身的边信息(事实) - 2. 关联节点的详细信息 - 3. Zep混合检索到的丰富信息 + 1. Entidade + 2. + 3. ZepBusca """ context_parts = [] - # 1. 添加实体属性信息 + # 1. Entidade if entity.attributes: attrs = [] for key, value in entity.attributes.items(): if value and str(value).strip(): attrs.append(f"- {key}: {value}") if attrs: - context_parts.append("### 实体属性\n" + "\n".join(attrs)) + context_parts.append("### Entidade\n" + "\n".join(attrs)) - # 2. 添加相关边信息(事实/关系) + # 2. 
/Relacionamento existing_facts = set() if entity.related_edges: relationships = [] - for edge in entity.related_edges: # 不限制数量 + for edge in entity.related_edges: # fact = edge.get("fact", "") edge_name = edge.get("edge_name", "") direction = edge.get("direction", "") @@ -445,22 +431,20 @@ def _build_entity_context(self, entity: EntityNode) -> str: existing_facts.add(fact) elif edge_name: if direction == "outgoing": - relationships.append(f"- {entity.name} --[{edge_name}]--> (相关实体)") + relationships.append(f"- {entity.name} --[{edge_name}]--> (Entidade)") else: - relationships.append(f"- (相关实体) --[{edge_name}]--> {entity.name}") + relationships.append(f"- (Entidade) --[{edge_name}]--> {entity.name}") if relationships: - context_parts.append("### 相关事实和关系\n" + "\n".join(relationships)) + context_parts.append("### Relacionamento\n" + "\n".join(relationships)) - # 3. 添加关联节点的详细信息 if entity.related_nodes: related_info = [] - for node in entity.related_nodes: # 不限制数量 + for node in entity.related_nodes: # node_name = node.get("name", "") node_labels = node.get("labels", []) node_summary = node.get("summary", "") - # 过滤掉默认标签 custom_labels = [l for l in node_labels if l not in ["Entity", "Node"]] label_str = f" ({', '.join(custom_labels)})" if custom_labels else "" @@ -470,28 +454,27 @@ def _build_entity_context(self, entity: EntityNode) -> str: related_info.append(f"- **{node_name}**{label_str}") if related_info: - context_parts.append("### 关联实体信息\n" + "\n".join(related_info)) + context_parts.append("### Entidade\n" + "\n".join(related_info)) - # 4. 使用Zep混合检索获取更丰富的信息 + # 4. 
ZepBusca zep_results = self._search_zep_for_entity(entity) if zep_results.get("facts"): - # 去重:排除已存在的事实 new_facts = [f for f in zep_results["facts"] if f not in existing_facts] if new_facts: - context_parts.append("### Zep检索到的事实信息\n" + "\n".join(f"- {f}" for f in new_facts[:15])) + context_parts.append("### ZepBusca\n" + "\n".join(f"- {f}" for f in new_facts[:15])) if zep_results.get("node_summaries"): - context_parts.append("### Zep检索到的相关节点\n" + "\n".join(f"- {s}" for s in zep_results["node_summaries"][:10])) + context_parts.append("### ZepBusca\n" + "\n".join(f"- {s}" for s in zep_results["node_summaries"][:10])) return "\n\n".join(context_parts) def _is_individual_entity(self, entity_type: str) -> bool: - """判断是否是个人类型实体""" + """Entidade""" return entity_type.lower() in self.INDIVIDUAL_ENTITY_TYPES def _is_group_entity(self, entity_type: str) -> bool: - """判断是否是群体/机构类型实体""" + """/Entidade""" return entity_type.lower() in self.GROUP_ENTITY_TYPES def _generate_profile_with_llm( @@ -503,11 +486,11 @@ def _generate_profile_with_llm( context: str ) -> Dict[str, Any]: """ - 使用LLM生成非常详细的人设 + LLMGerar - 根据实体类型区分: - - 个人实体:生成具体的人物设定 - - 群体/机构实体:生成代表性账号设定 + Entidade + - EntidadeGerar + - /EntidadeGerar """ is_individual = self._is_individual_entity(entity_type) @@ -521,7 +504,7 @@ def _generate_profile_with_llm( entity_name, entity_type, entity_summary, entity_attributes, context ) - # 尝试多次生成,直到成功或达到最大重试次数 + # Gerar max_attempts = 3 last_error = None @@ -534,34 +517,33 @@ def _generate_profile_with_llm( {"role": "user", "content": prompt} ], response_format={"type": "json_object"}, - temperature=0.7 - (attempt * 0.1) # 每次重试降低温度 - # 不设置max_tokens,让LLM自由发挥 + temperature=0.7 - (attempt * 0.1) # + # max_tokensLLM ) content = response.choices[0].message.content - # 检查是否被截断(finish_reason不是'stop') + # finish_reason'stop' finish_reason = response.choices[0].finish_reason if finish_reason == 'length': - logger.warning(f"LLM输出被截断 (attempt {attempt+1}), 尝试修复...") + 
logger.warning(f"LLM (attempt {attempt+1}), ...") content = self._fix_truncated_json(content) - # 尝试解析JSON + # JSON try: result = json.loads(content) - # 验证必需字段 if "bio" not in result or not result["bio"]: result["bio"] = entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}" if "persona" not in result or not result["persona"]: - result["persona"] = entity_summary or f"{entity_name}是一个{entity_type}。" + result["persona"] = entity_summary or f"{entity_name}{entity_type}" return result except json.JSONDecodeError as je: - logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(je)[:80]}") + logger.warning(f"JSONFalhou (attempt {attempt+1}): {str(je)[:80]}") - # 尝试修复JSON + # JSON result = self._try_fix_json(content, entity_name, entity_type, entity_summary) if result.get("_fixed"): del result["_fixed"] @@ -570,75 +552,63 @@ def _generate_profile_with_llm( last_error = je except Exception as e: - logger.warning(f"LLM调用失败 (attempt {attempt+1}): {str(e)[:80]}") + logger.warning(f"LLMFalhou (attempt {attempt+1}): {str(e)[:80]}") last_error = e import time - time.sleep(1 * (attempt + 1)) # 指数退避 + time.sleep(1 * (attempt + 1)) # - logger.warning(f"LLM生成人设失败({max_attempts}次尝试): {last_error}, 使用规则生成") + logger.warning(f"LLMGerarFalhou{max_attempts}: {last_error}, Gerar") return self._generate_profile_rule_based( entity_name, entity_type, entity_summary, entity_attributes ) def _fix_truncated_json(self, content: str) -> str: - """修复被截断的JSON(输出被max_tokens限制截断)""" + """JSONmax_tokens""" import re - # 如果JSON被截断,尝试闭合它 + # JSON content = content.strip() - # 计算未闭合的括号 open_braces = content.count('{') - content.count('}') open_brackets = content.count('[') - content.count(']') - # 检查是否有未闭合的字符串 - # 简单检查:如果最后一个引号后没有逗号或闭合括号,可能是字符串被截断 if content and content[-1] not in '",}]': - # 尝试闭合字符串 content += '"' - # 闭合括号 content += ']' * open_brackets content += '}' * open_braces return content def _try_fix_json(self, content: str, entity_name: str, entity_type: str, 
entity_summary: str = "") -> Dict[str, Any]: - """尝试修复损坏的JSON""" + """JSON""" import re - # 1. 首先尝试修复被截断的情况 + # 1. Caso content = self._fix_truncated_json(content) - # 2. 尝试提取JSON部分 + # 2. JSON json_match = re.search(r'\{[\s\S]*\}', content) if json_match: json_str = json_match.group() - # 3. 处理字符串中的换行符问题 - # 找到所有字符串值并替换其中的换行符 def fix_string_newlines(match): s = match.group(0) - # 替换字符串内的实际换行符为空格 s = s.replace('\n', ' ').replace('\r', ' ') - # 替换多余空格 s = re.sub(r'\s+', ' ', s) return s - # 匹配JSON字符串值 + # JSON json_str = re.sub(r'"[^"\\]*(?:\\.[^"\\]*)*"', fix_string_newlines, json_str) - # 4. 尝试解析 try: result = json.loads(json_str) result["_fixed"] = True return result except json.JSONDecodeError as e: - # 5. 如果还是失败,尝试更激进的修复 + # 5. Falhou try: - # 移除所有控制字符 json_str = re.sub(r'[\x00-\x1f\x7f-\x9f]', ' ', json_str) - # 替换所有连续空白 json_str = re.sub(r'\s+', ' ', json_str) result = json.loads(json_str) result["_fixed"] = True @@ -646,32 +616,36 @@ def fix_string_newlines(match): except: pass - # 6. 尝试从内容中提取部分信息 + # 6. Conteúdo bio_match = re.search(r'"bio"\s*:\s*"([^"]*)"', content) - persona_match = re.search(r'"persona"\s*:\s*"([^"]*)', content) # 可能被截断 + persona_match = re.search(r'"persona"\s*:\s*"([^"]*)', content) # bio = bio_match.group(1) if bio_match else (entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}") - persona = persona_match.group(1) if persona_match else (entity_summary or f"{entity_name}是一个{entity_type}。") + persona = persona_match.group(1) if persona_match else (entity_summary or f"{entity_name}{entity_type}") - # 如果提取到了有意义的内容,标记为已修复 + # Conteúdo if bio_match or persona_match: - logger.info(f"从损坏的JSON中提取了部分信息") + logger.info(f"JSON") return { "bio": bio, "persona": persona, "_fixed": True } - # 7. 完全失败,返回基础结构 - logger.warning(f"JSON修复失败,返回基础结构") + # 7. 
Falhou + logger.warning(f"JSONFalhou") return { "bio": entity_summary[:200] if entity_summary else f"{entity_type}: {entity_name}", - "persona": entity_summary or f"{entity_name}是一个{entity_type}。" + "persona": entity_summary or f"{entity_name}{entity_type}" } def _get_system_prompt(self, is_individual: bool) -> str: - """获取系统提示词""" - base_prompt = "你是社交媒体用户画像生成专家。生成详细、真实的人设用于舆论模拟,最大程度还原已有现实情况。必须返回有效的JSON格式,所有字符串值不能包含未转义的换行符。" + """System prompt para geração de perfis de agentes.""" + base_prompt = """Você é um especialista em geração de personas de usuários de redes sociais para simulação do mercado brasileiro. +REGRA CRÍTICA DE IDIOMA: TODO conteúdo textual (bio, persona, interested_topics) DEVE ser escrito em português do Brasil (PT-BR). +NÃO escreva em chinês, inglês ou qualquer outro idioma. Apenas PT-BR. +Crie personas detalhadas e realistas. Retorne JSON válido. Valores string não devem conter quebras de linha não escapadas. +NUNCA use caracteres chineses em nenhum campo.""" return f"{base_prompt}\n\n{get_language_instruction()}" def _build_individual_persona_prompt( @@ -682,45 +656,43 @@ def _build_individual_persona_prompt( entity_attributes: Dict[str, Any], context: str ) -> str: - """构建个人实体的详细人设提示词""" + """Entidade""" - attrs_str = json.dumps(entity_attributes, ensure_ascii=False) if entity_attributes else "无" - context_str = context[:3000] if context else "无额外上下文" + attrs_str = json.dumps(entity_attributes, ensure_ascii=False) if entity_attributes else "" + context_str = context[:3000] if context else "" - return f"""为实体生成详细的社交媒体用户人设,最大程度还原已有现实情况。 + return f"""Com base nas informações da entidade abaixo, gere um perfil realista de usuário de rede social. Retorne APENAS um JSON válido. 
-实体名称: {entity_name} -实体类型: {entity_type} -实体摘要: {entity_summary} -实体属性: {attrs_str} +ENTIDADE: +- Nome: {entity_name} +- Tipo: {entity_type} +- Descrição: {entity_summary} +- Atributos: {attrs_str} -上下文信息: +CONTEXTO ADICIONAL: {context_str} -请生成JSON,包含以下字段: +GERE um JSON com os seguintes campos: -1. bio: 社交媒体简介,200字 -2. persona: 详细人设描述(2000字的纯文本),需包含: - - 基本信息(年龄、职业、教育背景、所在地) - - 人物背景(重要经历、与事件的关联、社会关系) - - 性格特征(MBTI类型、核心性格、情绪表达方式) - - 社交媒体行为(发帖频率、内容偏好、互动风格、语言特点) - - 立场观点(对话题的态度、可能被激怒/感动的内容) - - 独特特征(口头禅、特殊经历、个人爱好) - - 个人记忆(人设的重要部分,要介绍这个个体与事件的关联,以及这个个体在事件中的已有动作与反应) -3. age: 年龄数字(必须是整数) -4. gender: 性别,必须是英文: "male" 或 "female" -5. mbti: MBTI类型(如INTJ、ENFP等) -6. country: 国家(使用中文,如"中国") -7. profession: 职业 -8. interested_topics: 感兴趣话题数组 +1. "bio": Biografia curta (máx 200 caracteres) em português do Brasil +2. "persona": Descrição detalhada (máx 2000 caracteres) em português do Brasil incluindo: + - Personalidade e comportamento + - Relacionamentos e influências + - Tipo MBTI + - Que tipo de conteúdo publica nas redes sociais + - Interesses e motivações +3. "age": Idade numérica (número inteiro) +4. "gender": DEVE ser "male" ou "female" (em inglês) +5. "mbti": Tipo MBTI (ex: "INTJ", "ENFP") +6. "country": País (ex: "Brasil") +7. "profession": Profissão em português +8. 
"interested_topics": Lista de tópicos de interesse em português -重要: -- 所有字段值必须是字符串或数字,不要使用换行符 -- persona必须是一段连贯的文字描述 -- {get_language_instruction()} (gender字段必须用英文male/female) -- 内容要与实体信息保持一致 -- age必须是有效的整数,gender必须是"male"或"female" +REGRAS OBRIGATÓRIAS: +- TODOS os textos (bio, persona, profession, interested_topics) em PORTUGUÊS DO BRASIL +- gender DEVE ser "male" ou "female" (inglês) +- Retorne APENAS JSON válido, sem texto adicional +- {get_language_instruction()} """ def _build_group_persona_prompt( @@ -731,45 +703,39 @@ def _build_group_persona_prompt( entity_attributes: Dict[str, Any], context: str ) -> str: - """构建群体/机构实体的详细人设提示词""" + """/Entidade""" - attrs_str = json.dumps(entity_attributes, ensure_ascii=False) if entity_attributes else "无" - context_str = context[:3000] if context else "无额外上下文" + attrs_str = json.dumps(entity_attributes, ensure_ascii=False) if entity_attributes else "" + context_str = context[:3000] if context else "" - return f"""为机构/群体实体生成详细的社交媒体账号设定,最大程度还原已有现实情况。 + return f"""Com base nas informações da entidade/organização abaixo, gere um perfil representativo para simulação em redes sociais. Retorne APENAS um JSON válido. -实体名称: {entity_name} -实体类型: {entity_type} -实体摘要: {entity_summary} -实体属性: {attrs_str} +ENTIDADE/ORGANIZAÇÃO: +- Nome: {entity_name} +- Tipo: {entity_type} +- Descrição: {entity_summary} +- Atributos: {attrs_str} -上下文信息: +CONTEXTO ADICIONAL: {context_str} -请生成JSON,包含以下字段: +GERE um JSON com os seguintes campos: -1. bio: 官方账号简介,200字,专业得体 -2. persona: 详细账号设定描述(2000字的纯文本),需包含: - - 机构基本信息(正式名称、机构性质、成立背景、主要职能) - - 账号定位(账号类型、目标受众、核心功能) - - 发言风格(语言特点、常用表达、禁忌话题) - - 发布内容特点(内容类型、发布频率、活跃时间段) - - 立场态度(对核心话题的官方立场、面对争议的处理方式) - - 特殊说明(代表的群体画像、运营习惯) - - 机构记忆(机构人设的重要部分,要介绍这个机构与事件的关联,以及这个机构在事件中的已有动作与反应) -3. age: 固定填30(机构账号的虚拟年龄) -4. gender: 固定填"other"(机构账号使用other表示非个人) -5. mbti: MBTI类型,用于描述账号风格,如ISTJ代表严谨保守 -6. country: 国家(使用中文,如"中国") -7. profession: 机构职能描述 -8. interested_topics: 关注领域数组 +1. 
"bio": Biografia institucional curta (máx 200 caracteres) em português do Brasil +2. "persona": Descrição detalhada do perfil público (máx 2000 caracteres) em português do Brasil +3. "age": 30 +4. "gender": "other" +5. "mbti": Tipo MBTI (ex: "ISTJ") +6. "country": "Brasil" +7. "profession": Setor de atuação em português +8. "interested_topics": Lista de tópicos em português -重要: -- 所有字段值必须是字符串或数字,不允许null值 -- persona必须是一段连贯的文字描述,不要使用换行符 -- {get_language_instruction()} (gender字段必须用英文"other") -- age必须是整数30,gender必须是字符串"other" -- 机构账号发言要符合其身份定位""" +REGRAS: +- TODOS os textos em PORTUGUÊS DO BRASIL +- gender = "other" para organizações +- Retorne APENAS JSON válido +- {get_language_instruction()} +""" def _generate_profile_rule_based( self, @@ -778,9 +744,9 @@ def _generate_profile_rule_based( entity_summary: str, entity_attributes: Dict[str, Any] ) -> Dict[str, Any]: - """使用规则生成基础人设""" + """Gerar""" - # 根据实体类型生成不同的人设 + # EntidadeGerar entity_type_lower = entity_type.lower() if entity_type_lower in ["student", "alumni"]: @@ -811,10 +777,10 @@ def _generate_profile_rule_based( return { "bio": f"Official account for {entity_name}. News and updates.", "persona": f"{entity_name} is a media entity that reports news and facilitates public discourse. 
The account shares timely updates and engages with the audience on current events.", - "age": 30, # 机构虚拟年龄 - "gender": "other", # 机构使用other - "mbti": "ISTJ", # 机构风格:严谨保守 - "country": "中国", + "age": 30, # + "gender": "other", # other + "mbti": "ISTJ", # + "country": "", "profession": "Media", "interested_topics": ["General News", "Current Events", "Public Affairs"], } @@ -823,16 +789,15 @@ def _generate_profile_rule_based( return { "bio": f"Official account of {entity_name}.", "persona": f"{entity_name} is an institutional entity that communicates official positions, announcements, and engages with stakeholders on relevant matters.", - "age": 30, # 机构虚拟年龄 - "gender": "other", # 机构使用other - "mbti": "ISTJ", # 机构风格:严谨保守 - "country": "中国", + "age": 30, # + "gender": "other", # other + "mbti": "ISTJ", # + "country": "", "profession": entity_type, "interested_topics": ["Public Policy", "Community", "Official Announcements"], } else: - # 默认人设 return { "bio": entity_summary[:150] if entity_summary else f"{entity_type}: {entity_name}", "persona": entity_summary or f"{entity_name} is a {entity_type.lower()} participating in social discussions.", @@ -845,7 +810,7 @@ def _generate_profile_rule_based( } def set_graph_id(self, graph_id: str): - """设置图谱ID用于Zep检索""" + """ID do grafoZepBusca""" self.graph_id = graph_id def generate_profiles_from_entities( @@ -859,52 +824,51 @@ def generate_profiles_from_entities( output_platform: str = "reddit" ) -> List[OasisAgentProfile]: """ - 批量从实体生成Agent Profile(支持并行生成) + EntidadeGerarAgent ProfileGerar Args: - entities: 实体列表 - use_llm: 是否使用LLM生成详细人设 - progress_callback: 进度回调函数 (current, total, message) - graph_id: 图谱ID,用于Zep检索获取更丰富上下文 - parallel_count: 并行生成数量,默认5 - realtime_output_path: 实时写入的文件路径(如果提供,每生成一个就写入一次) - output_platform: 输出平台格式 ("reddit" 或 "twitter") + entities: Entidade + use_llm: LLMGerar + progress_callback: Função callback de progresso (current, total, message) + graph_id: ID do grafoZepBusca + parallel_count: Gerar5 + 
realtime_output_path: Gerar + output_platform: ("reddit" "twitter") Returns: - Agent Profile列表 + Agent Profile """ import concurrent.futures from threading import Lock - # 设置graph_id用于Zep检索 + # graph_idZepBusca if graph_id: self.graph_id = graph_id total = len(entities) - profiles = [None] * total # 预分配列表保持顺序 - completed_count = [0] # 使用列表以便在闭包中修改 + profiles = [None] * total # + completed_count = [0] # lock = Lock() - # 实时写入文件的辅助函数 def save_profiles_realtime(): - """实时保存已生成的 profiles 到文件""" + """Gerar profiles """ if not realtime_output_path: return with lock: - # 过滤出已生成的 profiles + # Gerar profiles existing_profiles = [p for p in profiles if p is not None] if not existing_profiles: return try: if output_platform == "reddit": - # Reddit JSON 格式 + # Reddit JSON profiles_data = [p.to_reddit_format() for p in existing_profiles] with open(realtime_output_path, 'w', encoding='utf-8') as f: json.dump(profiles_data, f, ensure_ascii=False, indent=2) else: - # Twitter CSV 格式 + # Twitter CSV import csv profiles_data = [p.to_twitter_format() for p in existing_profiles] if profiles_data: @@ -914,13 +878,13 @@ def save_profiles_realtime(): writer.writeheader() writer.writerows(profiles_data) except Exception as e: - logger.warning(f"实时保存 profiles 失败: {e}") + logger.warning(f" profiles Falhou: {e}") # Capture locale before spawning thread pool workers current_locale = get_locale() def generate_single_profile(idx: int, entity: EntityNode) -> tuple: - """生成单个profile的工作函数""" + """Gerarprofile""" set_locale(current_locale) entity_type = entity.get_entity_type() or "Entity" @@ -931,14 +895,14 @@ def generate_single_profile(idx: int, entity: EntityNode) -> tuple: use_llm=use_llm ) - # 实时输出生成的人设到控制台和日志 + # Gerar self._print_generated_profile(entity.name, entity_type, profile) return idx, profile, None except Exception as e: - logger.error(f"生成实体 {entity.name} 的人设失败: {str(e)}") - # 创建一个基础profile + logger.error(f"GerarEntidade {entity.name} Falhou: {str(e)}") + # profile fallback_profile 
= OasisAgentProfile( user_id=idx, user_name=self._generate_username(entity.name), @@ -950,20 +914,18 @@ def generate_single_profile(idx: int, entity: EntityNode) -> tuple: ) return idx, fallback_profile, str(e) - logger.info(f"开始并行生成 {total} 个Agent人设(并行数: {parallel_count})...") + logger.info(f"Gerar {total} Agent: {parallel_count}...") print(f"\n{'='*60}") - print(f"开始生成Agent人设 - 共 {total} 个实体,并行数: {parallel_count}") + print(f"GerarAgent - {total} Entidade: {parallel_count}") print(f"{'='*60}\n") - # 使用线程池并行执行 with concurrent.futures.ThreadPoolExecutor(max_workers=parallel_count) as executor: - # 提交所有任务 future_to_entity = { executor.submit(generate_single_profile, idx, entity): (idx, entity) for idx, entity in enumerate(entities) } - # 收集结果 + # Resultado for future in concurrent.futures.as_completed(future_to_entity): idx, entity = future_to_entity[future] entity_type = entity.get_entity_type() or "Entity" @@ -976,23 +938,22 @@ def generate_single_profile(idx: int, entity: EntityNode) -> tuple: completed_count[0] += 1 current = completed_count[0] - # 实时写入文件 save_profiles_realtime() if progress_callback: progress_callback( current, total, - f"已完成 {current}/{total}: {entity.name}({entity_type})" + f"Concluído {current}/{total}: {entity.name}({entity_type})" ) if error: - logger.warning(f"[{current}/{total}] {entity.name} 使用备用人设: {error}") + logger.warning(f"[{current}/{total}] {entity.name} : {error}") else: - logger.info(f"[{current}/{total}] 成功生成人设: {entity.name} ({entity_type})") + logger.info(f"[{current}/{total}] Gerar: {entity.name} ({entity_type})") except Exception as e: - logger.error(f"处理实体 {entity.name} 时发生异常: {str(e)}") + logger.error(f"Entidade {entity.name} : {str(e)}") with lock: completed_count[0] += 1 profiles[idx] = OasisAgentProfile( @@ -1004,44 +965,43 @@ def generate_single_profile(idx: int, entity: EntityNode) -> tuple: source_entity_uuid=entity.uuid, source_entity_type=entity_type, ) - # 实时写入文件(即使是备用人设) save_profiles_realtime() 
print(f"\n{'='*60}") - print(f"人设生成完成!共生成 {len([p for p in profiles if p])} 个Agent") + print(f"GerarGerar {len([p for p in profiles if p])} Agent") print(f"{'='*60}\n") return profiles def _print_generated_profile(self, entity_name: str, entity_type: str, profile: OasisAgentProfile): - """实时输出生成的人设到控制台(完整内容,不截断)""" + """GerarConteúdo completo, sem truncar""" separator = "-" * 70 - # 构建完整输出内容(不截断) - topics_str = ', '.join(profile.interested_topics) if profile.interested_topics else '无' + # Conteúdo + topics_str = ', '.join(profile.interested_topics) if profile.interested_topics else '' output_lines = [ f"\n{separator}", t('progress.profileGenerated', name=entity_name, type=entity_type), f"{separator}", - f"用户名: {profile.user_name}", + f": {profile.user_name}", + f"", f"", - f"【简介】", f"{profile.bio}", f"", - f"【详细人设】", + f"", f"{profile.persona}", f"", - f"【基本属性】", - f"年龄: {profile.age} | 性别: {profile.gender} | MBTI: {profile.mbti}", - f"职业: {profile.profession} | 国家: {profile.country}", - f"兴趣话题: {topics_str}", + f"", + f": {profile.age} | : {profile.gender} | MBTI: {profile.mbti}", + f": {profile.profession} | : {profile.country}", + f": {topics_str}", separator ] output = "\n".join(output_lines) - # 只输出到控制台(避免重复,logger不再输出完整内容) + # loggerConteúdo print(output) def save_profiles( @@ -1051,16 +1011,16 @@ def save_profiles( platform: str = "reddit" ): """ - 保存Profile到文件(根据平台选择正确格式) + Profile - OASIS平台格式要求: - - Twitter: CSV格式 - - Reddit: JSON格式 + OASIS + - Twitter: CSV + - Reddit: JSON Args: - profiles: Profile列表 - file_path: 文件路径 - platform: 平台类型 ("reddit" 或 "twitter") + profiles: Profile + file_path: + platform: ("reddit" "twitter") """ if platform == "twitter": self._save_twitter_csv(profiles, file_path) @@ -1069,73 +1029,69 @@ def save_profiles( def _save_twitter_csv(self, profiles: List[OasisAgentProfile], file_path: str): """ - 保存Twitter Profile为CSV格式(符合OASIS官方要求) - - OASIS Twitter要求的CSV字段: - - user_id: 用户ID(根据CSV顺序从0开始) - - name: 用户真实姓名 - - username: 系统中的用户名 - 
- user_char: 详细人设描述(注入到LLM系统提示中,指导Agent行为) - - description: 简短的公开简介(显示在用户资料页面) - - user_char vs description 区别: - - user_char: 内部使用,LLM系统提示,决定Agent如何思考和行动 - - description: 外部显示,其他用户可见的简介 + Twitter ProfileCSVOASIS + + OASIS TwitterCSV + - user_id: IDCSV0 + - name: + - username: + - user_char: LLMAgent + - description: + + user_char vs description + - user_char: LLMAgent + - description: """ import csv - # 确保文件扩展名是.csv + # .csv if not file_path.endswith('.csv'): file_path = file_path.replace('.json', '.csv') with open(file_path, 'w', newline='', encoding='utf-8') as f: writer = csv.writer(f) - # 写入OASIS要求的表头 + # OASIS headers = ['user_id', 'name', 'username', 'user_char', 'description'] writer.writerow(headers) - # 写入数据行 for idx, profile in enumerate(profiles): - # user_char: 完整人设(bio + persona),用于LLM系统提示 + # user_char: bio + personaLLM user_char = profile.bio if profile.persona and profile.persona != profile.bio: user_char = f"{profile.bio} {profile.persona}" - # 处理换行符(CSV中用空格替代) user_char = user_char.replace('\n', ' ').replace('\r', ' ') - # description: 简短简介,用于外部显示 + # description: description = profile.bio.replace('\n', ' ').replace('\r', ' ') row = [ - idx, # user_id: 从0开始的顺序ID - profile.name, # name: 真实姓名 - profile.user_name, # username: 用户名 - user_char, # user_char: 完整人设(内部LLM使用) - description # description: 简短简介(外部显示) + idx, # user_id: 0ID + profile.name, # name: + profile.user_name, # username: + user_char, # user_char: LLM + description # description: ] writer.writerow(row) - logger.info(f"已保存 {len(profiles)} 个Twitter Profile到 {file_path} (OASIS CSV格式)") + logger.info(f" {len(profiles)} Twitter Profile {file_path} (OASIS CSV)") def _normalize_gender(self, gender: Optional[str]) -> str: """ - 标准化gender字段为OASIS要求的英文格式 + genderOASIS - OASIS要求: male, female, other + OASIS: male, female, other """ if not gender: return "other" gender_lower = gender.lower().strip() - # 中文映射 gender_map = { - "男": "male", - "女": "female", - "机构": "other", - "其他": "other", - # 英文已有 
+ "": "male", + "": "female", + "": "other", + "": "other", "male": "male", "female": "female", "other": "other", @@ -1145,41 +1101,39 @@ def _normalize_gender(self, gender: Optional[str]) -> str: def _save_reddit_json(self, profiles: List[OasisAgentProfile], file_path: str): """ - 保存Reddit Profile为JSON格式 - - 使用与 to_reddit_format() 一致的格式,确保 OASIS 能正确读取。 - 必须包含 user_id 字段,这是 OASIS agent_graph.get_agent() 匹配的关键! - - 必需字段: - - user_id: 用户ID(整数,用于匹配 initial_posts 中的 poster_agent_id) - - username: 用户名 - - name: 显示名称 - - bio: 简介 - - persona: 详细人设 - - age: 年龄(整数) - - gender: "male", "female", 或 "other" - - mbti: MBTI类型 - - country: 国家 + Reddit ProfileJSON + + to_reddit_format() OASIS + user_id OASIS agent_graph.get_agent() + + - user_id: ID initial_posts poster_agent_id + - username: + - name: + - bio: + - persona: + - age: + - gender: "male", "female", "other" + - mbti: MBTI + - country: """ data = [] for idx, profile in enumerate(profiles): - # 使用与 to_reddit_format() 一致的格式 + # to_reddit_format() item = { - "user_id": profile.user_id if profile.user_id is not None else idx, # 关键:必须包含 user_id + "user_id": profile.user_id if profile.user_id is not None else idx, # user_id "username": profile.user_name, "name": profile.name, "bio": profile.bio[:150] if profile.bio else f"{profile.name}", "persona": profile.persona or f"{profile.name} is a participant in social discussions.", "karma": profile.karma if profile.karma else 1000, "created_at": profile.created_at, - # OASIS必需字段 - 确保都有默认值 + # OASIS - "age": profile.age if profile.age else 30, "gender": self._normalize_gender(profile.gender), "mbti": profile.mbti if profile.mbti else "ISTJ", - "country": profile.country if profile.country else "中国", + "country": profile.country if profile.country else "", } - # 可选字段 if profile.profession: item["profession"] = profile.profession if profile.interested_topics: @@ -1190,16 +1144,14 @@ def _save_reddit_json(self, profiles: List[OasisAgentProfile], file_path: str): with open(file_path, 
'w', encoding='utf-8') as f: json.dump(data, f, ensure_ascii=False, indent=2) - logger.info(f"已保存 {len(profiles)} 个Reddit Profile到 {file_path} (JSON格式,包含user_id字段)") + logger.info(f" {len(profiles)} Reddit Profile {file_path} (JSONuser_id)") - # 保留旧方法名作为别名,保持向后兼容 def save_profiles_to_json( self, profiles: List[OasisAgentProfile], file_path: str, platform: str = "reddit" ): - """[已废弃] 请使用 save_profiles() 方法""" - logger.warning("save_profiles_to_json已废弃,请使用save_profiles方法") + """[] save_profiles() """ + logger.warning("save_profiles_to_jsonsave_profiles") self.save_profiles(profiles, file_path, platform) - diff --git a/backend/app/services/ontology_generator.py b/backend/app/services/ontology_generator.py index 01a3d799a5..8fd303cecb 100644 --- a/backend/app/services/ontology_generator.py +++ b/backend/app/services/ontology_generator.py @@ -1,6 +1,6 @@ """ -本体生成服务 -接口1:分析文本内容,生成适合社会模拟的实体和关系类型定义 +Gerar +1AnáliseConteúdoGerarSimulaçãoEntidadeRelacionamento """ import json @@ -9,174 +9,128 @@ from typing import Dict, Any, List, Optional from ..utils.llm_client import LLMClient from ..utils.locale import get_language_instruction +from .ontology_prompts_v2 import detect_sector_and_decision, get_ontology_system_prompt_v3 logger = logging.getLogger(__name__) def _to_pascal_case(name: str) -> str: - """将任意格式的名称转换为 PascalCase(如 'works_for' -> 'WorksFor', 'person' -> 'Person')""" - # 按非字母数字字符分割 + """ PascalCase 'works_for' -> 'WorksFor', 'person' -> 'Person'""" parts = re.split(r'[^a-zA-Z0-9]+', name) - # 再按 camelCase 边界分割(如 'camelCase' -> ['camel', 'Case']) + # camelCase 'camelCase' -> ['camel', 'Case'] words = [] for part in parts: words.extend(re.sub(r'([a-z])([A-Z])', r'\1_\2', part).split('_')) - # 每个词首字母大写,过滤空串 result = ''.join(word.capitalize() for word in words if word) return result if result else 'Unknown' -# 本体生成的系统提示词 -ONTOLOGY_SYSTEM_PROMPT = """你是一个专业的知识图谱本体设计专家。你的任务是分析给定的文本内容和模拟需求,设计适合**社交媒体舆论模拟**的实体类型和关系类型。 +# Gerar +ONTOLOGY_SYSTEM_PROMPT = """\ +Você é um 
especialista em design de ontologias para grafos de conhecimento. Sua tarefa é analisar o conteúdo textual e os requisitos de simulação fornecidos, e projetar tipos de entidades e relacionamentos adequados para **simulação de opinião pública em redes sociais no mercado brasileiro**. -**重要:你必须输出有效的JSON格式数据,不要输出任何其他内容。** +**IMPORTANTE: Retorne APENAS dados em formato JSON válido. Não retorne nada além do JSON.** -## 核心任务背景 +## Contexto da Tarefa -我们正在构建一个**社交媒体舆论模拟系统**。在这个系统中: -- 每个实体都是一个可以在社交媒体上发声、互动、传播信息的"账号"或"主体" -- 实体之间会相互影响、转发、评论、回应 -- 我们需要模拟舆论事件中各方的反应和信息传播路径 +Estamos construindo um **sistema de simulação de opinião pública em redes sociais**. Neste sistema: +- Cada entidade é uma "conta" ou "agente" que pode publicar, interagir e propagar informações em redes sociais +- Entidades influenciam umas às outras, repostam, comentam e respondem +- Precisamos simular as reações e caminhos de propagação de informação de todas as partes em eventos de opinião pública -因此,**实体必须是现实中真实存在的、可以在社媒上发声和互动的主体**: +Portanto, **entidades devem ser agentes do mundo real que podem publicar e interagir em redes sociais**: -**可以是**: -- 具体的个人(公众人物、当事人、意见领袖、专家学者、普通人) -- 公司、企业(包括其官方账号) -- 组织机构(大学、协会、NGO、工会等) -- 政府部门、监管机构 -- 媒体机构(报纸、电视台、自媒体、网站) -- 社交媒体平台本身 -- 特定群体代表(如校友会、粉丝团、维权群体等) +**Permitido**: +- Indivíduos específicos (figuras públicas, formadores de opinião, especialistas, pessoas comuns) +- Empresas e negócios (incluindo suas contas oficiais) +- Organizações (universidades, associações, ONGs, sindicatos, etc.) +- Órgãos governamentais e agências reguladoras +- Organizações de mídia (jornais, TVs, influenciadores, sites, etc.) +- Plataformas de redes sociais +- Representantes de grupos específicos (associações, fã-clubes, grupos de advocacy, etc.) 
-**不可以是**: -- 抽象概念(如"舆论"、"情绪"、"趋势") -- 主题/话题(如"学术诚信"、"教育改革") -- 观点/态度(如"支持方"、"反对方") +**NÃO Permitido**: +- Conceitos abstratos (como "opinião pública", "emoção", "tendência") +- Tópicos/temas (como "integridade acadêmica", "reforma educacional") +- Pontos de vista/atitudes (como "apoiadores", "oponentes") -## 输出格式 +## Formato de Saída -请输出JSON格式,包含以下结构: +Retorne JSON com a seguinte estrutura: ```json { "entity_types": [ { - "name": "实体类型名称(英文,PascalCase)", - "description": "简短描述(英文,不超过100字符)", + "name": "NomeTipoEntidade (Português do Brasil, PascalCase. Ex: ConsumidorJovem, LojaLocal, InfluenciadorDigital)", + "description": "Descrição breve em PORTUGUÊS DO BRASIL (máx 100 caracteres)", "attributes": [ { - "name": "属性名(英文,snake_case)", + "name": "nome_atributo (português, snake_case. Ex: faixa_etaria, poder_aquisitivo)", "type": "text", - "description": "属性描述" + "description": "Descrição do atributo em português" } ], - "examples": ["示例实体1", "示例实体2"] + "examples": ["Exemplo 1 em português", "Exemplo 2 em português"] } ], "edge_types": [ { - "name": "关系类型名称(英文,UPPER_SNAKE_CASE)", - "description": "简短描述(英文,不超过100字符)", + "name": "NOME_RELACIONAMENTO (Português, UPPER_SNAKE_CASE. Ex: COMPETE_COM, INFLUENCIA, REGULA)", + "description": "Descrição breve em PORTUGUÊS DO BRASIL (máx 100 caracteres)", "source_targets": [ - {"source": "源实体类型", "target": "目标实体类型"} + {"source": "TipoOrigem", "target": "TipoDestino"} ], "attributes": [] } ], - "analysis_summary": "对文本内容的简要分析说明" + "analysis_summary": "Análise breve do conteúdo em PORTUGUÊS DO BRASIL" } ``` -## 设计指南(极其重要!) +## Diretrizes de Design -### 1. 实体类型设计 - 必须严格遵守 +### 1. Tipos de Entidades — Seguir Rigorosamente -**数量要求:必须正好10个实体类型** +**Quantidade: exatamente 10 tipos de entidades** -**层次结构要求(必须同时包含具体类型和兜底类型)**: +**Hierarquia (obrigatória)**: -你的10个实体类型必须包含以下层次: +A. 
**Tipos de fallback (obrigatórios, últimos 2 da lista)**: + - `Person`: Qualquer pessoa física sem tipo específico + - `Organization`: Qualquer organização sem tipo específico -A. **兜底类型(必须包含,放在列表最后2个)**: - - `Person`: 任何自然人个体的兜底类型。当一个人不属于其他更具体的人物类型时,归入此类。 - - `Organization`: 任何组织机构的兜底类型。当一个组织不属于其他更具体的组织类型时,归入此类。 +B. **Tipos específicos (8, baseados no conteúdo)**: + - Projete tipos para os papéis principais do texto + - Cada tipo deve ter limites claros sem sobreposição -B. **具体类型(8个,根据文本内容设计)**: - - 针对文本中出现的主要角色,设计更具体的类型 - - 例如:如果文本涉及学术事件,可以有 `Student`, `Professor`, `University` - - 例如:如果文本涉及商业事件,可以有 `Company`, `CEO`, `Employee` +### 2. Tipos de Relacionamento +- Quantidade: 6-10 +- Devem refletir conexões reais em redes sociais -**为什么需要兜底类型**: -- 文本中会出现各种人物,如"中小学教师"、"路人甲"、"某位网友" -- 如果没有专门的类型匹配,他们应该被归入 `Person` -- 同理,小型组织、临时团体等应该归入 `Organization` +### 3. Atributos +- 1-3 atributos-chave por tipo +- Nomes proibidos: `name`, `uuid`, `group_id`, `created_at`, `summary` +- Recomendado: `full_name`, `title`, `role`, `position`, `location`, `description` -**具体类型的设计原则**: -- 从文本中识别出高频出现或关键的角色类型 -- 每个具体类型应该有明确的边界,避免重叠 -- description 必须清晰说明这个类型和兜底类型的区别 +## Referência de Tipos -### 2. 关系类型设计 +**Pessoa**: Student, Professor, Journalist, Celebrity, Executive, Official, Lawyer, Doctor, Person (fallback) +**Organização**: University, Company, GovernmentAgency, MediaOutlet, Hospital, School, NGO, Organization (fallback) +**Relacionamentos**: WORKS_FOR, STUDIES_AT, AFFILIATED_WITH, REPRESENTS, REGULATES, REPORTS_ON, COMMENTS_ON, RESPONDS_TO, SUPPORTS, OPPOSES, COLLABORATES_WITH, COMPETES_WITH -- 数量:6-10个 -- 关系应该反映社媒互动中的真实联系 -- 确保关系的 source_targets 涵盖你定义的实体类型 - -### 3. 
属性设计 - -- 每个实体类型1-3个关键属性 -- **注意**:属性名不能使用 `name`、`uuid`、`group_id`、`created_at`、`summary`(这些是系统保留字) -- 推荐使用:`full_name`, `title`, `role`, `position`, `location`, `description` 等 - -## 实体类型参考 - -**个人类(具体)**: -- Student: 学生 -- Professor: 教授/学者 -- Journalist: 记者 -- Celebrity: 明星/网红 -- Executive: 高管 -- Official: 政府官员 -- Lawyer: 律师 -- Doctor: 医生 - -**个人类(兜底)**: -- Person: 任何自然人(不属于上述具体类型时使用) - -**组织类(具体)**: -- University: 高校 -- Company: 公司企业 -- GovernmentAgency: 政府机构 -- MediaOutlet: 媒体机构 -- Hospital: 医院 -- School: 中小学 -- NGO: 非政府组织 - -**组织类(兜底)**: -- Organization: 任何组织机构(不属于上述具体类型时使用) - -## 关系类型参考 - -- WORKS_FOR: 工作于 -- STUDIES_AT: 就读于 -- AFFILIATED_WITH: 隶属于 -- REPRESENTS: 代表 -- REGULATES: 监管 -- REPORTS_ON: 报道 -- COMMENTS_ON: 评论 -- RESPONDS_TO: 回应 -- SUPPORTS: 支持 -- OPPOSES: 反对 -- COLLABORATES_WITH: 合作 -- COMPETES_WITH: 竞争 +⚠️ REGRA ABSOLUTA DE IDIOMA ⚠️ +- Nomes de tipos: INGLÊS (PascalCase / UPPER_SNAKE_CASE / snake_case) +- Descrições, examples, analysis_summary: PORTUGUÊS DO BRASIL +- ZERO caracteres chineses permitidos em qualquer campo +- Texto fonte em chinês ou inglês → TRADUZA para português """ class OntologyGenerator: """ - 本体生成器 - 分析文本内容,生成实体和关系类型定义 + Gerar + AnáliseConteúdoGerarEntidadeRelacionamento """ def __init__(self, llm_client: Optional[LLMClient] = None): @@ -189,17 +143,16 @@ def generate( additional_context: Optional[str] = None ) -> Dict[str, Any]: """ - 生成本体定义 + Gerar Args: - document_texts: 文档文本列表 - simulation_requirement: 模拟需求描述 - additional_context: 额外上下文 + document_texts: + simulation_requirement: Descrição dos requisitos da simulação + additional_context: Returns: - 本体定义(entity_types, edge_types等) + entity_types, edge_types """ - # 构建用户消息 user_message = self._build_user_message( document_texts, simulation_requirement, @@ -207,25 +160,29 @@ def generate( ) lang_instruction = get_language_instruction() - system_prompt = f"{ONTOLOGY_SYSTEM_PROMPT}\n\n{lang_instruction}\nIMPORTANT: Entity type names MUST be in English PascalCase (e.g., 
'PersonEntity', 'MediaOrganization'). Relationship type names MUST be in English UPPER_SNAKE_CASE (e.g., 'WORKS_FOR'). Attribute names MUST be in English snake_case. Only description fields and analysis_summary should use the specified language above." + + # AUGUR v2: detecção bidimensional setor + tipo de decisão + sector, decision = detect_sector_and_decision(simulation_requirement) + logger.info(f"AUGUR v2 ontology: setor={sector}, decisao={decision}") + + system_prompt = f"{get_ontology_system_prompt_v3(sector, decision)}\n\n{lang_instruction}" messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_message} ] - # 调用LLM result = self.llm_client.chat_json( messages=messages, temperature=0.3, max_tokens=4096 ) - # 验证和后处理 result = self._validate_and_process(result) + result["_augur_meta"] = {"setor": sector, "tipo_decisao": decision} return result - # 传给 LLM 的文本最大长度(5万字) + # LLM 5 MAX_TEXT_LENGTH_FOR_LLM = 50000 def _build_user_message( @@ -234,50 +191,48 @@ def _build_user_message( simulation_requirement: str, additional_context: Optional[str] ) -> str: - """构建用户消息""" + """""" - # 合并文本 combined_text = "\n\n---\n\n".join(document_texts) original_length = len(combined_text) - # 如果文本超过5万字,截断(仅影响传给LLM的内容,不影响图谱构建) + # 5LLMConteúdoGrafo if len(combined_text) > self.MAX_TEXT_LENGTH_FOR_LLM: combined_text = combined_text[:self.MAX_TEXT_LENGTH_FOR_LLM] - combined_text += f"\n\n...(原文共{original_length}字,已截取前{self.MAX_TEXT_LENGTH_FOR_LLM}字用于本体分析)..." + combined_text += f"\n\n...(original text: {original_length} chars, truncated to first {self.MAX_TEXT_LENGTH_FOR_LLM} chars for ontology analysis)..." - message = f"""## 模拟需求 + message = f"""## Simulation Requirement {simulation_requirement} -## 文档内容 +## Document Content {combined_text} """ if additional_context: message += f""" -## 额外说明 +## Additional Notes {additional_context} """ message += """ -请根据以上内容,设计适合社会舆论模拟的实体类型和关系类型。 - -**必须遵守的规则**: -1. 必须正好输出10个实体类型 -2. 
最后2个必须是兜底类型:Person(个人兜底)和 Organization(组织兜底) -3. 前8个是根据文本内容设计的具体类型 -4. 所有实体类型必须是现实中可以发声的主体,不能是抽象概念 -5. 属性名不能使用 name、uuid、group_id 等保留字,用 full_name、org_name 等替代 +Based on the above content, design entity types and relationship types suitable for social media public opinion simulation. + +**Mandatory rules**: +1. Must output exactly 10 entity types +2. The last 2 must be fallback types: Person (individual fallback) and Organization (organization fallback) +3. The first 8 are specific types designed based on text content +4. All entity types must be real-world agents that can post on social media, NOT abstract concepts +5. Attribute names cannot use name, uuid, group_id, etc. (reserved words) — use full_name, org_name, etc. instead """ return message def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]: - """验证和后处理结果""" + """Resultado""" - # 确保必要字段存在 if "entity_types" not in result: result["entity_types"] = [] if "edge_types" not in result: @@ -285,11 +240,11 @@ def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]: if "analysis_summary" not in result: result["analysis_summary"] = "" - # 验证实体类型 - # 记录原始名称到 PascalCase 的映射,用于后续修正 edge 的 source_targets 引用 + # Entidade + # PascalCase edge source_targets entity_name_map = {} for entity in result["entity_types"]: - # 强制将 entity name 转为 PascalCase(Zep API 要求) + # entity name PascalCaseZep API if "name" in entity: original_name = entity["name"] entity["name"] = _to_pascal_case(original_name) @@ -300,19 +255,19 @@ def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]: entity["attributes"] = [] if "examples" not in entity: entity["examples"] = [] - # 确保description不超过100字符 + # description100 if len(entity.get("description", "")) > 100: entity["description"] = entity["description"][:97] + "..." 
- # 验证关系类型 + # Relacionamento for edge in result["edge_types"]: - # 强制将 edge name 转为 SCREAMING_SNAKE_CASE(Zep API 要求) + # edge name SCREAMING_SNAKE_CASEZep API if "name" in edge: original_name = edge["name"] edge["name"] = original_name.upper() if edge["name"] != original_name: logger.warning(f"Edge type name '{original_name}' auto-converted to '{edge['name']}'") - # 修正 source_targets 中的实体名称引用,与转换后的 PascalCase 保持一致 + # source_targets Entidade PascalCase for st in edge.get("source_targets", []): if st.get("source") in entity_name_map: st["source"] = entity_name_map[st["source"]] @@ -325,11 +280,11 @@ def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]: if len(edge.get("description", "")) > 100: edge["description"] = edge["description"][:97] + "..." - # Zep API 限制:最多 10 个自定义实体类型,最多 10 个自定义边类型 + # Zep API 10 Entidade 10 MAX_ENTITY_TYPES = 10 MAX_EDGE_TYPES = 10 - # 去重:按 name 去重,保留首次出现的 + # name seen_names = set() deduped = [] for entity in result["entity_types"]: @@ -341,7 +296,6 @@ def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]: logger.warning(f"Duplicate entity type '{name}' removed during validation") result["entity_types"] = deduped - # 兜底类型定义 person_fallback = { "name": "Person", "description": "Any individual person not fitting other specific person types.", @@ -362,12 +316,10 @@ def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]: "examples": ["small business", "community group"] } - # 检查是否已有兜底类型 entity_names = {e["name"] for e in result["entity_types"]} has_person = "Person" in entity_names has_organization = "Organization" in entity_names - # 需要添加的兜底类型 fallbacks_to_add = [] if not has_person: fallbacks_to_add.append(person_fallback) @@ -378,17 +330,12 @@ def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]: current_count = len(result["entity_types"]) needed_slots = len(fallbacks_to_add) - # 如果添加后会超过 10 个,需要移除一些现有类型 if current_count + needed_slots > MAX_ENTITY_TYPES: - 
# 计算需要移除多少个 to_remove = current_count + needed_slots - MAX_ENTITY_TYPES - # 从末尾移除(保留前面更重要的具体类型) result["entity_types"] = result["entity_types"][:-to_remove] - # 添加兜底类型 result["entity_types"].extend(fallbacks_to_add) - # 最终确保不超过限制(防御性编程) if len(result["entity_types"]) > MAX_ENTITY_TYPES: result["entity_types"] = result["entity_types"][:MAX_ENTITY_TYPES] @@ -399,29 +346,29 @@ def _validate_and_process(self, result: Dict[str, Any]) -> Dict[str, Any]: def generate_python_code(self, ontology: Dict[str, Any]) -> str: """ - 将本体定义转换为Python代码(类似ontology.py) + Pythonontology.py Args: - ontology: 本体定义 + ontology: Returns: - Python代码字符串 + Python """ code_lines = [ '"""', - '自定义实体类型定义', - '由MiroFish自动生成,用于社会舆论模拟', + 'Entidade', + 'MiroFishGerarSimulação', '"""', '', 'from pydantic import Field', 'from zep_cloud.external_clients.ontology import EntityModel, EntityText, EdgeModel', '', '', - '# ============== 实体类型定义 ==============', + '# ============== Entidade ==============', '', ] - # 生成实体类型 + # GerarEntidade for entity in ontology.get("entity_types", []): name = entity["name"] desc = entity.get("description", f"A {name} entity.") @@ -444,13 +391,13 @@ def generate_python_code(self, ontology: Dict[str, Any]) -> str: code_lines.append('') code_lines.append('') - code_lines.append('# ============== 关系类型定义 ==============') + code_lines.append('# ============== Relacionamento ==============') code_lines.append('') - # 生成关系类型 + # GerarRelacionamento for edge in ontology.get("edge_types", []): name = edge["name"] - # 转换为PascalCase类名 + # PascalCase class_name = ''.join(word.capitalize() for word in name.split('_')) desc = edge.get("description", f"A {name} relationship.") @@ -472,8 +419,8 @@ def generate_python_code(self, ontology: Dict[str, Any]) -> str: code_lines.append('') code_lines.append('') - # 生成类型字典 - code_lines.append('# ============== 类型配置 ==============') + # Gerar + code_lines.append('# ============== Configuração ==============') code_lines.append('') 
code_lines.append('ENTITY_TYPES = {') for entity in ontology.get("entity_types", []): @@ -489,7 +436,7 @@ def generate_python_code(self, ontology: Dict[str, Any]) -> str: code_lines.append('}') code_lines.append('') - # 生成边的source_targets映射 + # Gerarsource_targets code_lines.append('EDGE_SOURCE_TARGETS = {') for edge in ontology.get("edge_types", []): name = edge["name"] @@ -503,4 +450,3 @@ def generate_python_code(self, ontology: Dict[str, Any]) -> str: code_lines.append('}') return '\n'.join(code_lines) - diff --git a/backend/app/services/ontology_prompts_v2.py b/backend/app/services/ontology_prompts_v2.py new file mode 100644 index 0000000000..86431eb7ff --- /dev/null +++ b/backend/app/services/ontology_prompts_v2.py @@ -0,0 +1,453 @@ +""" +AUGUR Ontology Prompts v3 — Sistema bidimensional: SETOR × TIPO DE DECISÃO. + +Evolução da v2: em vez de só detectar o nicho da indústria, +agora detecta DUAS dimensões: + 1. SETOR: qual indústria/mercado + 2. TIPO DE DECISÃO: o que o cliente quer fazer + +Cada combinação gera entidades e relações diferentes. 
+ +Caminho no repo: backend/app/services/ontology_prompts_v3.py +""" + +import json +import unicodedata +from typing import Tuple + +# ============================================================ +# EIXO 1 — SETORES +# ============================================================ + +SETORES = { + "varejo_local": { + "nome": "Varejo local / comércio físico", + "keywords": ["loja", "calcado", "roupa", "varejo", "comercio", "sapato", "tenis", + "moda", "boutique", "ponto comercial", "shopping", "centro", + "eletronico", "movel", "decoracao", "brinquedo", "papelaria", + "otica", "joalheria", "bolsa", "acessorio", "perfumaria", + "moveis", "colchao", "eletrodomestico", "cama", "mesa", + "farmacia", "drogaria", "pet shop", "livraria", "floricultura", + "importadora", "revenda", "concessionaria", "automovel", "carro", + "moto", "bicicleta", "coworking", "cemiterio"], + }, + "saas_b2b": { + "nome": "SaaS / software / plataforma digital", + "keywords": ["saas", "software", "plataforma", "sistema", "erp", "crm", + "b2b", "api", "cloud", "assinatura", "startup", "automacao", + "inteligencia artificial", "dashboard", "converzas", "chatbot", + "gestao", "gerenciamento", "ferramenta", "solucao digital", + "onboarding", "login", "usuario", "mensalidade"], + }, + "industria_fmcg": { + "nome": "Indústria / bens de consumo / FMCG", + "keywords": ["fabrica", "industria", "papel", "higienico", "fabricante", + "producao", "manufatura", "cpg", "fmcg", "embalagem", + "distribuidora", "atacado", "companhia", "copapa", + "alimento industrializado", "bebida", "cerveja", "refrigerante", + "medicamento", "farmaceutica", "laboratorio", "generico", + "remedio", "principio ativo", "anvisa", "cosmetico", + "quimico", "siderurgica", "mineracao", "celulose"], + }, + "telecom_isp": { + "nome": "Telecomunicações / internet / ISP", + "keywords": ["internet", "provedor", "fibra", "banda larga", "isp", + "telecom", "telecomunicacao", "wifi", "rede", "conectividade", + "milla", "dados moveis", "5g", 
"antena", "torre"], + }, + "energia_tech": { + "nome": "Energia / solar / EV / tecnologia limpa", + "keywords": ["solar", "energia", "fotovoltaico", "carregador", "eletrico", + "veiculo eletrico", "bateria", "sustentavel", "evo", + "eletroposto", "estacao de carga", "painel solar"], + }, + "alimentacao": { + "nome": "Alimentação / restaurante / delivery / food", + "keywords": ["restaurante", "lanchonete", "padaria", "food", "delivery", + "comida", "cafe", "bar", "pizza", "hamburguer", "acai", + "confeitaria", "marmita", "ifood", "rappi", "food truck", + "menu certo", "refeicao", "cardapio"], + }, + "marketplace_app": { + "nome": "App / marketplace / plataforma de intermediação", + "keywords": ["app", "aplicativo", "marketplace", "plataforma", "uber", + "99", "delivery", "intermediacao", "dois lados", "comissao", + "usuario", "motorista", "entregador", "assinante"], + }, + "servicos": { + "nome": "Serviços profissionais / consultoria / saúde", + "keywords": ["consultoria", "agencia", "clinica", "escritorio", "academia", + "salao", "personal", "advocacia", "contabilidade", "marketing", + "design", "arquitetura", "curso", "mentoria", "terapia", + "evento", "produtora", "casamento", "festa", "buffet", + "escola", "ensino", "treinamento", "coaching", "veterinaria", + "construtora", "imobiliaria", "turismo", "viagem"], + }, + "franquia": { + "nome": "Franquia / rede / expansão de marca", + "keywords": ["franquia", "franqueado", "franqueador", "rede", "unidade", + "padronizacao", "royalties", "taxa de franquia", "manual"], + }, +} + + +# ============================================================ +# EIXO 2 — TIPOS DE DECISÃO +# ============================================================ + +TIPOS_DECISAO = { + "novo_negocio": { + "nome": "Abrir novo negócio / empresa / loja", + "keywords": ["abrir", "abertura", "novo", "nova", "comecar", "iniciar", + "empreender", "montar", "criar", "inaugurar", "fundar"], + "entidades_extras": [ + {"papel": "Entrante", "descricao": "O 
novo negócio proposto"}, + {"papel": "Incumbente", "descricao": "Concorrente(s) já estabelecido(s)"}, + {"papel": "ConsumidorAlvo", "descricao": "Público-alvo primário"}, + ], + "perguntas_chave": [ + "Qual o investimento total necessário?", + "Quando atinge o break-even?", + "Quem são os concorrentes e qual sua vantagem?", + "Qual o tamanho do mercado local?", + ], + }, + "novo_produto": { + "nome": "Lançar novo produto / linha / SKU", + "keywords": ["lancar", "lancamento", "novo produto", "nova linha", + "sku", "produto", "inovacao", "extensao de linha", + "versao", "modelo", "variante"], + "entidades_extras": [ + {"papel": "ProdutoNovo", "descricao": "O produto/linha sendo lançado"}, + {"papel": "ProdutoExistente", "descricao": "Produtos atuais da empresa"}, + {"papel": "ConcorrenteDireto", "descricao": "Quem já vende produto similar"}, + {"papel": "CanalDistribuicao", "descricao": "Como o produto chega ao consumidor"}, + ], + "perguntas_chave": [ + "O mercado precisa desse produto?", + "Canibaliza produtos existentes?", + "Qual o preço ideal vs concorrência?", + "Quais canais de distribuição usar?", + ], + }, + "promocao_campanha": { + "nome": "Promoção / campanha / ação de marketing", + "keywords": ["promocao", "campanha", "desconto", "sorteio", "dar", + "ganhar", "premiar", "copa", "black friday", "natal", + "dia das maes", "acao", "marketing", "engajamento", + "fidelizacao", "cashback", "cupom", "brinde", "tv", + "volta as aulas", "dia dos pais", "dia dos namorados", + "pascoa", "carnaval", "dia das criancas", "liquidacao", + "saldao", "aniversario", "inauguracao"], + "entidades_extras": [ + {"papel": "BaseAtual", "descricao": "Clientes/usuários atuais da empresa"}, + {"papel": "PublicoNovo", "descricao": "Público que a promoção quer atrair"}, + {"papel": "ConcorrenteReacao", "descricao": "Como concorrentes vão reagir"}, + {"papel": "CustoPromocao", "descricao": "Investimento total na ação"}, + {"papel": "MidiaCanal", "descricao": "Canais de divulgação da 
promoção"}, + ], + "perguntas_chave": [ + "Qual o ROI esperado da promoção?", + "Quantos clientes novos atrai vs custo?", + "O concorrente vai contra-atacar?", + "A mecânica é clara e atrativa?", + "Gera retenção ou só pico temporário?", + ], + }, + "expansao_geografica": { + "nome": "Expandir para nova cidade / região / filial", + "keywords": ["filial", "expandir", "expansao", "nova cidade", "abrir em", + "regional", "nova unidade", "ponto", "sucursal", "regiao", + "expandindo", "ampliar", "novas cidades", "segunda unidade", + "terceira unidade", "interior", "capital", "outra cidade"], + "entidades_extras": [ + {"papel": "MatrizOrigem", "descricao": "A empresa/loja original que já opera"}, + {"papel": "MercadoDestino", "descricao": "A nova cidade/região alvo"}, + {"papel": "ConcorrenteLocal", "descricao": "Quem já opera no destino"}, + {"papel": "ConsumidorLocal", "descricao": "Perfil do público na nova região"}, + ], + "perguntas_chave": [ + "O modelo que funciona na origem replica no destino?", + "Qual o tamanho do mercado local?", + "Quem já domina essa praça?", + "Qual o custo de operação remota?", + ], + }, + "expansao_franquia": { + "nome": "Abrir franquia / unidade franqueada", + "keywords": ["franquia", "franqueado", "franquear", "rede", "unidade", + "modelo de franquia", "royalties"], + "entidades_extras": [ + {"papel": "Franqueador", "descricao": "A marca que vende a franquia"}, + {"papel": "Franqueado", "descricao": "Quem compra e opera a unidade"}, + {"papel": "MercadoLocal", "descricao": "A cidade/bairro onde vai operar"}, + {"papel": "ConcorrenteLocal", "descricao": "Quem já compete nessa praça"}, + ], + "perguntas_chave": [ + "A marca tem força suficiente na região?", + "O modelo financeiro da franquia fecha?", + "Qual o suporte do franqueador?", + "A praça comporta mais uma unidade?", + ], + }, + "precificacao": { + "nome": "Mudança de preço / estratégia de pricing", + "keywords": ["preco", "precificacao", "pricing", "aumentar preco", + "desconto 
permanente", "tabela", "margem", "reajuste"], + "entidades_extras": [ + {"papel": "BaseClientes", "descricao": "Clientes atuais que serão afetados"}, + {"papel": "ConcorrentePreco", "descricao": "Referência de preço do mercado"}, + {"papel": "Elasticidade", "descricao": "Sensibilidade do cliente a preço"}, + ], + "perguntas_chave": [ + "Quantos clientes perco com esse aumento?", + "O concorrente está mais caro ou mais barato?", + "A percepção de valor justifica o preço?", + ], + }, +} + + +# ============================================================ +# DETECÇÃO BIDIMENSIONAL +# ============================================================ + +def detect_sector_and_decision(simulation_requirement: str) -> Tuple[str, str]: + """ + Detecta SETOR e TIPO DE DECISÃO a partir do texto. + + Returns: + (setor_key, decisao_key) + + Exemplos: + "abrir loja de calçados em Pádua" → ("varejo_local", "novo_negocio") + "Menu Certo promoção TV Copa" → ("marketplace_app", "promocao_campanha") + "Copapa lançar papel higiênico" → ("industria_fmcg", "novo_produto") + "Milla Internet filial São Fidélis" → ("telecom_isp", "expansao_geografica") + """ + text = simulation_requirement.lower() + # Remover acentos para matching (promoção → promocao, farmácias → farmacias) + text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('ascii') + + # Detectar setor + sector_scores = {} + for key, template in SETORES.items(): + score = 0 + for kw in template["keywords"]: + kw_clean = unicodedata.normalize('NFKD', kw).encode('ascii', 'ignore').decode('ascii') + if kw_clean in text: + score += 2 + if len(kw_clean) > 5: score += 3 + sector_scores[key] = score + + best_sector = max(sector_scores, key=sector_scores.get) + if sector_scores[best_sector] == 0: + best_sector = "varejo_local" # fallback + + # Detectar tipo de decisão + decision_scores = {} + for key, template in TIPOS_DECISAO.items(): + score = 0 + for kw in template["keywords"]: + kw_clean = unicodedata.normalize('NFKD', 
kw).encode('ascii', 'ignore').decode('ascii') + if kw_clean in text: + score += 2 + if len(kw_clean) > 5: score += 3 + decision_scores[key] = score + + best_decision = max(decision_scores, key=decision_scores.get) + if decision_scores[best_decision] == 0: + best_decision = "novo_negocio" # fallback + + # Ajustes de contexto cruzado + # Se tem "franquia" no setor, o tipo provavelmente é expansão + if best_sector == "franquia" and best_decision == "novo_negocio": + best_decision = "expansao_franquia" + sector_scores.pop("franquia") + best_sector = max(sector_scores, key=sector_scores.get) if sector_scores else "varejo_local" + + # Se detectou "delivery" ou "app" no alimentação, pode ser marketplace + if best_sector == "alimentacao" and any(kw in text for kw in ["app", "delivery", "aplicativo"]): + if sector_scores.get("marketplace_app", 0) > 0: + best_sector = "marketplace_app" + + return best_sector, best_decision + + +def get_ontology_system_prompt_v3(sector: str, decision: str) -> str: + """ + Gera prompt de ontologia combinando SETOR + TIPO DE DECISÃO. + + Cada combinação gera entidades diferentes: + - varejo_local + novo_negocio → Incumbente, Entrante, ConsumidorFiel... + - marketplace_app + promocao_campanha → BaseAtual, PublicoNovo, CustoPromocao... + - industria_fmcg + novo_produto → ProdutoNovo, CanalDistribuicao, ConcorrenteDireto... + """ + setor_info = SETORES.get(sector, SETORES["varejo_local"]) + decisao_info = TIPOS_DECISAO.get(decision, TIPOS_DECISAO["novo_negocio"]) + + # Montar entidades da decisão + entities_text = "" + for i, ent in enumerate(decisao_info["entidades_extras"], 1): + entities_text += f" {i}. **{ent['papel']}**: {ent['descricao']}\n" + + # Montar perguntas-chave + perguntas_text = "\n".join(f" - {p}" for p in decisao_info["perguntas_chave"]) + + return f"""\ +Você é especialista em design de ontologias para grafos de conhecimento. +Projete entidades e relacionamentos para simulação de opinião pública. 
+ +══════════════════════════════════════════════════════════════ +【CONTEXTO BIDIMENSIONAL】 +══════════════════════════════════════════════════════════════ + +**SETOR:** {setor_info['nome']} +**TIPO DE DECISÃO:** {decisao_info['nome']} + +Esta combinação determina QUAIS ENTIDADES são relevantes. +Não invente entidades genéricas — foque nas que importam para ESTA decisão. + +══════════════════════════════════════════════════════════════ +【ENTIDADES OBRIGATÓRIAS PARA ESTE TIPO DE DECISÃO】 +══════════════════════════════════════════════════════════════ + +{entities_text} + +Adapte os nomes e atributos ao SETOR específico ({setor_info['nome']}). +Adicione 4-6 entidades extras relevantes para este setor. +Os 2 últimos DEVEM ser fallback: Person e Organization. +Total: EXATAMENTE 10 entity_types. + +══════════════════════════════════════════════════════════════ +【PERGUNTAS QUE O RELATÓRIO DEVE RESPONDER】 +══════════════════════════════════════════════════════════════ + +As entidades e relações devem gerar dados suficientes para responder: + +{perguntas_text} + +Projete entidades que PRODUZAM esses dados durante a simulação. 
+ +══════════════════════════════════════════════════════════════ +【FORMATO DE SAÍDA — JSON】 +══════════════════════════════════════════════════════════════ + +{{ + "setor_detectado": "{sector}", + "tipo_decisao": "{decision}", + "entity_types": [ + {{ + "name": "PascalCase em inglês", + "papel": "papel funcional (Entrante, Incumbente, etc.)", + "description": "PT-BR (max 100 chars)", + "attributes": [{{"name": "snake_case", "type": "text", "description": "PT-BR"}}], + "examples": ["Exemplo em PT-BR"] + }} + ], + "edge_types": [ + {{ + "name": "UPPER_SNAKE_CASE", + "description": "PT-BR", + "source_targets": [{{"source": "Tipo", "target": "Tipo"}}] + }} + ], + "analysis_summary": "PT-BR", + "perguntas_chave": {json.dumps(decisao_info['perguntas_chave'], ensure_ascii=False)} +}} + +══════════════════════════════════════════════════════════════ +【REGRAS】 +══════════════════════════════════════════════════════════════ + +1. EXATAMENTE 10 entity_types (8 específicos + Person + Organization) +2. Cada entidade DEVE ter "papel" funcional +3. Nomes técnicos: INGLÊS (PascalCase / UPPER_SNAKE_CASE / snake_case) +4. Descrições: PORTUGUÊS DO BRASIL +5. ZERO caracteres chineses +6. Entidades = AGENTES reais que podem agir, NÃO conceitos abstratos +7. 
Atributos proibidos: name, uuid, group_id, created_at, summary +""" + + +# ============================================================ +# TABELA DE EXEMPLOS — Para referência e testes +# ============================================================ + +EXEMPLOS_CENARIOS = [ + { + "descricao": "Menu Certo app de delivery quer fazer promocao de dar uma TV na Copa", + "setor_esperado": "marketplace_app", + "decisao_esperada": "promocao_campanha", + "entidades_chave": ["BaseUsuariosAtual", "PublicoNovo", "ConcorrenteDelivery", + "CustoPromocao", "MidiaCanal", "RestaurantesParceiros"], + }, + { + "descricao": "Copapa companhia de papeis quer lancar novo papel higienico", + "setor_esperado": "industria_fmcg", + "decisao_esperada": "novo_produto", + "entidades_chave": ["ProdutoNovo", "ProdutoExistente", "ConcorrenteFMCG", + "Varejista", "ConsumidorFinal", "CanalDistribuicao"], + }, + { + "descricao": "Evo Solar quer lancar eletro carregadores Evo Mov", + "setor_esperado": "energia_tech", + "decisao_esperada": "novo_produto", + "entidades_chave": ["CarregadorEV", "MercadoVeiculosEletricos", "ConcorrenteTech", + "ConsumidorEV", "ReguladorEnergia", "PontosInstalacao"], + }, + { + "descricao": "Menu Certo quer abrir franquia em Nova Friburgo", + "setor_esperado": "marketplace_app", + "decisao_esperada": "expansao_franquia", + "entidades_chave": ["Franqueador", "MercadoLocal", "ConcorrenteLocal", + "ConsumidorLocal", "RestaurantesRegiao"], + }, + { + "descricao": "Milla Internet quer abrir filial em Sao Fidelis", + "setor_esperado": "telecom_isp", + "decisao_esperada": "expansao_geografica", + "entidades_chave": ["ProvedorOrigem", "MercadoDestino", "ConcorrenteISP", + "ConsumidorLocal", "InfraestruturaFibra"], + }, + { + "descricao": "itcast quer lancar o SaaS Converzas", + "setor_esperado": "saas_b2b", + "decisao_esperada": "novo_produto", + "entidades_chave": ["NovoProdutoSaaS", "ICPDecisor", "ConcorrenteSaaS", + "CanalAquisicao", "Integrador"], + }, + { + "descricao": 
"Fernando quer abrir loja de moveis em Criciuma", + "setor_esperado": "varejo_local", + "decisao_esperada": "novo_negocio", + "entidades_chave": ["NovaLoja", "Incumbente", "ConsumidorAlvo", + "CanalDigital", "CanalInformal"], + }, +] + + +if __name__ == "__main__": + print("=" * 70) + print("TESTE DE DETECÇÃO BIDIMENSIONAL") + print("=" * 70) + + acertos = 0 + total = len(EXEMPLOS_CENARIOS) + + for ex in EXEMPLOS_CENARIOS: + setor, decisao = detect_sector_and_decision(ex["descricao"]) + ok_setor = setor == ex["setor_esperado"] + ok_decisao = decisao == ex["decisao_esperada"] + ok = ok_setor and ok_decisao + if ok: acertos += 1 + + status = "✓" if ok else "✗" + print(f"\n{status} {ex['descricao'][:60]}") + print(f" Setor: {setor:20} {'✓' if ok_setor else '✗ esperado: ' + ex['setor_esperado']}") + print(f" Decisao: {decisao:20} {'✓' if ok_decisao else '✗ esperado: ' + ex['decisao_esperada']}") + + print(f"\n{'=' * 70}") + print(f"RESULTADO: {acertos}/{total} cenarios corretos ({acertos/total*100:.0f}%)") + print(f"Setores: {len(SETORES)} | Tipos decisao: {len(TIPOS_DECISAO)}") + print(f"Combinacoes possiveis: {len(SETORES) * len(TIPOS_DECISAO)}") diff --git a/backend/app/services/pdf_generator.py b/backend/app/services/pdf_generator.py new file mode 100644 index 0000000000..87dd6e5642 --- /dev/null +++ b/backend/app/services/pdf_generator.py @@ -0,0 +1,948 @@ +""" +AUGUR PDF Generator v2 — Consome AugurReportSchema JSON diretamente. + +Zero regex. Zero parsing de texto livre. +Todos os dados vêm do JSON estruturado produzido pelo report_agent v2. 
+ +Caminho no repo: backend/app/services/pdf_generator_v2.py + +Uso: + from app.services.pdf_generator_v2 import PDFGeneratorV2 + + pdf_bytes = PDFGeneratorV2.generate(report_structured_json) +""" + +import os +import re +import json +import tempfile +import logging + +logger = logging.getLogger(__name__) + +try: + import matplotlib + matplotlib.use('Agg') + import matplotlib.pyplot as plt + import numpy as np + HAS_MPL = True +except ImportError: + HAS_MPL = False + +from fpdf import FPDF + +# ============================================================ +# PALETA +# ============================================================ +TEAL = (0, 229, 195) +PURPLE = (124, 111, 247) +DARK = (26, 26, 46) +GRAY = (107, 114, 128) +LGRAY = (229, 231, 235) +RED = (255, 90, 90) +AMBER = (245, 166, 35) +GREEN = (34, 197, 94) +BLUE = (59, 130, 246) +WHITE = (255, 255, 255) + + +def _c(text): + if not isinstance(text, str): return str(text) + return (text.replace('\u2014','-').replace('\u2013','-') + .replace('\u201c','"').replace('\u201d','"') + .replace('\u2018',"'").replace('\u2019',"'") + .replace('\u2022','-').replace('\u2192','->').replace('\u2026','...')) + + +def _mpl_save(fig) -> str: + path = os.path.join(tempfile.gettempdir(), f'augur_{id(fig)}.png') + fig.savefig(path, dpi=200, bbox_inches='tight', pad_inches=0.15, facecolor='white') + plt.close(fig) + return path + + +def _rgb(c): return (c[0]/255, c[1]/255, c[2]/255) + + +# ============================================================ +# CHART GENERATORS — Cada um recebe dados do schema +# ============================================================ + +def chart_gauge(verdict_type: str) -> str: + fig, ax = plt.subplots(figsize=(4.5, 2.5)) + ax.set_xlim(-1.4,1.4); ax.set_ylim(-0.3,1.3); ax.set_aspect('equal'); ax.axis('off') + colors = [(180,126,_rgb(RED)),(126,72,(1,.6,.2)),(72,54,_rgb(AMBER)),(54,36,(.5,.85,.5)),(36,0,_rgb(GREEN))] + for start,end,color in colors: + from matplotlib.patches import Arc + 
ax.add_patch(Arc((0,0),2.2,2.2,angle=0,theta1=end,theta2=start,linewidth=18,color=color,capstyle='butt')) + v = verdict_type.upper().strip() + ang = {'GO':12,'NO-GO':168,'AJUSTAR':63}.get(v, 63) + rad = np.radians(ang) + ax.annotate('',xy=(0.85*np.cos(rad),0.85*np.sin(rad)),xytext=(0,0),arrowprops=dict(arrowstyle='->',color=_rgb(DARK),lw=2.5)) + ax.plot(0,0,'o',color=_rgb(DARK),markersize=8,zorder=5) + ax.text(-1.25,-.15,'NO-GO',fontsize=9,fontweight='bold',color=_rgb(RED),ha='center') + ax.text(0,1.25,'AJUSTAR',fontsize=9,fontweight='bold',color=_rgb(AMBER),ha='center') + ax.text(1.25,-.15,'GO',fontsize=9,fontweight='bold',color=_rgb(GREEN),ha='center') + ax.text(0,-.25,v,fontsize=16,fontweight='bold',color=_rgb(DARK),ha='center') + return _mpl_save(fig) + + +def chart_scenarios_bars(cenarios: list) -> str: + fig, ax = plt.subplots(figsize=(6.5, 2.5)) + names = [c.get('nome','')[:45] for c in cenarios] + probs = [c.get('probabilidade',33) for c in cenarios] + colors = [_rgb(TEAL), _rgb(AMBER), _rgb(RED)][:len(cenarios)] + bars = ax.barh(range(len(names)), probs, color=colors, height=0.55) + for b,p in zip(bars,probs): + ax.text(b.get_width()+1.5, b.get_y()+b.get_height()/2, f'{p}%', va='center', fontsize=13, fontweight='bold') + ax.set_yticks(range(len(names))); ax.set_yticklabels(names, fontsize=9) + ax.set_xlim(0,62); ax.set_xlabel('Probabilidade (%)', fontsize=9, color=_rgb(GRAY)) + ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False); ax.spines['left'].set_visible(False) + ax.tick_params(left=False) + return _mpl_save(fig) + + +def chart_financial_projection(cenarios: list) -> str: + fig, ax = plt.subplots(figsize=(6.5, 3.5)) + colors = [_rgb(TEAL), _rgb(AMBER), _rgb(RED)] + for i, c in enumerate(cenarios): + proj = c.get('projecao_faturamento_24m', []) + if proj: + months = np.arange(len(proj)) + ax.fill_between(months, proj, alpha=0.1, color=colors[i%3]) + ax.plot(months, proj, '-', color=colors[i%3], linewidth=2.5 - i*0.5, + 
label=f"{c.get('nome','')[:30]} ({c.get('probabilidade',0)}%)") + ax.axhline(y=30, color='#d1d5db', linestyle='--', linewidth=0.8) + ax.text(24.5, 30, 'Break-even', fontsize=7, color=_rgb(GRAY), va='center') + ax.set_xlim(0,24); ax.set_xlabel('Meses', fontsize=9, color=_rgb(GRAY)) + ax.set_ylabel('Faturamento (R$ mil/mes)', fontsize=9, color=_rgb(GRAY)) + ax.legend(fontsize=7, loc='upper left', framealpha=0.9) + ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False) + return _mpl_save(fig) + + +def chart_risk_scatter(riscos: list) -> str: + fig, ax = plt.subplots(figsize=(5.5, 4)) + imap = {'alto':95,'medio':60,'medio-alto':75,'baixo':30,'critico':100} + ax.axhspan(75,100,color='#fff0f0',zorder=0); ax.axhspan(50,75,color='#fffbf0',zorder=0) + for r in riscos: + prob = r.get('probabilidade',50) + impact_val = imap.get(r.get('impacto','medio').lower().replace('é','e'),60) + color = _rgb(RED) if impact_val > 75 else (_rgb(AMBER) if impact_val > 50 else _rgb(GREEN)) + ax.scatter(prob, impact_val, s=350, color=color, alpha=0.85, edgecolors='white', linewidth=1.5, zorder=4) + ax.text(prob, impact_val, f"R{r.get('numero',0)}", ha='center', va='center', fontsize=8, fontweight='bold', color='white', zorder=5) + ax.set_xlim(40,100); ax.set_ylim(20,105) + ax.set_xlabel('Probabilidade (%)', fontsize=9); ax.set_ylabel('Impacto', fontsize=9) + ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False) + # Legend + for i, r in enumerate(riscos[:7]): + ax.text(101, 100-i*8, f"R{r.get('numero',i+1)} {r.get('titulo','')[:30]}", fontsize=6, color=_rgb(GRAY)) + return _mpl_save(fig) + + +def chart_emotion_radar(emocoes: list) -> str: + fig = plt.figure(figsize=(7, 3.5)) + labels = [e.get('nome','') for e in emocoes] + values = [e.get('percentual',0) for e in emocoes] + N = len(labels) + angles = [n/N * 2 * np.pi for n in range(N)] + [0] + values_c = values + [values[0]] + ax = fig.add_subplot(121, polar=True) + ax.set_theta_offset(np.pi/2); 
ax.set_theta_direction(-1) + ax.plot(angles, values_c, 'o-', linewidth=2, color=_rgb(PURPLE), markersize=4) + ax.fill(angles, values_c, alpha=0.15, color=_rgb(PURPLE)) + ax.set_xticks(angles[:-1]); ax.set_xticklabels(labels, fontsize=7.5) + ax.set_ylim(0, max(values)*1.3) + # Bars + ax2 = fig.add_subplot(122) + bar_colors = [_rgb(TEAL),_rgb(AMBER),_rgb(PURPLE),_rgb(RED),_rgb(BLUE),_rgb(GRAY)] + bars = ax2.barh(range(N), values, color=[bar_colors[i%6] for i in range(N)], height=0.5) + for b,v in zip(bars,values): ax2.text(b.get_width()+0.5, b.get_y()+b.get_height()/2, f'{v}%', va='center', fontsize=9, fontweight='bold') + ax2.set_yticks(range(N)); ax2.set_yticklabels(labels, fontsize=9); ax2.invert_yaxis() + ax2.set_xlim(0,40); ax2.spines['top'].set_visible(False); ax2.spines['right'].set_visible(False); ax2.spines['left'].set_visible(False) + ax2.tick_params(left=False) + fig.tight_layout() + return _mpl_save(fig) + + +def chart_emotion_timeline(evolucao: dict) -> str: + fig, ax = plt.subplots(figsize=(6.5, 3)) + style = {'confianca':(_rgb(TEAL),'-',2.5), 'ceticismo':(_rgb(AMBER),'-',2.5), + 'empolgacao':(_rgb(PURPLE),'--',1.5), 'medo':(_rgb(RED),':',1.5)} + for key, data in evolucao.items(): + s = style.get(key, (_rgb(GRAY),'-',1)) + ax.plot(range(len(data)), data, linestyle=s[1], color=s[0], linewidth=s[2], label=key.capitalize()) + for x,lbl in [(3,'Fim curiosidade'),(6,'Teste'),(12,'Break-even'),(18,'Consolidacao')]: + ax.axvline(x=x, color='#e5e7eb', linestyle='--', linewidth=0.8) + ax.text(x, ax.get_ylim()[1]*0.95, lbl, fontsize=6.5, color=_rgb(GRAY), ha='center') + ax.set_xlim(0,24); ax.set_xlabel('Meses', fontsize=9); ax.set_ylabel('Intensidade (%)', fontsize=9) + ax.legend(fontsize=7.5, loc='upper right'); ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False) + return _mpl_save(fig) + + +def chart_agent_spectrum(agentes: list) -> str: + fig, ax = plt.subplots(figsize=(6.5, 2)) + ax.set_xlim(-0.5,10.5); ax.set_ylim(-0.8,1.5); 
ax.axis('off') + gradient = np.linspace(0,1,256).reshape(1,-1) + cmap = matplotlib.colors.LinearSegmentedColormap.from_list('', [_rgb(TEAL),'#e5e7eb',_rgb(RED)]) + ax.imshow(gradient, aspect='auto', cmap=cmap, extent=[0,10,0,0.4], zorder=1) + ax.text(0,-0.3,'Apoiador',fontsize=8,fontweight='bold',color=_rgb(TEAL)) + ax.text(5,-0.3,'Neutro',fontsize=8,color=_rgb(GRAY),ha='center') + ax.text(10,-0.3,'Resistente',fontsize=8,fontweight='bold',color=_rgb(RED),ha='right') + type_colors = {'Apoiador':_rgb(TEAL),'Neutro':_rgb(AMBER),'Resistente':_rgb(RED),'Cauteloso':_rgb(PURPLE)} + for a in agentes: + x = a.get('posicao_espectro',0.5) * 10 + color = type_colors.get(a.get('tipo','Neutro'), _rgb(GRAY)) + ax.plot(x, 0.2, 'o', color=color, markersize=14, zorder=4, markeredgecolor='white', markeredgewidth=1.5) + ax.text(x, 0.7, a.get('nome',''), fontsize=7.5, fontweight='bold', ha='center', linespacing=1.1) + ax.text(x, 1.1, a.get('papel_na_dinamica',''), fontsize=6, ha='center', color=_rgb(GRAY)) + return _mpl_save(fig) + + +def chart_stack_ranking(recomendacoes: list) -> str: + fig, ax = plt.subplots(figsize=(6.5, 2.8)) + colors = [_rgb(TEAL),_rgb(PURPLE),_rgb(AMBER),_rgb(BLUE),_rgb(GREEN)] + labels = [r.get('titulo','') for r in recomendacoes] + values = [r.get('impacto_relativo',50) for r in recomendacoes] + bars = ax.barh(range(len(labels)), values, color=[colors[i%5] for i in range(len(labels))], height=0.55, alpha=0.7) + for b,l in zip(bars,labels): + ax.text(2, b.get_y()+b.get_height()/2, l[:55], va='center', fontsize=8.5, fontweight='bold', + color='white' if b.get_width()>60 else _rgb(DARK), zorder=5) + ax.set_xlim(0,110); ax.set_yticks([]); ax.invert_yaxis() + ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False); ax.spines['left'].set_visible(False); ax.spines['bottom'].set_visible(False) + ax.tick_params(bottom=False, labelbottom=False) + return _mpl_save(fig) + + +def chart_confidence_bars(previsoes: list) -> str: + fig, ax = 
plt.subplots(figsize=(6.5, 4)) + labels = [p.get('titulo','')[:30] for p in previsoes] + probs = [p.get('probabilidade',50) for p in previsoes] + margins = [p.get('margem_erro',5) for p in previsoes] + colors = [_rgb(TEAL) if p>=70 else (_rgb(AMBER) if p>=55 else _rgb(RED)) for p in probs] + bars = ax.barh(range(len(labels)), probs, color=colors, height=0.5, alpha=0.7) + ax.errorbar(probs, range(len(labels)), xerr=margins, fmt='none', ecolor=_rgb(DARK), elinewidth=1.2, capsize=4) + for i,(p,m) in enumerate(zip(probs,margins)): + ax.text(min(p+m+2,100), i, f'{p}% +/-{m}', va='center', fontsize=8, fontweight='bold') + ax.set_yticks(range(len(labels))); ax.set_yticklabels(labels, fontsize=8); ax.set_xlim(0,105); ax.invert_yaxis() + ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False); ax.spines['left'].set_visible(False) + ax.tick_params(left=False) + return _mpl_save(fig) + + +def chart_positioning(players: list) -> str: + fig, ax = plt.subplots(figsize=(5.5, 5)) + ax.axhline(y=50,color='#e5e7eb',lw=0.8); ax.axvline(x=50,color='#e5e7eb',lw=0.8) + role_colors = {'Incumbente':_rgb(RED),'Entrante':_rgb(TEAL),'CanalDigital':_rgb(GRAY), + 'CanalInformal':_rgb(AMBER),'desejado':_rgb(GREEN),'atual':_rgb(BLUE)} + for p in players: + x,y = p.get('x',50), p.get('y',50) + color = role_colors.get(p.get('papel',''), _rgb(GRAY)) + ax.scatter(x,y,s=300,color=color,alpha=0.5,edgecolors=color,linewidth=1.5,zorder=3) + ax.text(x,y-5,p.get('nome',''),fontsize=7.5,ha='center',fontweight='bold',zorder=5) + ax.set_xlim(0,100); ax.set_ylim(0,100) + ax.set_xlabel('Preco acessivel <----> Preco premium', fontsize=8, color=_rgb(GRAY)) + ax.set_ylabel('Funcional <----> Aspiracional', fontsize=8, color=_rgb(GRAY)) + ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False) + return _mpl_save(fig) + + +def chart_final_radar(scores: dict) -> str: + fig = plt.figure(figsize=(4.5,4.5)) + labels = list(scores.keys()); values = list(scores.values()) + N = 
len(labels) + angles = [n/N*2*np.pi for n in range(N)] + [0] + values_c = values + [values[0]] + ax = fig.add_subplot(111, polar=True) + ax.set_theta_offset(np.pi/2); ax.set_theta_direction(-1) + ax.plot(angles, values_c, 'o-', linewidth=2.5, color=_rgb(TEAL), markersize=6) + ax.fill(angles, values_c, alpha=0.15, color=_rgb(TEAL)) + for a,v in zip(angles[:-1],values): ax.text(a, v+8, str(v), ha='center', fontsize=11, fontweight='bold') + ax.set_xticks(angles[:-1]); ax.set_xticklabels([l.replace('_',' ').title() for l in labels], fontsize=8.5) + ax.set_ylim(0,100) + total = sum(values)//len(values) + ax.set_title(f'Viabilidade Geral: {total}/100', fontsize=12, fontweight='bold', pad=20) + return _mpl_save(fig) + + +def chart_roi(riscos_evitados: list, custo_analise: str, risco_total: str) -> str: + fig, ax = plt.subplots(figsize=(5.5, 3)) + cats = [r.get('titulo','')[:15] for r in riscos_evitados] + ['Total'] + # Parse R$ values + def parse_val(s): + m = re.search(r'(\d+)', str(s).replace('.','')) + return int(m.group(1)) if m else 50 + sem = [parse_val(r.get('valor_risco','50k')) for r in riscos_evitados] + sem.append(parse_val(risco_total)) + com = [parse_val(custo_analise)] * len(riscos_evitados) + [parse_val(custo_analise)] + x = np.arange(len(cats)); width=0.35 + ax.bar(x-width/2, sem, width, label='Sem AUGUR (risco)', color=_rgb(RED), alpha=0.6) + ax.bar(x+width/2, com, width, label='Com AUGUR (invest.)', color=_rgb(TEAL), alpha=0.7) + for i,v in enumerate(sem): ax.text(i-width/2, v+2, f'R${v}k', ha='center', fontsize=7, color=_rgb(RED), fontweight='bold') + for i,v in enumerate(com): ax.text(i+width/2, v+2, f'R${v}k', ha='center', fontsize=7, color=_rgb(TEAL), fontweight='bold') + ax.set_xticks(x); ax.set_xticklabels(cats, fontsize=7.5) + ax.legend(fontsize=8); ax.spines['top'].set_visible(False); ax.spines['right'].set_visible(False) + return _mpl_save(fig) + + +# ============================================================ +# PDF GENERATOR V2 — Consome 
AugurReportSchema +# ============================================================ + +class PDFGeneratorV2: + """ + Gera PDF a partir do JSON estruturado (AugurReportSchema). + + ZERO regex. ZERO parsing de texto livre. + Cada seção renderiza diretamente do campo tipado do JSON. + """ + + @classmethod + def generate(cls, data: dict, output_path: str = None) -> bytes: + """ + Gera o PDF completo a partir do JSON estruturado. + + Args: + data: JSON no formato AugurReportSchema + output_path: caminho para salvar (opcional) + + Returns: + bytes do PDF + """ + from io import BytesIO + + pdf = cls._create_pdf() + + meta = data.get('meta', {}) + veredicto = data.get('veredicto', {}) + dashboard = data.get('dashboard', {}) + cenarios_data = data.get('cenarios', {}) + cenarios = cenarios_data.get('cenarios', []) + riscos_data = data.get('riscos', {}) + riscos = riscos_data.get('riscos', []) + emocional = data.get('emocional', {}) + agentes = data.get('agentes', []) + forcas = data.get('forcas', {}) + cronologia = data.get('cronologia', {}) + padroes = data.get('padroes', []) + recomendacoes = data.get('recomendacoes', []) + checklist = data.get('checklist', []) + previsoes = data.get('previsoes', []) + posicionamento = data.get('posicionamento', {}) + roi = data.get('roi', {}) + sintese = data.get('sintese', {}) + + # P1: Capa + cls._page_cover(pdf, meta, veredicto) + # P2: Decisão 30s + cls._page_decision(pdf, veredicto, dashboard) + # P3: Sumário + Metodologia + cls._page_toc(pdf, meta) + # P4: Resumo Executivo + cls._page_executive(pdf, veredicto) + # P5: Dashboard KPIs (NOVO) + cls._page_dashboard(pdf, dashboard) + # P6: Cenários Futuros + cls._page_scenarios(pdf, cenarios, cenarios_data) + # P7: Cenários Financeiros (NOVO) + cls._page_financial(pdf, cenarios) + # P8-9: Fatores de Risco + cls._page_risks(pdf, riscos_data, riscos) + # P10: Análise Emocional + cls._page_emotions(pdf, emocional) + # P11: Perfis dos Agentes (NOVO) + cls._page_agents(pdf, agentes) + # P12: 
Mapa de Forças + cls._page_forces(pdf, forcas) + # P13: Cronologia + cls._page_timeline(pdf, cronologia) + # P14: Padrões Emergentes + cls._page_patterns(pdf, padroes) + # P15: Recomendações + cls._page_recommendations(pdf, recomendacoes) + # P16: Checklist (NOVO) + cls._page_checklist(pdf, checklist) + # P17: Previsões + cls._page_predictions(pdf, previsoes) + # P18: Posicionamento + cls._page_positioning(pdf, posicionamento) + # P19: ROI (NOVO) + cls._page_roi(pdf, roi) + # P20: Síntese Final + cls._page_synthesis(pdf, sintese, veredicto) + # P21: Contracapa + cls._page_back(pdf, veredicto) + + if output_path: + pdf.output(output_path) + with open(output_path, 'rb') as f: + return f.read() + else: + return pdf.output() + + @classmethod + def _create_pdf(cls): + """Cria instância do FPDF com fontes e configuração.""" + pdf = FPDF('P', 'mm', 'A4') + pdf.set_auto_page_break(auto=True, margin=18) + pdf.add_font('DejaVu', '', '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf') + pdf.add_font('DejaVu', 'B', '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf') + pdf.add_font('DejaVu', 'I', '/usr/share/fonts/truetype/dejavu/DejaVuSans-Oblique.ttf') + pdf.add_font('Mono', '', '/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf') + pdf.add_font('Mono', 'B', '/usr/share/fonts/truetype/dejavu/DejaVuSansMono-Bold.ttf') + pdf._margin = 18 + pdf._cw = 210 - 36 # content width + return pdf + + # ── Helper methods ── + + @classmethod + def _header(cls, pdf, section_num=None, section_total=16, title="", is_new=False): + """Standard page header with section tag.""" + pdf.add_page() + # Top accent line + pdf.set_draw_color(*TEAL); pdf.set_line_width(0.8) + pdf.line(18, 14, 192, 14) + # Header text + pdf.set_font('DejaVu', '', 7); pdf.set_text_color(*GRAY) + pdf.set_xy(18, 8) + pdf.cell(174, 5, _c(f'AUGUR Relatorio de Previsao'), 0, 0, 'L') + pdf.set_y(18) + # Section tag + if section_num: + tag = f'SECAO {section_num:02d} DE {section_total}' + if is_new: tag += ' - NOVO' + color 
= TEAL if is_new else TEAL + pdf.set_fill_color(240, 253, 250) + pdf.set_font('DejaVu', 'B', 8); pdf.set_text_color(*color) + w = pdf.get_string_width(tag) + 12 + pdf.cell(w, 7, _c(tag), 0, 1, 'C', True); pdf.ln(3) + # Title + if title: + pdf.set_font('DejaVu', 'B', 18); pdf.set_text_color(*DARK) + pdf.multi_cell(174, 8, _c(title), 0, 'L'); pdf.ln(2) + + @classmethod + def _body(cls, pdf, text, size=9.5): + pdf.set_font('DejaVu', '', size); pdf.set_text_color(75, 85, 99) + pdf.multi_cell(174, 5.5, _c(text), 0, 'L'); pdf.ln(2) + + @classmethod + def _bold(cls, pdf, text, size=9.5): + pdf.set_font('DejaVu', 'B', size); pdf.set_text_color(*DARK) + pdf.multi_cell(174, 5.5, _c(text), 0, 'L'); pdf.ln(1) + + @classmethod + def _quote(cls, pdf, text): + y = pdf.get_y() + pdf.set_font('DejaVu', 'I', 9); pdf.set_text_color(90, 90, 110) + lines = pdf.multi_cell(162, 5, _c(f'"{text}"'), 0, 'L', dry_run=True, output='LINES') + h = len(lines) * 5 + 6 + pdf.set_fill_color(245, 245, 250); pdf.rect(18, y, 174, h, 'F') + pdf.set_draw_color(*PURPLE); pdf.set_line_width(0.6); pdf.line(18, y, 18, y+h) + pdf.set_xy(24, y+3); pdf.multi_cell(162, 5, _c(f'"{text}"'), 0, 'L') + pdf.ln(3) + + @classmethod + def _chart(cls, pdf, chart_path, w=None): + if not chart_path or not os.path.exists(chart_path): return + if w is None: w = 174 + x = 18 + (174 - w) / 2 + pdf.image(chart_path, x=x, w=w); pdf.ln(3) + + @classmethod + def _kpi_grid(cls, pdf, items): + """items: [(value, label, color), ...]""" + n = len(items); w = (174 - (n-1)*3) / n; y0 = pdf.get_y() + for i, (val, label, color) in enumerate(items): + x = 18 + i*(w+3) + pdf.set_fill_color(248,248,252); pdf.rect(x, y0, w, 20, 'DF') + pdf.set_font('Mono', 'B', 13); pdf.set_text_color(*color) + pdf.set_xy(x, y0+2); pdf.cell(w, 8, _c(val), 0, 0, 'C') + pdf.set_font('DejaVu', '', 7); pdf.set_text_color(*GRAY) + pdf.set_xy(x, y0+11); pdf.cell(w, 5, _c(label), 0, 0, 'C') + pdf.set_y(y0 + 24) + + @classmethod + def _footer_line(cls, pdf): + 
pdf.set_y(-14); pdf.set_font('DejaVu','',7); pdf.set_text_color(*GRAY) + pdf.cell(87, 5, _c('augur.itcast.com.br'), 0, 0, 'L') + pdf.cell(87, 5, _c(f'Pagina {pdf.page_no()-1}'), 0, 0, 'R') + + # ── Page builders ── + # Each method consumes ONE section of the schema JSON directly. + # No parsing. No regex. Just data → layout. + + @classmethod + def _page_cover(cls, pdf, meta, veredicto): + pdf.add_page() + pdf.set_fill_color(*WHITE); pdf.rect(0,0,210,297,'F') + pdf.set_fill_color(*TEAL); pdf.rect(0,0,6,297,'F') # left bar + pdf.set_fill_color(*TEAL); pdf.rect(6,30,204,1.5,'F') + pdf.set_xy(18,50); pdf.set_font('Mono','B',38); pdf.set_text_color(*DARK) + pdf.cell(0,18,_c('A U G U R'),0,1,'C') + pdf.set_font('DejaVu','',10); pdf.set_text_color(*GRAY) + pdf.cell(0,7,_c('Plataforma de Previsao de Mercado por IA'),0,1,'C') + pdf.ln(12) + pdf.set_font('DejaVu','',11); pdf.set_text_color(*DARK) + pdf.cell(0,6,_c('Relatorio de Previsao:'),0,1,'C'); pdf.ln(2) + pdf.set_font('DejaVu','B',15) + pdf.multi_cell(0,8,_c(meta.get('projeto','Projeto AUGUR')),0,'C') + # Gauge + if HAS_MPL: + pdf.ln(10) + gp = chart_gauge(veredicto.get('tipo','AJUSTAR')) + cls._chart(pdf, gp, w=85) + # Verdict badge + vt = veredicto.get('tipo','AJUSTAR') + vc = AMBER if vt=='AJUSTAR' else (GREEN if vt=='GO' else RED) + pdf.ln(2); y = pdf.get_y(); bw=90; bx=(210-bw)/2 + pdf.set_fill_color(255,248,230); pdf.set_draw_color(*vc); pdf.set_line_width(0.8) + pdf.rect(bx,y,bw,12,'DF') + pdf.set_font('DejaVu','B',13); pdf.set_text_color(*vc) + pdf.set_xy(bx,y+1); pdf.cell(bw,10,_c(f'VEREDICTO: {vt}'),0,1,'C') + pdf.ln(6) + pdf.set_font('DejaVu','I',9); pdf.set_text_color(*GRAY); pdf.set_x(25) + pdf.multi_cell(160,5,_c(veredicto.get('frase_chave','')),0,'C') + # Footer + pdf.set_y(255); pdf.set_draw_color(*LGRAY); pdf.line(50,pdf.get_y(),160,pdf.get_y()); pdf.ln(6) + pdf.set_font('DejaVu','',9); pdf.set_text_color(*GRAY) + pdf.cell(0,5,_c(meta.get('data_geracao','')[:10]),0,1,'C') + 
pdf.set_font('DejaVu','B',8); pdf.set_text_color(180,180,190) + pdf.cell(0,5,_c('CONFIDENCIAL'),0,1,'C') + pdf.set_fill_color(*TEAL); pdf.rect(6,285,204,1.5,'F') + + @classmethod + def _page_decision(cls, pdf, veredicto, dashboard): + cls._header(pdf, title='Decisao em 30 segundos') + cls._body(pdf, 'O que voce precisa saber para decidir agora:') + cls._kpi_grid(pdf, [ + (dashboard.get('capital_giro_necessario','?'), 'Capital giro crediario', DARK), + (dashboard.get('breakeven_cenario1','?'), 'Break-even provavel', PURPLE), + (dashboard.get('prob_sobrevivencia_24m','?'), 'Prob. sobrevivencia', TEAL), + (dashboard.get('margem_bruta_alvo','?'), 'Margem bruta alvo', AMBER), + ]) + cls._bold(pdf, 'Fatos que definem esta decisao:') + for f in veredicto.get('top5_fatos', []): + cls._bold(pdf, f' {f.get("titulo","")}', 9) + cls._body(pdf, f' {f.get("descricao","")}', 8.5) + cls._quote(pdf, veredicto.get('frase_chave','')) + + @classmethod + def _page_toc(cls, pdf, meta): + cls._header(pdf, title='Sumario') + sections = ['01 Resumo Executivo','02 Dashboard de KPIs (NOVO)','03 Cenarios Futuros', + '04 Cenarios Financeiros (NOVO)','05 Fatores de Risco','06 Analise Emocional', + '07 Perfis dos Agentes (NOVO)','08 Mapa de Forcas','09 Cronologia', + '10 Padroes Emergentes','11 Recomendacoes Estrategicas','12 Checklist GO (NOVO)', + '13 Previsoes com Intervalo de Confianca','14 Posicionamento', + '15 ROI da Analise (NOVO)','16 Sintese Final'] + for s in sections: + is_new = 'NOVO' in s + pdf.set_font('DejaVu','B' if is_new else '',9.5) + pdf.set_text_color(*(TEAL if is_new else DARK)) + pdf.cell(0,6.5,_c(s),0,1,'L') + pdf.ln(6); cls._bold(pdf, 'Sobre esta analise', 10) + cls._body(pdf, f'Projeto: {meta.get("projeto","")}\nAgentes: {meta.get("num_agentes",0)} perfis sinteticos\nRodadas: {meta.get("num_rodadas",0)} ciclos ({meta.get("periodo_simulado_meses",24)} meses)\nModelo: {meta.get("modelo_ia","GPT-5.4")}', 8.5) + + @classmethod + def _page_executive(cls, pdf, veredicto): + 
cls._header(pdf, 1, title='Resumo Executivo') + cls._bold(pdf, veredicto.get('resumo_executivo','')) + if veredicto.get('leitura_para_decisao'): + cls._bold(pdf, 'Leitura para decisao:', 9) + cls._body(pdf, veredicto.get('leitura_para_decisao','')) + + @classmethod + def _page_dashboard(cls, pdf, dashboard): + cls._header(pdf, 2, title='Dashboard de KPIs - 24 Meses', is_new=True) + d = dashboard + cls._kpi_grid(pdf, [ + (d.get('ticket_medio','?'),'Ticket medio',TEAL), + (d.get('volume_breakeven','?'),'Volume break-even',PURPLE), + (d.get('margem_bruta_alvo','?'),'Margem bruta',DARK), + (d.get('capital_giro_necessario','?'),'Capital giro',AMBER), + ]) + cls._kpi_grid(pdf, [ + (d.get('recompra_alvo','?'),'Recompra alvo',TEAL), + (d.get('vendas_por_indicacao','?'),'Vendas indicacao',PURPLE), + (d.get('erosao_margem_sazonal','?'),'Erosao sazonal',RED), + (d.get('breakeven_cenario1','?'),'Break-even cen.1',DARK), + ]) + if d.get('investimento_total_estimado'): + cls._bold(pdf, f'Investimento total estimado: {d["investimento_total_estimado"]}') + for item in d.get('composicao_investimento',[]): + cls._body(pdf, f' - {item.get("item","")}: {item.get("valor","")}', 8.5) + # Semaforo + cls._bold(pdf, 'Faixas de monitoramento') + # Simple 3-column layout + y0 = pdf.get_y(); w = (174-6)/3 + for i,(items,color,bg,title) in enumerate([ + (d.get('sinais_consolidacao',[]),(15,80,10),(234,243,222),'Consolidacao'), + (d.get('sinais_alerta',[]),(133,79,11),(250,238,218),'Alerta'), + (d.get('sinais_risco_critico',[]),(163,45,45),(252,235,235),'Risco critico'), + ]): + x = 18 + i*(w+3) + pdf.set_fill_color(*bg); pdf.rect(x,y0,w,45,'F') + pdf.set_font('DejaVu','B',8); pdf.set_text_color(*color) + pdf.set_xy(x+3,y0+2); pdf.cell(w-6,5,_c(title),0,1,'L') + pdf.set_font('DejaVu','',7.5); cur_y = y0+8 + for it in items: + pdf.set_xy(x+3,cur_y); pdf.multi_cell(w-6,4,_c(f'- {it}'),0,'L') + cur_y = pdf.get_y() + pdf.set_y(y0+50) + + @classmethod + def _page_scenarios(cls, pdf, cenarios, 
cenarios_data): + cls._header(pdf, 3, title='Cenarios Futuros') + if HAS_MPL and cenarios: cls._chart(pdf, chart_scenarios_bars(cenarios), w=150) + for c in cenarios: + cls._bold(pdf, f'{c.get("nome","")}') + pdf.set_font('Mono','B',9); pdf.set_text_color(*TEAL) + pdf.cell(0,5,_c(f'Probabilidade: {c.get("probabilidade",0)}% | Break-even: {c.get("breakeven","")}'),0,1,'L'); pdf.ln(2) + cls._body(pdf, c.get('descricao','')) + if c.get('citacao_agente'): cls._quote(pdf, c['citacao_agente']) + if cenarios_data.get('ponto_bifurcacao'): + cls._body(pdf, f'Ponto de bifurcacao: {cenarios_data["ponto_bifurcacao"]}', 9) + + @classmethod + def _page_financial(cls, pdf, cenarios): + cls._header(pdf, 4, title='Cenarios Financeiros Comparados', is_new=True) + if HAS_MPL and cenarios and cenarios[0].get('projecao_faturamento_24m'): + cls._chart(pdf, chart_financial_projection(cenarios), w=155) + # Table + cls._bold(pdf, 'Comparativo por cenario') + col_w = [45,38,38,38] + headers = ['Metrica'] + [f'Cen. 
{i+1} ({c.get("probabilidade",0)}%)' for i,c in enumerate(cenarios)] + rows = [ + ['Break-even'] + [c.get('breakeven','') for c in cenarios], + ['Faturamento M24'] + [c.get('faturamento_m24','') for c in cenarios], + ['Margem bruta'] + [c.get('margem_bruta','') for c in cenarios], + ['Risco central'] + [c.get('risco_central','') for c in cenarios], + ] + y = pdf.get_y() + for i, h in enumerate(headers[:4]): + x = 18 + sum(col_w[:i]) + pdf.set_fill_color(240,253,250); pdf.set_font('DejaVu','B',8); pdf.set_text_color(*TEAL) + pdf.set_xy(x,y); pdf.cell(col_w[i],7,_c(h),1,0,'C',True) + pdf.ln(7) + for row in rows: + y = pdf.get_y() + for i, val in enumerate(row[:4]): + x = 18 + sum(col_w[:i]) + pdf.set_font('DejaVu','' if i>0 else 'B',8); pdf.set_text_color(*(DARK if i==0 else GRAY)) + pdf.set_xy(x,y); pdf.cell(col_w[i],6,_c(val),1,0,'C' if i>0 else 'L') + pdf.ln(6) + + @classmethod + def _page_risks(cls, pdf, riscos_data, riscos): + cls._header(pdf, 5, title='Fatores de Risco') + cls._body(pdf, riscos_data.get('texto_introducao','')) + if HAS_MPL and riscos: cls._chart(pdf, chart_risk_scatter(riscos), w=135) + for r in riscos: + y = pdf.get_y() + if y > 255: pdf.add_page(); y = pdf.get_y() + imp = r.get('impacto','Medio') + pdf.set_fill_color(*(RED if imp=='Alto' else AMBER)); pdf.rect(18,y,3,18,'F') + pdf.set_font('DejaVu','B',9.5); pdf.set_text_color(*DARK); pdf.set_xy(24,y) + pdf.cell(0,5,_c(f'#{r.get("numero",0)} {r.get("titulo","")}'),0,1,'L') + pdf.set_font('Mono','',8); pdf.set_text_color(*GRAY); pdf.set_x(24) + pdf.cell(0,4,_c(f'Probabilidade: {r.get("probabilidade",0)}% | Impacto: {imp}'),0,1,'L') + pdf.set_font('DejaVu','',8.5); pdf.set_text_color(75,85,99); pdf.set_x(24) + pdf.multi_cell(168,4.5,_c(r.get('descricao','')),0,'L'); pdf.ln(1) + if r.get('citacao_agente'): cls._quote(pdf, r['citacao_agente']) + + @classmethod + def _page_emotions(cls, pdf, emocional): + cls._header(pdf, 6, title='Analise Emocional') + emocoes = emocional.get('emocoes',[]) + if 
HAS_MPL and emocoes: cls._chart(pdf, chart_emotion_radar(emocoes), w=160) + cls._body(pdf, emocional.get('saldo_positivo_vs_negativo','')) + if emocional.get('texto_confianca'): + cls._bold(pdf, 'Confianca - emocao mais relevante') + cls._body(pdf, emocional['texto_confianca']) + if emocional.get('citacao_confianca'): cls._quote(pdf, emocional['citacao_confianca']) + if emocional.get('texto_ceticismo'): + cls._bold(pdf, 'Ceticismo - defesa natural') + cls._body(pdf, emocional['texto_ceticismo']) + if emocional.get('citacao_ceticismo'): cls._quote(pdf, emocional['citacao_ceticismo']) + evolucao = emocional.get('evolucao_24m',{}) + if HAS_MPL and evolucao: + cls._bold(pdf, 'Evolucao temporal das emocoes') + cls._chart(pdf, chart_emotion_timeline(evolucao), w=150) + + @classmethod + def _page_agents(cls, pdf, agentes): + cls._header(pdf, 7, title='Perfis dos Agentes - Quem disse o que', is_new=True) + cls._body(pdf, 'Cada agente e um perfil sintetico que representa um segmento real do mercado.') + type_colors = {'Apoiador':TEAL,'Neutro':AMBER,'Resistente':RED,'Cauteloso':PURPLE} + for a in agentes: + y = pdf.get_y() + if y > 248: pdf.add_page(); y = pdf.get_y() + color = type_colors.get(a.get('tipo','Neutro'), GRAY) + pdf.set_fill_color(*color); pdf.rect(18,y,3,22,'F') + pdf.set_font('DejaVu','B',10); pdf.set_text_color(*DARK); pdf.set_x(24) + pdf.cell(40,5,_c(a.get('nome','')),0,0,'L') + pdf.set_font('DejaVu','',8); pdf.set_text_color(*color) + pdf.cell(0,5,_c(f' {a.get("papel_na_dinamica","")}'),0,1,'L') + pdf.set_font('DejaVu','',8.5); pdf.set_text_color(*GRAY); pdf.set_x(24) + pdf.cell(0,4.5,_c(a.get('descricao','')),0,1,'L') + pdf.set_font('DejaVu','I',8.5); pdf.set_text_color(90,90,110); pdf.set_x(24) + pdf.multi_cell(168,4.5,_c(f'"{a.get("citacao_chave","")}"'),0,'L'); pdf.ln(4) + if HAS_MPL and agentes: + cls._bold(pdf, 'Espectro de posicionamento') + cls._chart(pdf, chart_agent_spectrum(agentes), w=155) + + @classmethod + def _page_forces(cls, pdf, forcas): + 
cls._header(pdf, 8, title='Mapa de Forcas') + blocos = forcas.get('blocos', []) + if HAS_MPL and len(blocos) >= 2: + fig, ax = plt.subplots(figsize=(5.5, 3.5)) + n = len(blocos) + for i, b in enumerate(blocos): + poder = b.get('poder_relativo', 5) + angle = (i / n) * 2 * np.pi - np.pi/2 + x = 0.6 * np.cos(angle) + y = 0.6 * np.sin(angle) + size = max(poder, 1) * 60 + color = _rgb(TEAL) if i == 0 else (_rgb(RED) if i == n-1 else _rgb(AMBER)) + ax.scatter(x, y, s=size, color=color, alpha=0.3, zorder=2) + ax.scatter(x, y, s=size*0.3, color=color, alpha=0.8, zorder=3) + nome = _c(b.get('nome', '')[:25]) + ax.text(x, y + 0.15, nome, ha='center', fontsize=6.5, fontweight='bold', color=_rgb(DARK)) + ax.text(x, y - 0.1, f"Poder: {poder}/10", ha='center', fontsize=5.5, color=_rgb(GRAY)) + ax.set_xlim(-1.2, 1.2); ax.set_ylim(-1.0, 1.0); ax.axis('off') + path = _mpl_save(fig) + cls._chart(pdf, path, w=130) + for b in blocos: + cls._bold(pdf, b.get('nome',''), 9.5) + cls._body(pdf, f'{b.get("base_clientes","")}. {b.get("descricao","")}', 8.5) + if b.get('citacao'): cls._quote(pdf, b['citacao']) + if forcas.get('hierarquia_poder'): + cls._bold(pdf, 'Hierarquia de poder:', 9) + cls._body(pdf, forcas['hierarquia_poder'], 8.5) + + @classmethod + def _page_timeline(cls, pdf, cronologia): + cls._header(pdf, 9, title='Cronologia da Simulacao') + for fase in cronologia.get('fases',[]): + cls._bold(pdf, f'{fase.get("periodo","")}: {fase.get("nome","")}') + cls._body(pdf, fase.get('descricao','')) + if fase.get('citacao'): cls._quote(pdf, fase['citacao']) + + @classmethod + def _page_patterns(cls, pdf, padroes): + cls._header(pdf, 10, title='Padroes Emergentes') + for p in padroes: + cls._bold(pdf, f'{p.get("numero","")}. 
{p.get("titulo","")}', 9.5) + cls._body(pdf, p.get('descricao',''), 8.5) + + @classmethod + def _page_recommendations(cls, pdf, recomendacoes): + cls._header(pdf, 11, title='Recomendacoes Estrategicas') + cls._body(pdf, 'Stack ranking: #1 decide sobrevivencia.') + if HAS_MPL and recomendacoes: + cls._chart(pdf, chart_stack_ranking(recomendacoes), w=155) + for r in recomendacoes: + cls._bold(pdf, r.get('titulo',''), 10) + cls._body(pdf, r.get('descricao',''), 8.5) + if r.get('citacao'): cls._quote(pdf, r['citacao']) + + @classmethod + def _page_checklist(cls, pdf, checklist): + cls._header(pdf, 12, title='Checklist: AJUSTAR para GO', is_new=True) + cls._body(pdf, 'Condicoes mensuraveis para transformar AJUSTAR em GO.') + for item in checklist: + y = pdf.get_y() + if y > 265: pdf.add_page() + pdf.set_draw_color(*AMBER); pdf.set_line_width(0.6) + pdf.ellipse(19, pdf.get_y()+1, 5, 5) + pdf.set_font('DejaVu','B',9); pdf.set_text_color(*DARK); pdf.set_x(27) + pdf.cell(0,5,_c(item.get('titulo','')),0,1,'L') + pdf.set_font('DejaVu','',8); pdf.set_text_color(*GRAY); pdf.set_x(27) + pdf.cell(0,4,_c(f'{item.get("timing","")} - {item.get("justificativa","")}'),0,1,'L') + pdf.ln(3) + + @classmethod + def _page_predictions(cls, pdf, previsoes): + cls._header(pdf, 13, title='Previsoes com Intervalo de Confianca') + if HAS_MPL and previsoes: cls._chart(pdf, chart_confidence_bars(previsoes), w=155) + for p in previsoes: + pdf.set_font('DejaVu','B',8.5); pdf.set_text_color(*DARK) + pdf.cell(95,4.5,_c(p.get('titulo','')),0,0,'L') + pdf.set_font('Mono','',8); pdf.set_text_color(*TEAL) + pdf.cell(30,4.5,_c(f'{p.get("probabilidade",0)}% +/-{p.get("margem_erro",0)}'),0,0,'C') + pdf.set_font('DejaVu','',8); pdf.set_text_color(*GRAY) + pdf.cell(0,4.5,_c(p.get('descricao','')),0,1,'L'); pdf.ln(1) + + @classmethod + def _page_positioning(cls, pdf, posicionamento): + cls._header(pdf, 14, title='Posicionamento Percebido vs Desejado') + players = posicionamento.get('players',[]) + if HAS_MPL 
and players: cls._chart(pdf, chart_positioning(players), w=130) + cls._bold(pdf, 'Percebido:') + cls._body(pdf, posicionamento.get('percebido_descricao','')) + if posicionamento.get('percebido_citacao'): cls._quote(pdf, posicionamento['percebido_citacao']) + cls._bold(pdf, 'Desejado:') + cls._body(pdf, posicionamento.get('desejado_descricao','')) + if posicionamento.get('desejado_citacao'): cls._quote(pdf, posicionamento['desejado_citacao']) + evitar = posicionamento.get('rotulos_a_evitar',[]) + if evitar: + cls._bold(pdf, 'Rotulos a evitar:', 9) + cls._body(pdf, '. '.join(f'{i+1}. {r}' for i,r in enumerate(evitar)), 8.5) + if posicionamento.get('posicionamento_vencedor'): + pdf.set_font('DejaVu','B',12); pdf.set_text_color(*TEAL) + pdf.cell(0,8,_c(f'Posicionamento vencedor: "{posicionamento["posicionamento_vencedor"]}"'),0,1,'C') + + @classmethod + def _page_roi(cls, pdf, roi): + cls._header(pdf, 15, title='ROI da Analise', is_new=True) + cls._body(pdf, 'Custo de errar vs custo de saber.') + riscos_ev = roi.get('riscos_evitados',[]) + if HAS_MPL and riscos_ev: + cls._chart(pdf, chart_roi(riscos_ev, roi.get('custo_analise','5k'), roi.get('risco_total_evitado','150k')), w=130) + for r in riscos_ev: + pdf.set_font('DejaVu','B',9); pdf.set_text_color(*RED) + pdf.cell(90,5,_c(r.get('titulo','')),0,0,'L') + pdf.set_font('Mono','',8); pdf.cell(0,5,_c(r.get('valor_risco','')),0,1,'R') + pdf.set_font('DejaVu','',8.5); pdf.set_text_color(*GRAY) + pdf.cell(0,4.5,_c(f'Solucao: {r.get("solucao","")}'),0,1,'L'); pdf.ln(2) + # ROI highlight box + pdf.ln(3); y=pdf.get_y() + pdf.set_fill_color(240,253,250); pdf.rect(18,y,174,18,'F') + pdf.set_font('DejaVu','B',11); pdf.set_text_color(*TEAL) + pdf.set_xy(18,y+2); pdf.cell(174,7,_c(f'Investimento: {roi.get("custo_analise","")}'),0,1,'C') + pdf.set_font('Mono','B',10) + pdf.cell(174,7,_c(f'Risco evitado: {roi.get("risco_total_evitado","")} | ROI: {roi.get("roi_multiplicador","")}'),0,1,'C') + pdf.ln(5) + for c in 
roi.get('citacoes',[]): cls._quote(pdf, c) + + @classmethod + def _page_synthesis(cls, pdf, sintese, veredicto): + cls._header(pdf, 16, title='Sintese e Direcionamento') + scores = sintese.get('scores',{}) + if HAS_MPL and scores: cls._chart(pdf, chart_final_radar(scores), w=100) + vt = veredicto.get('tipo','AJUSTAR') + vc = AMBER if vt=='AJUSTAR' else (GREEN if vt=='GO' else RED) + pdf.set_font('DejaVu','B',13); pdf.set_text_color(*vc) + pdf.cell(0,8,_c(f'VEREDICTO: {vt}'),0,1,'C'); pdf.ln(2) + cls._body(pdf, f'Cenario mais provavel: {sintese.get("cenario_mais_provavel","")}\nRisco principal: {sintese.get("risco_principal","")}') + cls._bold(pdf, 'Direcionamento estrategico') + for d in sintese.get('direcionamento',[]): cls._body(pdf, f'- {d}', 8.5) + + @classmethod + def _page_back(cls, pdf, veredicto): + pdf.add_page(); pdf.ln(30) + vt = veredicto.get('tipo','AJUSTAR') + pdf.set_font('DejaVu','B',14); pdf.set_text_color(*AMBER) + pdf.cell(0,10,_c(vt),0,1,'C') + pdf.set_font('Mono','B',28); pdf.set_text_color(*DARK) + pdf.cell(0,15,_c('A U G U R'),0,1,'C') + pdf.set_font('DejaVu','I',11); pdf.set_text_color(*TEAL) + pdf.cell(0,8,_c('Preveja o futuro. Antes que ele aconteca.'),0,1,'C') + pdf.ln(10) + pdf.set_font('DejaVu','',9); pdf.set_text_color(*GRAY) + pdf.cell(0,6,_c('augur.itcast.com.br'),0,1,'C') + pdf.cell(0,6,_c('contato@itcast.com.br'),0,1,'C') + pdf.ln(15) + pdf.set_font('DejaVu','',7); pdf.set_text_color(180,180,180) + pdf.multi_cell(0,4,_c('Este relatorio foi gerado por IA com base em simulacoes de opiniao publica. 
# ============================================================
# BACKWARD COMPATIBILITY
# ============================================================
# api/report.py imports: from ..services.pdf_generator import PDFGenerator, HAS_FPDF
# and calls: PDFGenerator.generate(report_data, pdf_path, client_name=client_name)
# Compatibility is kept here so api/report.py does not have to change.
# ============================================================

HAS_FPDF = True


class PDFGenerator:
    """Compatibility wrapper that delegates all rendering to PDFGeneratorV2."""

    # Matches a verdict only as a whole token. The lookarounds reject letters,
    # digits, "_" and "-" on either side, so the "GO" inside "NO-GO", "ALGO" or
    # "NEGOCIO" is never mistaken for a GO verdict.
    _VERDICT_PATTERN = r'(?<![\w-])(NO[-\s]GO|AJUSTAR|GO)(?![\w-])'

    @classmethod
    def generate(cls, report_data: dict, output_path: str = None, **kwargs):
        """Render a report through PDFGeneratorV2.

        Accepts both the legacy format (``report_data`` with free-text
        sections) and the new one (``report_data`` with a ``structured``
        field).

        Args:
            report_data: Report dictionary in either format.
            output_path: Forwarded unchanged to ``PDFGeneratorV2.generate``.
            **kwargs: Accepted so legacy callers can keep passing extra
                keyword arguments (e.g. ``client_name``); they are ignored.

        Returns:
            Whatever ``PDFGeneratorV2.generate`` returns.
        """
        # A structured payload (v2 pipeline) is rendered as-is.
        structured = report_data.get("structured", None)
        if structured:
            return PDFGeneratorV2.generate(structured, output_path=output_path)

        # Fallback: build a minimal structured payload from the legacy format
        # so that PDFGeneratorV2 still has something to render.
        logger.info("PDF fallback: convertendo formato v1 para v2")
        minimal = cls._convert_v1_to_v2(report_data)
        return PDFGeneratorV2.generate(minimal, output_path=output_path)

    @staticmethod
    def _extract_verdict(summary: str) -> str:
        """Return the first verdict token in *summary*, or ``"AJUSTAR"``.

        The search is case-insensitive. ``"NO GO"`` (with whitespace) is
        normalised to ``"NO-GO"``.
        """
        match = re.search(PDFGenerator._VERDICT_PATTERN, summary.upper())
        if match is None:
            return "AJUSTAR"
        token = match.group(1)
        return "NO-GO" if token.startswith("NO") else token

    @staticmethod
    def _convert_v1_to_v2(report_data: dict) -> dict:
        """Convert a v1 report (free text) into a minimal v2 structure.

        Title, summary and sections are read from ``report_data["outline"]``
        first and from the top level of ``report_data`` second. Missing,
        ``None`` or empty values fall back to safe defaults instead of raising.

        Args:
            report_data: Legacy report dictionary.

        Returns:
            A dictionary with every top-level key of the v2 schema. Only
            ``meta.projeto`` and the ``veredicto`` fields are derived from the
            input; everything else is an empty or fixed placeholder.
        """
        outline = report_data.get("outline") or {}
        title = outline.get("title") or report_data.get("title") or "Relatorio AUGUR"
        summary = outline.get("summary") or report_data.get("summary") or ""
        sections = outline.get("sections") or report_data.get("sections") or []

        # Derive the verdict from the summary text.
        tipo = PDFGenerator._extract_verdict(summary)

        # Index section text by key; without an explicit key, use the title
        # lowercased with spaces replaced by underscores.
        section_contents = {}
        for sec in sections:
            if not isinstance(sec, dict):
                continue
            key = sec.get("key") or (sec.get("title") or "").lower().replace(" ", "_")
            section_contents[key] = sec.get("content") or ""

        return {
            "meta": {
                "projeto": re.sub(r'Relat[oó]rio de Previs[aã]o:\s*', '', title).strip(),
                "setor": "varejo_local",
                "tipo_decisao": "novo_negocio",
                "data_geracao": "",
                "modelo_ia": "GPT-5.4",
                "num_agentes": 6,
                "num_rodadas": 5,
                "periodo_simulado_meses": 24,
            },
            "veredicto": {
                "tipo": tipo,
                "score_viabilidade": 52,
                "frase_chave": summary[:200] if summary else title,
                "resumo_executivo": section_contents.get("resumo_executivo", summary),
                "leitura_para_decisao": "",
                "top5_fatos": [],
            },
            "dashboard": {},
            "cenarios": {"cenarios": [], "ponto_bifurcacao": ""},
            "riscos": {"texto_introducao": "", "riscos": []},
            "emocional": {"emocoes": [], "saldo_positivo_vs_negativo": "",
                          "texto_confianca": "", "citacao_confianca": "",
                          "texto_ceticismo": "", "citacao_ceticismo": "",
                          "texto_empolgacao": "", "texto_medo": "", "evolucao_24m": {}},
            "agentes": [],
            "forcas": {"blocos": [], "hierarquia_poder": "", "coalizao_entrante": ""},
            "cronologia": {"fases": []},
            "padroes": [],
            "recomendacoes": [],
            "checklist": [],
            "previsoes": [],
            "posicionamento": {"percebido_descricao": "", "percebido_citacao": "",
                               "desejado_descricao": "", "desejado_citacao": "",
                               "rotulos_a_evitar": [], "posicionamento_vencedor": "",
                               "players": []},
            "roi": {"riscos_evitados": [], "custo_analise": "", "risco_total_evitado": "",
                    "roi_multiplicador": "", "citacoes": []},
            "sintese": {"scores": {}, "veredicto_final": tipo,
                        "cenario_mais_provavel": "", "risco_principal": "",
                        "direcionamento": [], "sinais_consolidacao": [],
                        "sinais_alerta": [], "sinais_risco": []},
        }
b/backend/app/services/report_agent.py @@ -1,12 +1,11 @@ """ -Report Agent服务 -使用LangChain + Zep实现ReACT模式的模拟报告生成 - -功能: -1. 根据模拟需求和Zep图谱信息生成报告 -2. 先规划目录结构,然后分段生成 -3. 每段采用ReACT多轮思考与反思模式 -4. 支持与用户对话,在对话中自主调用检索工具 +Serviço do Report Agent +Geração de relatórios de simulação com padrão ReACT usando LangChain + Zep + +1. Gera relatórios baseados nos requisitos da simulação e informações do grafo Zep +2. Primeiro planeja a estrutura, depois gera seção por seção +3. Cada seção usa o padrão ReACT de múltiplas rodadas de raciocínio +4. Suporta diálogo com usuário, chamando ferramentas de busca autonomamente """ import os @@ -30,23 +29,37 @@ InterviewResult ) +# AUGUR v2 pipeline +try: + from ..schemas.report_schema import validar_report_json + from .report_prompts_v2 import ( + PLAN_SYSTEM_PROMPT_V2, + PLAN_USER_PROMPT_V2, + get_section_system_prompt, + parse_section_json, + assemble_report, + SECTION_KEYS_ORDERED, + ) + HAS_V2 = True +except ImportError: + HAS_V2 = False + logger = get_logger('mirofish.report_agent') class ReportLogger: """ - Report Agent 详细日志记录器 + Logger detalhado do Report Agent - 在报告文件夹中生成 agent_log.jsonl 文件,记录每一步详细动作。 - 每行是一个完整的 JSON 对象,包含时间戳、动作类型、详细内容等。 + Gera arquivo agent_log.jsonl na pasta do relatório, registrando cada ação detalhada。 + Cada linha é um objeto JSON completo com timestamp, tipo de ação e detalhes。 """ def __init__(self, report_id: str): """ - 初始化日志记录器 Args: - report_id: 报告ID,用于确定日志文件路径 + report_id: ID do relatório, para determinar o caminho do log """ self.report_id = report_id self.log_file_path = os.path.join( @@ -56,12 +69,12 @@ def __init__(self, report_id: str): self._ensure_log_file() def _ensure_log_file(self): - """确保日志文件所在目录存在""" + """""" log_dir = os.path.dirname(self.log_file_path) os.makedirs(log_dir, exist_ok=True) def _get_elapsed_time(self) -> float: - """获取从开始到现在的耗时(秒)""" + """""" return (datetime.now() - self.start_time).total_seconds() def log( @@ -73,14 +86,13 @@ def log( section_index: int = None ): """ - 记录一条日志 
Args: - action: 动作类型,如 'start', 'tool_call', 'llm_response', 'section_complete' 等 - stage: 当前阶段,如 'planning', 'generating', 'completed' - details: 详细内容字典,不截断 - section_title: 当前章节标题(可选) - section_index: 当前章节索引(可选) + action: Tipo de ação, como 'start', 'tool_call', 'llm_response', 'section_complete' + stage: Fase atual, como 'planning', 'generating', 'completed' + details: Conteúdo + section_title: Seção + section_index: Seção """ log_entry = { "timestamp": datetime.now().isoformat(), @@ -93,12 +105,12 @@ def log( "details": details } - # 追加写入 JSONL 文件 + # JSONL with open(self.log_file_path, 'a', encoding='utf-8') as f: f.write(json.dumps(log_entry, ensure_ascii=False) + '\n') def log_start(self, simulation_id: str, graph_id: str, simulation_requirement: str): - """记录报告生成开始""" + """RelatórioGerar""" self.log( action="report_start", stage="pending", @@ -111,7 +123,7 @@ def log_start(self, simulation_id: str, graph_id: str, simulation_requirement: s ) def log_planning_start(self): - """记录大纲规划开始""" + """Outline""" self.log( action="planning_start", stage="planning", @@ -119,7 +131,7 @@ def log_planning_start(self): ) def log_planning_context(self, context: Dict[str, Any]): - """记录规划时获取的上下文信息""" + """""" self.log( action="planning_context", stage="planning", @@ -130,7 +142,7 @@ def log_planning_context(self, context: Dict[str, Any]): ) def log_planning_complete(self, outline_dict: Dict[str, Any]): - """记录大纲规划完成""" + """Outline""" self.log( action="planning_complete", stage="planning", @@ -141,7 +153,7 @@ def log_planning_complete(self, outline_dict: Dict[str, Any]): ) def log_section_start(self, section_title: str, section_index: int): - """记录章节生成开始""" + """SeçãoGerar""" self.log( action="section_start", stage="generating", @@ -151,7 +163,7 @@ def log_section_start(self, section_title: str, section_index: int): ) def log_react_thought(self, section_title: str, section_index: int, iteration: int, thought: str): - """记录 ReACT 思考过程""" + """Registrar processo de raciocínio 
ReACT""" self.log( action="react_thought", stage="generating", @@ -172,7 +184,7 @@ def log_tool_call( parameters: Dict[str, Any], iteration: int ): - """记录工具调用""" + """Registrar chamada de ferramenta""" self.log( action="tool_call", stage="generating", @@ -194,7 +206,7 @@ def log_tool_result( result: str, iteration: int ): - """记录工具调用结果(完整内容,不截断)""" + """Registrar chamada de ferramentaResultado(Conteúdo completo, sem truncar)""" self.log( action="tool_result", stage="generating", @@ -203,7 +215,7 @@ def log_tool_result( details={ "iteration": iteration, "tool_name": tool_name, - "result": result, # 完整结果,不截断 + "result": result, # Resultado "result_length": len(result), "message": t('report.toolResult', toolName=tool_name) } @@ -218,7 +230,7 @@ def log_llm_response( has_tool_calls: bool, has_final_answer: bool ): - """记录 LLM 响应(完整内容,不截断)""" + """ LLM Conteúdo completo, sem truncar""" self.log( action="llm_response", stage="generating", @@ -226,7 +238,7 @@ def log_llm_response( section_index=section_index, details={ "iteration": iteration, - "response": response, # 完整响应,不截断 + "response": response, # "response_length": len(response), "has_tool_calls": has_tool_calls, "has_final_answer": has_final_answer, @@ -241,14 +253,14 @@ def log_section_content( content: str, tool_calls_count: int ): - """记录章节内容生成完成(仅记录内容,不代表整个章节完成)""" + """SeçãoConteúdoGerarConteúdoSeção""" self.log( action="section_content", stage="generating", section_title=section_title, section_index=section_index, details={ - "content": content, # 完整内容,不截断 + "content": content, # Conteúdo completo, sem truncar "content_length": len(content), "tool_calls_count": tool_calls_count, "message": t('report.sectionContentDone', title=section_title) @@ -262,9 +274,9 @@ def log_section_full_complete( full_content: str ): """ - 记录章节生成完成 + SeçãoGerar - 前端应监听此日志来判断一个章节是否真正完成,并获取完整内容 + SeçãoConteúdo """ self.log( action="section_complete", @@ -279,7 +291,7 @@ def log_section_full_complete( ) def log_report_complete(self, 
total_sections: int, total_time_seconds: float): - """记录报告生成完成""" + """RelatórioGerar""" self.log( action="report_complete", stage="completed", @@ -291,7 +303,7 @@ def log_report_complete(self, total_sections: int, total_time_seconds: float): ) def log_error(self, error_message: str, stage: str, section_title: str = None): - """记录错误""" + """""" self.log( action="error", stage=stage, @@ -306,18 +318,17 @@ def log_error(self, error_message: str, stage: str, section_title: str = None): class ReportConsoleLogger: """ - Report Agent 控制台日志记录器 + Report Agent - 将控制台风格的日志(INFO、WARNING等)写入报告文件夹中的 console_log.txt 文件。 - 这些日志与 agent_log.jsonl 不同,是纯文本格式的控制台输出。 + INFOWARNINGRelatórioPasta console_log.txt + agent_log.jsonl """ def __init__(self, report_id: str): """ - 初始化控制台日志记录器 Args: - report_id: 报告ID,用于确定日志文件路径 + report_id: ID do relatório, para determinar o caminho do log """ self.report_id = report_id self.log_file_path = os.path.join( @@ -328,15 +339,14 @@ def __init__(self, report_id: str): self._setup_file_handler() def _ensure_log_file(self): - """确保日志文件所在目录存在""" + """""" log_dir = os.path.dirname(self.log_file_path) os.makedirs(log_dir, exist_ok=True) def _setup_file_handler(self): - """设置文件处理器,将日志同时写入文件""" + """""" import logging - # 创建文件处理器 self._file_handler = logging.FileHandler( self.log_file_path, mode='a', @@ -344,14 +354,13 @@ def _setup_file_handler(self): ) self._file_handler.setLevel(logging.INFO) - # 使用与控制台相同的简洁格式 formatter = logging.Formatter( '[%(asctime)s] %(levelname)s: %(message)s', datefmt='%H:%M:%S' ) self._file_handler.setFormatter(formatter) - # 添加到 report_agent 相关的 logger + # report_agent logger loggers_to_attach = [ 'mirofish.report_agent', 'mirofish.zep_tools', @@ -359,12 +368,11 @@ def _setup_file_handler(self): for logger_name in loggers_to_attach: target_logger = logging.getLogger(logger_name) - # 避免重复添加 if self._file_handler not in target_logger.handlers: target_logger.addHandler(self._file_handler) def close(self): - """关闭文件处理器并从 logger 
中移除""" + """ logger """ import logging if self._file_handler: @@ -382,12 +390,12 @@ def close(self): self._file_handler = None def __del__(self): - """析构时确保关闭文件处理器""" + """""" self.close() class ReportStatus(str, Enum): - """报告状态""" + """Status do relatório""" PENDING = "pending" PLANNING = "planning" GENERATING = "generating" @@ -397,7 +405,7 @@ class ReportStatus(str, Enum): @dataclass class ReportSection: - """报告章节""" + """RelatórioSeção""" title: str content: str = "" @@ -408,7 +416,7 @@ def to_dict(self) -> Dict[str, Any]: } def to_markdown(self, level: int = 2) -> str: - """转换为Markdown格式""" + """Markdown""" md = f"{'#' * level} {self.title}\n\n" if self.content: md += f"{self.content}\n\n" @@ -417,7 +425,7 @@ def to_markdown(self, level: int = 2) -> str: @dataclass class ReportOutline: - """报告大纲""" + """Outline do relatório""" title: str summary: str sections: List[ReportSection] @@ -430,7 +438,7 @@ def to_dict(self) -> Dict[str, Any]: } def to_markdown(self) -> str: - """转换为Markdown格式""" + """Markdown""" md = f"# {self.title}\n\n" md += f"> {self.summary}\n\n" for section in self.sections: @@ -440,7 +448,7 @@ def to_markdown(self) -> str: @dataclass class Report: - """完整报告""" + """Relatório""" report_id: str simulation_id: str graph_id: str @@ -451,9 +459,10 @@ class Report: created_at: str = "" completed_at: str = "" error: Optional[str] = None + structured: Optional[Dict[str, Any]] = None # AUGUR v2: JSON estruturado def to_dict(self) -> Dict[str, Any]: - return { + d = { "report_id": self.report_id, "simulation_id": self.simulation_id, "graph_id": self.graph_id, @@ -465,420 +474,559 @@ def to_dict(self) -> Dict[str, Any]: "completed_at": self.completed_at, "error": self.error } + if self.structured: + d["structured"] = self.structured + return d # ═══════════════════════════════════════════════════════════════ -# Prompt 模板常量 +# Prompt # ═══════════════════════════════════════════════════════════════ -# ── 工具描述 ── +# ── Ferramenta ── 
TOOL_DESC_INSIGHT_FORGE = """\ -【深度洞察检索 - 强大的检索工具】 -这是我们强大的检索函数,专为深度分析设计。它会: -1. 自动将你的问题分解为多个子问题 -2. 从多个维度检索模拟图谱中的信息 -3. 整合语义搜索、实体分析、关系链追踪的结果 -4. 返回最全面、最深度的检索内容 - -【使用场景】 -- 需要深入分析某个话题 -- 需要了解事件的多个方面 -- 需要获取支撑报告章节的丰富素材 - -【返回内容】 -- 相关事实原文(可直接引用) -- 核心实体洞察 -- 关系链分析""" +【Busca de Insights Profundos - Ferramenta poderosa de busca】 +Função de busca poderosa, projetada para análise profunda. Ela: +1. Decompõe automaticamente sua pergunta em sub-questões +2. Busca informações no grafo da simulação de múltiplas dimensões +3. Integra resultados de busca semântica, análise de entidades e rastreamento de relações +4. Retorna o conteúdo mais abrangente e profundo + +【Quando usar】 +- Análise profunda de um tema específico +- Entender múltiplos aspectos de um evento +- Obter material rico para sustentar seções do relatório + +【Conteúdo retornado】 +- Fatos originais relevantes (citáveis diretamente) +- Insights de entidades centrais +- Análise de cadeias de relações""" TOOL_DESC_PANORAMA_SEARCH = """\ -【广度搜索 - 获取全貌视图】 -这个工具用于获取模拟结果的完整全貌,特别适合了解事件演变过程。它会: -1. 获取所有相关节点和关系 -2. 区分当前有效的事实和历史/过期的事实 -3. 帮助你了解舆情是如何演变的 - -【使用场景】 -- 需要了解事件的完整发展脉络 -- 需要对比不同阶段的舆情变化 -- 需要获取全面的实体和关系信息 - -【返回内容】 -- 当前有效事实(模拟最新结果) -- 历史/过期事实(演变记录) -- 所有涉及的实体""" +【Busca Panorâmica - Visão completa】 +Ferramenta para obter a visão completa dos resultados da simulação, ideal para entender a evolução dos eventos. Ela: +1. Obtém todos os nós e relações relevantes +2. Distingue fatos atuais válidos de fatos históricos/expirados +3. 
Ajuda a entender como a dinâmica evoluiu + +【Quando usar】 +- Entender a linha completa de desenvolvimento dos eventos +- Comparar mudanças entre diferentes fases da simulação +- Obter informações completas de entidades e relações + +【Conteúdo retornado】 +- Fatos atuais válidos (resultado mais recente da simulação) +- Fatos históricos/expirados (registro de evolução) +- Todas as entidades envolvidas""" TOOL_DESC_QUICK_SEARCH = """\ -【简单搜索 - 快速检索】 -轻量级的快速检索工具,适合简单、直接的信息查询。 +【Busca Rápida - Consulta simples】 +Ferramenta leve de busca rápida, ideal para consultas simples e diretas. -【使用场景】 -- 需要快速查找某个具体信息 -- 需要验证某个事实 -- 简单的信息检索 +【Quando usar】 +- Buscar rapidamente uma informação específica +- Verificar um fato +- Consultas simples de informação -【返回内容】 -- 与查询最相关的事实列表""" +【Conteúdo retornado】 +- Lista de fatos mais relevantes para a consulta""" TOOL_DESC_INTERVIEW_AGENTS = """\ -【深度采访 - 真实Agent采访(双平台)】 -调用OASIS模拟环境的采访API,对正在运行的模拟Agent进行真实采访! -这不是LLM模拟,而是调用真实的采访接口获取模拟Agent的原始回答。 -默认在Twitter和Reddit两个平台同时采访,获取更全面的观点。 - -功能流程: -1. 自动读取人设文件,了解所有模拟Agent -2. 智能选择与采访主题最相关的Agent(如学生、媒体、官方等) -3. 自动生成采访问题 -4. 调用 /api/simulation/interview/batch 接口在双平台进行真实采访 -5. 整合所有采访结果,提供多视角分析 - -【使用场景】 -- 需要从不同角色视角了解事件看法(学生怎么看?媒体怎么看?官方怎么说?) -- 需要收集多方意见和立场 -- 需要获取模拟Agent的真实回答(来自OASIS模拟环境) -- 想让报告更生动,包含"采访实录" - -【返回内容】 -- 被采访Agent的身份信息 -- 各Agent在Twitter和Reddit两个平台的采访回答 -- 关键引言(可直接引用) -- 采访摘要和观点对比 - -【重要】需要OASIS模拟环境正在运行才能使用此功能!""" - -# ── 大纲规划 prompt ── +【Entrevista Profunda - Entrevista real com Agentes (dual plataforma)】 +Chama a API de entrevista do ambiente de simulação OASIS para entrevistar Agentes reais! +Não é simulação por LLM — são chamadas reais à interface de entrevista para obter respostas originais dos Agentes. +Por padrão entrevista em Twitter e Reddit simultaneamente para perspectivas mais completas. + +Fluxo: +1. Lê automaticamente os perfis para conhecer todos os Agentes simulados +2. Seleciona inteligentemente os Agentes mais relevantes para o tema da entrevista +3. 
Gera automaticamente as perguntas da entrevista +4. Chama /api/simulation/interview/batch para entrevista real em duas plataformas +5. Integra todos os resultados, fornecendo análise multi-perspectiva + +【Quando usar】 +- Entender visões de diferentes papéis sobre o evento (o que consumidores pensam? empresários? analistas?) +- Coletar opiniões e posições de múltiplas partes +- Obter respostas reais dos Agentes simulados (do ambiente OASIS) +- Tornar o relatório mais vivo, incluindo "registros de entrevista" + +【Conteúdo retornado】 +- Informações de identidade dos Agentes entrevistados +- Respostas de cada Agente nas plataformas Twitter e Reddit +- Citações-chave (citáveis diretamente) +- Resumo da entrevista e comparação de pontos de vista + +【IMPORTANTE】O ambiente de simulação OASIS precisa estar rodando para usar esta funcionalidade!""" + +# ── Prompt de planejamento do outline ── PLAN_SYSTEM_PROMPT = """\ -你是一个「未来预测报告」的撰写专家,拥有对模拟世界的「上帝视角」——你可以洞察模拟中每一位Agent的行为、言论和互动。 - -【核心理念】 -我们构建了一个模拟世界,并向其中注入了特定的「模拟需求」作为变量。模拟世界的演化结果,就是对未来可能发生情况的预测。你正在观察的不是"实验数据",而是"未来的预演"。 - -【你的任务】 -撰写一份「未来预测报告」,回答: -1. 在我们设定的条件下,未来发生了什么? -2. 各类Agent(人群)是如何反应和行动? -3. 这个模拟揭示了哪些值得关注的未来趋势和风险? - -【报告定位】 -- ✅ 这是一份基于模拟的未来预测报告,揭示"如果这样,未来会怎样" -- ✅ 聚焦于预测结果:事件走向、群体反应、涌现现象、潜在风险 -- ✅ 模拟世界中的Agent言行就是对未来人群行为的预测 -- ❌ 不是对现实世界现状的分析 -- ❌ 不是泛泛而谈的舆情综述 - -【章节数量限制】 -- 最少2个章节,最多5个章节 -- 不需要子章节,每个章节直接撰写完整内容 -- 内容要精炼,聚焦于核心预测发现 -- 章节结构由你根据预测结果自主设计 - -请输出JSON格式的报告大纲,格式如下: +Você é o estrategista-chefe do AUGUR — a mais avançada plataforma de previsão de mercado por IA do mundo. +Você gera relatórios que são lidos por CEOs, diretores, investidores e conselhos de administração. +Seu relatório deve ser TÃO BOM que substitua uma consultoria de R$200.000. + +══════════════════════════════════════════════════════════════ +【FILOSOFIA — CONSELHO MUNDIAL AUGUR】 +══════════════════════════════════════════════════════════════ + +STEVE JOBS: "O relatório conta uma HISTÓRIA, não lista fatos." +JEFF BEZOS: "Comece pelo resultado. 
O que o cliente FAZ com isso?" +JENSEN HUANG: "Extraia dados QUANTITATIVOS, não apenas opiniões." +ROBERTO JUSTUS: "Venda o RESULTADO, não a tecnologia." +BERNARD ARNAULT: "Meça PERCEPÇÃO, não apenas sentimento." +JACK WELCH: "Ranking forçado. A recomendação #1 é a única que importa." + +══════════════════════════════════════════════════════════════ +【ESTRUTURA DO RELATÓRIO — 11 SEÇÕES OBRIGATÓRIAS】 +══════════════════════════════════════════════════════════════ + +1. **Resumo Executivo** — A seção mais importante. Deve conter: + - 🟢🟡🔴 VEREDICTO: "RECOMENDAÇÃO: LANÇAR / AJUSTAR ANTES / NÃO LANÇAR" + - UMA FRASE de decisão (máx 25 palavras) + - Índice de confiança: XX% (± margem) + - 5 KPIs-chave em formato: "Nome: Valor ↑/→/↓ (benchmark: referência)" + - Resumo de 30 segundos (máx 100 palavras) para CEO que não lerá o resto + +2. **Cenários Futuros** — 3 cenários obrigatórios: + - **Cenário Otimista**: probabilidade %, impacto FINANCEIRO estimado, descrição + - **Cenário Realista**: probabilidade %, impacto FINANCEIRO estimado, descrição + - **Cenário Pessimista**: probabilidade %, impacto FINANCEIRO estimado, descrição + - Probabilidades DEVEM somar 100% + - Cada cenário deve ter: impacto em receita, impacto em reputação, timeline + +3. **Fatores de Risco** — 3-5 riscos com STACK RANKING forçado: + - Risco #1 é o MAIS CRÍTICO (deve ser resolvido primeiro) + - Cada risco: **Nome** + descrição + probabilidade % + impacto (Alto/Médio/Baixo) + - Incluir risco regulatório se aplicável (PROCON, ANVISA, LGPD, BACEN) + +4. **Análise Emocional** — NÃO apenas positivo/negativo. Mapear: + - Emoções específicas: empolgação, confiança, ceticismo, medo, FOMO, indiferença + - % de cada emoção por grupo de agentes + - A emoção DOMINANTE e o que ela significa para o lançamento + - Citações de agentes que ilustrem cada emoção + +5. 
**Mapa de Forças** — Poder e influência: + - Agentes mais influentes (por nome) e por quê + - Clusters/coalizões formados + - Tensões e alianças + - Quem são os "early adopters" vs "resistentes" + +6. **Cronologia da Simulação** — Narrativa temporal estilo NETFLIX: + - Conte como HISTÓRIA: "Tudo começou na rodada 1 quando..." + - Identifique o PONTO DE VIRADA (rodada em que tudo mudou) + - Use drama e tensão narrativa, mantendo rigor factual + +7. **Padrões Emergentes** — Comportamentos orgânicos surpreendentes: + - Padrões que NINGUÉM esperava + - Comportamentos contra-intuitivos + - Tendências que se consolidaram vs que morreram + +8. **Hipóteses Causais** — Causa e efeito com evidências: + - 3-5 hipóteses sobre "por quê aconteceu X" + - Evidências a favor e contra (citando agentes) + - Nível de confiança: Alta/Média/Baixa + +9. **Estratégia de Comunicação** — Mensagem por segmento: + - Para cada grupo identificado: mensagem recomendada + - Formato: Headline + Subhead + Proof Point + - Canais recomendados com prioridade + - Tom de voz recomendado + +10. **Recomendações Estratégicas** — STACK RANKING forçado: + - Recomendação #1: A ÚNICA que move a agulha (Urgência + Prazo + Impacto) + - Recomendação #2-3: Complementares + - Recomendação #4-5: Se sobrar recursos + - Cada uma com: **Nome** + descrição + urgência + prazo + impacto estimado + +11. **Previsões com Intervalo de Confiança** — 3 previsões: + - Previsão + data + probabilidade ± margem + - Ex: "Até Ago/2027, 60% ± 12% de adoção pelo público-alvo" + +12. **Posicionamento Percebido vs Desejado** — Gap de percepção: + - Como os agentes PERCEBEM o produto/marca vs como o cliente quer ser percebido + - O GAP entre percepção real e desejada é o insight central + - Palavras-chave associadas à marca pelos agentes + - Recomendação para fechar o gap de posicionamento + +13. **Valor da Análise** — ROI desta simulação: + - "Esta simulação custou R$X. Um focus group equivalente custaria R$50.000." 
+ - "Uma decisão errada de lançamento pode custar R$500.000+." + - Riscos financeiros evitados com as descobertas deste relatório + - Próximos passos concretos que justificam o investimento + +══════════════════════════════════════════════════════════════ +【REGRAS DE QUALIDADE — EXIGÊNCIAS DO CONSELHO】 +══════════════════════════════════════════════════════════════ + +CONTEÚDO: +- Cada afirmação DEVE citar dados da simulação (% de agentes, rodada, nome do agente) +- NUNCA escreva genérico. Cada frase deve ter EVIDÊNCIA da simulação. +- O Resumo Executivo é lido em 30 segundos. Os outros 10% lêem o resto. +- Use COMPARAÇÕES sempre que possível ("72% vs média de 45%") +- Inclua citações diretas de agentes: > "O que o agente falou..." + +FORMATAÇÃO PARA PARSING: +- Cenários: "**Nome do Cenário**" em negrito, depois texto, depois "Probabilidade: XX%" +- Riscos: "**Nome do Risco**" em negrito, "Probabilidade de ocorrência: XX%", "Impacto: Alto/Médio/Baixo" +- Recomendações: "**#N Nome**" em negrito, "Urgência: X", "Prazo: Próximos X meses" +- NUNCA use títulos Markdown (##) dentro das seções — o sistema adiciona automaticamente + +IDIOMA: +- 100% português do Brasil, profissional e acessível +- Dados em chinês ou inglês → TRADUZA +- ZERO caracteres chineses permitidos + +Retorne JSON com o formato: { - "title": "报告标题", - "summary": "报告摘要(一句话概括核心预测发现)", + "title": "Relatório de Previsão: [tema] — [veredicto GO/NO-GO/AJUSTAR]", + "summary": "VEREDICTO: [GO/NO-GO/AJUSTAR]. [Uma frase de decisão em 25 palavras]", "sections": [ - { - "title": "章节标题", - "description": "章节内容描述" - } + {"title": "Nome da seção", "description": "Instruções detalhadas para esta seção"} ] } -注意:sections数组最少2个,最多5个元素!""" +O array sections DEVE ter EXATAMENTE 13 elementos, um para cada seção acima. 
+""" PLAN_USER_PROMPT_TEMPLATE = """\ -【预测场景设定】 -我们向模拟世界注入的变量(模拟需求):{simulation_requirement} +【CENÁRIO DA SIMULAÇÃO】 +Hipótese testada: {simulation_requirement} -【模拟世界规模】 -- 参与模拟的实体数量: {total_nodes} -- 实体间产生的关系数量: {total_edges} -- 实体类型分布: {entity_types} -- 活跃Agent数量: {total_entities} +【ESCALA DA SIMULAÇÃO】 +- Entidades simuladas: {total_nodes} +- Relacionamentos gerados: {total_edges} +- Tipos de entidades: {entity_types} +- Agentes ativos: {total_entities} -【模拟预测到的部分未来事实样本】 +【AMOSTRA DE FATOS SIMULADOS】 {related_facts_json} -请以「上帝视角」审视这个未来预演: -1. 在我们设定的条件下,未来呈现出了什么样的状态? -2. 各类人群(Agent)是如何反应和行动的? -3. 这个模拟揭示了哪些值得关注的未来趋势? +Com base nesses dados, elabore o plano completo do relatório com as 13 seções obrigatórias. + +PARA CADA SEÇÃO, a descrição deve especificar: +- Quais aspectos da simulação explorar +- Quais agentes/entidades mencionar por nome +- Quais dados quantitativos extrair +- Que perguntas a seção deve responder -根据预测结果,设计最合适的报告章节结构。 +O Resumo Executivo DEVE incluir 3-5 KPIs extraídos dos fatos simulados. +Os Cenários DEVEM ter probabilidades que somem 100%. +Os Riscos DEVEM ter probabilidades individuais e níveis de impacto. +As Recomendações DEVEM ter urgência e prazo. -【再次提醒】报告章节数量:最少2个,最多5个,内容要精炼聚焦于核心预测发现。""" +⚠️ REGRA ABSOLUTA: Relatório INTEIRO em PORTUGUÊS DO BRASIL. +Dados em chinês → TRADUZA. NENHUM caractere chinês permitido. VIOLAÇÃO = INVÁLIDO. +Seja ESPECÍFICO — mencione nomes de entidades e dados dos fatos simulados.""" -# ── 章节生成 prompt ── +# ── Prompt de geração de seções ── SECTION_SYSTEM_PROMPT_TEMPLATE = """\ -你是一个「未来预测报告」的撰写专家,正在撰写报告的一个章节。 +Você é um especialista em redação de RELATÓRIOS DE PREVISÃO, escrevendo uma seção do relatório. 
-报告标题: {report_title} -报告摘要: {report_summary} -预测场景(模拟需求): {simulation_requirement} +Título do relatório: {report_title} +Resumo do relatório: {report_summary} +Cenário de previsão (requisito da simulação): {simulation_requirement} -当前要撰写的章节: {section_title} +Seção a ser redigida: {section_title} ═══════════════════════════════════════════════════════════════ -【核心理念】 +【CONCEITO CENTRAL】 ═══════════════════════════════════════════════════════════════ -模拟世界是对未来的预演。我们向模拟世界注入了特定条件(模拟需求), -模拟中Agent的行为和互动,就是对未来人群行为的预测。 +O mundo simulado é um ensaio do futuro. Injetamos condições específicas (requisito da simulação), +e o comportamento e interações dos Agentes representam previsões do comportamento futuro dos grupos. -你的任务是: -- 揭示在设定条件下,未来发生了什么 -- 预测各类人群(Agent)是如何反应和行动的 -- 发现值得关注的未来趋势、风险和机会 +Sua tarefa é: +- Revelar o que aconteceu no futuro sob as condições definidas +- Prever como cada tipo de grupo (Agente) reagiu e agiu +- Identificar tendências, riscos e oportunidades futuras relevantes -❌ 不要写成对现实世界现状的分析 -✅ 要聚焦于"未来会怎样"——模拟结果就是预测的未来 +❌ NÃO escreva como uma análise da situação atual do mundo real +✅ Foque em "o que vai acontecer" — os resultados da simulação SÃO o futuro previsto ═══════════════════════════════════════════════════════════════ -【最重要的规则 - 必须遵守】 +【REGRAS MAIS IMPORTANTES - OBRIGATÓRIAS】 ═══════════════════════════════════════════════════════════════ -1. 【必须调用工具观察模拟世界】 - - 你正在以「上帝视角」观察未来的预演 - - 所有内容必须来自模拟世界中发生的事件和Agent言行 - - 禁止使用你自己的知识来编写报告内容 - - 每个章节至少调用3次工具(最多5次)来观察模拟的世界,它代表了未来 - -2. 【必须引用Agent的原始言行】 - - Agent的发言和行为是对未来人群行为的预测 - - 在报告中使用引用格式展示这些预测,例如: - > "某类人群会表示:原文内容..." - - 这些引用是模拟预测的核心证据 - -3. 【语言一致性 - 引用内容必须翻译为报告语言】 - - 工具返回的内容可能包含与报告语言不同的表述 - - 报告必须全部使用与用户指定语言一致的语言撰写 - - 当你引用工具返回的其他语言内容时,必须将其翻译为报告语言后再写入 - - 翻译时保持原意不变,确保表述自然通顺 - - 这一规则同时适用于正文和引用块(> 格式)中的内容 - -4. 【忠实呈现预测结果】 - - 报告内容必须反映模拟世界中的代表未来的模拟结果 - - 不要添加模拟中不存在的信息 - - 如果某方面信息不足,如实说明 +1. 
【OBRIGATÓRIO usar ferramentas para observar o mundo simulado】 + - Você está observando o ensaio do futuro com "visão de deus" + - Todo o conteúdo DEVE vir de eventos e falas dos Agentes no mundo simulado + - PROIBIDO usar seu próprio conhecimento para escrever o conteúdo do relatório + - Cada seção deve chamar ferramentas pelo menos 3 vezes (máximo 5) para observar o mundo simulado + +2. 【OBRIGATÓRIO citar falas e ações originais dos Agentes】 + - As falas e ações dos Agentes são previsões do comportamento futuro dos grupos + - Use formato de citação no relatório para exibir essas previsões, por exemplo: + > "Determinado grupo diria: conteúdo original..." + - Essas citações são a evidência central das previsões da simulação + +3. 【Consistência de idioma - citações DEVEM ser traduzidas para o idioma do relatório】 + - O conteúdo retornado pelas ferramentas pode estar em outro idioma + - O relatório DEVE ser escrito INTEIRAMENTE em português do Brasil + - Ao citar conteúdo em outro idioma retornado pelas ferramentas, traduza para português antes de incluir + - Mantenha o significado original ao traduzir, garantindo naturalidade + - Esta regra se aplica tanto ao texto corrido quanto aos blocos de citação (formato >) + +4. 【Apresentar fielmente os resultados da previsão】 + - O conteúdo do relatório deve refletir os resultados simulados que representam o futuro + - NÃO adicione informações que não existem na simulação + - Se houver informação insuficiente em algum aspecto, declare isso honestamente + +5. 【QUALIDADE NÍVEL CONSELHO DE ADMINISTRAÇÃO】 + - Escreva como se estivesse apresentando para o board da empresa — cada palavra tem peso + - Use dados QUANTITATIVOS da simulação: "72% dos agentes", "na rodada 4", "o grupo X reagiu com Y" + - Inclua COMPARAÇÕES: "72% positivo vs benchmark médio de 45% para o setor" + - Citações diretas de agentes traduzidas: > "Citação do agente traduzida..." 
+ + FORMATAÇÃO POR TIPO DE SEÇÃO: + - Resumo Executivo: começar com "VEREDICTO: LANÇAR/AJUSTAR/NÃO LANÇAR" + frase de 25 palavras + 5 KPIs + - Cenários: "**Nome do Cenário**" em negrito + texto + "Probabilidade: XX%" + impacto financeiro + - Riscos: "**#N Nome do Risco**" (numerado por prioridade) + "Probabilidade: XX%" + "Impacto: Alto/Médio/Baixo" + - Análise Emocional: listar emoções com % (empolgação, confiança, ceticismo, medo, FOMO, indiferença) + - Mapa de Forças: NOMEIE agentes, descreva coalizões e tensões, identifique early adopters vs resistentes + - Cronologia: conte como NARRATIVA ("Tudo começou quando..." com ponto de virada) + - Padrões: foque no SURPREENDENTE e contra-intuitivo, não no óbvio + - Hipóteses: evidências a favor E contra, citando agentes específicos + - Comunicação: para cada segmento: headline + subhead + proof point + canal recomendado + - Recomendações: STACK RANKING — #1 é a única que importa, o resto é complementar + - Previsões: data + probabilidade ± margem de erro + + REGRA DE OURO: Se um CEO lê APENAS o Resumo Executivo (30 segundos), ele deve conseguir tomar a decisão. + NUNCA escreva genérico — cada frase DEVE ter evidência concreta da simulação. ═══════════════════════════════════════════════════════════════ -【⚠️ 格式规范 - 极其重要!】 +【⚠️⚠️⚠️ REGRA ABSOLUTA DE IDIOMA — NÃO VIOLAR】 ═══════════════════════════════════════════════════════════════ -【一个章节 = 最小内容单位】 -- 每个章节是报告的最小分块单位 -- ❌ 禁止在章节内使用任何 Markdown 标题(#、##、###、#### 等) -- ❌ 禁止在内容开头添加章节主标题 -- ✅ 章节标题由系统自动添加,你只需撰写纯正文内容 -- ✅ 使用**粗体**、段落分隔、引用、列表来组织内容,但不要用标题 +ESCREVA 100% EM PORTUGUÊS DO BRASIL. 
+- Dados em chinês das ferramentas → TRADUZA para PT-BR +- Dados em inglês → TRADUZA para PT-BR +- NENHUM caractere em idioma diferente de portugues permitido no output +- Citações de agentes DEVEM ser traduzidas +- VIOLAÇÃO = RELATÓRIO INVÁLIDO -【正确示例】 +═══════════════════════════════════════════════════════════════ +【⚠️ REGRAS DE FORMATAÇÃO - EXTREMAMENTE IMPORTANTE!】 +═══════════════════════════════════════════════════════════════ + +【Uma seção = unidade mínima de conteúdo】 +- Cada seção é a menor unidade do relatório +- ❌ PROIBIDO usar qualquer título Markdown (#, ##, ###, #### etc.) dentro da seção +- ❌ PROIBIDO adicionar o título da seção no início do conteúdo +- ✅ O título da seção é adicionado automaticamente pelo sistema — escreva apenas o texto corrido +- ✅ Use **negrito**, separação de parágrafos, citações e listas para organizar, mas NÃO use títulos + +【Exemplo CORRETO】 ``` -本章节分析了事件的舆论传播态势。通过对模拟数据的深入分析,我们发现... +Esta seção analisa a dinâmica de propagação na opinião pública. Através da análise profunda dos dados simulados, descobrimos... -**首发引爆阶段** +**Fase de ignição inicial** -微博作为舆情的第一现场,承担了信息首发的核心功能: +As redes sociais funcionaram como palco principal da disseminação inicial: -> "微博贡献了68%的首发声量..." +> "As redes contribuíram com 68% do volume inicial de menções..." -**情绪放大阶段** +**Fase de amplificação emocional** -抖音平台进一步放大了事件影响力: +As plataformas de vídeo curto amplificaram o impacto do evento: -- 视觉冲击力强 -- 情绪共鸣度高 +- Forte impacto visual +- Alto grau de ressonância emocional ``` -【错误示例】 +【Exemplo ERRADO】 ``` -## 执行摘要 ← 错误!不要添加任何标题 -### 一、首发阶段 ← 错误!不要用###分小节 -#### 1.1 详细分析 ← 错误!不要用####细分 +## Resumo Executivo ← ERRADO! Não adicione títulos +### 1. Fase Inicial ← ERRADO! Não use ### para subseções +#### 1.1 Análise Detalhada ← ERRADO! Não use #### para detalhar -本章节分析了... +Esta seção analisa... 
``` ═══════════════════════════════════════════════════════════════ -【可用检索工具】(每章节调用3-5次) +【FERRAMENTAS DE BUSCA DISPONÍVEIS】(chamar 3-5 vezes por seção) ═══════════════════════════════════════════════════════════════ {tools_description} -【工具使用建议 - 请混合使用不同工具,不要只用一种】 -- insight_forge: 深度洞察分析,自动分解问题并多维度检索事实和关系 -- panorama_search: 广角全景搜索,了解事件全貌、时间线和演变过程 -- quick_search: 快速验证某个具体信息点 -- interview_agents: 采访模拟Agent,获取不同角色的第一人称观点和真实反应 +【Sugestões de uso — combine diferentes ferramentas, não use apenas uma】 +- insight_forge: Análise de insights profundos, decompõe a questão e busca fatos e relações multidimensionais +- panorama_search: Busca panorâmica ampla, para entender o panorama geral, timeline e evolução dos eventos +- quick_search: Verificação rápida de um ponto de informação específico +- interview_agents: Entrevistar Agentes simulados, obter perspectivas em primeira pessoa e reações reais de diferentes papéis ═══════════════════════════════════════════════════════════════ -【工作流程】 +【FLUXO DE TRABALHO】 ═══════════════════════════════════════════════════════════════ -每次回复你只能做以下两件事之一(不可同时做): +Em cada resposta, você pode fazer APENAS uma das duas coisas (nunca as duas ao mesmo tempo): -选项A - 调用工具: -输出你的思考,然后用以下格式调用一个工具: +Opção A - Chamar ferramenta: +Escreva seu raciocínio, depois chame uma ferramenta no seguinte formato: -{{"name": "工具名称", "parameters": {{"参数名": "参数值"}}}} +{{"name": "nome_da_ferramenta", "parameters": {{"nome_param": "valor_param"}}}} -系统会执行工具并把结果返回给你。你不需要也不能自己编写工具返回结果。 +O sistema executará a ferramenta e retornará o resultado. Você NÃO precisa e NÃO pode escrever o resultado da ferramenta. -选项B - 输出最终内容: -当你已通过工具获取了足够信息,以 "Final Answer:" 开头输出章节内容。 +Opção B - Gerar conteúdo final: +Quando tiver informações suficientes das ferramentas, comece com "Final Answer:" e escreva o conteúdo da seção. 
-⚠️ 严格禁止: -- 禁止在一次回复中同时包含工具调用和 Final Answer -- 禁止自己编造工具返回结果(Observation),所有工具结果由系统注入 -- 每次回复最多调用一个工具 +⚠️ ESTRITAMENTE PROIBIDO: +- Proibido incluir chamada de ferramenta E Final Answer na mesma resposta +- Proibido inventar resultados de ferramentas (Observation) — todos os resultados são injetados pelo sistema +- Máximo de uma chamada de ferramenta por resposta ═══════════════════════════════════════════════════════════════ -【章节内容要求】 +【REQUISITOS DO CONTEÚDO DA SEÇÃO】 ═══════════════════════════════════════════════════════════════ -1. 内容必须基于工具检索到的模拟数据 -2. 大量引用原文来展示模拟效果 -3. 使用Markdown格式(但禁止使用标题): - - 使用 **粗体文字** 标记重点(代替子标题) - - 使用列表(-或1.2.3.)组织要点 - - 使用空行分隔不同段落 - - ❌ 禁止使用 #、##、###、#### 等任何标题语法 -4. 【引用格式规范 - 必须单独成段】 - 引用必须独立成段,前后各有一个空行,不能混在段落中: - - ✅ 正确格式: +1. O conteúdo DEVE ser baseado nos dados da simulação obtidos pelas ferramentas +2. Cite abundantemente o texto original para demonstrar os efeitos da simulação +3. Use formato Markdown (mas PROIBIDO usar títulos): + - Use **texto em negrito** para destacar pontos importantes (substitua subtítulos) + - Use listas (- ou 1.2.3.) para organizar pontos + - Use linhas em branco para separar parágrafos + - ❌ PROIBIDO usar #, ##, ###, #### ou qualquer sintaxe de título +4. 【Formato de citação - DEVE ser parágrafo independente】 + Citações devem ser parágrafos independentes, com uma linha em branco antes e depois: + + ✅ Formato correto: ``` - 校方的回应被认为缺乏实质内容。 + A resposta da empresa foi considerada insuficiente. - > "校方的应对模式在瞬息万变的社交媒体环境中显得僵化和迟缓。" + > "O padrão de resposta da empresa mostrou-se rígido e lento no ambiente dinâmico das redes sociais." - 这一评价反映了公众的普遍不满。 + Esta avaliação reflete a insatisfação geral do público. ``` - ❌ 错误格式: + ❌ Formato errado: ``` - 校方的回应被认为缺乏实质内容。> "校方的应对模式..." 这一评价反映了... + A resposta foi insuficiente.> "O padrão de resposta..." Esta avaliação reflete... ``` -5. 保持与其他章节的逻辑连贯性 -6. 【避免重复】仔细阅读下方已完成的章节内容,不要重复描述相同的信息 -7. 【再次强调】不要添加任何标题!用**粗体**代替小节标题""" +5. 
Mantenha coerência lógica com as demais seções +6. 【Evite repetição】Leia atentamente o conteúdo das seções já concluídas abaixo, NÃO repita as mesmas informações +7. 【REFORÇANDO】NÃO adicione nenhum título! Use **negrito** no lugar de subtítulos + +⚠️ REGRA ABSOLUTA: As ferramentas podem retornar dados em CHINÊS ou INGLÊS. +Você DEVE traduzir TUDO para PORTUGUÊS DO BRASIL antes de incluir no relatório. +NÃO copie texto em chinês — traduza SEMPRE. Se uma citação está em chinês, traduza-a.""" SECTION_USER_PROMPT_TEMPLATE = """\ -已完成的章节内容(请仔细阅读,避免重复): +Conteúdo das seções já concluídas (leia atentamente para evitar repetição): {previous_content} ═══════════════════════════════════════════════════════════════ -【当前任务】撰写章节: {section_title} +【TAREFA ATUAL】Redigir a seção: {section_title} ═══════════════════════════════════════════════════════════════ -【重要提醒】 -1. 仔细阅读上方已完成的章节,避免重复相同的内容! -2. 开始前必须先调用工具获取模拟数据 -3. 请混合使用不同工具,不要只用一种 -4. 报告内容必须来自检索结果,不要使用自己的知识 +【LEMBRETES IMPORTANTES】 +1. Leia atentamente as seções concluídas acima — NÃO repita o mesmo conteúdo! +2. Antes de começar, OBRIGATÓRIO chamar ferramentas para obter dados da simulação +3. Combine diferentes ferramentas — não use apenas uma +4. O conteúdo do relatório DEVE vir dos resultados da busca — NÃO use seu próprio conhecimento +5. Escreva TUDO em português do Brasil -【⚠️ 格式警告 - 必须遵守】 -- ❌ 不要写任何标题(#、##、###、####都不行) -- ❌ 不要写"{section_title}"作为开头 -- ✅ 章节标题由系统自动添加 -- ✅ 直接写正文,用**粗体**代替小节标题 +【⚠️ ALERTA DE FORMATAÇÃO - OBRIGATÓRIO】 +- ❌ NÃO escreva nenhum título (#, ##, ###, #### — NENHUM) +- ❌ NÃO escreva "{section_title}" como início +- ✅ O título da seção é adicionado automaticamente pelo sistema +- ✅ Escreva direto o texto corrido, use **negrito** no lugar de subtítulos -请开始: -1. 首先思考(Thought)这个章节需要什么信息 -2. 然后调用工具(Action)获取模拟数据 -3. 收集足够信息后输出 Final Answer(纯正文,无任何标题)""" +Comece: +1. Primeiro pense (Thought) em quais informações esta seção precisa +2. 
Depois chame uma ferramenta (Action) para obter dados da simulação +3. Após coletar informações suficientes, gere o Final Answer (texto corrido, sem nenhum título)""" -# ── ReACT 循环内消息模板 ── +# ── Templates de mensagem dentro do ciclo ReACT ── REACT_OBSERVATION_TEMPLATE = """\ -Observation(检索结果): +Observação (resultado da busca): -═══ 工具 {tool_name} 返回 ═══ +═══ Ferramenta {tool_name} retornou ═══ {result} ═══════════════════════════════════════════════════════════════ -已调用工具 {tool_calls_count}/{max_tool_calls} 次(已用: {used_tools_str}){unused_hint} -- 如果信息充分:以 "Final Answer:" 开头输出章节内容(必须引用上述原文) -- 如果需要更多信息:调用一个工具继续检索 +Ferramentas chamadas: {tool_calls_count}/{max_tool_calls} (usadas: {used_tools_str}){unused_hint} +- Se a informação é suficiente: comece com "Final Answer:" e escreva o conteúdo da seção (DEVE citar o texto original acima) +- Se precisa de mais informações: chame uma ferramenta para continuar buscando ═══════════════════════════════════════════════════════════════""" REACT_INSUFFICIENT_TOOLS_MSG = ( - "【注意】你只调用了{tool_calls_count}次工具,至少需要{min_tool_calls}次。" - "请再调用工具获取更多模拟数据,然后再输出 Final Answer。{unused_hint}" + "【ATENÇÃO】Você chamou ferramentas apenas {tool_calls_count} vezes — mínimo necessário: {min_tool_calls}. " + "Chame mais ferramentas para obter mais dados da simulação antes de escrever o Final Answer.{unused_hint}" ) REACT_INSUFFICIENT_TOOLS_MSG_ALT = ( - "当前只调用了 {tool_calls_count} 次工具,至少需要 {min_tool_calls} 次。" - "请调用工具获取模拟数据。{unused_hint}" + "Ferramentas chamadas apenas {tool_calls_count} vezes — mínimo necessário: {min_tool_calls}. " + "Chame ferramentas para obter dados da simulação.{unused_hint}" ) REACT_TOOL_LIMIT_MSG = ( - "工具调用次数已达上限({tool_calls_count}/{max_tool_calls}),不能再调用工具。" - '请立即基于已获取的信息,以 "Final Answer:" 开头输出章节内容。' + "Limite de chamadas de ferramentas atingido ({tool_calls_count}/{max_tool_calls}) — não é possível chamar mais. " + 'Agora escreva imediatamente o conteúdo da seção, começando com "Final Answer:".' 
) -REACT_UNUSED_TOOLS_HINT = "\n💡 你还没有使用过: {unused_list},建议尝试不同工具获取多角度信息" +REACT_UNUSED_TOOLS_HINT = "\n💡 Você ainda não usou: {unused_list} — tente ferramentas diferentes para obter informações de múltiplos ângulos" -REACT_FORCE_FINAL_MSG = "已达到工具调用限制,请直接输出 Final Answer: 并生成章节内容。" +REACT_FORCE_FINAL_MSG = "Limite de chamadas atingido. Escreva diretamente o Final Answer: com o conteúdo da seção." -# ── Chat prompt ── +# ── Prompt de chat ── CHAT_SYSTEM_PROMPT_TEMPLATE = """\ -你是一个简洁高效的模拟预测助手。 +Você é um assistente de previsão simulada, conciso e eficiente. -【背景】 -预测条件: {simulation_requirement} +【CONTEXTO】 +Condição de previsão: {simulation_requirement} -【已生成的分析报告】 +【RELATÓRIO DE ANÁLISE GERADO】 {report_content} -【规则】 -1. 优先基于上述报告内容回答问题 -2. 直接回答问题,避免冗长的思考论述 -3. 仅在报告内容不足以回答时,才调用工具检索更多数据 -4. 回答要简洁、清晰、有条理 +【REGRAS】 +1. Priorize responder com base no conteúdo do relatório acima +2. Responda diretamente, evite raciocínios longos +3. Só chame ferramentas se o conteúdo do relatório for insuficiente para responder +4. Respostas devem ser concisas, claras e organizadas +5. Responda SEMPRE em português do Brasil -【可用工具】(仅在需要时使用,最多调用1-2次) +【FERRAMENTAS DISPONÍVEIS】(use apenas quando necessário, máximo 1-2 chamadas) {tools_description} -【工具调用格式】 +【FORMATO DE CHAMADA DE FERRAMENTAS】 -{{"name": "工具名称", "parameters": {{"参数名": "参数值"}}}} +{{"name": "nome_da_ferramenta", "parameters": {{"nome_param": "valor_param"}}}} -【回答风格】 -- 简洁直接,不要长篇大论 -- 使用 > 格式引用关键内容 -- 优先给出结论,再解释原因""" +【ESTILO DE RESPOSTA】 +- Conciso e direto, sem textos longos desnecessários +- Use formato > para citar conteúdo-chave +- Primeiro dê a conclusão, depois explique os motivos""" -CHAT_OBSERVATION_SUFFIX = "\n\n请简洁回答问题。" +CHAT_OBSERVATION_SUFFIX = "\n\nResponda a pergunta de forma concisa." 
# ═══════════════════════════════════════════════════════════════ -# ReportAgent 主类 +# ReportAgent # ═══════════════════════════════════════════════════════════════ class ReportAgent: """ - Report Agent - 模拟报告生成Agent + Report Agent - Agent de geração de relatórios de simulação - 采用ReACT(Reasoning + Acting)模式: - 1. 规划阶段:分析模拟需求,规划报告目录结构 - 2. 生成阶段:逐章节生成内容,每章节可多次调用工具获取信息 - 3. 反思阶段:检查内容完整性和准确性 + Usa o padrão ReACT (Reasoning + Acting): + 1. Fase de planejamento: analisa requisitos e planeja estrutura + 2. Fase de geração: gera conteúdo seção por seção com múltiplas chamadas de ferramentas + 3. Fase de reflexão: verifica completude e precisão do conteúdo """ - # 最大工具调用次数(每个章节) + # Máximo de chamadas de ferramentas (por seção) MAX_TOOL_CALLS_PER_SECTION = 5 - # 最大反思轮数 + # Máximo de rodadas de reflexão MAX_REFLECTION_ROUNDS = 3 - # 对话中的最大工具调用次数 + # Máximo de chamadas de ferramentas no diálogo MAX_TOOL_CALLS_PER_CHAT = 2 def __init__( @@ -890,80 +1038,80 @@ def __init__( zep_tools: Optional[ZepToolsService] = None ): """ - 初始化Report Agent + Inicializar Report Agent Args: - graph_id: 图谱ID - simulation_id: 模拟ID - simulation_requirement: 模拟需求描述 - llm_client: LLM客户端(可选) - zep_tools: Zep工具服务(可选) + graph_id: ID do grafo + simulation_id: ID da simulação + simulation_requirement: Descrição dos requisitos da simulação + llm_client: Cliente LLM (opcional) + zep_tools: Serviço de ferramentas Zep (opcional) """ self.graph_id = graph_id self.simulation_id = simulation_id self.simulation_requirement = simulation_requirement - self.llm = llm_client or LLMClient() + self.llm = llm_client or LLMClient(model='gpt-5.4') self.zep_tools = zep_tools or ZepToolsService() - # 工具定义 + # Ferramenta self.tools = self._define_tools() - # 日志记录器(在 generate_report 中初始化) + # generate_report self.report_logger: Optional[ReportLogger] = None - # 控制台日志记录器(在 generate_report 中初始化) + # generate_report self.console_logger: Optional[ReportConsoleLogger] = None logger.info(t('report.agentInitDone', 
graphId=graph_id, simulationId=simulation_id)) def _define_tools(self) -> Dict[str, Dict[str, Any]]: - """定义可用工具""" + """Definir ferramentas disponíveis""" return { "insight_forge": { "name": "insight_forge", "description": TOOL_DESC_INSIGHT_FORGE, "parameters": { - "query": "你想深入分析的问题或话题", - "report_context": "当前报告章节的上下文(可选,有助于生成更精准的子问题)" + "query": "Pergunta ou tópico que deseja analisar em profundidade", + "report_context": "Contexto da seção atual do relatório (opcional, ajuda a gerar sub-perguntas mais precisas)" } }, "panorama_search": { "name": "panorama_search", "description": TOOL_DESC_PANORAMA_SEARCH, "parameters": { - "query": "搜索查询,用于相关性排序", - "include_expired": "是否包含过期/历史内容(默认True)" + "query": "Consulta de busca, para ordenação por relevância", + "include_expired": "Incluir conteúdo expirado/histórico (padrão True)" } }, "quick_search": { "name": "quick_search", "description": TOOL_DESC_QUICK_SEARCH, "parameters": { - "query": "搜索查询字符串", - "limit": "返回结果数量(可选,默认10)" + "query": "String de consulta de busca", + "limit": "Quantidade de resultados (opcional, padrão 10)" } }, "interview_agents": { "name": "interview_agents", "description": TOOL_DESC_INTERVIEW_AGENTS, "parameters": { - "interview_topic": "采访主题或需求描述(如:'了解学生对宿舍甲醛事件的看法')", - "max_agents": "最多采访的Agent数量(可选,默认5,最大10)" + "interview_topic": "Tema ou descrição da entrevista (ex: 'entender a visão dos consumidores sobre o novo produto')", + "max_agents": "Máximo de agentes a entrevistar (opcional, padrão 5, máx 10)" } } } def _execute_tool(self, tool_name: str, parameters: Dict[str, Any], report_context: str = "") -> str: """ - 执行工具调用 + Executar chamada de ferramenta Args: - tool_name: 工具名称 - parameters: 工具参数 - report_context: 报告上下文(用于InsightForge) + tool_name: Ferramenta + parameters: Ferramenta + report_context: RelatórioInsightForge Returns: - 工具执行结果(文本格式) + FerramentaResultado """ logger.info(t('report.executingTool', toolName=tool_name, params=parameters)) @@ -980,7 +1128,6 @@ def 
_execute_tool(self, tool_name: str, parameters: Dict[str, Any], report_conte return result.to_text() elif tool_name == "panorama_search": - # 广度搜索 - 获取全貌 query = parameters.get("query", "") include_expired = parameters.get("include_expired", True) if isinstance(include_expired, str): @@ -993,7 +1140,7 @@ def _execute_tool(self, tool_name: str, parameters: Dict[str, Any], report_conte return result.to_text() elif tool_name == "quick_search": - # 简单搜索 - 快速检索 + # - Busca query = parameters.get("query", "") limit = parameters.get("limit", 10) if isinstance(limit, str): @@ -1006,7 +1153,7 @@ def _execute_tool(self, tool_name: str, parameters: Dict[str, Any], report_conte return result.to_text() elif tool_name == "interview_agents": - # 深度采访 - 调用真实的OASIS采访API获取模拟Agent的回答(双平台) + # - OASISAPISimulaçãoAgent interview_topic = parameters.get("interview_topic", parameters.get("query", "")) max_agents = parameters.get("max_agents", 5) if isinstance(max_agents, str): @@ -1020,10 +1167,10 @@ def _execute_tool(self, tool_name: str, parameters: Dict[str, Any], report_conte ) return result.to_text() - # ========== 向后兼容的旧工具(内部重定向到新工具) ========== + # ========== FerramentaFerramenta ========== elif tool_name == "search_graph": - # 重定向到 quick_search + # quick_search logger.info(t('report.redirectToQuickSearch')) return self._execute_tool("quick_search", parameters, report_context) @@ -1040,7 +1187,7 @@ def _execute_tool(self, tool_name: str, parameters: Dict[str, Any], report_conte return json.dumps(result, ensure_ascii=False, indent=2) elif tool_name == "get_simulation_context": - # 重定向到 insight_forge,因为它更强大 + # insight_forge logger.info(t('report.redirectToInsightForge')) query = parameters.get("query", self.simulation_requirement) return self._execute_tool("insight_forge", {"query": query}, report_context) @@ -1055,26 +1202,25 @@ def _execute_tool(self, tool_name: str, parameters: Dict[str, Any], report_conte return json.dumps(result, ensure_ascii=False, indent=2) else: - return 
f"未知工具: {tool_name}。请使用以下工具之一: insight_forge, panorama_search, quick_search" + return f"Ferramenta desconhecida: {tool_name}. Use uma das seguintes: insight_forge, panorama_search, quick_search" except Exception as e: logger.error(t('report.toolExecFailed', toolName=tool_name, error=str(e))) - return f"工具执行失败: {str(e)}" + return f"Falha na execução da ferramenta: {str(e)}" - # 合法的工具名称集合,用于裸 JSON 兜底解析时校验 + # Ferramenta JSON VALID_TOOL_NAMES = {"insight_forge", "panorama_search", "quick_search", "interview_agents"} def _parse_tool_calls(self, response: str) -> List[Dict[str, Any]]: """ - 从LLM响应中解析工具调用 + Parsear chamadas de ferramenta da resposta do LLM - 支持的格式(按优先级): 1. {"name": "tool_name", "parameters": {...}} - 2. 裸 JSON(响应整体或单行就是一个工具调用 JSON) + 2. JSONFerramenta JSON """ tool_calls = [] - # 格式1: XML风格(标准格式) + # 1: XML xml_pattern = r'\s*(\{.*?\})\s*' for match in re.finditer(xml_pattern, response, re.DOTALL): try: @@ -1086,8 +1232,8 @@ def _parse_tool_calls(self, response: str) -> List[Dict[str, Any]]: if tool_calls: return tool_calls - # 格式2: 兜底 - LLM 直接输出裸 JSON(没包 标签) - # 只在格式1未匹配时尝试,避免误匹配正文中的 JSON + # 2: - LLM JSON + # 1 JSON stripped = response.strip() if stripped.startswith('{') and stripped.endswith('}'): try: @@ -1098,7 +1244,7 @@ def _parse_tool_calls(self, response: str) -> List[Dict[str, Any]]: except json.JSONDecodeError: pass - # 响应可能包含思考文字 + 裸 JSON,尝试提取最后一个 JSON 对象 + # + JSON JSON json_pattern = r'(\{"(?:name|tool)"\s*:.*?\})\s*$' match = re.search(json_pattern, stripped, re.DOTALL) if match: @@ -1112,11 +1258,11 @@ def _parse_tool_calls(self, response: str) -> List[Dict[str, Any]]: return tool_calls def _is_valid_tool_call(self, data: dict) -> bool: - """校验解析出的 JSON 是否是合法的工具调用""" - # 支持 {"name": ..., "parameters": ...} 和 {"tool": ..., "params": ...} 两种键名 + """ JSON Ferramenta""" + # {"name": ..., "parameters": ...} {"tool": ..., "params": ...} tool_name = data.get("name") or data.get("tool") if tool_name and tool_name in self.VALID_TOOL_NAMES: - # 
统一键名为 name / parameters + # name / parameters if "tool" in data: data["name"] = data.pop("tool") if "params" in data and "parameters" not in data: @@ -1125,13 +1271,13 @@ def _is_valid_tool_call(self, data: dict) -> bool: return False def _get_tools_description(self) -> str: - """生成工具描述文本""" - desc_parts = ["可用工具:"] + """Gerar texto de descrição das ferramentas""" + desc_parts = ["Ferramentas disponíveis:"] for name, tool in self.tools.items(): params_desc = ", ".join([f"{k}: {v}" for k, v in tool["parameters"].items()]) desc_parts.append(f"- {name}: {tool['description']}") if params_desc: - desc_parts.append(f" 参数: {params_desc}") + desc_parts.append(f" Parâmetros: {params_desc}") return "\n".join(desc_parts) def plan_outline( @@ -1139,22 +1285,22 @@ def plan_outline( progress_callback: Optional[Callable] = None ) -> ReportOutline: """ - 规划报告大纲 + Planejar outline do relatório - 使用LLM分析模拟需求,规划报告的目录结构 + Usa o LLM para analisar requisitos e planejar a estrutura do relatório Args: - progress_callback: 进度回调函数 + progress_callback: Função callback de progresso Returns: - ReportOutline: 报告大纲 + ReportOutline: Outline do relatório """ logger.info(t('report.startPlanningOutline')) if progress_callback: progress_callback("planning", 0, t('progress.analyzingRequirements')) - # 首先获取模拟上下文 + # Primeiro obtém o contexto da simulação context = self.zep_tools.get_simulation_context( graph_id=self.graph_id, simulation_requirement=self.simulation_requirement @@ -1163,15 +1309,27 @@ def plan_outline( if progress_callback: progress_callback("planning", 30, t('progress.generatingOutline')) - system_prompt = f"{PLAN_SYSTEM_PROMPT}\n\n{get_language_instruction()}" - user_prompt = PLAN_USER_PROMPT_TEMPLATE.format( - simulation_requirement=self.simulation_requirement, - total_nodes=context.get('graph_statistics', {}).get('total_nodes', 0), - total_edges=context.get('graph_statistics', {}).get('total_edges', 0), - entity_types=list(context.get('graph_statistics', {}).get('entity_types', 
{}).keys()), - total_entities=context.get('total_entities', 0), - related_facts_json=json.dumps(context.get('related_facts', [])[:10], ensure_ascii=False, indent=2), - ) + # AUGUR v2: usar prompts estruturados se disponível + if HAS_V2: + system_prompt = f"{PLAN_SYSTEM_PROMPT_V2}\n\n{get_language_instruction()}" + user_prompt = PLAN_USER_PROMPT_V2.format( + simulation_requirement=self.simulation_requirement, + total_nodes=context.get('graph_statistics', {}).get('total_nodes', 0), + total_edges=context.get('graph_statistics', {}).get('total_edges', 0), + entity_types=list(context.get('graph_statistics', {}).get('entity_types', {}).keys()), + total_entities=context.get('total_entities', 0), + related_facts_json=json.dumps(context.get('related_facts', [])[:10], ensure_ascii=False, indent=2), + ) + else: + system_prompt = f"{PLAN_SYSTEM_PROMPT}\n\n{get_language_instruction()}" + user_prompt = PLAN_USER_PROMPT_TEMPLATE.format( + simulation_requirement=self.simulation_requirement, + total_nodes=context.get('graph_statistics', {}).get('total_nodes', 0), + total_edges=context.get('graph_statistics', {}).get('total_edges', 0), + entity_types=list(context.get('graph_statistics', {}).get('entity_types', {}).keys()), + total_entities=context.get('total_entities', 0), + related_facts_json=json.dumps(context.get('related_facts', [])[:10], ensure_ascii=False, indent=2), + ) try: response = self.llm.chat_json( @@ -1185,7 +1343,7 @@ def plan_outline( if progress_callback: progress_callback("planning", 80, t('progress.parsingOutline')) - # 解析大纲 + # Parsear outline sections = [] for section_data in response.get("sections", []): sections.append(ReportSection( @@ -1194,11 +1352,14 @@ def plan_outline( )) outline = ReportOutline( - title=response.get("title", "模拟分析报告"), + title=response.get("title", "Relatório de Análise Preditiva"), summary=response.get("summary", ""), sections=sections ) + # ── Pós-processamento: forçar português ── + outline = self._ensure_portuguese_outline(outline) 
+ if progress_callback: progress_callback("planning", 100, t('progress.outlinePlanComplete')) @@ -1207,17 +1368,98 @@ def plan_outline( except Exception as e: logger.error(t('report.outlinePlanFailed', error=str(e))) - # 返回默认大纲(3个章节,作为fallback) + # Retornar outline padrão (3 seções, como fallback) return ReportOutline( - title="未来预测报告", - summary="基于模拟预测的未来趋势与风险分析", + title="Relatório de Previsão Futura", + summary="Análise de tendências e riscos futuros baseada em simulação preditiva", sections=[ - ReportSection(title="预测场景与核心发现"), - ReportSection(title="人群行为预测分析"), - ReportSection(title="趋势展望与风险提示") + ReportSection(title="Cenários de Previsão e Descobertas Principais"), + ReportSection(title="Análise Preditiva de Comportamento dos Grupos"), + ReportSection(title="Perspectivas de Tendências e Alertas de Risco") ] ) + @staticmethod + def _has_chinese(text: str) -> bool: + """Detecta se o texto contém caracteres chineses""" + if not text: + return False + return bool(re.search(r'[\u4e00-\u9fff]', text)) + + def _ensure_portuguese_outline(self, outline: 'ReportOutline') -> 'ReportOutline': + """ + Pós-processamento: se o título ou seções contêm chinês, + chama o LLM para traduzir para português do Brasil. + """ + texts_to_check = [outline.title, outline.summary] + [s.title for s in outline.sections] + has_any_chinese = any(self._has_chinese(t) for t in texts_to_check) + + if not has_any_chinese: + return outline + + logger.warning("Detectado chinês no outline — traduzindo para PT-BR...") + + try: + sections_json = json.dumps( + [{"title": s.title, "description": getattr(s, 'description', '')} for s in outline.sections], + ensure_ascii=False + ) + translate_prompt = ( + "Traduza o seguinte JSON para português do Brasil.\n" + "Mantenha EXATAMENTE a mesma estrutura JSON. Traduza TODOS os valores de string.\n" + "NÃO deixe NENHUM texto em chinês. 
Retorne APENAS o JSON traduzido.\n\n" + f'{{"title": "{outline.title}", "summary": "{outline.summary}", "sections": {sections_json}}}' + ) + + response = self.llm.chat_json( + messages=[ + {"role": "system", "content": "Você é um tradutor. Traduza todo conteúdo para português do Brasil. Retorne JSON puro."}, + {"role": "user", "content": translate_prompt} + ], + temperature=0.1 + ) + + if response and "title" in response: + outline.title = response.get("title", outline.title) + outline.summary = response.get("summary", outline.summary) + translated_sections = response.get("sections", []) + for i, ts in enumerate(translated_sections): + if i < len(outline.sections): + outline.sections[i].title = ts.get("title", outline.sections[i].title) + logger.info(f"Outline traduzido com sucesso: {outline.title}") + + except Exception as e: + logger.error(f"Falha ao traduzir outline: {e}") + default_titles = [ + "Resumo Executivo", "Cenários Futuros", "Fatores de Risco", + "Mapa de Forças", "Cronologia por Rodada", "Padrões Emergentes", + "Hipóteses Causais", "Recomendações Estratégicas", "Previsões" + ] + for i, dt in enumerate(default_titles): + if i < len(outline.sections) and self._has_chinese(outline.sections[i].title): + outline.sections[i].title = dt + if self._has_chinese(outline.title): + outline.title = "Relatório de Previsão Futura" + if self._has_chinese(outline.summary): + outline.summary = "Análise preditiva baseada em simulação multiagente" + + # Garantia final: remover caracteres chineses residuais + outline.title = self._strip_remaining_chinese(outline.title) + outline.summary = self._strip_remaining_chinese(outline.summary) + for s in outline.sections: + s.title = self._strip_remaining_chinese(s.title) + + return outline + + @staticmethod + def _strip_remaining_chinese(text: str) -> str: + """Remove caracteres chineses residuais após tentativas de tradução.""" + if not text: + return text + cleaned = re.sub(r'[\u4e00-\u9fff]+', '', text) + cleaned = 
re.sub(r'\n{3,}', '\n\n', cleaned) + return cleaned.strip() + def _generate_section_react( self, section: ReportSection, @@ -1227,50 +1469,64 @@ def _generate_section_react( section_index: int = 0 ) -> str: """ - 使用ReACT模式生成单个章节内容 + Gera conteúdo de uma seção usando o padrão ReACT - ReACT循环: - 1. Thought(思考)- 分析需要什么信息 - 2. Action(行动)- 调用工具获取信息 - 3. Observation(观察)- 分析工具返回结果 - 4. 重复直到信息足够或达到最大次数 - 5. Final Answer(最终回答)- 生成章节内容 + ReACT + 1. Thought- Análise + 2. Action- Ferramenta + 3. Observation- AnáliseFerramentaResultado + 4. + 5. Final Answer- GerarSeçãoConteúdo Args: - section: 要生成的章节 - outline: 完整大纲 - previous_sections: 之前章节的内容(用于保持连贯性) - progress_callback: 进度回调 - section_index: 章节索引(用于日志记录) + section: GerarSeção + outline: Outline + previous_sections: SeçãoConteúdo + progress_callback: Callback de progresso + section_index: Seção Returns: - 章节内容(Markdown格式) + SeçãoConteúdoMarkdown """ logger.info(t('report.reactGenerateSection', title=section.title)) - # 记录章节开始日志 + # Seção if self.report_logger: self.report_logger.log_section_start(section.title, section_index) - system_prompt = SECTION_SYSTEM_PROMPT_TEMPLATE.format( - report_title=outline.title, - report_summary=outline.summary, - simulation_requirement=self.simulation_requirement, - section_title=section.title, - tools_description=self._get_tools_description(), - ) - system_prompt = f"{system_prompt}\n\n{get_language_instruction()}" + # AUGUR v2: usar prompt de seção específico por key + section_key = getattr(section, 'key', '') or section.title.lower().replace(' ', '_') - # 构建用户prompt - 每个已完成章节各传入最大4000字 + # Texto das seções já concluídas + completed_text = "" if previous_sections: previous_parts = [] for sec in previous_sections: - # 每个章节最多4000字 truncated = sec[:4000] + "..." 
if len(sec) > 4000 else sec previous_parts.append(truncated) - previous_content = "\n\n---\n\n".join(previous_parts) + completed_text = "\n\n---\n\n".join(previous_parts) + + if HAS_V2: + system_prompt = get_section_system_prompt( + section_key=section_key, + report_title=outline.title, + report_summary=outline.summary, + simulation_requirement=self.simulation_requirement, + tools_description=self._get_tools_description(), + completed_sections_text=completed_text, + ) + system_prompt = f"{system_prompt}\n\n{get_language_instruction()}" else: - previous_content = "(这是第一个章节)" + system_prompt = SECTION_SYSTEM_PROMPT_TEMPLATE.format( + report_title=outline.title, + report_summary=outline.summary, + simulation_requirement=self.simulation_requirement, + section_title=section.title, + tools_description=self._get_tools_description(), + ) + system_prompt = f"{system_prompt}\n\n{get_language_instruction()}" + + previous_content = completed_text if completed_text else "(Esta é a primeira seção)" user_prompt = SECTION_USER_PROMPT_TEMPLATE.format( previous_content=previous_content, @@ -1282,16 +1538,16 @@ def _generate_section_react( {"role": "user", "content": user_prompt} ] - # ReACT循环 + # ReACT tool_calls_count = 0 - max_iterations = 5 # 最大迭代轮数 - min_tool_calls = 3 # 最少工具调用次数 - conflict_retries = 0 # 工具调用与Final Answer同时出现的连续冲突次数 - used_tools = set() # 记录已调用过的工具名 + max_iterations = 5 # + min_tool_calls = 3 # Ferramenta + conflict_retries = 0 # FerramentaFinal Answer + used_tools = set() # Ferramenta all_tools = {"insight_forge", "panorama_search", "quick_search", "interview_agents"} - # 报告上下文,用于InsightForge的子问题生成 - report_context = f"章节标题: {section.title}\n模拟需求: {self.simulation_requirement}" + # RelatórioInsightForgeGerar + report_context = f"Título da seção: {section.title}\nRequisito da simulação: {self.simulation_requirement}" for iteration in range(max_iterations): if progress_callback: @@ -1301,32 +1557,31 @@ def _generate_section_react( t('progress.deepSearchAndWrite', 
current=tool_calls_count, max=self.MAX_TOOL_CALLS_PER_SECTION) ) - # 调用LLM response = self.llm.chat( messages=messages, temperature=0.5, max_tokens=4096 ) - # 检查 LLM 返回是否为 None(API 异常或内容为空) + # LLM NoneAPI Conteúdo if response is None: logger.warning(t('report.sectionIterNone', title=section.title, iteration=iteration + 1)) - # 如果还有迭代次数,添加消息并重试 + # Se ainda houver iterações, adiciona mensagem e tenta novamente if iteration < max_iterations - 1: - messages.append({"role": "assistant", "content": "(响应为空)"}) - messages.append({"role": "user", "content": "请继续生成内容。"}) + messages.append({"role": "assistant", "content": "(resposta vazia)"}) + messages.append({"role": "user", "content": "Por favor, continue gerando o conteúdo."}) continue - # 最后一次迭代也返回 None,跳出循环进入强制收尾 + # Última iteração também retornou None, sai do loop para encerramento forçado break - logger.debug(f"LLM响应: {response[:200]}...") + logger.debug(f"LLM response: {response[:200]}...") - # 解析一次,复用结果 + # Parsear uma vez, reutilizar resultado tool_calls = self._parse_tool_calls(response) has_tool_calls = bool(tool_calls) has_final_answer = "Final Answer:" in response - # ── 冲突处理:LLM 同时输出了工具调用和 Final Answer ── + # ── LLM Ferramenta Final Answer ── if has_tool_calls and has_final_answer: conflict_retries += 1 logger.warning( @@ -1334,21 +1589,21 @@ def _generate_section_react( ) if conflict_retries <= 2: - # 前两次:丢弃本次响应,要求 LLM 重新回复 + # Primeiras duas vezes: descartar resposta e pedir nova ao LLM messages.append({"role": "assistant", "content": response}) messages.append({ "role": "user", "content": ( - "【格式错误】你在一次回复中同时包含了工具调用和 Final Answer,这是不允许的。\n" - "每次回复只能做以下两件事之一:\n" - "- 调用一个工具(输出一个 块,不要写 Final Answer)\n" - "- 输出最终内容(以 'Final Answer:' 开头,不要包含 )\n" - "请重新回复,只做其中一件事。" + "【ERRO DE FORMATO】Você incluiu chamada de ferramenta E Final Answer na mesma resposta — isso não é permitido.\n" + "Cada resposta deve fazer APENAS uma das duas coisas:\n" + "- Chamar uma ferramenta (um bloco , SEM escrever Final Answer)\n" + 
"- Gerar conteúdo final (começar com 'Final Answer:', SEM incluir )\n" + "Responda novamente, fazendo apenas uma das duas." ), }) continue else: - # 第三次:降级处理,截断到第一个工具调用,强制执行 + # Terceira vez: tratamento degradado, truncar e executar logger.warning( t('report.sectionConflictDowngrade', title=section.title, conflictCount=conflict_retries) ) @@ -1360,7 +1615,7 @@ def _generate_section_react( has_final_answer = False conflict_retries = 0 - # 记录 LLM 响应日志 + # Registrar log de resposta do LLM if self.report_logger: self.report_logger.log_llm_response( section_title=section.title, @@ -1371,13 +1626,13 @@ def _generate_section_react( has_final_answer=has_final_answer ) - # ── 情况1:LLM 输出了 Final Answer ── + # ── Caso1:LLM gerou Final Answer ── if has_final_answer: - # 工具调用次数不足,拒绝并要求继续调工具 + # Chamadas insuficientes, rejeitar e pedir mais chamadas if tool_calls_count < min_tool_calls: messages.append({"role": "assistant", "content": response}) unused_tools = all_tools - used_tools - unused_hint = f"(这些工具还未使用,推荐用一下他们: {', '.join(unused_tools)})" if unused_tools else "" + unused_hint = f"(Estas ferramentas ainda não foram usadas, recomendamos experimentá-las: {', '.join(unused_tools)})" if unused_tools else "" messages.append({ "role": "user", "content": REACT_INSUFFICIENT_TOOLS_MSG.format( @@ -1388,8 +1643,16 @@ def _generate_section_react( }) continue - # 正常结束 final_answer = response.split("Final Answer:")[-1].strip() + + # AUGUR v2: tentar extrair JSON estruturado + if HAS_V2: + try: + section._v2_data = parse_section_json(response, section_key) + logger.info(f"v2 JSON parsed for section: {section_key}") + except Exception: + section._v2_data = None + logger.info(t('report.sectionGenDone', title=section.title, count=tool_calls_count)) if self.report_logger: @@ -1401,9 +1664,9 @@ def _generate_section_react( ) return final_answer - # ── 情况2:LLM 尝试调用工具 ── + # ── Caso2LLM Ferramenta ── if has_tool_calls: - # 工具额度已耗尽 → 明确告知,要求输出 Final Answer + # Ferramenta → Final Answer if 
tool_calls_count >= self.MAX_TOOL_CALLS_PER_SECTION: messages.append({"role": "assistant", "content": response}) messages.append({ @@ -1415,7 +1678,7 @@ def _generate_section_react( }) continue - # 只执行第一个工具调用 + # Ferramenta call = tool_calls[0] if len(tool_calls) > 1: logger.info(t('report.multiToolOnlyFirst', total=len(tool_calls), toolName=call['name'])) @@ -1447,7 +1710,7 @@ def _generate_section_react( tool_calls_count += 1 used_tools.add(call['name']) - # 构建未使用工具提示 + # Ferramenta unused_tools = all_tools - used_tools unused_hint = "" if unused_tools and tool_calls_count < self.MAX_TOOL_CALLS_PER_SECTION: @@ -1467,13 +1730,13 @@ def _generate_section_react( }) continue - # ── 情况3:既没有工具调用,也没有 Final Answer ── + # ── Caso3Ferramenta Final Answer ── messages.append({"role": "assistant", "content": response}) if tool_calls_count < min_tool_calls: - # 工具调用次数不足,推荐未用过的工具 + # FerramentaFerramenta unused_tools = all_tools - used_tools - unused_hint = f"(这些工具还未使用,推荐用一下他们: {', '.join(unused_tools)})" if unused_tools else "" + unused_hint = f"(Estas ferramentas ainda não foram usadas, recomendamos experimentá-las: {', '.join(unused_tools)})" if unused_tools else "" messages.append({ "role": "user", @@ -1485,8 +1748,8 @@ def _generate_section_react( }) continue - # 工具调用已足够,LLM 输出了内容但没带 "Final Answer:" 前缀 - # 直接将这段内容作为最终答案,不再空转 + # FerramentaLLM Conteúdo "Final Answer:" + # Conteúdo logger.info(t('report.sectionNoPrefix', title=section.title, count=tool_calls_count)) final_answer = response.strip() @@ -1499,7 +1762,7 @@ def _generate_section_react( ) return final_answer - # 达到最大迭代次数,强制生成内容 + # GerarConteúdo logger.warning(t('report.sectionMaxIter', title=section.title)) messages.append({"role": "user", "content": REACT_FORCE_FINAL_MSG}) @@ -1509,16 +1772,22 @@ def _generate_section_react( max_tokens=4096 ) - # 检查强制收尾时 LLM 返回是否为 None + # Encerramento forçado LLM None if response is None: logger.error(t('report.sectionForceFailed', title=section.title)) final_answer = 
t('report.sectionGenFailedContent') elif "Final Answer:" in response: final_answer = response.split("Final Answer:")[-1].strip() + # AUGUR v2: tentar extrair JSON estruturado + if HAS_V2: + try: + section._v2_data = parse_section_json(response, section_key) + except Exception: + section._v2_data = None else: final_answer = response - # 记录章节内容生成完成日志 + # SeçãoConteúdoGerar if self.report_logger: self.report_logger.log_section_content( section_title=section.title, @@ -1535,29 +1804,28 @@ def generate_report( report_id: Optional[str] = None ) -> Report: """ - 生成完整报告(分章节实时输出) + Gerar relatório completoSeção - 每个章节生成完成后立即保存到文件夹,不需要等待整个报告完成。 - 文件结构: + SeçãoGerarPastaRelatório reports/{report_id}/ - meta.json - 报告元信息 - outline.json - 报告大纲 - progress.json - 生成进度 - section_01.md - 第1章节 - section_02.md - 第2章节 + meta.json - Relatório + outline.json - Outline do relatório + progress.json - Gerar + section_01.md - 1Seção + section_02.md - 2Seção ... - full_report.md - 完整报告 + full_report.md - Relatório Args: - progress_callback: 进度回调函数 (stage, progress, message) - report_id: 报告ID(可选,如果不传则自动生成) + progress_callback: Função callback de progresso (stage, progress, message) + report_id: RelatórioIDGerar Returns: - Report: 完整报告 + Report: Relatório """ import uuid - # 如果没有传入 report_id,则自动生成 + # report_idGerar if not report_id: report_id = f"report_{uuid.uuid4().hex[:12]}" start_time = datetime.now() @@ -1571,14 +1839,14 @@ def generate_report( created_at=datetime.now().isoformat() ) - # 已完成的章节标题列表(用于进度追踪) + # ConcluídoSeção completed_section_titles = [] try: - # 初始化:创建报告文件夹并保存初始状态 + # RelatórioPasta ReportManager._ensure_report_folder(report_id) - # 初始化日志记录器(结构化日志 agent_log.jsonl) + # agent_log.jsonl self.report_logger = ReportLogger(report_id) self.report_logger.log_start( simulation_id=self.simulation_id, @@ -1586,7 +1854,7 @@ def generate_report( simulation_requirement=self.simulation_requirement ) - # 初始化控制台日志记录器(console_log.txt) + # console_log.txt self.console_logger = 
ReportConsoleLogger(report_id) ReportManager.update_progress( @@ -1595,14 +1863,13 @@ def generate_report( ) ReportManager.save_report(report) - # 阶段1: 规划大纲 + # 1: Outline report.status = ReportStatus.PLANNING ReportManager.update_progress( report_id, "planning", 5, t('progress.startPlanningOutline'), completed_sections=[] ) - # 记录规划开始日志 self.report_logger.log_planning_start() if progress_callback: @@ -1614,10 +1881,9 @@ def generate_report( ) report.outline = outline - # 记录规划完成日志 self.report_logger.log_planning_complete(outline.to_dict()) - # 保存大纲到文件 + # Outline ReportManager.save_outline(report_id, outline) ReportManager.update_progress( report_id, "planning", 15, t('progress.outlineDone', count=len(outline.sections)), @@ -1627,17 +1893,17 @@ def generate_report( logger.info(t('report.outlineSavedToFile', reportId=report_id)) - # 阶段2: 逐章节生成(分章节保存) + # 2: SeçãoGerarSeção report.status = ReportStatus.GENERATING total_sections = len(outline.sections) - generated_sections = [] # 保存内容用于上下文 + generated_sections = [] # Conteúdo for i, section in enumerate(outline.sections): section_num = i + 1 base_progress = 20 + int((i / total_sections) * 70) - # 更新进度 + # Atualizar progresso ReportManager.update_progress( report_id, "generating", base_progress, t('progress.generatingSection', title=section.title, current=section_num, total=total_sections), @@ -1652,7 +1918,7 @@ def generate_report( t('progress.generatingSection', title=section.title, current=section_num, total=total_sections) ) - # 生成主章节内容 + # GerarSeçãoConteúdo section_content = self._generate_section_react( section=section, outline=outline, @@ -1667,13 +1933,38 @@ def generate_report( ) section.content = section_content + + # ── Pós-processamento: traduzir conteúdo chinês para PT-BR ── + if section_content and self._has_chinese(section_content): + logger.warning(f"Chinês detectado na seção '{section.title}' — traduzindo...") + try: + tr = self.llm.chat( + messages=[ + {"role": "system", "content": "Traduza o texto 
abaixo integralmente para português do Brasil. Mantenha toda a formatação Markdown (negrito, listas, citações). NÃO deixe nenhuma palavra em chinês."}, + {"role": "user", "content": section_content} + ], + temperature=0.1 + ) + if tr and not self._has_chinese(tr): + section.content = tr + section_content = tr + logger.info(f"Seção '{section.title}' traduzida com sucesso") + except Exception as te: + logger.error(f"Falha ao traduzir seção: {te}") + + # Garantia final: nunca persistir caracteres chineses no output final + if section_content and self._has_chinese(section_content): + logger.warning(f"Ainda há chinês na seção '{section.title}' após tradução. Limpando caracteres residuais.") + section_content = self._strip_remaining_chinese(section_content) + section.content = section_content + generated_sections.append(f"## {section.title}\n\n{section_content}") - # 保存章节 + # Salvar seção ReportManager.save_section(report_id, section_num, section) completed_section_titles.append(section.title) - # 记录章节完成日志 + # Registrar log de conclusão da seção full_section_content = f"## {section.title}\n\n{section_content}" if self.report_logger: @@ -1685,7 +1976,7 @@ def generate_report( logger.info(t('report.sectionSaved', reportId=report_id, sectionNum=f"{section_num:02d}")) - # 更新进度 + # Atualizar progresso ReportManager.update_progress( report_id, "generating", base_progress + int(70 / total_sections), @@ -1694,7 +1985,7 @@ def generate_report( completed_sections=completed_section_titles ) - # 阶段3: 组装完整报告 + # 3: Relatório if progress_callback: progress_callback("generating", 95, t('progress.assemblingReport')) @@ -1703,22 +1994,21 @@ def generate_report( completed_sections=completed_section_titles ) - # 使用ReportManager组装完整报告 + # ReportManagerRelatório report.markdown_content = ReportManager.assemble_full_report(report_id, outline) report.status = ReportStatus.COMPLETED report.completed_at = datetime.now().isoformat() - # 计算总耗时 total_time_seconds = (datetime.now() - 
start_time).total_seconds() - # 记录报告完成日志 + # Relatório if self.report_logger: self.report_logger.log_report_complete( total_sections=total_sections, total_time_seconds=total_time_seconds ) - # 保存最终报告 + # Relatório ReportManager.save_report(report) ReportManager.update_progress( report_id, "completed", 100, t('progress.reportComplete'), @@ -1730,7 +2020,40 @@ def generate_report( logger.info(t('report.reportGenDone', reportId=report_id)) - # 关闭控制台日志记录器 + # AUGUR v2: montar JSON estruturado a partir dos dados das seções + if HAS_V2 and outline and outline.sections: + try: + section_data = {} + for sec in outline.sections: + key = getattr(sec, 'key', '') or sec.title.lower().replace(' ', '_') + v2_data = getattr(sec, '_v2_data', None) + if v2_data and not isinstance(v2_data, str): + section_data[key] = v2_data + + if section_data: + meta = { + "projeto": self.simulation_requirement, + "setor": "varejo_local", + "tipo_decisao": "novo_negocio", + "data_geracao": datetime.now().isoformat(), + "modelo_ia": self.model_name if hasattr(self, 'model_name') else "GPT-5.4", + "num_agentes": context.get('total_entities', 6) if 'context' in dir() else 6, + "num_rodadas": 5, + "periodo_simulado_meses": 24, + } + report.structured = assemble_report(section_data, meta) + + errors = validar_report_json(report.structured) + if errors: + logger.warning(f"AUGUR v2 validation: {len(errors)} errors: {errors[:3]}") + else: + logger.info(f"AUGUR v2 structured report assembled: {len(section_data)} sections") + + # Re-salvar com structured + ReportManager.save_report(report) + except Exception as e: + logger.warning(f"AUGUR v2 assembly failed (non-fatal): {e}") + if self.console_logger: self.console_logger.close() self.console_logger = None @@ -1742,11 +2065,10 @@ def generate_report( report.status = ReportStatus.FAILED report.error = str(e) - # 记录错误日志 if self.report_logger: self.report_logger.log_error(str(e), "failed") - # 保存失败状态 + # Falhou try: ReportManager.save_report(report) 
ReportManager.update_progress( @@ -1754,9 +2076,8 @@ def generate_report( completed_sections=completed_section_titles ) except Exception: - pass # 忽略保存失败的错误 + pass # Falhou - # 关闭控制台日志记录器 if self.console_logger: self.console_logger.close() self.console_logger = None @@ -1769,60 +2090,57 @@ def chat( chat_history: List[Dict[str, str]] = None ) -> Dict[str, Any]: """ - 与Report Agent对话 + Dialogar com o Report Agent - 在对话中Agent可以自主调用检索工具来回答问题 + AgentBuscaFerramenta Args: - message: 用户消息 - chat_history: 对话历史 + message: + chat_history: Returns: { - "response": "Agent回复", - "tool_calls": [调用的工具列表], - "sources": [信息来源] + "response": "Resposta do Agente", + "tool_calls": [Lista de ferramentas chamadas], + "sources": [Fontes de informação] } """ logger.info(t('report.agentChat', message=message[:50])) chat_history = chat_history or [] - # 获取已生成的报告内容 + # GerarRelatórioConteúdo report_content = "" try: report = ReportManager.get_report_by_simulation(self.simulation_id) if report and report.markdown_content: - # 限制报告长度,避免上下文过长 + # Relatório report_content = report.markdown_content[:15000] if len(report.markdown_content) > 15000: - report_content += "\n\n... [报告内容已截断] ..." + report_content += "\n\n... [conteúdo do relatório truncado] ..." 
except Exception as e: logger.warning(t('report.fetchReportFailed', error=e)) system_prompt = CHAT_SYSTEM_PROMPT_TEMPLATE.format( simulation_requirement=self.simulation_requirement, - report_content=report_content if report_content else "(暂无报告)", + report_content=report_content if report_content else "(relatório ainda não disponível)", tools_description=self._get_tools_description(), ) system_prompt = f"{system_prompt}\n\n{get_language_instruction()}" - # 构建消息 messages = [{"role": "system", "content": system_prompt}] - # 添加历史对话 - for h in chat_history[-10:]: # 限制历史长度 + for h in chat_history[-10:]: # messages.append(h) - # 添加用户消息 messages.append({ "role": "user", "content": message }) - # ReACT循环(简化版) + # ReACT tool_calls_made = [] - max_iterations = 2 # 减少迭代轮数 + max_iterations = 2 # for iteration in range(max_iterations): response = self.llm.chat( @@ -1830,11 +2148,11 @@ def chat( temperature=0.5 ) - # 解析工具调用 + # Ferramenta tool_calls = self._parse_tool_calls(response) if not tool_calls: - # 没有工具调用,直接返回响应 + # Ferramenta clean_response = re.sub(r'.*?', '', response, flags=re.DOTALL) clean_response = re.sub(r'\[TOOL_CALL\].*?\)', '', clean_response) @@ -1844,33 +2162,31 @@ def chat( "sources": [tc.get("parameters", {}).get("query", "") for tc in tool_calls_made] } - # 执行工具调用(限制数量) + # Executar chamada de ferramenta tool_results = [] - for call in tool_calls[:1]: # 每轮最多执行1次工具调用 + for call in tool_calls[:1]: # 1Ferramenta if len(tool_calls_made) >= self.MAX_TOOL_CALLS_PER_CHAT: break result = self._execute_tool(call["name"], call.get("parameters", {})) tool_results.append({ "tool": call["name"], - "result": result[:1500] # 限制结果长度 + "result": result[:1500] # Resultado }) tool_calls_made.append(call) - # 将结果添加到消息 + # Resultado messages.append({"role": "assistant", "content": response}) - observation = "\n".join([f"[{r['tool']}结果]\n{r['result']}" for r in tool_results]) + observation = "\n".join([f"[Resultado {r['tool']}]\n{r['result']}" for r in tool_results]) 
messages.append({ "role": "user", "content": observation + CHAT_OBSERVATION_SUFFIX }) - # 达到最大迭代,获取最终响应 final_response = self.llm.chat( messages=messages, temperature=0.5 ) - # 清理响应 clean_response = re.sub(r'.*?', '', final_response, flags=re.DOTALL) clean_response = re.sub(r'\[TOOL_CALL\].*?\)', '', clean_response) @@ -1883,95 +2199,95 @@ def chat( class ReportManager: """ - 报告管理器 + Gerenciador de relatórios - 负责报告的持久化存储和检索 + RelatórioBusca - 文件结构(分章节输出): + Seção reports/ {report_id}/ - meta.json - 报告元信息和状态 - outline.json - 报告大纲 - progress.json - 生成进度 - section_01.md - 第1章节 - section_02.md - 第2章节 + meta.json - Relatório + outline.json - Outline do relatório + progress.json - Gerar + section_01.md - 1Seção + section_02.md - 2Seção ... - full_report.md - 完整报告 + full_report.md - Relatório """ - # 报告存储目录 + # Relatório REPORTS_DIR = os.path.join(Config.UPLOAD_FOLDER, 'reports') @classmethod def _ensure_reports_dir(cls): - """确保报告根目录存在""" + """Garantir que o diretório raiz dos relatórios exista""" os.makedirs(cls.REPORTS_DIR, exist_ok=True) @classmethod def _get_report_folder(cls, report_id: str) -> str: - """获取报告文件夹路径""" + """Obter relatórioPasta""" return os.path.join(cls.REPORTS_DIR, report_id) @classmethod def _ensure_report_folder(cls, report_id: str) -> str: - """确保报告文件夹存在并返回路径""" + """RelatórioPasta""" folder = cls._get_report_folder(report_id) os.makedirs(folder, exist_ok=True) return folder @classmethod def _get_report_path(cls, report_id: str) -> str: - """获取报告元信息文件路径""" + """Obter relatório""" return os.path.join(cls._get_report_folder(report_id), "meta.json") @classmethod def _get_report_markdown_path(cls, report_id: str) -> str: - """获取完整报告Markdown文件路径""" + """RelatórioMarkdown""" return os.path.join(cls._get_report_folder(report_id), "full_report.md") @classmethod def _get_outline_path(cls, report_id: str) -> str: - """获取大纲文件路径""" + """Outline""" return os.path.join(cls._get_report_folder(report_id), "outline.json") @classmethod def _get_progress_path(cls, 
report_id: str) -> str: - """获取进度文件路径""" + """""" return os.path.join(cls._get_report_folder(report_id), "progress.json") @classmethod def _get_section_path(cls, report_id: str, section_index: int) -> str: - """获取章节Markdown文件路径""" + """SeçãoMarkdown""" return os.path.join(cls._get_report_folder(report_id), f"section_{section_index:02d}.md") @classmethod def _get_agent_log_path(cls, report_id: str) -> str: - """获取 Agent 日志文件路径""" + """ Agent """ return os.path.join(cls._get_report_folder(report_id), "agent_log.jsonl") @classmethod def _get_console_log_path(cls, report_id: str) -> str: - """获取控制台日志文件路径""" + """""" return os.path.join(cls._get_report_folder(report_id), "console_log.txt") @classmethod def get_console_log(cls, report_id: str, from_line: int = 0) -> Dict[str, Any]: """ - 获取控制台日志内容 + Conteúdo - 这是报告生成过程中的控制台输出日志(INFO、WARNING等), - 与 agent_log.jsonl 的结构化日志不同。 + RelatórioGerarINFOWARNING + agent_log.jsonl Args: - report_id: 报告ID - from_line: 从第几行开始读取(用于增量获取,0 表示从头开始) + report_id: RelatórioID + from_line: 0 Returns: { - "logs": [日志行列表], - "total_lines": 总行数, - "from_line": 起始行号, - "has_more": 是否还有更多日志 + "logs": [], + "total_lines": , + "from_line": , + "has_more": } """ log_path = cls._get_console_log_path(report_id) @@ -1991,26 +2307,23 @@ def get_console_log(cls, report_id: str, from_line: int = 0) -> Dict[str, Any]: for i, line in enumerate(f): total_lines = i + 1 if i >= from_line: - # 保留原始日志行,去掉末尾换行符 logs.append(line.rstrip('\n\r')) return { "logs": logs, "total_lines": total_lines, "from_line": from_line, - "has_more": False # 已读取到末尾 + "has_more": False # } @classmethod def get_console_log_stream(cls, report_id: str) -> List[str]: """ - 获取完整的控制台日志(一次性获取全部) Args: - report_id: 报告ID + report_id: RelatórioID Returns: - 日志行列表 """ result = cls.get_console_log(report_id, from_line=0) return result["logs"] @@ -2018,18 +2331,18 @@ def get_console_log_stream(cls, report_id: str) -> List[str]: @classmethod def get_agent_log(cls, report_id: str, from_line: int = 0) 
-> Dict[str, Any]: """ - 获取 Agent 日志内容 + Agent Conteúdo Args: - report_id: 报告ID - from_line: 从第几行开始读取(用于增量获取,0 表示从头开始) + report_id: RelatórioID + from_line: 0 Returns: { - "logs": [日志条目列表], - "total_lines": 总行数, - "from_line": 起始行号, - "has_more": 是否还有更多日志 + "logs": [], + "total_lines": , + "from_line": , + "has_more": } """ log_path = cls._get_agent_log_path(report_id) @@ -2053,26 +2366,25 @@ def get_agent_log(cls, report_id: str, from_line: int = 0) -> Dict[str, Any]: log_entry = json.loads(line.strip()) logs.append(log_entry) except json.JSONDecodeError: - # 跳过解析失败的行 + # Falhou continue return { "logs": logs, "total_lines": total_lines, "from_line": from_line, - "has_more": False # 已读取到末尾 + "has_more": False # } @classmethod def get_agent_log_stream(cls, report_id: str) -> List[Dict[str, Any]]: """ - 获取完整的 Agent 日志(用于一次性获取全部) + Agent Args: - report_id: 报告ID + report_id: RelatórioID Returns: - 日志条目列表 """ result = cls.get_agent_log(report_id, from_line=0) return result["logs"] @@ -2080,9 +2392,8 @@ def get_agent_log_stream(cls, report_id: str) -> List[Dict[str, Any]]: @classmethod def save_outline(cls, report_id: str, outline: ReportOutline) -> None: """ - 保存报告大纲 + Salvar outline do relatório - 在规划阶段完成后立即调用 """ cls._ensure_report_folder(report_id) @@ -2099,27 +2410,25 @@ def save_section( section: ReportSection ) -> str: """ - 保存单个章节 + Seção - 在每个章节生成完成后立即调用,实现分章节输出 + SeçãoGerarSeção Args: - report_id: 报告ID - section_index: 章节索引(从1开始) - section: 章节对象 + report_id: RelatórioID + section_index: Seção1 + section: Seção Returns: - 保存的文件路径 """ cls._ensure_report_folder(report_id) - # 构建章节Markdown内容 - 清理可能存在的重复标题 + # SeçãoMarkdownConteúdo - cleaned_content = cls._clean_section_content(section.content, section.title) md_content = f"## {section.title}\n\n" if cleaned_content: md_content += f"{cleaned_content}\n\n" - # 保存文件 file_suffix = f"section_{section_index:02d}.md" file_path = os.path.join(cls._get_report_folder(report_id), file_suffix) with open(file_path, 'w', 
encoding='utf-8') as f: @@ -2131,17 +2440,17 @@ def save_section( @classmethod def _clean_section_content(cls, content: str, section_title: str) -> str: """ - 清理章节内容 + SeçãoConteúdo - 1. 移除内容开头与章节标题重复的Markdown标题行 - 2. 将所有 ### 及以下级别的标题转换为粗体文本 + 1. ConteúdoSeçãoMarkdown + 2. ### Args: - content: 原始内容 - section_title: 章节标题 + content: Conteúdo + section_title: Seção Returns: - 清理后的内容 + Conteúdo """ import re @@ -2156,26 +2465,25 @@ def _clean_section_content(cls, content: str, section_title: str) -> str: for i, line in enumerate(lines): stripped = line.strip() - # 检查是否是Markdown标题行 + # Markdown heading_match = re.match(r'^(#{1,6})\s+(.+)$', stripped) if heading_match: level = len(heading_match.group(1)) title_text = heading_match.group(2).strip() - # 检查是否是与章节标题重复的标题(跳过前5行内的重复) + # Seção5 if i < 5: if title_text == section_title or title_text.replace(' ', '') == section_title.replace(' ', ''): skip_next_empty = True continue - # 将所有级别的标题(#, ##, ###, ####等)转换为粗体 - # 因为章节标题由系统添加,内容中不应有任何标题 + # #, ##, ###, #### + # SeçãoConteúdo cleaned_lines.append(f"**{title_text}**") - cleaned_lines.append("") # 添加空行 + cleaned_lines.append("") # continue - # 如果上一行是被跳过的标题,且当前行为空,也跳过 if skip_next_empty and stripped == '': skip_next_empty = False continue @@ -2183,14 +2491,11 @@ def _clean_section_content(cls, content: str, section_title: str) -> str: skip_next_empty = False cleaned_lines.append(line) - # 移除开头的空行 while cleaned_lines and cleaned_lines[0].strip() == '': cleaned_lines.pop(0) - # 移除开头的分隔线 while cleaned_lines and cleaned_lines[0].strip() in ['---', '***', '___']: cleaned_lines.pop(0) - # 同时移除分隔线后的空行 while cleaned_lines and cleaned_lines[0].strip() == '': cleaned_lines.pop(0) @@ -2207,9 +2512,9 @@ def update_progress( completed_sections: List[str] = None ) -> None: """ - 更新报告生成进度 + RelatórioGerar - 前端可以通过读取progress.json获取实时进度 + progress.json """ cls._ensure_report_folder(report_id) @@ -2227,7 +2532,7 @@ def update_progress( @classmethod def get_progress(cls, report_id: str) -> 
Optional[Dict[str, Any]]: - """获取报告生成进度""" + """Obter relatórioGerar""" path = cls._get_progress_path(report_id) if not os.path.exists(path): @@ -2239,9 +2544,9 @@ def get_progress(cls, report_id: str) -> Optional[Dict[str, Any]]: @classmethod def get_generated_sections(cls, report_id: str) -> List[Dict[str, Any]]: """ - 获取已生成的章节列表 + GerarSeção - 返回所有已保存的章节文件信息 + Seção """ folder = cls._get_report_folder(report_id) @@ -2255,7 +2560,7 @@ def get_generated_sections(cls, report_id: str) -> List[Dict[str, Any]]: with open(file_path, 'r', encoding='utf-8') as f: content = f.read() - # 从文件名解析章节索引 + # Seção parts = filename.replace('.md', '').split('_') section_index = int(parts[1]) @@ -2270,26 +2575,26 @@ def get_generated_sections(cls, report_id: str) -> List[Dict[str, Any]]: @classmethod def assemble_full_report(cls, report_id: str, outline: ReportOutline) -> str: """ - 组装完整报告 + Relatório - 从已保存的章节文件组装完整报告,并进行标题清理 + SeçãoRelatório """ folder = cls._get_report_folder(report_id) - # 构建报告头部 + # Relatório md_content = f"# {outline.title}\n\n" md_content += f"> {outline.summary}\n\n" md_content += f"---\n\n" - # 按顺序读取所有章节文件 + # Seção sections = cls.get_generated_sections(report_id) for section_info in sections: md_content += section_info["content"] - # 后处理:清理整个报告的标题问题 + # Relatório md_content = cls._post_process_report(md_content, outline) - # 保存完整报告 + # Relatório full_path = cls._get_report_markdown_path(report_id) with open(full_path, 'w', encoding='utf-8') as f: f.write(md_content) @@ -2300,18 +2605,18 @@ def assemble_full_report(cls, report_id: str, outline: ReportOutline) -> str: @classmethod def _post_process_report(cls, content: str, outline: ReportOutline) -> str: """ - 后处理报告内容 + RelatórioConteúdo - 1. 移除重复的标题 - 2. 保留报告主标题(#)和章节标题(##),移除其他级别的标题(###, ####等) - 3. 清理多余的空行和分隔线 + 1. + 2. Relatório(#)Seção(##)(###, ####) + 3. 
Args: - content: 原始报告内容 - outline: 报告大纲 + content: RelatórioConteúdo + outline: Outline do relatório Returns: - 处理后的内容 + Conteúdo """ import re @@ -2319,7 +2624,7 @@ def _post_process_report(cls, content: str, outline: ReportOutline) -> str: processed_lines = [] prev_was_heading = False - # 收集大纲中的所有章节标题 + # OutlineSeção section_titles = set() for section in outline.sections: section_titles.add(section.title) @@ -2329,14 +2634,13 @@ def _post_process_report(cls, content: str, outline: ReportOutline) -> str: line = lines[i] stripped = line.strip() - # 检查是否是标题行 heading_match = re.match(r'^(#{1,6})\s+(.+)$', stripped) if heading_match: level = len(heading_match.group(1)) title = heading_match.group(2).strip() - # 检查是否是重复标题(在连续5行内出现相同内容的标题) + # 5Conteúdo is_duplicate = False for j in range(max(0, len(processed_lines) - 5), len(processed_lines)): prev_line = processed_lines[j].strip() @@ -2348,43 +2652,39 @@ def _post_process_report(cls, content: str, outline: ReportOutline) -> str: break if is_duplicate: - # 跳过重复标题及其后的空行 i += 1 while i < len(lines) and lines[i].strip() == '': i += 1 continue - # 标题层级处理: - # - # (level=1) 只保留报告主标题 - # - ## (level=2) 保留章节标题 - # - ### 及以下 (level>=3) 转换为粗体文本 + # - # (level=1) Relatório + # - ## (level=2) Seção + # - ### (level>=3) if level == 1: if title == outline.title: - # 保留报告主标题 + # Relatório processed_lines.append(line) prev_was_heading = True elif title in section_titles: - # 章节标题错误使用了#,修正为## + # Seção### processed_lines.append(f"## {title}") prev_was_heading = True else: - # 其他一级标题转为粗体 processed_lines.append(f"**{title}**") processed_lines.append("") prev_was_heading = False elif level == 2: if title in section_titles or title == outline.title: - # 保留章节标题 + # Seção processed_lines.append(line) prev_was_heading = True else: - # 非章节的二级标题转为粗体 + # Seção processed_lines.append(f"**{title}**") processed_lines.append("") prev_was_heading = False else: - # ### 及以下级别的标题转换为粗体文本 processed_lines.append(f"**{title}**") processed_lines.append("") 
prev_was_heading = False @@ -2393,12 +2693,10 @@ def _post_process_report(cls, content: str, outline: ReportOutline) -> str: continue elif stripped == '---' and prev_was_heading: - # 跳过标题后紧跟的分隔线 i += 1 continue elif stripped == '' and prev_was_heading: - # 标题后只保留一个空行 if processed_lines and processed_lines[-1].strip() != '': processed_lines.append(line) prev_was_heading = False @@ -2409,7 +2707,6 @@ def _post_process_report(cls, content: str, outline: ReportOutline) -> str: i += 1 - # 清理连续的多个空行(保留最多2个) result_lines = [] empty_count = 0 for line in processed_lines: @@ -2425,18 +2722,18 @@ def _post_process_report(cls, content: str, outline: ReportOutline) -> str: @classmethod def save_report(cls, report: Report) -> None: - """保存报告元信息和完整报告""" + """RelatórioRelatório""" cls._ensure_report_folder(report.report_id) - # 保存元信息JSON + # JSON with open(cls._get_report_path(report.report_id), 'w', encoding='utf-8') as f: json.dump(report.to_dict(), f, ensure_ascii=False, indent=2) - # 保存大纲 + # Outline if report.outline: cls.save_outline(report.report_id, report.outline) - # 保存完整Markdown报告 + # MarkdownRelatório if report.markdown_content: with open(cls._get_report_markdown_path(report.report_id), 'w', encoding='utf-8') as f: f.write(report.markdown_content) @@ -2445,11 +2742,11 @@ def save_report(cls, report: Report) -> None: @classmethod def get_report(cls, report_id: str) -> Optional[Report]: - """获取报告""" + """Obter relatório""" path = cls._get_report_path(report_id) if not os.path.exists(path): - # 兼容旧格式:检查直接存储在reports目录下的文件 + # Compatível com formato antigoreports old_path = os.path.join(cls.REPORTS_DIR, f"{report_id}.json") if os.path.exists(old_path): path = old_path @@ -2459,7 +2756,7 @@ def get_report(cls, report_id: str) -> Optional[Report]: with open(path, 'r', encoding='utf-8') as f: data = json.load(f) - # 重建Report对象 + # Report outline = None if data.get('outline'): outline_data = data['outline'] @@ -2474,8 +2771,40 @@ def get_report(cls, report_id: str) -> 
Optional[Report]: summary=outline_data['summary'], sections=sections ) - - # 如果markdown_content为空,尝试从full_report.md读取 + else: + # Fallback: carregar outline.json quando report.json estiver desatualizado + outline_path = cls._get_outline_path(report_id) + if os.path.exists(outline_path): + try: + with open(outline_path, 'r', encoding='utf-8') as f: + outline_data = json.load(f) + sections = [] + for s in outline_data.get('sections', []): + sections.append(ReportSection( + title=s.get('title', ''), + content=s.get('content', '') + )) + outline = ReportOutline( + title=outline_data.get('title', 'Relatório de Previsão'), + summary=outline_data.get('summary', ''), + sections=sections + ) + except Exception: + outline = None + + # Hidratar conteúdo de seções a partir dos arquivos section_XX.md + # Isso evita tela vazia no primeiro carregamento enquanto report.json ainda não refletiu tudo. + if outline and outline.sections: + generated_sections = cls.get_generated_sections(report_id) + if generated_sections: + by_index = {s.get("section_index"): s.get("content", "") for s in generated_sections} + for i, section in enumerate(outline.sections, start=1): + if not (section.content or '').strip(): + content = by_index.get(i, "") + if content: + section.content = content + + # markdown_contentfull_report.md markdown_content = data.get('markdown_content', '') if not markdown_content: full_report_path = cls._get_report_markdown_path(report_id) @@ -2498,17 +2827,17 @@ def get_report(cls, report_id: str) -> Optional[Report]: @classmethod def get_report_by_simulation(cls, simulation_id: str) -> Optional[Report]: - """根据模拟ID获取报告""" + """ID da simulaçãoObter relatório""" cls._ensure_reports_dir() for item in os.listdir(cls.REPORTS_DIR): item_path = os.path.join(cls.REPORTS_DIR, item) - # 新格式:文件夹 + # Novo formato:Pasta if os.path.isdir(item_path): report = cls.get_report(item) if report and report.simulation_id == simulation_id: return report - # 兼容旧格式:JSON文件 + # Compatível com formato 
antigo:Arquivo JSON elif item.endswith('.json'): report_id = item[:-5] report = cls.get_report(report_id) @@ -2519,19 +2848,19 @@ def get_report_by_simulation(cls, simulation_id: str) -> Optional[Report]: @classmethod def list_reports(cls, simulation_id: Optional[str] = None, limit: int = 50) -> List[Report]: - """列出报告""" + """Listar relatórios""" cls._ensure_reports_dir() reports = [] for item in os.listdir(cls.REPORTS_DIR): item_path = os.path.join(cls.REPORTS_DIR, item) - # 新格式:文件夹 + # Novo formato:Pasta if os.path.isdir(item_path): report = cls.get_report(item) if report: if simulation_id is None or report.simulation_id == simulation_id: reports.append(report) - # 兼容旧格式:JSON文件 + # Compatível com formato antigo:Arquivo JSON elif item.endswith('.json'): report_id = item[:-5] report = cls.get_report(report_id) @@ -2539,25 +2868,24 @@ def list_reports(cls, simulation_id: Optional[str] = None, limit: int = 50) -> L if simulation_id is None or report.simulation_id == simulation_id: reports.append(report) - # 按创建时间倒序 reports.sort(key=lambda r: r.created_at, reverse=True) return reports[:limit] @classmethod def delete_report(cls, report_id: str) -> bool: - """删除报告(整个文件夹)""" + """RelatórioPasta""" import shutil folder_path = cls._get_report_folder(report_id) - # 新格式:删除整个文件夹 + # Novo formatoPasta if os.path.exists(folder_path) and os.path.isdir(folder_path): shutil.rmtree(folder_path) logger.info(t('report.reportFolderDeleted', reportId=report_id)) return True - # 兼容旧格式:删除单独的文件 + # Compatível com formato antigo deleted = False old_json_path = os.path.join(cls.REPORTS_DIR, f"{report_id}.json") old_md_path = os.path.join(cls.REPORTS_DIR, f"{report_id}.md") diff --git a/backend/app/services/report_prompts_v2.py b/backend/app/services/report_prompts_v2.py new file mode 100644 index 0000000000..9da57d38d5 --- /dev/null +++ b/backend/app/services/report_prompts_v2.py @@ -0,0 +1,598 @@ +""" +AUGUR Report Prompts v2 — Pipeline reconstruído de trás pra frente. 
+ +Substitui os prompts de report_agent.py para gerar JSON estruturado +no formato definido por report_schema.py. + +O loop ReAct (tool calls → data gathering) permanece inalterado. +O que muda: o "Final Answer" agora é JSON tipado, não texto livre. + +Uso: + from app.schemas.report_schema import AugurReportSchema + from app.services.report_prompts_v2 import ( + PLAN_SYSTEM_PROMPT_V2, + PLAN_USER_PROMPT_V2, + get_section_prompt, + SECTION_OUTPUT_SCHEMAS, + REACT_OBSERVATION_TEMPLATE_V2, + ) +""" + +import json + +# ============================================================ +# PLAN PROMPT — Gera o plano com 16 seções fixas +# ============================================================ +# Diferença da v1: não pede ao LLM para "inventar" seções. +# As 16 seções são FIXAS. O LLM apenas gera o título e +# as instruções de busca para cada seção. +# ============================================================ + +PLAN_SYSTEM_PROMPT_V2 = """\ +Você é o estrategista-chefe do AUGUR — plataforma de previsão de mercado por IA. +Seu relatório substitui uma consultoria de R$200.000. + +══════════════════════════════════════════════════════════════ +【MISSÃO】 +══════════════════════════════════════════════════════════════ + +Gerar o PLANO de um relatório de previsão com EXATAMENTE 16 seções. +As seções são FIXAS — você NÃO inventa seções novas. +Você gera o título específico e as instruções de busca para cada seção. + +══════════════════════════════════════════════════════════════ +【AS 16 SEÇÕES OBRIGATÓRIAS】 +══════════════════════════════════════════════════════════════ + +1. resumo_executivo — Veredicto GO/NO-GO/AJUSTAR + frase-chave + 5 KPIs +2. dashboard_kpis — Métricas quantitativas: ticket, margem, break-even, investimento +3. cenarios_futuros — EXATAMENTE 3 cenários com probabilidades somando 100% +4. cenarios_financeiros — Projeção de faturamento 24 meses por cenário +5. fatores_risco — 5-7 riscos com probabilidade e impacto +6. 
analise_emocional — 6 emoções com % + evolução temporal 24 meses +7. perfis_agentes — Cada agente com nome, perfil, posição e citação-chave +8. mapa_forcas — Blocos de poder com base de clientes quantificada +9. cronologia — 4 fases: Curiosidade/Teste/Virada/Disciplina +10. padroes_emergentes — 5-7 padrões surpreendentes +11. recomendacoes — 3-5 recomendações com STACK RANKING (#1 decide sobrevivência) +12. checklist_go — 8-10 condições mensuráveis para transformar AJUSTAR em GO +13. previsoes_confianca — 6-8 previsões com probabilidade ± margem de erro +14. posicionamento — Percebido vs desejado + 3 rótulos a evitar + posicionamento vencedor +15. roi_analise — Custo de errar vs custo de saber + ROI multiplicador +16. sintese_final — Radar 5 eixos + veredicto + 3 direcionamentos + +══════════════════════════════════════════════════════════════ +【OUTPUT — JSON OBRIGATÓRIO】 +══════════════════════════════════════════════════════════════ + +{ + "title": "Relatório de Previsão: [tema] — [VEREDICTO]", + "summary": "VEREDICTO: [GO/NO-GO/AJUSTAR]. [Uma frase de 25 palavras]", + "sections": [ + { + "key": "resumo_executivo", + "title": "Resumo Executivo", + "search_instructions": "Buscar: veredicto geral, KPIs principais, citações de decisão..." + }, + ... (16 seções, cada uma com key, title e search_instructions) + ] +} + +REGRAS: +- sections DEVE ter EXATAMENTE 16 elementos +- cada section DEVE ter "key" correspondendo à lista acima +- search_instructions guia o ReAct loop sobre O QUE buscar nas ferramentas +- 100% português do Brasil +- ZERO caracteres chineses +""" + + +PLAN_USER_PROMPT_V2 = """\ +【Cenário de previsão】 +Requisito da simulação: {simulation_requirement} + +【Escala do mundo simulado】 +- Entidades na simulação: {total_nodes} +- Relações entre entidades: {total_edges} +- Distribuição de tipos: {entity_types} +- Agentes ativos: {total_entities} + +【Amostra de fatos simulados】 +{related_facts_json} + +Gere o plano do relatório com as 16 seções obrigatórias. 
+Adapte os search_instructions ao contexto específico desta simulação. +""" + + +# ============================================================ +# SECTION PROMPTS — Um prompt por tipo de seção +# ============================================================ +# Cada seção tem: +# - Instruções específicas de O QUE buscar +# - O schema JSON EXATO que deve retornar no "Final Answer" +# - Exemplo de output para o LLM seguir +# ============================================================ + +# Schema de output por seção (o que o LLM deve retornar como JSON) +SECTION_OUTPUT_SCHEMAS = { + "resumo_executivo": { + "tipo": "GO | NO-GO | AJUSTAR", + "score_viabilidade": "0-100 (int)", + "frase_chave": "max 200 chars, a frase que resume tudo", + "resumo_executivo": "2-4 parágrafos narrativos com dados da simulação", + "leitura_para_decisao": "1 parágrafo: o que o dono deve fazer segunda-feira", + "top5_fatos": [{"titulo": "max 60 chars", "descricao": "1-2 frases com dados"}], + }, + "dashboard_kpis": { + "ticket_medio": "range em R$ (ex: R$160-240)", + "volume_breakeven": "quantidade/mês para break-even", + "margem_bruta_alvo": "range % esperado", + "capital_giro_necessario": "range R$ para operação", + "recompra_alvo": "% mínimo para sobrevivência", + "vendas_por_indicacao": "% esperado M12-24", + "erosao_margem_sazonal": "% de perda em datas fortes", + "breakeven_cenario1": "período (ex: M11-15)", + "contatos_mes_inicial": "range contatos/mês M1-3", + "conversao_inicial": "% de conversão M1-3", + "faturamento_maduro": "R$/mês quando estável", + "prob_sobrevivencia_24m": "% probabilidade", + "investimento_total_estimado": "R$ total necessário", + "composicao_investimento": [{"item": "nome", "valor": "R$ range"}], + "sinais_consolidacao": ["lista de sinais verdes"], + "sinais_alerta": ["lista de sinais amarelos"], + "sinais_risco_critico": ["lista de sinais vermelhos"], + }, + "cenarios_futuros": { + "cenarios": [ + { + "nome": "título descritivo do cenário", + "probabilidade": 
"int 0-100 (soma dos 3 = 100)", + "impacto_financeiro": "positivo moderado | negativo relevante | etc", + "breakeven": "período ou 'Não ocorre em 24 meses'", + "faturamento_m24": "R$/mês no mês 24", + "margem_bruta": "range %", + "recompra": "range %", + "risco_central": "principal ameaça deste cenário", + "capital_giro": "R$ necessário", + "descricao": "2-3 parágrafos com dados da simulação", + "citacao_agente": "a citação mais reveladora", + "projecao_faturamento_24m": "[lista de 25 floats: faturamento R$mil/mês, M0 a M24]" + } + ], + "ponto_bifurcacao": "o que separa os cenários (1-2 frases)", + }, + "cenarios_financeiros": "MESMOS DADOS de cenarios_futuros (esta seção usa os mesmos dados para renderizar o gráfico area chart e a tabela comparativa)", + "fatores_risco": { + "texto_introducao": "1-2 frases contextuais", + "riscos": [ + { + "numero": "int 1-7", + "titulo": "TEXTO COMPLETO, NUNCA truncar (max 80 chars)", + "probabilidade": "int 0-100", + "impacto": "Alto | Medio | Baixo", + "descricao": "2-3 frases com dados", + "citacao_agente": "a citação mais reveladora", + } + ], + }, + "analise_emocional": { + "emocoes": [ + {"nome": "Confiança|Ceticismo|Empolgação|Medo|FOMO|Indiferença", "percentual": "int 0-100"} + ], + "saldo_positivo_vs_negativo": "ex: 49% vs 36%", + "texto_confianca": "parágrafo sobre confiança", + "citacao_confianca": "citação", + "texto_ceticismo": "parágrafo", + "citacao_ceticismo": "citação", + "texto_empolgacao": "parágrafo", + "texto_medo": "parágrafo", + "evolucao_24m": { + "confianca": "[25 floats: intensidade % M0 a M24]", + "ceticismo": "[25 floats]", + "empolgacao": "[25 floats]", + "medo": "[25 floats]" + }, + }, + "perfis_agentes": [ + { + "nome": "nome do agente", + "descricao": "perfil demográfico em 1 linha", + "tipo": "Apoiador | Neutro | Resistente | Cauteloso", + "posicao_espectro": "float 0.0 (apoiador) a 1.0 (resistente)", + "citacao_chave": "a fala mais reveladora", + "papel_na_dinamica": "ex: Early adopter 
pragmático", + } + ], + "mapa_forcas": { + "blocos": [ + { + "nome": "ex: Bloco dominante: lider de mercado + base fidelizada", + "base_clientes": "ex: 3.000-4.500 clientes ativos", + "descricao": "2-3 frases", + "poder_relativo": "int 1-10", + "citacao": "citação ou null", + } + ], + "hierarquia_poder": "ranking textual: 1. X (motivo). 2. Y (motivo). ...", + "coalizao_entrante": "quem pode ser aliado do entrante", + }, + "cronologia": { + "fases": [ + { + "nome": "Curiosidade | Teste | Virada | Disciplina", + "periodo": "ex: M0-3", + "mes_inicio": "int", + "mes_fim": "int", + "descricao": "2-3 frases", + "citacao": "citação do agente", + "marcos": ["lista de marcos importantes nesta fase"], + } + ], + }, + "padroes_emergentes": [ + {"numero": "int", "titulo": "título do padrão", "descricao": "2-3 frases"} + ], + "recomendacoes": [ + { + "rank": "int 1-5 (#1 = mais importante)", + "titulo": "ex: #1 Acao prioritaria que decide viabilidade do negocio", + "descricao": "2-3 frases", + "citacao": "citação de suporte ou null", + "impacto_relativo": "int 0-100 (para barra de stack ranking)", + } + ], + "checklist_go": [ + { + "titulo": "condição mensurável", + "timing": "Pré-lançamento | Mês 1 | M1-3 | M6-12 | Permanente", + "justificativa": "por que isso é necessário", + "condicao_mensuravel": "como medir se foi atingido", + "prioridade": "Urgente | Alta | Media | Baixa", + } + ], + "previsoes_confianca": [ + { + "periodo": "ex: M1-3", + "titulo": "descrição da previsão", + "probabilidade": "int 0-100", + "margem_erro": "int (± p.p.)", + "descricao": "métricas concretas", + } + ], + "posicionamento": { + "percebido_descricao": "como o mercado vai ler inicialmente", + "percebido_citacao": "citação", + "desejado_descricao": "como deveria ser lido", + "desejado_citacao": "citação", + "rotulos_a_evitar": ["lista de 3 rótulos perigosos"], + "posicionamento_vencedor": "ex: Lá eles resolvem.", + "players": [ + {"nome": "player", "x": "int 0-100 (preço)", "y": "int 0-100 
(funcional→aspiracional)", "papel": "papel"} + ], + }, + "roi_analise": { + "riscos_evitados": [ + {"titulo": "nome do risco", "valor_risco": "R$ range", "solucao": "como AUGUR evitou"} + ], + "custo_analise": "R$ range", + "risco_total_evitado": "R$ range", + "roi_multiplicador": "ex: 30-75x", + "citacoes": ["2-3 citações de valor percebido"], + }, + "sintese_final": { + "scores": { + "viabilidade_financeira": "int 0-100", + "demanda": "int 0-100", + "timing": "int 0-100", + "risco_operacional": "int 0-100", + "competitividade": "int 0-100", + }, + "veredicto_final": "GO | NO-GO | AJUSTAR", + "cenario_mais_provavel": "resumo do cenário #1", + "risco_principal": "resumo do risco #1", + "direcionamento": ["3 ações prioritárias"], + "sinais_consolidacao": ["sinais verdes"], + "sinais_alerta": ["sinais amarelos"], + "sinais_risco": ["sinais vermelhos"], + }, +} + + +def get_section_system_prompt( + section_key: str, + report_title: str, + report_summary: str, + simulation_requirement: str, + tools_description: str, + completed_sections_text: str = "", +) -> str: + """ + Gera o system prompt para uma seção específica. + + O prompt mantém o loop ReAct (tool calls → data gathering) da v1, + mas muda o formato do "Final Answer" para JSON estruturado. + """ + + schema = SECTION_OUTPUT_SCHEMAS.get(section_key, {}) + schema_json = json.dumps(schema, ensure_ascii=False, indent=2) + + # Seções que compartilham dados + shared_note = "" + if section_key == "cenarios_financeiros": + shared_note = """ +NOTA: Esta seção usa os MESMOS dados da seção "cenarios_futuros". +Se cenarios_futuros já foi gerada, use os mesmos dados. +O que muda é a RENDERIZAÇÃO: esta seção gera o gráfico area chart e a tabela comparativa. +Retorne o mesmo JSON de cenarios_futuros.""" + + return f"""\ +Você é um especialista do AUGUR escrevendo a seção "{section_key}" do relatório. 
+ +Título do relatório: {report_title} +Resumo: {report_summary} +Cenário simulado: {simulation_requirement} + +══════════════════════════════════════════════════════════════ +【MISSÃO】 +══════════════════════════════════════════════════════════════ + +Use as ferramentas para observar o mundo simulado (mínimo 3 chamadas). +Depois, gere o "Final Answer:" como um JSON ESTRUTURADO no formato abaixo. + +{shared_note} + +══════════════════════════════════════════════════════════════ +【SCHEMA JSON OBRIGATÓRIO PARA ESTA SEÇÃO】 +══════════════════════════════════════════════════════════════ + +{schema_json} + +══════════════════════════════════════════════════════════════ +【REGRAS CRÍTICAS】 +══════════════════════════════════════════════════════════════ + +1. USAR FERRAMENTAS: Mínimo 3 chamadas antes do Final Answer +2. FORMATO: Final Answer DEVE ser JSON válido no schema acima +3. DADOS REAIS: Cada número, %, citação DEVE vir da simulação +4. CITAÇÕES: Traduzir para PT-BR se necessário +5. NUNCA TRUNCAR: Títulos e textos SEMPRE completos +6. ZERO CHINÊS: Nenhum caractere chinês no output +7. PT-BR: 100% português do Brasil + +══════════════════════════════════════════════════════════════ +【FERRAMENTAS DISPONÍVEIS】(chamar 3-5 vezes) +══════════════════════════════════════════════════════════════ + +{tools_description} + +- insight_forge: Análise profunda multidimensional +- panorama_search: Visão panorâmica de eventos e timeline +- quick_search: Verificação rápida de ponto específico +- interview_agents: Entrevistar agentes para citações diretas + +══════════════════════════════════════════════════════════════ +【FLUXO DE TRABALHO】 +══════════════════════════════════════════════════════════════ + +Opção A — Chamar ferramenta: + +{{"name": "nome_ferramenta", "parameters": {{"param": "valor"}}}} + + +Opção B — Gerar Final Answer (JSON): +Final Answer: +{{ + ... JSON no schema acima ... +}} + +⚠️ PROIBIDO: ferramenta E Final Answer na mesma resposta. 
def parse_section_json(raw_response: str, section_key: str) -> dict:
    """
    Extract the structured JSON payload from an LLM "Final Answer".

    Supported response shapes:
    - ``Final Answer: { ... }`` (ideal)
    - ``Final Answer:`` followed by a fenced markdown ``json`` block
    - ``Final Answer:`` followed by JSON and then trailing prose, even when
      that prose itself contains braces or brackets
    - no ``Final Answer:`` marker, but a JSON object somewhere in the text

    Args:
        raw_response: Raw text returned by the LLM.
        section_key: Key of the section being parsed. Not used by the parsing
            logic; kept so every caller passes the same arguments.

    Returns:
        The decoded JSON value. Object-shaped sections yield a ``dict``;
        sections whose payload is a top-level JSON array yield a ``list``.
        When nothing can be decoded, a dict with ``_error`` (reason) and
        ``_raw`` (first 500 characters of the offending text) is returned
        instead of raising.
    """
    import re

    marker = "Final Answer:"

    # An LLM client may hand back None instead of text; report that as a
    # normal parse failure rather than raising TypeError on the `in` test.
    if not isinstance(raw_response, str):
        return {"_error": "No Final Answer found", "_raw": ""}

    has_marker = marker in raw_response
    if has_marker:
        content = raw_response.split(marker)[-1].strip()
        # Remove markdown code fences wrapping the payload.
        content = re.sub(r'^```json\s*', '', content)
        content = re.sub(r'^```\s*', '', content)
        content = re.sub(r'\s*```$', '', content)
        # Objects or arrays: first opener up to the last matching closer.
        span_pattern = r'(\{[\s\S]*\}|\[[\s\S]*\])'
    else:
        # Without the marker only a JSON object is accepted, so that
        # bracketed prose such as "[1]" is not mistaken for the payload.
        content = raw_response
        span_pattern = r'\{[\s\S]*\}'

    json_match = re.search(span_pattern, content)
    if not json_match:
        if has_marker:
            return {"_error": "No JSON found in Final Answer", "_raw": content[:500]}
        return {"_error": "No Final Answer found", "_raw": raw_response[:500]}

    json_str = json_match.group()
    # Common LLM slip: trailing commas before a closing brace/bracket.
    repaired = re.sub(r',\s*([}\]])', r'\1', json_str)

    # 1) Strict parse of the whole span, then of the comma-repaired span.
    first_error = None
    for candidate in (json_str, repaired):
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as exc:
            if first_error is None:
                first_error = exc

    # 2) The span may end in trailing prose that happens to contain "}" or
    #    "]". Decode only the first complete JSON value, which starts at the
    #    span's first character, and ignore whatever follows it.
    decoder = json.JSONDecoder()
    for candidate in (json_str, repaired):
        try:
            value, _end = decoder.raw_decode(candidate)
        except json.JSONDecodeError:
            continue
        return value

    if has_marker:
        return {"_error": f"JSON parse error: {str(first_error)}", "_raw": repaired[:500]}
    return {"_error": "No Final Answer found", "_raw": raw_response[:500]}
def assemble_report(sections: dict[str, dict], meta: dict) -> dict:
    """
    Assemble the complete report JSON from the individually generated sections.

    Args:
        sections: Mapping of section key to its parsed payload, e.g.
            ``{"resumo_executivo": {...}, "dashboard_kpis": {...}, ...}``.
            Missing keys and keys whose value is ``None`` are both replaced by
            an empty container of the expected kind.
        meta: Report metadata, e.g. ``{"projeto": "...", "nicho": "..."}``;
            stored unchanged under ``"meta"``.

    Returns:
        A dict with the keys ``meta``, ``veredicto``, ``dashboard``,
        ``cenarios``, ``riscos``, ``emocional``, ``agentes``, ``forcas``,
        ``cronologia``, ``padroes``, ``recomendacoes``, ``checklist``,
        ``previsoes``, ``posicionamento``, ``roi`` and ``sintese``, in that
        order. The ``cenarios_financeiros`` section has no field of its own.
    """
    # (report field, source section key, factory for the empty default).
    # Factories, not literals, so every call gets fresh containers.
    layout = (
        ("veredicto", "resumo_executivo", dict),
        ("dashboard", "dashboard_kpis", dict),
        ("cenarios", "cenarios_futuros", dict),
        ("riscos", "fatores_risco", dict),
        ("emocional", "analise_emocional", dict),
        ("agentes", "perfis_agentes", list),
        ("forcas", "mapa_forcas", dict),
        ("cronologia", "cronologia", dict),
        ("padroes", "padroes_emergentes", list),
        ("recomendacoes", "recomendacoes", list),
        ("checklist", "checklist_go", list),
        ("previsoes", "previsoes_confianca", list),
        ("posicionamento", "posicionamento", dict),
        ("roi", "roi_analise", dict),
        ("sintese", "sintese_final", dict),
    )

    available = sections or {}
    report = {"meta": meta}
    for field_name, section_key, make_default in layout:
        payload = available.get(section_key)
        # A section stored as None must not leak into the report: consumers
        # index into these fields directly and would fail on None.
        report[field_name] = make_default() if payload is None else payload
    return report
No __init__.py ou no topo do report_agent.py: +# from app.schemas.report_schema import AugurReportSchema, validar_report_json +# from app.services.report_prompts_v2 import ( +# PLAN_SYSTEM_PROMPT_V2, PLAN_USER_PROMPT_V2, +# get_section_system_prompt, parse_section_json, +# assemble_report, SECTION_KEYS_ORDERED, SECTIONS_NO_REACT, +# REACT_OBSERVATION_TEMPLATE_V2, REACT_TOOL_LIMIT_MSG_V2, +# REACT_INSUFFICIENT_TOOLS_MSG_V2, REACT_FORCE_FINAL_MSG_V2, +# ) +# +# 2. Em _generate_plan(): +# ANTES: system_prompt = f"{PLAN_SYSTEM_PROMPT}\n\n{get_language_instruction()}" +# DEPOIS: system_prompt = f"{PLAN_SYSTEM_PROMPT_V2}\n\n{get_language_instruction()}" +# +# 3. Em _generate_section_react(): +# ANTES: system_prompt = SECTION_SYSTEM_PROMPT_TEMPLATE.format(...) +# DEPOIS: system_prompt = get_section_system_prompt(section_key, ...) +# +# 4. Após "Final Answer:": +# ANTES: final_answer = response.split("Final Answer:")[-1].strip() +# section["content"] = final_answer # texto livre +# DEPOIS: section_json = parse_section_json(response, section_key) +# section["data"] = section_json # JSON estruturado +# section["content"] = section_json # compatibilidade +# +# 5. Em generate_report(), após todas as seções: +# ANTES: return {"title": title, "sections": sections} +# DEPOIS: +# section_data = {s["key"]: s["data"] for s in sections} +# report_json = assemble_report(section_data, meta) +# errors = validar_report_json(report_json) +# if errors: +# logger.warning(f"Report validation errors: {errors}") +# return {"title": title, "sections": sections, "structured": report_json} +# +# 6. No pdf_generator.py: +# ANTES: parse texto com regex para extrair dados +# DEPOIS: data = report["structured"] +# cenarios = data["cenarios"]["cenarios"] +# riscos = data["riscos"]["riscos"] +# ... 
@dataclass
class AgentActivityConfig:
    """Activity configuration for a single agent."""
    agent_id: int
    entity_uuid: str
    entity_name: str
    entity_type: str

    # Activity level (0.0-1.0)
    activity_level: float = 0.5  # overall activity level

    # Posting frequency (expected number of posts / comments per hour)
    posts_per_hour: float = 1.0
    comments_per_hour: float = 2.0

    # Active hours (24-hour clock, 0-23)
    active_hours: List[int] = field(default_factory=lambda: list(range(8, 23)))

    # Response speed: reaction delay to trending events, in simulated minutes
    response_delay_min: int = 5
    response_delay_max: int = 60

    # Sentiment bias (-1.0 to 1.0, negative to positive)
    sentiment_bias: float = 0.0

    # Stance (attitude toward a specific topic)
    stance: str = "neutral"  # supportive, opposing, neutral, observer

    # Influence weight (determines the probability that other agents see this agent's posts)
    influence_weight: float = 1.0
stance: str = "neutral" # supportive, opposing, neutral, observer - # 影响力权重(决定其发言被其他Agent看到的概率) + # Agent influence_weight: float = 1.0 @dataclass class TimeSimulationConfig: - """时间模拟配置(基于中国人作息习惯)""" - # 模拟总时长(模拟小时数) - total_simulation_hours: int = 72 # 默认模拟72小时(3天) + """Configuração da simulação""" + # SimulaçãoSimulação + total_simulation_hours: int = 72 # Simulação723 - # 每轮代表的时间(模拟分钟)- 默认60分钟(1小时),加快时间流速 + # Simulação- 601 minutes_per_round: int = 60 - # 每小时激活的Agent数量范围 + # Agent agents_per_hour_min: int = 5 agents_per_hour_max: int = 20 - # 高峰时段(晚间19-22点,中国人最活跃的时间) + # 19-22 peak_hours: List[int] = field(default_factory=lambda: [19, 20, 21, 22]) peak_activity_multiplier: float = 1.5 - # 低谷时段(凌晨0-5点,几乎无人活动) off_peak_hours: List[int] = field(default_factory=lambda: [0, 1, 2, 3, 4, 5]) - off_peak_activity_multiplier: float = 0.05 # 凌晨活跃度极低 + off_peak_activity_multiplier: float = 0.05 # - # 早间时段 morning_hours: List[int] = field(default_factory=lambda: [6, 7, 8]) morning_activity_multiplier: float = 0.4 - # 工作时段 work_hours: List[int] = field(default_factory=lambda: [9, 10, 11, 12, 13, 14, 15, 16, 17, 18]) work_activity_multiplier: float = 0.7 @dataclass class EventConfig: - """事件配置""" - # 初始事件(模拟开始时的触发事件) + """Configuração""" + # Simulação initial_posts: List[Dict[str, Any]] = field(default_factory=list) - # 定时事件(在特定时间触发的事件) scheduled_events: List[Dict[str, Any]] = field(default_factory=list) - # 热点话题关键词 hot_topics: List[str] = field(default_factory=list) - # 舆论引导方向 narrative_direction: str = "" @dataclass class PlatformConfig: - """平台特定配置""" + """Configuração""" platform: str # twitter or reddit - # 推荐算法权重 - recency_weight: float = 0.4 # 时间新鲜度 - popularity_weight: float = 0.3 # 热度 - relevance_weight: float = 0.3 # 相关性 + recency_weight: float = 0.4 # + popularity_weight: float = 0.3 # + relevance_weight: float = 0.3 # - # 病毒传播阈值(达到多少互动后触发扩散) viral_threshold: int = 10 - # 回声室效应强度(相似观点聚集程度) echo_chamber_strength: float = 0.5 @dataclass class SimulationParameters: - 
"""完整的模拟参数配置""" - # 基础信息 + """SimulaçãoConfiguração""" simulation_id: str project_id: str graph_id: str simulation_requirement: str - # 时间配置 + # Configuração time_config: TimeSimulationConfig = field(default_factory=TimeSimulationConfig) - # Agent配置列表 + # AgentConfiguração agent_configs: List[AgentActivityConfig] = field(default_factory=list) - # 事件配置 + # Configuração event_config: EventConfig = field(default_factory=EventConfig) - # 平台配置 + # Configuração twitter_config: Optional[PlatformConfig] = None reddit_config: Optional[PlatformConfig] = None - # LLM配置 + # LLMConfiguração llm_model: str = "" llm_base_url: str = "" - # 生成元数据 + # Gerar generated_at: str = field(default_factory=lambda: datetime.now().isoformat()) - generation_reasoning: str = "" # LLM的推理说明 + generation_reasoning: str = "" # LLM def to_dict(self) -> Dict[str, Any]: - """转换为字典""" + """""" time_dict = asdict(self.time_config) return { "simulation_id": self.simulation_id, @@ -193,34 +174,32 @@ def to_dict(self) -> Dict[str, Any]: } def to_json(self, indent: int = 2) -> str: - """转换为JSON字符串""" + """JSON""" return json.dumps(self.to_dict(), ensure_ascii=False, indent=indent) class SimulationConfigGenerator: """ - 模拟配置智能生成器 + Configuração da simulaçãoGerar - 使用LLM分析模拟需求、文档内容、图谱实体信息, - 自动生成最佳的模拟参数配置 + LLMAnáliseSimulaçãoConteúdoGrafoEntidade + GerarSimulaçãoConfiguração - 采用分步生成策略: - 1. 生成时间配置和事件配置(轻量级) - 2. 分批生成Agent配置(每批10-20个) - 3. 生成平台配置 + Gerar + 1. GerarConfiguraçãoConfiguração + 2. GerarAgentConfiguração10-20 + 3. 
GerarConfiguração """ - # 上下文最大字符数 MAX_CONTEXT_LENGTH = 50000 - # 每批生成的Agent数量 + # GerarAgent AGENTS_PER_BATCH = 15 - # 各步骤的上下文截断长度(字符数) - TIME_CONFIG_CONTEXT_LENGTH = 10000 # 时间配置 - EVENT_CONFIG_CONTEXT_LENGTH = 8000 # 事件配置 - ENTITY_SUMMARY_LENGTH = 300 # 实体摘要 - AGENT_SUMMARY_LENGTH = 300 # Agent配置中的实体摘要 - ENTITIES_PER_TYPE_DISPLAY = 20 # 每类实体显示数量 + TIME_CONFIG_CONTEXT_LENGTH = 10000 # Configuração + EVENT_CONFIG_CONTEXT_LENGTH = 8000 # Configuração + ENTITY_SUMMARY_LENGTH = 300 # Entidade + AGENT_SUMMARY_LENGTH = 300 # AgentConfiguraçãoEntidade + ENTITIES_PER_TYPE_DISPLAY = 20 # Entidade def __init__( self, @@ -233,7 +212,7 @@ def __init__( self.model_name = model_name or Config.LLM_MODEL_NAME if not self.api_key: - raise ValueError("LLM_API_KEY 未配置") + raise ValueError("LLM_API_KEY not configured") self.client = OpenAI( api_key=self.api_key, @@ -253,27 +232,26 @@ def generate_config( progress_callback: Optional[Callable[[int, int, str], None]] = None, ) -> SimulationParameters: """ - 智能生成完整的模拟配置(分步生成) + GerarConfiguração da simulaçãoGerar Args: - simulation_id: 模拟ID - project_id: 项目ID - graph_id: 图谱ID - simulation_requirement: 模拟需求描述 - document_text: 原始文档内容 - entities: 过滤后的实体列表 - enable_twitter: 是否启用Twitter - enable_reddit: 是否启用Reddit - progress_callback: 进度回调函数(current_step, total_steps, message) + simulation_id: ID da simulação + project_id: ID + graph_id: ID do grafo + simulation_requirement: Descrição dos requisitos da simulação + document_text: Conteúdo + entities: Entidade + enable_twitter: Twitter + enable_reddit: Reddit + progress_callback: Função callback de progresso(current_step, total_steps, message) Returns: - SimulationParameters: 完整的模拟参数 + SimulationParameters: Simulação """ - logger.info(f"开始智能生成模拟配置: simulation_id={simulation_id}, 实体数={len(entities)}") + logger.info(f"Starting smart simulation config generation: simulation_id={simulation_id}, entities={len(entities)}") - # 计算总步骤数 num_batches = math.ceil(len(entities) / self.AGENTS_PER_BATCH) - 
total_steps = 3 + num_batches # 时间配置 + 事件配置 + N批Agent + 平台配置 + total_steps = 3 + num_batches # Configuração + Configuração + NAgent + Configuração current_step = 0 def report_progress(step: int, message: str): @@ -283,7 +261,6 @@ def report_progress(step: int, message: str): progress_callback(step, total_steps, message) logger.info(f"[{step}/{total_steps}] {message}") - # 1. 构建基础上下文信息 context = self._build_context( simulation_requirement=simulation_requirement, document_text=document_text, @@ -292,20 +269,20 @@ def report_progress(step: int, message: str): reasoning_parts = [] - # ========== 步骤1: 生成时间配置 ========== + # ========== 1: GerarConfiguração ========== report_progress(1, t('progress.generatingTimeConfig')) num_entities = len(entities) time_config_result = self._generate_time_config(context, num_entities) time_config = self._parse_time_config(time_config_result, num_entities) reasoning_parts.append(f"{t('progress.timeConfigLabel')}: {time_config_result.get('reasoning', t('common.success'))}") - # ========== 步骤2: 生成事件配置 ========== + # ========== 2: GerarConfiguração ========== report_progress(2, t('progress.generatingEventConfig')) event_config_result = self._generate_event_config(context, simulation_requirement, entities) event_config = self._parse_event_config(event_config_result) reasoning_parts.append(f"{t('progress.eventConfigLabel')}: {event_config_result.get('reasoning', t('common.success'))}") - # ========== 步骤3-N: 分批生成Agent配置 ========== + # ========== 3-N: GerarAgentConfiguração ========== all_agent_configs = [] for batch_idx in range(num_batches): start_idx = batch_idx * self.AGENTS_PER_BATCH @@ -327,13 +304,13 @@ def report_progress(step: int, message: str): reasoning_parts.append(t('progress.agentConfigResult', count=len(all_agent_configs))) - # ========== 为初始帖子分配发布者 Agent ========== - logger.info("为初始帖子分配合适的发布者 Agent...") + # ========== Agent ========== + logger.info("Assigning suitable publisher agents to initial posts...") event_config = 
self._assign_initial_post_agents(event_config, all_agent_configs) assigned_count = len([p for p in event_config.initial_posts if p.get("poster_agent_id") is not None]) reasoning_parts.append(t('progress.postAssignResult', count=assigned_count)) - # ========== 最后一步: 生成平台配置 ========== + # ========== : GerarConfiguração ========== report_progress(total_steps, t('progress.generatingPlatformConfig')) twitter_config = None reddit_config = None @@ -358,7 +335,6 @@ def report_progress(step: int, message: str): echo_chamber_strength=0.6 ) - # 构建最终参数 params = SimulationParameters( simulation_id=simulation_id, project_id=project_id, @@ -374,7 +350,7 @@ def report_progress(step: int, message: str): generation_reasoning=" | ".join(reasoning_parts) ) - logger.info(f"模拟配置生成完成: {len(params.agent_configs)} 个Agent配置") + logger.info(f"Simulation config generation complete: {len(params.agent_configs)} agent configs") return params @@ -384,33 +360,31 @@ def _build_context( document_text: str, entities: List[EntityNode] ) -> str: - """构建LLM上下文,截断到最大长度""" + """LLM""" - # 实体摘要 + # Entidade entity_summary = self._summarize_entities(entities) - # 构建上下文 context_parts = [ - f"## 模拟需求\n{simulation_requirement}", - f"\n## 实体信息 ({len(entities)}个)\n{entity_summary}", + f"## Simulation Requirement\n{simulation_requirement}", + f"\n## Entity Information ({len(entities)} entities)\n{entity_summary}", ] current_length = sum(len(p) for p in context_parts) - remaining_length = self.MAX_CONTEXT_LENGTH - current_length - 500 # 留500字符余量 + remaining_length = self.MAX_CONTEXT_LENGTH - current_length - 500 # 500 if remaining_length > 0 and document_text: doc_text = document_text[:remaining_length] if len(document_text) > remaining_length: - doc_text += "\n...(文档已截断)" - context_parts.append(f"\n## 原始文档内容\n{doc_text}") + doc_text += "\n...(document truncated)" + context_parts.append(f"\n## Original Document Content\n{doc_text}") return "\n".join(context_parts) def _summarize_entities(self, entities: 
List[EntityNode]) -> str: - """生成实体摘要""" + """GerarEntidade""" lines = [] - # 按类型分组 by_type: Dict[str, List[EntityNode]] = {} for e in entities: t = e.get_entity_type() or "Unknown" @@ -419,20 +393,20 @@ def _summarize_entities(self, entities: List[EntityNode]) -> str: by_type[t].append(e) for entity_type, type_entities in by_type.items(): - lines.append(f"\n### {entity_type} ({len(type_entities)}个)") - # 使用配置的显示数量和摘要长度 + lines.append(f"\n### {entity_type} ({len(type_entities)} entities)") + # Configuração display_count = self.ENTITIES_PER_TYPE_DISPLAY summary_len = self.ENTITY_SUMMARY_LENGTH for e in type_entities[:display_count]: summary_preview = (e.summary[:summary_len] + "...") if len(e.summary) > summary_len else e.summary lines.append(f"- {e.name}: {summary_preview}") if len(type_entities) > display_count: - lines.append(f" ... 还有 {len(type_entities) - display_count} 个") + lines.append(f" ... and {len(type_entities) - display_count} more") return "\n".join(lines) def _call_llm_with_retry(self, prompt: str, system_prompt: str) -> Dict[str, Any]: - """带重试的LLM调用,包含JSON修复逻辑""" + """LLMJSON""" import re max_attempts = 3 @@ -447,25 +421,24 @@ def _call_llm_with_retry(self, prompt: str, system_prompt: str) -> Dict[str, Any {"role": "user", "content": prompt} ], response_format={"type": "json_object"}, - temperature=0.7 - (attempt * 0.1) # 每次重试降低温度 - # 不设置max_tokens,让LLM自由发挥 + temperature=0.7 - (attempt * 0.1) # + # max_tokensLLM ) content = response.choices[0].message.content finish_reason = response.choices[0].finish_reason - # 检查是否被截断 if finish_reason == 'length': - logger.warning(f"LLM输出被截断 (attempt {attempt+1})") + logger.warning(f"LLM output truncated (attempt {attempt+1})") content = self._fix_truncated_json(content) - # 尝试解析JSON + # JSON try: return json.loads(content) except json.JSONDecodeError as e: - logger.warning(f"JSON解析失败 (attempt {attempt+1}): {str(e)[:80]}") + logger.warning(f"JSON parse failed (attempt {attempt+1}): {str(e)[:80]}") - # 尝试修复JSON + 
# JSON fixed = self._try_fix_config_json(content) if fixed: return fixed @@ -473,44 +446,40 @@ def _call_llm_with_retry(self, prompt: str, system_prompt: str) -> Dict[str, Any last_error = e except Exception as e: - logger.warning(f"LLM调用失败 (attempt {attempt+1}): {str(e)[:80]}") + logger.warning(f"LLM call failed (attempt {attempt+1}): {str(e)[:80]}") last_error = e import time time.sleep(2 * (attempt + 1)) - raise last_error or Exception("LLM调用失败") + raise last_error or Exception("LLM call failed") def _fix_truncated_json(self, content: str) -> str: - """修复被截断的JSON""" + """JSON""" content = content.strip() - # 计算未闭合的括号 open_braces = content.count('{') - content.count('}') open_brackets = content.count('[') - content.count(']') - # 检查是否有未闭合的字符串 if content and content[-1] not in '",}]': content += '"' - # 闭合括号 content += ']' * open_brackets content += '}' * open_braces return content def _try_fix_config_json(self, content: str) -> Optional[Dict[str, Any]]: - """尝试修复配置JSON""" + """ConfiguraçãoJSON""" import re - # 修复被截断的情况 + # Caso content = self._fix_truncated_json(content) - # 提取JSON部分 + # JSON json_match = re.search(r'\{[\s\S]*\}', content) if json_match: json_str = json_match.group() - # 移除字符串中的换行符 def fix_string(match): s = match.group(0) s = s.replace('\n', ' ').replace('\r', ' ') @@ -522,7 +491,6 @@ def fix_string(match): try: return json.loads(json_str) except: - # 尝试移除所有控制字符 json_str = re.sub(r'[\x00-\x1f\x7f-\x9f]', ' ', json_str) json_str = re.sub(r'\s+', ' ', json_str) try: @@ -533,35 +501,35 @@ def fix_string(match): return None def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, Any]: - """生成时间配置""" - # 使用配置的上下文截断长度 + """GerarConfiguração""" + # Configuração context_truncated = context[:self.TIME_CONFIG_CONTEXT_LENGTH] - # 计算最大允许值(80%的agent数) + # 80%agent max_agents_allowed = max(1, int(num_entities * 0.9)) - prompt = f"""基于以下模拟需求,生成时间模拟配置。 + prompt = f"""Based on the following simulation requirements, generate the time 
simulation configuration. {context_truncated} -## 任务 -请生成时间配置JSON。 +## Task +Generate a time configuration JSON. -### 基本原则(仅供参考,需根据具体事件和参与群体灵活调整): -- 请根据模拟场景推断目标用户群体所在时区和作息习惯,以下为东八区(UTC+8)的参考示例 -- 凌晨0-5点几乎无人活动(活跃度系数0.05) -- 早上6-8点逐渐活跃(活跃度系数0.4) -- 工作时间9-18点中等活跃(活跃度系数0.7) -- 晚间19-22点是高峰期(活跃度系数1.5) -- 23点后活跃度下降(活跃度系数0.5) -- 一般规律:凌晨低活跃、早间渐增、工作时段中等、晚间高峰 -- **重要**:以下示例值仅供参考,你需要根据事件性质、参与群体特点来调整具体时段 - - 例如:学生群体高峰可能是21-23点;媒体全天活跃;官方机构只在工作时间 - - 例如:突发热点可能导致深夜也有讨论,off_peak_hours 可适当缩短 +### Basic Principles (for reference only, adjust based on specific events and target audience): +- Infer the target audience's timezone and daily routine based on the simulation scenario. Below is a reference example for UTC-3 (Brazil): +- Midnight 0-5am: Almost no activity (activity coefficient 0.05) +- Morning 6-8am: Gradually increasing activity (activity coefficient 0.4) +- Work hours 9am-6pm: Medium activity (activity coefficient 0.7) +- Evening 7-10pm: Peak period (activity coefficient 1.5) +- After 11pm: Activity decreases (activity coefficient 0.5) +- General pattern: low at dawn, increasing in morning, moderate during work, peak in evening +- **Important**: The example values above are for reference only. Adjust based on event nature and audience characteristics. 
+ - Example: Student groups may peak at 9-11pm; media is active all day; official agencies only during work hours + - Example: Breaking news may cause late-night discussion, off_peak_hours can be shortened -### 返回JSON格式(不要markdown) +### Return JSON format (no markdown) -示例: +Example: {{ "total_simulation_hours": 72, "minutes_per_round": 60, @@ -571,71 +539,70 @@ def _generate_time_config(self, context: str, num_entities: int) -> Dict[str, An "off_peak_hours": [0, 1, 2, 3, 4, 5], "morning_hours": [6, 7, 8], "work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18], - "reasoning": "针对该事件的时间配置说明" + "reasoning": "Time configuration explanation for this event" }} -字段说明: -- total_simulation_hours (int): 模拟总时长,24-168小时,突发事件短、持续话题长 -- minutes_per_round (int): 每轮时长,30-120分钟,建议60分钟 -- agents_per_hour_min (int): 每小时最少激活Agent数(取值范围: 1-{max_agents_allowed}) -- agents_per_hour_max (int): 每小时最多激活Agent数(取值范围: 1-{max_agents_allowed}) -- peak_hours (int数组): 高峰时段,根据事件参与群体调整 -- off_peak_hours (int数组): 低谷时段,通常深夜凌晨 -- morning_hours (int数组): 早间时段 -- work_hours (int数组): 工作时段 -- reasoning (string): 简要说明为什么这样配置""" - - system_prompt = "你是社交媒体模拟专家。返回纯JSON格式,时间配置需符合模拟场景中目标用户群体的作息习惯。" +Field descriptions: +- total_simulation_hours (int): Total simulation duration, 24-168 hours. Shorter for breaking events, longer for ongoing topics +- minutes_per_round (int): Duration per round, 30-120 minutes, recommended 60 +- agents_per_hour_min (int): Minimum agents activated per hour (range: 1-{max_agents_allowed}) +- agents_per_hour_max (int): Maximum agents activated per hour (range: 1-{max_agents_allowed}) +- peak_hours (int array): Peak hours, adjust based on event audience +- off_peak_hours (int array): Low activity hours, typically late night/early morning +- morning_hours (int array): Morning hours +- work_hours (int array): Work hours +- reasoning (string): Configuração""" + + system_prompt = "Você é um especialista em simulação de redes sociais para o mercado brasileiro. 
O campo reasoning DEVE ser em português do Brasil. Retorne APENAS JSON puro. NUNCA escreva em chinês ou inglês." system_prompt = f"{system_prompt}\n\n{get_language_instruction()}" try: return self._call_llm_with_retry(prompt, system_prompt) except Exception as e: - logger.warning(f"时间配置LLM生成失败: {e}, 使用默认配置") + logger.warning(f"Time config LLM generation failed: {e}, Using default config") return self._get_default_time_config(num_entities) def _get_default_time_config(self, num_entities: int) -> Dict[str, Any]: - """获取默认时间配置(中国人作息)""" + """Configuração""" return { "total_simulation_hours": 72, - "minutes_per_round": 60, # 每轮1小时,加快时间流速 + "minutes_per_round": 60, # 1 "agents_per_hour_min": max(1, num_entities // 15), "agents_per_hour_max": max(5, num_entities // 5), "peak_hours": [19, 20, 21, 22], "off_peak_hours": [0, 1, 2, 3, 4, 5], "morning_hours": [6, 7, 8], "work_hours": [9, 10, 11, 12, 13, 14, 15, 16, 17, 18], - "reasoning": "使用默认中国人作息配置(每轮1小时)" + "reasoning": "Using default schedule config (1 hour per round)" } def _parse_time_config(self, result: Dict[str, Any], num_entities: int) -> TimeSimulationConfig: - """解析时间配置结果,并验证agents_per_hour值不超过总agent数""" - # 获取原始值 + """ConfiguraçãoResultadoagents_per_houragent""" agents_per_hour_min = result.get("agents_per_hour_min", max(1, num_entities // 15)) agents_per_hour_max = result.get("agents_per_hour_max", max(5, num_entities // 5)) - # 验证并修正:确保不超过总agent数 + # agent if agents_per_hour_min > num_entities: - logger.warning(f"agents_per_hour_min ({agents_per_hour_min}) 超过总Agent数 ({num_entities}),已修正") + logger.warning(f"agents_per_hour_min ({agents_per_hour_min}) exceeds total agent count ({num_entities}), corrected") agents_per_hour_min = max(1, num_entities // 10) if agents_per_hour_max > num_entities: - logger.warning(f"agents_per_hour_max ({agents_per_hour_max}) 超过总Agent数 ({num_entities}),已修正") + logger.warning(f"agents_per_hour_max ({agents_per_hour_max}) exceeds total agent count ({num_entities}), corrected") 
agents_per_hour_max = max(agents_per_hour_min + 1, num_entities // 2) - # 确保 min < max + # min < max if agents_per_hour_min >= agents_per_hour_max: agents_per_hour_min = max(1, agents_per_hour_max // 2) - logger.warning(f"agents_per_hour_min >= max,已修正为 {agents_per_hour_min}") + logger.warning(f"agents_per_hour_min >= max, corrected to {agents_per_hour_min}") return TimeSimulationConfig( total_simulation_hours=result.get("total_simulation_hours", 72), - minutes_per_round=result.get("minutes_per_round", 60), # 默认每轮1小时 + minutes_per_round=result.get("minutes_per_round", 60), # 1 agents_per_hour_min=agents_per_hour_min, agents_per_hour_max=agents_per_hour_max, peak_hours=result.get("peak_hours", [19, 20, 21, 22]), off_peak_hours=result.get("off_peak_hours", [0, 1, 2, 3, 4, 5]), - off_peak_activity_multiplier=0.05, # 凌晨几乎无人 + off_peak_activity_multiplier=0.05, # morning_hours=result.get("morning_hours", [6, 7, 8]), morning_activity_multiplier=0.4, work_hours=result.get("work_hours", list(range(9, 19))), @@ -649,14 +616,14 @@ def _generate_event_config( simulation_requirement: str, entities: List[EntityNode] ) -> Dict[str, Any]: - """生成事件配置""" + """GerarConfiguração""" - # 获取可用的实体类型列表,供 LLM 参考 + # Entidade LLM entity_types_available = list(set( e.get_entity_type() or "Unknown" for e in entities )) - # 为每种类型列出代表性实体名称 + # Entidade type_examples = {} for e in entities: etype = e.get_entity_type() or "Unknown" @@ -670,54 +637,54 @@ def _generate_event_config( for t, examples in type_examples.items() ]) - # 使用配置的上下文截断长度 + # Configuração context_truncated = context[:self.EVENT_CONFIG_CONTEXT_LENGTH] - prompt = f"""基于以下模拟需求,生成事件配置。 + prompt = f"""Based on the following simulation requirements, generate the event configuration. 
-模拟需求: {simulation_requirement} +Simulation requirement: {simulation_requirement} {context_truncated} -## 可用实体类型及示例 +## Available Entity Types and Examples {type_info} -## 任务 -请生成事件配置JSON: -- 提取热点话题关键词 -- 描述舆论发展方向 -- 设计初始帖子内容,**每个帖子必须指定 poster_type(发布者类型)** +## Task +Generate an event configuration JSON: +- Extract hot topic keywords +- Describe the direction of public opinion development +- Design initial post content, **each post must specify poster_type (publisher type)** -**重要**: poster_type 必须从上面的"可用实体类型"中选择,这样初始帖子才能分配给合适的 Agent 发布。 -例如:官方声明应由 Official/University 类型发布,新闻由 MediaOutlet 发布,学生观点由 Student 发布。 +**Important**: poster_type must be selected from the "Available Entity Types" above, so initial posts can be assigned to the appropriate Agent for publishing. +Example: Official statements should be posted by Official/University types, news by MediaOutlet, consumer opinions by Person, etc. -返回JSON格式(不要markdown): +Return JSON format (no markdown): {{ - "hot_topics": ["关键词1", "关键词2", ...], - "narrative_direction": "<舆论发展方向描述>", + "hot_topics": ["keyword1", "keyword2", ...], + "narrative_direction": "", "initial_posts": [ - {{"content": "帖子内容", "poster_type": "实体类型(必须从可用类型中选择)"}}, + {{"content": "post content", "poster_type": "EntityType (must be from available types)"}}, ... ], - "reasoning": "<简要说明>" + "reasoning": "" }}""" - system_prompt = "你是舆论分析专家。返回纯JSON格式。注意 poster_type 必须精确匹配可用实体类型。" + system_prompt = "Você é um especialista em análise de opinião pública para o mercado brasileiro. TODO conteúdo textual (hot_topics, narrative_direction, content dos posts, reasoning) DEVE ser em português do Brasil. Retorne APENAS JSON puro. O poster_type deve corresponder exatamente aos tipos de entidade disponíveis. NUNCA escreva em chinês ou inglês." system_prompt = f"{system_prompt}\n\n{get_language_instruction()}\nIMPORTANT: The 'poster_type' field value MUST be in English PascalCase exactly matching the available entity types. 
Only 'content', 'narrative_direction', 'hot_topics' and 'reasoning' fields should use the specified language." try: return self._call_llm_with_retry(prompt, system_prompt) except Exception as e: - logger.warning(f"事件配置LLM生成失败: {e}, 使用默认配置") + logger.warning(f"Event config LLM generation failed: {e}, Using default config") return { "hot_topics": [], "narrative_direction": "", "initial_posts": [], - "reasoning": "使用默认配置" + "reasoning": "Using default config" } def _parse_event_config(self, result: Dict[str, Any]) -> EventConfig: - """解析事件配置结果""" + """ConfiguraçãoResultado""" return EventConfig( initial_posts=result.get("initial_posts", []), scheduled_events=[], @@ -731,14 +698,14 @@ def _assign_initial_post_agents( agent_configs: List[AgentActivityConfig] ) -> EventConfig: """ - 为初始帖子分配合适的发布者 Agent + Agent - 根据每个帖子的 poster_type 匹配最合适的 agent_id + poster_type agent_id """ if not event_config.initial_posts: return event_config - # 按实体类型建立 agent 索引 + # Entidade agent agents_by_type: Dict[str, List[AgentActivityConfig]] = {} for agent in agent_configs: etype = agent.entity_type.lower() @@ -746,7 +713,6 @@ def _assign_initial_post_agents( agents_by_type[etype] = [] agents_by_type[etype].append(agent) - # 类型映射表(处理 LLM 可能输出的不同格式) type_aliases = { "official": ["official", "university", "governmentagency", "government"], "university": ["university", "official"], @@ -758,7 +724,7 @@ def _assign_initial_post_agents( "person": ["person", "student", "alumni"], } - # 记录每种类型已使用的 agent 索引,避免重复使用同一个 agent + # agent agent used_indices: Dict[str, int] = {} updated_posts = [] @@ -766,17 +732,15 @@ def _assign_initial_post_agents( poster_type = post.get("poster_type", "").lower() content = post.get("content", "") - # 尝试找到匹配的 agent + # agent matched_agent_id = None - # 1. 直接匹配 if poster_type in agents_by_type: agents = agents_by_type[poster_type] idx = used_indices.get(poster_type, 0) % len(agents) matched_agent_id = agents[idx].agent_id used_indices[poster_type] = idx + 1 else: - # 2. 
使用别名匹配 for alias_key, aliases in type_aliases.items(): if poster_type in aliases or alias_key == poster_type: for alias in aliases: @@ -789,11 +753,10 @@ def _assign_initial_post_agents( if matched_agent_id is not None: break - # 3. 如果仍未找到,使用影响力最高的 agent + # 3. agent if matched_agent_id is None: - logger.warning(f"未找到类型 '{poster_type}' 的匹配 Agent,使用影响力最高的 Agent") + logger.warning(f"No matching agent for type '{poster_type}', using highest influence agent") if agent_configs: - # 按影响力排序,选择影响力最高的 sorted_agents = sorted(agent_configs, key=lambda a: a.influence_weight, reverse=True) matched_agent_id = sorted_agents[0].agent_id else: @@ -805,7 +768,7 @@ def _assign_initial_post_agents( "poster_agent_id": matched_agent_id }) - logger.info(f"初始帖子分配: poster_type='{poster_type}' -> agent_id={matched_agent_id}") + logger.info(f"Initial post assignment: poster_type='{poster_type}' -> agent_id={matched_agent_id}") event_config.initial_posts = updated_posts return event_config @@ -817,9 +780,9 @@ def _generate_agent_configs_batch( start_idx: int, simulation_requirement: str ) -> List[AgentActivityConfig]: - """分批生成Agent配置""" + """GerarAgentConfiguração""" - # 构建实体信息(使用配置的摘要长度) + # EntidadeConfiguração entity_list = [] summary_len = self.AGENT_SUMMARY_LENGTH for i, e in enumerate(entities): @@ -830,59 +793,59 @@ def _generate_agent_configs_batch( "summary": e.summary[:summary_len] if e.summary else "" }) - prompt = f"""基于以下信息,为每个实体生成社交媒体活动配置。 + prompt = f"""Based on the following information, generate social media activity configuration for each entity. 
-模拟需求: {simulation_requirement} +Simulation requirement: {simulation_requirement} -## 实体列表 +## Entity List ```json {json.dumps(entity_list, ensure_ascii=False, indent=2)} ``` -## 任务 -为每个实体生成活动配置,注意: -- **时间符合目标用户群体作息**:以下为参考(东八区),请根据模拟场景调整 -- **官方机构**(University/GovernmentAgency):活跃度低(0.1-0.3),工作时间(9-17)活动,响应慢(60-240分钟),影响力高(2.5-3.0) -- **媒体**(MediaOutlet):活跃度中(0.4-0.6),全天活动(8-23),响应快(5-30分钟),影响力高(2.0-2.5) -- **个人**(Student/Person/Alumni):活跃度高(0.6-0.9),主要晚间活动(18-23),响应快(1-15分钟),影响力低(0.8-1.2) -- **公众人物/专家**:活跃度中(0.4-0.6),影响力中高(1.5-2.0) +## Task +Generate activity configuration for each entity. Notes: +- **Timing should match target audience routines**: Below are references, adjust based on simulation scenario +- **Official institutions** (University/GovernmentAgency): Low activity (0.1-0.3), active during work hours (9-17), slow response (60-240 min), high influence (2.5-3.0) +- **Media** (MediaOutlet): Medium activity (0.4-0.6), active all day (8-23), fast response (5-30 min), high influence (2.0-2.5) +- **Individuals** (Student/Person/Alumni): High activity (0.6-0.9), mainly active in evening (18-23), fast response (1-15 min), low influence (0.8-1.2) +- **Public figures/Experts**: Medium activity (0.4-0.6), medium-high influence (1.5-2.0) -返回JSON格式(不要markdown): +Return JSON format (no markdown): {{ "agent_configs": [ {{ - "agent_id": <必须与输入一致>, + "agent_id": , "activity_level": <0.0-1.0>, - "posts_per_hour": <发帖频率>, - "comments_per_hour": <评论频率>, - "active_hours": [<活跃小时列表,考虑中国人作息>], - "response_delay_min": <最小响应延迟分钟>, - "response_delay_max": <最大响应延迟分钟>, - "sentiment_bias": <-1.0到1.0>, + "posts_per_hour": , + "comments_per_hour": , + "active_hours": [], + "response_delay_min": , + "response_delay_max": , + "sentiment_bias": <-1.0 to 1.0>, "stance": "", - "influence_weight": <影响力权重> + "influence_weight": }}, ... 
] }}""" - system_prompt = "你是社交媒体行为分析专家。返回纯JSON,配置需符合模拟场景中目标用户群体的作息习惯。" + system_prompt = "Você é um especialista em análise de comportamento em redes sociais para o mercado brasileiro. TODOS os campos de linguagem natural DEVEM ser em português do Brasil (PT-BR). Retorne APENAS JSON puro. NUNCA escreva em chinês ou inglês." system_prompt = f"{system_prompt}\n\n{get_language_instruction()}\nIMPORTANT: The 'stance' field value MUST be one of the English strings: 'supportive', 'opposing', 'neutral', 'observer'. All JSON field names and numeric values must remain unchanged. Only natural language text fields should use the specified language." try: result = self._call_llm_with_retry(prompt, system_prompt) llm_configs = {cfg["agent_id"]: cfg for cfg in result.get("agent_configs", [])} except Exception as e: - logger.warning(f"Agent配置批次LLM生成失败: {e}, 使用规则生成") + logger.warning(f"Agent config batch LLM generation failed: {e}, using rule-based generation") llm_configs = {} - # 构建AgentActivityConfig对象 + # AgentActivityConfig configs = [] for i, entity in enumerate(entities): agent_id = start_idx + i cfg = llm_configs.get(agent_id, {}) - # 如果LLM没有生成,使用规则生成 + # LLMGerarusing rule-based generation if not cfg: cfg = self._generate_agent_config_by_rule(entity) @@ -906,11 +869,10 @@ def _generate_agent_configs_batch( return configs def _generate_agent_config_by_rule(self, entity: EntityNode) -> Dict[str, Any]: - """基于规则生成单个Agent配置(中国人作息)""" + """Gerar agent configs""" entity_type = (entity.get_entity_type() or "Unknown").lower() if entity_type in ["university", "governmentagency", "ngo"]: - # 官方机构:工作时间活动,低频率,高影响力 return { "activity_level": 0.2, "posts_per_hour": 0.1, @@ -923,7 +885,6 @@ def _generate_agent_config_by_rule(self, entity: EntityNode) -> Dict[str, Any]: "influence_weight": 3.0 } elif entity_type in ["mediaoutlet"]: - # 媒体:全天活动,中等频率,高影响力 return { "activity_level": 0.5, "posts_per_hour": 0.8, @@ -936,7 +897,6 @@ def _generate_agent_config_by_rule(self, entity: EntityNode) 
-> Dict[str, Any]: "influence_weight": 2.5 } elif entity_type in ["professor", "expert", "official"]: - # 专家/教授:工作+晚间活动,中等频率 return { "activity_level": 0.4, "posts_per_hour": 0.3, @@ -949,12 +909,11 @@ def _generate_agent_config_by_rule(self, entity: EntityNode) -> Dict[str, Any]: "influence_weight": 2.0 } elif entity_type in ["student"]: - # 学生:晚间为主,高频率 return { "activity_level": 0.8, "posts_per_hour": 0.6, "comments_per_hour": 1.5, - "active_hours": [8, 9, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23], # 上午+晚间 + "active_hours": [8, 9, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23], # + "response_delay_min": 1, "response_delay_max": 15, "sentiment_bias": 0.0, @@ -962,12 +921,11 @@ def _generate_agent_config_by_rule(self, entity: EntityNode) -> Dict[str, Any]: "influence_weight": 0.8 } elif entity_type in ["alumni"]: - # 校友:晚间为主 return { "activity_level": 0.6, "posts_per_hour": 0.4, "comments_per_hour": 0.8, - "active_hours": [12, 13, 19, 20, 21, 22, 23], # 午休+晚间 + "active_hours": [12, 13, 19, 20, 21, 22, 23], # + "response_delay_min": 5, "response_delay_max": 30, "sentiment_bias": 0.0, @@ -975,17 +933,14 @@ def _generate_agent_config_by_rule(self, entity: EntityNode) -> Dict[str, Any]: "influence_weight": 1.0 } else: - # 普通人:晚间高峰 return { "activity_level": 0.7, "posts_per_hour": 0.5, "comments_per_hour": 1.2, - "active_hours": [9, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23], # 白天+晚间 + "active_hours": [9, 10, 11, 12, 13, 18, 19, 20, 21, 22, 23], # + "response_delay_min": 2, "response_delay_max": 20, "sentiment_bias": 0.0, "stance": "neutral", "influence_weight": 1.0 } - - diff --git a/backend/app/services/simulation_ipc.py b/backend/app/services/simulation_ipc.py index 9d70d0bea6..3050ee3e23 100644 --- a/backend/app/services/simulation_ipc.py +++ b/backend/app/services/simulation_ipc.py @@ -1,11 +1,11 @@ """ -模拟IPC通信模块 -用于Flask后端和模拟脚本之间的进程间通信 +SimulaçãoIPC +FlaskSimulação -通过文件系统实现简单的命令/响应模式: -1. Flask写入命令到 commands/ 目录 -2. 模拟脚本轮询命令目录,执行命令并写入响应到 responses/ 目录 -3. 
Flask轮询响应目录获取结果 +/ +1. Flask commands/ +2. Simulação responses/ +3. FlaskResultado """ import os @@ -23,14 +23,14 @@ class CommandType(str, Enum): - """命令类型""" - INTERVIEW = "interview" # 单个Agent采访 - BATCH_INTERVIEW = "batch_interview" # 批量采访 - CLOSE_ENV = "close_env" # 关闭环境 + """""" + INTERVIEW = "interview" # Agent + BATCH_INTERVIEW = "batch_interview" # + CLOSE_ENV = "close_env" # class CommandStatus(str, Enum): - """命令状态""" + """""" PENDING = "pending" PROCESSING = "processing" COMPLETED = "completed" @@ -39,7 +39,7 @@ class CommandStatus(str, Enum): @dataclass class IPCCommand: - """IPC命令""" + """IPC""" command_id: str command_type: CommandType args: Dict[str, Any] @@ -65,7 +65,7 @@ def from_dict(cls, data: Dict[str, Any]) -> 'IPCCommand': @dataclass class IPCResponse: - """IPC响应""" + """IPC""" command_id: str status: CommandStatus result: Optional[Dict[str, Any]] = None @@ -94,23 +94,22 @@ def from_dict(cls, data: Dict[str, Any]) -> 'IPCResponse': class SimulationIPCClient: """ - 模拟IPC客户端(Flask端使用) + SimulaçãoIPCFlask - 用于向模拟进程发送命令并等待响应 + Simulação """ def __init__(self, simulation_dir: str): """ - 初始化IPC客户端 + IPC Args: - simulation_dir: 模拟数据目录 + simulation_dir: Simulação """ self.simulation_dir = simulation_dir self.commands_dir = os.path.join(simulation_dir, "ipc_commands") self.responses_dir = os.path.join(simulation_dir, "ipc_responses") - # 确保目录存在 os.makedirs(self.commands_dir, exist_ok=True) os.makedirs(self.responses_dir, exist_ok=True) @@ -122,19 +121,18 @@ def send_command( poll_interval: float = 0.5 ) -> IPCResponse: """ - 发送命令并等待响应 Args: - command_type: 命令类型 - args: 命令参数 - timeout: 超时时间(秒) - poll_interval: 轮询间隔(秒) + command_type: + args: + timeout: + poll_interval: Returns: IPCResponse Raises: - TimeoutError: 等待响应超时 + TimeoutError: """ command_id = str(uuid.uuid4()) command = IPCCommand( @@ -143,14 +141,12 @@ def send_command( args=args ) - # 写入命令文件 command_file = os.path.join(self.commands_dir, f"{command_id}.json") with open(command_file, 'w', 
encoding='utf-8') as f: json.dump(command.to_dict(), f, ensure_ascii=False, indent=2) - logger.info(f"发送IPC命令: {command_type.value}, command_id={command_id}") + logger.info(f"IPC: {command_type.value}, command_id={command_id}") - # 等待响应 response_file = os.path.join(self.responses_dir, f"{command_id}.json") start_time = time.time() @@ -161,30 +157,27 @@ def send_command( response_data = json.load(f) response = IPCResponse.from_dict(response_data) - # 清理命令和响应文件 try: os.remove(command_file) os.remove(response_file) except OSError: pass - logger.info(f"收到IPC响应: command_id={command_id}, status={response.status.value}") + logger.info(f"IPC: command_id={command_id}, status={response.status.value}") return response except (json.JSONDecodeError, KeyError) as e: - logger.warning(f"解析响应失败: {e}") + logger.warning(f"Falhou: {e}") time.sleep(poll_interval) - # 超时 - logger.error(f"等待IPC响应超时: command_id={command_id}") + logger.error(f"IPC: command_id={command_id}") - # 清理命令文件 try: os.remove(command_file) except OSError: pass - raise TimeoutError(f"等待命令响应超时 ({timeout}秒)") + raise TimeoutError(f" ({timeout})") def send_interview( self, @@ -194,19 +187,19 @@ def send_interview( timeout: float = 60.0 ) -> IPCResponse: """ - 发送单个Agent采访命令 + Agent Args: agent_id: Agent ID - prompt: 采访问题 - platform: 指定平台(可选) - - "twitter": 只采访Twitter平台 - - "reddit": 只采访Reddit平台 - - None: 双平台模拟时同时采访两个平台,单平台模拟时采访该平台 - timeout: 超时时间 + prompt: + platform: + - "twitter": Twitter + - "reddit": Reddit + - None: SimulaçãoSimulação + timeout: Returns: - IPCResponse,result字段包含采访结果 + IPCResponseresultResultado """ args = { "agent_id": agent_id, @@ -228,18 +221,17 @@ def send_batch_interview( timeout: float = 120.0 ) -> IPCResponse: """ - 发送批量采访命令 Args: - interviews: 采访列表,每个元素包含 {"agent_id": int, "prompt": str, "platform": str(可选)} - platform: 默认平台(可选,会被每个采访项的platform覆盖) - - "twitter": 默认只采访Twitter平台 - - "reddit": 默认只采访Reddit平台 - - None: 双平台模拟时每个Agent同时采访两个平台 - timeout: 超时时间 + interviews: {"agent_id": int, "prompt": 
str, "platform": str()} + platform: platform + - "twitter": Twitter + - "reddit": Reddit + - None: SimulaçãoAgent + timeout: Returns: - IPCResponse,result字段包含所有采访结果 + IPCResponseresultResultado """ args = {"interviews": interviews} if platform: @@ -253,10 +245,9 @@ def send_batch_interview( def send_close_env(self, timeout: float = 30.0) -> IPCResponse: """ - 发送关闭环境命令 Args: - timeout: 超时时间 + timeout: Returns: IPCResponse @@ -269,9 +260,9 @@ def send_close_env(self, timeout: float = 30.0) -> IPCResponse: def check_env_alive(self) -> bool: """ - 检查模拟环境是否存活 + Simulação - 通过检查 env_status.json 文件来判断 + env_status.json """ status_file = os.path.join(self.simulation_dir, "env_status.json") if not os.path.exists(status_file): @@ -287,41 +278,38 @@ def check_env_alive(self) -> bool: class SimulationIPCServer: """ - 模拟IPC服务器(模拟脚本端使用) + SimulaçãoIPCSimulação - 轮询命令目录,执行命令并返回响应 """ def __init__(self, simulation_dir: str): """ - 初始化IPC服务器 + IPC Args: - simulation_dir: 模拟数据目录 + simulation_dir: Simulação """ self.simulation_dir = simulation_dir self.commands_dir = os.path.join(simulation_dir, "ipc_commands") self.responses_dir = os.path.join(simulation_dir, "ipc_responses") - # 确保目录存在 os.makedirs(self.commands_dir, exist_ok=True) os.makedirs(self.responses_dir, exist_ok=True) - # 环境状态 self._running = False def start(self): - """标记服务器为运行状态""" + """""" self._running = True self._update_env_status("alive") def stop(self): - """标记服务器为停止状态""" + """""" self._running = False self._update_env_status("stopped") def _update_env_status(self, status: str): - """更新环境状态文件""" + """""" status_file = os.path.join(self.simulation_dir, "env_status.json") with open(status_file, 'w', encoding='utf-8') as f: json.dump({ @@ -331,15 +319,13 @@ def _update_env_status(self, status: str): def poll_commands(self) -> Optional[IPCCommand]: """ - 轮询命令目录,返回第一个待处理的命令 Returns: - IPCCommand 或 None + IPCCommand None """ if not os.path.exists(self.commands_dir): return None - # 按时间排序获取命令文件 command_files = [] for 
filename in os.listdir(self.commands_dir): if filename.endswith('.json'): @@ -354,23 +340,21 @@ def poll_commands(self) -> Optional[IPCCommand]: data = json.load(f) return IPCCommand.from_dict(data) except (json.JSONDecodeError, KeyError, OSError) as e: - logger.warning(f"读取命令文件失败: {filepath}, {e}") + logger.warning(f"Falhou: {filepath}, {e}") continue return None def send_response(self, response: IPCResponse): """ - 发送响应 Args: - response: IPC响应 + response: IPC """ response_file = os.path.join(self.responses_dir, f"{response.command_id}.json") with open(response_file, 'w', encoding='utf-8') as f: json.dump(response.to_dict(), f, ensure_ascii=False, indent=2) - # 删除命令文件 command_file = os.path.join(self.commands_dir, f"{response.command_id}.json") try: os.remove(command_file) @@ -378,7 +362,7 @@ def send_response(self, response: IPCResponse): pass def send_success(self, command_id: str, result: Dict[str, Any]): - """发送成功响应""" + """""" self.send_response(IPCResponse( command_id=command_id, status=CommandStatus.COMPLETED, @@ -386,7 +370,7 @@ def send_success(self, command_id: str, result: Dict[str, Any]): )) def send_error(self, command_id: str, error: str): - """发送错误响应""" + """""" self.send_response(IPCResponse( command_id=command_id, status=CommandStatus.FAILED, diff --git a/backend/app/services/simulation_manager.py b/backend/app/services/simulation_manager.py index 0d161a9095..a1addd0394 100644 --- a/backend/app/services/simulation_manager.py +++ b/backend/app/services/simulation_manager.py @@ -1,7 +1,7 @@ """ -OASIS模拟管理器 -管理Twitter和Reddit双平台并行模拟 -使用预设脚本 + LLM智能生成配置参数 +OASISSimulação +TwitterRedditSimulação + + LLMGerarConfiguração """ import os @@ -23,60 +23,54 @@ class SimulationStatus(str, Enum): - """模拟状态""" + """Simulação""" CREATED = "created" PREPARING = "preparing" READY = "ready" RUNNING = "running" PAUSED = "paused" - STOPPED = "stopped" # 模拟被手动停止 - COMPLETED = "completed" # 模拟自然完成 + STOPPED = "stopped" # Simulação + COMPLETED = "completed" # Simulação 
FAILED = "failed" class PlatformType(str, Enum): - """平台类型""" + """""" TWITTER = "twitter" REDDIT = "reddit" @dataclass class SimulationState: - """模拟状态""" + """Simulação""" simulation_id: str project_id: str graph_id: str - # 平台启用状态 enable_twitter: bool = True enable_reddit: bool = True - # 状态 status: SimulationStatus = SimulationStatus.CREATED - # 准备阶段数据 entities_count: int = 0 profiles_count: int = 0 entity_types: List[str] = field(default_factory=list) - # 配置生成信息 + # ConfiguraçãoGerar config_generated: bool = False config_reasoning: str = "" - # 运行时数据 current_round: int = 0 twitter_status: str = "not_started" reddit_status: str = "not_started" - # 时间戳 created_at: str = field(default_factory=lambda: datetime.now().isoformat()) updated_at: str = field(default_factory=lambda: datetime.now().isoformat()) - # 错误信息 error: Optional[str] = None def to_dict(self) -> Dict[str, Any]: - """完整状态字典(内部使用)""" + """""" return { "simulation_id": self.simulation_id, "project_id": self.project_id, @@ -98,7 +92,7 @@ def to_dict(self) -> Dict[str, Any]: } def to_simple_dict(self) -> Dict[str, Any]: - """简化状态字典(API返回使用)""" + """API""" return { "simulation_id": self.simulation_id, "project_id": self.project_id, @@ -114,36 +108,34 @@ def to_simple_dict(self) -> Dict[str, Any]: class SimulationManager: """ - 模拟管理器 + Simulação - 核心功能: - 1. 从Zep图谱读取实体并过滤 - 2. 生成OASIS Agent Profile - 3. 使用LLM智能生成模拟配置参数 - 4. 准备预设脚本所需的所有文件 + 1. ZepGrafoEntidade + 2. GerarOASIS Agent Profile + 3. LLMGerarConfiguração da simulação + 4. 
""" - # 模拟数据存储目录 + # Simulação SIMULATION_DATA_DIR = os.path.join( os.path.dirname(__file__), '../../uploads/simulations' ) def __init__(self): - # 确保目录存在 os.makedirs(self.SIMULATION_DATA_DIR, exist_ok=True) - # 内存中的模拟状态缓存 + # Simulação self._simulations: Dict[str, SimulationState] = {} def _get_simulation_dir(self, simulation_id: str) -> str: - """获取模拟数据目录""" + """Simulação""" sim_dir = os.path.join(self.SIMULATION_DATA_DIR, simulation_id) os.makedirs(sim_dir, exist_ok=True) return sim_dir def _save_simulation_state(self, state: SimulationState): - """保存模拟状态到文件""" + """Simulação""" sim_dir = self._get_simulation_dir(state.simulation_id) state_file = os.path.join(sim_dir, "state.json") @@ -155,7 +147,7 @@ def _save_simulation_state(self, state: SimulationState): self._simulations[state.simulation_id] = state def _load_simulation_state(self, simulation_id: str) -> Optional[SimulationState]: - """从文件加载模拟状态""" + """Simulação""" if simulation_id in self._simulations: return self._simulations[simulation_id] @@ -199,13 +191,13 @@ def create_simulation( enable_reddit: bool = True, ) -> SimulationState: """ - 创建新的模拟 + Simulação Args: - project_id: 项目ID - graph_id: Zep图谱ID - enable_twitter: 是否启用Twitter模拟 - enable_reddit: 是否启用Reddit模拟 + project_id: ID + graph_id: ZepID do grafo + enable_twitter: TwitterSimulação + enable_reddit: RedditSimulação Returns: SimulationState @@ -223,7 +215,7 @@ def create_simulation( ) self._save_simulation_state(state) - logger.info(f"创建模拟: {simulation_id}, project={project_id}, graph={graph_id}") + logger.info(f"Simulação: {simulation_id}, project={project_id}, graph={graph_id}") return state @@ -238,30 +230,29 @@ def prepare_simulation( parallel_profile_count: int = 3 ) -> SimulationState: """ - 准备模拟环境(全程自动化) + Simulação - 步骤: - 1. 从Zep图谱读取并过滤实体 - 2. 为每个实体生成OASIS Agent Profile(可选LLM增强,支持并行) - 3. 使用LLM智能生成模拟配置参数(时间、活跃度、发言频率等) - 4. 保存配置文件和Profile文件 - 5. 复制预设脚本到模拟目录 + 1. ZepGrafoEntidade + 2. EntidadeGerarOASIS Agent ProfileLLM + 3. 
LLMGerarConfiguração da simulação + 4. ConfiguraçãoProfile + 5. Simulação Args: - simulation_id: 模拟ID - simulation_requirement: 模拟需求描述(用于LLM生成配置) - document_text: 原始文档内容(用于LLM理解背景) - defined_entity_types: 预定义的实体类型(可选) - use_llm_for_profiles: 是否使用LLM生成详细人设 - progress_callback: 进度回调函数 (stage, progress, message) - parallel_profile_count: 并行生成人设的数量,默认3 + simulation_id: ID da simulação + simulation_requirement: Descrição dos requisitos da simulaçãoLLMGerarConfiguração + document_text: ConteúdoLLM + defined_entity_types: Entidade + use_llm_for_profiles: LLMGerar + progress_callback: Função callback de progresso (stage, progress, message) + parallel_profile_count: Gerar3 Returns: SimulationState """ state = self._load_simulation_state(simulation_id) if not state: - raise ValueError(f"模拟不存在: {simulation_id}") + raise ValueError(f"Simulação: {simulation_id}") try: state.status = SimulationStatus.PREPARING @@ -269,7 +260,7 @@ def prepare_simulation( sim_dir = self._get_simulation_dir(simulation_id) - # ========== 阶段1: 读取并过滤实体 ========== + # ========== 1: Entidade ========== if progress_callback: progress_callback("reading", 0, t('progress.connectingZepGraph')) @@ -297,11 +288,11 @@ def prepare_simulation( if filtered.filtered_count == 0: state.status = SimulationStatus.FAILED - state.error = "没有找到符合条件的实体,请检查图谱是否正确构建" + state.error = "EntidadeGrafo" self._save_simulation_state(state) return state - # ========== 阶段2: 生成Agent Profile ========== + # ========== 2: GerarAgent Profile ========== total_entities = len(filtered.entities) if progress_callback: @@ -312,7 +303,7 @@ def prepare_simulation( total=total_entities ) - # 传入graph_id以启用Zep检索功能,获取更丰富的上下文 + # graph_idZepBusca generator = OasisProfileGenerator(graph_id=state.graph_id) def profile_progress(current, total, msg): @@ -326,7 +317,7 @@ def profile_progress(current, total, msg): item_name=msg ) - # 设置实时保存的文件路径(优先使用 Reddit JSON 格式) + # Reddit JSON realtime_output_path = None realtime_platform = "reddit" if 
state.enable_reddit: @@ -340,16 +331,16 @@ def profile_progress(current, total, msg): entities=filtered.entities, use_llm=use_llm_for_profiles, progress_callback=profile_progress, - graph_id=state.graph_id, # 传入graph_id用于Zep检索 - parallel_count=parallel_profile_count, # 并行生成数量 - realtime_output_path=realtime_output_path, # 实时保存路径 - output_platform=realtime_platform # 输出格式 + graph_id=state.graph_id, # graph_idZepBusca + parallel_count=parallel_profile_count, # Gerar + realtime_output_path=realtime_output_path, # + output_platform=realtime_platform # ) state.profiles_count = len(profiles) - # 保存Profile文件(注意:Twitter使用CSV格式,Reddit使用JSON格式) - # Reddit 已经在生成过程中实时保存了,这里再保存一次确保完整性 + # ProfileTwitterCSVRedditJSON + # Reddit Gerar if progress_callback: progress_callback( "generating_profiles", 95, @@ -366,7 +357,7 @@ def profile_progress(current, total, msg): ) if state.enable_twitter: - # Twitter使用CSV格式!这是OASIS的要求 + # TwitterCSVOASIS generator.save_profiles( profiles=profiles, file_path=os.path.join(sim_dir, "twitter_profiles.csv"), @@ -381,7 +372,7 @@ def profile_progress(current, total, msg): total=len(profiles) ) - # ========== 阶段3: LLM智能生成模拟配置 ========== + # ========== 3: LLMGerarConfiguração da simulação ========== if progress_callback: progress_callback( "generating_config", 0, @@ -419,7 +410,7 @@ def profile_progress(current, total, msg): total=3 ) - # 保存配置文件 + # Configuração config_path = os.path.join(sim_dir, "simulation_config.json") with open(config_path, 'w', encoding='utf-8') as f: f.write(sim_params.to_json()) @@ -435,20 +426,19 @@ def profile_progress(current, total, msg): total=3 ) - # 注意:运行脚本保留在 backend/scripts/ 目录,不再复制到模拟目录 - # 启动模拟时,simulation_runner 会从 scripts/ 目录运行脚本 + # backend/scripts/ Simulação + # Simulaçãosimulation_runner scripts/ - # 更新状态 state.status = SimulationStatus.READY self._save_simulation_state(state) - logger.info(f"模拟准备完成: {simulation_id}, " + logger.info(f"Simulação: {simulation_id}, " f"entities={state.entities_count}, 
profiles={state.profiles_count}") return state except Exception as e: - logger.error(f"模拟准备失败: {simulation_id}, error={str(e)}") + logger.error(f"SimulaçãoFalhou: {simulation_id}, error={str(e)}") import traceback logger.error(traceback.format_exc()) state.status = SimulationStatus.FAILED @@ -457,16 +447,16 @@ def profile_progress(current, total, msg): raise def get_simulation(self, simulation_id: str) -> Optional[SimulationState]: - """获取模拟状态""" + """Simulação""" return self._load_simulation_state(simulation_id) def list_simulations(self, project_id: Optional[str] = None) -> List[SimulationState]: - """列出所有模拟""" + """Simulação""" simulations = [] if os.path.exists(self.SIMULATION_DATA_DIR): for sim_id in os.listdir(self.SIMULATION_DATA_DIR): - # 跳过隐藏文件(如 .DS_Store)和非目录文件 + # .DS_Store sim_path = os.path.join(self.SIMULATION_DATA_DIR, sim_id) if sim_id.startswith('.') or not os.path.isdir(sim_path): continue @@ -479,10 +469,10 @@ def list_simulations(self, project_id: Optional[str] = None) -> List[SimulationS return simulations def get_profiles(self, simulation_id: str, platform: str = "reddit") -> List[Dict[str, Any]]: - """获取模拟的Agent Profile""" + """SimulaçãoAgent Profile""" state = self._load_simulation_state(simulation_id) if not state: - raise ValueError(f"模拟不存在: {simulation_id}") + raise ValueError(f"Simulação: {simulation_id}") sim_dir = self._get_simulation_dir(simulation_id) profile_path = os.path.join(sim_dir, f"{platform}_profiles.json") @@ -494,7 +484,7 @@ def get_profiles(self, simulation_id: str, platform: str = "reddit") -> List[Dic return json.load(f) def get_simulation_config(self, simulation_id: str) -> Optional[Dict[str, Any]]: - """获取模拟配置""" + """Configuração da simulação""" sim_dir = self._get_simulation_dir(simulation_id) config_path = os.path.join(sim_dir, "simulation_config.json") @@ -505,7 +495,7 @@ def get_simulation_config(self, simulation_id: str) -> Optional[Dict[str, Any]]: return json.load(f) def get_run_instructions(self, 
simulation_id: str) -> Dict[str, str]: - """获取运行说明""" + """""" sim_dir = self._get_simulation_dir(simulation_id) config_path = os.path.join(sim_dir, "simulation_config.json") scripts_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../scripts')) @@ -520,10 +510,10 @@ def get_run_instructions(self, simulation_id: str) -> Dict[str, str]: "parallel": f"python {scripts_dir}/run_parallel_simulation.py --config {config_path}", }, "instructions": ( - f"1. 激活conda环境: conda activate MiroFish\n" - f"2. 运行模拟 (脚本位于 {scripts_dir}):\n" - f" - 单独运行Twitter: python {scripts_dir}/run_twitter_simulation.py --config {config_path}\n" - f" - 单独运行Reddit: python {scripts_dir}/run_reddit_simulation.py --config {config_path}\n" - f" - 并行运行双平台: python {scripts_dir}/run_parallel_simulation.py --config {config_path}" + f"1. conda: conda activate MiroFish\n" + f"2. Simulação ( {scripts_dir}):\n" + f" - Twitter: python {scripts_dir}/run_twitter_simulation.py --config {config_path}\n" + f" - Reddit: python {scripts_dir}/run_reddit_simulation.py --config {config_path}\n" + f" - : python {scripts_dir}/run_parallel_simulation.py --config {config_path}" ) } diff --git a/backend/app/services/simulation_runner.py b/backend/app/services/simulation_runner.py index e86021f808..3becb26920 100644 --- a/backend/app/services/simulation_runner.py +++ b/backend/app/services/simulation_runner.py @@ -1,6 +1,6 @@ """ -OASIS模拟运行器 -在后台运行模拟并记录每个Agent的动作,支持实时状态监控 +OASISSimulação +SimulaçãoAgent """ import os @@ -26,15 +26,13 @@ logger = get_logger('mirofish.simulation_runner') -# 标记是否已注册清理函数 _cleanup_registered = False -# 平台检测 IS_WINDOWS = sys.platform == 'win32' class RunnerStatus(str, Enum): - """运行器状态""" + """""" IDLE = "idle" STARTING = "starting" RUNNING = "running" @@ -47,7 +45,7 @@ class RunnerStatus(str, Enum): @dataclass class AgentAction: - """Agent动作记录""" + """Agent""" round_num: int timestamp: str platform: str # twitter / reddit @@ -74,7 +72,7 @@ def to_dict(self) -> Dict[str, Any]: 
@dataclass class RoundSummary: - """每轮摘要""" + """""" round_num: int start_time: str end_time: Optional[str] = None @@ -100,52 +98,45 @@ def to_dict(self) -> Dict[str, Any]: @dataclass class SimulationRunState: - """模拟运行状态(实时)""" + """Simulação""" simulation_id: str runner_status: RunnerStatus = RunnerStatus.IDLE - # 进度信息 current_round: int = 0 total_rounds: int = 0 simulated_hours: int = 0 total_simulation_hours: int = 0 - # 各平台独立轮次和模拟时间(用于双平台并行显示) + # Simulação twitter_current_round: int = 0 reddit_current_round: int = 0 twitter_simulated_hours: int = 0 reddit_simulated_hours: int = 0 - # 平台状态 twitter_running: bool = False reddit_running: bool = False twitter_actions_count: int = 0 reddit_actions_count: int = 0 - # 平台完成状态(通过检测 actions.jsonl 中的 simulation_end 事件) + # actions.jsonl simulation_end twitter_completed: bool = False reddit_completed: bool = False - # 每轮摘要 rounds: List[RoundSummary] = field(default_factory=list) - # 最近动作(用于前端实时展示) recent_actions: List[AgentAction] = field(default_factory=list) max_recent_actions: int = 50 - # 时间戳 started_at: Optional[str] = None updated_at: str = field(default_factory=lambda: datetime.now().isoformat()) completed_at: Optional[str] = None - # 错误信息 error: Optional[str] = None - # 进程ID(用于停止) process_pid: Optional[int] = None def add_action(self, action: AgentAction): - """添加动作到最近动作列表""" + """""" self.recent_actions.insert(0, action) if len(self.recent_actions) > self.max_recent_actions: self.recent_actions = self.recent_actions[:self.max_recent_actions] @@ -166,7 +157,6 @@ def to_dict(self) -> Dict[str, Any]: "simulated_hours": self.simulated_hours, "total_simulation_hours": self.total_simulation_hours, "progress_percent": round(self.current_round / max(self.total_rounds, 1) * 100, 1), - # 各平台独立轮次和时间 "twitter_current_round": self.twitter_current_round, "reddit_current_round": self.reddit_current_round, "twitter_simulated_hours": self.twitter_simulated_hours, @@ -186,7 +176,7 @@ def to_dict(self) -> Dict[str, Any]: } def 
to_detail_dict(self) -> Dict[str, Any]: - """包含最近动作的详细信息""" + """""" result = self.to_dict() result["recent_actions"] = [a.to_dict() for a in self.recent_actions] result["rounds_count"] = len(self.rounds) @@ -195,45 +185,40 @@ def to_detail_dict(self) -> Dict[str, Any]: class SimulationRunner: """ - 模拟运行器 + Simulação - 负责: - 1. 在后台进程中运行OASIS模拟 - 2. 解析运行日志,记录每个Agent的动作 - 3. 提供实时状态查询接口 - 4. 支持暂停/停止/恢复操作 + 1. OASISSimulação + 2. Agent + 3. + 4. // """ - # 运行状态存储目录 RUN_STATE_DIR = os.path.join( os.path.dirname(__file__), '../../uploads/simulations' ) - # 脚本目录 SCRIPTS_DIR = os.path.join( os.path.dirname(__file__), '../../scripts' ) - # 内存中的运行状态 _run_states: Dict[str, SimulationRunState] = {} _processes: Dict[str, subprocess.Popen] = {} _action_queues: Dict[str, Queue] = {} _monitor_threads: Dict[str, threading.Thread] = {} - _stdout_files: Dict[str, Any] = {} # 存储 stdout 文件句柄 - _stderr_files: Dict[str, Any] = {} # 存储 stderr 文件句柄 + _stdout_files: Dict[str, Any] = {} # stdout + _stderr_files: Dict[str, Any] = {} # stderr - # 图谱记忆更新配置 + # GrafoConfiguração _graph_memory_enabled: Dict[str, bool] = {} # simulation_id -> enabled @classmethod def get_run_state(cls, simulation_id: str) -> Optional[SimulationRunState]: - """获取运行状态""" + """""" if simulation_id in cls._run_states: return cls._run_states[simulation_id] - # 尝试从文件加载 state = cls._load_run_state(simulation_id) if state: cls._run_states[simulation_id] = state @@ -241,7 +226,7 @@ def get_run_state(cls, simulation_id: str) -> Optional[SimulationRunState]: @classmethod def _load_run_state(cls, simulation_id: str) -> Optional[SimulationRunState]: - """从文件加载运行状态""" + """""" state_file = os.path.join(cls.RUN_STATE_DIR, simulation_id, "run_state.json") if not os.path.exists(state_file): return None @@ -257,7 +242,6 @@ def _load_run_state(cls, simulation_id: str) -> Optional[SimulationRunState]: total_rounds=data.get("total_rounds", 0), simulated_hours=data.get("simulated_hours", 0), 
total_simulation_hours=data.get("total_simulation_hours", 0), - # 各平台独立轮次和时间 twitter_current_round=data.get("twitter_current_round", 0), reddit_current_round=data.get("reddit_current_round", 0), twitter_simulated_hours=data.get("twitter_simulated_hours", 0), @@ -275,7 +259,6 @@ def _load_run_state(cls, simulation_id: str) -> Optional[SimulationRunState]: process_pid=data.get("process_pid"), ) - # 加载最近动作 actions_data = data.get("recent_actions", []) for a in actions_data: state.recent_actions.append(AgentAction( @@ -292,12 +275,12 @@ def _load_run_state(cls, simulation_id: str) -> Optional[SimulationRunState]: return state except Exception as e: - logger.error(f"加载运行状态失败: {str(e)}") + logger.error(f"Falhou: {str(e)}") return None @classmethod def _save_run_state(cls, state: SimulationRunState): - """保存运行状态到文件""" + """""" sim_dir = os.path.join(cls.RUN_STATE_DIR, state.simulation_id) os.makedirs(sim_dir, exist_ok=True) state_file = os.path.join(sim_dir, "run_state.json") @@ -314,50 +297,47 @@ def start_simulation( cls, simulation_id: str, platform: str = "parallel", # twitter / reddit / parallel - max_rounds: int = None, # 最大模拟轮数(可选,用于截断过长的模拟) - enable_graph_memory_update: bool = False, # 是否将活动更新到Zep图谱 - graph_id: str = None # Zep图谱ID(启用图谱更新时必需) + max_rounds: int = None, # SimulaçãoSimulação + enable_graph_memory_update: bool = False, # ZepGrafo + graph_id: str = None # ZepID do grafoGrafo ) -> SimulationRunState: """ - 启动模拟 + Simulação Args: - simulation_id: 模拟ID - platform: 运行平台 (twitter/reddit/parallel) - max_rounds: 最大模拟轮数(可选,用于截断过长的模拟) - enable_graph_memory_update: 是否将Agent活动动态更新到Zep图谱 - graph_id: Zep图谱ID(启用图谱更新时必需) + simulation_id: ID da simulação + platform: (twitter/reddit/parallel) + max_rounds: SimulaçãoSimulação + enable_graph_memory_update: AgentZepGrafo + graph_id: ZepID do grafoGrafo Returns: SimulationRunState """ - # 检查是否已在运行 existing = cls.get_run_state(simulation_id) if existing and existing.runner_status in [RunnerStatus.RUNNING, 
RunnerStatus.STARTING]: - raise ValueError(f"模拟已在运行中: {simulation_id}") + raise ValueError(f"Simulação: {simulation_id}") - # 加载模拟配置 + # Configuração da simulação sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) config_path = os.path.join(sim_dir, "simulation_config.json") if not os.path.exists(config_path): - raise ValueError(f"模拟配置不存在,请先调用 /prepare 接口") + raise ValueError(f"Configuração da simulação /prepare ") with open(config_path, 'r', encoding='utf-8') as f: config = json.load(f) - # 初始化运行状态 time_config = config.get("time_config", {}) total_hours = time_config.get("total_simulation_hours", 72) minutes_per_round = time_config.get("minutes_per_round", 30) total_rounds = int(total_hours * 60 / minutes_per_round) - # 如果指定了最大轮数,则截断 if max_rounds is not None and max_rounds > 0: original_rounds = total_rounds total_rounds = min(total_rounds, max_rounds) if total_rounds < original_rounds: - logger.info(f"轮数已截断: {original_rounds} -> {total_rounds} (max_rounds={max_rounds})") + logger.info(f": {original_rounds} -> {total_rounds} (max_rounds={max_rounds})") state = SimulationRunState( simulation_id=simulation_id, @@ -369,22 +349,22 @@ def start_simulation( cls._save_run_state(state) - # 如果启用图谱记忆更新,创建更新器 + # Grafo if enable_graph_memory_update: if not graph_id: - raise ValueError("启用图谱记忆更新时必须提供 graph_id") + raise ValueError("Grafo graph_id") try: ZepGraphMemoryManager.create_updater(simulation_id, graph_id) cls._graph_memory_enabled[simulation_id] = True - logger.info(f"已启用图谱记忆更新: simulation_id={simulation_id}, graph_id={graph_id}") + logger.info(f"Grafo: simulation_id={simulation_id}, graph_id={graph_id}") except Exception as e: - logger.error(f"创建图谱记忆更新器失败: {e}") + logger.error(f"GrafoFalhou: {e}") cls._graph_memory_enabled[simulation_id] = False else: cls._graph_memory_enabled[simulation_id] = False - # 确定运行哪个脚本(脚本位于 backend/scripts/ 目录) + # backend/scripts/ if platform == "twitter": script_name = "run_twitter_simulation.py" state.twitter_running = True @@ 
-399,57 +379,52 @@ def start_simulation( script_path = os.path.join(cls.SCRIPTS_DIR, script_name) if not os.path.exists(script_path): - raise ValueError(f"脚本不存在: {script_path}") + raise ValueError(f": {script_path}") - # 创建动作队列 action_queue = Queue() cls._action_queues[simulation_id] = action_queue - # 启动模拟进程 + # Simulação try: - # 构建运行命令,使用完整路径 - # 新的日志结构: - # twitter/actions.jsonl - Twitter 动作日志 - # reddit/actions.jsonl - Reddit 动作日志 - # simulation.log - 主进程日志 + # twitter/actions.jsonl - Twitter + # reddit/actions.jsonl - Reddit + # simulation.log - cmd = [ - sys.executable, # Python解释器 + sys.executable, # Python script_path, - "--config", config_path, # 使用完整配置文件路径 + "--config", config_path, # Configuração ] - # 如果指定了最大轮数,添加到命令行参数 if max_rounds is not None and max_rounds > 0: cmd.extend(["--max-rounds", str(max_rounds)]) - # 创建主日志文件,避免 stdout/stderr 管道缓冲区满导致进程阻塞 + # stdout/stderr main_log_path = os.path.join(sim_dir, "simulation.log") main_log_file = open(main_log_path, 'w', encoding='utf-8') - # 设置子进程环境变量,确保 Windows 上使用 UTF-8 编码 - # 这可以修复第三方库(如 OASIS)读取文件时未指定编码的问题 + # Windows UTF-8 + # OASIS env = os.environ.copy() - env['PYTHONUTF8'] = '1' # Python 3.7+ 支持,让所有 open() 默认使用 UTF-8 - env['PYTHONIOENCODING'] = 'utf-8' # 确保 stdout/stderr 使用 UTF-8 + env['PYTHONUTF8'] = '1' # Python 3.7+ open() UTF-8 + env['PYTHONIOENCODING'] = 'utf-8' # stdout/stderr UTF-8 - # 设置工作目录为模拟目录(数据库等文件会生成在此) - # 使用 start_new_session=True 创建新的进程组,确保可以通过 os.killpg 终止所有子进程 + # SimulaçãoGerar + # start_new_session=True os.killpg process = subprocess.Popen( cmd, cwd=sim_dir, stdout=main_log_file, - stderr=subprocess.STDOUT, # stderr 也写入同一个文件 + stderr=subprocess.STDOUT, # stderr text=True, - encoding='utf-8', # 显式指定编码 + encoding='utf-8', # bufsize=1, - env=env, # 传递带有 UTF-8 设置的环境变量 - start_new_session=True, # 创建新进程组,确保服务器关闭时能终止所有相关进程 + env=env, # UTF-8 + start_new_session=True, # ) - # 保存文件句柄以便后续关闭 cls._stdout_files[simulation_id] = main_log_file - cls._stderr_files[simulation_id] = None # 不再需要单独的 
stderr + cls._stderr_files[simulation_id] = None # stderr state.process_pid = process.pid state.runner_status = RunnerStatus.RUNNING @@ -459,7 +434,6 @@ def start_simulation( # Capture locale before spawning monitor thread current_locale = get_locale() - # 启动监控线程 monitor_thread = threading.Thread( target=cls._monitor_simulation, args=(simulation_id, current_locale), @@ -468,7 +442,7 @@ def start_simulation( monitor_thread.start() cls._monitor_threads[simulation_id] = monitor_thread - logger.info(f"模拟启动成功: {simulation_id}, pid={process.pid}, platform={platform}") + logger.info(f"Simulação: {simulation_id}, pid={process.pid}, platform={platform}") except Exception as e: state.runner_status = RunnerStatus.FAILED @@ -479,12 +453,11 @@ def start_simulation( return state @classmethod - def _monitor_simulation(cls, simulation_id: str, locale: str = 'zh'): - """监控模拟进程,解析动作日志""" + def _monitor_simulation(cls, simulation_id: str, locale: str = 'pt'): + """Simulação""" set_locale(locale) sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) - # 新的日志结构:分平台的动作日志 twitter_actions_log = os.path.join(sim_dir, "twitter", "actions.jsonl") reddit_actions_log = os.path.join(sim_dir, "reddit", "actions.jsonl") @@ -498,75 +471,69 @@ def _monitor_simulation(cls, simulation_id: str, locale: str = 'zh'): reddit_position = 0 try: - while process.poll() is None: # 进程仍在运行 - # 读取 Twitter 动作日志 + while process.poll() is None: # + # Twitter if os.path.exists(twitter_actions_log): twitter_position = cls._read_action_log( twitter_actions_log, twitter_position, state, "twitter" ) - # 读取 Reddit 动作日志 + # Reddit if os.path.exists(reddit_actions_log): reddit_position = cls._read_action_log( reddit_actions_log, reddit_position, state, "reddit" ) - # 更新状态 cls._save_run_state(state) time.sleep(2) - # 进程结束后,最后读取一次日志 if os.path.exists(twitter_actions_log): cls._read_action_log(twitter_actions_log, twitter_position, state, "twitter") if os.path.exists(reddit_actions_log): 
cls._read_action_log(reddit_actions_log, reddit_position, state, "reddit") - # 进程结束 exit_code = process.returncode if exit_code == 0: state.runner_status = RunnerStatus.COMPLETED state.completed_at = datetime.now().isoformat() - logger.info(f"模拟完成: {simulation_id}") + logger.info(f"Simulação: {simulation_id}") else: state.runner_status = RunnerStatus.FAILED - # 从主日志文件读取错误信息 main_log_path = os.path.join(sim_dir, "simulation.log") error_info = "" try: if os.path.exists(main_log_path): with open(main_log_path, 'r', encoding='utf-8') as f: - error_info = f.read()[-2000:] # 取最后2000字符 + error_info = f.read()[-2000:] # 2000 except Exception: pass - state.error = f"进程退出码: {exit_code}, 错误: {error_info}" - logger.error(f"模拟失败: {simulation_id}, error={state.error}") + state.error = f": {exit_code}, : {error_info}" + logger.error(f"SimulaçãoFalhou: {simulation_id}, error={state.error}") state.twitter_running = False state.reddit_running = False cls._save_run_state(state) except Exception as e: - logger.error(f"监控线程异常: {simulation_id}, error={str(e)}") + logger.error(f": {simulation_id}, error={str(e)}") state.runner_status = RunnerStatus.FAILED state.error = str(e) cls._save_run_state(state) finally: - # 停止图谱记忆更新器 + # Grafo if cls._graph_memory_enabled.get(simulation_id, False): try: ZepGraphMemoryManager.stop_updater(simulation_id) - logger.info(f"已停止图谱记忆更新: simulation_id={simulation_id}") + logger.info(f"Grafo: simulation_id={simulation_id}") except Exception as e: - logger.error(f"停止图谱记忆更新器失败: {e}") + logger.error(f"GrafoFalhou: {e}") cls._graph_memory_enabled.pop(simulation_id, None) - # 清理进程资源 cls._processes.pop(simulation_id, None) cls._action_queues.pop(simulation_id, None) - # 关闭日志文件句柄 if simulation_id in cls._stdout_files: try: cls._stdout_files[simulation_id].close() @@ -589,18 +556,16 @@ def _read_action_log( platform: str ) -> int: """ - 读取动作日志文件 Args: - log_path: 日志文件路径 - position: 上次读取位置 - state: 运行状态对象 - platform: 平台名称 (twitter/reddit) + log_path: + position: + 
state: + platform: (twitter/reddit) Returns: - 新的读取位置 """ - # 检查是否启用了图谱记忆更新 + # Grafo graph_memory_enabled = cls._graph_memory_enabled.get(state.simulation_id, False) graph_updater = None if graph_memory_enabled: @@ -615,36 +580,32 @@ def _read_action_log( try: action_data = json.loads(line) - # 处理事件类型的条目 if "event_type" in action_data: event_type = action_data.get("event_type") - # 检测 simulation_end 事件,标记平台已完成 + # simulation_end Concluído if event_type == "simulation_end": if platform == "twitter": state.twitter_completed = True state.twitter_running = False - logger.info(f"Twitter 模拟已完成: {state.simulation_id}, total_rounds={action_data.get('total_rounds')}, total_actions={action_data.get('total_actions')}") + logger.info(f"Twitter SimulaçãoConcluído: {state.simulation_id}, total_rounds={action_data.get('total_rounds')}, total_actions={action_data.get('total_actions')}") elif platform == "reddit": state.reddit_completed = True state.reddit_running = False - logger.info(f"Reddit 模拟已完成: {state.simulation_id}, total_rounds={action_data.get('total_rounds')}, total_actions={action_data.get('total_actions')}") + logger.info(f"Reddit SimulaçãoConcluído: {state.simulation_id}, total_rounds={action_data.get('total_rounds')}, total_actions={action_data.get('total_actions')}") - # 检查是否所有启用的平台都已完成 - # 如果只运行了一个平台,只检查那个平台 - # 如果运行了两个平台,需要两个都完成 + # Concluído all_completed = cls._check_all_platforms_completed(state) if all_completed: state.runner_status = RunnerStatus.COMPLETED state.completed_at = datetime.now().isoformat() - logger.info(f"所有平台模拟已完成: {state.simulation_id}") + logger.info(f"SimulaçãoConcluído: {state.simulation_id}") - # 更新轮次信息(从 round_end 事件) + # round_end elif event_type == "round_end": round_num = action_data.get("round", 0) simulated_hours = action_data.get("simulated_hours", 0) - # 更新各平台独立的轮次和时间 if platform == "twitter": if round_num > state.twitter_current_round: state.twitter_current_round = round_num @@ -654,10 +615,8 @@ def _read_action_log( 
state.reddit_current_round = round_num state.reddit_simulated_hours = simulated_hours - # 总体轮次取两个平台的最大值 if round_num > state.current_round: state.current_round = round_num - # 总体时间取两个平台的最大值 state.simulated_hours = max(state.twitter_simulated_hours, state.reddit_simulated_hours) continue @@ -675,11 +634,10 @@ def _read_action_log( ) state.add_action(action) - # 更新轮次 if action.round_num and action.round_num > state.current_round: state.current_round = action.round_num - # 如果启用了图谱记忆更新,将活动发送到Zep + # GrafoZep if graph_updater: graph_updater.add_activity_from_dict(action_data, platform) @@ -687,52 +645,49 @@ def _read_action_log( pass return f.tell() except Exception as e: - logger.warning(f"读取动作日志失败: {log_path}, error={e}") + logger.warning(f"Falhou: {log_path}, error={e}") return position @classmethod def _check_all_platforms_completed(cls, state: SimulationRunState) -> bool: """ - 检查所有启用的平台是否都已完成模拟 + ConcluídoSimulação - 通过检查对应的 actions.jsonl 文件是否存在来判断平台是否被启用 + actions.jsonl Returns: - True 如果所有启用的平台都已完成 + True Concluído """ sim_dir = os.path.join(cls.RUN_STATE_DIR, state.simulation_id) twitter_log = os.path.join(sim_dir, "twitter", "actions.jsonl") reddit_log = os.path.join(sim_dir, "reddit", "actions.jsonl") - # 检查哪些平台被启用(通过文件是否存在判断) twitter_enabled = os.path.exists(twitter_log) reddit_enabled = os.path.exists(reddit_log) - # 如果平台被启用但未完成,则返回 False + # False if twitter_enabled and not state.twitter_completed: return False if reddit_enabled and not state.reddit_completed: return False - # 至少有一个平台被启用且已完成 + # Concluído return twitter_enabled or reddit_enabled @classmethod def _terminate_process(cls, process: subprocess.Popen, simulation_id: str, timeout: int = 10): """ - 跨平台终止进程及其子进程 Args: - process: 要终止的进程 - simulation_id: 模拟ID(用于日志) - timeout: 等待进程退出的超时时间(秒) + process: + simulation_id: ID da simulação + timeout: """ if IS_WINDOWS: - # Windows: 使用 taskkill 命令终止进程树 - # /F = 强制终止, /T = 终止进程树(包括子进程) - logger.info(f"终止进程树 (Windows): simulation={simulation_id}, 
pid={process.pid}") + # Windows: taskkill + # /F = , /T = + logger.info(f" (Windows): simulation={simulation_id}, pid={process.pid}") try: - # 先尝试优雅终止 subprocess.run( ['taskkill', '/PID', str(process.pid), '/T'], capture_output=True, @@ -741,8 +696,7 @@ def _terminate_process(cls, process: subprocess.Popen, simulation_id: str, timeo try: process.wait(timeout=timeout) except subprocess.TimeoutExpired: - # 强制终止 - logger.warning(f"进程未响应,强制终止: {simulation_id}") + logger.warning(f": {simulation_id}") subprocess.run( ['taskkill', '/F', '/PID', str(process.pid), '/T'], capture_output=True, @@ -750,53 +704,50 @@ def _terminate_process(cls, process: subprocess.Popen, simulation_id: str, timeo ) process.wait(timeout=5) except Exception as e: - logger.warning(f"taskkill 失败,尝试 terminate: {e}") + logger.warning(f"taskkill Falhou terminate: {e}") process.terminate() try: process.wait(timeout=5) except subprocess.TimeoutExpired: process.kill() else: - # Unix: 使用进程组终止 - # 由于使用了 start_new_session=True,进程组 ID 等于主进程 PID + # Unix: + # start_new_session=True ID PID pgid = os.getpgid(process.pid) - logger.info(f"终止进程组 (Unix): simulation={simulation_id}, pgid={pgid}") + logger.info(f" (Unix): simulation={simulation_id}, pgid={pgid}") - # 先发送 SIGTERM 给整个进程组 + # SIGTERM os.killpg(pgid, signal.SIGTERM) try: process.wait(timeout=timeout) except subprocess.TimeoutExpired: - # 如果超时后还没结束,强制发送 SIGKILL - logger.warning(f"进程组未响应 SIGTERM,强制终止: {simulation_id}") + # SIGKILL + logger.warning(f" SIGTERM: {simulation_id}") os.killpg(pgid, signal.SIGKILL) process.wait(timeout=5) @classmethod def stop_simulation(cls, simulation_id: str) -> SimulationRunState: - """停止模拟""" + """Simulação""" state = cls.get_run_state(simulation_id) if not state: - raise ValueError(f"模拟不存在: {simulation_id}") + raise ValueError(f"Simulação: {simulation_id}") if state.runner_status not in [RunnerStatus.RUNNING, RunnerStatus.PAUSED]: - raise ValueError(f"模拟未在运行: {simulation_id}, status={state.runner_status}") + raise 
ValueError(f"Simulação: {simulation_id}, status={state.runner_status}") state.runner_status = RunnerStatus.STOPPING cls._save_run_state(state) - # 终止进程 process = cls._processes.get(simulation_id) if process and process.poll() is None: try: cls._terminate_process(process, simulation_id) except ProcessLookupError: - # 进程已经不存在 pass except Exception as e: - logger.error(f"终止进程组失败: {simulation_id}, error={e}") - # 回退到直接终止进程 + logger.error(f"Falhou: {simulation_id}, error={e}") try: process.terminate() process.wait(timeout=5) @@ -809,16 +760,16 @@ def stop_simulation(cls, simulation_id: str) -> SimulationRunState: state.completed_at = datetime.now().isoformat() cls._save_run_state(state) - # 停止图谱记忆更新器 + # Grafo if cls._graph_memory_enabled.get(simulation_id, False): try: ZepGraphMemoryManager.stop_updater(simulation_id) - logger.info(f"已停止图谱记忆更新: simulation_id={simulation_id}") + logger.info(f"Grafo: simulation_id={simulation_id}") except Exception as e: - logger.error(f"停止图谱记忆更新器失败: {e}") + logger.error(f"GrafoFalhou: {e}") cls._graph_memory_enabled.pop(simulation_id, None) - logger.info(f"模拟已停止: {simulation_id}") + logger.info(f"Simulação: {simulation_id}") return state @classmethod @@ -831,14 +782,13 @@ def _read_actions_from_file( round_num: Optional[int] = None ) -> List[AgentAction]: """ - 从单个动作文件中读取动作 Args: - file_path: 动作日志文件路径 - default_platform: 默认平台(当动作记录中没有 platform 字段时使用) - platform_filter: 过滤平台 - agent_id: 过滤 Agent ID - round_num: 过滤轮次 + file_path: + default_platform: platform + platform_filter: + agent_id: Agent ID + round_num: """ if not os.path.exists(file_path): return [] @@ -854,18 +804,17 @@ def _read_actions_from_file( try: data = json.loads(line) - # 跳过非动作记录(如 simulation_start, round_start, round_end 等事件) + # simulation_start, round_start, round_end if "event_type" in data: continue - # 跳过没有 agent_id 的记录(非 Agent 动作) + # agent_id Agent if "agent_id" not in data: continue - # 获取平台:优先使用记录中的 platform,否则使用默认平台 + # platform record_platform = 
data.get("platform") or default_platform or "" - # 过滤 if platform_filter and record_platform != platform_filter: continue if agent_id is not None and data.get("agent_id") != agent_id: @@ -899,54 +848,51 @@ def get_all_actions( round_num: Optional[int] = None ) -> List[AgentAction]: """ - 获取所有平台的完整动作历史(无分页限制) Args: - simulation_id: 模拟ID - platform: 过滤平台(twitter/reddit) - agent_id: 过滤Agent - round_num: 过滤轮次 + simulation_id: ID da simulação + platform: twitter/reddit + agent_id: Agent + round_num: Returns: - 完整的动作列表(按时间戳排序,新的在前) """ sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) actions = [] - # 读取 Twitter 动作文件(根据文件路径自动设置 platform 为 twitter) + # Twitter platform twitter twitter_actions_log = os.path.join(sim_dir, "twitter", "actions.jsonl") if not platform or platform == "twitter": actions.extend(cls._read_actions_from_file( twitter_actions_log, - default_platform="twitter", # 自动填充 platform 字段 + default_platform="twitter", # platform platform_filter=platform, agent_id=agent_id, round_num=round_num )) - # 读取 Reddit 动作文件(根据文件路径自动设置 platform 为 reddit) + # Reddit platform reddit reddit_actions_log = os.path.join(sim_dir, "reddit", "actions.jsonl") if not platform or platform == "reddit": actions.extend(cls._read_actions_from_file( reddit_actions_log, - default_platform="reddit", # 自动填充 platform 字段 + default_platform="reddit", # platform platform_filter=platform, agent_id=agent_id, round_num=round_num )) - # 如果分平台文件不存在,尝试读取旧的单一文件格式 + # Se o arquivo por plataforma não existir, tenta ler o formato antigo de arquivo único if not actions: actions_log = os.path.join(sim_dir, "actions.jsonl") actions = cls._read_actions_from_file( actions_log, - default_platform=None, # 旧格式文件中应该有 platform 字段 + default_platform=None, # platform platform_filter=platform, agent_id=agent_id, round_num=round_num ) - # 按时间戳排序(新的在前) actions.sort(key=lambda x: x.timestamp, reverse=True) return actions @@ -962,18 +908,16 @@ def get_actions( round_num: Optional[int] = None ) -> 
List[AgentAction]: """ - 获取动作历史(带分页) Args: - simulation_id: 模拟ID - limit: 返回数量限制 - offset: 偏移量 - platform: 过滤平台 - agent_id: 过滤Agent - round_num: 过滤轮次 + simulation_id: ID da simulação + limit: + offset: + platform: + agent_id: Agent + round_num: Returns: - 动作列表 """ actions = cls.get_all_actions( simulation_id=simulation_id, @@ -982,7 +926,6 @@ def get_actions( round_num=round_num ) - # 分页 return actions[offset:offset + limit] @classmethod @@ -993,19 +936,17 @@ def get_timeline( end_round: Optional[int] = None ) -> List[Dict[str, Any]]: """ - 获取模拟时间线(按轮次汇总) + Simulação Args: - simulation_id: 模拟ID - start_round: 起始轮次 - end_round: 结束轮次 + simulation_id: ID da simulação + start_round: + end_round: Returns: - 每轮的汇总信息 """ actions = cls.get_actions(simulation_id, limit=10000) - # 按轮次分组 rounds: Dict[int, Dict[str, Any]] = {} for action in actions: @@ -1038,7 +979,6 @@ def get_timeline( r["action_types"][action.action_type] = r["action_types"].get(action.action_type, 0) + 1 r["last_action_time"] = action.timestamp - # 转换为列表 result = [] for round_num in sorted(rounds.keys()): r = rounds[round_num] @@ -1059,10 +999,10 @@ def get_timeline( @classmethod def get_agent_stats(cls, simulation_id: str) -> List[Dict[str, Any]]: """ - 获取每个Agent的统计信息 + Agent Returns: - Agent统计列表 + Agent """ actions = cls.get_actions(simulation_id, limit=10000) @@ -1094,7 +1034,6 @@ def get_agent_stats(cls, simulation_id: str) -> List[Dict[str, Any]]: stats["action_types"][action.action_type] = stats["action_types"].get(action.action_type, 0) + 1 stats["last_action_time"] = action.timestamp - # 按总动作数排序 result = sorted(agent_stats.values(), key=lambda x: x["total_actions"], reverse=True) return result @@ -1102,51 +1041,47 @@ def get_agent_stats(cls, simulation_id: str) -> List[Dict[str, Any]]: @classmethod def cleanup_simulation_logs(cls, simulation_id: str) -> Dict[str, Any]: """ - 清理模拟的运行日志(用于强制重新开始模拟) + SimulaçãoSimulação - 会删除以下文件: - run_state.json - twitter/actions.jsonl - reddit/actions.jsonl - 
simulation.log - stdout.log / stderr.log - - twitter_simulation.db(模拟数据库) - - reddit_simulation.db(模拟数据库) - - env_status.json(环境状态) + - twitter_simulation.dbSimulação + - reddit_simulation.dbSimulação + - env_status.json - 注意:不会删除配置文件(simulation_config.json)和 profile 文件 + Configuraçãosimulation_config.json profile Args: - simulation_id: 模拟ID + simulation_id: ID da simulação Returns: - 清理结果信息 + Resultado """ import shutil sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) if not os.path.exists(sim_dir): - return {"success": True, "message": "模拟目录不存在,无需清理"} + return {"success": True, "message": "Simulação"} cleaned_files = [] errors = [] - # 要删除的文件列表(包括数据库文件) files_to_delete = [ "run_state.json", "simulation.log", "stdout.log", "stderr.log", - "twitter_simulation.db", # Twitter 平台数据库 - "reddit_simulation.db", # Reddit 平台数据库 - "env_status.json", # 环境状态文件 + "twitter_simulation.db", # Twitter + "reddit_simulation.db", # Reddit + "env_status.json", # ] - # 要删除的目录列表(包含动作日志) dirs_to_clean = ["twitter", "reddit"] - # 删除文件 for filename in files_to_delete: file_path = os.path.join(sim_dir, filename) if os.path.exists(file_path): @@ -1154,9 +1089,8 @@ def cleanup_simulation_logs(cls, simulation_id: str) -> Dict[str, Any]: os.remove(file_path) cleaned_files.append(filename) except Exception as e: - errors.append(f"删除 {filename} 失败: {str(e)}") + errors.append(f" {filename} Falhou: {str(e)}") - # 清理平台目录中的动作日志 for dir_name in dirs_to_clean: dir_path = os.path.join(sim_dir, dir_name) if os.path.exists(dir_path): @@ -1166,13 +1100,12 @@ def cleanup_simulation_logs(cls, simulation_id: str) -> Dict[str, Any]: os.remove(actions_file) cleaned_files.append(f"{dir_name}/actions.jsonl") except Exception as e: - errors.append(f"删除 {dir_name}/actions.jsonl 失败: {str(e)}") + errors.append(f" {dir_name}/actions.jsonl Falhou: {str(e)}") - # 清理内存中的运行状态 if simulation_id in cls._run_states: del cls._run_states[simulation_id] - logger.info(f"清理模拟日志完成: {simulation_id}, 删除文件: {cleaned_files}") + 
logger.info(f"Simulação: {simulation_id}, : {cleaned_files}") return { "success": len(errors) == 0, @@ -1180,71 +1113,65 @@ def cleanup_simulation_logs(cls, simulation_id: str) -> Dict[str, Any]: "errors": errors if errors else None } - # 防止重复清理的标志 _cleanup_done = False @classmethod def cleanup_all_simulations(cls): """ - 清理所有运行中的模拟进程 + Simulação - 在服务器关闭时调用,确保所有子进程被终止 """ - # 防止重复清理 if cls._cleanup_done: return cls._cleanup_done = True - # 检查是否有内容需要清理(避免空进程的进程打印无用日志) + # Conteúdo has_processes = bool(cls._processes) has_updaters = bool(cls._graph_memory_enabled) if not has_processes and not has_updaters: - return # 没有需要清理的内容,静默返回 + return # Conteúdo - logger.info("正在清理所有模拟进程...") + logger.info("Simulação...") - # 首先停止所有图谱记忆更新器(stop_all 内部会打印日志) + # Grafostop_all try: ZepGraphMemoryManager.stop_all() except Exception as e: - logger.error(f"停止图谱记忆更新器失败: {e}") + logger.error(f"GrafoFalhou: {e}") cls._graph_memory_enabled.clear() - # 复制字典以避免在迭代时修改 processes = list(cls._processes.items()) for simulation_id, process in processes: try: - if process.poll() is None: # 进程仍在运行 - logger.info(f"终止模拟进程: {simulation_id}, pid={process.pid}") + if process.poll() is None: # + logger.info(f"Simulação: {simulation_id}, pid={process.pid}") try: - # 使用跨平台的进程终止方法 cls._terminate_process(process, simulation_id, timeout=5) except (ProcessLookupError, OSError): - # 进程可能已经不存在,尝试直接终止 try: process.terminate() process.wait(timeout=3) except Exception: process.kill() - # 更新 run_state.json + # run_state.json state = cls.get_run_state(simulation_id) if state: state.runner_status = RunnerStatus.STOPPED state.twitter_running = False state.reddit_running = False state.completed_at = datetime.now().isoformat() - state.error = "服务器关闭,模拟被终止" + state.error = "Simulação" cls._save_run_state(state) - # 同时更新 state.json,将状态设为 stopped + # state.json stopped try: sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) state_file = os.path.join(sim_dir, "state.json") - logger.info(f"尝试更新 state.json: 
{state_file}") + logger.info(f" state.json: {state_file}") if os.path.exists(state_file): with open(state_file, 'r', encoding='utf-8') as f: state_data = json.load(f) @@ -1252,16 +1179,15 @@ def cleanup_all_simulations(cls): state_data['updated_at'] = datetime.now().isoformat() with open(state_file, 'w', encoding='utf-8') as f: json.dump(state_data, f, indent=2, ensure_ascii=False) - logger.info(f"已更新 state.json 状态为 stopped: {simulation_id}") + logger.info(f" state.json stopped: {simulation_id}") else: - logger.warning(f"state.json 不存在: {state_file}") + logger.warning(f"state.json : {state_file}") except Exception as state_err: - logger.warning(f"更新 state.json 失败: {simulation_id}, error={state_err}") + logger.warning(f" state.json Falhou: {simulation_id}, error={state_err}") except Exception as e: - logger.error(f"清理进程失败: {simulation_id}, error={e}") + logger.error(f"Falhou: {simulation_id}, error={e}") - # 清理文件句柄 for simulation_id, file_handle in list(cls._stdout_files.items()): try: if file_handle: @@ -1278,89 +1204,83 @@ def cleanup_all_simulations(cls): pass cls._stderr_files.clear() - # 清理内存中的状态 cls._processes.clear() cls._action_queues.clear() - logger.info("模拟进程清理完成") + logger.info("Simulação") @classmethod def register_cleanup(cls): """ - 注册清理函数 - 在 Flask 应用启动时调用,确保服务器关闭时清理所有模拟进程 + Flask Simulação """ global _cleanup_registered if _cleanup_registered: return - # Flask debug 模式下,只在 reloader 子进程中注册清理(实际运行应用的进程) - # WERKZEUG_RUN_MAIN=true 表示是 reloader 子进程 - # 如果不是 debug 模式,则没有这个环境变量,也需要注册 + # Flask debug reloader + # WERKZEUG_RUN_MAIN=true reloader + # debug is_reloader_process = os.environ.get('WERKZEUG_RUN_MAIN') == 'true' is_debug_mode = os.environ.get('FLASK_DEBUG') == '1' or os.environ.get('WERKZEUG_RUN_MAIN') is not None - # 在 debug 模式下,只在 reloader 子进程中注册;非 debug 模式下始终注册 + # debug reloader debug if is_debug_mode and not is_reloader_process: - _cleanup_registered = True # 标记已注册,防止子进程再次尝试 + _cleanup_registered = True # return - # 保存原有的信号处理器 original_sigint 
= signal.getsignal(signal.SIGINT) original_sigterm = signal.getsignal(signal.SIGTERM) - # SIGHUP 只在 Unix 系统存在(macOS/Linux),Windows 没有 + # SIGHUP Unix macOS/LinuxWindows original_sighup = None has_sighup = hasattr(signal, 'SIGHUP') if has_sighup: original_sighup = signal.getsignal(signal.SIGHUP) def cleanup_handler(signum=None, frame=None): - """信号处理器:先清理模拟进程,再调用原处理器""" - # 只有在有进程需要清理时才打印日志 + """Simulação""" if cls._processes or cls._graph_memory_enabled: - logger.info(f"收到信号 {signum},开始清理...") + logger.info(f" {signum}...") cls.cleanup_all_simulations() - # 调用原有的信号处理器,让 Flask 正常退出 + # Flask if signum == signal.SIGINT and callable(original_sigint): original_sigint(signum, frame) elif signum == signal.SIGTERM and callable(original_sigterm): original_sigterm(signum, frame) elif has_sighup and signum == signal.SIGHUP: - # SIGHUP: 终端关闭时发送 + # SIGHUP: if callable(original_sighup): original_sighup(signum, frame) else: - # 默认行为:正常退出 sys.exit(0) else: - # 如果原处理器不可调用(如 SIG_DFL),则使用默认行为 + # SIG_DFL raise KeyboardInterrupt - # 注册 atexit 处理器(作为备用) + # atexit atexit.register(cls.cleanup_all_simulations) - # 注册信号处理器(仅在主线程中) try: - # SIGTERM: kill 命令默认信号 + # SIGTERM: kill signal.signal(signal.SIGTERM, cleanup_handler) # SIGINT: Ctrl+C signal.signal(signal.SIGINT, cleanup_handler) - # SIGHUP: 终端关闭(仅 Unix 系统) + # SIGHUP: Unix if has_sighup: signal.signal(signal.SIGHUP, cleanup_handler) except ValueError: - # 不在主线程中,只能使用 atexit - logger.warning("无法注册信号处理器(不在主线程),仅使用 atexit") + # atexit + logger.warning(" atexit") _cleanup_registered = True @classmethod def get_running_simulations(cls) -> List[str]: """ - 获取所有正在运行的模拟ID列表 + ID da simulação """ running = [] for sim_id, process in cls._processes.items(): @@ -1368,18 +1288,18 @@ def get_running_simulations(cls) -> List[str]: running.append(sim_id) return running - # ============== Interview 功能 ============== + # ============== Interview ============== @classmethod def check_env_alive(cls, simulation_id: str) -> bool: """ - 
检查模拟环境是否存活(可以接收Interview命令) + SimulaçãoInterview Args: - simulation_id: 模拟ID + simulation_id: ID da simulação Returns: - True 表示环境存活,False 表示环境已关闭 + True False """ sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) if not os.path.exists(sim_dir): @@ -1391,13 +1311,13 @@ def check_env_alive(cls, simulation_id: str) -> bool: @classmethod def get_env_status_detail(cls, simulation_id: str) -> Dict[str, Any]: """ - 获取模拟环境的详细状态信息 + Simulação Args: - simulation_id: 模拟ID + simulation_id: ID da simulação Returns: - 状态详情字典,包含 status, twitter_available, reddit_available, timestamp + status, twitter_available, reddit_available, timestamp """ sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) status_file = os.path.join(sim_dir, "env_status.json") @@ -1434,35 +1354,35 @@ def interview_agent( timeout: float = 60.0 ) -> Dict[str, Any]: """ - 采访单个Agent + Agent Args: - simulation_id: 模拟ID + simulation_id: ID da simulação agent_id: Agent ID - prompt: 采访问题 - platform: 指定平台(可选) - - "twitter": 只采访Twitter平台 - - "reddit": 只采访Reddit平台 - - None: 双平台模拟时同时采访两个平台,返回整合结果 - timeout: 超时时间(秒) + prompt: + platform: + - "twitter": Twitter + - "reddit": Reddit + - None: SimulaçãoResultado + timeout: Returns: - 采访结果字典 + Resultado Raises: - ValueError: 模拟不存在或环境未运行 - TimeoutError: 等待响应超时 + ValueError: Simulação + TimeoutError: """ sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) if not os.path.exists(sim_dir): - raise ValueError(f"模拟不存在: {simulation_id}") + raise ValueError(f"Simulação: {simulation_id}") ipc_client = SimulationIPCClient(sim_dir) if not ipc_client.check_env_alive(): - raise ValueError(f"模拟环境未运行或已关闭,无法执行Interview: {simulation_id}") + raise ValueError(f"SimulaçãoInterview: {simulation_id}") - logger.info(f"发送Interview命令: simulation_id={simulation_id}, agent_id={agent_id}, platform={platform}") + logger.info(f"Interview: simulation_id={simulation_id}, agent_id={agent_id}, platform={platform}") response = ipc_client.send_interview( agent_id=agent_id, @@ -1497,34 +1417,34 
@@ def interview_agents_batch( timeout: float = 120.0 ) -> Dict[str, Any]: """ - 批量采访多个Agent + Agent Args: - simulation_id: 模拟ID - interviews: 采访列表,每个元素包含 {"agent_id": int, "prompt": str, "platform": str(可选)} - platform: 默认平台(可选,会被每个采访项的platform覆盖) - - "twitter": 默认只采访Twitter平台 - - "reddit": 默认只采访Reddit平台 - - None: 双平台模拟时每个Agent同时采访两个平台 - timeout: 超时时间(秒) + simulation_id: ID da simulação + interviews: {"agent_id": int, "prompt": str, "platform": str()} + platform: platform + - "twitter": Twitter + - "reddit": Reddit + - None: SimulaçãoAgent + timeout: Returns: - 批量采访结果字典 + Resultado Raises: - ValueError: 模拟不存在或环境未运行 - TimeoutError: 等待响应超时 + ValueError: Simulação + TimeoutError: """ sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) if not os.path.exists(sim_dir): - raise ValueError(f"模拟不存在: {simulation_id}") + raise ValueError(f"Simulação: {simulation_id}") ipc_client = SimulationIPCClient(sim_dir) if not ipc_client.check_env_alive(): - raise ValueError(f"模拟环境未运行或已关闭,无法执行Interview: {simulation_id}") + raise ValueError(f"SimulaçãoInterview: {simulation_id}") - logger.info(f"发送批量Interview命令: simulation_id={simulation_id}, count={len(interviews)}, platform={platform}") + logger.info(f"Interview: simulation_id={simulation_id}, count={len(interviews)}, platform={platform}") response = ipc_client.send_batch_interview( interviews=interviews, @@ -1556,39 +1476,38 @@ def interview_all_agents( timeout: float = 180.0 ) -> Dict[str, Any]: """ - 采访所有Agent(全局采访) + Agent - 使用相同的问题采访模拟中的所有Agent + SimulaçãoAgent Args: - simulation_id: 模拟ID - prompt: 采访问题(所有Agent使用相同问题) - platform: 指定平台(可选) - - "twitter": 只采访Twitter平台 - - "reddit": 只采访Reddit平台 - - None: 双平台模拟时每个Agent同时采访两个平台 - timeout: 超时时间(秒) + simulation_id: ID da simulação + prompt: Agent + platform: + - "twitter": Twitter + - "reddit": Reddit + - None: SimulaçãoAgent + timeout: Returns: - 全局采访结果字典 + Resultado """ sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) if not os.path.exists(sim_dir): - raise 
ValueError(f"模拟不存在: {simulation_id}") + raise ValueError(f"Simulação: {simulation_id}") - # 从配置文件获取所有Agent信息 + # ConfiguraçãoAgent config_path = os.path.join(sim_dir, "simulation_config.json") if not os.path.exists(config_path): - raise ValueError(f"模拟配置不存在: {simulation_id}") + raise ValueError(f"Configuração da simulação: {simulation_id}") with open(config_path, 'r', encoding='utf-8') as f: config = json.load(f) agent_configs = config.get("agent_configs", []) if not agent_configs: - raise ValueError(f"模拟配置中没有Agent: {simulation_id}") + raise ValueError(f"Configuração da simulaçãoAgent: {simulation_id}") - # 构建批量采访列表 interviews = [] for agent_config in agent_configs: agent_id = agent_config.get("agent_id") @@ -1598,7 +1517,7 @@ def interview_all_agents( "prompt": prompt }) - logger.info(f"发送全局Interview命令: simulation_id={simulation_id}, agent_count={len(interviews)}, platform={platform}") + logger.info(f"Interview: simulation_id={simulation_id}, agent_count={len(interviews)}, platform={platform}") return cls.interview_agents_batch( simulation_id=simulation_id, @@ -1614,45 +1533,44 @@ def close_simulation_env( timeout: float = 30.0 ) -> Dict[str, Any]: """ - 关闭模拟环境(而不是停止模拟进程) + SimulaçãoSimulação - 向模拟发送关闭环境命令,使其优雅退出等待命令模式 + Simulação Args: - simulation_id: 模拟ID - timeout: 超时时间(秒) + simulation_id: ID da simulação + timeout: Returns: - 操作结果字典 + Resultado """ sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) if not os.path.exists(sim_dir): - raise ValueError(f"模拟不存在: {simulation_id}") + raise ValueError(f"Simulação: {simulation_id}") ipc_client = SimulationIPCClient(sim_dir) if not ipc_client.check_env_alive(): return { "success": True, - "message": "环境已经关闭" + "message": "" } - logger.info(f"发送关闭环境命令: simulation_id={simulation_id}") + logger.info(f": simulation_id={simulation_id}") try: response = ipc_client.send_close_env(timeout=timeout) return { "success": response.status.value == "completed", - "message": "环境关闭命令已发送", + "message": "", "result": 
response.result, "timestamp": response.timestamp } except TimeoutError: - # 超时可能是因为环境正在关闭 return { "success": True, - "message": "环境关闭命令已发送(等待响应超时,环境可能正在关闭)" + "message": "" } @classmethod @@ -1663,7 +1581,7 @@ def _get_interview_history_from_db( agent_id: Optional[int] = None, limit: int = 100 ) -> List[Dict[str, Any]]: - """从单个数据库获取Interview历史""" + """Interview""" import sqlite3 if not os.path.exists(db_path): @@ -1709,7 +1627,7 @@ def _get_interview_history_from_db( conn.close() except Exception as e: - logger.error(f"读取Interview历史失败 ({platform_name}): {e}") + logger.error(f"InterviewFalhou ({platform_name}): {e}") return results @@ -1722,29 +1640,28 @@ def get_interview_history( limit: int = 100 ) -> List[Dict[str, Any]]: """ - 获取Interview历史记录(从数据库读取) + Interview Args: - simulation_id: 模拟ID - platform: 平台类型(reddit/twitter/None) - - "reddit": 只获取Reddit平台的历史 - - "twitter": 只获取Twitter平台的历史 - - None: 获取两个平台的所有历史 - agent_id: 指定Agent ID(可选,只获取该Agent的历史) - limit: 每个平台返回数量限制 + simulation_id: ID da simulação + platform: reddit/twitter/None + - "reddit": Reddit + - "twitter": Twitter + - None: + agent_id: Agent IDAgent + limit: Returns: - Interview历史记录列表 + Interview """ sim_dir = os.path.join(cls.RUN_STATE_DIR, simulation_id) results = [] - # 确定要查询的平台 if platform in ("reddit", "twitter"): platforms = [platform] else: - # 不指定platform时,查询两个平台 + # platform platforms = ["twitter", "reddit"] for p in platforms: @@ -1757,12 +1674,9 @@ def get_interview_history( ) results.extend(platform_results) - # 按时间降序排序 results.sort(key=lambda x: x.get("timestamp", ""), reverse=True) - # 如果查询了多个平台,限制总数 if len(platforms) > 1 and len(results) > limit: results = results[:limit] return results - diff --git a/backend/app/services/text_processor.py b/backend/app/services/text_processor.py index 91e32acc5d..e266e22177 100644 --- a/backend/app/services/text_processor.py +++ b/backend/app/services/text_processor.py @@ -1,5 +1,4 @@ """ -文本处理服务 """ from typing import List, Optional @@ -7,11 +6,11 @@ 
class TextProcessor: - """文本处理器""" + """""" @staticmethod def extract_from_files(file_paths: List[str]) -> str: - """从多个文件提取文本""" + """""" return FileParser.extract_from_multiple(file_paths) @staticmethod @@ -21,40 +20,33 @@ def split_text( overlap: int = 50 ) -> List[str]: """ - 分割文本 Args: - text: 原始文本 - chunk_size: 块大小 - overlap: 重叠大小 + text: + chunk_size: + overlap: Returns: - 文本块列表 """ return split_text_into_chunks(text, chunk_size, overlap) @staticmethod def preprocess_text(text: str) -> str: """ - 预处理文本 - - 移除多余空白 - - 标准化换行 + - + - Args: - text: 原始文本 + text: Returns: - 处理后的文本 """ import re - # 标准化换行 text = text.replace('\r\n', '\n').replace('\r', '\n') - # 移除连续空行(保留最多两个换行) text = re.sub(r'\n{3,}', '\n\n', text) - # 移除行首行尾空白 lines = [line.strip() for line in text.split('\n')] text = '\n'.join(lines) @@ -62,7 +54,7 @@ def preprocess_text(text: str) -> str: @staticmethod def get_text_stats(text: str) -> dict: - """获取文本统计信息""" + """""" return { "total_chars": len(text), "total_lines": text.count('\n') + 1, diff --git a/backend/app/services/zep_entity_reader.py b/backend/app/services/zep_entity_reader.py index 71661be499..845c28eef9 100644 --- a/backend/app/services/zep_entity_reader.py +++ b/backend/app/services/zep_entity_reader.py @@ -1,6 +1,6 @@ """ -Zep实体读取与过滤服务 -从Zep图谱中读取节点,筛选出符合预定义实体类型的节点 +ZepEntidade +ZepGrafoEntidade """ import time @@ -15,21 +15,18 @@ logger = get_logger('mirofish.zep_entity_reader') -# 用于泛型返回类型 T = TypeVar('T') @dataclass class EntityNode: - """实体节点数据结构""" + """Entidade""" uuid: str name: str labels: List[str] summary: str attributes: Dict[str, Any] - # 相关的边信息 related_edges: List[Dict[str, Any]] = field(default_factory=list) - # 相关的其他节点信息 related_nodes: List[Dict[str, Any]] = field(default_factory=list) def to_dict(self) -> Dict[str, Any]: @@ -44,7 +41,7 @@ def to_dict(self) -> Dict[str, Any]: } def get_entity_type(self) -> Optional[str]: - """获取实体类型(排除默认的Entity标签)""" + """EntidadeEntity""" for label in self.labels: if label not in 
["Entity", "Node"]: return label @@ -53,7 +50,7 @@ def get_entity_type(self) -> Optional[str]: @dataclass class FilteredEntities: - """过滤后的实体集合""" + """Entidade""" entities: List[EntityNode] entity_types: Set[str] total_count: int @@ -70,18 +67,17 @@ def to_dict(self) -> Dict[str, Any]: class ZepEntityReader: """ - Zep实体读取与过滤服务 + ZepEntidade - 主要功能: - 1. 从Zep图谱读取所有节点 - 2. 筛选出符合预定义实体类型的节点(Labels不只是Entity的节点) - 3. 获取每个实体的相关边和关联节点信息 + 1. ZepGrafo + 2. EntidadeLabelsEntity + 3. Entidade """ def __init__(self, api_key: Optional[str] = None): self.api_key = api_key or Config.ZEP_API_KEY if not self.api_key: - raise ValueError("ZEP_API_KEY 未配置") + raise ValueError("ZEP_API_KEY Configuração") self.client = Zep(api_key=self.api_key) @@ -93,16 +89,16 @@ def _call_with_retry( initial_delay: float = 2.0 ) -> T: """ - 带重试机制的Zep API调用 + Zep API Args: - func: 要执行的函数(无参数的lambda或callable) - operation_name: 操作名称,用于日志 - max_retries: 最大重试次数(默认3次,即最多尝试3次) - initial_delay: 初始延迟秒数 + func: lambdacallable + operation_name: + max_retries: 33 + initial_delay: Returns: - API调用结果 + APIResultado """ last_exception = None delay = initial_delay @@ -114,27 +110,26 @@ def _call_with_retry( last_exception = e if attempt < max_retries - 1: logger.warning( - f"Zep {operation_name} 第 {attempt + 1} 次尝试失败: {str(e)[:100]}, " - f"{delay:.1f}秒后重试..." + f"Zep {operation_name} {attempt + 1} Falhou: {str(e)[:100]}, " + f"{delay:.1f}..." 
) time.sleep(delay) - delay *= 2 # 指数退避 + delay *= 2 # else: - logger.error(f"Zep {operation_name} 在 {max_retries} 次尝试后仍失败: {str(e)}") + logger.error(f"Zep {operation_name} {max_retries} Falhou: {str(e)}") raise last_exception def get_all_nodes(self, graph_id: str) -> List[Dict[str, Any]]: """ - 获取图谱的所有节点(分页获取) + Grafo Args: - graph_id: 图谱ID + graph_id: ID do grafo Returns: - 节点列表 """ - logger.info(f"获取图谱 {graph_id} 的所有节点...") + logger.info(f"Grafo {graph_id} ...") nodes = fetch_all_nodes(self.client, graph_id) @@ -148,20 +143,19 @@ def get_all_nodes(self, graph_id: str) -> List[Dict[str, Any]]: "attributes": node.attributes or {}, }) - logger.info(f"共获取 {len(nodes_data)} 个节点") + logger.info(f" {len(nodes_data)} ") return nodes_data def get_all_edges(self, graph_id: str) -> List[Dict[str, Any]]: """ - 获取图谱的所有边(分页获取) + Grafo Args: - graph_id: 图谱ID + graph_id: ID do grafo Returns: - 边列表 """ - logger.info(f"获取图谱 {graph_id} 的所有边...") + logger.info(f"Grafo {graph_id} ...") edges = fetch_all_edges(self.client, graph_id) @@ -176,24 +170,22 @@ def get_all_edges(self, graph_id: str) -> List[Dict[str, Any]]: "attributes": edge.attributes or {}, }) - logger.info(f"共获取 {len(edges_data)} 条边") + logger.info(f" {len(edges_data)} ") return edges_data def get_node_edges(self, node_uuid: str) -> List[Dict[str, Any]]: """ - 获取指定节点的所有相关边(带重试机制) Args: - node_uuid: 节点UUID + node_uuid: UUID Returns: - 边列表 """ try: - # 使用重试机制调用Zep API + # Zep API edges = self._call_with_retry( func=lambda: self.client.graph.node.get_entity_edges(node_uuid=node_uuid), - operation_name=f"获取节点边(node={node_uuid[:8]}...)" + operation_name=f"(node={node_uuid[:8]}...)" ) edges_data = [] @@ -209,7 +201,7 @@ def get_node_edges(self, node_uuid: str) -> List[Dict[str, Any]]: return edges_data except Exception as e: - logger.warning(f"获取节点 {node_uuid} 的边失败: {str(e)}") + logger.warning(f" {node_uuid} Falhou: {str(e)}") return [] def filter_defined_entities( @@ -219,47 +211,42 @@ def filter_defined_entities( 
enrich_with_edges: bool = True ) -> FilteredEntities: """ - 筛选出符合预定义实体类型的节点 + Entidade - 筛选逻辑: - - 如果节点的Labels只有一个"Entity",说明这个实体不符合我们预定义的类型,跳过 - - 如果节点的Labels包含除"Entity"和"Node"之外的标签,说明符合预定义类型,保留 + - Labels"Entity"Entidade + - Labels"Entity""Node" Args: - graph_id: 图谱ID - defined_entity_types: 预定义的实体类型列表(可选,如果提供则只保留这些类型) - enrich_with_edges: 是否获取每个实体的相关边信息 + graph_id: ID do grafo + defined_entity_types: Entidade + enrich_with_edges: Entidade Returns: - FilteredEntities: 过滤后的实体集合 + FilteredEntities: Entidade """ - logger.info(f"开始筛选图谱 {graph_id} 的实体...") + logger.info(f"Grafo {graph_id} Entidade...") - # 获取所有节点 all_nodes = self.get_all_nodes(graph_id) total_count = len(all_nodes) - # 获取所有边(用于后续关联查找) all_edges = self.get_all_edges(graph_id) if enrich_with_edges else [] - # 构建节点UUID到节点数据的映射 + # UUID node_map = {n["uuid"]: n for n in all_nodes} - # 筛选符合条件的实体 + # Entidade filtered_entities = [] entity_types_found = set() for node in all_nodes: labels = node.get("labels", []) - # 筛选逻辑:Labels必须包含除"Entity"和"Node"之外的标签 + # Labels"Entity""Node" custom_labels = [l for l in labels if l not in ["Entity", "Node"]] if not custom_labels: - # 只有默认标签,跳过 continue - # 如果指定了预定义类型,检查是否匹配 if defined_entity_types: matching_labels = [l for l in custom_labels if l in defined_entity_types] if not matching_labels: @@ -270,7 +257,7 @@ def filter_defined_entities( entity_types_found.add(entity_type) - # 创建实体节点对象 + # Entidade entity = EntityNode( uuid=node["uuid"], name=node["name"], @@ -279,7 +266,6 @@ def filter_defined_entities( attributes=node["attributes"], ) - # 获取相关边和节点 if enrich_with_edges: related_edges = [] related_node_uuids = set() @@ -304,7 +290,6 @@ def filter_defined_entities( entity.related_edges = related_edges - # 获取关联节点的基本信息 related_nodes = [] for related_uuid in related_node_uuids: if related_uuid in node_map: @@ -320,8 +305,8 @@ def filter_defined_entities( filtered_entities.append(entity) - logger.info(f"筛选完成: 总节点 {total_count}, 符合条件 {len(filtered_entities)}, " - f"实体类型: 
{entity_types_found}") + logger.info(f": {total_count}, {len(filtered_entities)}, " + f"Entidade: {entity_types_found}") return FilteredEntities( entities=filtered_entities, @@ -336,33 +321,29 @@ def get_entity_with_context( entity_uuid: str ) -> Optional[EntityNode]: """ - 获取单个实体及其完整上下文(边和关联节点,带重试机制) + Entidade Args: - graph_id: 图谱ID - entity_uuid: 实体UUID + graph_id: ID do grafo + entity_uuid: EntidadeUUID Returns: - EntityNode或None + EntityNodeNone """ try: - # 使用重试机制获取节点 node = self._call_with_retry( func=lambda: self.client.graph.node.get(uuid_=entity_uuid), - operation_name=f"获取节点详情(uuid={entity_uuid[:8]}...)" + operation_name=f"(uuid={entity_uuid[:8]}...)" ) if not node: return None - # 获取节点的边 edges = self.get_node_edges(entity_uuid) - # 获取所有节点用于关联查找 all_nodes = self.get_all_nodes(graph_id) node_map = {n["uuid"]: n for n in all_nodes} - # 处理相关边和节点 related_edges = [] related_node_uuids = set() @@ -384,7 +365,6 @@ def get_entity_with_context( }) related_node_uuids.add(edge["source_node_uuid"]) - # 获取关联节点信息 related_nodes = [] for related_uuid in related_node_uuids: if related_uuid in node_map: @@ -407,7 +387,7 @@ def get_entity_with_context( ) except Exception as e: - logger.error(f"获取实体 {entity_uuid} 失败: {str(e)}") + logger.error(f"Entidade {entity_uuid} Falhou: {str(e)}") return None def get_entities_by_type( @@ -417,15 +397,15 @@ def get_entities_by_type( enrich_with_edges: bool = True ) -> List[EntityNode]: """ - 获取指定类型的所有实体 + Entidade Args: - graph_id: 图谱ID - entity_type: 实体类型(如 "Student", "PublicFigure" 等) - enrich_with_edges: 是否获取相关边信息 + graph_id: ID do grafo + entity_type: Entidade "Student", "PublicFigure" + enrich_with_edges: Returns: - 实体列表 + Entidade """ result = self.filter_defined_entities( graph_id=graph_id, diff --git a/backend/app/services/zep_graph_memory_updater.py b/backend/app/services/zep_graph_memory_updater.py index e034fee2b2..0d50f2cc9b 100644 --- a/backend/app/services/zep_graph_memory_updater.py +++ 
b/backend/app/services/zep_graph_memory_updater.py @@ -1,6 +1,6 @@ """ -Zep图谱记忆更新服务 -将模拟中的Agent活动动态更新到Zep图谱中 +ZepGrafo +SimulaçãoAgentZepGrafo """ import os @@ -23,7 +23,7 @@ @dataclass class AgentActivity: - """Agent活动记录""" + """Agent""" platform: str # twitter / reddit agent_id: int agent_name: str @@ -34,12 +34,12 @@ class AgentActivity: def to_episode_text(self) -> str: """ - 将活动转换为可以发送给Zep的文本描述 + Zep - 采用自然语言描述格式,让Zep能够从中提取实体和关系 - 不添加模拟相关的前缀,避免误导图谱更新 + ZepEntidadeRelacionamento + SimulaçãoGrafo """ - # 根据不同的动作类型生成不同的描述 + # Gerar action_descriptions = { "CREATE_POST": self._describe_create_post, "LIKE_POST": self._describe_like_post, @@ -58,222 +58,215 @@ def to_episode_text(self) -> str: describe_func = action_descriptions.get(self.action_type, self._describe_generic) description = describe_func() - # 直接返回 "agent名称: 活动描述" 格式,不添加模拟前缀 + # "agent: " Simulação return f"{self.agent_name}: {description}" def _describe_create_post(self) -> str: content = self.action_args.get("content", "") if content: - return f"发布了一条帖子:「{content}」" - return "发布了一条帖子" + return f"{content}" + return "" def _describe_like_post(self) -> str: - """点赞帖子 - 包含帖子原文和作者信息""" + """ - """ post_content = self.action_args.get("post_content", "") post_author = self.action_args.get("post_author_name", "") if post_content and post_author: - return f"点赞了{post_author}的帖子:「{post_content}」" + return f"{post_author}{post_content}" elif post_content: - return f"点赞了一条帖子:「{post_content}」" + return f"{post_content}" elif post_author: - return f"点赞了{post_author}的一条帖子" - return "点赞了一条帖子" + return f"{post_author}" + return "" def _describe_dislike_post(self) -> str: - """踩帖子 - 包含帖子原文和作者信息""" + """ - """ post_content = self.action_args.get("post_content", "") post_author = self.action_args.get("post_author_name", "") if post_content and post_author: - return f"踩了{post_author}的帖子:「{post_content}」" + return f"{post_author}{post_content}" elif post_content: - return f"踩了一条帖子:「{post_content}」" + return 
f"{post_content}" elif post_author: - return f"踩了{post_author}的一条帖子" - return "踩了一条帖子" + return f"{post_author}" + return "" def _describe_repost(self) -> str: - """转发帖子 - 包含原帖内容和作者信息""" + """ - Conteúdo""" original_content = self.action_args.get("original_content", "") original_author = self.action_args.get("original_author_name", "") if original_content and original_author: - return f"转发了{original_author}的帖子:「{original_content}」" + return f"{original_author}{original_content}" elif original_content: - return f"转发了一条帖子:「{original_content}」" + return f"{original_content}" elif original_author: - return f"转发了{original_author}的一条帖子" - return "转发了一条帖子" + return f"{original_author}" + return "" def _describe_quote_post(self) -> str: - """引用帖子 - 包含原帖内容、作者信息和引用评论""" + """ - Conteúdo""" original_content = self.action_args.get("original_content", "") original_author = self.action_args.get("original_author_name", "") quote_content = self.action_args.get("quote_content", "") or self.action_args.get("content", "") base = "" if original_content and original_author: - base = f"引用了{original_author}的帖子「{original_content}」" + base = f"{original_author}{original_content}" elif original_content: - base = f"引用了一条帖子「{original_content}」" + base = f"{original_content}" elif original_author: - base = f"引用了{original_author}的一条帖子" + base = f"{original_author}" else: - base = "引用了一条帖子" + base = "" if quote_content: - base += f",并评论道:「{quote_content}」" + base += f"{quote_content}" return base def _describe_follow(self) -> str: - """关注用户 - 包含被关注用户的名称""" + """ - """ target_user_name = self.action_args.get("target_user_name", "") if target_user_name: - return f"关注了用户「{target_user_name}」" - return "关注了一个用户" + return f"{target_user_name}" + return "" def _describe_create_comment(self) -> str: - """发表评论 - 包含评论内容和所评论的帖子信息""" + """ - Conteúdo""" content = self.action_args.get("content", "") post_content = self.action_args.get("post_content", "") post_author = self.action_args.get("post_author_name", 
"") if content: if post_content and post_author: - return f"在{post_author}的帖子「{post_content}」下评论道:「{content}」" + return f"{post_author}{post_content}{content}" elif post_content: - return f"在帖子「{post_content}」下评论道:「{content}」" + return f"{post_content}{content}" elif post_author: - return f"在{post_author}的帖子下评论道:「{content}」" - return f"评论道:「{content}」" - return "发表了评论" + return f"{post_author}{content}" + return f"{content}" + return "" def _describe_like_comment(self) -> str: - """点赞评论 - 包含评论内容和作者信息""" + """ - Conteúdo""" comment_content = self.action_args.get("comment_content", "") comment_author = self.action_args.get("comment_author_name", "") if comment_content and comment_author: - return f"点赞了{comment_author}的评论:「{comment_content}」" + return f"{comment_author}{comment_content}" elif comment_content: - return f"点赞了一条评论:「{comment_content}」" + return f"{comment_content}" elif comment_author: - return f"点赞了{comment_author}的一条评论" - return "点赞了一条评论" + return f"{comment_author}" + return "" def _describe_dislike_comment(self) -> str: - """踩评论 - 包含评论内容和作者信息""" + """ - Conteúdo""" comment_content = self.action_args.get("comment_content", "") comment_author = self.action_args.get("comment_author_name", "") if comment_content and comment_author: - return f"踩了{comment_author}的评论:「{comment_content}」" + return f"{comment_author}{comment_content}" elif comment_content: - return f"踩了一条评论:「{comment_content}」" + return f"{comment_content}" elif comment_author: - return f"踩了{comment_author}的一条评论" - return "踩了一条评论" + return f"{comment_author}" + return "" def _describe_search(self) -> str: - """搜索帖子 - 包含搜索关键词""" + """ - """ query = self.action_args.get("query", "") or self.action_args.get("keyword", "") - return f"搜索了「{query}」" if query else "进行了搜索" + return f"{query}" if query else "" def _describe_search_user(self) -> str: - """搜索用户 - 包含搜索关键词""" + """ - """ query = self.action_args.get("query", "") or self.action_args.get("username", "") - return f"搜索了用户「{query}」" if query 
else "搜索了用户" + return f"{query}" if query else "" def _describe_mute(self) -> str: - """屏蔽用户 - 包含被屏蔽用户的名称""" + """ - """ target_user_name = self.action_args.get("target_user_name", "") if target_user_name: - return f"屏蔽了用户「{target_user_name}」" - return "屏蔽了一个用户" + return f"{target_user_name}" + return "" def _describe_generic(self) -> str: - # 对于未知的动作类型,生成通用描述 - return f"执行了{self.action_type}操作" + # Gerar + return f"{self.action_type}" class ZepGraphMemoryUpdater: """ - Zep图谱记忆更新器 + ZepGrafo - 监控模拟的actions日志文件,将新的agent活动实时更新到Zep图谱中。 - 按平台分组,每累积BATCH_SIZE条活动后批量发送到Zep。 + SimulaçãoactionsagentZepGrafo + BATCH_SIZEZep - 所有有意义的行为都会被更新到Zep,action_args中会包含完整的上下文信息: - - 点赞/踩的帖子原文 - - 转发/引用的帖子原文 - - 关注/屏蔽的用户名 - - 点赞/踩的评论原文 + Zepaction_args + - / + - / + - / + - / """ - # 批量发送大小(每个平台累积多少条后发送) BATCH_SIZE = 5 - # 平台名称映射(用于控制台显示) PLATFORM_DISPLAY_NAMES = { - 'twitter': '世界1', - 'reddit': '世界2', + 'twitter': '1', + 'reddit': '2', } - # 发送间隔(秒),避免请求过快 SEND_INTERVAL = 0.5 - # 重试配置 + # Configuração MAX_RETRIES = 3 - RETRY_DELAY = 2 # 秒 + RETRY_DELAY = 2 # def __init__(self, graph_id: str, api_key: Optional[str] = None): """ - 初始化更新器 Args: - graph_id: Zep图谱ID - api_key: Zep API Key(可选,默认从配置读取) + graph_id: ZepID do grafo + api_key: Zep API KeyConfiguração """ self.graph_id = graph_id self.api_key = api_key or Config.ZEP_API_KEY if not self.api_key: - raise ValueError("ZEP_API_KEY未配置") + raise ValueError("ZEP_API_KEYConfiguração") self.client = Zep(api_key=self.api_key) - # 活动队列 self._activity_queue: Queue = Queue() - # 按平台分组的活动缓冲区(每个平台各自累积到BATCH_SIZE后批量发送) + # BATCH_SIZE self._platform_buffers: Dict[str, List[AgentActivity]] = { 'twitter': [], 'reddit': [], } self._buffer_lock = threading.Lock() - # 控制标志 self._running = False self._worker_thread: Optional[threading.Thread] = None - # 统计 - self._total_activities = 0 # 实际添加到队列的活动数 - self._total_sent = 0 # 成功发送到Zep的批次数 - self._total_items_sent = 0 # 成功发送到Zep的活动条数 - self._failed_count = 0 # 发送失败的批次数 - self._skipped_count = 0 # 
被过滤跳过的活动数(DO_NOTHING) + self._total_activities = 0 # + self._total_sent = 0 # Zep + self._total_items_sent = 0 # Zep + self._failed_count = 0 # Falhou + self._skipped_count = 0 # DO_NOTHING - logger.info(f"ZepGraphMemoryUpdater 初始化完成: graph_id={graph_id}, batch_size={self.BATCH_SIZE}") + logger.info(f"ZepGraphMemoryUpdater : graph_id={graph_id}, batch_size={self.BATCH_SIZE}") def _get_platform_display_name(self, platform: str) -> str: - """获取平台的显示名称""" + """""" return self.PLATFORM_DISPLAY_NAMES.get(platform.lower(), platform) def start(self): - """启动后台工作线程""" + """""" if self._running: return @@ -288,19 +281,18 @@ def start(self): name=f"ZepMemoryUpdater-{self.graph_id[:8]}" ) self._worker_thread.start() - logger.info(f"ZepGraphMemoryUpdater 已启动: graph_id={self.graph_id}") + logger.info(f"ZepGraphMemoryUpdater : graph_id={self.graph_id}") def stop(self): - """停止后台工作线程""" + """""" self._running = False - # 发送剩余的活动 self._flush_remaining() if self._worker_thread and self._worker_thread.is_alive(): self._worker_thread.join(timeout=10) - logger.info(f"ZepGraphMemoryUpdater 已停止: graph_id={self.graph_id}, " + logger.info(f"ZepGraphMemoryUpdater : graph_id={self.graph_id}, " f"total_activities={self._total_activities}, " f"batches_sent={self._total_sent}, " f"items_sent={self._total_items_sent}, " @@ -309,43 +301,40 @@ def stop(self): def add_activity(self, activity: AgentActivity): """ - 添加一个agent活动到队列 - - 所有有意义的行为都会被添加到队列,包括: - - CREATE_POST(发帖) - - CREATE_COMMENT(评论) - - QUOTE_POST(引用帖子) - - SEARCH_POSTS(搜索帖子) - - SEARCH_USER(搜索用户) - - LIKE_POST/DISLIKE_POST(点赞/踩帖子) - - REPOST(转发) - - FOLLOW(关注) - - MUTE(屏蔽) - - LIKE_COMMENT/DISLIKE_COMMENT(点赞/踩评论) - - action_args中会包含完整的上下文信息(如帖子原文、用户名等)。 + agent + + - CREATE_POST + - CREATE_COMMENT + - QUOTE_POST + - SEARCH_POSTS + - SEARCH_USER + - LIKE_POST/DISLIKE_POST/ + - REPOST + - FOLLOW + - MUTE + - LIKE_COMMENT/DISLIKE_COMMENT/ + + action_args Args: - activity: Agent活动记录 + activity: Agent """ - # 跳过DO_NOTHING类型的活动 + # 
DO_NOTHING if activity.action_type == "DO_NOTHING": self._skipped_count += 1 return self._activity_queue.put(activity) self._total_activities += 1 - logger.debug(f"添加活动到Zep队列: {activity.agent_name} - {activity.action_type}") + logger.debug(f"Zep: {activity.agent_name} - {activity.action_type}") def add_activity_from_dict(self, data: Dict[str, Any], platform: str): """ - 从字典数据添加活动 Args: - data: 从actions.jsonl解析的字典数据 - platform: 平台名称 (twitter/reddit) + data: actions.jsonl + platform: (twitter/reddit) """ - # 跳过事件类型的条目 if "event_type" in data: return @@ -361,54 +350,47 @@ def add_activity_from_dict(self, data: Dict[str, Any], platform: str): self.add_activity(activity) - def _worker_loop(self, locale: str = 'zh'): - """后台工作循环 - 按平台批量发送活动到Zep""" + def _worker_loop(self, locale: str = 'pt'): + """ - Zep""" set_locale(locale) while self._running or not self._activity_queue.empty(): try: - # 尝试从队列获取活动(超时1秒) try: activity = self._activity_queue.get(timeout=1) - # 将活动添加到对应平台的缓冲区 platform = activity.platform.lower() with self._buffer_lock: if platform not in self._platform_buffers: self._platform_buffers[platform] = [] self._platform_buffers[platform].append(activity) - # 检查该平台是否达到批量大小 if len(self._platform_buffers[platform]) >= self.BATCH_SIZE: batch = self._platform_buffers[platform][:self.BATCH_SIZE] self._platform_buffers[platform] = self._platform_buffers[platform][self.BATCH_SIZE:] - # 释放锁后再发送 self._send_batch_activities(batch, platform) - # 发送间隔,避免请求过快 time.sleep(self.SEND_INTERVAL) except Empty: pass except Exception as e: - logger.error(f"工作循环异常: {e}") + logger.error(f": {e}") time.sleep(1) def _send_batch_activities(self, activities: List[AgentActivity], platform: str): """ - 批量发送活动到Zep图谱(合并为一条文本) + ZepGrafo Args: - activities: Agent活动列表 - platform: 平台名称 + activities: Agent + platform: """ if not activities: return - # 将多条活动合并为一条文本,用换行分隔 episode_texts = [activity.to_episode_text() for activity in activities] combined_text = "\n".join(episode_texts) - # 带重试的发送 for 
attempt in range(self.MAX_RETRIES): try: self.client.graph.add( @@ -420,21 +402,20 @@ def _send_batch_activities(self, activities: List[AgentActivity], platform: str) self._total_sent += 1 self._total_items_sent += len(activities) display_name = self._get_platform_display_name(platform) - logger.info(f"成功批量发送 {len(activities)} 条{display_name}活动到图谱 {self.graph_id}") - logger.debug(f"批量内容预览: {combined_text[:200]}...") + logger.info(f" {len(activities)} {display_name}Grafo {self.graph_id}") + logger.debug(f"Conteúdo: {combined_text[:200]}...") return except Exception as e: if attempt < self.MAX_RETRIES - 1: - logger.warning(f"批量发送到Zep失败 (尝试 {attempt + 1}/{self.MAX_RETRIES}): {e}") + logger.warning(f"ZepFalhou ( {attempt + 1}/{self.MAX_RETRIES}): {e}") time.sleep(self.RETRY_DELAY * (attempt + 1)) else: - logger.error(f"批量发送到Zep失败,已重试{self.MAX_RETRIES}次: {e}") + logger.error(f"ZepFalhou{self.MAX_RETRIES}: {e}") self._failed_count += 1 def _flush_remaining(self): - """发送队列和缓冲区中剩余的活动""" - # 首先处理队列中剩余的活动,添加到缓冲区 + """""" while not self._activity_queue.empty(): try: activity = self._activity_queue.get_nowait() @@ -446,41 +427,40 @@ def _flush_remaining(self): except Empty: break - # 然后发送各平台缓冲区中剩余的活动(即使不足BATCH_SIZE条) + # BATCH_SIZE with self._buffer_lock: for platform, buffer in self._platform_buffers.items(): if buffer: display_name = self._get_platform_display_name(platform) - logger.info(f"发送{display_name}平台剩余的 {len(buffer)} 条活动") + logger.info(f"{display_name} {len(buffer)} ") self._send_batch_activities(buffer, platform) - # 清空所有缓冲区 for platform in self._platform_buffers: self._platform_buffers[platform] = [] def get_stats(self) -> Dict[str, Any]: - """获取统计信息""" + """""" with self._buffer_lock: buffer_sizes = {p: len(b) for p, b in self._platform_buffers.items()} return { "graph_id": self.graph_id, "batch_size": self.BATCH_SIZE, - "total_activities": self._total_activities, # 添加到队列的活动总数 - "batches_sent": self._total_sent, # 成功发送的批次数 - "items_sent": 
self._total_items_sent, # 成功发送的活动条数 - "failed_count": self._failed_count, # 发送失败的批次数 - "skipped_count": self._skipped_count, # 被过滤跳过的活动数(DO_NOTHING) + "total_activities": self._total_activities, # + "batches_sent": self._total_sent, # + "items_sent": self._total_items_sent, # + "failed_count": self._failed_count, # Falhou + "skipped_count": self._skipped_count, # DO_NOTHING "queue_size": self._activity_queue.qsize(), - "buffer_sizes": buffer_sizes, # 各平台缓冲区大小 + "buffer_sizes": buffer_sizes, # "running": self._running, } class ZepGraphMemoryManager: """ - 管理多个模拟的Zep图谱记忆更新器 + SimulaçãoZepGrafo - 每个模拟可以有自己的更新器实例 + Simulação """ _updaters: Dict[str, ZepGraphMemoryUpdater] = {} @@ -489,17 +469,16 @@ class ZepGraphMemoryManager: @classmethod def create_updater(cls, simulation_id: str, graph_id: str) -> ZepGraphMemoryUpdater: """ - 为模拟创建图谱记忆更新器 + SimulaçãoGrafo Args: - simulation_id: 模拟ID - graph_id: Zep图谱ID + simulation_id: ID da simulação + graph_id: ZepID do grafo Returns: - ZepGraphMemoryUpdater实例 + ZepGraphMemoryUpdater """ with cls._lock: - # 如果已存在,先停止旧的 if simulation_id in cls._updaters: cls._updaters[simulation_id].stop() @@ -507,30 +486,29 @@ def create_updater(cls, simulation_id: str, graph_id: str) -> ZepGraphMemoryUpda updater.start() cls._updaters[simulation_id] = updater - logger.info(f"创建图谱记忆更新器: simulation_id={simulation_id}, graph_id={graph_id}") + logger.info(f"Grafo: simulation_id={simulation_id}, graph_id={graph_id}") return updater @classmethod def get_updater(cls, simulation_id: str) -> Optional[ZepGraphMemoryUpdater]: - """获取模拟的更新器""" + """Simulação""" return cls._updaters.get(simulation_id) @classmethod def stop_updater(cls, simulation_id: str): - """停止并移除模拟的更新器""" + """Simulação""" with cls._lock: if simulation_id in cls._updaters: cls._updaters[simulation_id].stop() del cls._updaters[simulation_id] - logger.info(f"已停止图谱记忆更新器: simulation_id={simulation_id}") + logger.info(f"Grafo: simulation_id={simulation_id}") - # 防止 stop_all 重复调用的标志 + # 
stop_all _stop_all_done = False @classmethod def stop_all(cls): - """停止所有更新器""" - # 防止重复调用 + """""" if cls._stop_all_done: return cls._stop_all_done = True @@ -541,13 +519,13 @@ def stop_all(cls): try: updater.stop() except Exception as e: - logger.error(f"停止更新器失败: simulation_id={simulation_id}, error={e}") + logger.error(f"Falhou: simulation_id={simulation_id}, error={e}") cls._updaters.clear() - logger.info("已停止所有图谱记忆更新器") + logger.info("Grafo") @classmethod def get_all_stats(cls) -> Dict[str, Dict[str, Any]]: - """获取所有更新器的统计信息""" + """""" return { sim_id: updater.get_stats() for sim_id, updater in cls._updaters.items() diff --git a/backend/app/services/zep_tools.py b/backend/app/services/zep_tools.py index 3bc8a57abb..b5a43b242e 100644 --- a/backend/app/services/zep_tools.py +++ b/backend/app/services/zep_tools.py @@ -1,11 +1,11 @@ """ -Zep检索工具服务 -封装图谱搜索、节点读取、边查询等工具,供Report Agent使用 +ZepBuscaFerramenta +GrafoFerramentaReport Agent -核心检索工具(优化后): -1. InsightForge(深度洞察检索)- 最强大的混合检索,自动生成子问题并多维度检索 -2. PanoramaSearch(广度搜索)- 获取全貌,包括过期内容 -3. QuickSearch(简单搜索)- 快速检索 +BuscaFerramenta +1. InsightForgeBusca- BuscaGerarBusca +2. PanoramaSearch- Conteúdo +3. QuickSearch- Busca """ import time @@ -26,7 +26,7 @@ @dataclass class SearchResult: - """搜索结果""" + """Resultado""" facts: List[str] edges: List[Dict[str, Any]] nodes: List[Dict[str, Any]] @@ -43,11 +43,11 @@ def to_dict(self) -> Dict[str, Any]: } def to_text(self) -> str: - """转换为文本格式,供LLM理解""" - text_parts = [f"搜索查询: {self.query}", f"找到 {self.total_count} 条相关信息"] + """LLM""" + text_parts = [f": {self.query}", f" {self.total_count} "] if self.facts: - text_parts.append("\n### 相关事实:") + text_parts.append("\n### :") for i, fact in enumerate(self.facts, 1): text_parts.append(f"{i}. 
{fact}") @@ -56,7 +56,7 @@ def to_text(self) -> str: @dataclass class NodeInfo: - """节点信息""" + """""" uuid: str name: str labels: List[str] @@ -73,14 +73,14 @@ def to_dict(self) -> Dict[str, Any]: } def to_text(self) -> str: - """转换为文本格式""" - entity_type = next((l for l in self.labels if l not in ["Entity", "Node"]), "未知类型") - return f"实体: {self.name} (类型: {entity_type})\n摘要: {self.summary}" + """""" + entity_type = next((l for l in self.labels if l not in ["Entity", "Node"]), "") + return f"Entidade: {self.name} (: {entity_type})\n: {self.summary}" @dataclass class EdgeInfo: - """边信息""" + """""" uuid: str name: str fact: str @@ -88,7 +88,6 @@ class EdgeInfo: target_node_uuid: str source_node_name: Optional[str] = None target_node_name: Optional[str] = None - # 时间信息 created_at: Optional[str] = None valid_at: Optional[str] = None invalid_at: Optional[str] = None @@ -110,47 +109,46 @@ def to_dict(self) -> Dict[str, Any]: } def to_text(self, include_temporal: bool = False) -> str: - """转换为文本格式""" + """""" source = self.source_node_name or self.source_node_uuid[:8] target = self.target_node_name or self.target_node_uuid[:8] - base_text = f"关系: {source} --[{self.name}]--> {target}\n事实: {self.fact}" + base_text = f"Relacionamento: {source} --[{self.name}]--> {target}\n: {self.fact}" if include_temporal: - valid_at = self.valid_at or "未知" - invalid_at = self.invalid_at or "至今" - base_text += f"\n时效: {valid_at} - {invalid_at}" + valid_at = self.valid_at or "" + invalid_at = self.invalid_at or "" + base_text += f"\n: {valid_at} - {invalid_at}" if self.expired_at: - base_text += f" (已过期: {self.expired_at})" + base_text += f" (: {self.expired_at})" return base_text @property def is_expired(self) -> bool: - """是否已过期""" + """""" return self.expired_at is not None @property def is_invalid(self) -> bool: - """是否已失效""" + """""" return self.invalid_at is not None @dataclass class InsightForgeResult: """ - 深度洞察检索结果 (InsightForge) - 包含多个子问题的检索结果,以及综合分析 + BuscaResultado (InsightForge) 
+ BuscaResultadoAnálise """ query: str simulation_requirement: str sub_queries: List[str] - # 各维度检索结果 - semantic_facts: List[str] = field(default_factory=list) # 语义搜索结果 - entity_insights: List[Dict[str, Any]] = field(default_factory=list) # 实体洞察 - relationship_chains: List[str] = field(default_factory=list) # 关系链 + # BuscaResultado + semantic_facts: List[str] = field(default_factory=list) # Resultado + entity_insights: List[Dict[str, Any]] = field(default_factory=list) # Entidade + relationship_chains: List[str] = field(default_factory=list) # Relacionamento - # 统计信息 total_facts: int = 0 total_entities: int = 0 total_relationships: int = 0 @@ -169,42 +167,41 @@ def to_dict(self) -> Dict[str, Any]: } def to_text(self) -> str: - """转换为详细的文本格式,供LLM理解""" + """LLM""" text_parts = [ - f"## 未来预测深度分析", - f"分析问题: {self.query}", - f"预测场景: {self.simulation_requirement}", - f"\n### 预测数据统计", - f"- 相关预测事实: {self.total_facts}条", - f"- 涉及实体: {self.total_entities}个", - f"- 关系链: {self.total_relationships}条" + f"## PrevisãoAnálise", + f"Análise: {self.query}", + f"Previsão: {self.simulation_requirement}", + f"\n### Previsão", + f"- Previsão: {self.total_facts}", + f"- Entidade: {self.total_entities}", + f"- Relacionamento: {self.total_relationships}" ] - # 子问题 if self.sub_queries: - text_parts.append(f"\n### 分析的子问题") + text_parts.append(f"\n### Análise") for i, sq in enumerate(self.sub_queries, 1): text_parts.append(f"{i}. {sq}") - # 语义搜索结果 + # Resultado if self.semantic_facts: - text_parts.append(f"\n### 【关键事实】(请在报告中引用这些原文)") + text_parts.append(f"\n### (Relatório)") for i, fact in enumerate(self.semantic_facts, 1): text_parts.append(f"{i}. 
\"{fact}\"") - # 实体洞察 + # Entidade if self.entity_insights: - text_parts.append(f"\n### 【核心实体】") + text_parts.append(f"\n### Entidade") for entity in self.entity_insights: - text_parts.append(f"- **{entity.get('name', '未知')}** ({entity.get('type', '实体')})") + text_parts.append(f"- **{entity.get('name', '')}** ({entity.get('type', 'Entidade')})") if entity.get('summary'): - text_parts.append(f" 摘要: \"{entity.get('summary')}\"") + text_parts.append(f" : \"{entity.get('summary')}\"") if entity.get('related_facts'): - text_parts.append(f" 相关事实: {len(entity.get('related_facts', []))}条") + text_parts.append(f" : {len(entity.get('related_facts', []))}") - # 关系链 + # Relacionamento if self.relationship_chains: - text_parts.append(f"\n### 【关系链】") + text_parts.append(f"\n### Relacionamento") for chain in self.relationship_chains: text_parts.append(f"- {chain}") @@ -214,21 +211,16 @@ def to_text(self) -> str: @dataclass class PanoramaResult: """ - 广度搜索结果 (Panorama) - 包含所有相关信息,包括过期内容 + Resultado (Panorama) + Conteúdo """ query: str - # 全部节点 all_nodes: List[NodeInfo] = field(default_factory=list) - # 全部边(包括过期的) all_edges: List[EdgeInfo] = field(default_factory=list) - # 当前有效的事实 active_facts: List[str] = field(default_factory=list) - # 已过期/失效的事实(历史记录) historical_facts: List[str] = field(default_factory=list) - # 统计 total_nodes: int = 0 total_edges: int = 0 active_count: int = 0 @@ -248,34 +240,32 @@ def to_dict(self) -> Dict[str, Any]: } def to_text(self) -> str: - """转换为文本格式(完整版本,不截断)""" + """""" text_parts = [ - f"## 广度搜索结果(未来全景视图)", - f"查询: {self.query}", - f"\n### 统计信息", - f"- 总节点数: {self.total_nodes}", - f"- 总边数: {self.total_edges}", - f"- 当前有效事实: {self.active_count}条", - f"- 历史/过期事实: {self.historical_count}条" + f"## Resultado", + f": {self.query}", + f"\n### ", + f"- : {self.total_nodes}", + f"- : {self.total_edges}", + f"- : {self.active_count}", + f"- /: {self.historical_count}" ] - # 当前有效的事实(完整输出,不截断) if self.active_facts: - text_parts.append(f"\n### 【当前有效事实】(模拟结果原文)") + 
text_parts.append(f"\n### (SimulaçãoResultado)") for i, fact in enumerate(self.active_facts, 1): text_parts.append(f"{i}. \"{fact}\"") - # 历史/过期事实(完整输出,不截断) if self.historical_facts: - text_parts.append(f"\n### 【历史/过期事实】(演变过程记录)") + text_parts.append(f"\n### /()") for i, fact in enumerate(self.historical_facts, 1): text_parts.append(f"{i}. \"{fact}\"") - # 关键实体(完整输出,不截断) + # Entidade if self.all_nodes: - text_parts.append(f"\n### 【涉及实体】") + text_parts.append(f"\n### Entidade") for node in self.all_nodes: - entity_type = next((l for l in node.labels if l not in ["Entity", "Node"]), "实体") + entity_type = next((l for l in node.labels if l not in ["Entity", "Node"]), "Entidade") text_parts.append(f"- **{node.name}** ({entity_type})") return "\n".join(text_parts) @@ -283,13 +273,13 @@ def to_text(self) -> str: @dataclass class AgentInterview: - """单个Agent的采访结果""" + """AgentResultado""" agent_name: str - agent_role: str # 角色类型(如:学生、教师、媒体等) - agent_bio: str # 简介 - question: str # 采访问题 - response: str # 采访回答 - key_quotes: List[str] = field(default_factory=list) # 关键引言 + agent_role: str # + agent_bio: str # + question: str # + response: str # + key_quotes: List[str] = field(default_factory=list) # def to_dict(self) -> Dict[str, Any]: return { @@ -303,21 +293,19 @@ def to_dict(self) -> Dict[str, Any]: def to_text(self) -> str: text = f"**{self.agent_name}** ({self.agent_role})\n" - # 显示完整的agent_bio,不截断 - text += f"_简介: {self.agent_bio}_\n\n" + # agent_bio + text += f"_: {self.agent_bio}_\n\n" text += f"**Q:** {self.question}\n\n" text += f"**A:** {self.response}\n" if self.key_quotes: - text += "\n**关键引言:**\n" + text += "\n**:**\n" for quote in self.key_quotes: - # 清理各种引号 clean_quote = quote.replace('\u201c', '').replace('\u201d', '').replace('"', '') clean_quote = clean_quote.replace('\u300c', '').replace('\u300d', '') clean_quote = clean_quote.strip() - # 去掉开头的标点 while clean_quote and clean_quote[0] in ',,;;::、。!?\n\r\t ': clean_quote = clean_quote[1:] - # 
过滤包含问题编号的垃圾内容(问题1-9) + # Conteúdo1-9 skip = False for d in '123456789': if f'\u95ee\u9898{d}' in clean_quote: @@ -325,7 +313,7 @@ def to_text(self) -> str: break if skip: continue - # 截断过长内容(按句号截断,而非硬截断) + # Conteúdo if len(clean_quote) > 150: dot_pos = clean_quote.find('\u3002', 80) if dot_pos > 0: @@ -340,23 +328,21 @@ def to_text(self) -> str: @dataclass class InterviewResult: """ - 采访结果 (Interview) - 包含多个模拟Agent的采访回答 + Resultado (Interview) + SimulaçãoAgent """ - interview_topic: str # 采访主题 - interview_questions: List[str] # 采访问题列表 + interview_topic: str # + interview_questions: List[str] # - # 采访选择的Agent + # Agent selected_agents: List[Dict[str, Any]] = field(default_factory=list) - # 各Agent的采访回答 + # Agent interviews: List[AgentInterview] = field(default_factory=list) - # 选择Agent的理由 + # Agent selection_reasoning: str = "" - # 整合后的采访摘要 summary: str = "" - # 统计 total_agents: int = 0 interviewed_count: int = 0 @@ -373,74 +359,74 @@ def to_dict(self) -> Dict[str, Any]: } def to_text(self) -> str: - """转换为详细的文本格式,供LLM理解和报告引用""" + """LLMRelatório""" text_parts = [ - "## 深度采访报告", - f"**采访主题:** {self.interview_topic}", - f"**采访人数:** {self.interviewed_count} / {self.total_agents} 位模拟Agent", - "\n### 采访对象选择理由", - self.selection_reasoning or "(自动选择)", + "## Relatório", + f"**:** {self.interview_topic}", + f"**:** {self.interviewed_count} / {self.total_agents} SimulaçãoAgent", + "\n### ", + self.selection_reasoning or "", "\n---", - "\n### 采访实录", + "\n### ", ] if self.interviews: for i, interview in enumerate(self.interviews, 1): - text_parts.append(f"\n#### 采访 #{i}: {interview.agent_name}") + text_parts.append(f"\n#### #{i}: {interview.agent_name}") text_parts.append(interview.to_text()) text_parts.append("\n---") else: - text_parts.append("(无采访记录)\n\n---") + text_parts.append("\n\n---") - text_parts.append("\n### 采访摘要与核心观点") - text_parts.append(self.summary or "(无摘要)") + text_parts.append("\n### ") + text_parts.append(self.summary or "") return "\n".join(text_parts) 
class ZepToolsService: """ - Zep检索工具服务 + ZepBuscaFerramenta - 【核心检索工具 - 优化后】 - 1. insight_forge - 深度洞察检索(最强大,自动生成子问题,多维度检索) - 2. panorama_search - 广度搜索(获取全貌,包括过期内容) - 3. quick_search - 简单搜索(快速检索) - 4. interview_agents - 深度采访(采访模拟Agent,获取多视角观点) + BuscaFerramenta - + 1. insight_forge - BuscaGerarBusca + 2. panorama_search - Conteúdo + 3. quick_search - Busca + 4. interview_agents - SimulaçãoAgent - 【基础工具】 - - search_graph - 图谱语义搜索 - - get_all_nodes - 获取图谱所有节点 - - get_all_edges - 获取图谱所有边(含时间信息) - - get_node_detail - 获取节点详细信息 - - get_node_edges - 获取节点相关的边 - - get_entities_by_type - 按类型获取实体 - - get_entity_summary - 获取实体的关系摘要 + Ferramenta + - search_graph - Grafo + - get_all_nodes - Grafo + - get_all_edges - Grafo + - get_node_detail - + - get_node_edges - + - get_entities_by_type - Entidade + - get_entity_summary - EntidadeRelacionamento """ - # 重试配置 + # Configuração MAX_RETRIES = 3 RETRY_DELAY = 2.0 def __init__(self, api_key: Optional[str] = None, llm_client: Optional[LLMClient] = None): self.api_key = api_key or Config.ZEP_API_KEY if not self.api_key: - raise ValueError("ZEP_API_KEY 未配置") + raise ValueError("ZEP_API_KEY Configuração") self.client = Zep(api_key=self.api_key) - # LLM客户端用于InsightForge生成子问题 + # LLMInsightForgeGerar self._llm_client = llm_client logger.info(t("console.zepToolsInitialized")) @property def llm(self) -> LLMClient: - """延迟初始化LLM客户端""" + """LLM""" if self._llm_client is None: - self._llm_client = LLMClient() + self._llm_client = LLMClient(model='gpt-5.4') return self._llm_client def _call_with_retry(self, func, operation_name: str, max_retries: int = None): - """带重试机制的API调用""" + """API""" max_retries = max_retries or self.MAX_RETRIES last_exception = None delay = self.RETRY_DELAY @@ -469,23 +455,23 @@ def search_graph( scope: str = "edges" ) -> SearchResult: """ - 图谱语义搜索 + Grafo - 使用混合搜索(语义+BM25)在图谱中搜索相关信息。 - 如果Zep Cloud的search API不可用,则降级为本地关键词匹配。 + +BM25Grafo + Zep Cloudsearch API Args: - graph_id: 图谱ID (Standalone Graph) - query: 搜索查询 - 
limit: 返回结果数量 - scope: 搜索范围,"edges" 或 "nodes" + graph_id: ID do grafo (Standalone Graph) + query: + limit: Resultado + scope: "edges" "nodes" Returns: - SearchResult: 搜索结果 + SearchResult: Resultado """ logger.info(t("console.graphSearch", graphId=graph_id, query=query[:50])) - # 尝试使用Zep Cloud Search API + # Zep Cloud Search API try: search_results = self._call_with_retry( func=lambda: self.client.graph.search( @@ -502,7 +488,7 @@ def search_graph( edges = [] nodes = [] - # 解析边搜索结果 + # Resultado if hasattr(search_results, 'edges') and search_results.edges: for edge in search_results.edges: if hasattr(edge, 'fact') and edge.fact: @@ -515,7 +501,7 @@ def search_graph( "target_node_uuid": getattr(edge, 'target_node_uuid', ''), }) - # 解析节点搜索结果 + # Resultado if hasattr(search_results, 'nodes') and search_results.nodes: for node in search_results.nodes: nodes.append({ @@ -524,7 +510,6 @@ def search_graph( "labels": getattr(node, 'labels', []), "summary": getattr(node, 'summary', ''), }) - # 节点摘要也算作事实 if hasattr(node, 'summary') and node.summary: facts.append(f"[{node.name}]: {node.summary}") @@ -540,7 +525,6 @@ def search_graph( except Exception as e: logger.warning(t("console.zepSearchApiFallback", error=str(e))) - # 降级:使用本地关键词匹配搜索 return self._local_search(graph_id, query, limit, scope) def _local_search( @@ -551,18 +535,18 @@ def _local_search( scope: str = "edges" ) -> SearchResult: """ - 本地关键词匹配搜索(作为Zep Search API的降级方案) + Zep Search API - 获取所有边/节点,然后在本地进行关键词匹配 + / Args: - graph_id: 图谱ID - query: 搜索查询 - limit: 返回结果数量 - scope: 搜索范围 + graph_id: ID do grafo + query: + limit: Resultado + scope: Returns: - SearchResult: 搜索结果 + SearchResult: Resultado """ logger.info(t("console.usingLocalSearch", query=query[:30])) @@ -570,19 +554,16 @@ def _local_search( edges_result = [] nodes_result = [] - # 提取查询关键词(简单分词) query_lower = query.lower() keywords = [w.strip() for w in query_lower.replace(',', ' ').replace(',', ' ').split() if len(w.strip()) > 1] def match_score(text: str) -> 
int: - """计算文本与查询的匹配分数""" + """""" if not text: return 0 text_lower = text.lower() - # 完全匹配查询 if query_lower in text_lower: return 100 - # 关键词匹配 score = 0 for keyword in keywords: if keyword in text_lower: @@ -591,7 +572,6 @@ def match_score(text: str) -> int: try: if scope in ["edges", "both"]: - # 获取所有边并匹配 all_edges = self.get_all_edges(graph_id) scored_edges = [] for edge in all_edges: @@ -599,7 +579,6 @@ def match_score(text: str) -> int: if score > 0: scored_edges.append((score, edge)) - # 按分数排序 scored_edges.sort(key=lambda x: x[0], reverse=True) for score, edge in scored_edges[:limit]: @@ -614,7 +593,6 @@ def match_score(text: str) -> int: }) if scope in ["nodes", "both"]: - # 获取所有节点并匹配 all_nodes = self.get_all_nodes(graph_id) scored_nodes = [] for node in all_nodes: @@ -649,13 +627,12 @@ def match_score(text: str) -> int: def get_all_nodes(self, graph_id: str) -> List[NodeInfo]: """ - 获取图谱的所有节点(分页获取) + Grafo Args: - graph_id: 图谱ID + graph_id: ID do grafo Returns: - 节点列表 """ logger.info(t("console.fetchingAllNodes", graphId=graph_id)) @@ -677,14 +654,14 @@ def get_all_nodes(self, graph_id: str) -> List[NodeInfo]: def get_all_edges(self, graph_id: str, include_temporal: bool = True) -> List[EdgeInfo]: """ - 获取图谱的所有边(分页获取,包含时间信息) + Grafo Args: - graph_id: 图谱ID - include_temporal: 是否包含时间信息(默认True) + graph_id: ID do grafo + include_temporal: True Returns: - 边列表(包含created_at, valid_at, invalid_at, expired_at) + created_at, valid_at, invalid_at, expired_at """ logger.info(t("console.fetchingAllEdges", graphId=graph_id)) @@ -701,7 +678,6 @@ def get_all_edges(self, graph_id: str, include_temporal: bool = True) -> List[Ed target_node_uuid=edge.target_node_uuid or "" ) - # 添加时间信息 if include_temporal: edge_info.created_at = getattr(edge, 'created_at', None) edge_info.valid_at = getattr(edge, 'valid_at', None) @@ -715,13 +691,12 @@ def get_all_edges(self, graph_id: str, include_temporal: bool = True) -> List[Ed def get_node_detail(self, node_uuid: str) -> 
Optional[NodeInfo]: """ - 获取单个节点的详细信息 Args: - node_uuid: 节点UUID + node_uuid: UUID Returns: - 节点信息或None + None """ logger.info(t("console.fetchingNodeDetail", uuid=node_uuid[:8])) @@ -747,26 +722,23 @@ def get_node_detail(self, node_uuid: str) -> Optional[NodeInfo]: def get_node_edges(self, graph_id: str, node_uuid: str) -> List[EdgeInfo]: """ - 获取节点相关的所有边 - 通过获取图谱所有边,然后过滤出与指定节点相关的边 + Grafo Args: - graph_id: 图谱ID - node_uuid: 节点UUID + graph_id: ID do grafo + node_uuid: UUID Returns: - 边列表 """ logger.info(t("console.fetchingNodeEdges", uuid=node_uuid[:8])) try: - # 获取图谱所有边,然后过滤 + # Grafo all_edges = self.get_all_edges(graph_id) result = [] for edge in all_edges: - # 检查边是否与指定节点相关(作为源或目标) if edge.source_node_uuid == node_uuid or edge.target_node_uuid == node_uuid: result.append(edge) @@ -783,14 +755,14 @@ def get_entities_by_type( entity_type: str ) -> List[NodeInfo]: """ - 按类型获取实体 + Entidade Args: - graph_id: 图谱ID - entity_type: 实体类型(如 Student, PublicFigure 等) + graph_id: ID do grafo + entity_type: Entidade Student, PublicFigure Returns: - 符合类型的实体列表 + Entidade """ logger.info(t("console.fetchingEntitiesByType", type=entity_type)) @@ -798,7 +770,7 @@ def get_entities_by_type( filtered = [] for node in all_nodes: - # 检查labels是否包含指定类型 + # labels if entity_type in node.labels: filtered.append(node) @@ -811,27 +783,27 @@ def get_entity_summary( entity_name: str ) -> Dict[str, Any]: """ - 获取指定实体的关系摘要 + EntidadeRelacionamento - 搜索与该实体相关的所有信息,并生成摘要 + EntidadeGerar Args: - graph_id: 图谱ID - entity_name: 实体名称 + graph_id: ID do grafo + entity_name: Entidade Returns: - 实体摘要信息 + Entidade """ logger.info(t("console.fetchingEntitySummary", name=entity_name)) - # 先搜索该实体相关的信息 + # Entidade search_result = self.search_graph( graph_id=graph_id, query=entity_name, limit=20 ) - # 尝试在所有节点中找到该实体 + # Entidade all_nodes = self.get_all_nodes(graph_id) entity_node = None for node in all_nodes: @@ -841,7 +813,7 @@ def get_entity_summary( related_edges = [] if entity_node: - # 传入graph_id参数 + # 
graph_id related_edges = self.get_node_edges(graph_id, entity_node.uuid) return { @@ -854,27 +826,26 @@ def get_entity_summary( def get_graph_statistics(self, graph_id: str) -> Dict[str, Any]: """ - 获取图谱的统计信息 + Grafo Args: - graph_id: 图谱ID + graph_id: ID do grafo Returns: - 统计信息 """ logger.info(t("console.fetchingGraphStats", graphId=graph_id)) nodes = self.get_all_nodes(graph_id) edges = self.get_all_edges(graph_id) - # 统计实体类型分布 + # Entidade entity_types = {} for node in nodes: for label in node.labels: if label not in ["Entity", "Node"]: entity_types[label] = entity_types.get(label, 0) + 1 - # 统计关系类型分布 + # Relacionamento relation_types = {} for edge in edges: relation_types[edge.name] = relation_types.get(edge.name, 0) + 1 @@ -894,34 +865,34 @@ def get_simulation_context( limit: int = 30 ) -> Dict[str, Any]: """ - 获取模拟相关的上下文信息 + Simulação - 综合搜索与模拟需求相关的所有信息 + Simulação Args: - graph_id: 图谱ID - simulation_requirement: 模拟需求描述 - limit: 每类信息的数量限制 + graph_id: ID do grafo + simulation_requirement: Descrição dos requisitos da simulação + limit: Returns: - 模拟上下文信息 + Simulação """ logger.info(t("console.fetchingSimContext", requirement=simulation_requirement[:50])) - # 搜索与模拟需求相关的信息 + # Simulação search_result = self.search_graph( graph_id=graph_id, query=simulation_requirement, limit=limit ) - # 获取图谱统计 + # Grafo stats = self.get_graph_statistics(graph_id) - # 获取所有实体节点 + # Entidade all_nodes = self.get_all_nodes(graph_id) - # 筛选有实际类型的实体(非纯Entity节点) + # EntidadeEntity entities = [] for node in all_nodes: custom_labels = [l for l in node.labels if l not in ["Entity", "Node"]] @@ -936,11 +907,11 @@ def get_simulation_context( "simulation_requirement": simulation_requirement, "related_facts": search_result.facts, "graph_statistics": stats, - "entities": entities[:limit], # 限制数量 + "entities": entities[:limit], # "total_entities": len(entities) } - # ========== 核心检索工具(优化后) ========== + # ========== BuscaFerramenta ========== def insight_forge( self, @@ -951,24 +922,24 @@ def 
insight_forge( max_sub_queries: int = 5 ) -> InsightForgeResult: """ - 【InsightForge - 深度洞察检索】 + InsightForge - Busca - 最强大的混合检索函数,自动分解问题并多维度检索: - 1. 使用LLM将问题分解为多个子问题 - 2. 对每个子问题进行语义搜索 - 3. 提取相关实体并获取其详细信息 - 4. 追踪关系链 - 5. 整合所有结果,生成深度洞察 + BuscaBusca + 1. LLM + 2. + 3. Entidade + 4. Relacionamento + 5. ResultadoGerar Args: - graph_id: 图谱ID - query: 用户问题 - simulation_requirement: 模拟需求描述 - report_context: 报告上下文(可选,用于更精准的子问题生成) - max_sub_queries: 最大子问题数量 + graph_id: ID do grafo + query: + simulation_requirement: Descrição dos requisitos da simulação + report_context: RelatórioGerar + max_sub_queries: Returns: - InsightForgeResult: 深度洞察检索结果 + InsightForgeResult: BuscaResultado """ logger.info(t("console.insightForgeStart", query=query[:50])) @@ -978,7 +949,7 @@ def insight_forge( sub_queries=[] ) - # Step 1: 使用LLM生成子问题 + # Step 1: LLMGerar sub_queries = self._generate_sub_queries( query=query, simulation_requirement=simulation_requirement, @@ -988,7 +959,7 @@ def insight_forge( result.sub_queries = sub_queries logger.info(t("console.generatedSubQueries", count=len(sub_queries))) - # Step 2: 对每个子问题进行语义搜索 + # Step 2: all_facts = [] all_edges = [] seen_facts = set() @@ -1008,7 +979,6 @@ def insight_forge( all_edges.extend(search_result.edges) - # 对原始问题也进行搜索 main_search = self.search_graph( graph_id=graph_id, query=query, @@ -1023,7 +993,7 @@ def insight_forge( result.semantic_facts = all_facts result.total_facts = len(all_facts) - # Step 3: 从边中提取相关实体UUID,只获取这些实体的信息(不获取全部节点) + # Step 3: EntidadeUUIDEntidade entity_uuids = set() for edge_data in all_edges: if isinstance(edge_data, dict): @@ -1034,21 +1004,20 @@ def insight_forge( if target_uuid: entity_uuids.add(target_uuid) - # 获取所有相关实体的详情(不限制数量,完整输出) + # Entidade entity_insights = [] - node_map = {} # 用于后续关系链构建 + node_map = {} # Relacionamento - for uuid in list(entity_uuids): # 处理所有实体,不截断 + for uuid in list(entity_uuids): # Entidade if not uuid: continue try: - # 单独获取每个相关节点的信息 node = self.get_node_detail(uuid) if node: 
node_map[uuid] = node - entity_type = next((l for l in node.labels if l not in ["Entity", "Node"]), "实体") + entity_type = next((l for l in node.labels if l not in ["Entity", "Node"]), "Entidade") - # 获取该实体相关的所有事实(不截断) + # Entidade related_facts = [ f for f in all_facts if node.name.lower() in f.lower() @@ -1059,18 +1028,18 @@ def insight_forge( "name": node.name, "type": entity_type, "summary": node.summary, - "related_facts": related_facts # 完整输出,不截断 + "related_facts": related_facts # }) except Exception as e: - logger.debug(f"获取节点 {uuid} 失败: {e}") + logger.debug(f" {uuid} Falhou: {e}") continue result.entity_insights = entity_insights result.total_entities = len(entity_insights) - # Step 4: 构建所有关系链(不限制数量) + # Step 4: Relacionamento relationship_chains = [] - for edge_data in all_edges: # 处理所有边,不截断 + for edge_data in all_edges: # if isinstance(edge_data, dict): source_uuid = edge_data.get('source_node_uuid', '') target_uuid = edge_data.get('target_node_uuid', '') @@ -1097,27 +1066,29 @@ def _generate_sub_queries( max_queries: int = 5 ) -> List[str]: """ - 使用LLM生成子问题 + LLMGerar - 将复杂问题分解为多个可以独立检索的子问题 + Busca """ - system_prompt = """你是一个专业的问题分析专家。你的任务是将一个复杂问题分解为多个可以在模拟世界中独立观察的子问题。 + system_prompt = """Você é um especialista em análise de simulações de opinião pública. +Sua tarefa é decompor uma consulta complexa em sub-consultas mais específicas para buscar informações no grafo de conhecimento. -要求: -1. 每个子问题应该足够具体,可以在模拟世界中找到相关的Agent行为或事件 -2. 子问题应该覆盖原问题的不同维度(如:谁、什么、为什么、怎么样、何时、何地) -3. 子问题应该与模拟场景相关 -4. 返回JSON格式:{"sub_queries": ["子问题1", "子问题2", ...]}""" +REGRAS: +1. Considere diferentes ângulos: agentes, comportamentos, tendências, métricas +2. Cada sub-consulta deve focar em um aspecto específico +3. Mantenha relevância com o contexto da simulação +4. Retorne JSON no formato: {"sub_queries": ["consulta 1", "consulta 2", ...]} +5. 
Escreva TUDO em português do Brasil""" - user_prompt = f"""模拟需求背景: + user_prompt = f"""Contexto da simulação: {simulation_requirement} -{f"报告上下文:{report_context[:500]}" if report_context else ""} +{f"Contexto do relatório: {report_context[:500]}" if report_context else ""} -请将以下问题分解为{max_queries}个子问题: +Gere no máximo {max_queries} sub-consultas para: {query} -返回JSON格式的子问题列表。""" +Retorne JSON.""" try: response = self.llm.chat_json( @@ -1129,17 +1100,15 @@ def _generate_sub_queries( ) sub_queries = response.get("sub_queries", []) - # 确保是字符串列表 return [str(sq) for sq in sub_queries[:max_queries]] except Exception as e: logger.warning(t("console.generateSubQueriesFailed", error=str(e))) - # 降级:返回基于原问题的变体 return [ query, - f"{query} 的主要参与者", - f"{query} 的原因和影响", - f"{query} 的发展过程" + f"{query} ", + f"{query} ", + f"{query} " ][:max_queries] def panorama_search( @@ -1150,40 +1119,37 @@ def panorama_search( limit: int = 50 ) -> PanoramaResult: """ - 【PanoramaSearch - 广度搜索】 + PanoramaSearch - - 获取全貌视图,包括所有相关内容和历史/过期信息: - 1. 获取所有相关节点 - 2. 获取所有边(包括已过期/失效的) - 3. 分类整理当前有效和历史信息 + Conteúdo/ + 1. + 2. / + 3. 
- 这个工具适用于需要了解事件全貌、追踪演变过程的场景。 + Ferramenta Args: - graph_id: 图谱ID - query: 搜索查询(用于相关性排序) - include_expired: 是否包含过期内容(默认True) - limit: 返回结果数量限制 + graph_id: ID do grafo + query: + include_expired: ConteúdoTrue + limit: Resultado Returns: - PanoramaResult: 广度搜索结果 + PanoramaResult: Resultado """ logger.info(t("console.panoramaSearchStart", query=query[:50])) result = PanoramaResult(query=query) - # 获取所有节点 all_nodes = self.get_all_nodes(graph_id) node_map = {n.uuid: n for n in all_nodes} result.all_nodes = all_nodes result.total_nodes = len(all_nodes) - # 获取所有边(包含时间信息) all_edges = self.get_all_edges(graph_id, include_temporal=True) result.all_edges = all_edges result.total_edges = len(all_edges) - # 分类事实 active_facts = [] historical_facts = [] @@ -1191,24 +1157,20 @@ def panorama_search( if not edge.fact: continue - # 为事实添加实体名称 + # Entidade source_name = node_map.get(edge.source_node_uuid, NodeInfo('', '', [], '', {})).name or edge.source_node_uuid[:8] target_name = node_map.get(edge.target_node_uuid, NodeInfo('', '', [], '', {})).name or edge.target_node_uuid[:8] - # 判断是否过期/失效 is_historical = edge.is_expired or edge.is_invalid if is_historical: - # 历史/过期事实,添加时间标记 - valid_at = edge.valid_at or "未知" - invalid_at = edge.invalid_at or edge.expired_at or "未知" + valid_at = edge.valid_at or "" + invalid_at = edge.invalid_at or edge.expired_at or "" fact_with_time = f"[{valid_at} - {invalid_at}] {edge.fact}" historical_facts.append(fact_with_time) else: - # 当前有效事实 active_facts.append(edge.fact) - # 基于查询进行相关性排序 query_lower = query.lower() keywords = [w.strip() for w in query_lower.replace(',', ' ').replace(',', ' ').split() if len(w.strip()) > 1] @@ -1222,7 +1184,6 @@ def relevance_score(fact: str) -> int: score += 10 return score - # 排序并限制数量 active_facts.sort(key=relevance_score, reverse=True) historical_facts.sort(key=relevance_score, reverse=True) @@ -1241,24 +1202,24 @@ def quick_search( limit: int = 10 ) -> SearchResult: """ - 【QuickSearch - 简单搜索】 + QuickSearch - - 
快速、轻量级的检索工具: - 1. 直接调用Zep语义搜索 - 2. 返回最相关的结果 - 3. 适用于简单、直接的检索需求 + BuscaFerramenta + 1. Zep + 2. Resultado + 3. Busca Args: - graph_id: 图谱ID - query: 搜索查询 - limit: 返回结果数量 + graph_id: ID do grafo + query: + limit: Resultado Returns: - SearchResult: 搜索结果 + SearchResult: Resultado """ logger.info(t("console.quickSearchStart", query=query[:50])) - # 直接调用现有的search_graph方法 + # search_graph result = self.search_graph( graph_id=graph_id, query=query, @@ -1278,31 +1239,30 @@ def interview_agents( custom_questions: List[str] = None ) -> InterviewResult: """ - 【InterviewAgents - 深度采访】 + InterviewAgents - - 调用真实的OASIS采访API,采访模拟中正在运行的Agent: - 1. 自动读取人设文件,了解所有模拟Agent - 2. 使用LLM分析采访需求,智能选择最相关的Agent - 3. 使用LLM生成采访问题 - 4. 调用 /api/simulation/interview/batch 接口进行真实采访(双平台同时采访) - 5. 整合所有采访结果,生成采访报告 + OASISAPISimulaçãoAgent + 1. SimulaçãoAgent + 2. LLMAnáliseAgent + 3. LLMGerar + 4. /api/simulation/interview/batch + 5. ResultadoGerarRelatório - 【重要】此功能需要模拟环境处于运行状态(OASIS环境未关闭) + SimulaçãoOASIS - 【使用场景】 - - 需要从不同角色视角了解事件看法 - - 需要收集多方意见和观点 - - 需要获取模拟Agent的真实回答(非LLM模拟) + - + - + - SimulaçãoAgentLLMSimulação Args: - simulation_id: 模拟ID(用于定位人设文件和调用采访API) - interview_requirement: 采访需求描述(非结构化,如"了解学生对事件的看法") - simulation_requirement: 模拟需求背景(可选) - max_agents: 最多采访的Agent数量 - custom_questions: 自定义采访问题(可选,若不提供则自动生成) + simulation_id: ID da simulaçãoAPI + interview_requirement: "" + simulation_requirement: Simulação + max_agents: Agent + custom_questions: Gerar Returns: - InterviewResult: 采访结果 + InterviewResult: Resultado """ from .simulation_runner import SimulationRunner @@ -1313,18 +1273,18 @@ def interview_agents( interview_questions=custom_questions or [] ) - # Step 1: 读取人设文件 + # Step 1: profiles = self._load_agent_profiles(simulation_id) if not profiles: logger.warning(t("console.profilesNotFound", simId=simulation_id)) - result.summary = "未找到可采访的Agent人设文件" + result.summary = "Agent" return result result.total_agents = len(profiles) logger.info(t("console.loadedProfiles", count=len(profiles))) - # 
Step 2: 使用LLM选择要采访的Agent(返回agent_id列表) + # Step 2: LLMAgentagent_id selected_agents, selected_indices, selection_reasoning = self._select_agents_for_interview( profiles=profiles, interview_requirement=interview_requirement, @@ -1336,7 +1296,7 @@ def interview_agents( result.selection_reasoning = selection_reasoning logger.info(t("console.selectedAgentsForInterview", count=len(selected_agents), indices=selected_indices)) - # Step 3: 生成采访问题(如果没有提供) + # Step 3: Gerar if not result.interview_questions: result.interview_questions = self._generate_interview_questions( interview_requirement=interview_requirement, @@ -1345,103 +1305,101 @@ def interview_agents( ) logger.info(t("console.generatedInterviewQuestions", count=len(result.interview_questions))) - # 将问题合并为一个采访prompt + # prompt combined_prompt = "\n".join([f"{i+1}. {q}" for i, q in enumerate(result.interview_questions)]) - # 添加优化前缀,约束Agent回复格式 + # Agent INTERVIEW_PROMPT_PREFIX = ( - "你正在接受一次采访。请结合你的人设、所有的过往记忆与行动," - "以纯文本方式直接回答以下问题。\n" - "回复要求:\n" - "1. 直接用自然语言回答,不要调用任何工具\n" - "2. 不要返回JSON格式或工具调用格式\n" - "3. 不要使用Markdown标题(如#、##、###)\n" - "4. 按问题编号逐一回答,每个回答以「问题X:」开头(X为问题编号)\n" - "5. 每个问题的回答之间用空行分隔\n" - "6. 回答要有实质内容,每个问题至少回答2-3句话\n\n" + "Você é um agente participando de uma simulação de opinião pública brasileira.\n" + "Responda às perguntas abaixo com base na sua persona, experiências e perspectiva.\n\n" + "REGRAS OBRIGATÓRIAS:\n" + "1. Responda INTEIRAMENTE em português do Brasil.\n" + "2. NÃO use ferramentas, NÃO retorne JSON.\n" + "3. NÃO use títulos Markdown (##, ###, ####).\n" + "4. Seja específico e detalhado — cite exemplos concretos da sua experiência.\n" + "5. Mantenha um tom natural e autêntico para a sua persona.\n" + "6. Cada resposta deve ter 2-3 parágrafos.\n" + "7. 
NUNCA escreva em chinês, inglês ou qualquer idioma que não seja português do Brasil.\n\n" + "PERGUNTAS:\n" ) optimized_prompt = f"{INTERVIEW_PROMPT_PREFIX}{combined_prompt}" - # Step 4: 调用真实的采访API(不指定platform,默认双平台同时采访) + # Step 4: APIplatform try: - # 构建批量采访列表(不指定platform,双平台采访) + # platform interviews_request = [] for agent_idx in selected_indices: interviews_request.append({ "agent_id": agent_idx, - "prompt": optimized_prompt # 使用优化后的prompt - # 不指定platform,API会在twitter和reddit两个平台都采访 + "prompt": optimized_prompt # prompt + # platformAPItwitterreddit }) logger.info(t("console.callingBatchInterviewApi", count=len(interviews_request))) - # 调用 SimulationRunner 的批量采访方法(不传platform,双平台采访) + # SimulationRunner platform api_result = SimulationRunner.interview_agents_batch( simulation_id=simulation_id, interviews=interviews_request, - platform=None, # 不指定platform,双平台采访 - timeout=180.0 # 双平台需要更长超时 + platform=None, # platform + timeout=180.0 # ) logger.info(t("console.interviewApiReturned", count=api_result.get('interviews_count', 0), success=api_result.get('success'))) - # 检查API调用是否成功 if not api_result.get("success", False): - error_msg = api_result.get("error", "未知错误") + error_msg = api_result.get("error", "") logger.warning(t("console.interviewApiReturnedFailure", error=error_msg)) - result.summary = f"采访API调用失败:{error_msg}。请检查OASIS模拟环境状态。" + result.summary = f"APIFalhou{error_msg}OASISSimulação" return result - # Step 5: 解析API返回结果,构建AgentInterview对象 - # 双平台模式返回格式: {"twitter_0": {...}, "reddit_0": {...}, "twitter_1": {...}, ...} + # Step 5: APIResultadoAgentInterview + # : {"twitter_0": {...}, "reddit_0": {...}, "twitter_1": {...}, ...} api_data = api_result.get("result", {}) results_dict = api_data.get("results", {}) if isinstance(api_data, dict) else {} for i, agent_idx in enumerate(selected_indices): agent = selected_agents[i] agent_name = agent.get("realname", agent.get("username", f"Agent_{agent_idx}")) - agent_role = agent.get("profession", "未知") + agent_role = 
agent.get("profession", "") agent_bio = agent.get("bio", "") - # 获取该Agent在两个平台的采访结果 + # AgentResultado twitter_result = results_dict.get(f"twitter_{agent_idx}", {}) reddit_result = results_dict.get(f"reddit_{agent_idx}", {}) twitter_response = twitter_result.get("response", "") reddit_response = reddit_result.get("response", "") - # 清理可能的工具调用 JSON 包裹 + # Ferramenta JSON twitter_response = self._clean_tool_call_response(twitter_response) reddit_response = self._clean_tool_call_response(reddit_response) - # 始终输出双平台标记 - twitter_text = twitter_response if twitter_response else "(该平台未获得回复)" - reddit_text = reddit_response if reddit_response else "(该平台未获得回复)" - response_text = f"【Twitter平台回答】\n{twitter_text}\n\n【Reddit平台回答】\n{reddit_text}" + twitter_text = twitter_response if twitter_response else "" + reddit_text = reddit_response if reddit_response else "" + response_text = f"Twitter\n{twitter_text}\n\nReddit\n{reddit_text}" - # 提取关键引言(从两个平台的回答中) import re combined_responses = f"{twitter_response} {reddit_response}" - # 清理响应文本:去掉标记、编号、Markdown 等干扰 + # Markdown clean_text = re.sub(r'#{1,6}\s+', '', combined_responses) clean_text = re.sub(r'\{[^}]*tool_name[^}]*\}', '', clean_text) clean_text = re.sub(r'[*_`|>~\-]{2,}', '', clean_text) - clean_text = re.sub(r'问题\d+[::]\s*', '', clean_text) + clean_text = re.sub(r'\d+[:]\s*', '', clean_text) clean_text = re.sub(r'【[^】]+】', '', clean_text) - # 策略1(主): 提取完整的有实质内容的句子 + # 1: Conteúdo sentences = re.split(r'[。!?]', clean_text) meaningful = [ s.strip() for s in sentences if 20 <= len(s.strip()) <= 150 and not re.match(r'^[\s\W,,;;::、]+', s.strip()) - and not s.strip().startswith(('{', '问题')) + and not s.strip().startswith(('{', '')) ] meaningful.sort(key=len, reverse=True) key_quotes = [s + "。" for s in meaningful[:3]] - # 策略2(补充): 正确配对的中文引号「」内长文本 if not key_quotes: paired = re.findall(r'\u201c([^\u201c\u201d]{15,100})\u201d', clean_text) paired += re.findall(r'\u300c([^\u300c\u300d]{15,100})\u300d', clean_text) @@ -1450,7 
+1408,7 @@ def interview_agents( interview = AgentInterview( agent_name=agent_name, agent_role=agent_role, - agent_bio=agent_bio[:1000], # 扩大bio长度限制 + agent_bio=agent_bio[:1000], # bio question=combined_prompt, response=response_text, key_quotes=key_quotes[:5] @@ -1460,18 +1418,18 @@ def interview_agents( result.interviewed_count = len(result.interviews) except ValueError as e: - # 模拟环境未运行 + # Simulação logger.warning(t("console.interviewApiCallFailed", error=e)) - result.summary = f"采访失败:{str(e)}。模拟环境可能已关闭,请确保OASIS环境正在运行。" + result.summary = f"Falhou{str(e)}SimulaçãoOASIS" return result except Exception as e: logger.error(t("console.interviewApiCallException", error=e)) import traceback logger.error(traceback.format_exc()) - result.summary = f"采访过程发生错误:{str(e)}" + result.summary = f"{str(e)}" return result - # Step 6: 生成采访摘要 + # Step 6: Gerar if result.interviews: result.summary = self._generate_interview_summary( interviews=result.interviews, @@ -1483,7 +1441,7 @@ def interview_agents( @staticmethod def _clean_tool_call_response(response: str) -> str: - """清理 Agent 回复中的 JSON 工具调用包裹,提取实际内容""" + """ Agent JSON FerramentaConteúdo""" if not response or not response.strip().startswith('{'): return response text = response.strip() @@ -1503,11 +1461,10 @@ def _clean_tool_call_response(response: str) -> str: return response def _load_agent_profiles(self, simulation_id: str) -> List[Dict[str, Any]]: - """加载模拟的Agent人设文件""" + """SimulaçãoAgent""" import os import csv - # 构建人设文件路径 sim_dir = os.path.join( os.path.dirname(__file__), f'../../uploads/simulations/{simulation_id}' @@ -1515,7 +1472,7 @@ def _load_agent_profiles(self, simulation_id: str) -> List[Dict[str, Any]]: profiles = [] - # 优先尝试读取Reddit JSON格式 + # Reddit JSON reddit_profile_path = os.path.join(sim_dir, "reddit_profiles.json") if os.path.exists(reddit_profile_path): try: @@ -1526,20 +1483,19 @@ def _load_agent_profiles(self, simulation_id: str) -> List[Dict[str, Any]]: except Exception as e: 
logger.warning(t("console.readRedditProfilesFailed", error=e)) - # 尝试读取Twitter CSV格式 + # Twitter CSV twitter_profile_path = os.path.join(sim_dir, "twitter_profiles.csv") if os.path.exists(twitter_profile_path): try: with open(twitter_profile_path, 'r', encoding='utf-8') as f: reader = csv.DictReader(f) for row in reader: - # CSV格式转换为统一格式 profiles.append({ "realname": row.get("name", ""), "username": row.get("username", ""), "bio": row.get("description", ""), "persona": row.get("user_char", ""), - "profession": "未知" + "profession": "" }) logger.info(t("console.loadedTwitterProfiles", count=len(profiles))) return profiles @@ -1556,51 +1512,51 @@ def _select_agents_for_interview( max_agents: int ) -> tuple: """ - 使用LLM选择要采访的Agent + LLMAgent Returns: tuple: (selected_agents, selected_indices, reasoning) - - selected_agents: 选中Agent的完整信息列表 - - selected_indices: 选中Agent的索引列表(用于API调用) - - reasoning: 选择理由 + - selected_agents: Agent + - selected_indices: AgentAPI + - reasoning: """ - # 构建Agent摘要列表 + # Agent agent_summaries = [] for i, profile in enumerate(profiles): summary = { "index": i, "name": profile.get("realname", profile.get("username", f"Agent_{i}")), - "profession": profile.get("profession", "未知"), + "profession": profile.get("profession", ""), "bio": profile.get("bio", "")[:200], "interested_topics": profile.get("interested_topics", []) } agent_summaries.append(summary) - system_prompt = """你是一个专业的采访策划专家。你的任务是根据采访需求,从模拟Agent列表中选择最适合采访的对象。 + system_prompt = """Você é um especialista em selecionar os agentes mais relevantes para uma entrevista. -选择标准: -1. Agent的身份/职业与采访主题相关 -2. Agent可能持有独特或有价值的观点 -3. 选择多样化的视角(如:支持方、反对方、中立方、专业人士等) -4. 优先选择与事件直接相关的角色 +REGRAS: +1. Selecione agentes com perspectivas diversas e relevantes para o tema +2. Priorize agentes com opiniões fortes (a favor ou contra) +3. Inclua uma mistura de perfis (diferentes papéis, idades, posições) +4. 
Justifique sua seleção -返回JSON格式: +Retorne JSON no formato: { - "selected_indices": [选中Agent的索引列表], - "reasoning": "选择理由说明" + "selected_indices": [índices dos agentes selecionados], + "reasoning": "Justificativa da seleção em português do Brasil" }""" - user_prompt = f"""采访需求: + user_prompt = f""" {interview_requirement} -模拟背景: -{simulation_requirement if simulation_requirement else "未提供"} +Simulação +{simulation_requirement if simulation_requirement else ""} -可选择的Agent列表(共{len(agent_summaries)}个): +Agent{len(agent_summaries)} {json.dumps(agent_summaries, ensure_ascii=False, indent=2)} -请选择最多{max_agents}个最适合采访的Agent,并说明选择理由。""" +Selecione no máximo {max_agents} agentes mais relevantes para o tema.""" try: response = self.llm.chat_json( @@ -1612,9 +1568,9 @@ def _select_agents_for_interview( ) selected_indices = response.get("selected_indices", [])[:max_agents] - reasoning = response.get("reasoning", "基于相关性自动选择") + reasoning = response.get("reasoning", "") - # 获取选中的Agent完整信息 + # Agent selected_agents = [] valid_indices = [] for idx in selected_indices: @@ -1626,10 +1582,9 @@ def _select_agents_for_interview( except Exception as e: logger.warning(t("console.llmSelectAgentFailed", error=e)) - # 降级:选择前N个 selected = profiles[:max_agents] indices = list(range(min(max_agents, len(profiles)))) - return selected, indices, "使用默认选择策略" + return selected, indices, "" def _generate_interview_questions( self, @@ -1637,29 +1592,30 @@ def _generate_interview_questions( simulation_requirement: str, selected_agents: List[Dict[str, Any]] ) -> List[str]: - """使用LLM生成采访问题""" + """LLMGerar""" - agent_roles = [a.get("profession", "未知") for a in selected_agents] + agent_roles = [a.get("profession", "") for a in selected_agents] - system_prompt = """你是一个专业的记者/采访者。根据采访需求,生成3-5个深度采访问题。 + system_prompt = """Você é um especialista em conduzir entrevistas qualitativas de pesquisa de mercado. +Gere 3 a 5 perguntas de entrevista relevantes e incisivas. -问题要求: -1. 开放性问题,鼓励详细回答 -2. 针对不同角色可能有不同答案 -3. 
涵盖事实、观点、感受等多个维度 -4. 语言自然,像真实采访一样 -5. 每个问题控制在50字以内,简洁明了 -6. 直接提问,不要包含背景说明或前缀 +REGRAS: +1. Perguntas devem ser abertas e provocativas +2. Explore motivações, medos e expectativas +3. Inclua perguntas sobre comportamento e decisão +4. Adapte ao perfil dos entrevistados +5. Cada pergunta deve ter no máximo 50 palavras +6. Escreva TUDO em português do Brasil -返回JSON格式:{"questions": ["问题1", "问题2", ...]}""" +Retorne JSON no formato: {"questions": ["pergunta 1", "pergunta 2", ...]}""" - user_prompt = f"""采访需求:{interview_requirement} + user_prompt = f"""{interview_requirement} -模拟背景:{simulation_requirement if simulation_requirement else "未提供"} +Simulação{simulation_requirement if simulation_requirement else ""} -采访对象角色:{', '.join(agent_roles)} +{', '.join(agent_roles)} -请生成3-5个采访问题。""" +Gere 3 a 5 perguntas de entrevista em português do Brasil.""" try: response = self.llm.chat_json( @@ -1670,14 +1626,14 @@ def _generate_interview_questions( temperature=0.5 ) - return response.get("questions", [f"关于{interview_requirement},您有什么看法?"]) + return response.get("questions", [f"{interview_requirement}"]) except Exception as e: logger.warning(t("console.generateInterviewQuestionsFailed", error=e)) return [ - f"关于{interview_requirement},您的观点是什么?", - "这件事对您或您所代表的群体有什么影响?", - "您认为应该如何解决或改进这个问题?" + f"{interview_requirement}", + "", + "" ] def _generate_interview_summary( @@ -1685,39 +1641,40 @@ def _generate_interview_summary( interviews: List[AgentInterview], interview_requirement: str ) -> str: - """生成采访摘要""" + """Gerar""" if not interviews: - return "未完成任何采访" + return "" - # 收集所有采访内容 + # Conteúdo interview_texts = [] for interview in interviews: interview_texts.append(f"【{interview.agent_name}({interview.agent_role})】\n{interview.response[:500]}") - quote_instruction = "引用受访者原话时使用中文引号「」" if get_locale() == 'zh' else 'Use quotation marks "" when quoting interviewees' - system_prompt = f"""你是一个专业的新闻编辑。请根据多位受访者的回答,生成一份采访摘要。 - -摘要要求: -1. 提炼各方主要观点 -2. 指出观点的共识和分歧 -3. 突出有价值的引言 -4. 
客观中立,不偏袒任何一方 -5. 控制在1000字内 - -格式约束(必须遵守): -- 使用纯文本段落,用空行分隔不同部分 -- 不要使用Markdown标题(如#、##、###) -- 不要使用分割线(如---、***) + quote_instruction = "" if get_locale() == 'zh' else 'Use quotation marks "" when quoting interviewees' + system_prompt = f"""Você é um especialista em sintetizar entrevistas qualitativas para relatórios executivos. +Gere um resumo estruturado das entrevistas. + +REGRAS: +1. Identifique os principais temas e padrões nas respostas +2. Destaque pontos de convergência e divergência entre entrevistados +3. Extraia insights acionáveis +4. Mantenha citações relevantes dos entrevistados +5. Máximo de 1000 palavras + +FORMATO: +- Escreva em português do Brasil +- NÃO use títulos Markdown (##, ###, ####) +- Use travessão (—) para separar seções - {quote_instruction} -- 可以使用**加粗**标记关键词,但不要使用其他Markdown语法""" +- Use **negrito** para destaques, sem abusar""" - user_prompt = f"""采访主题:{interview_requirement} + user_prompt = f"""Tema da entrevista: {interview_requirement} -采访内容: +Respostas dos entrevistados: {"".join(interview_texts)} -请生成采访摘要。""" +Gere um resumo analítico em português do Brasil.""" try: summary = self.llm.chat( @@ -1732,5 +1689,4 @@ def _generate_interview_summary( except Exception as e: logger.warning(t("console.generateInterviewSummaryFailed", error=e)) - # 降级:简单拼接 - return f"共采访了{len(interviews)}位受访者,包括:" + "、".join([i.agent_name for i in interviews]) + return f"{len(interviews)}" + "".join([i.agent_name for i in interviews]) diff --git a/backend/app/utils/__init__.py b/backend/app/utils/__init__.py index e70161acb9..e3aa15ef8e 100644 --- a/backend/app/utils/__init__.py +++ b/backend/app/utils/__init__.py @@ -1,5 +1,5 @@ """ -工具模块 +Ferramenta """ from .file_parser import FileParser diff --git a/backend/app/utils/file_parser.py b/backend/app/utils/file_parser.py index 3f1d8ed2e7..74d2eeeed8 100644 --- a/backend/app/utils/file_parser.py +++ b/backend/app/utils/file_parser.py @@ -1,6 +1,6 @@ """ -文件解析工具 -支持PDF、Markdown、TXT文件的文本提取 +Ferramenta 
+PDFMarkdownTXT """ import os @@ -10,29 +10,28 @@ def _read_text_with_fallback(file_path: str) -> str: """ - 读取文本文件,UTF-8失败时自动探测编码。 + UTF-8Falhou - 采用多级回退策略: - 1. 首先尝试 UTF-8 解码 - 2. 使用 charset_normalizer 检测编码 - 3. 回退到 chardet 检测编码 - 4. 最终使用 UTF-8 + errors='replace' 兜底 + 1. UTF-8 + 2. charset_normalizer + 3. chardet + 4. UTF-8 + errors='replace' Args: - file_path: 文件路径 + file_path: Returns: - 解码后的文本内容 + Conteúdo """ data = Path(file_path).read_bytes() - # 首先尝试 UTF-8 + # UTF-8 try: return data.decode('utf-8') except UnicodeDecodeError: pass - # 尝试使用 charset_normalizer 检测编码 + # charset_normalizer encoding = None try: from charset_normalizer import from_bytes @@ -42,7 +41,7 @@ def _read_text_with_fallback(file_path: str) -> str: except Exception: pass - # 回退到 chardet + # chardet if not encoding: try: import chardet @@ -51,7 +50,7 @@ def _read_text_with_fallback(file_path: str) -> str: except Exception: pass - # 最终兜底:使用 UTF-8 + replace + # UTF-8 + replace if not encoding: encoding = 'utf-8' @@ -59,30 +58,29 @@ def _read_text_with_fallback(file_path: str) -> str: class FileParser: - """文件解析器""" + """""" SUPPORTED_EXTENSIONS = {'.pdf', '.md', '.markdown', '.txt'} @classmethod def extract_text(cls, file_path: str) -> str: """ - 从文件中提取文本 Args: - file_path: 文件路径 + file_path: Returns: - 提取的文本内容 + Conteúdo """ path = Path(file_path) if not path.exists(): - raise FileNotFoundError(f"文件不存在: {file_path}") + raise FileNotFoundError(f"Arquivo não encontrado: {file_path}") suffix = path.suffix.lower() if suffix not in cls.SUPPORTED_EXTENSIONS: - raise ValueError(f"不支持的文件格式: {suffix}") + raise ValueError(f"Formato de arquivo não suportado: {suffix}") if suffix == '.pdf': return cls._extract_from_pdf(file_path) @@ -91,15 +89,15 @@ def extract_text(cls, file_path: str) -> str: elif suffix == '.txt': return cls._extract_from_txt(file_path) - raise ValueError(f"无法处理的文件格式: {suffix}") + raise ValueError(f"Formato de arquivo não processável: {suffix}") @staticmethod def 
_extract_from_pdf(file_path: str) -> str: - """从PDF提取文本""" + """PDF""" try: import fitz # PyMuPDF except ImportError: - raise ImportError("需要安装PyMuPDF: pip install PyMuPDF") + raise ImportError("PyMuPDF is required: pip install PyMuPDF") text_parts = [] with fitz.open(file_path) as doc: @@ -112,24 +110,22 @@ def _extract_from_pdf(file_path: str) -> str: @staticmethod def _extract_from_md(file_path: str) -> str: - """从Markdown提取文本,支持自动编码检测""" + """Markdown""" return _read_text_with_fallback(file_path) @staticmethod def _extract_from_txt(file_path: str) -> str: - """从TXT提取文本,支持自动编码检测""" + """TXT""" return _read_text_with_fallback(file_path) @classmethod def extract_from_multiple(cls, file_paths: List[str]) -> str: """ - 从多个文件提取文本并合并 Args: - file_paths: 文件路径列表 + file_paths: Returns: - 合并后的文本 """ all_texts = [] @@ -137,9 +133,9 @@ def extract_from_multiple(cls, file_paths: List[str]) -> str: try: text = cls.extract_text(file_path) filename = Path(file_path).name - all_texts.append(f"=== 文档 {i}: {filename} ===\n{text}") + all_texts.append(f"=== {i}: {filename} ===\n{text}") except Exception as e: - all_texts.append(f"=== 文档 {i}: {file_path} (提取失败: {str(e)}) ===") + all_texts.append(f"=== {i}: {file_path} (Falhou: {str(e)}) ===") return "\n\n".join(all_texts) @@ -150,15 +146,13 @@ def split_text_into_chunks( overlap: int = 50 ) -> List[str]: """ - 将文本分割成小块 Args: - text: 原始文本 - chunk_size: 每块的字符数 - overlap: 重叠字符数 + text: + chunk_size: + overlap: Returns: - 文本块列表 """ if len(text) <= chunk_size: return [text] if text.strip() else [] @@ -169,9 +163,7 @@ def split_text_into_chunks( while start < len(text): end = start + chunk_size - # 尝试在句子边界处分割 if end < len(text): - # 查找最近的句子结束符 for sep in ['。', '!', '?', '.\n', '!\n', '?\n', '\n\n', '. ', '! ', '? 
']: last_sep = text[start:end].rfind(sep) if last_sep != -1 and last_sep > chunk_size * 0.3: @@ -182,8 +174,6 @@ def split_text_into_chunks( if chunk: chunks.append(chunk) - # 下一个块从重叠位置开始 start = end - overlap if end < len(text) else len(text) return chunks - diff --git a/backend/app/utils/llm_client.py b/backend/app/utils/llm_client.py index 6c1a81f49b..35e89df471 100644 --- a/backend/app/utils/llm_client.py +++ b/backend/app/utils/llm_client.py @@ -1,37 +1,154 @@ """ -LLM客户端封装 -统一使用OpenAI格式调用 +LLM +OpenAI + +AUGUR Enhancement: Rate limit handling com backoff exponencial, +sleep inteligente e abort após timeout configurável. """ import json import re +import time +import logging from typing import Optional, Dict, Any, List -from openai import OpenAI +from openai import OpenAI, RateLimitError, APIStatusError from ..config import Config +logger = logging.getLogger(__name__) + +# ─── Configuração de retry ───────────────────────────────────── +RATE_LIMIT_MAX_RETRIES = 8 # tentativas máximas por chamada +RATE_LIMIT_BASE_SLEEP = 5.0 # sleep inicial em segundos +RATE_LIMIT_MAX_SLEEP = 120.0 # sleep máximo (2 minutos) +RATE_LIMIT_BACKOFF_FACTOR = 2.0 # fator exponencial +CONTEXT_MAX_RETRIES = 2 # tentativas para context_length_exceeded + class LLMClient: - """LLM客户端""" - + """LLM com rate limit handling robusto""" + def __init__( self, api_key: Optional[str] = None, base_url: Optional[str] = None, model: Optional[str] = None ): - self.api_key = api_key or Config.LLM_API_KEY + self.api_key = api_key or Config.LLM_API_KEY self.base_url = base_url or Config.LLM_BASE_URL - self.model = model or Config.LLM_MODEL_NAME - + self.model = model or Config.LLM_MODEL_NAME + if not self.api_key: - raise ValueError("LLM_API_KEY 未配置") - + raise ValueError("LLM_API_KEY Configuração") + self.client = OpenAI( api_key=self.api_key, base_url=self.base_url ) - + + # ─── Método principal com retry ─────────────────────────── + def _call_with_retry(self, kwargs: dict) -> str: + """ + Executa a 
chamada à OpenAI com: + - Retry automático em rate limit (429) com backoff exponencial + - Sleep progressivo para não agravar o rate limit + - Abort após RATE_LIMIT_MAX_RETRIES tentativas + - Handling de context_length_exceeded (trunca e retenta) + """ + sleep_time = RATE_LIMIT_BASE_SLEEP + messages = kwargs.get("messages", []) + + for attempt in range(1, RATE_LIMIT_MAX_RETRIES + 1): + try: + response = self.client.chat.completions.create(**kwargs) + content = response.choices[0].message.content + # Remove thinking tags de alguns modelos + content = re.sub(r'[\s\S]*?', '', content).strip() + + # Sleep mínimo entre chamadas para evitar burst + time.sleep(0.3) + return content + + except RateLimitError as e: + if attempt >= RATE_LIMIT_MAX_RETRIES: + logger.error( + f"[LLMClient] Rate limit esgotado após {RATE_LIMIT_MAX_RETRIES} tentativas. " + f"Abortando chamada. Erro: {e}" + ) + raise RuntimeError( + f"Rate limit da OpenAI esgotado após {RATE_LIMIT_MAX_RETRIES} tentativas. " + f"Aguarde alguns minutos e tente novamente." + ) from e + + actual_sleep = min(sleep_time, RATE_LIMIT_MAX_SLEEP) + logger.warning( + f"[LLMClient] Rate limit (429) — tentativa {attempt}/{RATE_LIMIT_MAX_RETRIES}. " + f"Aguardando {actual_sleep:.1f}s antes de retomar..." + ) + time.sleep(actual_sleep) + sleep_time *= RATE_LIMIT_BACKOFF_FACTOR + + except APIStatusError as e: + # Context length exceeded — tentar truncar e retentar + if e.status_code == 400 and 'context_length_exceeded' in str(e): + messages = kwargs.get("messages", []) + truncated = self._truncate_messages(messages) + if truncated and attempt <= CONTEXT_MAX_RETRIES: + logger.warning( + f"[LLMClient] Context length exceeded — truncando mensagens " + f"e retentando (tentativa {attempt}/{CONTEXT_MAX_RETRIES})..." + ) + kwargs = {**kwargs, "messages": truncated} + time.sleep(2) + continue + else: + logger.error(f"[LLMClient] Context length exceeded e não foi possível truncar. 
Abortando.") + raise RuntimeError( + "Contexto da conversa excedeu o limite do modelo. " + "Reduza o número de rodadas ou use menos agentes." + ) from e + + # Outros erros 4xx/5xx — não retenta + logger.error(f"[LLMClient] Erro da API (status {e.status_code}): {e}") + raise + + except Exception as e: + # Erros de rede/timeout — retenta com backoff menor + if attempt >= RATE_LIMIT_MAX_RETRIES: + logger.error(f"[LLMClient] Erro persistente após {attempt} tentativas: {e}") + raise + + actual_sleep = min(sleep_time / 2, 30.0) + logger.warning( + f"[LLMClient] Erro de rede (tentativa {attempt}/{RATE_LIMIT_MAX_RETRIES}): {e}. " + f"Retentando em {actual_sleep:.1f}s..." + ) + time.sleep(actual_sleep) + + raise RuntimeError(f"Falha após {RATE_LIMIT_MAX_RETRIES} tentativas.") + + def _truncate_messages(self, messages: list) -> Optional[list]: + """ + Trunca o histórico de mensagens para caber no contexto. + Mantém sempre: system prompt + última mensagem do usuário. + Remove mensagens do meio (mais antigas) progressivamente. + """ + if len(messages) <= 2: + return None # Não dá para truncar mais + + system_msgs = [m for m in messages if m.get("role") == "system"] + user_msgs = [m for m in messages if m.get("role") != "system"] + + # Manter sistema + 50% das mensagens mais recentes + keep = max(1, len(user_msgs) // 2) + truncated = system_msgs + user_msgs[-keep:] + + logger.info( + f"[LLMClient] Mensagens truncadas: {len(messages)} → {len(truncated)}" + ) + return truncated + + # ─── API pública ────────────────────────────────────────── def chat( self, messages: List[Dict[str, str]], @@ -40,33 +157,19 @@ def chat( response_format: Optional[Dict] = None ) -> str: """ - 发送聊天请求 - - Args: - messages: 消息列表 - temperature: 温度参数 - max_tokens: 最大token数 - response_format: 响应格式(如JSON模式) - - Returns: - 模型响应文本 + Envia request para o LLM com rate limit handling automático. 
""" - kwargs = { - "model": self.model, - "messages": messages, + kwargs: dict = { + "model": self.model, + "messages": messages, "temperature": temperature, - "max_tokens": max_tokens, + "max_completion_tokens": max_tokens, } - if response_format: kwargs["response_format"] = response_format - - response = self.client.chat.completions.create(**kwargs) - content = response.choices[0].message.content - # 部分模型(如MiniMax M2.5)会在content中包含思考内容,需要移除 - content = re.sub(r'[\s\S]*?', '', content).strip() - return content - + + return self._call_with_retry(kwargs) + def chat_json( self, messages: List[Dict[str, str]], @@ -74,15 +177,7 @@ def chat_json( max_tokens: int = 4096 ) -> Dict[str, Any]: """ - 发送聊天请求并返回JSON - - Args: - messages: 消息列表 - temperature: 温度参数 - max_tokens: 最大token数 - - Returns: - 解析后的JSON对象 + Envia request e retorna JSON parseado, com rate limit handling. """ response = self.chat( messages=messages, @@ -90,14 +185,13 @@ def chat_json( max_tokens=max_tokens, response_format={"type": "json_object"} ) - # 清理markdown代码块标记 - cleaned_response = response.strip() - cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE) - cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response) - cleaned_response = cleaned_response.strip() + # Limpar markdown code blocks + cleaned = response.strip() + cleaned = re.sub(r'^```(?:json)?\s*\n?', '', cleaned, flags=re.IGNORECASE) + cleaned = re.sub(r'\n?```\s*$', '', cleaned) + cleaned = cleaned.strip() try: - return json.loads(cleaned_response) + return json.loads(cleaned) except json.JSONDecodeError: - raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}") - + raise ValueError(f"LLM retornou JSON inválido: {cleaned[:200]}") diff --git a/backend/app/utils/locale.py b/backend/app/utils/locale.py index 23d04aa9d5..f72e3aa482 100644 --- a/backend/app/utils/locale.py +++ b/backend/app/utils/locale.py @@ -19,39 +19,56 @@ with open(os.path.join(_locales_dir, filename), 'r', encoding='utf-8') as f: 
_translations[locale_name] = json.load(f) +# AUGUR: idioma padrão = português do Brasil +DEFAULT_LOCALE = 'pt' + + +def _normalize(raw: str) -> str: + """Normaliza variantes de locale para o código canônico registrado em _translations.""" + if not raw: + return DEFAULT_LOCALE + lower = raw.lower().strip() + # Variantes de português → 'pt' + if lower in ('pt-br', 'pt-pt', 'pt_br', 'pt_pt', 'pt'): + return 'pt' + # Retorna exatamente como veio se já existe em _translations + if raw in _translations: + return raw + # Tenta prefixo de 2 chars (ex: 'en-US' → 'en') + prefix = lower[:2] + if prefix in _translations: + return prefix + return DEFAULT_LOCALE + def set_locale(locale: str): """Set locale for current thread. Call at the start of background threads.""" - _thread_local.locale = locale + _thread_local.locale = _normalize(locale) def get_locale() -> str: if has_request_context(): - raw = request.headers.get('Accept-Language', 'zh') - return raw if raw in _translations else 'zh' - return getattr(_thread_local, 'locale', 'zh') + raw = request.headers.get('Accept-Language', DEFAULT_LOCALE) + return _normalize(raw) + return getattr(_thread_local, 'locale', DEFAULT_LOCALE) def t(key: str, **kwargs) -> str: locale = get_locale() - messages = _translations.get(locale, _translations.get('zh', {})) - - value = messages - for part in key.split('.'): - if isinstance(value, dict): - value = value.get(part) - else: - value = None - break - - if value is None: - value = _translations.get('zh', {}) + # Fallback chain: locale → pt → en → zh + value = None + for fallback in [locale, DEFAULT_LOCALE, 'en']: # sem zh # zh removido — nunca retornar chinês + messages = _translations.get(fallback, {}) + v = messages for part in key.split('.'): - if isinstance(value, dict): - value = value.get(part) + if isinstance(v, dict): + v = v.get(part) else: - value = None + v = None break + if v is not None: + value = v + break if value is None: return key @@ -65,5 +82,9 @@ def t(key: str, 
**kwargs) -> str: def get_language_instruction() -> str: locale = get_locale() - lang_config = _languages.get(locale, _languages.get('zh', {})) - return lang_config.get('llmInstruction', '请使用中文回答。') + # Fallback chain: locale → pt → en + for fallback in [locale, DEFAULT_LOCALE, 'en']: + lang_config = _languages.get(fallback) + if lang_config: + return lang_config.get('llmInstruction', 'Please respond in Brazilian Portuguese.') + return 'Please respond in Brazilian Portuguese.' diff --git a/backend/app/utils/logger.py b/backend/app/utils/logger.py index 1978c0b84f..84a73abeb3 100644 --- a/backend/app/utils/logger.py +++ b/backend/app/utils/logger.py @@ -1,6 +1,5 @@ """ -日志配置模块 -提供统一的日志管理,同时输出到控制台和文件 +Configuração """ import os @@ -12,47 +11,41 @@ def _ensure_utf8_stdout(): """ - 确保 stdout/stderr 使用 UTF-8 编码 - 解决 Windows 控制台中文乱码问题 + stdout/stderr UTF-8 + Windows """ if sys.platform == 'win32': - # Windows 下重新配置标准输出为 UTF-8 + # Windows Configuração UTF-8 if hasattr(sys.stdout, 'reconfigure'): sys.stdout.reconfigure(encoding='utf-8', errors='replace') if hasattr(sys.stderr, 'reconfigure'): sys.stderr.reconfigure(encoding='utf-8', errors='replace') -# 日志目录 LOG_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'logs') def setup_logger(name: str = 'mirofish', level: int = logging.DEBUG) -> logging.Logger: """ - 设置日志器 Args: - name: 日志器名称 - level: 日志级别 + name: + level: Returns: - 配置好的日志器 + Configuração """ - # 确保日志目录存在 os.makedirs(LOG_DIR, exist_ok=True) - # 创建日志器 logger = logging.getLogger(name) logger.setLevel(level) - # 阻止日志向上传播到根 logger,避免重复输出 + # logger logger.propagate = False - # 如果已经有处理器,不重复添加 if logger.handlers: return logger - # 日志格式 detailed_formatter = logging.Formatter( '[%(asctime)s] %(levelname)s [%(name)s.%(funcName)s:%(lineno)d] %(message)s', datefmt='%Y-%m-%d %H:%M:%S' @@ -63,7 +56,7 @@ def setup_logger(name: str = 'mirofish', level: int = logging.DEBUG) -> logging. datefmt='%H:%M:%S' ) - # 1. 文件处理器 - 详细日志(按日期命名,带轮转) + # 1. 
- log_filename = datetime.now().strftime('%Y-%m-%d') + '.log' file_handler = RotatingFileHandler( os.path.join(LOG_DIR, log_filename), @@ -74,14 +67,13 @@ def setup_logger(name: str = 'mirofish', level: int = logging.DEBUG) -> logging. file_handler.setLevel(logging.DEBUG) file_handler.setFormatter(detailed_formatter) - # 2. 控制台处理器 - 简洁日志(INFO及以上) - # 确保 Windows 下使用 UTF-8 编码,避免中文乱码 + # 2. - INFO + # Windows UTF-8 _ensure_utf8_stdout() console_handler = logging.StreamHandler(sys.stdout) console_handler.setLevel(logging.INFO) console_handler.setFormatter(simple_formatter) - # 添加处理器 logger.addHandler(file_handler) logger.addHandler(console_handler) @@ -90,13 +82,11 @@ def setup_logger(name: str = 'mirofish', level: int = logging.DEBUG) -> logging. def get_logger(name: str = 'mirofish') -> logging.Logger: """ - 获取日志器(如果不存在则创建) Args: - name: 日志器名称 + name: Returns: - 日志器实例 """ logger = logging.getLogger(name) if not logger.handlers: @@ -104,11 +94,9 @@ def get_logger(name: str = 'mirofish') -> logging.Logger: return logger -# 创建默认日志器 logger = setup_logger() -# 便捷方法 def debug(msg, *args, **kwargs): logger.debug(msg, *args, **kwargs) diff --git a/backend/app/utils/retry.py b/backend/app/utils/retry.py index 819b1cfcf2..85d3d9abfc 100644 --- a/backend/app/utils/retry.py +++ b/backend/app/utils/retry.py @@ -1,6 +1,6 @@ """ -API调用重试机制 -用于处理LLM等外部API调用的重试逻辑 +API +LLMAPI """ import time @@ -22,16 +22,15 @@ def retry_with_backoff( on_retry: Optional[Callable[[Exception, int], None]] = None ): """ - 带指数退避的重试装饰器 Args: - max_retries: 最大重试次数 - initial_delay: 初始延迟(秒) - max_delay: 最大延迟(秒) - backoff_factor: 退避因子 - jitter: 是否添加随机抖动 - exceptions: 需要重试的异常类型 - on_retry: 重试时的回调函数 (exception, retry_count) + max_retries: + initial_delay: + max_delay: + backoff_factor: + jitter: + exceptions: + on_retry: (exception, retry_count) Usage: @retry_with_backoff(max_retries=3) @@ -52,17 +51,16 @@ def wrapper(*args, **kwargs) -> Any: last_exception = e if attempt == max_retries: - logger.error(f"函数 
{func.__name__} 在 {max_retries} 次重试后仍失败: {str(e)}") + logger.error(f" {func.__name__} {max_retries} Falhou: {str(e)}") raise - # 计算延迟 current_delay = min(delay, max_delay) if jitter: current_delay = current_delay * (0.5 + random.random()) logger.warning( - f"函数 {func.__name__} 第 {attempt + 1} 次尝试失败: {str(e)}, " - f"{current_delay:.1f}秒后重试..." + f" {func.__name__} {attempt + 1} Falhou: {str(e)}, " + f"{current_delay:.1f}..." ) if on_retry: @@ -87,7 +85,6 @@ def retry_with_backoff_async( on_retry: Optional[Callable[[Exception, int], None]] = None ): """ - 异步版本的重试装饰器 """ import asyncio @@ -105,7 +102,7 @@ async def wrapper(*args, **kwargs) -> Any: last_exception = e if attempt == max_retries: - logger.error(f"异步函数 {func.__name__} 在 {max_retries} 次重试后仍失败: {str(e)}") + logger.error(f" {func.__name__} {max_retries} Falhou: {str(e)}") raise current_delay = min(delay, max_delay) @@ -113,8 +110,8 @@ async def wrapper(*args, **kwargs) -> Any: current_delay = current_delay * (0.5 + random.random()) logger.warning( - f"异步函数 {func.__name__} 第 {attempt + 1} 次尝试失败: {str(e)}, " - f"{current_delay:.1f}秒后重试..." + f" {func.__name__} {attempt + 1} Falhou: {str(e)}, " + f"{current_delay:.1f}..." 
) if on_retry: @@ -131,7 +128,7 @@ async def wrapper(*args, **kwargs) -> Any: class RetryableAPIClient: """ - 可重试的API客户端封装 + API """ def __init__( @@ -154,16 +151,15 @@ def call_with_retry( **kwargs ) -> Any: """ - 执行函数调用并在失败时重试 + Falhou Args: - func: 要调用的函数 - *args: 函数参数 - exceptions: 需要重试的异常类型 - **kwargs: 函数关键字参数 + func: + *args: + exceptions: + **kwargs: Returns: - 函数返回值 """ last_exception = None delay = self.initial_delay @@ -176,15 +172,15 @@ def call_with_retry( last_exception = e if attempt == self.max_retries: - logger.error(f"API调用在 {self.max_retries} 次重试后仍失败: {str(e)}") + logger.error(f"API {self.max_retries} Falhou: {str(e)}") raise current_delay = min(delay, self.max_delay) current_delay = current_delay * (0.5 + random.random()) logger.warning( - f"API调用第 {attempt + 1} 次尝试失败: {str(e)}, " - f"{current_delay:.1f}秒后重试..." + f"API {attempt + 1} Falhou: {str(e)}, " + f"{current_delay:.1f}..." ) time.sleep(current_delay) @@ -200,16 +196,16 @@ def call_batch_with_retry( continue_on_failure: bool = True ) -> Tuple[list, list]: """ - 批量调用并对每个失败项单独重试 + Falhou Args: - items: 要处理的项目列表 - process_func: 处理函数,接收单个item作为参数 - exceptions: 需要重试的异常类型 - continue_on_failure: 单项失败后是否继续处理其他项 + items: + process_func: item + exceptions: + continue_on_failure: Falhou Returns: - (成功结果列表, 失败项列表) + (Resultado, Falhou) """ results = [] failures = [] @@ -224,7 +220,7 @@ def call_batch_with_retry( results.append(result) except Exception as e: - logger.error(f"处理第 {idx + 1} 项失败: {str(e)}") + logger.error(f" {idx + 1} Falhou: {str(e)}") failures.append({ "index": idx, "item": item, diff --git a/backend/app/utils/zep_paging.py b/backend/app/utils/zep_paging.py index 943cd1ae29..04bb4b84ca 100644 --- a/backend/app/utils/zep_paging.py +++ b/backend/app/utils/zep_paging.py @@ -1,7 +1,6 @@ -"""Zep Graph 分页读取工具。 +"""Zep Graph Ferramenta -Zep 的 node/edge 列表接口使用 UUID cursor 分页, -本模块封装自动翻页逻辑(含单页重试),对调用方透明地返回完整列表。 +Zep node/edge UUID cursor """ from __future__ import annotations @@ -31,7 +30,7 @@ 
def _fetch_page_with_retry( page_description: str = "page", **kwargs: Any, ) -> list[Any]: - """单页请求,失败时指数退避重试。仅重试网络/IO类瞬态错误。""" + """Falhou/IO""" if max_retries < 1: raise ValueError("max_retries must be >= 1") @@ -64,7 +63,7 @@ def fetch_all_nodes( max_retries: int = _DEFAULT_MAX_RETRIES, retry_delay: float = _DEFAULT_RETRY_DELAY, ) -> list[Any]: - """分页获取图谱节点,最多返回 max_items 条(默认 2000)。每页请求自带重试。""" + """Grafo max_items 2000""" all_nodes: list[Any] = [] cursor: str | None = None page_num = 0 @@ -109,7 +108,7 @@ def fetch_all_edges( max_retries: int = _DEFAULT_MAX_RETRIES, retry_delay: float = _DEFAULT_RETRY_DELAY, ) -> list[Any]: - """分页获取图谱所有边,返回完整列表。每页请求自带重试。""" + """Grafo""" all_edges: list[Any] = [] cursor: str | None = None page_num = 0 diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 4f5361d537..3196e84892 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "mirofish-backend" version = "0.1.0" -description = "MiroFish - 简洁通用的群体智能引擎,预测万物" +description = "AUGUR - Plataforma de previsao de mercado por IA" requires-python = ">=3.11" license = { text = "AGPL-3.0" } authors = [ @@ -32,6 +32,10 @@ dependencies = [ # 工具库 "python-dotenv>=1.0.0", "pydantic>=2.0.0", + + # AUGUR extras + "fpdf2>=2.7.0", + "flask-jwt-extended>=4.6.0", ] [project.optional-dependencies] diff --git a/backend/run.py b/backend/run.py index 4e3b04fa96..807ab90677 100644 --- a/backend/run.py +++ b/backend/run.py @@ -1,21 +1,20 @@ """ -MiroFish Backend 启动入口 +MiroFish Backend """ import os import sys -# 解决 Windows 控制台中文乱码问题:在所有导入之前设置 UTF-8 编码 +# Windows UTF-8 if sys.platform == 'win32': - # 设置环境变量确保 Python 使用 UTF-8 + # Python UTF-8 os.environ.setdefault('PYTHONIOENCODING', 'utf-8') - # 重新配置标准输出流为 UTF-8 + # Configuração UTF-8 if hasattr(sys.stdout, 'reconfigure'): sys.stdout.reconfigure(encoding='utf-8', errors='replace') if hasattr(sys.stderr, 'reconfigure'): sys.stderr.reconfigure(encoding='utf-8', errors='replace') -# 添加项目根目录到路径 
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) from app import create_app @@ -23,25 +22,23 @@ def main(): - """主函数""" - # 验证配置 + """""" + # Configuração errors = Config.validate() if errors: - print("配置错误:") + print("Configuração:") for err in errors: print(f" - {err}") - print("\n请检查 .env 文件中的配置") + print("\n .env Configuração") sys.exit(1) - # 创建应用 app = create_app() - # 获取运行配置 + # Configuração host = os.environ.get('FLASK_HOST', '0.0.0.0') port = int(os.environ.get('FLASK_PORT', 5001)) debug = Config.DEBUG - # 启动服务 app.run(host=host, port=port, debug=debug, threaded=True) diff --git a/backend/scripts/action_logger.py b/backend/scripts/action_logger.py index 38d025a6c8..6b7fc91c35 100644 --- a/backend/scripts/action_logger.py +++ b/backend/scripts/action_logger.py @@ -1,15 +1,14 @@ """ -动作日志记录器 -用于记录OASIS模拟中每个Agent的动作,供后端监控使用 +OASISSimulaçãoAgent -日志结构: +: sim_xxx/ ├── twitter/ - │ └── actions.jsonl # Twitter 平台动作日志 + │ └── actions.jsonl # Twitter ├── reddit/ - │ └── actions.jsonl # Reddit 平台动作日志 - ├── simulation.log # 主模拟进程日志 - └── run_state.json # 运行状态(API 查询用) + │ └── actions.jsonl # Reddit + ├── simulation.log # Simulação + └── run_state.json # API """ import json @@ -20,15 +19,14 @@ class PlatformActionLogger: - """单平台动作日志记录器""" + """""" def __init__(self, platform: str, base_dir: str): """ - 初始化日志记录器 Args: - platform: 平台名称 (twitter/reddit) - base_dir: 模拟目录的基础路径 + platform: (twitter/reddit) + base_dir: Simulação """ self.platform = platform self.base_dir = base_dir @@ -37,7 +35,7 @@ def __init__(self, platform: str, base_dir: str): self._ensure_dir() def _ensure_dir(self): - """确保目录存在""" + """""" os.makedirs(self.log_dir, exist_ok=True) def log_action( @@ -50,7 +48,7 @@ def log_action( result: Optional[str] = None, success: bool = True ): - """记录一个动作""" + """""" entry = { "round": round_num, "timestamp": datetime.now().isoformat(), @@ -66,7 +64,7 @@ def log_action( f.write(json.dumps(entry, ensure_ascii=False) + '\n') def 
log_round_start(self, round_num: int, simulated_hour: int): - """记录轮次开始""" + """""" entry = { "round": round_num, "timestamp": datetime.now().isoformat(), @@ -78,7 +76,7 @@ def log_round_start(self, round_num: int, simulated_hour: int): f.write(json.dumps(entry, ensure_ascii=False) + '\n') def log_round_end(self, round_num: int, actions_count: int): - """记录轮次结束""" + """""" entry = { "round": round_num, "timestamp": datetime.now().isoformat(), @@ -90,7 +88,7 @@ def log_round_end(self, round_num: int, actions_count: int): f.write(json.dumps(entry, ensure_ascii=False) + '\n') def log_simulation_start(self, config: Dict[str, Any]): - """记录模拟开始""" + """Simulação""" entry = { "timestamp": datetime.now().isoformat(), "event_type": "simulation_start", @@ -103,7 +101,7 @@ def log_simulation_start(self, config: Dict[str, Any]): f.write(json.dumps(entry, ensure_ascii=False) + '\n') def log_simulation_end(self, total_rounds: int, total_actions: int): - """记录模拟结束""" + """Simulação""" entry = { "timestamp": datetime.now().isoformat(), "event_type": "simulation_end", @@ -118,35 +116,31 @@ def log_simulation_end(self, total_rounds: int, total_actions: int): class SimulationLogManager: """ - 模拟日志管理器 - 统一管理所有日志文件,按平台分离 + Simulação """ def __init__(self, simulation_dir: str): """ - 初始化日志管理器 Args: - simulation_dir: 模拟目录路径 + simulation_dir: Simulação """ self.simulation_dir = simulation_dir self.twitter_logger: Optional[PlatformActionLogger] = None self.reddit_logger: Optional[PlatformActionLogger] = None self._main_logger: Optional[logging.Logger] = None - # 设置主日志 self._setup_main_logger() def _setup_main_logger(self): - """设置主模拟日志""" + """Simulação""" log_path = os.path.join(self.simulation_dir, "simulation.log") - # 创建 logger + # logger self._main_logger = logging.getLogger(f"simulation.{os.path.basename(self.simulation_dir)}") self._main_logger.setLevel(logging.INFO) self._main_logger.handlers.clear() - # 文件处理器 file_handler = logging.FileHandler(log_path, encoding='utf-8', 
mode='w') file_handler.setLevel(logging.INFO) file_handler.setFormatter(logging.Formatter( @@ -155,7 +149,6 @@ def _setup_main_logger(self): )) self._main_logger.addHandler(file_handler) - # 控制台处理器 console_handler = logging.StreamHandler() console_handler.setLevel(logging.INFO) console_handler.setFormatter(logging.Formatter( @@ -167,19 +160,19 @@ def _setup_main_logger(self): self._main_logger.propagate = False def get_twitter_logger(self) -> PlatformActionLogger: - """获取 Twitter 平台日志记录器""" + """ Twitter """ if self.twitter_logger is None: self.twitter_logger = PlatformActionLogger("twitter", self.simulation_dir) return self.twitter_logger def get_reddit_logger(self) -> PlatformActionLogger: - """获取 Reddit 平台日志记录器""" + """ Reddit """ if self.reddit_logger is None: self.reddit_logger = PlatformActionLogger("reddit", self.simulation_dir) return self.reddit_logger def log(self, message: str, level: str = "info"): - """记录主日志""" + """""" if self._main_logger: getattr(self._main_logger, level.lower(), self._main_logger.info)(message) @@ -196,12 +189,11 @@ def debug(self, message: str): self.log(message, "debug") -# ============ 兼容旧接口 ============ +# ============ ============ class ActionLogger: """ - 动作日志记录器(兼容旧接口) - 建议使用 SimulationLogManager 代替 + SimulationLogManager """ def __init__(self, log_path: str): @@ -288,12 +280,11 @@ def log_simulation_end(self, platform: str, total_rounds: int, total_actions: in f.write(json.dumps(entry, ensure_ascii=False) + '\n') -# 全局日志实例(兼容旧接口) _global_logger: Optional[ActionLogger] = None def get_logger(log_path: Optional[str] = None) -> ActionLogger: - """获取全局日志实例(兼容旧接口)""" + """""" global _global_logger if log_path: diff --git a/backend/scripts/run_parallel_simulation.py b/backend/scripts/run_parallel_simulation.py index 2a627ffd04..6d5dbf87f7 100644 --- a/backend/scripts/run_parallel_simulation.py +++ b/backend/scripts/run_parallel_simulation.py @@ -1,62 +1,62 @@ """ -OASIS 双平台并行模拟预设脚本 -同时运行Twitter和Reddit模拟,读取相同的配置文件 +OASIS Simulação 
+TwitterRedditSimulaçãoConfiguração -功能特性: -- 双平台(Twitter + Reddit)并行模拟 -- 完成模拟后不立即关闭环境,进入等待命令模式 -- 支持通过IPC接收Interview命令 -- 支持单个Agent采访和批量采访 -- 支持远程关闭环境命令 +: +- Twitter + RedditSimulação +- Simulação +- IPCInterview +- Agent +- -使用方式: +: python run_parallel_simulation.py --config simulation_config.json - python run_parallel_simulation.py --config simulation_config.json --no-wait # 完成后立即关闭 + python run_parallel_simulation.py --config simulation_config.json --no-wait # python run_parallel_simulation.py --config simulation_config.json --twitter-only python run_parallel_simulation.py --config simulation_config.json --reddit-only -日志结构: +: sim_xxx/ ├── twitter/ - │ └── actions.jsonl # Twitter 平台动作日志 + │ └── actions.jsonl # Twitter ├── reddit/ - │ └── actions.jsonl # Reddit 平台动作日志 - ├── simulation.log # 主模拟进程日志 - └── run_state.json # 运行状态(API 查询用) + │ └── actions.jsonl # Reddit + ├── simulation.log # Simulação + └── run_state.json # API """ # ============================================================ -# 解决 Windows 编码问题:在所有 import 之前设置 UTF-8 编码 -# 这是为了修复 OASIS 第三方库读取文件时未指定编码的问题 +# Windows import UTF-8 +# OASIS # ============================================================ import sys import os if sys.platform == 'win32': - # 设置 Python 默认 I/O 编码为 UTF-8 - # 这会影响所有未指定编码的 open() 调用 + # Python I/O UTF-8 + # open() os.environ.setdefault('PYTHONUTF8', '1') os.environ.setdefault('PYTHONIOENCODING', 'utf-8') - # 重新配置标准输出流为 UTF-8(解决控制台中文乱码) + # Configuração UTF-8 if hasattr(sys.stdout, 'reconfigure'): sys.stdout.reconfigure(encoding='utf-8', errors='replace') if hasattr(sys.stderr, 'reconfigure'): sys.stderr.reconfigure(encoding='utf-8', errors='replace') - # 强制设置默认编码(影响 open() 函数的默认编码) - # 注意:这需要在 Python 启动时就设置,运行时设置可能不生效 - # 所以我们还需要 monkey-patch 内置的 open 函数 + # open() + # Python + # monkey-patch open import builtins _original_open = builtins.open def _utf8_open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None): """ - 包装 open() 
函数,对于文本模式默认使用 UTF-8 编码 - 这可以修复第三方库(如 OASIS)读取文件时未指定编码的问题 + open() UTF-8 + OASIS """ - # 只对文本模式(非二进制)且未指定编码的情况设置默认编码 + # Caso if encoding is None and 'b' not in mode: encoding = 'utf-8' return _original_open(file, mode, buffering, encoding, errors, @@ -77,52 +77,51 @@ def _utf8_open(file, mode='r', buffering=-1, encoding=None, errors=None, from typing import Dict, Any, List, Optional, Tuple -# 全局变量:用于信号处理 _shutdown_event = None _cleanup_done = False -# 添加 backend 目录到路径 -# 脚本固定位于 backend/scripts/ 目录 +# backend +# backend/scripts/ _scripts_dir = os.path.dirname(os.path.abspath(__file__)) _backend_dir = os.path.abspath(os.path.join(_scripts_dir, '..')) _project_root = os.path.abspath(os.path.join(_backend_dir, '..')) sys.path.insert(0, _scripts_dir) sys.path.insert(0, _backend_dir) -# 加载项目根目录的 .env 文件(包含 LLM_API_KEY 等配置) +# .env LLM_API_KEY Configuração from dotenv import load_dotenv _env_file = os.path.join(_project_root, '.env') if os.path.exists(_env_file): load_dotenv(_env_file) - print(f"已加载环境配置: {_env_file}") + print(f"Configuração: {_env_file}") else: - # 尝试加载 backend/.env + # backend/.env _backend_env = os.path.join(_backend_dir, '.env') if os.path.exists(_backend_env): load_dotenv(_backend_env) - print(f"已加载环境配置: {_backend_env}") + print(f"Configuração: {_backend_env}") class MaxTokensWarningFilter(logging.Filter): - """过滤掉 camel-ai 关于 max_tokens 的警告(我们故意不设置 max_tokens,让模型自行决定)""" + """ camel-ai max_tokens max_tokens""" def filter(self, record): - # 过滤掉包含 max_tokens 警告的日志 + # max_tokens if "max_tokens" in record.getMessage() and "Invalid or missing" in record.getMessage(): return False return True -# 在模块加载时立即添加过滤器,确保在 camel 代码执行前生效 +# camel logging.getLogger().addFilter(MaxTokensWarningFilter()) def disable_oasis_logging(): """ - 禁用 OASIS 库的详细日志输出 - OASIS 的日志太冗余(记录每个 agent 的观察和动作),我们使用自己的 action_logger + OASIS + OASIS agent action_logger """ - # 禁用 OASIS 的所有日志器 + # OASIS oasis_loggers = [ "social.agent", "social.twitter", @@ -133,22 +132,21 @@ def 
disable_oasis_logging(): for logger_name in oasis_loggers: logger = logging.getLogger(logger_name) - logger.setLevel(logging.CRITICAL) # 只记录严重错误 + logger.setLevel(logging.CRITICAL) # logger.handlers.clear() logger.propagate = False def init_logging_for_simulation(simulation_dir: str): """ - 初始化模拟的日志配置 + SimulaçãoConfiguração Args: - simulation_dir: 模拟目录路径 + simulation_dir: Simulação """ - # 禁用 OASIS 的详细日志 + # OASIS disable_oasis_logging() - # 清理旧的 log 目录(如果存在) old_log_dir = os.path.join(simulation_dir, "log") if os.path.exists(old_log_dir): import shutil @@ -169,12 +167,12 @@ def init_logging_for_simulation(simulation_dir: str): generate_reddit_agent_graph ) except ImportError as e: - print(f"错误: 缺少依赖 {e}") - print("请先安装: pip install oasis-ai camel-ai") + print(f": {e}") + print(": pip install oasis-ai camel-ai") sys.exit(1) -# Twitter可用动作(不包含INTERVIEW,INTERVIEW只能通过ManualAction手动触发) +# TwitterINTERVIEWINTERVIEWManualAction TWITTER_ACTIONS = [ ActionType.CREATE_POST, ActionType.LIKE_POST, @@ -184,7 +182,7 @@ def init_logging_for_simulation(simulation_dir: str): ActionType.QUOTE_POST, ] -# Reddit可用动作(不包含INTERVIEW,INTERVIEW只能通过ManualAction手动触发) +# RedditINTERVIEWINTERVIEWManualAction REDDIT_ACTIONS = [ ActionType.LIKE_POST, ActionType.DISLIKE_POST, @@ -202,13 +200,12 @@ def init_logging_for_simulation(simulation_dir: str): ] -# IPC相关常量 IPC_COMMANDS_DIR = "ipc_commands" IPC_RESPONSES_DIR = "ipc_responses" ENV_STATUS_FILE = "env_status.json" class CommandType: - """命令类型常量""" + """""" INTERVIEW = "interview" BATCH_INTERVIEW = "batch_interview" CLOSE_ENV = "close_env" @@ -216,9 +213,9 @@ class CommandType: class ParallelIPCHandler: """ - 双平台IPC命令处理器 + IPC - 管理两个平台的环境,处理Interview命令 + Interview """ def __init__( @@ -239,12 +236,11 @@ def __init__( self.responses_dir = os.path.join(simulation_dir, IPC_RESPONSES_DIR) self.status_file = os.path.join(simulation_dir, ENV_STATUS_FILE) - # 确保目录存在 os.makedirs(self.commands_dir, exist_ok=True) os.makedirs(self.responses_dir, 
exist_ok=True) def update_status(self, status: str): - """更新环境状态""" + """""" with open(self.status_file, 'w', encoding='utf-8') as f: json.dump({ "status": status, @@ -254,11 +250,10 @@ def update_status(self, status: str): }, f, ensure_ascii=False, indent=2) def poll_command(self) -> Optional[Dict[str, Any]]: - """轮询获取待处理命令""" + """""" if not os.path.exists(self.commands_dir): return None - # 获取命令文件(按时间排序) command_files = [] for filename in os.listdir(self.commands_dir): if filename.endswith('.json'): @@ -277,7 +272,7 @@ def poll_command(self) -> Optional[Dict[str, Any]]: return None def send_response(self, command_id: str, status: str, result: Dict = None, error: str = None): - """发送响应""" + """""" response = { "command_id": command_id, "status": status, @@ -290,7 +285,6 @@ def send_response(self, command_id: str, status: str, result: Dict = None, error with open(response_file, 'w', encoding='utf-8') as f: json.dump(response, f, ensure_ascii=False, indent=2) - # 删除命令文件 command_file = os.path.join(self.commands_dir, f"{command_id}.json") try: os.remove(command_file) @@ -299,13 +293,13 @@ def send_response(self, command_id: str, status: str, result: Dict = None, error def _get_env_and_graph(self, platform: str): """ - 获取指定平台的环境和agent_graph + agent_graph Args: - platform: 平台名称 ("twitter" 或 "reddit") + platform: ("twitter" "reddit") Returns: - (env, agent_graph, platform_name) 或 (None, None, None) + (env, agent_graph, platform_name) (None, None, None) """ if platform == "twitter" and self.twitter_env: return self.twitter_env, self.twitter_agent_graph, "twitter" @@ -316,15 +310,15 @@ def _get_env_and_graph(self, platform: str): async def _interview_single_platform(self, agent_id: int, prompt: str, platform: str) -> Dict[str, Any]: """ - 在单个平台上执行Interview + Interview Returns: - 包含结果的字典,或包含error的字典 + Resultadoerror """ env, agent_graph, actual_platform = self._get_env_and_graph(platform) if not env or not agent_graph: - return {"platform": platform, "error": 
f"{platform}平台不可用"} + return {"platform": platform, "error": f"{platform}"} try: agent = agent_graph.get_agent(agent_id) @@ -344,36 +338,34 @@ async def _interview_single_platform(self, agent_id: int, prompt: str, platform: async def handle_interview(self, command_id: str, agent_id: int, prompt: str, platform: str = None) -> bool: """ - 处理单个Agent采访命令 + Agent Args: - command_id: 命令ID + command_id: ID agent_id: Agent ID - prompt: 采访问题 - platform: 指定平台(可选) - - "twitter": 只采访Twitter平台 - - "reddit": 只采访Reddit平台 - - None/不指定: 同时采访两个平台,返回整合结果 + prompt: + platform: + - "twitter": Twitter + - "reddit": Reddit + - None/: Resultado Returns: - True 表示成功,False 表示失败 + True False Falhou """ - # 如果指定了平台,只采访该平台 if platform in ("twitter", "reddit"): result = await self._interview_single_platform(agent_id, prompt, platform) if "error" in result: self.send_response(command_id, "failed", error=result["error"]) - print(f" Interview失败: agent_id={agent_id}, platform={platform}, error={result['error']}") + print(f" InterviewFalhou: agent_id={agent_id}, platform={platform}, error={result['error']}") return False else: self.send_response(command_id, "completed", result=result) - print(f" Interview完成: agent_id={agent_id}, platform={platform}") + print(f" Interview: agent_id={agent_id}, platform={platform}") return True - # 未指定平台:同时采访两个平台 if not self.twitter_env and not self.reddit_env: - self.send_response(command_id, "failed", error="没有可用的模拟环境") + self.send_response(command_id, "failed", error="Simulação") return False results = { @@ -383,7 +375,6 @@ async def handle_interview(self, command_id: str, agent_id: int, prompt: str, pl } success_count = 0 - # 并行采访两个平台 tasks = [] platforms_to_interview = [] @@ -395,7 +386,6 @@ async def handle_interview(self, command_id: str, agent_id: int, prompt: str, pl tasks.append(self._interview_single_platform(agent_id, prompt, "reddit")) platforms_to_interview.append("reddit") - # 并行执行 platform_results = await asyncio.gather(*tasks) for platform_name, 
platform_result in zip(platforms_to_interview, platform_results): @@ -405,30 +395,28 @@ async def handle_interview(self, command_id: str, agent_id: int, prompt: str, pl if success_count > 0: self.send_response(command_id, "completed", result=results) - print(f" Interview完成: agent_id={agent_id}, 成功平台数={success_count}/{len(platforms_to_interview)}") + print(f" Interview: agent_id={agent_id}, ={success_count}/{len(platforms_to_interview)}") return True else: - errors = [f"{p}: {r.get('error', '未知错误')}" for p, r in results["platforms"].items()] + errors = [f"{p}: {r.get('error', '')}" for p, r in results["platforms"].items()] self.send_response(command_id, "failed", error="; ".join(errors)) - print(f" Interview失败: agent_id={agent_id}, 所有平台都失败") + print(f" InterviewFalhou: agent_id={agent_id}, Falhou") return False async def handle_batch_interview(self, command_id: str, interviews: List[Dict], platform: str = None) -> bool: """ - 处理批量采访命令 Args: - command_id: 命令ID + command_id: ID interviews: [{"agent_id": int, "prompt": str, "platform": str(optional)}, ...] 
- platform: 默认平台(可被每个interview项覆盖) - - "twitter": 只采访Twitter平台 - - "reddit": 只采访Reddit平台 - - None/不指定: 每个Agent同时采访两个平台 + platform: interview + - "twitter": Twitter + - "reddit": Reddit + - None/: Agent """ - # 按平台分组 twitter_interviews = [] reddit_interviews = [] - both_platforms_interviews = [] # 需要同时采访两个平台的 + both_platforms_interviews = [] # for interview in interviews: item_platform = interview.get("platform", platform) @@ -437,10 +425,9 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict], elif item_platform == "reddit": reddit_interviews.append(interview) else: - # 未指定平台:两个平台都采访 both_platforms_interviews.append(interview) - # 把 both_platforms_interviews 拆分到两个平台 + # both_platforms_interviews if both_platforms_interviews: if self.twitter_env: twitter_interviews.extend(both_platforms_interviews) @@ -449,7 +436,7 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict], results = {} - # 处理Twitter平台的采访 + # Twitter if twitter_interviews and self.twitter_env: try: twitter_actions = {} @@ -463,7 +450,7 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict], action_args={"prompt": prompt} ) except Exception as e: - print(f" 警告: 无法获取Twitter Agent {agent_id}: {e}") + print(f" : Twitter Agent {agent_id}: {e}") if twitter_actions: await self.twitter_env.step(twitter_actions) @@ -474,9 +461,9 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict], result["platform"] = "twitter" results[f"twitter_{agent_id}"] = result except Exception as e: - print(f" Twitter批量Interview失败: {e}") + print(f" TwitterInterviewFalhou: {e}") - # 处理Reddit平台的采访 + # Reddit if reddit_interviews and self.reddit_env: try: reddit_actions = {} @@ -490,7 +477,7 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict], action_args={"prompt": prompt} ) except Exception as e: - print(f" 警告: 无法获取Reddit Agent {agent_id}: {e}") + print(f" : Reddit Agent {agent_id}: {e}") if 
reddit_actions: await self.reddit_env.step(reddit_actions) @@ -501,21 +488,21 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict], result["platform"] = "reddit" results[f"reddit_{agent_id}"] = result except Exception as e: - print(f" Reddit批量Interview失败: {e}") + print(f" RedditInterviewFalhou: {e}") if results: self.send_response(command_id, "completed", result={ "interviews_count": len(results), "results": results }) - print(f" 批量Interview完成: {len(results)} 个Agent") + print(f" Interview: {len(results)} Agent") return True else: - self.send_response(command_id, "failed", error="没有成功的采访") + self.send_response(command_id, "failed", error="") return False def _get_interview_result(self, agent_id: int, platform: str) -> Dict[str, Any]: - """从数据库获取最新的Interview结果""" + """InterviewResultado""" db_path = os.path.join(self.simulation_dir, f"{platform}_simulation.db") result = { @@ -531,7 +518,7 @@ def _get_interview_result(self, agent_id: int, platform: str) -> Dict[str, Any]: conn = sqlite3.connect(db_path) cursor = conn.cursor() - # 查询最新的Interview记录 + # Interview cursor.execute(""" SELECT user_id, info, created_at FROM trace @@ -553,16 +540,15 @@ def _get_interview_result(self, agent_id: int, platform: str) -> Dict[str, Any]: conn.close() except Exception as e: - print(f" 读取Interview结果失败: {e}") + print(f" InterviewResultadoFalhou: {e}") return result async def process_commands(self) -> bool: """ - 处理所有待处理命令 Returns: - True 表示继续运行,False 表示应该退出 + True False """ command = self.poll_command() if not command: @@ -572,7 +558,7 @@ async def process_commands(self) -> bool: command_type = command.get("command_type") args = command.get("args", {}) - print(f"\n收到IPC命令: {command_type}, id={command_id}") + print(f"\nIPC: {command_type}, id={command_id}") if command_type == CommandType.INTERVIEW: await self.handle_interview( @@ -592,25 +578,24 @@ async def process_commands(self) -> bool: return True elif command_type == CommandType.CLOSE_ENV: - 
print("收到关闭环境命令") - self.send_response(command_id, "completed", result={"message": "环境即将关闭"}) + print("") + self.send_response(command_id, "completed", result={"message": ""}) return False else: - self.send_response(command_id, "failed", error=f"未知命令类型: {command_type}") + self.send_response(command_id, "failed", error=f": {command_type}") return True def load_config(config_path: str) -> Dict[str, Any]: - """加载配置文件""" + """Configuração""" with open(config_path, 'r', encoding='utf-8') as f: return json.load(f) -# 需要过滤掉的非核心动作类型(这些动作对分析价值较低) +# Análise FILTERED_ACTIONS = {'refresh', 'sign_up'} -# 动作类型映射表(数据库中的名称 -> 标准名称) ACTION_TYPE_MAP = { 'create_post': 'CREATE_POST', 'like_post': 'LIKE_POST', @@ -632,15 +617,15 @@ def load_config(config_path: str) -> Dict[str, Any]: def get_agent_names_from_config(config: Dict[str, Any]) -> Dict[int, str]: """ - 从 simulation_config 中获取 agent_id -> entity_name 的映射 + simulation_config agent_id -> entity_name - 这样可以在 actions.jsonl 中显示真实的实体名称,而不是 "Agent_0" 这样的代号 + actions.jsonl Entidade "Agent_0" Args: - config: simulation_config.json 的内容 + config: simulation_config.json Conteúdo Returns: - agent_id -> entity_name 的映射字典 + agent_id -> entity_name """ agent_names = {} agent_configs = config.get("agent_configs", []) @@ -660,17 +645,16 @@ def fetch_new_actions_from_db( agent_names: Dict[int, str] ) -> Tuple[List[Dict[str, Any]], int]: """ - 从数据库中获取新的动作记录,并补充完整的上下文信息 Args: - db_path: 数据库文件路径 - last_rowid: 上次读取的最大 rowid 值(使用 rowid 而不是 created_at,因为不同平台的 created_at 格式不同) - agent_names: agent_id -> agent_name 映射 + db_path: + last_rowid: rowid rowid created_at created_at + agent_names: agent_id -> agent_name Returns: (actions_list, new_last_rowid) - - actions_list: 动作列表,每个元素包含 agent_id, agent_name, action_type, action_args(含上下文信息) - - new_last_rowid: 新的最大 rowid 值 + - actions_list: agent_id, agent_name, action_type, action_args + - new_last_rowid: rowid """ actions = [] new_last_rowid = last_rowid @@ -682,8 +666,8 @@ def 
fetch_new_actions_from_db( conn = sqlite3.connect(db_path) cursor = conn.cursor() - # 使用 rowid 来追踪已处理的记录(rowid 是 SQLite 的内置自增字段) - # 这样可以避免 created_at 格式差异问题(Twitter 用整数,Reddit 用日期时间字符串) + # rowid rowid SQLite + # created_at Twitter Reddit cursor.execute(""" SELECT rowid, user_id, action, info FROM trace @@ -692,20 +676,18 @@ def fetch_new_actions_from_db( """, (last_rowid,)) for rowid, user_id, action, info_json in cursor.fetchall(): - # 更新最大 rowid + # rowid new_last_rowid = rowid - # 过滤非核心动作 if action in FILTERED_ACTIONS: continue - # 解析动作参数 try: action_args = json.loads(info_json) if info_json else {} except json.JSONDecodeError: action_args = {} - # 精简 action_args,只保留关键字段(保留完整内容,不截断) + # action_argsConteúdo completo, sem truncar simplified_args = {} if 'content' in action_args: simplified_args['content'] = action_args['content'] @@ -726,10 +708,9 @@ def fetch_new_actions_from_db( if 'dislike_id' in action_args: simplified_args['dislike_id'] = action_args['dislike_id'] - # 转换动作类型名称 action_type = ACTION_TYPE_MAP.get(action, action.upper()) - # 补充上下文信息(帖子内容、用户名等) + # Conteúdo _enrich_action_context(cursor, action_type, simplified_args, agent_names) actions.append({ @@ -741,7 +722,7 @@ def fetch_new_actions_from_db( conn.close() except Exception as e: - print(f"读取数据库动作失败: {e}") + print(f"Falhou: {e}") return actions, new_last_rowid @@ -753,16 +734,16 @@ def _enrich_action_context( agent_names: Dict[int, str] ) -> None: """ - 为动作补充上下文信息(帖子内容、用户名等) + Conteúdo Args: - cursor: 数据库游标 - action_type: 动作类型 - action_args: 动作参数(会被修改) - agent_names: agent_id -> agent_name 映射 + cursor: + action_type: + action_args: + agent_names: agent_id -> agent_name """ try: - # 点赞/踩帖子:补充帖子内容和作者 + # /Conteúdo if action_type in ('LIKE_POST', 'DISLIKE_POST'): post_id = action_args.get('post_id') if post_id: @@ -771,11 +752,11 @@ def _enrich_action_context( action_args['post_content'] = post_info.get('content', '') action_args['post_author_name'] = post_info.get('author_name', '') - # 
转发帖子:补充原帖内容和作者 + # Conteúdo elif action_type == 'REPOST': new_post_id = action_args.get('new_post_id') if new_post_id: - # 转发帖子的 original_post_id 指向原帖 + # original_post_id cursor.execute(""" SELECT original_post_id FROM post WHERE post_id = ? """, (new_post_id,)) @@ -787,7 +768,7 @@ def _enrich_action_context( action_args['original_content'] = original_info.get('content', '') action_args['original_author_name'] = original_info.get('author_name', '') - # 引用帖子:补充原帖内容、作者和引用评论 + # Conteúdo elif action_type == 'QUOTE_POST': quoted_id = action_args.get('quoted_id') new_post_id = action_args.get('new_post_id') @@ -798,7 +779,7 @@ def _enrich_action_context( action_args['original_content'] = original_info.get('content', '') action_args['original_author_name'] = original_info.get('author_name', '') - # 获取引用帖子的评论内容(quote_content) + # Conteúdoquote_content if new_post_id: cursor.execute(""" SELECT quote_content FROM post WHERE post_id = ? @@ -807,11 +788,10 @@ def _enrich_action_context( if row and row[0]: action_args['quote_content'] = row[0] - # 关注用户:补充被关注用户的名称 elif action_type == 'FOLLOW': follow_id = action_args.get('follow_id') if follow_id: - # 从 follow 表获取 followee_id + # follow followee_id cursor.execute(""" SELECT followee_id FROM follow WHERE follow_id = ? 
""", (follow_id,)) @@ -822,16 +802,15 @@ def _enrich_action_context( if target_name: action_args['target_user_name'] = target_name - # 屏蔽用户:补充被屏蔽用户的名称 elif action_type == 'MUTE': - # 从 action_args 中获取 user_id 或 target_id + # action_args user_id target_id target_id = action_args.get('user_id') or action_args.get('target_id') if target_id: target_name = _get_user_name(cursor, target_id, agent_names) if target_name: action_args['target_user_name'] = target_name - # 点赞/踩评论:补充评论内容和作者 + # /Conteúdo elif action_type in ('LIKE_COMMENT', 'DISLIKE_COMMENT'): comment_id = action_args.get('comment_id') if comment_id: @@ -840,7 +819,6 @@ def _enrich_action_context( action_args['comment_content'] = comment_info.get('content', '') action_args['comment_author_name'] = comment_info.get('author_name', '') - # 发表评论:补充所评论的帖子信息 elif action_type == 'CREATE_COMMENT': post_id = action_args.get('post_id') if post_id: @@ -850,8 +828,8 @@ def _enrich_action_context( action_args['post_author_name'] = post_info.get('author_name', '') except Exception as e: - # 补充上下文失败不影响主流程 - print(f"补充动作上下文失败: {e}") + # Falhou + print(f"Falhou: {e}") def _get_post_info( @@ -860,15 +838,14 @@ def _get_post_info( agent_names: Dict[int, str] ) -> Optional[Dict[str, str]]: """ - 获取帖子信息 Args: - cursor: 数据库游标 - post_id: 帖子ID - agent_names: agent_id -> agent_name 映射 + cursor: + post_id: ID + agent_names: agent_id -> agent_name Returns: - 包含 content 和 author_name 的字典,或 None + content author_name None """ try: cursor.execute(""" @@ -883,12 +860,12 @@ def _get_post_info( user_id = row[1] agent_id = row[2] - # 优先使用 agent_names 中的名称 + # agent_names author_name = '' if agent_id is not None and agent_id in agent_names: author_name = agent_names[agent_id] elif user_id: - # 从 user 表获取名称 + # user cursor.execute("SELECT name, user_name FROM user WHERE user_id = ?", (user_id,)) user_row = cursor.fetchone() if user_row: @@ -906,15 +883,14 @@ def _get_user_name( agent_names: Dict[int, str] ) -> Optional[str]: """ - 获取用户名称 Args: - 
cursor: 数据库游标 - user_id: 用户ID - agent_names: agent_id -> agent_name 映射 + cursor: + user_id: ID + agent_names: agent_id -> agent_name Returns: - 用户名称,或 None + None """ try: cursor.execute(""" @@ -926,7 +902,7 @@ def _get_user_name( name = row[1] user_name = row[2] - # 优先使用 agent_names 中的名称 + # agent_names if agent_id is not None and agent_id in agent_names: return agent_names[agent_id] return name or user_name or '' @@ -941,15 +917,14 @@ def _get_comment_info( agent_names: Dict[int, str] ) -> Optional[Dict[str, str]]: """ - 获取评论信息 Args: - cursor: 数据库游标 - comment_id: 评论ID - agent_names: agent_id -> agent_name 映射 + cursor: + comment_id: ID + agent_names: agent_id -> agent_name Returns: - 包含 content 和 author_name 的字典,或 None + content author_name None """ try: cursor.execute(""" @@ -964,12 +939,12 @@ def _get_comment_info( user_id = row[1] agent_id = row[2] - # 优先使用 agent_names 中的名称 + # agent_names author_name = '' if agent_id is not None and agent_id in agent_names: author_name = agent_names[agent_id] elif user_id: - # 从 user 表获取名称 + # user cursor.execute("SELECT name, user_name FROM user WHERE user_id = ?", (user_id,)) user_row = cursor.fetchone() if user_row: @@ -983,53 +958,53 @@ def _get_comment_info( def create_model(config: Dict[str, Any], use_boost: bool = False): """ - 创建LLM模型 + LLM - 支持双 LLM 配置,用于并行模拟时提速: - - 通用配置:LLM_API_KEY, LLM_BASE_URL, LLM_MODEL_NAME - - 加速配置(可选):LLM_BOOST_API_KEY, LLM_BOOST_BASE_URL, LLM_BOOST_MODEL_NAME + LLM ConfiguraçãoSimulação + - ConfiguraçãoLLM_API_KEY, LLM_BASE_URL, LLM_MODEL_NAME + - ConfiguraçãoLLM_BOOST_API_KEY, LLM_BOOST_BASE_URL, LLM_BOOST_MODEL_NAME - 如果配置了加速 LLM,并行模拟时可以让不同平台使用不同的 API 服务商,提高并发能力。 + Configuração LLMSimulação API Args: - config: 模拟配置字典 - use_boost: 是否使用加速 LLM 配置(如果可用) + config: Configuração da simulação + use_boost: LLM Configuração """ - # 检查是否有加速配置 + # Configuração boost_api_key = os.environ.get("LLM_BOOST_API_KEY", "") boost_base_url = os.environ.get("LLM_BOOST_BASE_URL", "") boost_model = 
os.environ.get("LLM_BOOST_MODEL_NAME", "") has_boost_config = bool(boost_api_key) - # 根据参数和配置情况选择使用哪个 LLM + # ConfiguraçãoCaso LLM if use_boost and has_boost_config: - # 使用加速配置 + # Configuração llm_api_key = boost_api_key llm_base_url = boost_base_url llm_model = boost_model or os.environ.get("LLM_MODEL_NAME", "") - config_label = "[加速LLM]" + config_label = "[LLM]" else: - # 使用通用配置 + # Configuração llm_api_key = os.environ.get("LLM_API_KEY", "") llm_base_url = os.environ.get("LLM_BASE_URL", "") llm_model = os.environ.get("LLM_MODEL_NAME", "") - config_label = "[通用LLM]" + config_label = "[LLM]" - # 如果 .env 中没有模型名,则使用 config 作为备用 + # .env config if not llm_model: llm_model = config.get("llm_model", "gpt-4o-mini") - # 设置 camel-ai 所需的环境变量 + # camel-ai if llm_api_key: os.environ["OPENAI_API_KEY"] = llm_api_key if not os.environ.get("OPENAI_API_KEY"): - raise ValueError("缺少 API Key 配置,请在项目根目录 .env 文件中设置 LLM_API_KEY") + raise ValueError(" API Key Configuração .env LLM_API_KEY") if llm_base_url: os.environ["OPENAI_API_BASE_URL"] = llm_base_url - print(f"{config_label} model={llm_model}, base_url={llm_base_url[:40] if llm_base_url else '默认'}...") + print(f"{config_label} model={llm_model}, base_url={llm_base_url[:40] if llm_base_url else ''}...") return ModelFactory.create( model_platform=ModelPlatformType.OPENAI, @@ -1043,7 +1018,7 @@ def get_active_agents_for_round( current_hour: int, round_num: int ) -> List: - """根据时间和配置决定本轮激活哪些Agent""" + """ConfiguraçãoAgent""" time_config = config.get("time_config", {}) agent_configs = config.get("agent_configs", []) @@ -1091,7 +1066,7 @@ def get_active_agents_for_round( class PlatformSimulation: - """平台模拟结果容器""" + """SimulaçãoResultado""" def __init__(self): self.env = None self.agent_graph = None @@ -1105,17 +1080,17 @@ async def run_twitter_simulation( main_logger: Optional[SimulationLogManager] = None, max_rounds: Optional[int] = None ) -> PlatformSimulation: - """运行Twitter模拟 + """TwitterSimulação Args: - config: 模拟配置 - 
simulation_dir: 模拟目录 - action_logger: 动作日志记录器 - main_logger: 主日志管理器 - max_rounds: 最大模拟轮数(可选,用于截断过长的模拟) + config: Configuração da simulação + simulation_dir: Simulação + action_logger: + main_logger: + max_rounds: SimulaçãoSimulação Returns: - PlatformSimulation: 包含env和agent_graph的结果对象 + PlatformSimulation: envagent_graphResultado """ result = PlatformSimulation() @@ -1124,15 +1099,15 @@ def log_info(msg): main_logger.info(f"[Twitter] {msg}") print(f"[Twitter] {msg}") - log_info("初始化...") + log_info("...") - # Twitter 使用通用 LLM 配置 + # Twitter LLM Configuração model = create_model(config, use_boost=False) - # OASIS Twitter使用CSV格式 + # OASIS TwitterCSV profile_path = os.path.join(simulation_dir, "twitter_profiles.csv") if not os.path.exists(profile_path): - log_info(f"错误: Profile文件不存在: {profile_path}") + log_info(f": Profile: {profile_path}") return result result.agent_graph = await generate_twitter_agent_graph( @@ -1141,9 +1116,9 @@ def log_info(msg): available_actions=TWITTER_ACTIONS, ) - # 从配置文件获取 Agent 真实名称映射(使用 entity_name 而非默认的 Agent_X) + # Configuração Agent entity_name Agent_X agent_names = get_agent_names_from_config(config) - # 如果配置中没有某个 agent,则使用 OASIS 的默认名称 + # Configuração agent OASIS for agent_id, agent in result.agent_graph.get_agents(): if agent_id not in agent_names: agent_names[agent_id] = getattr(agent, 'name', f'Agent_{agent_id}') @@ -1156,23 +1131,22 @@ def log_info(msg): agent_graph=result.agent_graph, platform=oasis.DefaultPlatformType.TWITTER, database_path=db_path, - semaphore=30, # 限制最大并发 LLM 请求数,防止 API 过载 + semaphore=30, # LLM API ) await result.env.reset() - log_info("环境已启动") + log_info("") if action_logger: action_logger.log_simulation_start(config) total_actions = 0 - last_rowid = 0 # 跟踪数据库中最后处理的行号(使用 rowid 避免 created_at 格式差异) + last_rowid = 0 # rowid created_at - # 执行初始事件 event_config = config.get("event_config", {}) initial_posts = event_config.get("initial_posts", []) - # 记录 round 0 开始(初始事件阶段) + # round 0 if action_logger: 
action_logger.log_round_start(0, 0) # round 0, simulated_hour 0 @@ -1204,32 +1178,30 @@ def log_info(msg): if initial_actions: await result.env.step(initial_actions) - log_info(f"已发布 {len(initial_actions)} 条初始帖子") + log_info(f" {len(initial_actions)} ") - # 记录 round 0 结束 + # round 0 if action_logger: action_logger.log_round_end(0, initial_action_count) - # 主模拟循环 + # Simulação time_config = config.get("time_config", {}) total_hours = time_config.get("total_simulation_hours", 72) minutes_per_round = time_config.get("minutes_per_round", 30) total_rounds = (total_hours * 60) // minutes_per_round - # 如果指定了最大轮数,则截断 if max_rounds is not None and max_rounds > 0: original_rounds = total_rounds total_rounds = min(total_rounds, max_rounds) if total_rounds < original_rounds: - log_info(f"轮数已截断: {original_rounds} -> {total_rounds} (max_rounds={max_rounds})") + log_info(f": {original_rounds} -> {total_rounds} (max_rounds={max_rounds})") start_time = datetime.now() for round_num in range(total_rounds): - # 检查是否收到退出信号 if _shutdown_event and _shutdown_event.is_set(): if main_logger: - main_logger.info(f"收到退出信号,在第 {round_num + 1} 轮停止模拟") + main_logger.info(f" {round_num + 1} Simulação") break simulated_minutes = round_num * minutes_per_round @@ -1240,12 +1212,12 @@ def log_info(msg): result.env, config, simulated_hour, round_num ) - # 无论是否有活跃agent,都记录round开始 + # agentround if action_logger: action_logger.log_round_start(round_num + 1, simulated_hour) if not active_agents: - # 没有活跃agent时也记录round结束(actions_count=0) + # agentroundactions_count=0 if action_logger: action_logger.log_round_end(round_num + 1, 0) continue @@ -1253,7 +1225,6 @@ def log_info(msg): actions = {agent: LLMAction() for _, agent in active_agents} await result.env.step(actions) - # 从数据库获取实际执行的动作并记录 actual_actions, last_rowid = fetch_new_actions_from_db( db_path, last_rowid, agent_names ) @@ -1278,14 +1249,14 @@ def log_info(msg): progress = (round_num + 1) / total_rounds * 100 log_info(f"Day {simulated_day}, 
{simulated_hour:02d}:00 - Round {round_num + 1}/{total_rounds} ({progress:.1f}%)") - # 注意:不关闭环境,保留给Interview使用 + # Interview if action_logger: action_logger.log_simulation_end(total_rounds, total_actions) result.total_actions = total_actions elapsed = (datetime.now() - start_time).total_seconds() - log_info(f"模拟循环完成! 耗时: {elapsed:.1f}秒, 总动作: {total_actions}") + log_info(f"Simulação! : {elapsed:.1f}, : {total_actions}") return result @@ -1297,17 +1268,17 @@ async def run_reddit_simulation( main_logger: Optional[SimulationLogManager] = None, max_rounds: Optional[int] = None ) -> PlatformSimulation: - """运行Reddit模拟 + """RedditSimulação Args: - config: 模拟配置 - simulation_dir: 模拟目录 - action_logger: 动作日志记录器 - main_logger: 主日志管理器 - max_rounds: 最大模拟轮数(可选,用于截断过长的模拟) + config: Configuração da simulação + simulation_dir: Simulação + action_logger: + main_logger: + max_rounds: SimulaçãoSimulação Returns: - PlatformSimulation: 包含env和agent_graph的结果对象 + PlatformSimulation: envagent_graphResultado """ result = PlatformSimulation() @@ -1316,14 +1287,14 @@ def log_info(msg): main_logger.info(f"[Reddit] {msg}") print(f"[Reddit] {msg}") - log_info("初始化...") + log_info("...") - # Reddit 使用加速 LLM 配置(如果有的话,否则回退到通用配置) + # Reddit LLM ConfiguraçãoConfiguração model = create_model(config, use_boost=True) profile_path = os.path.join(simulation_dir, "reddit_profiles.json") if not os.path.exists(profile_path): - log_info(f"错误: Profile文件不存在: {profile_path}") + log_info(f": Profile: {profile_path}") return result result.agent_graph = await generate_reddit_agent_graph( @@ -1332,9 +1303,9 @@ def log_info(msg): available_actions=REDDIT_ACTIONS, ) - # 从配置文件获取 Agent 真实名称映射(使用 entity_name 而非默认的 Agent_X) + # Configuração Agent entity_name Agent_X agent_names = get_agent_names_from_config(config) - # 如果配置中没有某个 agent,则使用 OASIS 的默认名称 + # Configuração agent OASIS for agent_id, agent in result.agent_graph.get_agents(): if agent_id not in agent_names: agent_names[agent_id] = getattr(agent, 'name', 
f'Agent_{agent_id}') @@ -1347,23 +1318,22 @@ def log_info(msg): agent_graph=result.agent_graph, platform=oasis.DefaultPlatformType.REDDIT, database_path=db_path, - semaphore=30, # 限制最大并发 LLM 请求数,防止 API 过载 + semaphore=30, # LLM API ) await result.env.reset() - log_info("环境已启动") + log_info("") if action_logger: action_logger.log_simulation_start(config) total_actions = 0 - last_rowid = 0 # 跟踪数据库中最后处理的行号(使用 rowid 避免 created_at 格式差异) + last_rowid = 0 # rowid created_at - # 执行初始事件 event_config = config.get("event_config", {}) initial_posts = event_config.get("initial_posts", []) - # 记录 round 0 开始(初始事件阶段) + # round 0 if action_logger: action_logger.log_round_start(0, 0) # round 0, simulated_hour 0 @@ -1403,32 +1373,30 @@ def log_info(msg): if initial_actions: await result.env.step(initial_actions) - log_info(f"已发布 {len(initial_actions)} 条初始帖子") + log_info(f" {len(initial_actions)} ") - # 记录 round 0 结束 + # round 0 if action_logger: action_logger.log_round_end(0, initial_action_count) - # 主模拟循环 + # Simulação time_config = config.get("time_config", {}) total_hours = time_config.get("total_simulation_hours", 72) minutes_per_round = time_config.get("minutes_per_round", 30) total_rounds = (total_hours * 60) // minutes_per_round - # 如果指定了最大轮数,则截断 if max_rounds is not None and max_rounds > 0: original_rounds = total_rounds total_rounds = min(total_rounds, max_rounds) if total_rounds < original_rounds: - log_info(f"轮数已截断: {original_rounds} -> {total_rounds} (max_rounds={max_rounds})") + log_info(f": {original_rounds} -> {total_rounds} (max_rounds={max_rounds})") start_time = datetime.now() for round_num in range(total_rounds): - # 检查是否收到退出信号 if _shutdown_event and _shutdown_event.is_set(): if main_logger: - main_logger.info(f"收到退出信号,在第 {round_num + 1} 轮停止模拟") + main_logger.info(f" {round_num + 1} Simulação") break simulated_minutes = round_num * minutes_per_round @@ -1439,12 +1407,12 @@ def log_info(msg): result.env, config, simulated_hour, round_num ) - # 无论是否有活跃agent,都记录round开始 
+ # agentround if action_logger: action_logger.log_round_start(round_num + 1, simulated_hour) if not active_agents: - # 没有活跃agent时也记录round结束(actions_count=0) + # agentroundactions_count=0 if action_logger: action_logger.log_round_end(round_num + 1, 0) continue @@ -1452,7 +1420,6 @@ def log_info(msg): actions = {agent: LLMAction() for _, agent in active_agents} await result.env.step(actions) - # 从数据库获取实际执行的动作并记录 actual_actions, last_rowid = fetch_new_actions_from_db( db_path, last_rowid, agent_names ) @@ -1477,76 +1444,75 @@ def log_info(msg): progress = (round_num + 1) / total_rounds * 100 log_info(f"Day {simulated_day}, {simulated_hour:02d}:00 - Round {round_num + 1}/{total_rounds} ({progress:.1f}%)") - # 注意:不关闭环境,保留给Interview使用 + # Interview if action_logger: action_logger.log_simulation_end(total_rounds, total_actions) result.total_actions = total_actions elapsed = (datetime.now() - start_time).total_seconds() - log_info(f"模拟循环完成! 耗时: {elapsed:.1f}秒, 总动作: {total_actions}") + log_info(f"Simulação! 
: {elapsed:.1f}, : {total_actions}") return result async def main(): - parser = argparse.ArgumentParser(description='OASIS双平台并行模拟') + parser = argparse.ArgumentParser(description='OASISSimulação') parser.add_argument( '--config', type=str, required=True, - help='配置文件路径 (simulation_config.json)' + help='Configuração (simulation_config.json)' ) parser.add_argument( '--twitter-only', action='store_true', - help='只运行Twitter模拟' + help='TwitterSimulação' ) parser.add_argument( '--reddit-only', action='store_true', - help='只运行Reddit模拟' + help='RedditSimulação' ) parser.add_argument( '--max-rounds', type=int, default=None, - help='最大模拟轮数(可选,用于截断过长的模拟)' + help='SimulaçãoSimulação' ) parser.add_argument( '--no-wait', action='store_true', default=False, - help='模拟完成后立即关闭环境,不进入等待命令模式' + help='Simulação' ) args = parser.parse_args() - # 在 main 函数开始时创建 shutdown 事件,确保整个程序都能响应退出信号 + # main shutdown global _shutdown_event _shutdown_event = asyncio.Event() if not os.path.exists(args.config): - print(f"错误: 配置文件不存在: {args.config}") + print(f": Configuração: {args.config}") sys.exit(1) config = load_config(args.config) simulation_dir = os.path.dirname(args.config) or "." 
wait_for_commands = not args.no_wait - # 初始化日志配置(禁用 OASIS 日志,清理旧文件) + # Configuração OASIS init_logging_for_simulation(simulation_dir) - # 创建日志管理器 log_manager = SimulationLogManager(simulation_dir) twitter_logger = log_manager.get_twitter_logger() reddit_logger = log_manager.get_reddit_logger() log_manager.info("=" * 60) - log_manager.info("OASIS 双平台并行模拟") - log_manager.info(f"配置文件: {args.config}") - log_manager.info(f"模拟ID: {config.get('simulation_id', 'unknown')}") - log_manager.info(f"等待命令模式: {'启用' if wait_for_commands else '禁用'}") + log_manager.info("OASIS Simulação") + log_manager.info(f"Configuração: {args.config}") + log_manager.info(f"ID da simulação: {config.get('simulation_id', 'unknown')}") + log_manager.info(f": {'' if wait_for_commands else ''}") log_manager.info("=" * 60) time_config = config.get("time_config", {}) @@ -1554,25 +1520,25 @@ async def main(): minutes_per_round = time_config.get('minutes_per_round', 30) config_total_rounds = (total_hours * 60) // minutes_per_round - log_manager.info(f"模拟参数:") - log_manager.info(f" - 总模拟时长: {total_hours}小时") - log_manager.info(f" - 每轮时间: {minutes_per_round}分钟") - log_manager.info(f" - 配置总轮数: {config_total_rounds}") + log_manager.info(f"Simulação:") + log_manager.info(f" - Simulação: {total_hours}") + log_manager.info(f" - : {minutes_per_round}") + log_manager.info(f" - Configuração: {config_total_rounds}") if args.max_rounds: - log_manager.info(f" - 最大轮数限制: {args.max_rounds}") + log_manager.info(f" - : {args.max_rounds}") if args.max_rounds < config_total_rounds: - log_manager.info(f" - 实际执行轮数: {args.max_rounds} (已截断)") - log_manager.info(f" - Agent数量: {len(config.get('agent_configs', []))}") + log_manager.info(f" - : {args.max_rounds} ()") + log_manager.info(f" - Agent: {len(config.get('agent_configs', []))}") - log_manager.info("日志结构:") - log_manager.info(f" - 主日志: simulation.log") - log_manager.info(f" - Twitter动作: twitter/actions.jsonl") - log_manager.info(f" - Reddit动作: reddit/actions.jsonl") + 
log_manager.info(":") + log_manager.info(f" - : simulation.log") + log_manager.info(f" - Twitter: twitter/actions.jsonl") + log_manager.info(f" - Reddit: reddit/actions.jsonl") log_manager.info("=" * 60) start_time = datetime.now() - # 存储两个平台的模拟结果 + # SimulaçãoResultado twitter_result: Optional[PlatformSimulation] = None reddit_result: Optional[PlatformSimulation] = None @@ -1581,7 +1547,6 @@ async def main(): elif args.reddit_only: reddit_result = await run_reddit_simulation(config, simulation_dir, reddit_logger, log_manager, args.max_rounds) else: - # 并行运行(每个平台使用独立的日志记录器) results = await asyncio.gather( run_twitter_simulation(config, simulation_dir, twitter_logger, log_manager, args.max_rounds), run_reddit_simulation(config, simulation_dir, reddit_logger, log_manager, args.max_rounds), @@ -1590,17 +1555,15 @@ async def main(): total_elapsed = (datetime.now() - start_time).total_seconds() log_manager.info("=" * 60) - log_manager.info(f"模拟循环完成! 总耗时: {total_elapsed:.1f}秒") + log_manager.info(f"Simulação! 
: {total_elapsed:.1f}") - # 是否进入等待命令模式 if wait_for_commands: log_manager.info("") log_manager.info("=" * 60) - log_manager.info("进入等待命令模式 - 环境保持运行") - log_manager.info("支持的命令: interview, batch_interview, close_env") + log_manager.info(" - ") + log_manager.info(": interview, batch_interview, close_env") log_manager.info("=" * 60) - # 创建IPC处理器 ipc_handler = ParallelIPCHandler( simulation_dir=simulation_dir, twitter_env=twitter_result.env if twitter_result else None, @@ -1610,40 +1573,39 @@ async def main(): ) ipc_handler.update_status("alive") - # 等待命令循环(使用全局 _shutdown_event) + # _shutdown_event try: while not _shutdown_event.is_set(): should_continue = await ipc_handler.process_commands() if not should_continue: break - # 使用 wait_for 替代 sleep,这样可以响应 shutdown_event + # wait_for sleep shutdown_event try: await asyncio.wait_for(_shutdown_event.wait(), timeout=0.5) - break # 收到退出信号 + break # except asyncio.TimeoutError: - pass # 超时继续循环 + pass # except KeyboardInterrupt: - print("\n收到中断信号") + print("\n") except asyncio.CancelledError: - print("\n任务被取消") + print("\n") except Exception as e: - print(f"\n命令处理出错: {e}") + print(f"\n: {e}") - log_manager.info("\n关闭环境...") + log_manager.info("\n...") ipc_handler.update_status("stopped") - # 关闭环境 if twitter_result and twitter_result.env: await twitter_result.env.close() - log_manager.info("[Twitter] 环境已关闭") + log_manager.info("[Twitter] ") if reddit_result and reddit_result.env: await reddit_result.env.close() - log_manager.info("[Reddit] 环境已关闭") + log_manager.info("[Reddit] ") log_manager.info("=" * 60) - log_manager.info(f"全部完成!") - log_manager.info(f"日志文件:") + log_manager.info(f"!") + log_manager.info(f":") log_manager.info(f" - {os.path.join(simulation_dir, 'simulation.log')}") log_manager.info(f" - {os.path.join(simulation_dir, 'twitter', 'actions.jsonl')}") log_manager.info(f" - {os.path.join(simulation_dir, 'reddit', 'actions.jsonl')}") @@ -1652,29 +1614,27 @@ async def main(): def setup_signal_handlers(loop=None): """ - 
设置信号处理器,确保收到 SIGTERM/SIGINT 时能够正确退出 + SIGTERM/SIGINT - 持久化模拟场景:模拟完成后不退出,等待 interview 命令 - 当收到终止信号时,需要: - 1. 通知 asyncio 循环退出等待 - 2. 让程序有机会正常清理资源(关闭数据库、环境等) - 3. 然后才退出 + SimulaçãoSimulação interview + 1. asyncio + 2. + 3. """ def signal_handler(signum, frame): global _cleanup_done sig_name = "SIGTERM" if signum == signal.SIGTERM else "SIGINT" - print(f"\n收到 {sig_name} 信号,正在退出...") + print(f"\n {sig_name} ...") if not _cleanup_done: _cleanup_done = True - # 设置事件通知 asyncio 循环退出(让循环有机会清理资源) + # asyncio if _shutdown_event: _shutdown_event.set() - # 不要直接 sys.exit(),让 asyncio 循环正常退出并清理资源 - # 如果是重复收到信号,才强制退出 + # sys.exit() asyncio else: - print("强制退出...") + print("...") sys.exit(1) signal.signal(signal.SIGTERM, signal_handler) @@ -1686,14 +1646,14 @@ def signal_handler(signum, frame): try: asyncio.run(main()) except KeyboardInterrupt: - print("\n程序被中断") + print("\n") except SystemExit: pass finally: - # 清理 multiprocessing 资源跟踪器(防止退出时的警告) + # multiprocessing try: from multiprocessing import resource_tracker resource_tracker._resource_tracker._stop() except Exception: pass - print("模拟进程已退出") + print("Simulação") diff --git a/backend/scripts/run_reddit_simulation.py b/backend/scripts/run_reddit_simulation.py index 14907cbda5..8c57b2df13 100644 --- a/backend/scripts/run_reddit_simulation.py +++ b/backend/scripts/run_reddit_simulation.py @@ -1,16 +1,16 @@ """ -OASIS Reddit模拟预设脚本 -此脚本读取配置文件中的参数来执行模拟,实现全程自动化 +OASIS RedditSimulação +ConfiguraçãoSimulação -功能特性: -- 完成模拟后不立即关闭环境,进入等待命令模式 -- 支持通过IPC接收Interview命令 -- 支持单个Agent采访和批量采访 -- 支持远程关闭环境命令 +: +- Simulação +- IPCInterview +- Agent +- -使用方式: +: python run_reddit_simulation.py --config /path/to/simulation_config.json - python run_reddit_simulation.py --config /path/to/simulation_config.json --no-wait # 完成后立即关闭 + python run_reddit_simulation.py --config /path/to/simulation_config.json --no-wait # """ import argparse @@ -25,18 +25,16 @@ from datetime import datetime from typing import Dict, Any, List, Optional -# 全局变量:用于信号处理 
_shutdown_event = None _cleanup_done = False -# 添加项目路径 _scripts_dir = os.path.dirname(os.path.abspath(__file__)) _backend_dir = os.path.abspath(os.path.join(_scripts_dir, '..')) _project_root = os.path.abspath(os.path.join(_backend_dir, '..')) sys.path.insert(0, _scripts_dir) sys.path.insert(0, _backend_dir) -# 加载项目根目录的 .env 文件(包含 LLM_API_KEY 等配置) +# .env LLM_API_KEY Configuração from dotenv import load_dotenv _env_file = os.path.join(_project_root, '.env') if os.path.exists(_env_file): @@ -51,7 +49,7 @@ class UnicodeFormatter(logging.Formatter): - """自定义格式化器,将 Unicode 转义序列转换为可读字符""" + """ Unicode """ UNICODE_ESCAPE_PATTERN = re.compile(r'\\u([0-9a-fA-F]{4})') @@ -68,24 +66,23 @@ def replace_unicode(match): class MaxTokensWarningFilter(logging.Filter): - """过滤掉 camel-ai 关于 max_tokens 的警告(我们故意不设置 max_tokens,让模型自行决定)""" + """ camel-ai max_tokens max_tokens""" def filter(self, record): - # 过滤掉包含 max_tokens 警告的日志 + # max_tokens if "max_tokens" in record.getMessage() and "Invalid or missing" in record.getMessage(): return False return True -# 在模块加载时立即添加过滤器,确保在 camel 代码执行前生效 +# camel logging.getLogger().addFilter(MaxTokensWarningFilter()) def setup_oasis_logging(log_dir: str): - """配置 OASIS 的日志,使用固定名称的日志文件""" + """Configuração OASIS """ os.makedirs(log_dir, exist_ok=True) - # 清理旧的日志文件 for f in os.listdir(log_dir): old_log = os.path.join(log_dir, f) if os.path.isfile(old_log) and f.endswith('.log'): @@ -126,25 +123,24 @@ def setup_oasis_logging(log_dir: str): generate_reddit_agent_graph ) except ImportError as e: - print(f"错误: 缺少依赖 {e}") - print("请先安装: pip install oasis-ai camel-ai") + print(f": {e}") + print(": pip install oasis-ai camel-ai") sys.exit(1) -# IPC相关常量 IPC_COMMANDS_DIR = "ipc_commands" IPC_RESPONSES_DIR = "ipc_responses" ENV_STATUS_FILE = "env_status.json" class CommandType: - """命令类型常量""" + """""" INTERVIEW = "interview" BATCH_INTERVIEW = "batch_interview" CLOSE_ENV = "close_env" class IPCHandler: - """IPC命令处理器""" + """IPC""" def __init__(self, 
simulation_dir: str, env, agent_graph): self.simulation_dir = simulation_dir @@ -155,12 +151,11 @@ def __init__(self, simulation_dir: str, env, agent_graph): self.status_file = os.path.join(simulation_dir, ENV_STATUS_FILE) self._running = True - # 确保目录存在 os.makedirs(self.commands_dir, exist_ok=True) os.makedirs(self.responses_dir, exist_ok=True) def update_status(self, status: str): - """更新环境状态""" + """""" with open(self.status_file, 'w', encoding='utf-8') as f: json.dump({ "status": status, @@ -168,11 +163,10 @@ def update_status(self, status: str): }, f, ensure_ascii=False, indent=2) def poll_command(self) -> Optional[Dict[str, Any]]: - """轮询获取待处理命令""" + """""" if not os.path.exists(self.commands_dir): return None - # 获取命令文件(按时间排序) command_files = [] for filename in os.listdir(self.commands_dir): if filename.endswith('.json'): @@ -191,7 +185,7 @@ def poll_command(self) -> Optional[Dict[str, Any]]: return None def send_response(self, command_id: str, status: str, result: Dict = None, error: str = None): - """发送响应""" + """""" response = { "command_id": command_id, "status": status, @@ -204,7 +198,6 @@ def send_response(self, command_id: str, status: str, result: Dict = None, error with open(response_file, 'w', encoding='utf-8') as f: json.dump(response, f, ensure_ascii=False, indent=2) - # 删除命令文件 command_file = os.path.join(self.commands_dir, f"{command_id}.json") try: os.remove(command_file) @@ -213,49 +206,47 @@ def send_response(self, command_id: str, status: str, result: Dict = None, error async def handle_interview(self, command_id: str, agent_id: int, prompt: str) -> bool: """ - 处理单个Agent采访命令 + Agent Returns: - True 表示成功,False 表示失败 + True False Falhou """ try: - # 获取Agent + # Agent agent = self.agent_graph.get_agent(agent_id) - # 创建Interview动作 + # Interview interview_action = ManualAction( action_type=ActionType.INTERVIEW, action_args={"prompt": prompt} ) - # 执行Interview + # Interview actions = {agent: interview_action} await self.env.step(actions) - # 
从数据库获取结果 + # Resultado result = self._get_interview_result(agent_id) self.send_response(command_id, "completed", result=result) - print(f" Interview完成: agent_id={agent_id}") + print(f" Interview: agent_id={agent_id}") return True except Exception as e: error_msg = str(e) - print(f" Interview失败: agent_id={agent_id}, error={error_msg}") + print(f" InterviewFalhou: agent_id={agent_id}, error={error_msg}") self.send_response(command_id, "failed", error=error_msg) return False async def handle_batch_interview(self, command_id: str, interviews: List[Dict]) -> bool: """ - 处理批量采访命令 Args: interviews: [{"agent_id": int, "prompt": str}, ...] """ try: - # 构建动作字典 actions = {} - agent_prompts = {} # 记录每个agent的prompt + agent_prompts = {} # agentprompt for interview in interviews: agent_id = interview.get("agent_id") @@ -269,16 +260,16 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict]) ) agent_prompts[agent_id] = prompt except Exception as e: - print(f" 警告: 无法获取Agent {agent_id}: {e}") + print(f" : Agent {agent_id}: {e}") if not actions: - self.send_response(command_id, "failed", error="没有有效的Agent") + self.send_response(command_id, "failed", error="Agent") return False - # 执行批量Interview + # Interview await self.env.step(actions) - # 获取所有结果 + # Resultado results = {} for agent_id in agent_prompts.keys(): result = self._get_interview_result(agent_id) @@ -288,17 +279,17 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict]) "interviews_count": len(results), "results": results }) - print(f" 批量Interview完成: {len(results)} 个Agent") + print(f" Interview: {len(results)} Agent") return True except Exception as e: error_msg = str(e) - print(f" 批量Interview失败: {error_msg}") + print(f" InterviewFalhou: {error_msg}") self.send_response(command_id, "failed", error=error_msg) return False def _get_interview_result(self, agent_id: int) -> Dict[str, Any]: - """从数据库获取最新的Interview结果""" + """InterviewResultado""" db_path = 
os.path.join(self.simulation_dir, "reddit_simulation.db") result = { @@ -314,7 +305,7 @@ def _get_interview_result(self, agent_id: int) -> Dict[str, Any]: conn = sqlite3.connect(db_path) cursor = conn.cursor() - # 查询最新的Interview记录 + # Interview cursor.execute(""" SELECT user_id, info, created_at FROM trace @@ -336,16 +327,15 @@ def _get_interview_result(self, agent_id: int) -> Dict[str, Any]: conn.close() except Exception as e: - print(f" 读取Interview结果失败: {e}") + print(f" InterviewResultadoFalhou: {e}") return result async def process_commands(self) -> bool: """ - 处理所有待处理命令 Returns: - True 表示继续运行,False 表示应该退出 + True False """ command = self.poll_command() if not command: @@ -355,7 +345,7 @@ async def process_commands(self) -> bool: command_type = command.get("command_type") args = command.get("args", {}) - print(f"\n收到IPC命令: {command_type}, id={command_id}") + print(f"\nIPC: {command_type}, id={command_id}") if command_type == CommandType.INTERVIEW: await self.handle_interview( @@ -373,19 +363,19 @@ async def process_commands(self) -> bool: return True elif command_type == CommandType.CLOSE_ENV: - print("收到关闭环境命令") - self.send_response(command_id, "completed", result={"message": "环境即将关闭"}) + print("") + self.send_response(command_id, "completed", result={"message": ""}) return False else: - self.send_response(command_id, "failed", error=f"未知命令类型: {command_type}") + self.send_response(command_id, "failed", error=f": {command_type}") return True class RedditSimulationRunner: - """Reddit模拟运行器""" + """RedditSimulação""" - # Reddit可用动作(不包含INTERVIEW,INTERVIEW只能通过ManualAction手动触发) + # RedditINTERVIEWINTERVIEWManualAction AVAILABLE_ACTIONS = [ ActionType.LIKE_POST, ActionType.DISLIKE_POST, @@ -404,11 +394,11 @@ class RedditSimulationRunner: def __init__(self, config_path: str, wait_for_commands: bool = True): """ - 初始化模拟运行器 + Simulação Args: - config_path: 配置文件路径 (simulation_config.json) - wait_for_commands: 模拟完成后是否等待命令(默认True) + config_path: Configuração 
(simulation_config.json) + wait_for_commands: SimulaçãoTrue """ self.config_path = config_path self.config = self._load_config() @@ -419,47 +409,47 @@ def __init__(self, config_path: str, wait_for_commands: bool = True): self.ipc_handler = None def _load_config(self) -> Dict[str, Any]: - """加载配置文件""" + """Configuração""" with open(self.config_path, 'r', encoding='utf-8') as f: return json.load(f) def _get_profile_path(self) -> str: - """获取Profile文件路径""" + """Profile""" return os.path.join(self.simulation_dir, "reddit_profiles.json") def _get_db_path(self) -> str: - """获取数据库路径""" + """""" return os.path.join(self.simulation_dir, "reddit_simulation.db") def _create_model(self): """ - 创建LLM模型 + LLM - 统一使用项目根目录 .env 文件中的配置(优先级最高): - - LLM_API_KEY: API密钥 - - LLM_BASE_URL: API基础URL - - LLM_MODEL_NAME: 模型名称 + .env Configuração + - LLM_API_KEY: API + - LLM_BASE_URL: APIURL + - LLM_MODEL_NAME: """ - # 优先从 .env 读取配置 + # .env Configuração llm_api_key = os.environ.get("LLM_API_KEY", "") llm_base_url = os.environ.get("LLM_BASE_URL", "") llm_model = os.environ.get("LLM_MODEL_NAME", "") - # 如果 .env 中没有,则使用 config 作为备用 + # .env config if not llm_model: llm_model = self.config.get("llm_model", "gpt-4o-mini") - # 设置 camel-ai 所需的环境变量 + # camel-ai if llm_api_key: os.environ["OPENAI_API_KEY"] = llm_api_key if not os.environ.get("OPENAI_API_KEY"): - raise ValueError("缺少 API Key 配置,请在项目根目录 .env 文件中设置 LLM_API_KEY") + raise ValueError(" API Key Configuração .env LLM_API_KEY") if llm_base_url: os.environ["OPENAI_API_BASE_URL"] = llm_base_url - print(f"LLM配置: model={llm_model}, base_url={llm_base_url[:40] if llm_base_url else '默认'}...") + print(f"LLMConfiguração: model={llm_model}, base_url={llm_base_url[:40] if llm_base_url else ''}...") return ModelFactory.create( model_platform=ModelPlatformType.OPENAI, @@ -473,7 +463,7 @@ def _get_active_agents_for_round( round_num: int ) -> List: """ - 根据时间和配置决定本轮激活哪些Agent + ConfiguraçãoAgent """ time_config = self.config.get("time_config", {}) 
agent_configs = self.config.get("agent_configs", []) @@ -521,16 +511,16 @@ def _get_active_agents_for_round( return active_agents async def run(self, max_rounds: int = None): - """运行Reddit模拟 + """RedditSimulação Args: - max_rounds: 最大模拟轮数(可选,用于截断过长的模拟) + max_rounds: SimulaçãoSimulação """ print("=" * 60) - print("OASIS Reddit模拟") - print(f"配置文件: {self.config_path}") - print(f"模拟ID: {self.config.get('simulation_id', 'unknown')}") - print(f"等待命令模式: {'启用' if self.wait_for_commands else '禁用'}") + print("OASIS RedditSimulação") + print(f"Configuração: {self.config_path}") + print(f"ID da simulação: {self.config.get('simulation_id', 'unknown')}") + print(f": {'' if self.wait_for_commands else ''}") print("=" * 60) time_config = self.config.get("time_config", {}) @@ -538,28 +528,27 @@ async def run(self, max_rounds: int = None): minutes_per_round = time_config.get("minutes_per_round", 30) total_rounds = (total_hours * 60) // minutes_per_round - # 如果指定了最大轮数,则截断 if max_rounds is not None and max_rounds > 0: original_rounds = total_rounds total_rounds = min(total_rounds, max_rounds) if total_rounds < original_rounds: - print(f"\n轮数已截断: {original_rounds} -> {total_rounds} (max_rounds={max_rounds})") + print(f"\n: {original_rounds} -> {total_rounds} (max_rounds={max_rounds})") - print(f"\n模拟参数:") - print(f" - 总模拟时长: {total_hours}小时") - print(f" - 每轮时间: {minutes_per_round}分钟") - print(f" - 总轮数: {total_rounds}") + print(f"\nSimulação:") + print(f" - Simulação: {total_hours}") + print(f" - : {minutes_per_round}") + print(f" - : {total_rounds}") if max_rounds: - print(f" - 最大轮数限制: {max_rounds}") - print(f" - Agent数量: {len(self.config.get('agent_configs', []))}") + print(f" - : {max_rounds}") + print(f" - Agent: {len(self.config.get('agent_configs', []))}") - print("\n初始化LLM模型...") + print("\nLLM...") model = self._create_model() - print("加载Agent Profile...") + print("Agent Profile...") profile_path = self._get_profile_path() if not os.path.exists(profile_path): - print(f"错误: 
Profile文件不存在: {profile_path}") + print(f": Profile: {profile_path}") return self.agent_graph = await generate_reddit_agent_graph( @@ -571,29 +560,27 @@ async def run(self, max_rounds: int = None): db_path = self._get_db_path() if os.path.exists(db_path): os.remove(db_path) - print(f"已删除旧数据库: {db_path}") + print(f": {db_path}") - print("创建OASIS环境...") + print("OASIS...") self.env = oasis.make( agent_graph=self.agent_graph, platform=oasis.DefaultPlatformType.REDDIT, database_path=db_path, - semaphore=30, # 限制最大并发 LLM 请求数,防止 API 过载 + semaphore=30, # LLM API ) await self.env.reset() - print("环境初始化完成\n") + print("\n") - # 初始化IPC处理器 self.ipc_handler = IPCHandler(self.simulation_dir, self.env, self.agent_graph) self.ipc_handler.update_status("running") - # 执行初始事件 event_config = self.config.get("event_config", {}) initial_posts = event_config.get("initial_posts", []) if initial_posts: - print(f"执行初始事件 ({len(initial_posts)}条初始帖子)...") + print(f" ({len(initial_posts)})...") initial_actions = {} for post in initial_posts: agent_id = post.get("poster_agent_id", 0) @@ -613,14 +600,14 @@ async def run(self, max_rounds: int = None): action_args={"content": content} ) except Exception as e: - print(f" 警告: 无法为Agent {agent_id}创建初始帖子: {e}") + print(f" : Agent {agent_id}: {e}") if initial_actions: await self.env.step(initial_actions) - print(f" 已发布 {len(initial_actions)} 条初始帖子") + print(f" {len(initial_actions)} ") - # 主模拟循环 - print("\n开始模拟循环...") + # Simulação + print("\nSimulação...") start_time = datetime.now() for round_num in range(total_rounds): @@ -651,20 +638,19 @@ async def run(self, max_rounds: int = None): f"- elapsed: {elapsed:.1f}s") total_elapsed = (datetime.now() - start_time).total_seconds() - print(f"\n模拟循环完成!") - print(f" - 总耗时: {total_elapsed:.1f}秒") - print(f" - 数据库: {db_path}") + print(f"\nSimulação!") + print(f" - : {total_elapsed:.1f}") + print(f" - : {db_path}") - # 是否进入等待命令模式 if self.wait_for_commands: print("\n" + "=" * 60) - print("进入等待命令模式 - 环境保持运行") - 
print("支持的命令: interview, batch_interview, close_env") + print(" - ") + print(": interview, batch_interview, close_env") print("=" * 60) self.ipc_handler.update_status("alive") - # 等待命令循环(使用全局 _shutdown_event) + # _shutdown_event try: while not _shutdown_event.is_set(): should_continue = await self.ipc_handler.process_commands() @@ -672,58 +658,57 @@ async def run(self, max_rounds: int = None): break try: await asyncio.wait_for(_shutdown_event.wait(), timeout=0.5) - break # 收到退出信号 + break # except asyncio.TimeoutError: pass except KeyboardInterrupt: - print("\n收到中断信号") + print("\n") except asyncio.CancelledError: - print("\n任务被取消") + print("\n") except Exception as e: - print(f"\n命令处理出错: {e}") + print(f"\n: {e}") - print("\n关闭环境...") + print("\n...") - # 关闭环境 self.ipc_handler.update_status("stopped") await self.env.close() - print("环境已关闭") + print("") print("=" * 60) async def main(): - parser = argparse.ArgumentParser(description='OASIS Reddit模拟') + parser = argparse.ArgumentParser(description='OASIS RedditSimulação') parser.add_argument( '--config', type=str, required=True, - help='配置文件路径 (simulation_config.json)' + help='Configuração (simulation_config.json)' ) parser.add_argument( '--max-rounds', type=int, default=None, - help='最大模拟轮数(可选,用于截断过长的模拟)' + help='SimulaçãoSimulação' ) parser.add_argument( '--no-wait', action='store_true', default=False, - help='模拟完成后立即关闭环境,不进入等待命令模式' + help='Simulação' ) args = parser.parse_args() - # 在 main 函数开始时创建 shutdown 事件 + # main shutdown global _shutdown_event _shutdown_event = asyncio.Event() if not os.path.exists(args.config): - print(f"错误: 配置文件不存在: {args.config}") + print(f": Configuração: {args.config}") sys.exit(1) - # 初始化日志配置(使用固定文件名,清理旧日志) + # Configuração simulation_dir = os.path.dirname(args.config) or "." 
setup_oasis_logging(os.path.join(simulation_dir, "log")) @@ -736,20 +721,18 @@ async def main(): def setup_signal_handlers(): """ - 设置信号处理器,确保收到 SIGTERM/SIGINT 时能够正确退出 - 让程序有机会正常清理资源(关闭数据库、环境等) + SIGTERM/SIGINT """ def signal_handler(signum, frame): global _cleanup_done sig_name = "SIGTERM" if signum == signal.SIGTERM else "SIGINT" - print(f"\n收到 {sig_name} 信号,正在退出...") + print(f"\n {sig_name} ...") if not _cleanup_done: _cleanup_done = True if _shutdown_event: _shutdown_event.set() else: - # 重复收到信号才强制退出 - print("强制退出...") + print("...") sys.exit(1) signal.signal(signal.SIGTERM, signal_handler) @@ -761,9 +744,9 @@ def signal_handler(signum, frame): try: asyncio.run(main()) except KeyboardInterrupt: - print("\n程序被中断") + print("\n") except SystemExit: pass finally: - print("模拟进程已退出") + print("Simulação") diff --git a/backend/scripts/run_twitter_simulation.py b/backend/scripts/run_twitter_simulation.py index caab9e9d35..431a19a71d 100644 --- a/backend/scripts/run_twitter_simulation.py +++ b/backend/scripts/run_twitter_simulation.py @@ -1,16 +1,16 @@ """ -OASIS Twitter模拟预设脚本 -此脚本读取配置文件中的参数来执行模拟,实现全程自动化 +OASIS TwitterSimulação +ConfiguraçãoSimulação -功能特性: -- 完成模拟后不立即关闭环境,进入等待命令模式 -- 支持通过IPC接收Interview命令 -- 支持单个Agent采访和批量采访 -- 支持远程关闭环境命令 +: +- Simulação +- IPCInterview +- Agent +- -使用方式: +: python run_twitter_simulation.py --config /path/to/simulation_config.json - python run_twitter_simulation.py --config /path/to/simulation_config.json --no-wait # 完成后立即关闭 + python run_twitter_simulation.py --config /path/to/simulation_config.json --no-wait # """ import argparse @@ -25,18 +25,16 @@ from datetime import datetime from typing import Dict, Any, List, Optional -# 全局变量:用于信号处理 _shutdown_event = None _cleanup_done = False -# 添加项目路径 _scripts_dir = os.path.dirname(os.path.abspath(__file__)) _backend_dir = os.path.abspath(os.path.join(_scripts_dir, '..')) _project_root = os.path.abspath(os.path.join(_backend_dir, '..')) sys.path.insert(0, _scripts_dir) sys.path.insert(0, 
_backend_dir) -# 加载项目根目录的 .env 文件(包含 LLM_API_KEY 等配置) +# .env LLM_API_KEY Configuração from dotenv import load_dotenv _env_file = os.path.join(_project_root, '.env') if os.path.exists(_env_file): @@ -51,7 +49,7 @@ class UnicodeFormatter(logging.Formatter): - """自定义格式化器,将 Unicode 转义序列转换为可读字符""" + """ Unicode """ UNICODE_ESCAPE_PATTERN = re.compile(r'\\u([0-9a-fA-F]{4})') @@ -68,24 +66,23 @@ def replace_unicode(match): class MaxTokensWarningFilter(logging.Filter): - """过滤掉 camel-ai 关于 max_tokens 的警告(我们故意不设置 max_tokens,让模型自行决定)""" + """ camel-ai max_tokens max_tokens""" def filter(self, record): - # 过滤掉包含 max_tokens 警告的日志 + # max_tokens if "max_tokens" in record.getMessage() and "Invalid or missing" in record.getMessage(): return False return True -# 在模块加载时立即添加过滤器,确保在 camel 代码执行前生效 +# camel logging.getLogger().addFilter(MaxTokensWarningFilter()) def setup_oasis_logging(log_dir: str): - """配置 OASIS 的日志,使用固定名称的日志文件""" + """Configuração OASIS """ os.makedirs(log_dir, exist_ok=True) - # 清理旧的日志文件 for f in os.listdir(log_dir): old_log = os.path.join(log_dir, f) if os.path.isfile(old_log) and f.endswith('.log'): @@ -126,25 +123,24 @@ def setup_oasis_logging(log_dir: str): generate_twitter_agent_graph ) except ImportError as e: - print(f"错误: 缺少依赖 {e}") - print("请先安装: pip install oasis-ai camel-ai") + print(f": {e}") + print(": pip install oasis-ai camel-ai") sys.exit(1) -# IPC相关常量 IPC_COMMANDS_DIR = "ipc_commands" IPC_RESPONSES_DIR = "ipc_responses" ENV_STATUS_FILE = "env_status.json" class CommandType: - """命令类型常量""" + """""" INTERVIEW = "interview" BATCH_INTERVIEW = "batch_interview" CLOSE_ENV = "close_env" class IPCHandler: - """IPC命令处理器""" + """IPC""" def __init__(self, simulation_dir: str, env, agent_graph): self.simulation_dir = simulation_dir @@ -155,12 +151,11 @@ def __init__(self, simulation_dir: str, env, agent_graph): self.status_file = os.path.join(simulation_dir, ENV_STATUS_FILE) self._running = True - # 确保目录存在 os.makedirs(self.commands_dir, exist_ok=True) 
os.makedirs(self.responses_dir, exist_ok=True) def update_status(self, status: str): - """更新环境状态""" + """""" with open(self.status_file, 'w', encoding='utf-8') as f: json.dump({ "status": status, @@ -168,11 +163,10 @@ def update_status(self, status: str): }, f, ensure_ascii=False, indent=2) def poll_command(self) -> Optional[Dict[str, Any]]: - """轮询获取待处理命令""" + """""" if not os.path.exists(self.commands_dir): return None - # 获取命令文件(按时间排序) command_files = [] for filename in os.listdir(self.commands_dir): if filename.endswith('.json'): @@ -191,7 +185,7 @@ def poll_command(self) -> Optional[Dict[str, Any]]: return None def send_response(self, command_id: str, status: str, result: Dict = None, error: str = None): - """发送响应""" + """""" response = { "command_id": command_id, "status": status, @@ -204,7 +198,6 @@ def send_response(self, command_id: str, status: str, result: Dict = None, error with open(response_file, 'w', encoding='utf-8') as f: json.dump(response, f, ensure_ascii=False, indent=2) - # 删除命令文件 command_file = os.path.join(self.commands_dir, f"{command_id}.json") try: os.remove(command_file) @@ -213,49 +206,47 @@ def send_response(self, command_id: str, status: str, result: Dict = None, error async def handle_interview(self, command_id: str, agent_id: int, prompt: str) -> bool: """ - 处理单个Agent采访命令 + Agent Returns: - True 表示成功,False 表示失败 + True False Falhou """ try: - # 获取Agent + # Agent agent = self.agent_graph.get_agent(agent_id) - # 创建Interview动作 + # Interview interview_action = ManualAction( action_type=ActionType.INTERVIEW, action_args={"prompt": prompt} ) - # 执行Interview + # Interview actions = {agent: interview_action} await self.env.step(actions) - # 从数据库获取结果 + # Resultado result = self._get_interview_result(agent_id) self.send_response(command_id, "completed", result=result) - print(f" Interview完成: agent_id={agent_id}") + print(f" Interview: agent_id={agent_id}") return True except Exception as e: error_msg = str(e) - print(f" Interview失败: 
agent_id={agent_id}, error={error_msg}") + print(f" InterviewFalhou: agent_id={agent_id}, error={error_msg}") self.send_response(command_id, "failed", error=error_msg) return False async def handle_batch_interview(self, command_id: str, interviews: List[Dict]) -> bool: """ - 处理批量采访命令 Args: interviews: [{"agent_id": int, "prompt": str}, ...] """ try: - # 构建动作字典 actions = {} - agent_prompts = {} # 记录每个agent的prompt + agent_prompts = {} # agentprompt for interview in interviews: agent_id = interview.get("agent_id") @@ -269,16 +260,16 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict]) ) agent_prompts[agent_id] = prompt except Exception as e: - print(f" 警告: 无法获取Agent {agent_id}: {e}") + print(f" : Agent {agent_id}: {e}") if not actions: - self.send_response(command_id, "failed", error="没有有效的Agent") + self.send_response(command_id, "failed", error="Agent") return False - # 执行批量Interview + # Interview await self.env.step(actions) - # 获取所有结果 + # Resultado results = {} for agent_id in agent_prompts.keys(): result = self._get_interview_result(agent_id) @@ -288,17 +279,17 @@ async def handle_batch_interview(self, command_id: str, interviews: List[Dict]) "interviews_count": len(results), "results": results }) - print(f" 批量Interview完成: {len(results)} 个Agent") + print(f" Interview: {len(results)} Agent") return True except Exception as e: error_msg = str(e) - print(f" 批量Interview失败: {error_msg}") + print(f" InterviewFalhou: {error_msg}") self.send_response(command_id, "failed", error=error_msg) return False def _get_interview_result(self, agent_id: int) -> Dict[str, Any]: - """从数据库获取最新的Interview结果""" + """InterviewResultado""" db_path = os.path.join(self.simulation_dir, "twitter_simulation.db") result = { @@ -314,7 +305,7 @@ def _get_interview_result(self, agent_id: int) -> Dict[str, Any]: conn = sqlite3.connect(db_path) cursor = conn.cursor() - # 查询最新的Interview记录 + # Interview cursor.execute(""" SELECT user_id, info, created_at FROM trace @@ 
-336,16 +327,15 @@ def _get_interview_result(self, agent_id: int) -> Dict[str, Any]: conn.close() except Exception as e: - print(f" 读取Interview结果失败: {e}") + print(f" InterviewResultadoFalhou: {e}") return result async def process_commands(self) -> bool: """ - 处理所有待处理命令 Returns: - True 表示继续运行,False 表示应该退出 + True False """ command = self.poll_command() if not command: @@ -355,7 +345,7 @@ async def process_commands(self) -> bool: command_type = command.get("command_type") args = command.get("args", {}) - print(f"\n收到IPC命令: {command_type}, id={command_id}") + print(f"\nIPC: {command_type}, id={command_id}") if command_type == CommandType.INTERVIEW: await self.handle_interview( @@ -373,19 +363,19 @@ async def process_commands(self) -> bool: return True elif command_type == CommandType.CLOSE_ENV: - print("收到关闭环境命令") - self.send_response(command_id, "completed", result={"message": "环境即将关闭"}) + print("") + self.send_response(command_id, "completed", result={"message": ""}) return False else: - self.send_response(command_id, "failed", error=f"未知命令类型: {command_type}") + self.send_response(command_id, "failed", error=f": {command_type}") return True class TwitterSimulationRunner: - """Twitter模拟运行器""" + """TwitterSimulação""" - # Twitter可用动作(不包含INTERVIEW,INTERVIEW只能通过ManualAction手动触发) + # TwitterINTERVIEWINTERVIEWManualAction AVAILABLE_ACTIONS = [ ActionType.CREATE_POST, ActionType.LIKE_POST, @@ -397,11 +387,11 @@ class TwitterSimulationRunner: def __init__(self, config_path: str, wait_for_commands: bool = True): """ - 初始化模拟运行器 + Simulação Args: - config_path: 配置文件路径 (simulation_config.json) - wait_for_commands: 模拟完成后是否等待命令(默认True) + config_path: Configuração (simulation_config.json) + wait_for_commands: SimulaçãoTrue """ self.config_path = config_path self.config = self._load_config() @@ -412,47 +402,47 @@ def __init__(self, config_path: str, wait_for_commands: bool = True): self.ipc_handler = None def _load_config(self) -> Dict[str, Any]: - """加载配置文件""" + """Configuração""" 
with open(self.config_path, 'r', encoding='utf-8') as f: return json.load(f) def _get_profile_path(self) -> str: - """获取Profile文件路径(OASIS Twitter使用CSV格式)""" + """ProfileOASIS TwitterCSV""" return os.path.join(self.simulation_dir, "twitter_profiles.csv") def _get_db_path(self) -> str: - """获取数据库路径""" + """""" return os.path.join(self.simulation_dir, "twitter_simulation.db") def _create_model(self): """ - 创建LLM模型 + LLM - 统一使用项目根目录 .env 文件中的配置(优先级最高): - - LLM_API_KEY: API密钥 - - LLM_BASE_URL: API基础URL - - LLM_MODEL_NAME: 模型名称 + .env Configuração + - LLM_API_KEY: API + - LLM_BASE_URL: APIURL + - LLM_MODEL_NAME: """ - # 优先从 .env 读取配置 + # .env Configuração llm_api_key = os.environ.get("LLM_API_KEY", "") llm_base_url = os.environ.get("LLM_BASE_URL", "") llm_model = os.environ.get("LLM_MODEL_NAME", "") - # 如果 .env 中没有,则使用 config 作为备用 + # .env config if not llm_model: llm_model = self.config.get("llm_model", "gpt-4o-mini") - # 设置 camel-ai 所需的环境变量 + # camel-ai if llm_api_key: os.environ["OPENAI_API_KEY"] = llm_api_key if not os.environ.get("OPENAI_API_KEY"): - raise ValueError("缺少 API Key 配置,请在项目根目录 .env 文件中设置 LLM_API_KEY") + raise ValueError(" API Key Configuração .env LLM_API_KEY") if llm_base_url: os.environ["OPENAI_API_BASE_URL"] = llm_base_url - print(f"LLM配置: model={llm_model}, base_url={llm_base_url[:40] if llm_base_url else '默认'}...") + print(f"LLMConfiguração: model={llm_model}, base_url={llm_base_url[:40] if llm_base_url else ''}...") return ModelFactory.create( model_platform=ModelPlatformType.OPENAI, @@ -466,24 +456,22 @@ def _get_active_agents_for_round( round_num: int ) -> List: """ - 根据时间和配置决定本轮激活哪些Agent + ConfiguraçãoAgent Args: - env: OASIS环境 - current_hour: 当前模拟小时(0-23) - round_num: 当前轮数 + env: OASIS + current_hour: Simulação0-23 + round_num: Returns: - 激活的Agent列表 + Agent """ time_config = self.config.get("time_config", {}) agent_configs = self.config.get("agent_configs", []) - # 基础激活数量 base_min = time_config.get("agents_per_hour_min", 5) base_max = 
time_config.get("agents_per_hour_max", 20) - # 根据时段调整 peak_hours = time_config.get("peak_hours", [9, 10, 11, 14, 15, 20, 21, 22]) off_peak_hours = time_config.get("off_peak_hours", [0, 1, 2, 3, 4, 5]) @@ -496,28 +484,25 @@ def _get_active_agents_for_round( target_count = int(random.uniform(base_min, base_max) * multiplier) - # 根据每个Agent的配置计算激活概率 + # AgentConfiguração candidates = [] for cfg in agent_configs: agent_id = cfg.get("agent_id", 0) active_hours = cfg.get("active_hours", list(range(8, 23))) activity_level = cfg.get("activity_level", 0.5) - # 检查是否在活跃时间 if current_hour not in active_hours: continue - # 根据活跃度计算概率 if random.random() < activity_level: candidates.append(agent_id) - # 随机选择 selected_ids = random.sample( candidates, min(target_count, len(candidates)) ) if candidates else [] - # 转换为Agent对象 + # Agent active_agents = [] for agent_id in selected_ids: try: @@ -529,50 +514,47 @@ def _get_active_agents_for_round( return active_agents async def run(self, max_rounds: int = None): - """运行Twitter模拟 + """TwitterSimulação Args: - max_rounds: 最大模拟轮数(可选,用于截断过长的模拟) + max_rounds: SimulaçãoSimulação """ print("=" * 60) - print("OASIS Twitter模拟") - print(f"配置文件: {self.config_path}") - print(f"模拟ID: {self.config.get('simulation_id', 'unknown')}") - print(f"等待命令模式: {'启用' if self.wait_for_commands else '禁用'}") + print("OASIS TwitterSimulação") + print(f"Configuração: {self.config_path}") + print(f"ID da simulação: {self.config.get('simulation_id', 'unknown')}") + print(f": {'' if self.wait_for_commands else ''}") print("=" * 60) - # 加载时间配置 + # Configuração time_config = self.config.get("time_config", {}) total_hours = time_config.get("total_simulation_hours", 72) minutes_per_round = time_config.get("minutes_per_round", 30) - # 计算总轮数 total_rounds = (total_hours * 60) // minutes_per_round - # 如果指定了最大轮数,则截断 if max_rounds is not None and max_rounds > 0: original_rounds = total_rounds total_rounds = min(total_rounds, max_rounds) if total_rounds < original_rounds: - 
print(f"\n轮数已截断: {original_rounds} -> {total_rounds} (max_rounds={max_rounds})") + print(f"\n: {original_rounds} -> {total_rounds} (max_rounds={max_rounds})") - print(f"\n模拟参数:") - print(f" - 总模拟时长: {total_hours}小时") - print(f" - 每轮时间: {minutes_per_round}分钟") - print(f" - 总轮数: {total_rounds}") + print(f"\nSimulação:") + print(f" - Simulação: {total_hours}") + print(f" - : {minutes_per_round}") + print(f" - : {total_rounds}") if max_rounds: - print(f" - 最大轮数限制: {max_rounds}") - print(f" - Agent数量: {len(self.config.get('agent_configs', []))}") + print(f" - : {max_rounds}") + print(f" - Agent: {len(self.config.get('agent_configs', []))}") - # 创建模型 - print("\n初始化LLM模型...") + print("\nLLM...") model = self._create_model() - # 加载Agent图 - print("加载Agent Profile...") + # Agent + print("Agent Profile...") profile_path = self._get_profile_path() if not os.path.exists(profile_path): - print(f"错误: Profile文件不存在: {profile_path}") + print(f": Profile: {profile_path}") return self.agent_graph = await generate_twitter_agent_graph( @@ -581,34 +563,30 @@ async def run(self, max_rounds: int = None): available_actions=self.AVAILABLE_ACTIONS, ) - # 数据库路径 db_path = self._get_db_path() if os.path.exists(db_path): os.remove(db_path) - print(f"已删除旧数据库: {db_path}") + print(f": {db_path}") - # 创建环境 - print("创建OASIS环境...") + print("OASIS...") self.env = oasis.make( agent_graph=self.agent_graph, platform=oasis.DefaultPlatformType.TWITTER, database_path=db_path, - semaphore=30, # 限制最大并发 LLM 请求数,防止 API 过载 + semaphore=30, # LLM API ) await self.env.reset() - print("环境初始化完成\n") + print("\n") - # 初始化IPC处理器 self.ipc_handler = IPCHandler(self.simulation_dir, self.env, self.agent_graph) self.ipc_handler.update_status("running") - # 执行初始事件 event_config = self.config.get("event_config", {}) initial_posts = event_config.get("initial_posts", []) if initial_posts: - print(f"执行初始事件 ({len(initial_posts)}条初始帖子)...") + print(f" ({len(initial_posts)})...") initial_actions = {} for post in initial_posts: agent_id 
= post.get("poster_agent_id", 0) @@ -620,23 +598,23 @@ async def run(self, max_rounds: int = None): action_args={"content": content} ) except Exception as e: - print(f" 警告: 无法为Agent {agent_id}创建初始帖子: {e}") + print(f" : Agent {agent_id}: {e}") if initial_actions: await self.env.step(initial_actions) - print(f" 已发布 {len(initial_actions)} 条初始帖子") + print(f" {len(initial_actions)} ") - # 主模拟循环 - print("\n开始模拟循环...") + # Simulação + print("\nSimulação...") start_time = datetime.now() for round_num in range(total_rounds): - # 计算当前模拟时间 + # Simulação simulated_minutes = round_num * minutes_per_round simulated_hour = (simulated_minutes // 60) % 24 simulated_day = simulated_minutes // (60 * 24) + 1 - # 获取本轮激活的Agent + # Agent active_agents = self._get_active_agents_for_round( self.env, simulated_hour, round_num ) @@ -644,16 +622,13 @@ async def run(self, max_rounds: int = None): if not active_agents: continue - # 构建动作 actions = { agent: LLMAction() for _, agent in active_agents } - # 执行动作 await self.env.step(actions) - # 打印进度 if (round_num + 1) % 10 == 0 or round_num == 0: elapsed = (datetime.now() - start_time).total_seconds() progress = (round_num + 1) / total_rounds * 100 @@ -663,20 +638,19 @@ async def run(self, max_rounds: int = None): f"- elapsed: {elapsed:.1f}s") total_elapsed = (datetime.now() - start_time).total_seconds() - print(f"\n模拟循环完成!") - print(f" - 总耗时: {total_elapsed:.1f}秒") - print(f" - 数据库: {db_path}") + print(f"\nSimulação!") + print(f" - : {total_elapsed:.1f}") + print(f" - : {db_path}") - # 是否进入等待命令模式 if self.wait_for_commands: print("\n" + "=" * 60) - print("进入等待命令模式 - 环境保持运行") - print("支持的命令: interview, batch_interview, close_env") + print(" - ") + print(": interview, batch_interview, close_env") print("=" * 60) self.ipc_handler.update_status("alive") - # 等待命令循环(使用全局 _shutdown_event) + # _shutdown_event try: while not _shutdown_event.is_set(): should_continue = await self.ipc_handler.process_commands() @@ -684,58 +658,57 @@ async def run(self, 
max_rounds: int = None): break try: await asyncio.wait_for(_shutdown_event.wait(), timeout=0.5) - break # 收到退出信号 + break # except asyncio.TimeoutError: pass except KeyboardInterrupt: - print("\n收到中断信号") + print("\n") except asyncio.CancelledError: - print("\n任务被取消") + print("\n") except Exception as e: - print(f"\n命令处理出错: {e}") + print(f"\n: {e}") - print("\n关闭环境...") + print("\n...") - # 关闭环境 self.ipc_handler.update_status("stopped") await self.env.close() - print("环境已关闭") + print("") print("=" * 60) async def main(): - parser = argparse.ArgumentParser(description='OASIS Twitter模拟') + parser = argparse.ArgumentParser(description='OASIS TwitterSimulação') parser.add_argument( '--config', type=str, required=True, - help='配置文件路径 (simulation_config.json)' + help='Configuração (simulation_config.json)' ) parser.add_argument( '--max-rounds', type=int, default=None, - help='最大模拟轮数(可选,用于截断过长的模拟)' + help='SimulaçãoSimulação' ) parser.add_argument( '--no-wait', action='store_true', default=False, - help='模拟完成后立即关闭环境,不进入等待命令模式' + help='Simulação' ) args = parser.parse_args() - # 在 main 函数开始时创建 shutdown 事件 + # main shutdown global _shutdown_event _shutdown_event = asyncio.Event() if not os.path.exists(args.config): - print(f"错误: 配置文件不存在: {args.config}") + print(f": Configuração: {args.config}") sys.exit(1) - # 初始化日志配置(使用固定文件名,清理旧日志) + # Configuração simulation_dir = os.path.dirname(args.config) or "." 
setup_oasis_logging(os.path.join(simulation_dir, "log")) @@ -748,20 +721,18 @@ async def main(): def setup_signal_handlers(): """ - 设置信号处理器,确保收到 SIGTERM/SIGINT 时能够正确退出 - 让程序有机会正常清理资源(关闭数据库、环境等) + SIGTERM/SIGINT """ def signal_handler(signum, frame): global _cleanup_done sig_name = "SIGTERM" if signum == signal.SIGTERM else "SIGINT" - print(f"\n收到 {sig_name} 信号,正在退出...") + print(f"\n {sig_name} ...") if not _cleanup_done: _cleanup_done = True if _shutdown_event: _shutdown_event.set() else: - # 重复收到信号才强制退出 - print("强制退出...") + print("...") sys.exit(1) signal.signal(signal.SIGTERM, signal_handler) @@ -773,8 +744,8 @@ def signal_handler(signum, frame): try: asyncio.run(main()) except KeyboardInterrupt: - print("\n程序被中断") + print("\n") except SystemExit: pass finally: - print("模拟进程已退出") + print("Simulação") diff --git a/backend/scripts/test_profile_format.py b/backend/scripts/test_profile_format.py index 354e8b5ca1..4565fbcd08 100644 --- a/backend/scripts/test_profile_format.py +++ b/backend/scripts/test_profile_format.py @@ -1,8 +1,7 @@ """ -测试Profile格式生成是否符合OASIS要求 -验证: -1. Twitter Profile生成CSV格式 -2. Reddit Profile生成JSON详细格式 +ProfileGerarOASIS +1. Twitter ProfileGerarCSV +2. 
Reddit ProfileGerarJSON """ import os @@ -11,19 +10,18 @@ import csv import tempfile -# 添加项目路径 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from app.services.oasis_profile_generator import OasisProfileGenerator, OasisAgentProfile def test_profile_formats(): - """测试Profile格式""" + """Profile""" print("=" * 60) - print("OASIS Profile格式测试") + print("OASIS Profile") print("=" * 60) - # 创建测试Profile数据 + # Profile test_profiles = [ OasisAgentProfile( user_id=0, @@ -63,84 +61,80 @@ def test_profile_formats(): generator = OasisProfileGenerator.__new__(OasisProfileGenerator) - # 使用临时目录 with tempfile.TemporaryDirectory() as temp_dir: twitter_path = os.path.join(temp_dir, "twitter_profiles.csv") reddit_path = os.path.join(temp_dir, "reddit_profiles.json") - # 测试Twitter CSV格式 - print("\n1. 测试Twitter Profile (CSV格式)") + # Twitter CSV + print("\n1. Twitter Profile (CSV)") print("-" * 40) generator._save_twitter_csv(test_profiles, twitter_path) - # 读取并验证CSV with open(twitter_path, 'r', encoding='utf-8') as f: reader = csv.DictReader(f) rows = list(reader) - print(f" 文件: {twitter_path}") - print(f" 行数: {len(rows)}") - print(f" 表头: {list(rows[0].keys())}") - print(f"\n 示例数据 (第1行):") + print(f" : {twitter_path}") + print(f" : {len(rows)}") + print(f" : {list(rows[0].keys())}") + print(f"\n (1):") for key, value in rows[0].items(): print(f" {key}: {value}") - # 验证必需字段 required_twitter_fields = ['user_id', 'user_name', 'name', 'bio', 'friend_count', 'follower_count', 'statuses_count', 'created_at'] missing = set(required_twitter_fields) - set(rows[0].keys()) if missing: - print(f"\n [错误] 缺少字段: {missing}") + print(f"\n [] : {missing}") else: - print(f"\n [通过] 所有必需字段都存在") + print(f"\n [] ") - # 测试Reddit JSON格式 - print("\n2. 测试Reddit Profile (JSON详细格式)") + # Reddit JSON + print("\n2. 
Reddit Profile (JSON)") print("-" * 40) generator._save_reddit_json(test_profiles, reddit_path) - # 读取并验证JSON + # JSON with open(reddit_path, 'r', encoding='utf-8') as f: reddit_data = json.load(f) - print(f" 文件: {reddit_path}") - print(f" 条目数: {len(reddit_data)}") - print(f" 字段: {list(reddit_data[0].keys())}") - print(f"\n 示例数据 (第1条):") + print(f" : {reddit_path}") + print(f" : {len(reddit_data)}") + print(f" : {list(reddit_data[0].keys())}") + print(f"\n (1):") print(json.dumps(reddit_data[0], ensure_ascii=False, indent=4)) - # 验证详细格式字段 required_reddit_fields = ['realname', 'username', 'bio', 'persona'] optional_reddit_fields = ['age', 'gender', 'mbti', 'country', 'profession', 'interested_topics'] missing = set(required_reddit_fields) - set(reddit_data[0].keys()) if missing: - print(f"\n [错误] 缺少必需字段: {missing}") + print(f"\n [] : {missing}") else: - print(f"\n [通过] 所有必需字段都存在") + print(f"\n [] ") present_optional = set(optional_reddit_fields) & set(reddit_data[0].keys()) - print(f" [信息] 可选字段: {present_optional}") + print(f" [] : {present_optional}") print("\n" + "=" * 60) - print("测试完成!") + print("!") print("=" * 60) def show_expected_formats(): - """显示OASIS期望的格式""" + """OASIS""" print("\n" + "=" * 60) - print("OASIS 期望的Profile格式参考") + print("OASIS Profile") print("=" * 60) - print("\n1. Twitter Profile (CSV格式)") + print("\n1. Twitter Profile (CSV)") print("-" * 40) twitter_example = """user_id,user_name,name,bio,friend_count,follower_count,statuses_count,created_at 0,user0,User Zero,I am user zero with interests in technology.,100,150,500,2023-01-01 1,user1,User One,Tech enthusiast and coffee lover.,200,250,1000,2023-01-02""" print(twitter_example) - print("\n2. Reddit Profile (JSON详细格式)") + print("\n2. 
Reddit Profile (JSON)") print("-" * 40) reddit_example = [ { diff --git a/docker-compose.yml b/docker-compose.yml index 637f1dfaee..93c69aa4a8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,14 +1,24 @@ services: mirofish: - image: ghcr.io/666ghj/mirofish:latest - # 加速镜像(如拉取缓慢可替换上方地址) - # image: ghcr.nju.edu.cn/666ghj/mirofish:latest + build: + context: . + dockerfile: Dockerfile container_name: mirofish env_file: - .env + environment: + - LLM_MODEL_NAME=gpt-5.4-mini + - CORS_ORIGINS=* ports: - - "3000:3000" - - "5001:5001" + - "127.0.0.1:3000:3000" + - "127.0.0.1:5001:5001" restart: unless-stopped + command: > + sh -c "cd /app/backend && uv pip install fpdf2 flask-jwt-extended && + cd /app && npm run dev" volumes: - - ./backend/uploads:/app/backend/uploads \ No newline at end of file + - ./backend/uploads:/app/backend/uploads + - ./frontend/src:/app/frontend/src + - ./frontend/index.html:/app/frontend/index.html + - ./backend/app:/app/backend/app:rw + - ./data:/app/data diff --git a/docker-compose.yml.bak b/docker-compose.yml.bak new file mode 100644 index 0000000000..637f1dfaee --- /dev/null +++ b/docker-compose.yml.bak @@ -0,0 +1,14 @@ +services: + mirofish: + image: ghcr.io/666ghj/mirofish:latest + # 加速镜像(如拉取缓慢可替换上方地址) + # image: ghcr.nju.edu.cn/666ghj/mirofish:latest + container_name: mirofish + env_file: + - .env + ports: + - "3000:3000" + - "5001:5001" + restart: unless-stopped + volumes: + - ./backend/uploads:/app/backend/uploads \ No newline at end of file diff --git a/docker-compose.yml.bak2 b/docker-compose.yml.bak2 new file mode 100644 index 0000000000..f934b916b9 --- /dev/null +++ b/docker-compose.yml.bak2 @@ -0,0 +1,12 @@ +services: + mirofish: + image: ghcr.io/666ghj/mirofish:latest + container_name: mirofish + env_file: + - .env + ports: + - "127.0.0.1:3000:3000" + - "127.0.0.1:5001:5001" + restart: unless-stopped + volumes: + - ./backend/uploads:/app/backend/uploads diff --git a/frontend/index.html b/frontend/index.html index 
0b80095c6d..78ca1a9167 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -1,15 +1,11 @@ - + - - - - - - MiroFish - 预测万物 + + AUGUR by itcast
diff --git a/frontend/src/App.vue b/frontend/src/App.vue index b7cd71ca62..608643ea70 100644 --- a/frontend/src/App.vue +++ b/frontend/src/App.vue @@ -1,47 +1,8 @@ - - - + diff --git a/frontend/src/api/graph.js b/frontend/src/api/graph.js index ef90a2b6f5..de158ea424 100644 --- a/frontend/src/api/graph.js +++ b/frontend/src/api/graph.js @@ -1,8 +1,8 @@ import service, { requestWithRetry } from './index' /** - * 生成本体(上传文档和模拟需求) - * @param {Object} data - 包含files, simulation_requirement, project_name等 + * + * @param {Object} data - files, simulation_requirement, project_name * @returns {Promise} */ export function generateOntology(formData) { @@ -19,8 +19,8 @@ export function generateOntology(formData) { } /** - * 构建图谱 - * @param {Object} data - 包含project_id, graph_name等 + * + * @param {Object} data - project_id, graph_name * @returns {Promise} */ export function buildGraph(data) { @@ -34,8 +34,8 @@ export function buildGraph(data) { } /** - * 查询任务状态 - * @param {String} taskId - 任务ID + * + * @param {String} taskId - ID * @returns {Promise} */ export function getTaskStatus(taskId) { @@ -46,8 +46,8 @@ export function getTaskStatus(taskId) { } /** - * 获取图谱数据 - * @param {String} graphId - 图谱ID + * + * @param {String} graphId - ID * @returns {Promise} */ export function getGraphData(graphId) { @@ -58,8 +58,8 @@ export function getGraphData(graphId) { } /** - * 获取项目信息 - * @param {String} projectId - 项目ID + * + * @param {String} projectId - ID * @returns {Promise} */ export function getProject(projectId) { diff --git a/frontend/src/api/index.js b/frontend/src/api/index.js index e840e1166a..d0f48b88da 100644 --- a/frontend/src/api/index.js +++ b/frontend/src/api/index.js @@ -1,19 +1,20 @@ import axios from 'axios' import i18n from '../i18n' -// 创建axios实例 +// axios const service = axios.create({ baseURL: import.meta.env.VITE_API_BASE_URL || 'http://localhost:5001', - timeout: 300000, // 5分钟超时(本体生成可能需要较长时间) + timeout: 300000, // 5 headers: { 'Content-Type': 'application/json' 
} }) -// 请求拦截器 +// service.interceptors.request.use( config => { - config.headers['Accept-Language'] = i18n.global.locale.value + const locale = i18n.global.locale.value || 'pt-BR' + config.headers['Accept-Language'] = locale.split('-')[0] return config }, error => { @@ -22,12 +23,12 @@ service.interceptors.request.use( } ) -// 响应拦截器(容错重试机制) +// service.interceptors.response.use( response => { const res = response.data - // 如果返回的状态码不是success,则抛出错误 + // success if (!res.success && res.success !== undefined) { console.error('API Error:', res.error || res.message || 'Unknown error') return Promise.reject(new Error(res.error || res.message || 'Error')) @@ -38,12 +39,12 @@ service.interceptors.response.use( error => { console.error('Response error:', error) - // 处理超时 + // if (error.code === 'ECONNABORTED' && error.message.includes('timeout')) { console.error('Request timeout') } - // 处理网络错误 + // if (error.message === 'Network Error') { console.error('Network error - please check your connection') } @@ -52,7 +53,7 @@ service.interceptors.response.use( } ) -// 带重试的请求函数 +// export const requestWithRetry = async (requestFn, maxRetries = 3, delay = 1000) => { for (let i = 0; i < maxRetries; i++) { try { diff --git a/frontend/src/api/report.js b/frontend/src/api/report.js index c89a67d8c4..3ea7c47f92 100644 --- a/frontend/src/api/report.js +++ b/frontend/src/api/report.js @@ -1,7 +1,7 @@ import service, { requestWithRetry } from './index' /** - * 开始报告生成 + * * @param {Object} data - { simulation_id, force_regenerate? 
} */ export const generateReport = (data) => { @@ -9,7 +9,7 @@ export const generateReport = (data) => { } /** - * 获取报告生成状态 + * * @param {string} reportId */ export const getReportStatus = (reportId) => { @@ -17,25 +17,25 @@ export const getReportStatus = (reportId) => { } /** - * 获取 Agent 日志(增量) + * Agent * @param {string} reportId - * @param {number} fromLine - 从第几行开始获取 + * @param {number} fromLine - */ export const getAgentLog = (reportId, fromLine = 0) => { return service.get(`/api/report/${reportId}/agent-log`, { params: { from_line: fromLine } }) } /** - * 获取控制台日志(增量) + * * @param {string} reportId - * @param {number} fromLine - 从第几行开始获取 + * @param {number} fromLine - */ export const getConsoleLog = (reportId, fromLine = 0) => { return service.get(`/api/report/${reportId}/console-log`, { params: { from_line: fromLine } }) } /** - * 获取报告详情 + * * @param {string} reportId */ export const getReport = (reportId) => { @@ -43,7 +43,7 @@ export const getReport = (reportId) => { } /** - * 与 Report Agent 对话 + * Report Agent * @param {Object} data - { simulation_id, message, chat_history? } */ export const chatWithReport = (data) => { diff --git a/frontend/src/api/simulation.js b/frontend/src/api/simulation.js index f878586f03..17d0474375 100644 --- a/frontend/src/api/simulation.js +++ b/frontend/src/api/simulation.js @@ -1,7 +1,7 @@ import service, { requestWithRetry } from './index' /** - * 创建模拟 + * * @param {Object} data - { project_id, graph_id?, enable_twitter?, enable_reddit? } */ export const createSimulation = (data) => { @@ -9,7 +9,7 @@ export const createSimulation = (data) => { } /** - * 准备模拟环境(异步任务) + * * @param {Object} data - { simulation_id, entity_types?, use_llm_for_profiles?, parallel_profile_count?, force_regenerate? } */ export const prepareSimulation = (data) => { @@ -17,7 +17,7 @@ export const prepareSimulation = (data) => { } /** - * 查询准备任务进度 + * * @param {Object} data - { task_id?, simulation_id? 
} */ export const getPrepareStatus = (data) => { @@ -25,7 +25,7 @@ export const getPrepareStatus = (data) => { } /** - * 获取模拟状态 + * * @param {string} simulationId */ export const getSimulation = (simulationId) => { @@ -33,7 +33,7 @@ export const getSimulation = (simulationId) => { } /** - * 获取模拟的 Agent Profiles + * Agent Profiles * @param {string} simulationId * @param {string} platform - 'reddit' | 'twitter' */ @@ -42,7 +42,7 @@ export const getSimulationProfiles = (simulationId, platform = 'reddit') => { } /** - * 实时获取生成中的 Agent Profiles + * Agent Profiles * @param {string} simulationId * @param {string} platform - 'reddit' | 'twitter' */ @@ -51,7 +51,7 @@ export const getSimulationProfilesRealtime = (simulationId, platform = 'reddit') } /** - * 获取模拟配置 + * * @param {string} simulationId */ export const getSimulationConfig = (simulationId) => { @@ -59,17 +59,17 @@ export const getSimulationConfig = (simulationId) => { } /** - * 实时获取生成中的模拟配置 + * * @param {string} simulationId - * @returns {Promise} 返回配置信息,包含元数据和配置内容 + * @returns {Promise} */ export const getSimulationConfigRealtime = (simulationId) => { return service.get(`/api/simulation/${simulationId}/config/realtime`) } /** - * 列出所有模拟 - * @param {string} projectId - 可选,按项目ID过滤 + * + * @param {string} projectId - ID */ export const listSimulations = (projectId) => { const params = projectId ? { project_id: projectId } : {} @@ -77,7 +77,7 @@ export const listSimulations = (projectId) => { } /** - * 启动模拟 + * * @param {Object} data - { simulation_id, platform?, max_rounds?, enable_graph_memory_update? 
} */ export const startSimulation = (data) => { @@ -85,7 +85,7 @@ export const startSimulation = (data) => { } /** - * 停止模拟 + * * @param {Object} data - { simulation_id } */ export const stopSimulation = (data) => { @@ -93,7 +93,7 @@ export const stopSimulation = (data) => { } /** - * 获取模拟运行实时状态 + * * @param {string} simulationId */ export const getRunStatus = (simulationId) => { @@ -101,7 +101,7 @@ export const getRunStatus = (simulationId) => { } /** - * 获取模拟运行详细状态(包含最近动作) + * * @param {string} simulationId */ export const getRunStatusDetail = (simulationId) => { @@ -109,11 +109,11 @@ export const getRunStatusDetail = (simulationId) => { } /** - * 获取模拟中的帖子 + * * @param {string} simulationId * @param {string} platform - 'reddit' | 'twitter' - * @param {number} limit - 返回数量 - * @param {number} offset - 偏移量 + * @param {number} limit - + * @param {number} offset - */ export const getSimulationPosts = (simulationId, platform = 'reddit', limit = 50, offset = 0) => { return service.get(`/api/simulation/${simulationId}/posts`, { @@ -122,10 +122,10 @@ export const getSimulationPosts = (simulationId, platform = 'reddit', limit = 50 } /** - * 获取模拟时间线(按轮次汇总) + * * @param {string} simulationId - * @param {number} startRound - 起始轮次 - * @param {number} endRound - 结束轮次 + * @param {number} startRound - + * @param {number} endRound - */ export const getSimulationTimeline = (simulationId, startRound = 0, endRound = null) => { const params = { start_round: startRound } @@ -136,7 +136,7 @@ export const getSimulationTimeline = (simulationId, startRound = 0, endRound = n } /** - * 获取Agent统计信息 + * Agent * @param {string} simulationId */ export const getAgentStats = (simulationId) => { @@ -144,7 +144,7 @@ export const getAgentStats = (simulationId) => { } /** - * 获取模拟动作历史 + * * @param {string} simulationId * @param {Object} params - { limit, offset, platform, agent_id, round_num } */ @@ -153,7 +153,7 @@ export const getSimulationActions = (simulationId, params = {}) => { } /** - * 
关闭模拟环境(优雅退出) + * * @param {Object} data - { simulation_id, timeout? } */ export const closeSimulationEnv = (data) => { @@ -161,7 +161,7 @@ export const closeSimulationEnv = (data) => { } /** - * 获取模拟环境状态 + * * @param {Object} data - { simulation_id } */ export const getEnvStatus = (data) => { @@ -169,7 +169,7 @@ export const getEnvStatus = (data) => { } /** - * 批量采访 Agent + * Agent * @param {Object} data - { simulation_id, interviews: [{ agent_id, prompt }] } */ export const interviewAgents = (data) => { @@ -177,11 +177,10 @@ export const interviewAgents = (data) => { } /** - * 获取历史模拟列表(带项目详情) - * 用于首页历史项目展示 - * @param {number} limit - 返回数量限制 + * + * + * @param {number} limit - */ export const getSimulationHistory = (limit = 20) => { return service.get('/api/simulation/history', { params: { limit } }) } - diff --git a/frontend/src/assets/design-system.css b/frontend/src/assets/design-system.css new file mode 100644 index 0000000000..45fd8f3909 --- /dev/null +++ b/frontend/src/assets/design-system.css @@ -0,0 +1,73 @@ +:root { + /* ═══ AUGUR LIGHT THEME — Premium Clean ═══ */ + --bg-base: #f8f9fc; + --bg-surface: #ffffff; + --bg-raised: #f0f1f6; + --bg-overlay: #e8e9f0; + + --border: rgba(0, 0, 0, 0.08); + --border-md: rgba(0, 0, 0, 0.12); + --border-hi: rgba(0, 0, 0, 0.18); + + --accent: #00b894; + --accent-dim: rgba(0, 184, 148, 0.10); + --accent2: #6c5ce7; + --accent2-dim: rgba(108, 92, 231, 0.08); + + --danger: #e74c3c; + --warn: #f39c12; + --success: #00b894; + + --text-primary: #1a1a2e; + --text-secondary: #4a4a6a; + --text-muted: #8888a0; + + --font-sans: 'Helvetica Neue', -apple-system, Arial, sans-serif; + --font-mono: 'SF Mono', 'Courier New', monospace; + --font-head: 'Georgia', serif; + + --r-sm: 8px; + --r-md: 12px; + --r-lg: 16px; + + --t-fast: 0.15s ease; + --t-mid: 0.25s ease; + + /* ═══ Shadows (light theme gets shadows instead of borders) ═══ */ + --shadow-sm: 0 1px 3px rgba(0,0,0,0.06), 0 1px 2px rgba(0,0,0,0.04); + --shadow-md: 0 4px 12px 
rgba(0,0,0,0.08), 0 2px 4px rgba(0,0,0,0.04); + --shadow-lg: 0 12px 36px rgba(0,0,0,0.10), 0 4px 8px rgba(0,0,0,0.06); +} + +* { + box-sizing: border-box; +} + +html, +body, +#app { + width: 100%; + height: 100%; + margin: 0; + padding: 0; +} + +body { + background: var(--bg-base); + color: var(--text-primary); + font-family: var(--font-sans); + -webkit-font-smoothing: antialiased; +} + +button, +input, +textarea, +select { + font: inherit; +} + +/* ═══ Scrollbar light ═══ */ +::-webkit-scrollbar { width: 6px; } +::-webkit-scrollbar-track { background: var(--bg-base); } +::-webkit-scrollbar-thumb { background: var(--border-md); border-radius: 3px; } +::-webkit-scrollbar-thumb:hover { background: var(--border-hi); } diff --git a/frontend/src/assets/logo/augur-logo.svg b/frontend/src/assets/logo/augur-logo.svg new file mode 100644 index 0000000000..069932b290 --- /dev/null +++ b/frontend/src/assets/logo/augur-logo.svg @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/frontend/src/components/GraphPanel.vue b/frontend/src/components/GraphPanel.vue index db18829821..14316a2471 100644 --- a/frontend/src/components/GraphPanel.vue +++ b/frontend/src/components/GraphPanel.vue @@ -1,1423 +1,3 @@ - - - - - + + + \ No newline at end of file diff --git a/frontend/src/components/HistoryDatabase.vue b/frontend/src/components/HistoryDatabase.vue index d6c6e9a576..b4a3cde6eb 100644 --- a/frontend/src/components/HistoryDatabase.vue +++ b/frontend/src/components/HistoryDatabase.vue @@ -1,1342 +1,3 @@ - - - - - + + + \ No newline at end of file diff --git a/frontend/src/components/LanguageSwitcher.vue b/frontend/src/components/LanguageSwitcher.vue deleted file mode 100644 index 723d64b743..0000000000 --- a/frontend/src/components/LanguageSwitcher.vue +++ /dev/null @@ -1,124 +0,0 @@ - - - - - diff --git a/frontend/src/components/Step1GraphBuild.vue b/frontend/src/components/Step1GraphBuild.vue index 687d1c7bb0..7b26315e9c 100644 --- 
a/frontend/src/components/Step1GraphBuild.vue +++ b/frontend/src/components/Step1GraphBuild.vue @@ -1,700 +1,3 @@ - - - - - + + + \ No newline at end of file diff --git a/frontend/src/components/Step2EnvSetup.vue b/frontend/src/components/Step2EnvSetup.vue index a27ba347c0..33f9ed5553 100644 --- a/frontend/src/components/Step2EnvSetup.vue +++ b/frontend/src/components/Step2EnvSetup.vue @@ -1,2605 +1,3 @@ - - - - - + + + \ No newline at end of file diff --git a/frontend/src/components/Step3Simulation.vue b/frontend/src/components/Step3Simulation.vue index 5b0f968c6e..0440ca7181 100644 --- a/frontend/src/components/Step3Simulation.vue +++ b/frontend/src/components/Step3Simulation.vue @@ -1,1267 +1,3 @@ - - - - - \ No newline at end of file + + + \ No newline at end of file diff --git a/frontend/src/components/Step4Report.vue b/frontend/src/components/Step4Report.vue index 8e53ceb53b..00fa05dde7 100644 --- a/frontend/src/components/Step4Report.vue +++ b/frontend/src/components/Step4Report.vue @@ -1,5162 +1,3 @@ - - - - - - - + + + \ No newline at end of file diff --git a/frontend/src/components/Step5Interaction.vue b/frontend/src/components/Step5Interaction.vue index 9eb791a129..1f34ab91ba 100644 --- a/frontend/src/components/Step5Interaction.vue +++ b/frontend/src/components/Step5Interaction.vue @@ -1,2584 +1,3 @@ - - - - - - - + + + \ No newline at end of file diff --git a/frontend/src/components/layout/AppShell.vue b/frontend/src/components/layout/AppShell.vue new file mode 100644 index 0000000000..250d3e7727 --- /dev/null +++ b/frontend/src/components/layout/AppShell.vue @@ -0,0 +1,112 @@ + + + + + diff --git a/frontend/src/components/layout/AppSidebar.vue b/frontend/src/components/layout/AppSidebar.vue new file mode 100644 index 0000000000..b8c3924157 --- /dev/null +++ b/frontend/src/components/layout/AppSidebar.vue @@ -0,0 +1,39 @@ + + + + + diff --git a/frontend/src/components/layout/AppTopbar.vue b/frontend/src/components/layout/AppTopbar.vue new file mode 
100644 index 0000000000..18077f58d4 --- /dev/null +++ b/frontend/src/components/layout/AppTopbar.vue @@ -0,0 +1,20 @@ + + + + + diff --git a/frontend/src/components/simulation/AgentCard.vue b/frontend/src/components/simulation/AgentCard.vue new file mode 100644 index 0000000000..983da447a2 --- /dev/null +++ b/frontend/src/components/simulation/AgentCard.vue @@ -0,0 +1,21 @@ + + + diff --git a/frontend/src/components/simulation/AgentFeed.vue b/frontend/src/components/simulation/AgentFeed.vue new file mode 100644 index 0000000000..f77667eaee --- /dev/null +++ b/frontend/src/components/simulation/AgentFeed.vue @@ -0,0 +1,131 @@ + + + + + diff --git a/frontend/src/components/simulation/RoundTimeline.vue b/frontend/src/components/simulation/RoundTimeline.vue new file mode 100644 index 0000000000..ef83f0e3d3 --- /dev/null +++ b/frontend/src/components/simulation/RoundTimeline.vue @@ -0,0 +1,22 @@ + + + diff --git a/frontend/src/components/simulation/SimulationCard.vue b/frontend/src/components/simulation/SimulationCard.vue new file mode 100644 index 0000000000..08276192f1 --- /dev/null +++ b/frontend/src/components/simulation/SimulationCard.vue @@ -0,0 +1,24 @@ + + + diff --git a/frontend/src/components/ui/AugurBadge.vue b/frontend/src/components/ui/AugurBadge.vue new file mode 100644 index 0000000000..53f8cc98c9 --- /dev/null +++ b/frontend/src/components/ui/AugurBadge.vue @@ -0,0 +1,21 @@ + + + + + diff --git a/frontend/src/components/ui/AugurButton.vue b/frontend/src/components/ui/AugurButton.vue new file mode 100644 index 0000000000..15c979838a --- /dev/null +++ b/frontend/src/components/ui/AugurButton.vue @@ -0,0 +1,23 @@ + + + + + diff --git a/frontend/src/components/ui/AugurCard.vue b/frontend/src/components/ui/AugurCard.vue new file mode 100644 index 0000000000..0757d60076 --- /dev/null +++ b/frontend/src/components/ui/AugurCard.vue @@ -0,0 +1,2 @@ + + diff --git a/frontend/src/components/ui/AugurProgress.vue b/frontend/src/components/ui/AugurProgress.vue new 
file mode 100644 index 0000000000..5d25da2b8c --- /dev/null +++ b/frontend/src/components/ui/AugurProgress.vue @@ -0,0 +1,12 @@ + + + diff --git a/frontend/src/components/ui/MetricCard.vue b/frontend/src/components/ui/MetricCard.vue new file mode 100644 index 0000000000..900a40f809 --- /dev/null +++ b/frontend/src/components/ui/MetricCard.vue @@ -0,0 +1,18 @@ + + + diff --git a/frontend/src/components/ui/SentimentBar.vue b/frontend/src/components/ui/SentimentBar.vue new file mode 100644 index 0000000000..cf6e29541e --- /dev/null +++ b/frontend/src/components/ui/SentimentBar.vue @@ -0,0 +1,30 @@ + + + diff --git a/frontend/src/components/ui/SkeletonLoader.vue b/frontend/src/components/ui/SkeletonLoader.vue new file mode 100644 index 0000000000..2c4a832031 --- /dev/null +++ b/frontend/src/components/ui/SkeletonLoader.vue @@ -0,0 +1,25 @@ + + + diff --git a/frontend/src/components/ui/ToastContainer.vue b/frontend/src/components/ui/ToastContainer.vue new file mode 100644 index 0000000000..418fa27c64 --- /dev/null +++ b/frontend/src/components/ui/ToastContainer.vue @@ -0,0 +1,68 @@ + + + + + diff --git a/frontend/src/composables/usePolling.js b/frontend/src/composables/usePolling.js new file mode 100644 index 0000000000..6748e39f96 --- /dev/null +++ b/frontend/src/composables/usePolling.js @@ -0,0 +1,33 @@ +import { onUnmounted, ref } from 'vue' + +export function usePolling(fn, intervalMs = Number(import.meta.env.VITE_POLL_INTERVAL) || 5000) { + const isPolling = ref(false) + let timer = null + + const run = async () => { + try { + await fn() + } catch (error) { + console.error('Erro no polling:', error) + } + } + + function start() { + if (timer) return + isPolling.value = true + timer = setInterval(run, intervalMs) + run() + } + + function stop() { + isPolling.value = false + if (timer) { + clearInterval(timer) + timer = null + } + } + + onUnmounted(stop) + + return { isPolling, start, stop } +} diff --git a/frontend/src/composables/useSimulation.js 
b/frontend/src/composables/useSimulation.js new file mode 100644 index 0000000000..480357a867 --- /dev/null +++ b/frontend/src/composables/useSimulation.js @@ -0,0 +1,64 @@ +import { computed, ref } from 'vue' +import service from '../api' + +export function useSimulation(simulationId) { + const simulation = ref(null) + const runStatus = ref(null) + const isLoading = ref(false) + const error = ref(null) + + const id = computed(() => (typeof simulationId === 'object' ? simulationId.value : simulationId)) + + async function fetchSimulation() { + if (!id.value) return null + isLoading.value = true + error.value = null + try { + const response = await service.get(`/api/simulation/${id.value}`) + simulation.value = response.data || response + return simulation.value + } catch (err) { + error.value = err + throw err + } finally { + isLoading.value = false + } + } + + async function fetchRunStatus() { + if (!id.value) return null + try { + const response = await service.get(`/api/simulation/${id.value}/run-status`) + runStatus.value = response.data || response + return runStatus.value + } catch (err) { + error.value = err + throw err + } + } + + async function startSimulation(params) { + isLoading.value = true + error.value = null + try { + return await service.post('/api/simulation/start', params) + } catch (err) { + error.value = err + throw err + } finally { + isLoading.value = false + } + } + + async function stopSimulation() { + if (!id.value) return null + try { + return await service.post('/api/simulation/stop', { simulation_id: id.value }) + } catch (err) { + error.value = err + throw err + } + } + + return { simulation, runStatus, isLoading, error, fetchSimulation, fetchRunStatus, startSimulation, stopSimulation } +} diff --git a/frontend/src/composables/useToast.js b/frontend/src/composables/useToast.js new file mode 100644 index 0000000000..0b687d8bf1 --- /dev/null +++ b/frontend/src/composables/useToast.js @@ -0,0 +1,28 @@ +// composables/useToast.js +// 
Sistema global de notificações toast +import { ref } from 'vue' + +const toasts = ref([]) +let nextId = 0 + +export function useToast() { + function add(message, type = 'info', duration = 4000) { + const id = ++nextId + toasts.value.push({ id, message, type, visible: true }) + setTimeout(() => remove(id), duration) + } + + function remove(id) { + const idx = toasts.value.findIndex(t => t.id === id) + if (idx !== -1) toasts.value.splice(idx, 1) + } + + return { + toasts, + success: (msg, dur) => add(msg, 'success', dur), + error: (msg, dur) => add(msg, 'error', dur || 6000), + info: (msg, dur) => add(msg, 'info', dur), + warn: (msg, dur) => add(msg, 'warn', dur || 5000), + remove, + } +} diff --git a/frontend/src/i18n/index.js b/frontend/src/i18n/index.js index aa26553527..ab10896b3e 100644 --- a/frontend/src/i18n/index.js +++ b/frontend/src/i18n/index.js @@ -1,27 +1,177 @@ import { createI18n } from 'vue-i18n' -import languages from '../../../locales/languages.json' -const localeFiles = import.meta.glob('../../../locales/!(languages).json', { eager: true }) - -const messages = {} -const availableLocales = [] - -for (const path in localeFiles) { - const key = path.match(/\/([^/]+)\.json$/)[1] - if (languages[key]) { - messages[key] = localeFiles[path].default - availableLocales.push({ key, label: languages[key].label }) +const messages = { + 'pt-BR': { + nav: { + dashboard: 'Dashboard', + newSimulation: 'Nova Simulação', + running: 'Em Execução', + reports: 'Relatórios', + agents: 'Entrevistar Agentes', + settings: 'Configurações' + }, + dashboard: { + title: 'Dashboard', + recentSimulations: 'Simulações Recentes', + recentSub: 'Seus últimos projetos de previsão', + agentActivity: 'Atividade dos Agentes', + activitySub: 'Volume de interações por dia', + avgSentiment: 'Sentimento médio — última semana', + newSimulation: '+ Nova Simulação', + export: 'Exportar', + noSimulations: 'Nenhuma simulação ainda', + noSimSub: 'Crie sua primeira simulação para começar' + }, + 
metrics: { + simulations: 'Simulações', + totalAgents: 'Agentes Totais', + avgAccuracy: 'Precisão Média', + reports: 'Relatórios' + }, + status: { + completed: 'Concluído', + running: 'Em execução', + draft: 'Rascunho', + failed: 'Falhou', + preparing: 'Preparando', + ready: 'Pronto', + paused: 'Pausado' + }, + wizard: { + title: 'Nova Simulação', + step1: 'Documento', + step2: 'Grafo', + step3: 'Parâmetros', + step4: 'Agentes', + step5: 'Executar', + cancel: 'Cancelar', + back: 'Voltar', + next: 'Próximo', + save: 'Salvar rascunho', + start: 'Iniciar Simulação' + }, + upload: { + title: 'Carregue o documento semente', + sub: 'O material base para a simulação — notícia, relatório, comunicado, análise', + drop: 'Arraste e solte ou clique para selecionar', + formats: 'Formatos aceitos: PDF, MD, TXT', + uploaded: 'Arquivo carregado', + change: 'Trocar arquivo' + }, + params: { + title: 'Configurar Simulação', + sub: 'Defina os parâmetros da sua previsão de opinião pública', + projectName: 'Nome do projeto', + projectNamePlaceholder: 'ex: Lançamento produto X — Campanha Q2', + objective: 'Objetivo da simulação — descreva o que quer prever', + objectivePlaceholder: 'ex: Como o público vai reagir ao anúncio de reajuste de 15% nos preços?', + agents: 'Agentes', + rounds: 'Rodadas', + hours: 'Horas simuladas', + platforms: 'Plataformas de simulação', + twitterDesc: 'Posts, reposts, follows', + redditDesc: 'Posts, comentários, upvotes' + }, + run: { + title: 'Execução ao vivo', + round: 'Rodada', + of: 'de', + progress: 'concluído', + postsCreated: 'Posts criados', + activeAgents: 'Agentes ativos', + dominantTone: 'Tom dominante', + twitterLive: 'Twitter — Ao vivo', + redditLive: 'Reddit — Ao vivo', + roundTimeline: 'Timeline de rodadas — sentimento acumulado', + topicsTrending: 'Tópicos em alta', + topAgents: 'Agentes mais influentes', + interactions: 'interações', + pause: 'Pausar', + stop: 'Encerrar', + viewReport: 'Ver Relatório' + }, + sentiment: { + label: 
'Sentimento', + positive: 'Positivo', + neutral: 'Neutro', + negative: 'Negativo', + general: 'Sentimento geral', + twitter: 'Sentimento — Twitter', + reddit: 'Sentimento — Reddit' + }, + report: { + title: 'Relatório', + executiveSummary: 'Sumário Executivo', + generatedBy: 'Gerado pelo ReportAgent', + mainPrediction: 'Predição principal', + detailedInsights: 'Insights detalhados', + confidence: 'Confiança', + basedOn: 'Baseado em', + agentInteractions: 'interações de agentes', + metrics: 'Principais métricas', + agentsReached: 'Agentes alcançados', + postsGenerated: 'Posts gerados', + purchaseIntent: 'Intenção de compra', + viralProbability: 'Probabilidade de viral', + emergingKeywords: 'Palavras-chave emergentes', + exportPdf: 'Exportar PDF', + interviewAgents: 'Entrevistar Agentes', + tag: { + opportunity: 'Oportunidade', + risk: 'Risco', + observation: 'Observação', + neutral: 'Neutro' + } + }, + interaction: { + title: 'Entrevistar Agentes', + selectAgent: 'Agente', + interviewAll: 'Entrevistar todos', + allAgents: 'Todos os agentes', + influence: 'Influência', + sendMessage: 'Enviar pergunta ao agente...', + send: 'Enviar', + groupQuestion: 'Pergunta para todos os agentes ao mesmo tempo...', + sendToAll: 'Enviar para todos os agentes', + roundLabel: 'rodada', + you: 'Você' + }, + errors: { + uploadFailed: 'Falha no upload. Tente novamente.', + simulationFailed: 'Erro ao criar simulação.', + networkError: 'Erro de conexão. Verifique sua rede.', + notFound: 'Recurso não encontrado.', + generic: 'Algo deu errado. Tente novamente.' 
+ }, + general: { + loading: 'Carregando...', + save: 'Salvar', + cancel: 'Cancelar', + confirm: 'Confirmar', + delete: 'Excluir', + edit: 'Editar', + view: 'Visualizar', + close: 'Fechar', + yes: 'Sim', + no: 'Não', + back: 'Voltar', + next: 'Próximo', + finish: 'Concluir', + agents: 'agentes', + rounds: 'rodadas', + hours: 'horas', + now: 'agora', + min: 'min', + ago: 'atrás', + byItcast: 'by itcast', + productName: 'AUGUR' + } } } -const savedLocale = localStorage.getItem('locale') || 'zh' - -const i18n = createI18n({ +export default createI18n({ legacy: false, - locale: savedLocale, - fallbackLocale: 'zh', + locale: 'pt-BR', + fallbackLocale: 'pt-BR', messages }) - -export { availableLocales } -export default i18n diff --git a/frontend/src/main.js b/frontend/src/main.js index cc3d101e4e..50b716c964 100644 --- a/frontend/src/main.js +++ b/frontend/src/main.js @@ -2,10 +2,9 @@ import { createApp } from 'vue' import App from './App.vue' import router from './router' import i18n from './i18n' +import './assets/design-system.css' const app = createApp(App) - app.use(router) app.use(i18n) - app.mount('#app') diff --git a/frontend/src/router/index.js b/frontend/src/router/index.js index 62d2320122..be3431ba1b 100644 --- a/frontend/src/router/index.js +++ b/frontend/src/router/index.js @@ -1,52 +1,140 @@ import { createRouter, createWebHistory } from 'vue-router' -import Home from '../views/Home.vue' -import Process from '../views/MainView.vue' -import SimulationView from '../views/SimulationView.vue' +import DashboardView from '../views/DashboardView.vue' +import NovoProjetoView from '../views/NovoProjetoView.vue' +import ProjetoView from '../views/ProjetoView.vue' +import SimulationView from '../views/SimulationView.vue' import SimulationRunView from '../views/SimulationRunView.vue' -import ReportView from '../views/ReportView.vue' -import InteractionView from '../views/InteractionView.vue' +import ReportView from '../views/ReportView.vue' +import InteractionView 
from '../views/InteractionView.vue' +import GraphView from '../views/GraphView.vue' +import AgentesView from '../views/AgentesView.vue' +import InfluentesView from '../views/InfluentesView.vue' +import CompararView from '../views/CompararView.vue' +import PublicReportView from '../views/PublicReportView.vue' +import AgentProfileView from '../views/AgentProfileView.vue' +import PostsTimelineView from '../views/PostsTimelineView.vue' +import DemoReportView from '../views/DemoReportView.vue' +import AgentLibraryView from '../views/AgentLibraryView.vue' const routes = [ { path: '/', - name: 'Home', - component: Home + name: 'Dashboard', + component: DashboardView }, { - path: '/process/:projectId', - name: 'Process', - component: Process, + path: '/projeto/novo', + name: 'NovoProjeto', + component: NovoProjetoView + }, + { + path: '/projeto/:projectId', + name: 'Projeto', + component: ProjetoView, props: true }, { - path: '/simulation/:simulationId', - name: 'Simulation', + path: '/novo', + redirect: '/projeto/novo' + }, + { + path: '/simulacao/:projectId', + name: 'Simulacao', component: SimulationView, props: true }, { - path: '/simulation/:simulationId/start', - name: 'SimulationRun', + path: '/simulacao/:simulationId/executar', + name: 'Execucao', component: SimulationRunView, props: true }, { - path: '/report/:reportId', - name: 'Report', + // Simulation agents — grid with profiles + path: '/simulacao/:simulationId/agentes', + name: 'Agentes', + component: AgentesView, + props: true + }, + { + // Influence ranking and coalition map + path: '/simulacao/:simulationId/influentes', + name: 'Influentes', + component: InfluentesView, + props: true + }, + { + // Full profile of a single agent + path: '/simulacao/:simulationId/agente/:agentId', + name: 'AgentProfile', + component: AgentProfileView, + props: true + }, + { + // Timeline of all posts + path: '/simulacao/:simulationId/posts', + name: 'PostsTimeline', + component: PostsTimelineView, + props: true + }, 
+ { + path: '/relatorio/:reportId', + name: 'Relatorio', component: ReportView, props: true }, { - path: '/interaction/:reportId', - name: 'Interaction', + // Chat with the ReportAgent + path: '/agentes/:reportId', + name: 'ChatAgentes', component: InteractionView, props: true + }, + { + path: '/projeto/:projectId/grafo', + name: 'Grafo', + component: GraphView, + props: true + }, + { + path: '/comparar', + name: 'Comparar', + component: CompararView + }, + { + // Public share link for a report (no sidebar, no auth). Needs its own route name: Vue Router removes an earlier route when a later record reuses its name, which made '/r/:token' unreachable while both were called 'PublicReport' + path: '/r/:token', + name: 'PublicReportToken', + component: PublicReportView, + props: true, + meta: { public: true } + }, + { + path: '/agentes-biblioteca', + name: 'AgentLibrary', + component: AgentLibraryView + }, + { + path: '/relatorio-publico/:code', + name: 'PublicReport', + component: PublicReportView, + meta: { public: true } + }, + { + path: '/demo', + name: 'Demo', + component: DemoReportView + }, + { + path: '/:pathMatch(.*)*', + redirect: '/' } ] -const router = createRouter({ +export default createRouter({ history: createWebHistory(), - routes + routes, + scrollBehavior() { + return { top: 0 } + } }) - -export default router diff --git a/frontend/src/store/pendingUpload.js b/frontend/src/store/pendingUpload.js index 958c3d0a6e..f099e38487 100644 --- a/frontend/src/store/pendingUpload.js +++ b/frontend/src/store/pendingUpload.js @@ -1,6 +1,6 @@ /** - * 临时存储待上传的文件和需求 - * 用于首页点击启动引擎后立即跳转,在Process页面再进行API调用 + * + * ProcessAPI */ import { reactive } from 'vue' diff --git a/frontend/src/views/AgentLibraryView.vue b/frontend/src/views/AgentLibraryView.vue new file mode 100644 index 0000000000..0b21d15505 --- /dev/null +++ b/frontend/src/views/AgentLibraryView.vue @@ -0,0 +1,398 @@ + + + + + diff --git a/frontend/src/views/AgentPreview.vue b/frontend/src/views/AgentPreview.vue new file mode 100644 index 0000000000..3680018990 --- /dev/null +++ b/frontend/src/views/AgentPreview.vue @@ -0,0 +1,269 @@ + + + + + diff --git 
a/frontend/src/views/AgentProfileView.vue b/frontend/src/views/AgentProfileView.vue new file mode 100644 index 0000000000..f447fa89ce --- /dev/null +++ b/frontend/src/views/AgentProfileView.vue @@ -0,0 +1,129 @@ + + + diff --git a/frontend/src/views/AgentesView.vue b/frontend/src/views/AgentesView.vue new file mode 100644 index 0000000000..3680018990 --- /dev/null +++ b/frontend/src/views/AgentesView.vue @@ -0,0 +1,269 @@ + + + + + diff --git a/frontend/src/views/CompararView.vue b/frontend/src/views/CompararView.vue new file mode 100644 index 0000000000..8587ea99a9 --- /dev/null +++ b/frontend/src/views/CompararView.vue @@ -0,0 +1,326 @@ + + + + + diff --git a/frontend/src/views/DashboardView.vue b/frontend/src/views/DashboardView.vue new file mode 100644 index 0000000000..ba53b2eea1 --- /dev/null +++ b/frontend/src/views/DashboardView.vue @@ -0,0 +1,380 @@ + + + + + diff --git a/frontend/src/views/DemoReportView.vue b/frontend/src/views/DemoReportView.vue new file mode 100644 index 0000000000..511bf15be5 --- /dev/null +++ b/frontend/src/views/DemoReportView.vue @@ -0,0 +1,361 @@ + + + + + diff --git a/frontend/src/views/GraphView.vue b/frontend/src/views/GraphView.vue new file mode 100644 index 0000000000..822817ffc2 --- /dev/null +++ b/frontend/src/views/GraphView.vue @@ -0,0 +1,585 @@ + + + + + diff --git a/frontend/src/views/Home.vue b/frontend/src/views/Home.vue deleted file mode 100644 index ca7ef6ff61..0000000000 --- a/frontend/src/views/Home.vue +++ /dev/null @@ -1,953 +0,0 @@ - - - - - - - diff --git a/frontend/src/views/InfluentesView.vue b/frontend/src/views/InfluentesView.vue new file mode 100644 index 0000000000..be06352330 --- /dev/null +++ b/frontend/src/views/InfluentesView.vue @@ -0,0 +1,231 @@ + + + + + diff --git a/frontend/src/views/InteractionView.vue b/frontend/src/views/InteractionView.vue index 7e555b26e4..3c8e7f4792 100644 --- a/frontend/src/views/InteractionView.vue +++ b/frontend/src/views/InteractionView.vue @@ -1,355 +1,621 @@ - - + 
messages.value.push({ role: 'user', text: msg, modo: modo.value, ts: Date.now() }) + prompt.value = '' + enviando.value = true + scrollToBottom() - diff --git a/frontend/src/views/MainView.vue b/frontend/src/views/MainView.vue deleted file mode 100644 index 513c70d833..0000000000 --- a/frontend/src/views/MainView.vue +++ /dev/null @@ -1,545 +0,0 @@ - - - - - diff --git a/frontend/src/views/NewSimulationView.vue b/frontend/src/views/NewSimulationView.vue new file mode 100644 index 0000000000..84e0cc0690 --- /dev/null +++ b/frontend/src/views/NewSimulationView.vue @@ -0,0 +1,695 @@ + + +