
Commit b5357f9

optimizations to the agentic workflow - force rerunning d8 upon extracting crash from DB, etc...
1 parent 0b2fb15 commit b5357f9

File tree

6 files changed: +136 additions, −20 deletions

Sources/Agentic_System/agents/EBG_crash.py

Lines changed: 1 addition & 0 deletions
@@ -240,6 +240,7 @@ def setup_agents(self, crash_program_hash: Optional[str] = None):
                 execute_javascript_program_tool,
                 list_d8_flags_tool,
                 list_v8_trace_options_tool,
+                trace_v8_analysis_tool,
                 read_from_generate_folder_tool,
                 list_generate_folder_tool,
             ],

Sources/Agentic_System/prompts/EBG-crash-prompts/runtime_analyzer.txt

Lines changed: 12 additions & 2 deletions
@@ -31,9 +31,19 @@ which flags you need to use run list_v8_trace_options. Here your goal is to figu
 based on the information that was returned to you from the DB analyzer. Your goal should be to figure out what and how the
 JS program runs and create a plan towards figuring out a path forward in terms of analyzing the v8 code base to better understand
 how to fix the system.
+You should execute d8 with tracing flags (via trace_v8_analysis and/or execute_javascript_program) before finalizing conclusions.
+If a crash hash is available, you must call trace_v8_analysis for that hash before Stage 5 and include concrete evidence from its stderr/stdout output.
+Your final Stage 5 answer must cite: trace_v8_analysis flags_used, return_code, and at least one raw crash line from trace output.
 
-If database evidence indicates synthetic/non-reproducible data (for example fake crash markers), do not launch heavy debugger flows.
-In that case, report the limitation and proceed with static/runtime trace evidence only.
+CRITICAL EVIDENCE PRIORITY:
+1) raw runtime artifacts (stderr, signal, fatal line, stack trace) from direct d8 execution and trace_v8_analysis
+2) trace output metadata from tools
+3) database summaries/aggregates
+
+Never let database aggregate summaries override contradictory raw runtime artifacts.
+If the DB indicates a "fake crash" but the raw runtime output has concrete crash evidence (signal/fatal/stack), classify it as an inconsistent DB state and continue the analysis using the raw evidence.
+
+Do not skip stages only because the crash is synthetic; gate interpretation confidence, not execution.
 
 
 ## STAGE 3:
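The evidence-priority rule in the updated prompt can be sketched as a small reconciliation function. The crash-marker regexes and labels below are illustrative assumptions, not the system's actual schema:

```python
import re

# Illustrative markers for "raw runtime crash evidence" (signal / fatal line / stack frame).
CRASH_PATTERNS = [
    r"Fatal error",
    r"Received signal \d+",
    r"#\d+ 0x[0-9a-f]+",
]

def classify_crash(db_label: str, raw_stderr: str) -> str:
    """Raw runtime artifacts outrank DB summaries, per the prompt's priority list."""
    has_raw_evidence = any(re.search(p, raw_stderr) for p in CRASH_PATTERNS)
    if has_raw_evidence:
        # A DB "fake crash" label contradicted by raw evidence is an inconsistent DB state.
        return "inconsistent-db" if db_label == "fake crash" else "confirmed-crash"
    if db_label == "fake crash":
        return "synthetic"
    return "unconfirmed"
```

Here `classify_crash("fake crash", "Received signal 11 SEGV_MAPERR")` yields `"inconsistent-db"`, so the analysis continues on raw evidence rather than halting on the DB label.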

Sources/Agentic_System/prompts/EBG-crash-prompts/variant_analysis.txt

Lines changed: 9 additions & 0 deletions
@@ -24,10 +24,19 @@ Look for similar instances of the crashing code across the codebase.
 Go through the returned RAG entries of V8Search and validate variants; if you can confirm the same bug
 exists in the code, please save the code as valid_variant to the RAG.
 
+Evidence priority during validation:
+1) raw runtime crash artifacts from d8 runs (stderr/signal/fatal line/stack)
+2) trace metadata and tool outputs
+3) DB summaries
 
 ## STAGE 3
 After performing variant analysis, use `JSGenerator` and `Debugger` to create programs
 that crash in a similar manner.
+You should run d8 with trace flags for crash confirmation context before declaring a variant valid.
+For every candidate variant marked valid, you must include one trace_v8_analysis or traced execute_javascript_program result with:
+- flags used
+- return code
+- stderr/stdout crash evidence
 
 Only call `Debugger` after you have a concrete JS artifact path and a specific hypothesis to test.
 Do not run debugger loops when there is no reproducible crash signal.
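The per-variant evidence requirement above could be enforced mechanically. The field names (`flags_used`, `return_code`, `stderr`) are assumed for illustration and may not match the tools' actual result schema:

```python
REQUIRED_EVIDENCE_FIELDS = ("flags_used", "return_code", "stderr")

def variant_has_required_evidence(trace_result: dict) -> bool:
    """Check that a candidate variant's trace record carries the evidence listed above."""
    if any(field not in trace_result for field in REQUIRED_EVIDENCE_FIELDS):
        return False
    # A clean exit with empty stderr is not crash evidence.
    return trace_result["return_code"] != 0 and bool(trace_result["stderr"].strip())
```

A variant would only be marked valid_variant when this gate passes for at least one traced run.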

Sources/Agentic_System/prompts/EBG-crash-prompts/variant_manager.txt

Lines changed: 5 additions & 0 deletions
@@ -22,6 +22,11 @@ Call `RuntimeAnalyzer` in the following format:
 PLEASE PROVIDE A REASON AND PROOF REGARDING YOUR REASONING."
 }
 
+CRITICAL EVIDENCE FOR STAGE 1 RESULTS:
+- Raw runtime crash artifacts (stderr, signal, fatal line, stack) from direct d8 traces.
+- If raw artifacts and DB summaries conflict, treat it as a DB inconsistency and continue with raw evidence.
+- If a crash is found/selected, RuntimeAnalyzer should include at least one trace_v8_analysis result for that crash hash.
+- The Stage 1 summary must include: flags used, return code, and quoted stderr/stdout evidence from trace_v8_analysis.
 
 ## STAGE 2 Call "VariantAnalysis"
 After you have received the reasoning behind the crash/bug, your goal is to call variant analysis:
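The Stage 1 reporting requirement could be templated along these lines. This formatter is hypothetical and not part of the commit; it only shows the three mandated pieces of evidence in one block:

```python
def format_stage1_summary(flags_used: str, return_code: int, stderr_excerpt: str) -> str:
    """Assemble the Stage 1 evidence block the prompt above requires (sketch)."""
    return (
        "STAGE 1 EVIDENCE\n"
        f"flags used: {flags_used}\n"
        f"return code: {return_code}\n"
        f'stderr evidence: "{stderr_excerpt}"'
    )
```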

Sources/Agentic_System/tools/EBG_tools.py

Lines changed: 93 additions & 15 deletions
@@ -5,6 +5,7 @@
 from fuzzywuzzy import fuzz
 from functools import wraps
 from decimal import Decimal
+from collections import OrderedDict
 
 import psycopg2
 import psycopg2.extras
@@ -13,6 +14,7 @@
 import hashlib
 import random
 import re
+import json
 
 # Environment variables (optional, for remote PostgreSQL):
 # - POSTGRES_HOST: Remote PostgreSQL host/IP (if set, connects to remote instead of local container)
@@ -46,6 +48,8 @@
 
 # Lazy initialization - folder is created only when needed, not at module import
 _GENERATE_FOLDER_HASHS = None
+_DB_QUERY_CACHE = OrderedDict()
+_DB_QUERY_CACHE_MAX = 128
 
 def _get_varianal_folder():
     """Get or create the variant analysis folder path (lazy initialization)."""
@@ -62,6 +66,69 @@ def json_serial(obj):
     raise TypeError(f"Type {type(obj)} not serializable")
 
 
+def _normalize_sql_whitespace(query: str) -> str:
+    return " ".join((query or "").strip().split())
+
+
+def _is_read_only_sql(query: str) -> bool:
+    normalized = _normalize_sql_whitespace(query).lower()
+    return normalized.startswith("select ") or normalized.startswith("with ") or normalized.startswith("explain ")
+
+
+def _build_cache_key(query: str, exec_params: tuple) -> str:
+    return f"{query}||{json.dumps(exec_params, default=str, ensure_ascii=True)}"
+
+
+def _cache_get(cache_key: str):
+    if cache_key not in _DB_QUERY_CACHE:
+        return None
+    _DB_QUERY_CACHE.move_to_end(cache_key)
+    return _DB_QUERY_CACHE[cache_key]
+
+
+def _cache_set(cache_key: str, value: str) -> None:
+    _DB_QUERY_CACHE[cache_key] = value
+    _DB_QUERY_CACHE.move_to_end(cache_key)
+    while len(_DB_QUERY_CACHE) > _DB_QUERY_CACHE_MAX:
+        _DB_QUERY_CACHE.popitem(last=False)
+
+
+def _validate_and_prepare_sql(query: str, params: list) -> tuple:
+    query = (query or "").strip()
+    params = [] if params is None else params
+    pg_matches = re.findall(r"\$(\d+)", query)
+    percent_placeholder_count = len(re.findall(r"(?<!%)%s", query))
+
+    if pg_matches and percent_placeholder_count:
+        return None, None, "Database error: mixed placeholder styles are not allowed (use either $n or %s)."
+
+    if pg_matches:
+        positions = [int(match) for match in pg_matches]
+        max_pos = max(positions)
+        if len(params) != max_pos:
+            return (
+                None,
+                None,
+                f"Database error: positional placeholder mismatch (expected exactly {max_pos} params for $ placeholders, got {len(params)}).",
+            )
+        normalized_query = re.sub(r"\$\d+", "%s", query)
+        exec_params = tuple(params[pos - 1] for pos in positions)
+        return normalized_query, exec_params, None
+
+    if percent_placeholder_count:
+        if len(params) != percent_placeholder_count:
+            return (
+                None,
+                None,
+                f"Database error: placeholder mismatch (expected exactly {percent_placeholder_count} params for %s placeholders, got {len(params)}).",
+            )
+        return query, tuple(params), None
+
+    if len(params) > 0:
+        return None, None, "Database error: query has no placeholders but params were provided."
+    return query, tuple(), None
+
 
 @tool
 def db_query(query: str, params: list = None) -> str:
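The `$n`-to-`%s` normalization performed by `_validate_and_prepare_sql` can be illustrated in isolation. This standalone sketch mirrors the regex logic above but raises an exception instead of returning an error-string triple:

```python
import re

def normalize_placeholders(query, params):
    """Rewrite PostgreSQL-style $1, $2, ... placeholders to psycopg2-style %s (sketch)."""
    positions = [int(m) for m in re.findall(r"\$(\d+)", query)]
    if positions and len(params) != max(positions):
        raise ValueError("placeholder/param count mismatch")
    normalized = re.sub(r"\$\d+", "%s", query)
    # Reorder params to follow the placeholders' appearance order in the query.
    exec_params = tuple(params[p - 1] for p in positions)
    return normalized, exec_params

q, p = normalize_placeholders(
    "SELECT * FROM crashes WHERE hash = $1 AND fuzzer = $2", ["abc", 7]
)
# q == "SELECT * FROM crashes WHERE hash = %s AND fuzzer = %s", p == ("abc", 7)
```

Queries without placeholders pass through unchanged, which matches the helper's final branch.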
@@ -77,22 +144,19 @@ def db_query(query: str, params: list = None) -> str:
     """
     conn = None
     try:
-        params = [] if params is None else params
-
-        # Accept PostgreSQL-style placeholders ($1, $2, ...) and normalize to psycopg2 (%s).
-        positional_matches = re.findall(r"\$(\d+)", query)
-        if positional_matches:
-            max_pos = max(int(m) for m in positional_matches)
-            if len(params) < max_pos:
-                return (
-                    f"Database error: not enough parameters for positional placeholders "
-                    f"(expected at least {max_pos}, got {len(params)})"
-                )
-            normalized_query = re.sub(r"\$\d+", "%s", query)
-            exec_params = tuple(params[int(m) - 1] for m in positional_matches)
+        normalized_query, exec_params, validation_error = _validate_and_prepare_sql(query, params)
+        if validation_error:
+            return validation_error
+
+        read_only_query = _is_read_only_sql(normalized_query)
+        cache_key = _build_cache_key(_normalize_sql_whitespace(normalized_query), exec_params)
+        if read_only_query:
+            cached = _cache_get(cache_key)
+            if cached is not None:
+                return cached
         else:
-            normalized_query = query
-            exec_params = tuple(params)
+            # Keep cache only for stable read-only data.
+            _DB_QUERY_CACHE.clear()
 
         conn = psycopg2.connect(
             host=POSTGRES_HOST,
@@ -103,8 +167,17 @@ def db_query(query: str, params: list = None) -> str:
         )
         cursor = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
         cursor.execute(normalized_query, exec_params)
+        if cursor.description is None:
+            conn.commit()
+            return json.dumps(
+                {"status": "ok", "rows_affected": cursor.rowcount},
+                default=json_serial,
+                indent=2,
+            )
         rows = cursor.fetchall()
         result_json = json.dumps(rows, default=json_serial, indent=2)
+        if read_only_query:
+            _cache_set(cache_key, result_json)
         return result_json
     except psycopg2.Error as e:
         return f"Database error: {e}"
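The caching added to `db_query` relies on `OrderedDict`-based LRU eviction via `_cache_get`/`_cache_set`. A miniature version with a bound of 3 (the tool uses 128) makes the eviction order visible:

```python
from collections import OrderedDict

CACHE = OrderedDict()
CACHE_MAX = 3  # small bound for illustration; the tool uses 128

def cache_set(key, value):
    CACHE[key] = value
    CACHE.move_to_end(key)
    while len(CACHE) > CACHE_MAX:
        CACHE.popitem(last=False)  # drop the least-recently-used entry

def cache_get(key):
    if key not in CACHE:
        return None
    CACHE.move_to_end(key)  # a hit marks the entry as recently used
    return CACHE[key]

for k in "abc":
    cache_set(k, k.upper())
cache_get("a")       # touch "a"; "b" becomes least recently used
cache_set("d", "D")  # exceeds the bound, so "b" is evicted
# list(CACHE) == ["c", "a", "d"]
```

Clearing the whole cache on any non-read-only query (and after `db_store_generated_program`) is a coarse but safe invalidation strategy: stale reads are impossible at the cost of losing all cached results on every write.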
@@ -926,6 +999,10 @@ def trace_v8_analysis(
     with open(filepath_js, "w") as f:
         f.write(js_code)
 
+    # Enforce baseline tracing when the caller does not specify tracing options.
+    if presets is None and custom_flags is None:
+        presets = ["tiering", "maglev", "ignition"]
+
     flags = ["--allow-natives-syntax"]
 
     if presets:
@@ -1165,6 +1242,7 @@ def db_store_generated_program(js_program: str, fuzzer_id: int) -> str:
     # Fetch the inserted row
     row = cursor.fetchone()
     conn.commit()
+    _DB_QUERY_CACHE.clear()
 
     # If row is None, the program already existed (conflict)
     if row is None:

Sources/Agentic_System/tools/FoG_tools_ika.py

Lines changed: 16 additions & 3 deletions
@@ -1633,11 +1633,24 @@ def _execute_javascript_program_executor(params: dict) -> str:
     if not template_js_path:
         return "Error: template_js_path parameter is required"
 
-    if "--allow-natives-syntax" not in d8_flags:
-        d8_flags += " --allow-natives-syntax"
+    required_flags = [
+        "--allow-natives-syntax",
+        "--trace-opt",
+        "--trace-deopt",
+        "--trace-maglev-graph-building",
+        "--print-bytecode",
+    ]
+    for flag in required_flags:
+        if flag not in d8_flags:
+            d8_flags += f" {flag}"
+    d8_flags = d8_flags.strip()
 
     d8 = run_command(f"{D8_PATH} {d8_flags} {template_js_path}")
-    return f"Program execution result:\n{d8.stderr}\n{d8.stdout}"
+    return (
+        "Program execution result:\n"
+        f"[flags used] {d8_flags}\n"
+        f"{d8.stderr}\n{d8.stdout}"
+    )
 
 execute_javascript_program_tool = IkaTools(
     name="execute_javascript_program",
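The flag-merging loop above can be exercised standalone. Note that the simple substring check (also used by the commit's code) is a simplification: a required flag that happens to be a substring of an already-present flag would be silently skipped.

```python
def ensure_flags(d8_flags, required):
    """Append any required d8 flags missing from the flag string (sketch of the loop above)."""
    for flag in required:
        if flag not in d8_flags:  # substring containment, as in the diff
            d8_flags += f" {flag}"
    return d8_flags.strip()

flags = ensure_flags("--trace-opt", ["--allow-natives-syntax", "--trace-opt", "--trace-deopt"])
# flags == "--trace-opt --allow-natives-syntax --trace-deopt"
```

A stricter variant would split `d8_flags` into tokens and compare whole flags, avoiding false positives on prefix collisions.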
