google · DonggeLiu · Apr 10, 2025 · Mar 25, 2025 · Mar 25, 2025 · Mar 25, 2025
diff --git a/agent/coverage_analyzer.py b/agent/coverage_analyzer.py
@@ -14,8 +14,132 @@
 """An LLM agent to analyze and provide insight of a fuzz target's low coverage.
 Use it as a usual module locally, or as script in cloud builds.
 """
+import os
+from typing import Optional
+
+import logger
 from agent.base_agent import BaseAgent
+from experiment.workdir import WorkDirs
+from llm_toolkit import prompt_builder
+from llm_toolkit.prompt_builder import CoverageAnalyzerTemplateBuilder
+from llm_toolkit.prompts import Prompt
+from results import AnalysisResult, CoverageResult, Result, RunResult
+from tool.container_tool import ProjectContainerTool
+
+INVALID_PRMOT_PATH = os.path.join('prompts', 'agent',
+                                  'coverage-analyzer-invalid-response.txt')
 
 
 class CoverageAnalyzer(BaseAgent):
-  pass
+  """The Agent to analyze why a fuzz target's coverage is low."""
+
+  def _initial_prompt(self, results: list[Result]) -> Prompt:
+    """Builds the initial prompt, or a default Prompt() for a non-RunResult."""
+    last_result = results[-1]
+    benchmark = last_result.benchmark
+
+    if not isinstance(last_result, RunResult):
+      logger.error('The last result in %s is not RunResult: %s',
+                   self.name,
+                   results,
+                   trial=self.trial)
+      return Prompt()
+
+    builder = CoverageAnalyzerTemplateBuilder(self.llm, benchmark, last_result)
+    prompt = builder.build(example_pair=[],
+                           tool_guides=self.inspect_tool.tutorial(),
+                           project_dir=self.inspect_tool.project_dir)
+    # TODO: A different file name/dir.
+    prompt.save(self.args.work_dirs.prompt)
+
+    return prompt
+
+  def _container_handle_conclusion(self, cur_round: int, response: str,
+                                   coverage_result: CoverageResult,
+                                   prompt: Prompt) -> Optional[Prompt]:
+    """Stores the conclusion, insights, and suggestions parsed from the LLM
+    response in |coverage_result|; returns |prompt| if no conclusion found."""
+    conclusion = self._parse_tag(response, 'conclusion')
+    if not conclusion:
+      return prompt
+    logger.info('----- ROUND %02d Received conclusion -----',
+                cur_round,
+                trial=self.trial)
+
+    coverage_result.improve_required = conclusion.strip().lower() == 'true'
+    coverage_result.insight = self._parse_tag(response, 'insights')
+    coverage_result.suggestions = self._parse_tag(response, 'suggestions')
+
+    return None
+
+  def _container_tool_reaction(
+      self, cur_round: int, response: str, run_result: RunResult,
+      coverage_result: CoverageResult) -> Optional[Prompt]:
+    """Validates LLM conclusion or executes its command."""
+    del run_result  # Unused.
+    prompt = prompt_builder.DefaultTemplateBuilder(self.llm, None).build([])
+
+    prompt = self._container_handle_bash_commands(response, self.inspect_tool,
+                                                  prompt)
+    # Only report conclusion when no more bash investigation is required.
+    if not prompt.gettext():
+      # Then record the LLM's conclusion, if the response has one.
+      prompt = self._container_handle_conclusion(cur_round, response,
+                                                 coverage_result, prompt)
+      if prompt is None:
+        # Conclusion recorded; no follow-up prompt is needed.
+        return None
+
+    # Finally check invalid responses.
+    if not response or not prompt.get():
+      prompt = self._container_handle_invalid_tool_usage(
+          self.inspect_tool, cur_round, response, prompt)
+      with open(INVALID_PRMOT_PATH, 'r') as prompt_file:
+        prompt.append(prompt_file.read())
+
+    return prompt
+
+  def execute(self, result_history: list[Result]) -> AnalysisResult:
+    """Executes the agent to analyze the root cause of the low coverage."""
+    WorkDirs(self.args.work_dirs.base, keep=True)
+    last_result = result_history[-1]
+    assert isinstance(last_result, RunResult)
+
+    logger.info('Executing %s', self.name, trial=last_result.trial)
+    benchmark = last_result.benchmark
+    # TODO(dongge): Use the generated fuzz target and build script here.
+    self.inspect_tool = ProjectContainerTool(benchmark, name='inspect')
+    self.inspect_tool.write_to_file(content=last_result.fuzz_target_source,
+                                    file_path=benchmark.target_path)
+    if last_result.build_script_source:
+      self.inspect_tool.write_to_file(
+          content=last_result.build_script_source,
+          file_path=self.inspect_tool.build_script_path)
+    self.inspect_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null')
+    cur_round = 1
+    coverage_result = CoverageResult()
+    prompt = self._initial_prompt(result_history)
+
+    try:
+      client = self.llm.get_chat_client(model=self.llm.get_model())
+      while prompt and cur_round < self.max_round:
+        response = self.chat_llm(cur_round,
+                                 client=client,
+                                 prompt=prompt,
+                                 trial=last_result.trial)
+        prompt = self._container_tool_reaction(cur_round, response, last_result,
+                                               coverage_result)
+        cur_round += 1
+    finally:
+      # Cleanup: stop and remove the container.
+      logger.debug('Stopping and removing the inspect container %s',
+                   self.inspect_tool.container_id,
+                   trial=last_result.trial)
+      self.inspect_tool.terminate()
+
+    analysis_result = AnalysisResult(
+        author=self,
+        run_result=last_result,
+        coverage_result=coverage_result,
+        chat_history={self.name: coverage_result.to_dict()})
+    return analysis_result
diff --git a/agent/enhancer.py b/agent/enhancer.py
@@ -16,8 +16,10 @@
 """
 import logger
 from agent.prototyper import Prototyper
-from llm_toolkit.prompt_builder import EnhancerTemplateBuilder, JvmFixingBuilder
-from llm_toolkit.prompts import Prompt
+from llm_toolkit.prompt_builder import (CoverageEnhancerTemplateBuilder,
+                                        EnhancerTemplateBuilder,
+                                        JvmFixingBuilder)
+from llm_toolkit.prompts import Prompt, TextPrompt
 from results import AnalysisResult, BuildResult, Result
 
 
@@ -52,9 +54,27 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
                                  last_result.run_result.fuzz_target_source, [])
       prompt = builder.build([], None, None)
     else:
-      error_desc, errors = last_result.semantic_result.get_error_info()
-      builder = EnhancerTemplateBuilder(self.llm, benchmark, last_build_result,
-                                        error_desc, errors)
+      # TODO(dongge): Refine this logic.
+      if last_result.semantic_result:
+        error_desc, errors = last_result.semantic_result.get_error_info()
+        builder = EnhancerTemplateBuilder(self.llm, benchmark,
+                                          last_build_result, error_desc, errors)
+      elif last_result.coverage_result:
+        builder = CoverageEnhancerTemplateBuilder(
+            self.llm,
+            benchmark,
+            last_build_result,
+            coverage_result=last_result.coverage_result)
+      else:
+        logger.error(
+            'Last result does not contain either semantic result or '
+            'coverage result',
+            trial=self.trial)
+        # TODO(dongge): Give some default initial prompt.
+        prompt = TextPrompt(
+            'Last result does not contain either semantic result or '
+            'coverage result')
+        return prompt
       prompt = builder.build(example_pair=[],
                              tool_guides=self.inspect_tool.tutorial(),
                              project_dir=self.inspect_tool.project_dir)

diff --git a/agent/one_prompt_enhancer.py b/agent/one_prompt_enhancer.py
@@ -42,14 +42,25 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
                                  last_result.run_result.fuzz_target_source, [])
       prompt = builder.build([], None, None)
     else:
-      error_desc, errors = last_result.semantic_result.get_error_info()
+      # TODO(dongge): Refine this logic.
       builder = DefaultTemplateBuilder(self.llm)
-      prompt = builder.build_fixer_prompt(benchmark,
-                                          last_result.fuzz_target_source,
-                                          error_desc,
-                                          errors,
-                                          context='',
-                                          instruction='')
+      if last_result.semantic_result:
+        error_desc, errors = last_result.semantic_result.get_error_info()
+        prompt = builder.build_fixer_prompt(benchmark,
+                                            last_result.fuzz_target_source,
+                                            error_desc,
+                                            errors,
+                                            context='',
+                                            instruction='')
+      else:
+        prompt = builder.build_fixer_prompt(
+            benchmark=benchmark,
+            raw_code=last_result.fuzz_target_source,
+            error_desc='',
+            errors=[],
+            coverage_result=last_result.coverage_result,
+            context='',
+            instruction='')
       # TODO: A different file name/dir.
       prompt.save(self.args.work_dirs.prompt)
 

diff --git a/agent/one_prompt_prototyper.py b/agent/one_prompt_prototyper.py
@@ -150,8 +150,11 @@ def _advice_fuzz_target(self, build_result: BuildResult,
       instruction = code_fixer.collect_instructions(
           build_result.benchmark, errors, build_result.fuzz_target_source)
       prompt = builder.build_fixer_prompt(build_result.benchmark,
-                                          build_result.fuzz_target_source, '',
-                                          errors, context, instruction)
+                                          build_result.fuzz_target_source,
+                                          '',
+                                          errors,
+                                          context=context,
+                                          instruction=instruction)
 
     return prompt
 

diff --git a/agent/prototyper.py b/agent/prototyper.py
@@ -154,18 +154,12 @@ def _validate_fuzz_target_and_build_script_via_compile(
     compilation_tool = ProjectContainerTool(benchmark=benchmark)
 
     # Replace fuzz target and build script in the container.
-    replace_file_content_command = (
-        'cat << "OFG_EOF" > {file_path}\n{file_content}\nOFG_EOF')
-    compilation_tool.execute(
-        replace_file_content_command.format(
-            file_path=benchmark.target_path,
-            file_content=build_result.fuzz_target_source))
-
+    compilation_tool.write_to_file(content=build_result.fuzz_target_source,
+                                   file_path=benchmark.target_path)
     if build_result.build_script_source:
-      compilation_tool.execute(
-          replace_file_content_command.format(
-              file_path='/src/build.sh',
-              file_content=build_result.build_script_source))
+      compilation_tool.write_to_file(
+          content=build_result.build_script_source,
+          file_path=compilation_tool.build_script_path)
 
     # Recompile.
     logger.info('===== ROUND %02d Recompile =====',

diff --git a/agent/semantic_analyzer.py b/agent/semantic_analyzer.py
@@ -14,7 +14,6 @@
 """An LLM agent to generate a simple fuzz target prototype that can build.
 Use it as a usual module locally, or as script in cloud builds.
 """
-import os
 import re
 from collections import defaultdict, namedtuple
 from typing import Optional
@@ -61,11 +60,8 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
     last_result = result_history[-1]
     assert isinstance(last_result, RunResult)
 
-    with open(
-        os.path.join(last_result.work_dirs.run_logs, f'{self.trial:02}.log'),
-        'rb') as fuzzer_log:
-      _, _, _, _, semantic_result = self._parse_libfuzzer_logs(
-          fuzzer_log, last_result.benchmark.project)
+    _, _, _, _, semantic_result = self._parse_libfuzzer_logs(
+        last_result.run_log, last_result.benchmark.project)
 
     analysis_result = AnalysisResult(
         author=self,
@@ -75,24 +71,13 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
     return analysis_result
 
   def _parse_libfuzzer_logs(self,
-                            log_handle,
+                            fuzzlog,
                             project_name: str,
                             check_cov_increase: bool = True) -> ParseResult:
     """Parses libFuzzer logs."""
     lines = None
-    try:
-      fuzzlog = log_handle.read(-1)
-      # Some crashes can mess up the libfuzzer output and raise decode error.
-      fuzzlog = fuzzlog.decode('utf-8', errors='ignore')
-      lines = fuzzlog.split('\n')
-    except MemoryError as e:
-      # Some logs from abnormal fuzz targets are too large to be parsed.
-      logger.error('%s is too large to parse: %s',
-                   log_handle.name,
-                   e,
-                   trial=self.trial)
-      return ParseResult(0, 0, False, '',
-                         SemanticCheckResult(SemanticCheckResult.LOG_MESS_UP))
+    # |fuzzlog| must already be decoded text (str); no decoding happens here.
+    lines = fuzzlog.split('\n')
 
     cov_pcs, total_pcs, crashes = 0, 0, False
 

diff --git a/common/cloud_builder.py b/common/cloud_builder.py
@@ -53,6 +53,7 @@ class CloudBuilder:
 
   def __init__(self, args: argparse.Namespace) -> None:
     self.tags = ['ofg', 'agent', args.cloud_experiment_name]
+    self.exp_args = args
     self.credentials, self.project_id = default()
     assert self.project_id, 'Cloud experiment requires a Google cloud project.'
     assert hasattr(
@@ -96,17 +97,21 @@ def _upload_to_gcs(self, local_file_path: str) -> str:
     logging.info('Uploaded %s to %s', local_file_path, bucket_file_url)
     return bucket_file_url
 
-  def _prepare_and_upload_archive(self) -> str:
+  def _prepare_and_upload_archive(self, result_history: list[Result]) -> str:
     """Archives and uploads local OFG repo to cloud build."""
-    files_in_dir = set(
+    dir_files = set(
         os.path.relpath(os.path.join(root, file))
         for root, _, files in os.walk(OFG_ROOT_DIR)
         for file in files)
-    files_in_git = set(
+    git_files = set(
         subprocess.check_output(['git', 'ls-files'],
                                 cwd=OFG_ROOT_DIR,
                                 text=True).splitlines())
-    file_to_upload = list(files_in_dir & files_in_git)
+    result_files = set(
+        os.path.relpath(os.path.join(root, file))
+        for root, _, files in os.walk(result_history[-1].work_dirs.base)
+        for file in files)
+    file_to_upload = list((dir_files & git_files) | result_files)
 
     return self._upload_files(f'ofg-repo-{uuid.uuid4().hex}.tar.gz',
                               OFG_ROOT_DIR, file_to_upload)
@@ -363,7 +368,7 @@ def run(self, agent: BaseAgent, result_history: list[Result],
     self.tags += [
         str(agent),
         str(result_history[-1].benchmark.project),
-        # TODO(dongge): A tag for function name, compatible with tag format.
+        str(result_history[-1].benchmark.function_name),
         str(result_history[-1].trial)
     ]
     # Step1: Generate dill files.
@@ -374,7 +379,7 @@ def run(self, agent: BaseAgent, result_history: list[Result],
     # TODO(dongge): Encrypt dill files?
 
     # Step 2: Upload OFG repo and dill files to GCS.
-    ofg_url = self._prepare_and_upload_archive()
+    ofg_url = self._prepare_and_upload_archive(result_history)
     agent_url = self._upload_to_gcs(agent_dill)
     results_url = self._upload_to_gcs(results_dill)
     oss_fuzz_data_url = self._upload_oss_fuzz_data()

diff --git a/experiment/builder_runner.py b/experiment/builder_runner.py
@@ -127,6 +127,7 @@ def _libfuzzer_args(self) -> list[str]:
         '-len_control=0',
         # Timeout per testcase.
         '-timeout=30',
+        '-detect_leaks=0',
     ]
 
   def _get_minimum_func_name(self, func_sig: str) -> str:
@@ -922,19 +923,24 @@ def build_and_run_cloud(
         f'--real_project={project_name}',
     ]
 
-    if oss_fuzz_checkout.ENABLE_CACHING and (
-        oss_fuzz_checkout.is_image_cached(project_name, 'address') and
-        oss_fuzz_checkout.is_image_cached(project_name, 'coverage')):
-      logger.info('Using cached image for %s', project_name)
-      command.append('--use_cached_image')
-
-      # Overwrite the Dockerfile to be caching friendly
-      # We hardcode 'address' here, but this is irrelevant and will be
-      # overridden later via a Docker argument.
-      oss_fuzz_checkout.rewrite_project_to_cached_project(
-          project_name, generated_project, 'address')
-      oss_fuzz_checkout.prepare_build(project_name, 'address',
-                                      generated_project)
+    # TODO(dongge): Reenable caching when build script is not modified.
+    # Current caching is not applicable when OFG modifies the build script,
+    # and fails to build the project (particularly with coverage sanitizer).
+    # There is no simple way to check if the build script has been modified,
+    # but this feature should be added later.
+    # if oss_fuzz_checkout.ENABLE_CACHING and (
+    #     oss_fuzz_checkout.is_image_cached(project_name, 'address') and
+    #     oss_fuzz_checkout.is_image_cached(project_name, 'coverage')):
+    #   logger.info('Using cached image for %s', project_name)
+    #   command.append('--use_cached_image')
+
+    #   # Overwrite the Dockerfile to be caching friendly
+    #   # We hardcode 'address' here, but this is irrelevant and will be
+    #   # overridden later via a Docker argument.
+    #   oss_fuzz_checkout.rewrite_project_to_cached_project(
+    #       project_name, generated_project, 'address')
+    #   oss_fuzz_checkout.prepare_build(project_name, 'address',
+    #                                   generated_project)
 
     if cloud_build_tags:
       command += ['--tags'] + cloud_build_tags