Skip to content

Coverage analysis agent #928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 32 commits into from
Apr 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
871aaf6
A draft coverage analyzer
DonggeLiu Mar 25, 2025
2982c64
We are not interested in mem leaks
DonggeLiu Mar 25, 2025
0f08775
Prompt builder for coverage analyzer
DonggeLiu Mar 25, 2025
efc14e0
Adjust coverage result and analysis result
DonggeLiu Mar 25, 2025
0a93b3f
Call the corresponding agent for specific tasks
DonggeLiu Mar 25, 2025
da5711f
Save run log into runresult
DonggeLiu Mar 25, 2025
7b9627c
Add coverage analyzer for experiments
DonggeLiu Mar 25, 2025
990fe6d
temp fix for enhancer and one_prompt_prototyper
DonggeLiu Mar 25, 2025
e4711d0
bug fix
DonggeLiu Mar 25, 2025
61b0b24
Unify agent selection and execution statements
DonggeLiu Mar 25, 2025
683f658
Priming template for coverage analyzer
DonggeLiu Mar 25, 2025
d9cdb38
Use saved run log, the text file will not exist in cloud build agents
DonggeLiu Mar 25, 2025
2937278
Add tool guide
DonggeLiu Mar 25, 2025
93480fb
a todo
DonggeLiu Mar 25, 2025
80b95fe
fix type error
DonggeLiu Mar 25, 2025
6fa3cbf
Refine enhancer prompt and truncate overlong run log
DonggeLiu Mar 26, 2025
ca25e6f
bug fix
DonggeLiu Mar 26, 2025
90e0366
Do not save full fuzzing log to avoid OOM
DonggeLiu Mar 26, 2025
5d8c0ac
Improve prompt
DonggeLiu Mar 26, 2025
7439cc9
Upload the result dir + tag agent cloud build by function name
DonggeLiu Mar 27, 2025
c5a2ef0
Bug fix: use unique ID, only upload the required result files
DonggeLiu Mar 27, 2025
f372a5f
organize cached image and oss_fuzz project path
DonggeLiu Mar 28, 2025
fe330f6
More info in report via chathistory
DonggeLiu Mar 29, 2025
5c2c80e
temp disable caching
DonggeLiu Mar 29, 2025
6185465
temp disable caching
DonggeLiu Mar 29, 2025
217079e
Revert 'temp disable caching'
DonggeLiu Apr 2, 2025
ef0858c
Disable cache for execution stage only
DonggeLiu Apr 2, 2025
67555bc
Put coverage analyzer invalid response prompt into a file
DonggeLiu Apr 8, 2025
3ac54d7
A function to write content to a file in container, using it to
DonggeLiu Apr 8, 2025
b1ad6f5
Explain why caching is disabled for execution stage
DonggeLiu Apr 8, 2025
cc529af
Fix bug
DonggeLiu Apr 8, 2025
2066f61
Merge branch 'main' into cov-agent
DonggeLiu Apr 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 125 additions & 1 deletion agent/coverage_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,132 @@
"""An LLM agent to analyze and provide insight of a fuzz target's low coverage.
Use it as a usual module locally, or as script in cloud builds.
"""
import os
from typing import Optional

import logger
from agent.base_agent import BaseAgent
from experiment.workdir import WorkDirs
from llm_toolkit import prompt_builder
from llm_toolkit.prompt_builder import CoverageAnalyzerTemplateBuilder
from llm_toolkit.prompts import Prompt
from results import AnalysisResult, CoverageResult, Result, RunResult
from tool.container_tool import ProjectContainerTool

# Relative path of the text file whose content is appended to the follow-up
# prompt when the LLM response is empty or yields no usable prompt content.
# NOTE(review): "PRMOT" looks like a typo of "PROMPT"; the name is kept as-is
# because other code refers to it.
INVALID_PRMOT_PATH = os.path.join('prompts', 'agent',
'coverage-analyzer-invalid-response.txt')


class CoverageAnalyzer(BaseAgent):
  """The Agent to analyze and provide insight of a fuzz target's low coverage.

  It chats with the LLM for a bounded number of rounds. In each round the LLM
  may either ask for bash commands to be run in an inspection container or
  report its conclusion, which is recorded in a `CoverageResult`.
  """

  def _initial_prompt(self, results: list[Result]) -> Prompt:
    """Constructs initial prompt of the agent.

    Args:
      results: The result history; only the last entry is used and it is
        expected to be a `RunResult`.

    Returns:
      The prompt built by `CoverageAnalyzerTemplateBuilder`, or an empty
      `Prompt()` (after logging an error) when the last result is not a
      `RunResult`.
    """
    last_result = results[-1]
    benchmark = last_result.benchmark

    if not isinstance(last_result, RunResult):
      logger.error('The last result in %s is not RunResult: %s',
                   self.name,
                   results,
                   trial=self.trial)
      return Prompt()

    builder = CoverageAnalyzerTemplateBuilder(self.llm, benchmark, last_result)
    prompt = builder.build(example_pair=[],
                           tool_guides=self.inspect_tool.tutorial(),
                           project_dir=self.inspect_tool.project_dir)
    # TODO: A different file name/dir.
    prompt.save(self.args.work_dirs.prompt)

    return prompt

  def _container_handle_conclusion(self, cur_round: int, response: str,
                                   coverage_result: CoverageResult,
                                   prompt: Prompt) -> Optional[Prompt]:
    """Records the LLM's conclusion, if any, into |coverage_result|.

    Args:
      cur_round: The current chat round, used for logging only.
      response: The raw LLM response to parse.
      coverage_result: Updated in place with `improve_required`, `insight`
        and `suggestions` when a conclusion is found.
      prompt: The prompt to hand back when no conclusion is found.

    Returns:
      |prompt| unchanged when no 'conclusion' is parsed from |response|;
      None once the conclusion has been recorded.
    """
    conclusion = self._parse_tag(response, 'conclusion')
    if not conclusion:
      return prompt
    logger.info('----- ROUND %02d Received conclusion -----',
                cur_round,
                trial=self.trial)

    # Only the literal text 'true' (case-insensitive, surrounding whitespace
    # ignored) marks that improvement is required.
    coverage_result.improve_required = conclusion.strip().lower() == 'true'
    coverage_result.insight = self._parse_tag(response, 'insights')
    coverage_result.suggestions = self._parse_tag(response, 'suggestions')

    return None

  def _container_tool_reaction(
      self, cur_round: int, response: str, run_result: RunResult,
      coverage_result: CoverageResult) -> Optional[Prompt]:
    """Validates LLM conclusion or executes its command.

    Args:
      cur_round: The current chat round.
      response: The raw LLM response.
      run_result: Unused; accepted for signature compatibility.
      coverage_result: Receives the conclusion when the LLM reports one.

    Returns:
      The next prompt to send to the LLM, or None when a conclusion has been
      recorded and the conversation can stop.
    """
    del run_result
    prompt = prompt_builder.DefaultTemplateBuilder(self.llm, None).build([])

    prompt = self._container_handle_bash_commands(response, self.inspect_tool,
                                                  prompt)
    # Only report conclusion when no more bash investigation is required.
    if not prompt.gettext():
      # Then look for the conclusion in the response.
      prompt = self._container_handle_conclusion(cur_round, response,
                                                 coverage_result, prompt)
      if prompt is None:
        # Succeeded.
        return None

    # Finally check invalid responses.
    if not response or not prompt.get():
      prompt = self._container_handle_invalid_tool_usage(
          self.inspect_tool, cur_round, response, prompt)
      # Explicit encoding so reading the prompt file does not depend on the
      # locale of the machine (e.g. a cloud build agent).
      with open(INVALID_PRMOT_PATH, 'r', encoding='utf-8') as prompt_file:
        prompt.append(prompt_file.read())

    return prompt

  def execute(self, result_history: list[Result]) -> AnalysisResult:
    """Executes the agent to analyze the root cause to the low coverage.

    Args:
      result_history: The result history; its last entry must be a
        `RunResult` holding the fuzz target (and optionally the build script)
        to analyze.

    Returns:
      An `AnalysisResult` wrapping the last run result and the
      `CoverageResult` collected during the chat.

    Raises:
      AssertionError: If the last result is not a `RunResult`.
    """
    # NOTE(review): the instance is discarded; presumably constructed for its
    # side effect on the working directories -- confirm against WorkDirs.
    WorkDirs(self.args.work_dirs.base, keep=True)
    last_result = result_history[-1]
    assert isinstance(last_result, RunResult)

    logger.info('Executing %s', self.name, trial=last_result.trial)
    benchmark = last_result.benchmark
    # TODO(dongge): Use the generated fuzz target and build script here.
    self.inspect_tool = ProjectContainerTool(benchmark, name='inspect')
    coverage_result = CoverageResult()

    # Everything that touches the container lives inside the try block so the
    # container is always terminated, even if setup or compilation fails.
    try:
      self.inspect_tool.write_to_file(content=last_result.fuzz_target_source,
                                      file_path=benchmark.target_path)
      if last_result.build_script_source:
        self.inspect_tool.write_to_file(
            content=last_result.build_script_source,
            file_path=self.inspect_tool.build_script_path)
      self.inspect_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null')

      cur_round = 1
      prompt = self._initial_prompt(result_history)
      client = self.llm.get_chat_client(model=self.llm.get_model())
      # Stop when a conclusion was recorded (prompt becomes None) or the round
      # budget is used up.
      while prompt and cur_round < self.max_round:
        response = self.chat_llm(cur_round,
                                 client=client,
                                 prompt=prompt,
                                 trial=last_result.trial)
        prompt = self._container_tool_reaction(cur_round, response, last_result,
                                               coverage_result)
        cur_round += 1
    finally:
      # Cleanup: stop and remove the container
      logger.debug('Stopping and removing the inspect container %s',
                   self.inspect_tool.container_id,
                   trial=last_result.trial)
      self.inspect_tool.terminate()

    analysis_result = AnalysisResult(
        author=self,
        run_result=last_result,
        coverage_result=coverage_result,
        chat_history={self.name: coverage_result.to_dict()})
    return analysis_result
30 changes: 25 additions & 5 deletions agent/enhancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
"""
import logger
from agent.prototyper import Prototyper
from llm_toolkit.prompt_builder import EnhancerTemplateBuilder, JvmFixingBuilder
from llm_toolkit.prompts import Prompt
from llm_toolkit.prompt_builder import (CoverageEnhancerTemplateBuilder,
EnhancerTemplateBuilder,
JvmFixingBuilder)
from llm_toolkit.prompts import Prompt, TextPrompt
from results import AnalysisResult, BuildResult, Result


Expand Down Expand Up @@ -52,9 +54,27 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
last_result.run_result.fuzz_target_source, [])
prompt = builder.build([], None, None)
else:
error_desc, errors = last_result.semantic_result.get_error_info()
builder = EnhancerTemplateBuilder(self.llm, benchmark, last_build_result,
error_desc, errors)
# TODO(dongge): Refine this logic.
if last_result.semantic_result:
error_desc, errors = last_result.semantic_result.get_error_info()
builder = EnhancerTemplateBuilder(self.llm, benchmark,
last_build_result, error_desc, errors)
elif last_result.coverage_result:
builder = CoverageEnhancerTemplateBuilder(
self.llm,
benchmark,
last_build_result,
coverage_result=last_result.coverage_result)
else:
logger.error(
'Last result does not contain either semantic result or '
'coverage result',
trial=self.trial)
# TODO(dongge): Give some default initial prompt.
prompt = TextPrompt(
'Last result does not contain either semantic result or '
'coverage result')
return prompt
prompt = builder.build(example_pair=[],
tool_guides=self.inspect_tool.tutorial(),
project_dir=self.inspect_tool.project_dir)
Expand Down
25 changes: 18 additions & 7 deletions agent/one_prompt_enhancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,25 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
last_result.run_result.fuzz_target_source, [])
prompt = builder.build([], None, None)
else:
error_desc, errors = last_result.semantic_result.get_error_info()
# TODO(dongge): Refine this logic.
builder = DefaultTemplateBuilder(self.llm)
prompt = builder.build_fixer_prompt(benchmark,
last_result.fuzz_target_source,
error_desc,
errors,
context='',
instruction='')
if last_result.semantic_result:
error_desc, errors = last_result.semantic_result.get_error_info()
prompt = builder.build_fixer_prompt(benchmark,
last_result.fuzz_target_source,
error_desc,
errors,
context='',
instruction='')
else:
prompt = builder.build_fixer_prompt(
benchmark=benchmark,
raw_code=last_result.fuzz_target_source,
error_desc='',
errors=[],
coverage_result=last_result.coverage_result,
context='',
instruction='')
# TODO: A different file name/dir.
prompt.save(self.args.work_dirs.prompt)

Expand Down
7 changes: 5 additions & 2 deletions agent/one_prompt_prototyper.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,11 @@ def _advice_fuzz_target(self, build_result: BuildResult,
instruction = code_fixer.collect_instructions(
build_result.benchmark, errors, build_result.fuzz_target_source)
prompt = builder.build_fixer_prompt(build_result.benchmark,
build_result.fuzz_target_source, '',
errors, context, instruction)
build_result.fuzz_target_source,
'',
errors,
context=context,
instruction=instruction)

return prompt

Expand Down
16 changes: 5 additions & 11 deletions agent/prototyper.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,18 +154,12 @@ def _validate_fuzz_target_and_build_script_via_compile(
compilation_tool = ProjectContainerTool(benchmark=benchmark)

# Replace fuzz target and build script in the container.
replace_file_content_command = (
'cat << "OFG_EOF" > {file_path}\n{file_content}\nOFG_EOF')
compilation_tool.execute(
replace_file_content_command.format(
file_path=benchmark.target_path,
file_content=build_result.fuzz_target_source))

compilation_tool.write_to_file(content=build_result.fuzz_target_source,
file_path=benchmark.target_path)
if build_result.build_script_source:
compilation_tool.execute(
replace_file_content_command.format(
file_path='/src/build.sh',
file_content=build_result.build_script_source))
compilation_tool.write_to_file(
content=build_result.build_script_source,
file_path=compilation_tool.build_script_path)

# Recompile.
logger.info('===== ROUND %02d Recompile =====',
Expand Down
25 changes: 5 additions & 20 deletions agent/semantic_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
"""An LLM agent to generate a simple fuzz target prototype that can build.
Use it as a usual module locally, or as script in cloud builds.
"""
import os
import re
from collections import defaultdict, namedtuple
from typing import Optional
Expand Down Expand Up @@ -61,11 +60,8 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
last_result = result_history[-1]
assert isinstance(last_result, RunResult)

with open(
os.path.join(last_result.work_dirs.run_logs, f'{self.trial:02}.log'),
'rb') as fuzzer_log:
_, _, _, _, semantic_result = self._parse_libfuzzer_logs(
fuzzer_log, last_result.benchmark.project)
_, _, _, _, semantic_result = self._parse_libfuzzer_logs(
last_result.run_log, last_result.benchmark.project)

analysis_result = AnalysisResult(
author=self,
Expand All @@ -75,24 +71,13 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
return analysis_result

def _parse_libfuzzer_logs(self,
log_handle,
fuzzlog,
project_name: str,
check_cov_increase: bool = True) -> ParseResult:
"""Parses libFuzzer logs."""
lines = None
try:
fuzzlog = log_handle.read(-1)
# Some crashes can mess up the libfuzzer output and raise decode error.
fuzzlog = fuzzlog.decode('utf-8', errors='ignore')
lines = fuzzlog.split('\n')
except MemoryError as e:
# Some logs from abnormal fuzz targets are too large to be parsed.
logger.error('%s is too large to parse: %s',
log_handle.name,
e,
trial=self.trial)
return ParseResult(0, 0, False, '',
SemanticCheckResult(SemanticCheckResult.LOG_MESS_UP))
# Some crashes can mess up the libfuzzer output and raise decode error.
lines = fuzzlog.split('\n')

cov_pcs, total_pcs, crashes = 0, 0, False

Expand Down
17 changes: 11 additions & 6 deletions common/cloud_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class CloudBuilder:

def __init__(self, args: argparse.Namespace) -> None:
self.tags = ['ofg', 'agent', args.cloud_experiment_name]
self.exp_args = args
self.credentials, self.project_id = default()
assert self.project_id, 'Cloud experiment requires a Google cloud project.'
assert hasattr(
Expand Down Expand Up @@ -96,17 +97,21 @@ def _upload_to_gcs(self, local_file_path: str) -> str:
logging.info('Uploaded %s to %s', local_file_path, bucket_file_url)
return bucket_file_url

def _prepare_and_upload_archive(self) -> str:
def _prepare_and_upload_archive(self, result_history: list[Result]) -> str:
"""Archives and uploads local OFG repo to cloud build."""
files_in_dir = set(
dir_files = set(
os.path.relpath(os.path.join(root, file))
for root, _, files in os.walk(OFG_ROOT_DIR)
for file in files)
files_in_git = set(
git_files = set(
subprocess.check_output(['git', 'ls-files'],
cwd=OFG_ROOT_DIR,
text=True).splitlines())
file_to_upload = list(files_in_dir & files_in_git)
result_files = set(
os.path.relpath(os.path.join(root, file))
for root, _, files in os.walk(result_history[-1].work_dirs.base)
for file in files)
file_to_upload = list((dir_files & git_files) | result_files)

return self._upload_files(f'ofg-repo-{uuid.uuid4().hex}.tar.gz',
OFG_ROOT_DIR, file_to_upload)
Expand Down Expand Up @@ -363,7 +368,7 @@ def run(self, agent: BaseAgent, result_history: list[Result],
self.tags += [
str(agent),
str(result_history[-1].benchmark.project),
# TODO(dongge): A tag for function name, compatible with tag format.
str(result_history[-1].benchmark.function_name),
str(result_history[-1].trial)
]
# Step1: Generate dill files.
Expand All @@ -374,7 +379,7 @@ def run(self, agent: BaseAgent, result_history: list[Result],
# TODO(dongge): Encrypt dill files?

# Step 2: Upload OFG repo and dill files to GCS.
ofg_url = self._prepare_and_upload_archive()
ofg_url = self._prepare_and_upload_archive(result_history)
agent_url = self._upload_to_gcs(agent_dill)
results_url = self._upload_to_gcs(results_dill)
oss_fuzz_data_url = self._upload_oss_fuzz_data()
Expand Down
32 changes: 19 additions & 13 deletions experiment/builder_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ def _libfuzzer_args(self) -> list[str]:
'-len_control=0',
# Timeout per testcase.
'-timeout=30',
'-detect_leaks=0',
]

def _get_minimum_func_name(self, func_sig: str) -> str:
Expand Down Expand Up @@ -922,19 +923,24 @@ def build_and_run_cloud(
f'--real_project={project_name}',
]

if oss_fuzz_checkout.ENABLE_CACHING and (
oss_fuzz_checkout.is_image_cached(project_name, 'address') and
oss_fuzz_checkout.is_image_cached(project_name, 'coverage')):
logger.info('Using cached image for %s', project_name)
command.append('--use_cached_image')

# Overwrite the Dockerfile to be caching friendly
# We hardcode 'address' here, but this is irrelevant and will be
# overridden later via a Docker argument.
oss_fuzz_checkout.rewrite_project_to_cached_project(
project_name, generated_project, 'address')
oss_fuzz_checkout.prepare_build(project_name, 'address',
generated_project)
# TODO(dongge): Reenable caching when build script is not modified.
# Current caching is not applicable when OFG modifies the build script,
# and fails to build the project (particularly with coverage sanitizer).
# There is no simple way to check if the build script has been modified,
# but this feature should be added later.
# if oss_fuzz_checkout.ENABLE_CACHING and (
# oss_fuzz_checkout.is_image_cached(project_name, 'address') and
# oss_fuzz_checkout.is_image_cached(project_name, 'coverage')):
# logger.info('Using cached image for %s', project_name)
# command.append('--use_cached_image')

# # Overwrite the Dockerfile to be caching friendly
# # We hardcode 'address' here, but this is irrelevant and will be
# # overridden later via a Docker argument.
# oss_fuzz_checkout.rewrite_project_to_cached_project(
# project_name, generated_project, 'address')
# oss_fuzz_checkout.prepare_build(project_name, 'address',
# generated_project)

if cloud_build_tags:
command += ['--tags'] + cloud_build_tags
Expand Down
Loading