option(GRAPHALG_ENABLE_RTTI "Enable run time type information" OFF)
option(GRAPHALG_ENABLE_FUZZER "Enable building fuzzer targets" OFF)
option(GRAPHALG_ENABLE_SANITIZE "Enable sanitizers" OFF)
# Declared explicitly, like the other switches: the build tests this variable
# to add -fprofile-instr-generate -fcoverage-mapping, and configure-coverage.sh
# passes -DGRAPHALG_ENABLE_COVERAGE=ON. Without the declaration the switch has
# no documented default and does not show up in ccmake/cmake-gui.
option(GRAPHALG_ENABLE_COVERAGE "Enable source-based coverage instrumentation" OFF)
# ================================ END OPTIONS =================================
#!/usr/bin/env python3
"""Collect source-based coverage for the compiler test suite.

Steps performed by :func:`main`:

1. Configure ``build-coverage`` with ``configure-coverage.sh`` (first run
   only); on later runs, delete the previous run's coverage artifacts instead.
2. Build the ``check`` target.
3. Merge the resulting ``*.profraw`` files with ``llvm-profdata-20``.
4. Render one HTML report per static library with ``llvm-cov-20``.
"""
import shutil
import subprocess
import sys
from pathlib import Path

# Resolved so that the paths below stay valid no matter which directory the
# script is started from or how it was spelled on the command line.
WORKSPACE_ROOT = Path(__file__).resolve().parent
BUILD_DIR = WORKSPACE_ROOT / "build-coverage"


def main():
    """Build with coverage enabled, run the tests and write HTML reports.

    Exits with status 1 when the test run produced no ``*.profraw`` files or
    when the build directory contains no ``*.a`` libraries.

    Raises:
        subprocess.CalledProcessError: if configuring, building, merging the
            profiles or rendering a report fails (every command runs with
            ``check=True``).
    """
    # Raw profiles are expected here after the `check` run. The lit site
    # config writes them to `<lit cwd>/coverage`; presumably lit runs with
    # BUILD_DIR/test as its working directory -- confirm if this moves.
    coverage_dir = BUILD_DIR / "test" / "coverage"
    coverage_report_base = BUILD_DIR / "coverage-report"

    if not BUILD_DIR.exists():
        # configure-coverage.sh addresses the workspace through the relative
        # path `compiler/`, so it has to run from the directory that contains
        # the workspace, not from wherever this script was launched.
        configure_script = WORKSPACE_ROOT / "configure-coverage.sh"
        subprocess.run([configure_script], check=True, cwd=WORKSPACE_ROOT.parent)
    else:
        # Reuse the existing build tree, but drop stale profiles and reports
        # so they cannot leak into this run's results.
        for stale_dir in (coverage_dir, coverage_report_base):
            if stale_dir.exists():
                shutil.rmtree(stale_dir)

    # Build with cmake
    subprocess.run(["cmake", "--build", BUILD_DIR, "--target", "check"], check=True)

    # Merge coverage data (sorted only to make the command line reproducible)
    profraw_files = sorted(coverage_dir.glob("*.profraw"))
    if not profraw_files:
        print(f"Error: No .profraw files found in {coverage_dir}", file=sys.stderr)
        sys.exit(1)

    profdata_output = BUILD_DIR / "compiler.profdata"
    subprocess.run([
        "llvm-profdata-20", "merge", "-sparse",
        *profraw_files,
        "-o", profdata_output
    ], check=True)

    # Generate coverage reports for all static libraries in the build tree
    coverage_report_base.mkdir(exist_ok=True)
    library_files = sorted(BUILD_DIR.glob("**/*.a"))
    if not library_files:
        print("Error: No .a library files found in build directory", file=sys.stderr)
        sys.exit(1)

    print(f"Found {len(library_files)} libraries, generating coverage reports...")

    for lib_file in library_files:
        # Report directory is named after the library without the extension
        # and without the conventional "lib" prefix:
        # "libGraphAlgIR.a" -> stem "libGraphAlgIR" -> "GraphAlgIR".
        lib_name = lib_file.stem
        if lib_name.startswith("lib"):
            lib_name = lib_name[3:]

        report_dir = coverage_report_base / lib_name
        report_dir.mkdir(exist_ok=True)

        print(f"  Generating report for {lib_file.name}...")
        subprocess.run([
            "llvm-cov-20", "show",
            lib_file,
            f"-instr-profile={profdata_output}",
            # Leave out sources under the LLVM install that
            # configure-coverage.sh points LLVM_ROOT at.
            "--ignore-filename-regex=opt/llvm-debug",
            "--format=html",
            "-o", report_dir
        ], check=True)

    print(f"\nCoverage reports generated at {coverage_report_base}")


if __name__ == "__main__":
    main()
$WORKSPACE_ROOT -B $BUILD_DIR -G Ninja \ -DCMAKE_EXPORT_COMPILE_COMMANDS=1 \ -DCMAKE_LINKER_TYPE=MOLD \ -DGRAPHALG_OVERRIDE_LLVM_TOOLS_BINARY_DIR="/usr/lib/llvm-20/bin" \ + -DGRAPHALG_ENABLE_SANITIZE=ON \ + -DGRAPHALG_ENABLE_FUZZER=ON \ -DLLVM_ROOT="/opt/llvm-debug" \ diff --git a/compiler/src/graphalg/parse/Lexer.cpp b/compiler/src/graphalg/parse/Lexer.cpp index 3d8c9fd..e683600 100644 --- a/compiler/src/graphalg/parse/Lexer.cpp +++ b/compiler/src/graphalg/parse/Lexer.cpp @@ -128,10 +128,15 @@ void Lexer::eatWhitespace() { continue; } else if (peek(2) == "//") { // Line comment - while (cur() != '\n') { + while (cur() && cur() != '\n') { eat(); } + if (!cur()) { + // End of input + return; + } + assert(cur() == '\n'); eat(); continue; diff --git a/compiler/src/graphalg/parse/Parser.cpp b/compiler/src/graphalg/parse/Parser.cpp index 052e895..e1013ef 100644 --- a/compiler/src/graphalg/parse/Parser.cpp +++ b/compiler/src/graphalg/parse/Parser.cpp @@ -224,6 +224,8 @@ class Parser { mlir::ParseResult parseAtomDiag(mlir::Value &v); mlir::ParseResult parseAtomTril(mlir::Value &v); mlir::ParseResult parseAtomTriu(mlir::Value &v); + mlir::ParseResult parseAtomNot(mlir::Value &v); + mlir::ParseResult parseAtomNeg(mlir::Value &v); mlir::ParseResult parseLiteral(mlir::Type ring, mlir::Value &v); @@ -1561,47 +1563,10 @@ mlir::ParseResult Parser::parseAtom(mlir::Value &v) { v = var.value; return mlir::success(); } - case Token::NOT: { - if (eatOrError(Token::NOT) || parseAtom(v)) { - return mlir::failure(); - } - - // Check that NOT is only used with bool semiring - auto vType = llvm::cast(v.getType()); - auto semiring = vType.getSemiring(); - auto *ctx = _builder.getContext(); - if (semiring != SemiringTypes::forBool(ctx)) { - auto diag = mlir::emitError(loc) - << "not operator is only supported for bool type"; - diag.attachNote(v.getLoc()) - << "operand has semiring " << typeToString(semiring); - return mlir::failure(); - } - - v = _builder.create(loc, v); - return 
// Parses a logical-not expression: a NOT token followed by an atom.
//
// The operand is parsed with parseAtom(), which itself dispatches a leading
// NOT back to this function, so the operator can be stacked.
//
// On success `v` is overwritten with the value of the op built from the
// operand. Returns failure if the NOT token or the operand cannot be parsed,
// if the operand is not scalar, or if its semiring is not bool; in the last
// two cases an error is emitted at `loc` with a note attached to the operand.
//
// NOTE(review): the template arguments of llvm::cast and _builder.create are
// not visible in this view of the file -- confirm the cast target type and
// the op being created against the real source.
mlir::ParseResult Parser::parseAtomNot(mlir::Value &v) {
  // Location of the current token on entry (taken before anything is
  // consumed); all errors below are reported against it.
  auto loc = cur().loc;
  if (eatOrError(Token::NOT) || parseAtom(v)) {
    return mlir::failure();
  }

  // Check that NOT is only used with scalar types
  auto vType = llvm::cast(v.getType());
  if (!vType.isScalar()) {
    auto diag = mlir::emitError(loc)
                << "not operator is only supported for scalar bool type";
    diag.attachNote(v.getLoc()) << "operand has type " << typeToString(vType);
    return mlir::failure();
  }

  // Check that NOT is only used with bool semiring
  auto semiring = vType.getSemiring();
  auto *ctx = _builder.getContext();
  if (semiring != SemiringTypes::forBool(ctx)) {
    auto diag = mlir::emitError(loc)
                << "not operator is only supported for bool type";
    diag.attachNote(v.getLoc())
        << "operand has semiring " << typeToString(semiring);
    return mlir::failure();
  }

  // Replace the operand with the result of the newly created op.
  v = _builder.create(loc, v);
  return mlir::success();
}

// Parses an arithmetic negation: a MINUS token followed by an atom.
//
// Mirrors parseAtomNot(): the operand is parsed with parseAtom(), must be
// scalar, and its semiring must be int or real. On success `v` is overwritten
// with the value of the op built from the operand; otherwise failure is
// returned, with an error at `loc` and a note on the operand for the two
// type checks.
//
// NOTE(review): the template arguments of llvm::cast and _builder.create are
// not visible in this view of the file -- confirm them against the real
// source.
mlir::ParseResult Parser::parseAtomNeg(mlir::Value &v) {
  // Location of the current token on entry (taken before anything is
  // consumed); all errors below are reported against it.
  auto loc = cur().loc;
  if (eatOrError(Token::MINUS) || parseAtom(v)) {
    return mlir::failure();
  }

  // Check that negation is only used with scalar types
  auto vType = llvm::cast(v.getType());
  if (!vType.isScalar()) {
    auto diag = mlir::emitError(loc)
                << "negation is only supported for scalar types";
    diag.attachNote(v.getLoc()) << "operand has type " << typeToString(vType);
    return mlir::failure();
  }

  // Check that negation is only used with int or real semirings
  auto semiring = vType.getSemiring();
  auto *ctx = _builder.getContext();
  if (semiring != SemiringTypes::forInt(ctx) &&
      semiring != SemiringTypes::forReal(ctx)) {
    auto diag = mlir::emitError(loc)
                << "negation is only supported for int and real types";
    diag.attachNote(v.getLoc())
        << "operand has semiring " << typeToString(semiring);
    return mlir::failure();
  }

  // Replace the operand with the result of the newly created op.
  v = _builder.create(loc, v);
  return mlir::success();
}
lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg.py") diff --git a/compiler/test/parse-err/neg-matrix.gr b/compiler/test/parse-err/neg-matrix.gr new file mode 100644 index 0000000..54c28df --- /dev/null +++ b/compiler/test/parse-err/neg-matrix.gr @@ -0,0 +1,9 @@ +// RUN: graphalg-translate --import-graphalg --verify-diagnostics %s + +func NegMatrix( + // expected-note@below{{operand has type Matrix}} + a: Matrix) -> Matrix { + // Negation only supports scalar inputs + // expected-error@below{{negation is only supported for scalar types}} + return -a; +} diff --git a/compiler/test/parse-err/neg-vector.gr b/compiler/test/parse-err/neg-vector.gr new file mode 100644 index 0000000..93ea5ef --- /dev/null +++ b/compiler/test/parse-err/neg-vector.gr @@ -0,0 +1,9 @@ +// RUN: graphalg-translate --import-graphalg --verify-diagnostics %s + +func NegVector( + // expected-note@below{{operand has type Vector}} + a: Vector) -> Vector { + // Negation only supports scalar inputs + // expected-error@below{{negation is only supported for scalar types}} + return -a; +} diff --git a/compiler/test/parse-err/not-matrix.gr b/compiler/test/parse-err/not-matrix.gr new file mode 100644 index 0000000..f538583 --- /dev/null +++ b/compiler/test/parse-err/not-matrix.gr @@ -0,0 +1,9 @@ +// RUN: graphalg-translate --import-graphalg --verify-diagnostics %s + +func NotMatrix( + // expected-note@below{{operand has type Matrix}} + a: Matrix) -> Matrix { + // NOT only supports scalar boolean inputs + // expected-error@below{{not operator is only supported for scalar bool type}} + return !a; +} diff --git a/compiler/test/parse-err/not-vector.gr b/compiler/test/parse-err/not-vector.gr new file mode 100644 index 0000000..bb752bf --- /dev/null +++ b/compiler/test/parse-err/not-vector.gr @@ -0,0 +1,9 @@ +// RUN: graphalg-translate --import-graphalg --verify-diagnostics %s + +func NotVector( + // expected-note@below{{operand has type Vector}} + a: Vector) -> Vector { + // NOT only supports 
if (GRAPHALG_ENABLE_FUZZER)
  # libFuzzer harness for the GraphAlg parser. The same flag is needed when
  # compiling (instrumentation) and when linking (libFuzzer runtime), so it is
  # spelled once and reused.
  #
  # NOTE(review): the flag is PRIVATE to fuzz-parser, so this file only
  # instruments fuzz-parser.cpp itself. Confirm that GraphAlgIR/GraphAlgParse
  # get coverage instrumentation elsewhere (e.g. -fsanitize=fuzzer-no-link);
  # otherwise libFuzzer receives little feedback from the code under test.
  set(fuzz_parser_sanitize_flags -fsanitize=fuzzer)

  add_executable(fuzz-parser fuzz-parser.cpp)
  target_compile_options(fuzz-parser PRIVATE ${fuzz_parser_sanitize_flags})
  target_link_options(fuzz-parser PRIVATE ${fuzz_parser_sanitize_flags})
  target_link_libraries(fuzz-parser PRIVATE
    ${llvm_libs}
    GraphAlgIR
    GraphAlgParse
  )
endif ()
# Fuzzing the Parser
Create a new corpus from the parser tests, then build and run the fuzzer.
Run all commands from the repository root:

```bash
# Pick any directory for the corpus; the fuzzer adds new inputs to it.
export CORPUS_DIR=/path/to/corpus
mkdir -p "$CORPUS_DIR"
cp compiler/test/parse/*.gr "$CORPUS_DIR"/
cp compiler/test/parse-err/*.gr "$CORPUS_DIR"/

./compiler/configure.sh
cmake --build ./compiler/build --target fuzz-parser
./compiler/build/tools/fuzz-parser "$CORPUS_DIR" -max_len=1000 -jobs=8 -only_ascii=1
```

With `-jobs`, libFuzzer writes the output of each job to a `fuzz-<N>.log` file
in the current directory; these files are ignored by git.