From fef3d201479fef6530b72a17e893920797c77121 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 30 Oct 2023 13:43:23 +0100 Subject: [PATCH 01/20] Starts to implement ptkgen grammar compiler, refactors ptk.Matcher to not return an optional anymore --- .envrc | 1 + build.zig | 104 ++++- build.zig.zon | 10 + design/ptkdefv/design.md | 4 + design/ptkdefv/grammar.ptk | 43 ++ design/ptkdefv/mapping-concept-01.ptk | 37 ++ examples/ptkgen/ast-with-unions.ptk | 62 +++ shell.nix | 11 + src/ptkgen/main.zig | 106 +++++ src/ptkgen/parser.zig | 370 ++++++++++++++++++ src/{ => toolkit}/Diagnostics.zig | 9 +- src/{ => toolkit}/Error.zig | 0 src/{ => toolkit}/Location.zig | 0 src/{ => toolkit}/StringCache.zig | 0 src/{ => toolkit}/main.zig | 4 + src/{ => toolkit}/parser_core.zig | 0 src/{ => toolkit}/token.zig | 10 + src/{ => toolkit}/tokenizer.zig | 68 ++-- .../accept/empty-with-comment-linefeed.ptk | 1 + test/parser/accept/empty-with-comment.ptk | 1 + test/parser/accept/empty.ptk | 0 test/parser/accept/identifiers.ptk | 8 + 22 files changed, 797 insertions(+), 52 deletions(-) create mode 100644 .envrc create mode 100644 build.zig.zon create mode 100644 design/ptkdefv/design.md create mode 100644 design/ptkdefv/grammar.ptk create mode 100644 design/ptkdefv/mapping-concept-01.ptk create mode 100644 examples/ptkgen/ast-with-unions.ptk create mode 100644 shell.nix create mode 100644 src/ptkgen/main.zig create mode 100644 src/ptkgen/parser.zig rename src/{ => toolkit}/Diagnostics.zig (84%) rename src/{ => toolkit}/Error.zig (100%) rename src/{ => toolkit}/Location.zig (100%) rename src/{ => toolkit}/StringCache.zig (100%) rename src/{ => toolkit}/main.zig (87%) rename src/{ => toolkit}/parser_core.zig (100%) rename src/{ => toolkit}/token.zig (56%) rename src/{ => toolkit}/tokenizer.zig (89%) create mode 100644 test/parser/accept/empty-with-comment-linefeed.ptk create mode 100644 test/parser/accept/empty-with-comment.ptk create mode 100644 test/parser/accept/empty.ptk create mode 100644 test/parser/accept/identifiers.ptk diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..17d6464 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use_nix \ No newline at end of file diff --git a/build.zig b/build.zig index 00a2907..12b0ce7 100644 --- a/build.zig +++ b/build.zig @@ -1,31 +1,99 @@ const std = @import("std"); pub fn build(b: *std.build.Builder) void { + // build options: + + const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); - _ = b.addModule("parser-toolkit", .{ - .source_file = .{ .path = "src/main.zig" }, + const test_step = b.step("test", "Run library tests"); + const examples_step = b.step("examples", "Builds and installs examples"); + const run_calc_step = b.step("run-calculator", "Runs calculator example"); + + const all_step = b.step("all", "Builds everything, tests everything"); + all_step.dependOn(b.getInstallStep()); + all_step.dependOn(test_step); + all_step.dependOn(examples_step); + + // dependencies + + const args_dep = b.dependency("args", .{}); + + // external modules + + const args_mod = args_dep.module("args"); + + // internal modules + + const ptk_mod = b.addModule("parser-toolkit", .{ + .source_file = .{ .path = "src/toolkit/main.zig" }, .dependencies = &.{}, }); - var main_tests = b.addTest(.{ - .root_source_file = .{ .path = "src/main.zig" }, - .optimize = optimize, - }); + // Applications + const ptkdef_exe = blk: { + const ptkdef = b.addExecutable(.{ + .name = "ptkgen", + .root_source_file = .{ .path = 
"src/ptkgen/main.zig" }, + .optimize = optimize, + .target = target, + }); - const test_step = b.step("test", "Run library tests"); - test_step.dependOn(&b.addRunArtifact(main_tests).step); + ptkdef.addModule("parser-toolkit", ptk_mod); + ptkdef.addModule("args", args_mod); - const calculator_example = b.addExecutable(.{ - .root_source_file = .{ .path = "examples/calculator.zig" }, - .name = "calculator", - .optimize = optimize, - }); + b.installArtifact(ptkdef); - b.installArtifact(calculator_example); - calculator_example.addAnonymousModule("parser-toolkit", .{ - .source_file = .{ .path = "src/main.zig" }, - }); + break :blk ptkdef; + }; + + // test suite + { + // unit tests for ptk: + var ptk_tests = b.addTest(.{ + .root_source_file = ptk_mod.source_file, + .optimize = optimize, + }); + for (ptk_mod.dependencies.keys()) |dep_name| { + ptk_tests.addModule(dep_name, ptk_mod.dependencies.get(dep_name).?); + } + test_step.dependOn(&b.addRunArtifact(ptk_tests).step); - b.step("run", "Runs the calculator example").dependOn(&b.addRunArtifact(calculator_example).step); + // unit tests for ptkgen: + var ptkgen_tests = b.addTest(.{ + .root_source_file = .{ .path = "src/ptkgen/main.zig" }, + .optimize = optimize, + }); + ptkgen_tests.addModule("parser-toolkit", ptk_mod); + test_step.dependOn(&b.addRunArtifact(ptkgen_tests).step); + + // Integration tests for ptkgen: + for (parser_ok_files) |file| { + const run = b.addRunArtifact(ptkdef_exe); + run.addArg("--test_mode=parse_only"); + run.addFileArg(.{ .path = file }); + test_step.dependOn(&run.step); + } + } + + // examples + { + const calculator_example = b.addExecutable(.{ + .root_source_file = .{ .path = "examples/calculator.zig" }, + .name = "calculator", + .optimize = optimize, + }); + calculator_example.addModule("parser-toolkit", ptk_mod); + examples_step.dependOn(&b.addInstallArtifact(calculator_example, .{}).step); + + run_calc_step.dependOn(&b.addRunArtifact(calculator_example).step); + } } + +const parser_ok_files = [_][]const u8{ + "test/parser/accept/empty.ptk", + "test/parser/accept/empty-with-comment-linefeed.ptk", + "test/parser/accept/empty-with-comment.ptk", + "test/parser/accept/identifiers.ptk", + "examples/ptkgen/ast-with-unions.ptk", // TODO: Move to examples +}; diff --git a/build.zig.zon b/build.zig.zon new file mode 100644 index 0000000..5cbec5c --- /dev/null +++ b/build.zig.zon @@ -0,0 +1,10 @@ +.{ + .name = "parser-toolkit", + .version = "0.2.0", + .dependencies = .{ + .args = .{ + .url = "https://github.com/MasterQ32/zig-args/archive/7989929d055ef7618e60de84cc54644046516fdb.tar.gz", + .hash = "12207752d975a7f5d7cc65662ed1c6b117da8dec6d1bd7af9a39e1b65d90bf86e833", + }, + }, +} diff --git a/design/ptkdefv/design.md b/design/ptkdefv/design.md new file mode 100644 index 0000000..e017c98 --- /dev/null +++ b/design/ptkdefv/design.md @@ -0,0 +1,4 @@ +# Parser Generator Language + +Create basic recursive descent parsers with "well-known" patterns that output a Zig AST data structure. + diff --git a/design/ptkdefv/grammar.ptk b/design/ptkdefv/grammar.ptk new file mode 100644 index 0000000..e50f519 --- /dev/null +++ b/design/ptkdefv/grammar.ptk @@ -0,0 +1,43 @@ + + +root ; # <...> is a "rule reference" + +token identifier = regex "[A-Za-z_][A-Za-z0-9_]*"; # defines token "identifier" to match this regex + +token line-comment = regex "//[^\n]*" skip; # ignores this token when parsing, but tokenizer recognizes it +token whitespace = regex "[ \t\r\n]" skip; + +rule document = + # [ ... 
] is a loop construct, can appear several times + [ ] [ ]* +; + +rule toplevel-decl = + # | is a "either/or" scenario, with precedence from left to right (first come, first serve) + | | +; + +rule interface-decl = + "interface" $identifier "(" ... ")" ";"; +; + +rule module-decl = + "module" $identifier "(" ... ")" "{" ... "}" ";"; +; + +rule using = + # "bla" is a literal token + # $bla is an explicitly defined token reference + # ...? is an optional part of a parse + "using" ";" ( "as" $identifier )? +; + +rule namespace-decl = + "namespace" ";" +; + +rule compound-identifier = + $identifier [ "." $identifier ]* +; + + diff --git a/design/ptkdefv/mapping-concept-01.ptk b/design/ptkdefv/mapping-concept-01.ptk new file mode 100644 index 0000000..9e4ccf9 --- /dev/null +++ b/design/ptkdefv/mapping-concept-01.ptk @@ -0,0 +1,37 @@ + +# "!id" is a type reference +# "$id" is a token reference +# "" is a rule reference + +# maps type "array" to a slice/arraylist of whatever "int" is +node array = sequence !int; + +# "int" is the Zig type "i32" +node int = literal "i32"; + +# the initial rule is "list", also determines the root type of the ast +start ; + +# "decimal" token is a decimal number sequence token +token decimal = regex "\d+"; + +# "list" is a sequence of decimals with comma separated, potential trailing comma, +# enclosed by square brackets +rule list = "[" [ $decimal "," ] $decimal? "]"; +# $0 $1______________ $2_______ $3 + +# the rule "list" is mapped to the type "array" +# as a sequence of the second element (unwrapped into items) and +# the third item appended. square brackets in a map are the "construct array operator". +# if the array is not sequence of optionals, optional items are skipped in construction +map !array = [ $1..., $2 ]; + +# the "decimal" token is mapped to i32 by invoking a Zig function called +# "parse" that takes the token as input and returns "i32": +map $decimal !int = @parse($0); + + + + + + diff --git a/examples/ptkgen/ast-with-unions.ptk b/examples/ptkgen/ast-with-unions.ptk new file mode 100644 index 0000000..b8c093f --- /dev/null +++ b/examples/ptkgen/ast-with-unions.ptk @@ -0,0 +1,62 @@ +# parse a construct like this into a single type: +# var name = value; +# const name = value; +# var name: type = value; +# const name: type = value; + +node declaration = struct + is_const: literal `bool`, + name: !identifier, + type: optional !type, + value: !value +; + +node identifier = literal `[]const u8`; +node type = custom `TypeId`; # enum { int, float, string } +node value = custom `Value`; + +start ; + +rule decl : !declaration = + ( ":" )? "=" => { + is_const = $0, + name = $1, + type = $2, + value = $4 + } +# $0_________ $1__ $2_____________ $3 $4_____ +; + +rule : literal `bool` = + "var" => `false` + | "const" => `true` +; + +rule : !identifier = "name" => tostring $0; + +rule : !type = + "int" => `.int` + | "float" => `.float` + | "string" => `.string` +; + +rule : !value = + "10" => @parseInt($0) + | "3.14" => @parseFloat($0) + | "\"nice\"" => @parseStringLiteral($0) +; + + + +# Unions have can only have a single option active at a time +node TLDeclaration = union + ns : !namespace, + interface : !interface, + module : !module, +; + +rule toplevel-decl : !TLDeclaration = + => ns: $0, # this is syntax for a union field selector as unions are not compounds + | => interface: $0, + | => module: $0, +; diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..664d354 --- /dev/null +++ b/shell.nix @@ -0,0 +1,11 @@ +{ pkgs ? 
import { } }: +pkgs.mkShell { + nativeBuildInputs = [ + # zig + pkgs.zig_0_11 + ]; + buildInputs = [ ]; + shellHook = '' + # put your shell hook here + ''; +} diff --git a/src/ptkgen/main.zig b/src/ptkgen/main.zig new file mode 100644 index 0000000..3303955 --- /dev/null +++ b/src/ptkgen/main.zig @@ -0,0 +1,106 @@ +//! +//! Parser Toolkit Grammar Compiler +//! + +const std = @import("std"); +const args_parser = @import("args"); +const ptk = @import("parser-toolkit"); + +const parser = @import("parser.zig"); + +comptime { + // reference for unit tests: + _ = parser; +} + +pub const CliOptions = struct { + help: bool = false, + output: ?[]const u8 = null, + test_mode: TestMode = .none, + + pub const shorthands = .{ + .h = "help", + .o = "output", + }; + + pub const meta = .{ + .full_text = "Compiles a .ptk grammar file into Zig code.", + + .usage_summary = "[-h] [-o ] []", + + .option_docs = .{ + .help = "Prints this help.", + .output = "If given, will print the generated code into ", + + .test_mode = "(internal use only, required for testing)", + }, + }; +}; + +const TestMode = enum { + none, + parse_only, +}; + +pub fn main() !u8 { + var stdout = std.io.getStdOut(); + var stdin = std.io.getStdIn(); + var stderr = std.io.getStdErr(); + + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer _ = gpa.deinit(); + + var arena = std.heap.ArenaAllocator.init(gpa.allocator()); + defer arena.deinit(); + + const dynamic_allocator = gpa.allocator(); + const static_allocator = arena.allocator(); + + var cli = args_parser.parseForCurrentProcess(CliOptions, static_allocator, .print) catch return 1; + defer cli.deinit(); + + if (cli.options.help) { + try args_parser.printHelp(CliOptions, cli.executable_name orelse "ptkgen", stdout.writer()); + return 0; + } + + var diagnostics = ptk.Diagnostics.init(dynamic_allocator); + defer diagnostics.deinit(); + + // From here on, always print the diagnostics on exit! + defer diagnostics.print(stderr.writer()) catch {}; + + var input_file = switch (cli.positionals.len) { + 0 => stdin, + 1 => std.fs.cwd().openFile(cli.positionals[0], .{}) catch |err| { + try stderr.writer().print("failed to open file {s}: {s}\n", .{ + cli.positionals[0], + @errorName(err), + }); + return 1; + }, + else => { + try stderr.writeAll("Expects either a single positional file or none.\nSee --help for usage!\n"); + return 1; + }, + }; + defer input_file.close(); + + var ast = try parser.parse( + dynamic_allocator, + &diagnostics, + if (cli.positionals.len > 0) + cli.positionals[0] + else + "stdint", + input_file.reader(), + ); + defer ast.deinit(); + + if (cli.options.test_mode == .parse_only) { + // we're done if we're here + return 0; + } + + return 0; +} diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig new file mode 100644 index 0000000..f1402ee --- /dev/null +++ b/src/ptkgen/parser.zig @@ -0,0 +1,370 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +pub const Document = struct { + arena: std.heap.ArenaAllocator, + + file_name: []const u8, + source_text: []const u8, + + pub fn deinit(ts: *Document) void { + ts.arena.deinit(); + ts.* = undefined; + } +}; + +pub fn parse(allocator: std.mem.Allocator, diagnostics: *ptk.Diagnostics, file_name: []const u8, stream: anytype) !Document { + var arena = std.heap.ArenaAllocator.init(allocator); + errdefer arena.deinit(); + + const file_name_copy = try arena.allocator().dupe(u8, file_name); + + const text = try stream.readAllAlloc(arena.allocator(), 4 << 20); // 4 MB should be enough for now... 
+ + var tokenizer = Tokenizer.init(text, file_name_copy); + + while (true) { + const token_or_none = tokenizer.next() catch |err| switch (err) { + error.UnexpectedCharacter => { + try diagnostics.emit(tokenizer.current_location, .@"error", "Unexpected character: '{}'", .{ + std.zig.fmtEscapes(tokenizer.source[tokenizer.offset..][0..1]), + }); + return error.SyntaxError; + }, + + else => |e| return e, + }; + const token = token_or_none orelse break; + + std.log.info("token: {}", .{token}); + } + + return Document{ + .arena = arena, + .file_name = file_name_copy, + .source_text = text, + }; +} + +pub const TokenType = enum { + + // keywords + + node, + @"struct", + optional, + start, + rule, + token, + + literal, + custom, + regex, + skip, + + // user values + + raw_identifier, // foo-bar_bam + node_ref, // !node + rule_ref, // + token_ref, // $token + value_ref, // $0 + builtin_ref, // @builtin + + // values + + string_literal, // "string" + code_literal, // `code` + + // operators + + @"=", + @",", + @".", + @"*", + @"+", + @":", + @";", + @"|", + @"!", + @"?", + @"[", + @"]", + @"(", + @")", + @"{", + @"}", + @"=>", + + // auxiliary + + line_comment, + whitespace, +}; + +pub const Token = Tokenizer.Token; + +const match = ptk.matchers; + +const Pattern = ptk.Pattern(TokenType); + +const ParserCore = ptk.ParserCore(TokenType, .{ .whitespace, .line_comment }); + +const Tokenizer = ptk.Tokenizer(TokenType, &.{ + Pattern.create(.line_comment, match.sequenceOf(.{ match.literal("#"), match.takeNoneOf("\r\n") })), + + Pattern.create(.node, match.word("node")), + Pattern.create(.@"struct", match.word("struct")), + Pattern.create(.optional, match.word("optional")), + Pattern.create(.start, match.word("start")), + Pattern.create(.rule, match.word("rule")), + Pattern.create(.token, match.word("token")), + Pattern.create(.literal, match.word("literal")), + Pattern.create(.custom, match.word("custom")), + Pattern.create(.regex, match.word("regex")), + Pattern.create(.skip, match.word("skip")), + + Pattern.create(.@"=>", match.literal("=>")), + + Pattern.create(.@"=", match.literal("=")), + Pattern.create(.@",", match.literal(",")), + Pattern.create(.@".", match.literal(".")), + Pattern.create(.@"*", match.literal("*")), + Pattern.create(.@"+", match.literal("+")), + Pattern.create(.@":", match.literal(":")), + Pattern.create(.@";", match.literal(";")), + Pattern.create(.@"|", match.literal("|")), + Pattern.create(.@"!", match.literal("!")), + Pattern.create(.@"?", match.literal("?")), + Pattern.create(.@"[", match.literal("[")), + Pattern.create(.@"]", match.literal("]")), + Pattern.create(.@"(", match.literal("(")), + Pattern.create(.@")", match.literal(")")), + Pattern.create(.@"{", match.literal("{")), + Pattern.create(.@"}", match.literal("}")), + + Pattern.create(.string_literal, matchStringLiteral), + Pattern.create(.code_literal, matchCodeLiteral), + + // identifiers must come after keywords: + Pattern.create(.raw_identifier, matchRawIdentifier), + Pattern.create(.node_ref, matchNodeRef), + Pattern.create(.rule_ref, matchRuleRef), + Pattern.create(.token_ref, matchTokenRef), + Pattern.create(.value_ref, matchValueRef), + Pattern.create(.builtin_ref, matchBuiltinRef), + + // Whitespace is the "kitchen sink" at the end: + Pattern.create(.whitespace, match.takeAnyOf(" \r\n\t")), +}); + +/// Accepts a basic identifier without any prefix or suffix. 
+/// The regex that matches this pattern is roughly this: +/// +/// (@\"[^"]+\")|([A-Za-z_][A-Za-z0-9_\-]*) +/// +fn matchRawIdentifier(text: []const u8) usize { + if (text.len < 1) + return 0; + + if (std.mem.startsWith(u8, text, "@\"")) { + if (text.len < 3) + return 0; + + var i: usize = 2; // skip `@"` + while (i < text.len) : (i += 1) { + if (text[i] == '\"') + return i + 1; + if (text[i] == '\\') + i += 1; + } + + return 0; + } else { + const prefix_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_"; + const suffix_chars = prefix_chars ++ "0123456789"; + const inner_chars = suffix_chars ++ "-"; + + if (std.mem.indexOfScalar(u8, prefix_chars, text[0]) == null) + return 0; // invalid start char + + // Suffix check is done in "postprocessing" by checking if any identifier ends with "-" + + var len: usize = 1; + while (len < text.len and std.mem.indexOfScalar(u8, inner_chars, text[len]) != null) { + len += 1; + } + + return len; + } + + return 0; +} + +test matchRawIdentifier { + try ptk.testing.validateMatcher(matchRawIdentifier, &.{ + // good: + "a", + "a-z", + "items10", + "_foo", + "_", + "_cheese-cake", + }, &.{ + // bad: + "-", + "-10", + "10", + "1-2", + "10items", + }); +} + +const matchNodeRef = match.sequenceOf(.{ match.literal("!"), matchRawIdentifier }); + +test matchNodeRef { + try ptk.testing.validateMatcher(matchNodeRef, &.{ + // good: + "!a", + "!foo_bar", + }, &.{ + // bad: + "a", + "!", + }); +} + +const matchRuleRef = match.sequenceOf(.{ match.literal("<"), matchRawIdentifier, match.literal(">") }); + +test matchRuleRef { + try ptk.testing.validateMatcher(matchRuleRef, &.{ + // good: + "", + "", + "", + "<@\"very exiting boy\">", + }, &.{ + // bad: + "", + }); +} + +const matchTokenRef = match.sequenceOf(.{ match.literal("$"), matchRawIdentifier }); + +test matchTokenRef { + try ptk.testing.validateMatcher(matchTokenRef, &.{ + // good: + "$token", + "$user-token", + "$user_token", + "$@\"wtf\"", + }, &.{ + // bad: + "$\"wtf\"", + "bad boy", + "bad-boy", + "$0", + "$100", + }); +} + +const matchValueRef = match.sequenceOf(.{ match.literal("$"), match.decimalNumber }); + +test matchValueRef { + try ptk.testing.validateMatcher(matchValueRef, &.{ + // good: + "$0", + "$10", + "$99999999", + }, &.{ + // bad: + "9", + "$", + "$foo", + }); +} + +const matchBuiltinRef = match.sequenceOf(.{ match.literal("@"), matchRawIdentifier }); + +test matchBuiltinRef { + try ptk.testing.validateMatcher(matchBuiltinRef, &.{ + // good: + "@token", + "@user-token", + "@user_token", + "@@\"wtf\"", + }, &.{ + // bad: + "@\"wtf\"", + "bad boy", + "bad-boy", + "@0", + "@100", + }); +} + +fn matchStringLiteral(text: []const u8) usize { + if (text.len < 2) + return 0; + + if (text[0] != '"') + return 0; + + var i: usize = 1; // skip `"` + while (i < text.len) : (i += 1) { + if (text[i] == '\"') + return i + 1; + if (text[i] == '\\') + i += 1; + } + + return 0; +} + +test matchStringLiteral { + try ptk.testing.validateMatcher(matchStringLiteral, &.{ + // good: + "\"\"", + "\"x\"", + "\" \"", + "\" hello \\\"world\\\"\"", + }, &.{ + // bad: + "\"", + "\"\\\"", + "\"", + "foo\"", + }); +} + +fn matchCodeLiteral(text: []const u8) usize { + var prefix_len: usize = 0; + while (prefix_len < text.len and text[prefix_len] == '`') { + prefix_len += 1; + } + + if (prefix_len == 0 or 2 * prefix_len >= text.len) + return 0; + + const body_len = std.mem.indexOf(u8, text[prefix_len..], text[0..prefix_len]) orelse return 0; + + return 2 * prefix_len + body_len; +} + +test matchCodeLiteral { + try 
ptk.testing.validateMatcher(matchCodeLiteral, &.{ + // good: + "`x`", + "`\"hello, World!\"`", + "`\n\n`", + "`\x00`", + "``you can write a `code` snippet like this!``", + }, &.{ + // bad: + "`", + "``", + "```hello, world!``", + }); +} diff --git a/src/Diagnostics.zig b/src/toolkit/Diagnostics.zig similarity index 84% rename from src/Diagnostics.zig rename to src/toolkit/Diagnostics.zig index 0a93c19..bf3a842 100644 --- a/src/Diagnostics.zig +++ b/src/toolkit/Diagnostics.zig @@ -38,8 +38,15 @@ pub fn emit(self: *Self, location: Location, level: Error.Level, comptime fmt: [ const str = try std.fmt.allocPrintZ(allocator, fmt, args); errdefer allocator.free(str); + var cloned_location = location; + if (location.source) |source| { + cloned_location.source = try allocator.dupe(u8, source); + } + errdefer if (cloned_location.source) |source| + allocator.free(source); + try self.errors.append(allocator, Error{ - .location = location, + .location = cloned_location, .level = level, .message = str, }); diff --git a/src/Error.zig b/src/toolkit/Error.zig similarity index 100% rename from src/Error.zig rename to src/toolkit/Error.zig diff --git a/src/Location.zig b/src/toolkit/Location.zig similarity index 100% rename from src/Location.zig rename to src/toolkit/Location.zig diff --git a/src/StringCache.zig b/src/toolkit/StringCache.zig similarity index 100% rename from src/StringCache.zig rename to src/toolkit/StringCache.zig diff --git a/src/main.zig b/src/toolkit/main.zig similarity index 87% rename from src/main.zig rename to src/toolkit/main.zig index 784dec5..09b1ba8 100644 --- a/src/main.zig +++ b/src/toolkit/main.zig @@ -18,6 +18,10 @@ pub const Error = @import("Error.zig"); pub const Diagnostics = @import("Diagnostics.zig"); pub const StringCache = @import("StringCache.zig"); +pub const testing = struct { + pub const validateMatcher = tok.testMatcher; +}; + test { _ = Location; _ = tok; diff --git a/src/parser_core.zig b/src/toolkit/parser_core.zig similarity index 100% rename from src/parser_core.zig rename to src/toolkit/parser_core.zig diff --git a/src/token.zig b/src/toolkit/token.zig similarity index 56% rename from src/token.zig rename to src/toolkit/token.zig index 60ae8fa..028272c 100644 --- a/src/token.zig +++ b/src/toolkit/token.zig @@ -14,5 +14,15 @@ pub fn Token(comptime Type: type) type { /// The type of the token that was matched by a matching function type: Type, + + pub fn format(token: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try writer.print("Token {{ .type = {}, .text = \"{}\", .location = {} }}", .{ + token.type, + std.zig.fmtEscapes(token.text), + token.location, + }); + } }; } diff --git a/src/tokenizer.zig b/src/toolkit/tokenizer.zig similarity index 89% rename from src/tokenizer.zig rename to src/toolkit/tokenizer.zig index 1ee859c..ec20f18 100644 --- a/src/tokenizer.zig +++ b/src/toolkit/tokenizer.zig @@ -3,7 +3,9 @@ const std = @import("std"); const Location = @import("Location.zig"); const GenericToken = @import("token.zig").Token; -pub const Matcher = *const fn (str: []const u8) ?usize; +/// This is a function that will either accept a `text` as a token +/// of a non-zero length or returns `0` if the text does not match the token. 
+pub const Matcher = *const fn (text: []const u8) usize; pub fn Pattern(comptime TokenType: type) type { return struct { @@ -66,14 +68,13 @@ pub fn Tokenizer(comptime TokenTypeT: type, comptime patterns: []const Pattern(T if (rest.len == 0) return null; const maybe_token = for (patterns) |pat| { - if (pat.match(rest)) |len| { - if (len > 0) { - break Token{ - .location = self.current_location, - .text = rest[0..len], - .type = pat.type, - }; - } + const len = pat.match(rest); + if (len > 0) { + break Token{ + .location = self.current_location, + .text = rest[0..len], + .type = pat.type, + }; } } else null; if (maybe_token) |token| { @@ -91,11 +92,11 @@ pub const matchers = struct { /// Matches the literal `text`. pub fn literal(comptime text: []const u8) Matcher { return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { return if (std.mem.startsWith(u8, str, text)) text.len else - null; + 0; } }.match; } @@ -103,17 +104,17 @@ pub const matchers = struct { /// Matches any "word" that is "text\b" pub fn word(comptime text: []const u8) Matcher { return struct { - fn match(input: []const u8) ?usize { + fn match(input: []const u8) usize { if (std.mem.startsWith(u8, input, text)) { if (text.len == input.len) return text.len; const c = input[text.len]; if (std.ascii.isAlphanumeric(c) or (c == '_')) // matches regex \w\W - return null; + return 0; return text.len; } - return null; + return 0; } }.match; } @@ -121,7 +122,7 @@ pub const matchers = struct { /// Takes characters while they are any of the given `chars`. pub fn takeAnyOf(comptime chars: []const u8) Matcher { return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { for (str, 0..) |c, i| { if (std.mem.indexOfScalar(u8, chars, c) == null) { return i; @@ -140,7 +141,7 @@ pub const matchers = struct { }; return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { for (str, 0..) |c, i| { const lc = std.ascii.toLower(c); if (std.mem.indexOfScalar(u8, lower_chars, lc) == null) { @@ -155,7 +156,7 @@ pub const matchers = struct { /// Takes characters while they are not any of the given `chars`. pub fn takeNoneOf(comptime chars: []const u8) Matcher { return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { for (str, 0..) 
|c, i| { if (std.mem.indexOfScalar(u8, chars, c) != null) { return i; @@ -168,10 +169,12 @@ pub const matchers = struct { pub fn withPrefix(comptime prefix: []const u8, comptime matcher: Matcher) Matcher { return struct { - fn match(str: []const u8) ?usize { + fn match(str: []const u8) usize { if (!std.mem.startsWith(u8, str, prefix)) - return null; - const pattern_len = matcher(str[prefix.len..]) orelse return null; + return 0; + const pattern_len = matcher(str[prefix.len..]); + if (pattern_len == 0) + return 0; return prefix.len + pattern_len; } }.match; @@ -183,12 +186,12 @@ pub const matchers = struct { if (sequence.len == 0) @compileError("Empty sequence not allowed!"); return struct { - fn match(input: []const u8) ?usize { + fn match(input: []const u8) usize { var total_len: usize = 0; for (sequence) |seq_match| { - const len = seq_match(input[total_len..]) orelse return null; + const len = seq_match(input[total_len..]); if (len == 0) - return null; + return 0; total_len += len; } return total_len; @@ -198,7 +201,7 @@ pub const matchers = struct { // pre-shipped typical patterns - pub fn identifier(str: []const u8) ?usize { + pub fn identifier(str: []const u8) usize { const first_char = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; const all_chars = first_char ++ "0123456789"; for (str, 0..) |c, i| { @@ -209,7 +212,7 @@ pub const matchers = struct { return str.len; } - pub fn whitespace(str: []const u8) ?usize { + pub fn whitespace(str: []const u8) usize { for (str, 0..) |c, i| { if (!std.ascii.isWhitespace(c)) return i; @@ -217,12 +220,12 @@ pub const matchers = struct { return str.len; } - pub fn linefeed(str: []const u8) ?usize { + pub fn linefeed(str: []const u8) usize { if (std.mem.startsWith(u8, str, "\r\n")) return 2; if (std.mem.startsWith(u8, str, "\n")) return 1; - return null; + return 0; } pub fn numberOfBase(comptime base: comptime_int) Matcher { @@ -321,12 +324,11 @@ test "save/restore tokenization" { try std.testing.expectEqual(Location{ .source = null, .line = 2, .column = 1 }, id1.location); } -fn testMatcher(match: Matcher, good: []const []const u8, bad: []const []const u8) !void { +pub fn testMatcher(match: Matcher, good: []const []const u8, bad: []const []const u8) !void { + std.debug.assert(good.len > 0); + std.debug.assert(bad.len > 0); for (good) |str| { - const v = match(str) orelse { - std.log.err("Didn't match pattern '{s}'", .{str}); - return error.MissedGoodPattern; - }; + const v = match(str); if (v == 0) { std.log.err("Didn't match pattern '{s}'", .{str}); return error.MissedGoodPattern; @@ -334,7 +336,7 @@ fn testMatcher(match: Matcher, good: []const []const u8, bad: []const []const u8 } for (bad) |str| { const v = match(str); - if (v != null and v.? > 0) { + if (v > 0) { std.log.err("Matched pattern '{s}'", .{str}); return error.MissedBadPattern; } diff --git a/test/parser/accept/empty-with-comment-linefeed.ptk b/test/parser/accept/empty-with-comment-linefeed.ptk new file mode 100644 index 0000000..a1e7613 --- /dev/null +++ b/test/parser/accept/empty-with-comment-linefeed.ptk @@ -0,0 +1 @@ +# hello, world! diff --git a/test/parser/accept/empty-with-comment.ptk b/test/parser/accept/empty-with-comment.ptk new file mode 100644 index 0000000..0017949 --- /dev/null +++ b/test/parser/accept/empty-with-comment.ptk @@ -0,0 +1 @@ +# hello, world! 
\ No newline at end of file diff --git a/test/parser/accept/empty.ptk b/test/parser/accept/empty.ptk new file mode 100644 index 0000000..e69de29 diff --git a/test/parser/accept/identifiers.ptk b/test/parser/accept/identifiers.ptk new file mode 100644 index 0000000..521db6f --- /dev/null +++ b/test/parser/accept/identifiers.ptk @@ -0,0 +1,8 @@ + +rule a = literal `whatever`; +rule _ = literal `whatever`; +rule a0 = literal `whatever`; +rule a-z = literal `whatever`; +rule _10 = literal `whatever`; +rule @"x" = literal `whatever`; +rule @"hello, world!" = literal `whatever`; From 8ae8684fca1f72315827076cf0bf3e530355cfaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 30 Oct 2023 17:46:17 +0100 Subject: [PATCH 02/20] Starts working on the parser, can already recognize some basic nodes --- build.zig | 9 +- docs/grammar.md | 40 +++ examples/ptkgen/ast-with-unions.ptk | 6 +- src/ptkgen/ast.zig | 166 +++++++++ src/ptkgen/main.zig | 176 +++++++++- src/ptkgen/parser.zig | 330 +++++++++++++++++- src/toolkit/main.zig | 4 +- src/toolkit/parser_core.zig | 1 + src/toolkit/strings.zig | 156 +++++++++ test/analysis/accept/match-literal-rule.ptk | 2 + .../accept/match-literal-sequence.ptk | 2 + 11 files changed, 855 insertions(+), 37 deletions(-) create mode 100644 docs/grammar.md create mode 100644 src/ptkgen/ast.zig create mode 100644 src/toolkit/strings.zig create mode 100644 test/analysis/accept/match-literal-rule.ptk create mode 100644 test/analysis/accept/match-literal-sequence.ptk diff --git a/build.zig b/build.zig index 12b0ce7..d09acee 100644 --- a/build.zig +++ b/build.zig @@ -94,6 +94,11 @@ const parser_ok_files = [_][]const u8{ "test/parser/accept/empty.ptk", "test/parser/accept/empty-with-comment-linefeed.ptk", "test/parser/accept/empty-with-comment.ptk", - "test/parser/accept/identifiers.ptk", - "examples/ptkgen/ast-with-unions.ptk", // TODO: Move to examples + // "test/parser/accept/identifiers.ptk", + // "examples/ptkgen/ast-with-unions.ptk", // TODO: Move to examples +} ++ analyis_ok_files; + +const analyis_ok_files = [_][]const u8{ + "test/analysis/accept/match-literal-rule.ptk", + "test/analysis/accept/match-literal-sequence.ptk", }; diff --git a/docs/grammar.md b/docs/grammar.md new file mode 100644 index 0000000..031d096 --- /dev/null +++ b/docs/grammar.md @@ -0,0 +1,40 @@ +# Parser Toolkit Grammar + +## Syntax + +```rb + +@Identifier # references Identifier from the user context. can be used for types, functions, values + # references another rule named Rule +!Node # references another ast node called Node + + +``` + +## Types + +```rb +literal `text` # pastes text into the code +optional ... # makes ... an optional type + +struct # constructs a structure type, having two fields: + field: !type, + field: !type + +union # constructs a type for alternatives, here with two variants: + Foo: !type, # alternative called Foo + Bar: !type # alternative called Bar + +``` + +## Strings + +- `\x00 ... \xFF` => Hexadecimal escape +- `\000 ... 
\377` => Octal escape +- `\n` => LF (0x0A) +- `\r` => CR (0x0D) +- `\'` => single quote (0x27) +- `\"` => double quote (0x22) +- `\\` => back slash (0x5C) +- `\u????` => UTF-16 +- `\U????????` => UTF-32 diff --git a/examples/ptkgen/ast-with-unions.ptk b/examples/ptkgen/ast-with-unions.ptk index b8c093f..fa170fc 100644 --- a/examples/ptkgen/ast-with-unions.ptk +++ b/examples/ptkgen/ast-with-unions.ptk @@ -12,8 +12,8 @@ node declaration = struct ; node identifier = literal `[]const u8`; -node type = custom `TypeId`; # enum { int, float, string } -node value = custom `Value`; +node type = @TypeId; # enum { int, float, string } +node value = @Value; start ; @@ -32,7 +32,7 @@ rule : literal `bool` = | "const" => `true` ; -rule : !identifier = "name" => tostring $0; +rule : !identifier = "name" => tostring($0); rule : !type = "int" => `.int` diff --git a/src/ptkgen/ast.zig b/src/ptkgen/ast.zig new file mode 100644 index 0000000..0bda109 --- /dev/null +++ b/src/ptkgen/ast.zig @@ -0,0 +1,166 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const Location = ptk.Location; + +pub fn List(comptime T: type) type { + return struct { + pub const Item = T; + + pub const Node = std.TailQueue(T).Node; + + inner: std.TailQueue(T) = .{}, + + pub fn append(list: *@This(), item: *@This().Node) void { + list.inner.append(item); + } + + pub fn len(list: @This()) usize { + return list.inner.len; + } + + pub fn only(list: @This()) ?T { + return if (list.inner.len == 1) + list.inner.first.?.data + else + null; + } + }; +} + +pub fn Iterator(comptime T: type) type { + return struct { + node: ?*List(T).Node, + + pub fn next(iter: *@This()) ?T { + const current = iter.node orelse return null; + iter.node = current.next; + return current.data; + } + }; +} + +pub fn iterate(list: anytype) Iterator(@TypeOf(list).Item) { + return Iterator(@TypeOf(list).Item){ .node = list.inner.first }; +} + +pub fn Reference(comptime T: type) type { + return struct { + pub const Referenced = T; + + location: Location, + identifier: ptk.strings.String, + }; +} + +fn String(comptime Tag: anytype) type { + return struct { + pub const tag = Tag; + + location: Location, + value: ptk.strings.String, + }; +} + +pub const Identifier = String(.identifier); +pub const StringLiteral = String(.string); +pub const CodeLiteral = String(.code); +pub const BuiltinLiteral = String(.builtin); + +pub const Document = List(TopLevelDeclaration); + +pub const TopLevelDeclaration = union(enum) { + start: NodeRef, + rule: Rule, + node: Node, +}; + +pub const NodeRef = Reference(Node); // !mynode +pub const RuleRef = Reference(Rule); // +pub const TokenRef = Reference(Token); // $mytoken +pub const ValueRef = struct { // $0 + location: Location, + index: u32, +}; + +pub const Node = struct { // node = ...; + name: Identifier, + value: TypeSpec, +}; + +pub const Rule = struct { // rule ( : )? = ...; + name: Identifier, // + ast_type: ?TypeSpec, // if specified, defines the ast node of the rule + productions: List(MappedProduction), // all alternatives of the rule +}; + +pub const Token = struct { // token = ...; + name: Identifier, + pattern: Pattern, +}; + +pub const MappedProduction = struct { // ... => value + production: Production, // the thing before "=>" + mapping: ?AstMapping, // the thing after "=>" +}; + +pub const Production = union(enum) { + literal: StringLiteral, // "text" + terminal: TokenRef, // $token + recursion: RuleRef, // + sequence: List(Production), // ( ... ) + optional: *Production, // ...? 
+ repetition_zero: *Production, // [ ... ]* + repetition_one: *Production, // [ ... ]+ +}; + +pub const AstMapping = union(enum) { + constructor: List(FieldAssignment), // { field = ..., field = ... } + literal: CodeLiteral, // field: value + context_reference: ValueRef, // $0 + user_reference: BuiltinLiteral, // @field + function_call: FunctionCall, // ...(a,b,c) + union_init: UnionInitializer, +}; + +pub const UnionInitializer = struct { + field: Identifier, + value: *AstMapping, +}; + +pub const FunctionCall = struct { + function: *AstMapping, + arguments: List(AstMapping), +}; + +pub const FieldAssignment = struct { + location: Location, + field: Identifier, + value: *AstMapping, +}; + +pub const Pattern = union(enum) { + literal: StringLiteral, // literal "+" + word: StringLiteral, // word "while" + regex: StringLiteral, // regex "string" + external: CodeLiteral, // custom `matchMe` +}; + +pub const TypeSpec = union(enum) { + reference: NodeRef, // !type + literal: CodeLiteral, // literal `bool` + custom: CodeLiteral, // custom `Custom` + @"struct": CompoundType, // struct + @"union": CompoundType, // union +}; + +pub const CompoundType = struct { + location: Location, + fields: List(Field), +}; + +pub const Field = struct { + location: Location, + name: Identifier, + type: TypeSpec, +}; diff --git a/src/ptkgen/main.zig b/src/ptkgen/main.zig index 3303955..852fa0f 100644 --- a/src/ptkgen/main.zig +++ b/src/ptkgen/main.zig @@ -6,6 +6,7 @@ const std = @import("std"); const args_parser = @import("args"); const ptk = @import("parser-toolkit"); +const ast = @import("ast.zig"); const parser = @import("parser.zig"); comptime { @@ -64,12 +65,12 @@ pub fn main() !u8 { return 0; } + var string_pool = try ptk.strings.Pool.init(dynamic_allocator); + defer string_pool.deinit(); + var diagnostics = ptk.Diagnostics.init(dynamic_allocator); defer diagnostics.deinit(); - // From here on, always print the diagnostics on exit! 
- defer diagnostics.print(stderr.writer()) catch {}; - var input_file = switch (cli.positionals.len) { 0 => stdin, 1 => std.fs.cwd().openFile(cli.positionals[0], .{}) catch |err| { @@ -86,21 +87,172 @@ pub fn main() !u8 { }; defer input_file.close(); - var ast = try parser.parse( + const file_name = if (cli.positionals.len > 0) + cli.positionals[0] + else + "stdint"; + + compileFile( dynamic_allocator, &diagnostics, - if (cli.positionals.len > 0) - cli.positionals[0] - else - "stdint", + &string_pool, + input_file, + file_name, + cli.options.test_mode, + ) catch |err| switch (err) { + // syntax errors must produce diagnostics: + error.SyntaxError => std.debug.assert(diagnostics.hasErrors()), + + error.OutOfMemory => { + try diagnostics.emit(.{ + .source = file_name, + .line = 1, + .column = 1, + }, .@"error", "out of memory", .{}); + }, + + error.StreamTooLong => { + try diagnostics.emit(.{ + .source = file_name, + .line = 1, + .column = 1, + }, .@"error", "input file too large", .{}); + }, + + error.InputOutput, + error.AccessDenied, + error.BrokenPipe, + error.SystemResources, + error.OperationAborted, + error.WouldBlock, + error.ConnectionResetByPeer, + error.Unexpected, + error.IsDir, + error.ConnectionTimedOut, + error.NotOpenForReading, + error.NetNameDeleted, + => { + try diagnostics.emit(.{ + .source = file_name, + .line = 1, + .column = 1, + }, .@"error", "i/o error: {s}", .{@errorName(err)}); + }, + }; + + try diagnostics.print(stderr.writer()); + + return if (diagnostics.hasErrors()) + 1 + else + 0; +} + +fn compileFile( + allocator: std.mem.Allocator, + diagnostics: *ptk.Diagnostics, + string_pool: *ptk.strings.Pool, + input_file: std.fs.File, + file_name: []const u8, + mode: TestMode, +) !void { + var tree = try parser.parse( + allocator, + diagnostics, + string_pool, + file_name, input_file.reader(), ); - defer ast.deinit(); + defer tree.deinit(); + + dumpAst(string_pool, tree.top_level_declarations); - if (cli.options.test_mode == .parse_only) { + if (mode == .parse_only) { // we're done if we're here - return 0; + return; + } +} + +fn dumpAst(strings: *const ptk.strings.Pool, decls: ast.List(ast.TopLevelDeclaration)) void { + std.debug.print("ast dump:\n", .{}); + + var iter = ast.iterate(decls); + while (iter.next()) |decl| { + switch (decl) { + .start => |item| std.debug.print("start {s}\n", .{strings.get(item.identifier)}), + + .rule => |rule| { + std.debug.print("rule {s}", .{strings.get(rule.name.value)}); + + if (rule.ast_type) |ast_type| { + std.debug.print(" : ", .{}); + dumpAstType(strings, ast_type); + } + + std.debug.print(" = \n", .{}); + + var prods = ast.iterate(rule.productions); + var first = true; + while (prods.next()) |prod| { + defer first = false; + if (!first) { + std.debug.print(" | ", .{}); + } else { + std.debug.print(" ", .{}); + } + dumpMappedProd(strings, prod); + } + + std.debug.print("\n;\n", .{}); + }, + + .node => |node| { + std.debug.print("node {s}", .{strings.get(node.name.value)}); + + std.debug.print(";\n", .{}); + }, + } + } +} + +fn dumpAstType(strings: *const ptk.strings.Pool, typespec: ast.TypeSpec) void { + _ = strings; + _ = typespec; + std.debug.print("", .{}); +} + +fn dumpMappedProd(strings: *const ptk.strings.Pool, mapped_prod: ast.MappedProduction) void { + dumpProd(strings, mapped_prod.production); + + if (mapped_prod.mapping) |mapping| { + dumpMapping(strings, mapping); + } +} + +fn dumpProd(strings: *const ptk.strings.Pool, production: ast.Production) void { + switch (production) { + .literal => |lit| 
std.debug.print("\"{}\"", .{std.zig.fmtEscapes(strings.get(lit.value))}), + .terminal => |term| std.debug.print("<{}>", .{std.zig.fmtId(strings.get(term.identifier))}), + .recursion => std.debug.print("", .{}), + .sequence => |seq| { + std.debug.print("(", .{}); + + var iter = ast.iterate(seq); + while (iter.next()) |item| { + std.debug.print(" ", .{}); + dumpProd(strings, item); + } + + std.debug.print(" )", .{}); + }, + .optional => std.debug.print("", .{}), + .repetition_zero => std.debug.print("", .{}), + .repetition_one => std.debug.print("", .{}), } +} - return 0; +fn dumpMapping(strings: *const ptk.strings.Pool, mapping: ast.AstMapping) void { + _ = strings; + _ = mapping; + std.debug.print("", .{}); } diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index f1402ee..5a5167a 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -1,5 +1,8 @@ const std = @import("std"); const ptk = @import("parser-toolkit"); +const ast = @import("ast.zig"); + +const fmtEscapes = std.zig.fmtEscapes; pub const Document = struct { arena: std.heap.ArenaAllocator, @@ -7,13 +10,15 @@ pub const Document = struct { file_name: []const u8, source_text: []const u8, + top_level_declarations: ast.Document, + pub fn deinit(ts: *Document) void { ts.arena.deinit(); ts.* = undefined; } }; -pub fn parse(allocator: std.mem.Allocator, diagnostics: *ptk.Diagnostics, file_name: []const u8, stream: anytype) !Document { +pub fn parse(allocator: std.mem.Allocator, diagnostics: *ptk.Diagnostics, string_pool: *ptk.strings.Pool, file_name: []const u8, stream: anytype) !Document { var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); @@ -23,31 +28,54 @@ pub fn parse(allocator: std.mem.Allocator, diagnostics: *ptk.Diagnostics, file_n var tokenizer = Tokenizer.init(text, file_name_copy); - while (true) { - const token_or_none = tokenizer.next() catch |err| switch (err) { - error.UnexpectedCharacter => { - try diagnostics.emit(tokenizer.current_location, .@"error", "Unexpected character: '{}'", .{ - std.zig.fmtEscapes(tokenizer.source[tokenizer.offset..][0..1]), - }); - return error.SyntaxError; - }, + var parser = Parser{ + .core = ParserCore.init(&tokenizer), + .arena = arena.allocator(), + .pool = string_pool, + .diagnostics = diagnostics, + }; - else => |e| return e, - }; - const token = token_or_none orelse break; + const document_node = parser.acceptDocument() catch |err| switch (err) { + error.UnexpectedCharacter => { + try diagnostics.emit(tokenizer.current_location, .@"error", "Unexpected character: '{}'", .{ + fmtEscapes(tokenizer.source[tokenizer.offset..][0..1]), + }); + return error.SyntaxError; + }, - std.log.info("token: {}", .{token}); + error.EndOfStream, error.UnexpectedToken => @panic("Error handling is fucked up, something escaped"), + + // Unrecoverable syntax error, must have created diagnostics already + error.SyntaxError => |e| { + std.debug.assert(diagnostics.hasErrors()); + return e; + }, + + error.OutOfMemory => |e| return e, + }; + + if (tokenizer.next()) |token_or_null| { + if (token_or_null) |token| { + try diagnostics.emit(token.location, .@"error", "Excess token at the end of the file: {s}", .{@tagName(token.type)}); + return error.SyntaxError; + } + } else |_| { + try diagnostics.emit(tokenizer.current_location, .@"error", "Unexpected character: '{}'", .{ + fmtEscapes(tokenizer.source[tokenizer.offset..][0..1]), + }); + return error.SyntaxError; } return Document{ .arena = arena, .file_name = file_name_copy, .source_text = text, + + .top_level_declarations 
= document_node, }; } pub const TokenType = enum { - // keywords node, @@ -64,7 +92,7 @@ pub const TokenType = enum { // user values - raw_identifier, // foo-bar_bam + identifier, // foo-bar_bam node_ref, // !node rule_ref, // token_ref, // $token @@ -104,12 +132,276 @@ pub const TokenType = enum { pub const Token = Tokenizer.Token; -const match = ptk.matchers; +const ParserCore = ptk.ParserCore(Tokenizer, .{ .whitespace, .line_comment }); -const Pattern = ptk.Pattern(TokenType); +const Parser = struct { + const RS = ptk.RuleSet(TokenType); + const String = ptk.strings.String; + + core: ParserCore, + arena: std.mem.Allocator, + pool: *ptk.strings.Pool, + diagnostics: *ptk.Diagnostics, + + pub fn acceptDocument(parser: *Parser) !ast.Document { + var doc = ast.Document{}; + + while (true) { + const decl_or_eof = try parser.acceptTopLevelDecl(); + + const decl = decl_or_eof orelse break; + + try parser.append(ast.TopLevelDeclaration, &doc, decl); + } + + return doc; + } + + fn emitDiagnostic(parser: *Parser, loc: ?ptk.Location, comptime fmt: []const u8, args: anytype) !void { + // Anything detected here is always an error + try parser.diagnostics.emit(loc orelse parser.core.tokenizer.current_location, .@"error", fmt, args); + } + + fn acceptTopLevelDecl(parser: *Parser) !?ast.TopLevelDeclaration { + if (parser.acceptLiteral(.rule)) |_| { + return .{ + .rule = try parser.acceptRule(), + }; + } else |err| try filterAcceptError(err); + + // Detect any excess tokens on the top level: + if (try parser.core.nextToken()) |token| { + try parser.emitDiagnostic(null, "Unexpected token '{}'", .{fmtEscapes(token.text)}); + return error.SyntaxError; + } + + return null; + } + + fn acceptRule(parser: *Parser) !ast.Rule { + var state = parser.save(); + errdefer parser.restore(state); + + const identifier = try parser.acceptIdentifier(); + + const rule_type = if (parser.acceptLiteral(.@":")) + try parser.acceptTypeSpec() + else |_| + null; + + try parser.acceptLiteral(.@"="); + + var list: ast.List(ast.MappedProduction) = .{}; + + while (true) { + var production = try parser.acceptMappedProduction(); + + try parser.append(ast.MappedProduction, &list, production); + + // TODO: Improve error reporting here + if (parser.acceptLiteral(.@";")) { + break; + } else |_| {} + + try parser.acceptLiteral(.@"|"); + } + + return ast.Rule{ + .ast_type = rule_type, + .productions = list, + .name = identifier, + }; + } + + fn acceptMappedProduction(parser: *Parser) !ast.MappedProduction { + var sequence = try parser.acceptProductionSequence(); -const ParserCore = ptk.ParserCore(TokenType, .{ .whitespace, .line_comment }); + const mapping = if (parser.acceptLiteral(.@"=>")) + try parser.acceptAstMapping() + else |_| + null; + return ast.MappedProduction{ + .production = if (sequence.only()) |item| + item + else + .{ .sequence = sequence }, + .mapping = mapping, + }; + } + + fn acceptProductionSequence(parser: *Parser) !ast.List(ast.Production) { + var list: ast.List(ast.Production) = .{}; + + while (true) { + if (parser.acceptProduction()) |prod| { + try parser.append(ast.Production, &list, prod); + } else |err| switch (err) { + error.UnexpectedToken => break, + else => |e| return e, + } + } + + return list; + } + + fn acceptProduction(parser: *Parser) !ast.Production { + const str = try parser.acceptStringLiteral(); + + return ast.Production{ + .literal = str, + }; + } + + fn acceptAstMapping(parser: *Parser) !ast.AstMapping { + _ = parser; + return error.UnexpectedToken; + } + + fn acceptTypeSpec(parser: *Parser) 
!ast.TypeSpec { + _ = parser; + return error.UnexpectedToken; + } + + fn acceptStringLiteral(parser: *Parser) !ast.StringLiteral { + const token = try parser.core.accept(RS.is(.string_literal)); + + std.debug.assert(token.text.len >= 2); + + return ast.StringLiteral{ + .location = token.location, + .value = try parser.unwrapString(token.location, token.text[1 .. token.text.len - 1]), + }; + } + + fn acceptIdentifier(parser: *Parser) !ast.Identifier { + const token = try parser.core.accept(RS.is(.identifier)); + + return ast.Identifier{ + .location = token.location, + .value = try parser.unwrapIdentifierString(token.location, token.text), + }; + } + + fn acceptLiteral(parser: *Parser, comptime token_type: TokenType) !void { + _ = try parser.core.accept(RS.is(token_type)); + } + + // management: + + fn unwrapIdentifierString(parser: *Parser, loc: ptk.Location, raw: []const u8) !ptk.strings.String { + std.debug.assert(raw.len > 0); + if (raw[0] == '@') { + std.debug.assert(raw[1] == '"'); + std.debug.assert(raw[raw.len - 1] == '"'); + // string-escaped identifier + return try parser.unwrapString(loc, raw[2 .. raw.len - 1]); + } else { + return try parser.pool.insert(raw); + } + } + + fn unwrapString(parser: *Parser, loc: ptk.Location, raw: []const u8) !ptk.strings.String { + var fallback = std.heap.stackFallback(512, parser.arena); + + var working_space = std.ArrayList(u8).init(fallback.get()); + defer working_space.deinit(); + + var i: usize = 0; + while (i < raw.len) { + const c = raw[i]; + if (c == '\\') { + i += 1; + if (i >= raw.len) { + try parser.emitDiagnostic(loc, "Invalid string escape: Missing escaped character!", .{}); + return error.SyntaxError; + } + const escape = raw[i]; + const slice = switch (escape) { + 'n' => "\n", + 'r' => "\r", + '\"' => "\"", + '\'' => "\'", + '\\' => "\\", + + 'x' => @panic("Implement hex escape \\x??"), + 'u' => @panic("Implement utf-16 \\u????"), + 'U' => @panic("Implement utf-32 \\U????????"), + + '0'...'3' => @panic("Implement octal escape \\???"), + + else => { + if (std.ascii.isPrint(c)) { + try parser.emitDiagnostic(loc, "Invalid string escape \\{c}", .{escape}); + } else { + try parser.emitDiagnostic(loc, "Invalid string escape \\x{X:0>2}", .{escape}); + } + return error.SyntaxError; + }, + }; + try working_space.appendSlice(slice); + } else { + try working_space.append(c); + } + i += 1; + } + + return try parser.pool.insert(working_space.items); + } + + fn save(parser: Parser) ParserCore.State { + return parser.core.saveState(); + } + + fn restore(parser: *Parser, state: ParserCore.State) void { + parser.core.restoreState(state); + } + + fn internString(parser: *Parser, string: []const u8) !String { + return try parser.pool.insert(string); + } + + fn append(parser: *Parser, comptime T: type, list: *ast.List(T), item: T) !void { + const node = try parser.arena.create(ast.List(T).Node); + errdefer parser.arena.destroy(node); + + node.data = item; + + list.append(node); + } + + pub const FatalAcceptError = error{ + // We're out of memory accepting some rule. We cannot recover from this. + OutOfMemory, + + // We found a character the tokenizer does not accept, we cannot recover from this ever. 
+ UnexpectedCharacter, + }; + + pub const AcceptError = FatalAcceptError || error{ + + // The token stream is too short to accept this rule + EndOfStream, + + // The token stream contains an unexpected token, this is a syntax error + UnexpectedToken, + }; + + fn filterAcceptError(err: AcceptError) FatalAcceptError!void { + return switch (err) { + error.EndOfStream, + error.UnexpectedToken, + => {}, + + error.OutOfMemory, + error.UnexpectedCharacter, + => |e| return e, + }; + } +}; + +const match = ptk.matchers; +const Pattern = ptk.Pattern(TokenType); const Tokenizer = ptk.Tokenizer(TokenType, &.{ Pattern.create(.line_comment, match.sequenceOf(.{ match.literal("#"), match.takeNoneOf("\r\n") })), @@ -147,7 +439,7 @@ const Tokenizer = ptk.Tokenizer(TokenType, &.{ Pattern.create(.code_literal, matchCodeLiteral), // identifiers must come after keywords: - Pattern.create(.raw_identifier, matchRawIdentifier), + Pattern.create(.identifier, matchRawIdentifier), Pattern.create(.node_ref, matchNodeRef), Pattern.create(.rule_ref, matchRuleRef), Pattern.create(.token_ref, matchTokenRef), diff --git a/src/toolkit/main.zig b/src/toolkit/main.zig index 09b1ba8..9a5d40b 100644 --- a/src/toolkit/main.zig +++ b/src/toolkit/main.zig @@ -17,13 +17,15 @@ pub const RuleSet = pcore.RuleSet; pub const Error = @import("Error.zig"); pub const Diagnostics = @import("Diagnostics.zig"); pub const StringCache = @import("StringCache.zig"); +pub const strings = @import("strings.zig"); pub const testing = struct { pub const validateMatcher = tok.testMatcher; }; -test { +comptime { _ = Location; _ = tok; _ = pcore; + _ = strings; } diff --git a/src/toolkit/parser_core.zig b/src/toolkit/parser_core.zig index 394d679..9bfcf42 100644 --- a/src/toolkit/parser_core.zig +++ b/src/toolkit/parser_core.zig @@ -52,6 +52,7 @@ pub fn ParserCore(comptime TokenizerT: type, comptime ignore_list: anytype) type } pub const AcceptError = error{ EndOfStream, UnexpectedToken } || Tokenizer.NextError; + /// Accepts a token that matches `rule`. Otherwise returns /// - `error.EndOfStream` when no tokens are available /// - `error.UnexpectedToken` when an invalid token was encountered diff --git a/src/toolkit/strings.zig b/src/toolkit/strings.zig new file mode 100644 index 0000000..9c41933 --- /dev/null +++ b/src/toolkit/strings.zig @@ -0,0 +1,156 @@ +pub const std = @import("std"); + +pub const String = enum(u32) { + empty, + + _, + + pub fn format(string: String, fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + if (string == .empty) { + try writer.writeAll("String(empty)"); + } else { + try writer.print("String({})", .{ + @intFromEnum(string), + }); + } + } +}; + +/// A string pool that can store up to 4 GB of text and deduplicate instances. +/// +/// Use this to reduce the memory footprint of your AST and allow quick comparison of strings +/// by using the `String` type instead of doing a `std.mem.eql`. +pub const Pool = struct { + data: std.ArrayList(u8), + count: usize = 0, + + pub fn init(allocator: std.mem.Allocator) !Pool { + var pool = Pool{ + .data = std.ArrayList(u8).init(allocator), + }; + errdefer pool.deinit(); + + std.debug.assert(try pool.insert("") == .empty); + + return pool; + } + + pub fn deinit(pool: *Pool) void { + pool.data.deinit(); + pool.* = undefined; + } + + pub fn insert(pool: *Pool, string: []const u8) error{OutOfMemory}!String { + std.debug.assert(std.mem.indexOfScalar(u8, string, 0) == null); // Interned strings must not contain NUL! 
+ + const storage = pool.data.items; + + var search_index: usize = 0; + while (search_index < storage.len) { + const index = std.mem.indexOfPos(u8, storage, search_index, string) orelse break; + + if (index + string.len + 1 > storage.len) + break; + + if (storage[index + string.len] == 0) + return @enumFromInt(index); + + // starts with `string`, but doesn't end with NUL. + search_index = index + string.len; + } + + const index = storage.len; + + if (index > std.math.maxInt(u32)) { + return error.OutOfMemory; + } + + try pool.data.ensureUnusedCapacity(string.len + 1); // invalidates storage + pool.data.appendSliceAssumeCapacity(string); + pool.data.appendAssumeCapacity(0); + pool.count += 1; + + return @enumFromInt(index); + } + + /// Returns the string in the pool. + pub fn get(pool: *const Pool, string: String) [:0]const u8 { + const storage = pool.data.items; + const index: usize = @intFromEnum(string); + std.debug.assert(index < storage.len); + const slice = std.mem.sliceTo(storage[index..], 0); + return slice.ptr[0..slice.len :0]; + } + + pub fn format(pool: Pool, fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try writer.print("StringPool(count={}, size={:.2f})", .{ + pool.count, + std.fmt.fmtIntSizeBin(pool.data.items.len), + }); + } +}; + +/// Very simplistic string deduplicator, returns the same slice for each string. +/// Does only perform deduplication, no fancy storage strategy. +pub const Dedupe = struct { + arena: std.heap.ArenaAllocator, + items: std.StringHashMapUnmanaged(void), + + pub fn init(allocator: std.mem.Allocator) Dedupe { + return Dedupe{ + .arena = std.heap.ArenaAllocator.init(allocator), + .items = .{}, + }; + } + + pub fn deinit(cache: *Dedupe) void { + cache.items.deinit(cache.arena.child_allocator); + cache.arena.deinit(); + cache.* = undefined; + } + + /// Gets or inserts a string into the cache. `string` might be a short-lived value, + /// the returned value is guaranteed to have the livetime of the string cache. 
+ pub fn fetch(cache: *Dedupe, string: []const u8) ![]const u8 { + const allocator = cache.arena.child_allocator; + const gop = try cache.items.getOrPut(allocator, string); + if (!gop.found_existing) { + errdefer _ = cache.items.remove(string); + gop.key_ptr.* = try cache.arena.allocator().dupe(u8, string); + } + return gop.key_ptr.*; + } +}; + +test Pool { + var pool = try Pool.init(std.testing.allocator); + defer pool.deinit(); + + try std.testing.expectEqualStrings("", pool.get(.empty)); + + try std.testing.expectEqual(String.empty, try pool.insert("")); + + const a = try pool.insert("hello, world!"); + const b = try pool.insert("world!"); // suffix of a + const c = try pool.insert("world"); // non-suffix + + // All strings must be unique: + try std.testing.expect(a != b); + try std.testing.expect(a != c); + try std.testing.expect(b != c); + + // But must retain their qualities: + try std.testing.expectEqualStrings("hello, world!", pool.get(a)); + try std.testing.expectEqualStrings("world!", pool.get(b)); + try std.testing.expectEqualStrings("world", pool.get(c)); + + // sequential inserts may never return different values: + try std.testing.expectEqual(a, try pool.insert("hello, world!")); + try std.testing.expectEqual(a, try pool.insert("hello, world!")); + try std.testing.expectEqual(a, try pool.insert("hello, world!")); + try std.testing.expectEqual(a, try pool.insert("hello, world!")); +} diff --git a/test/analysis/accept/match-literal-rule.ptk b/test/analysis/accept/match-literal-rule.ptk new file mode 100644 index 0000000..3cda9a8 --- /dev/null +++ b/test/analysis/accept/match-literal-rule.ptk @@ -0,0 +1,2 @@ +# This file contains a single rule with no well-defined start point: +rule basic = "basic"; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-sequence.ptk b/test/analysis/accept/match-literal-sequence.ptk new file mode 100644 index 0000000..555a2dc --- /dev/null +++ b/test/analysis/accept/match-literal-sequence.ptk @@ -0,0 +1,2 @@ +# This file contains a single rule with no well-defined start point: +rule basic = "basic" "words" "after" "another"; \ No newline at end of file From f53a16d8db5ab4f9f12c0243dc4a70c7be25972c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 1 Nov 2023 15:28:18 +0100 Subject: [PATCH 03/20] Improves error reporting --- src/ptkgen/ast_dump.zig | 127 +++++++++++++++++++++++++++++++++++++ src/ptkgen/main.zig | 87 +------------------------- src/ptkgen/parser.zig | 134 ++++++++++++++++++++++++++++------------ 3 files changed, 224 insertions(+), 124 deletions(-) create mode 100644 src/ptkgen/ast_dump.zig diff --git a/src/ptkgen/ast_dump.zig b/src/ptkgen/ast_dump.zig new file mode 100644 index 0000000..835dadd --- /dev/null +++ b/src/ptkgen/ast_dump.zig @@ -0,0 +1,127 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const ast = @import("ast.zig"); +const parser = @import("parser.zig"); + +pub fn dump(strings: *const ptk.strings.Pool, decls: parser.Document) void { + var printer = AstPrinter{ + .strings = strings, + }; + + printer.dumpRoot(decls.top_level_declarations); +} + +const AstPrinter = struct { + const print = std.debug.print; + + strings: *const ptk.strings.Pool, + + fn dumpRoot(printer: AstPrinter, decls: ast.List(ast.TopLevelDeclaration)) void { + print("ast dump:\n", .{}); + + var iter = ast.iterate(decls); + while (iter.next()) |decl| { + switch (decl) { + .start => |item| print("start {}\n", .{printer.fmtId(item.identifier)}), + + .rule => |rule| { + 
print("rule {s}", .{printer.fmtId(rule.name.value)}); + + if (rule.ast_type) |ast_type| { + print(" : ", .{}); + printer.dumpAstType(ast_type); + } + + print(" = \n", .{}); + + var prods = ast.iterate(rule.productions); + var first = true; + while (prods.next()) |prod| { + defer first = false; + if (!first) { + print(" | ", .{}); + } else { + print(" ", .{}); + } + printer.dumpMappedProd(prod); + } + + print("\n;\n", .{}); + }, + + .node => |node| { + print("node {s}", .{printer.fmtId(node.name.value)}); + print(";\n", .{}); + }, + } + } + } + + fn dumpAstType(printer: AstPrinter, typespec: ast.TypeSpec) void { + _ = printer; + _ = typespec; + std.debug.print("", .{}); + } + + fn dumpMappedProd(printer: AstPrinter, mapped_prod: ast.MappedProduction) void { + printer.dumpProd(mapped_prod.production); + + if (mapped_prod.mapping) |mapping| { + printer.dumpMapping(mapping); + } + } + + fn dumpProd(printer: AstPrinter, production: ast.Production) void { + switch (production) { + .literal => |lit| print("\"{}\"", .{printer.fmtString(lit.value)}), + .terminal => |term| print("<{}>", .{printer.fmtId(term.identifier)}), + .recursion => print("", .{}), + .sequence => |seq| { + print("(", .{}); + + var iter = ast.iterate(seq); + while (iter.next()) |item| { + print(" ", .{}); + printer.dumpProd(item); + } + + print(" )", .{}); + }, + .optional => print("", .{}), + .repetition_zero => print("", .{}), + .repetition_one => print("", .{}), + } + } + + fn dumpMapping(printer: AstPrinter, mapping: ast.AstMapping) void { + _ = printer; + _ = mapping; + print("", .{}); + } + + fn fmtString(printer: AstPrinter, str: ptk.strings.String) StringPrinter { + return StringPrinter{ .printer = printer, .str = str, .mode = .text }; + } + + fn fmtId(printer: AstPrinter, str: ptk.strings.String) StringPrinter { + return StringPrinter{ .printer = printer, .str = str, .mode = .id }; + } + + const StringPrinter = struct { + printer: AstPrinter, + str: ptk.strings.String, + mode: enum { id, text }, + + pub fn format(strpr: StringPrinter, fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = opt; + _ = fmt; + + const text = strpr.printer.strings.get(strpr.str); + switch (strpr.mode) { + .id => try writer.print("\"{}\"", .{std.zig.fmtId(text)}), + .text => try writer.print("\"{}\"", .{std.zig.fmtEscapes(text)}), + } + } + }; +}; diff --git a/src/ptkgen/main.zig b/src/ptkgen/main.zig index 852fa0f..15316ac 100644 --- a/src/ptkgen/main.zig +++ b/src/ptkgen/main.zig @@ -8,6 +8,7 @@ const ptk = @import("parser-toolkit"); const ast = @import("ast.zig"); const parser = @import("parser.zig"); +const ast_dump = @import("ast_dump.zig"); comptime { // reference for unit tests: @@ -165,94 +166,10 @@ fn compileFile( ); defer tree.deinit(); - dumpAst(string_pool, tree.top_level_declarations); - if (mode == .parse_only) { // we're done if we're here return; } -} - -fn dumpAst(strings: *const ptk.strings.Pool, decls: ast.List(ast.TopLevelDeclaration)) void { - std.debug.print("ast dump:\n", .{}); - - var iter = ast.iterate(decls); - while (iter.next()) |decl| { - switch (decl) { - .start => |item| std.debug.print("start {s}\n", .{strings.get(item.identifier)}), - - .rule => |rule| { - std.debug.print("rule {s}", .{strings.get(rule.name.value)}); - - if (rule.ast_type) |ast_type| { - std.debug.print(" : ", .{}); - dumpAstType(strings, ast_type); - } - - std.debug.print(" = \n", .{}); - - var prods = ast.iterate(rule.productions); - var first = true; - while (prods.next()) |prod| { - defer first = false; - if (!first) { 
- std.debug.print(" | ", .{}); - } else { - std.debug.print(" ", .{}); - } - dumpMappedProd(strings, prod); - } - - std.debug.print("\n;\n", .{}); - }, - - .node => |node| { - std.debug.print("node {s}", .{strings.get(node.name.value)}); - - std.debug.print(";\n", .{}); - }, - } - } -} - -fn dumpAstType(strings: *const ptk.strings.Pool, typespec: ast.TypeSpec) void { - _ = strings; - _ = typespec; - std.debug.print("", .{}); -} - -fn dumpMappedProd(strings: *const ptk.strings.Pool, mapped_prod: ast.MappedProduction) void { - dumpProd(strings, mapped_prod.production); - - if (mapped_prod.mapping) |mapping| { - dumpMapping(strings, mapping); - } -} - -fn dumpProd(strings: *const ptk.strings.Pool, production: ast.Production) void { - switch (production) { - .literal => |lit| std.debug.print("\"{}\"", .{std.zig.fmtEscapes(strings.get(lit.value))}), - .terminal => |term| std.debug.print("<{}>", .{std.zig.fmtId(strings.get(term.identifier))}), - .recursion => std.debug.print("", .{}), - .sequence => |seq| { - std.debug.print("(", .{}); - - var iter = ast.iterate(seq); - while (iter.next()) |item| { - std.debug.print(" ", .{}); - dumpProd(strings, item); - } - - std.debug.print(" )", .{}); - }, - .optional => std.debug.print("", .{}), - .repetition_zero => std.debug.print("", .{}), - .repetition_one => std.debug.print("", .{}), - } -} -fn dumpMapping(strings: *const ptk.strings.Pool, mapping: ast.AstMapping) void { - _ = strings; - _ = mapping; - std.debug.print("", .{}); + ast_dump.dump(string_pool, tree); } diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index 5a5167a..3d6bfe0 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -36,14 +36,6 @@ pub fn parse(allocator: std.mem.Allocator, diagnostics: *ptk.Diagnostics, string }; const document_node = parser.acceptDocument() catch |err| switch (err) { - error.UnexpectedCharacter => { - try diagnostics.emit(tokenizer.current_location, .@"error", "Unexpected character: '{}'", .{ - fmtEscapes(tokenizer.source[tokenizer.offset..][0..1]), - }); - return error.SyntaxError; - }, - - error.EndOfStream, error.UnexpectedToken => @panic("Error handling is fucked up, something escaped"), // Unrecoverable syntax error, must have created diagnostics already error.SyntaxError => |e| { @@ -163,15 +155,18 @@ const Parser = struct { } fn acceptTopLevelDecl(parser: *Parser) !?ast.TopLevelDeclaration { - if (parser.acceptLiteral(.rule)) |_| { - return .{ - .rule = try parser.acceptRule(), - }; + if (parser.acceptRule()) |rule| { + return .{ .rule = rule }; } else |err| try filterAcceptError(err); // Detect any excess tokens on the top level: - if (try parser.core.nextToken()) |token| { - try parser.emitDiagnostic(null, "Unexpected token '{}'", .{fmtEscapes(token.text)}); + const excess_tokens = if (parser.core.nextToken()) |token| + (token != null) + else |err| switch (err) { + error.UnexpectedCharacter => true, + }; + if (excess_tokens) { + try parser.emitDiagnostic(null, "Unexpected end of file", .{}); return error.SyntaxError; } @@ -182,14 +177,16 @@ const Parser = struct { var state = parser.save(); errdefer parser.restore(state); - const identifier = try parser.acceptIdentifier(); + try parser.acceptLiteral(.rule, .recover); + + const identifier = try parser.acceptIdentifier(.fail); - const rule_type = if (parser.acceptLiteral(.@":")) + const rule_type = if (try parser.tryAcceptLiteral(.@":")) try parser.acceptTypeSpec() - else |_| + else null; - try parser.acceptLiteral(.@"="); + try parser.acceptLiteral(.@"=", .fail); var list: 
ast.List(ast.MappedProduction) = .{}; @@ -199,11 +196,11 @@ const Parser = struct { try parser.append(ast.MappedProduction, &list, production); // TODO: Improve error reporting here - if (parser.acceptLiteral(.@";")) { + if (try parser.tryAcceptLiteral(.@";")) { break; - } else |_| {} + } - try parser.acceptLiteral(.@"|"); + try parser.acceptLiteral(.@"|", .fail); } return ast.Rule{ @@ -216,9 +213,9 @@ const Parser = struct { fn acceptMappedProduction(parser: *Parser) !ast.MappedProduction { var sequence = try parser.acceptProductionSequence(); - const mapping = if (parser.acceptLiteral(.@"=>")) + const mapping = if (try parser.tryAcceptLiteral(.@"=>")) try parser.acceptAstMapping() - else |_| + else null; return ast.MappedProduction{ @@ -237,8 +234,8 @@ const Parser = struct { if (parser.acceptProduction()) |prod| { try parser.append(ast.Production, &list, prod); } else |err| switch (err) { - error.UnexpectedToken => break, - else => |e| return e, + error.UnexpectedTokenRecoverable => break, + error.OutOfMemory, error.SyntaxError => |e| return e, } } @@ -246,7 +243,7 @@ const Parser = struct { } fn acceptProduction(parser: *Parser) !ast.Production { - const str = try parser.acceptStringLiteral(); + const str = try parser.acceptStringLiteral(.recover); return ast.Production{ .literal = str, @@ -255,16 +252,16 @@ const Parser = struct { fn acceptAstMapping(parser: *Parser) !ast.AstMapping { _ = parser; - return error.UnexpectedToken; + @panic("not implemented yet"); } fn acceptTypeSpec(parser: *Parser) !ast.TypeSpec { _ = parser; - return error.UnexpectedToken; + @panic("not implemented yet"); } - fn acceptStringLiteral(parser: *Parser) !ast.StringLiteral { - const token = try parser.core.accept(RS.is(.string_literal)); + fn acceptStringLiteral(parser: *Parser, accept_mode: AcceptMode) !ast.StringLiteral { + const token = try parser.acceptToken(.string_literal, accept_mode); std.debug.assert(token.text.len >= 2); @@ -274,8 +271,8 @@ const Parser = struct { }; } - fn acceptIdentifier(parser: *Parser) !ast.Identifier { - const token = try parser.core.accept(RS.is(.identifier)); + fn acceptIdentifier(parser: *Parser, accept_mode: AcceptMode) !ast.Identifier { + const token = try parser.acceptToken(.identifier, accept_mode); return ast.Identifier{ .location = token.location, @@ -283,10 +280,69 @@ const Parser = struct { }; } - fn acceptLiteral(parser: *Parser, comptime token_type: TokenType) !void { - _ = try parser.core.accept(RS.is(token_type)); + fn acceptLiteral(parser: *Parser, comptime token_type: TokenType, accept_mode: AcceptMode) !void { + _ = try parser.acceptToken(token_type, accept_mode); + } + + fn tryAcceptLiteral(parser: *Parser, comptime token_type: TokenType) !bool { + _ = parser.acceptToken(token_type, .recover) catch |err| switch (err) { + error.UnexpectedTokenRecoverable => return false, + error.OutOfMemory, error.SyntaxError => |e| return e, + }; + return true; + } + + /// Tries to accept a given token and will emit a diagnostic if it fails. 
+ fn acceptToken(parser: *Parser, comptime token_type: TokenType, accept_mode: AcceptMode) !Token { + const saved_state = parser.save(); + errdefer parser.restore(saved_state); + + const source_offset = parser.core.tokenizer.offset; + const location = parser.core.tokenizer.current_location; + + if (parser.core.accept(RS.any)) |token| { + // std.log.debug("token trace: {}", .{token}); + + if (token.type != token_type) { + switch (accept_mode) { + .fail => { + try parser.emitDiagnostic(location, "Expected token {s}, but discovered token {s} ('{}')", .{ + @tagName(token_type), + @tagName(token.type), + std.zig.fmtEscapes(token.text), + }); + return error.SyntaxError; + }, + .recover => return error.UnexpectedTokenRecoverable, + } + } + return token; + } else |err| switch (err) { + error.UnexpectedToken => unreachable, // RS.any will always accept the token + error.EndOfStream => switch (accept_mode) { + .fail => { + try parser.emitDiagnostic(location, "Expected token {s}, but end of file was discovered", .{@tagName(token_type)}); + return error.SyntaxError; + }, + .recover => return error.UnexpectedTokenRecoverable, + }, + error.UnexpectedCharacter => { + try parser.emitDiagnostic(location, "Unexpected character: '{}'", .{ + fmtEscapes(parser.core.tokenizer.source[source_offset..][0..1]), + }); + return error.SyntaxError; + }, + } } + const AcceptMode = enum { + /// Will emit a syntax error with diagnostic + fail, + + /// Is recoverable + recover, + }; + // management: fn unwrapIdentifierString(parser: *Parser, loc: ptk.Location, raw: []const u8) !ptk.strings.String { @@ -374,8 +430,8 @@ const Parser = struct { // We're out of memory accepting some rule. We cannot recover from this. OutOfMemory, - // We found a character the tokenizer does not accept, we cannot recover from this ever. - UnexpectedCharacter, + // Something could not be accepted. 
+ SyntaxError, }; pub const AcceptError = FatalAcceptError || error{ @@ -384,17 +440,17 @@ const Parser = struct { EndOfStream, // The token stream contains an unexpected token, this is a syntax error - UnexpectedToken, + UnexpectedTokenRecoverable, }; fn filterAcceptError(err: AcceptError) FatalAcceptError!void { return switch (err) { error.EndOfStream, - error.UnexpectedToken, + error.UnexpectedTokenRecoverable, => {}, error.OutOfMemory, - error.UnexpectedCharacter, + error.SyntaxError, => |e| return e, }; } From b47acc5a6dd15049948df5e2b09a745847cfc3a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 2 Nov 2023 11:39:52 +0100 Subject: [PATCH 04/20] Adds basic i18n, makes diagnostics be errorcode-based instead of using literals --- src/ptkgen/Diagnostics.zig | 249 +++++++++++++++++++++++++++++++++++++ src/ptkgen/intl.zig | 73 +++++++++++ src/ptkgen/intl/en.json | 33 +++++ src/ptkgen/main.zig | 18 +-- src/ptkgen/parser.zig | 74 ++++++----- 5 files changed, 406 insertions(+), 41 deletions(-) create mode 100644 src/ptkgen/Diagnostics.zig create mode 100644 src/ptkgen/intl.zig create mode 100644 src/ptkgen/intl/en.json diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig new file mode 100644 index 0000000..d5b75f5 --- /dev/null +++ b/src/ptkgen/Diagnostics.zig @@ -0,0 +1,249 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const intl = @import("intl.zig"); +const parser = @import("parser.zig"); + +const Diagnostics = @This(); + +pub const Code = enum(u16) { + pub const first_error = 1000; + pub const first_warning = 4000; + pub const first_note = 8000; + pub const last_item = 10000; + + out_of_memory = 1000, + file_limit_exceeded = 1001, + io_error = 1002, + + invalid_source_encoding = 1003, + unexpected_token_eof = 1004, + unexpected_token = 1005, + unexpected_character = 1006, + unexpected_eof = 1007, + + bad_string_escape = 1008, + + invalid_string_escape = 1009, + + excess_tokens = 1010, + + comptime { + std.debug.assert(first_error < first_warning); + std.debug.assert(first_warning < first_note); + std.debug.assert(first_note < last_item); + } + + pub fn isError(code: Code) bool { + const int = @intFromEnum(code); + return @intFromEnum(code) >= first_error and int < first_warning; + } + + pub fn isWarning(code: Code) bool { + const int = @intFromEnum(code); + return int >= first_warning and int < first_note; + } + + pub fn isNote(code: Code) bool { + const int = @intFromEnum(code); + return int >= first_note and int < last_item; + } +}; + +const NoDiagnosticData = struct {}; +pub fn Data(comptime code: Code) type { + return switch (code) { + .out_of_memory => NoDiagnosticData, + .file_limit_exceeded => NoDiagnosticData, + .io_error => struct { error_code: intl.FormattableError }, + + .unexpected_token_eof => struct { + expected_type: parser.TokenType, + }, + .unexpected_token => struct { + expected_type: parser.TokenType, + actual_type: parser.TokenType, + actual_text: []const u8, + }, + .unexpected_eof => NoDiagnosticData, + + .invalid_source_encoding => NoDiagnosticData, + .unexpected_character => struct { character: u21 }, + + .bad_string_escape => NoDiagnosticData, + .invalid_string_escape => struct { escape: u21 }, + .excess_tokens => struct { token_type: parser.TokenType }, + + // else => @compileError(std.fmt.comptimePrint("Code {} has no diagnostic type associated!", .{code})), + }; +} + +pub const Message = struct { + level: ptk.Error.Level, + location: ptk.Location, + text: []const u8, +}; + 
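+// Wrapped parser-toolkit diagnostics sink: `emit` (below) looks up the localized
+// format string for a `Code`, renders the typed `Data(code)` payload into it, and
+// records the message with an "E"/"W"/"D" plus four-digit code prefix. For example,
+// `try diag.emit(loc, .unexpected_eof, .{})` records "E1007: Unexpected end of file".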
+inner: ptk.Diagnostics, + +pub fn init(allocator: std.mem.Allocator) Diagnostics { + return Diagnostics{ + .inner = ptk.Diagnostics.init(allocator), + }; +} + +pub fn deinit(diag: *Diagnostics) void { + diag.inner.deinit(); + diag.* = undefined; +} + +pub fn hasErrors(diag: Diagnostics) bool { + return diag.inner.hasErrors(); +} + +pub fn hasWarnings(diag: Diagnostics) bool { + return diag.inner.hasWarnings(); +} + +fn Formatter(comptime T: type) type { + return switch (T) { + // text and unicode: + []const u8 => struct { + // TODO: Distinct between "string body" and "string literal" + + value: T, + + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + try writer.print("{}", .{std.zig.fmtEscapes(item.value)}); + } + }, + + u21 => struct { + value: T, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + + if (item.value < 0x80) { + const ascii: u8 = @intCast(item.value); + + if (std.ascii.isPrint(ascii)) { + try writer.print("{c}", .{ascii}); + } else { + try writer.print("[nonprint: 0x{X:0>2}]", .{ascii}); + } + } else { + var buf: [4]u8 = undefined; + if (std.unicode.utf8Encode(item.value, &buf)) |len| { + try writer.print("{s}", .{buf[0..len]}); + } else |_| { + try writer.print("4}>", .{item.value}); + } + } + } + }, + + // enums: + parser.TokenType => struct { + value: T, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + try writer.print("{s}", .{@tagName(item.value)}); + } + }, + + intl.FormattableError => struct { + value: T, + + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + + inline for (@typeInfo(intl.FormattableError).ErrorSet.?) 
|err| { + if (item.value == @field(intl.FormattableError, err.name)) { + try writer.writeAll(@field(intl.localization.errors, err.name)); + return; + } + } else unreachable; + } + }, + + else => @compileError(std.fmt.comptimePrint("{s} is not a supported diagnostic type!", .{@typeName(T)})), + }; +} + +fn createFormatter(comptime T: type, value: T) Formatter(T) { + return Formatter(T){ .value = value }; +} + +fn FormattedData(comptime code: Code) type { + const Field = std.builtin.Type.StructField; + const D = Data(code); + + const src_fields = @typeInfo(D).Struct.fields; + + var dst_fields: [src_fields.len]Field = undefined; + + for (&dst_fields, src_fields) |*dst, src| { + dst.* = .{ + .name = src.name, + .type = Formatter(src.type), + .default_value = null, + .is_comptime = false, + .alignment = @alignOf(Formatter(src.type)), + }; + } + + return @Type(.{ + .Struct = .{ + .layout = .Auto, + .fields = &dst_fields, + .decls = &.{}, + .is_tuple = false, + }, + }); +} + +fn formatData(comptime code: Code, params: Data(code)) FormattedData(code) { + var formatted: FormattedData(code) = undefined; + inline for (std.meta.fields(Data(code))) |fld| { + @field(formatted, fld.name) = createFormatter(fld.type, @field(params, fld.name)); + } + return formatted; +} + +pub fn emit(diag: *Diagnostics, location: ptk.Location, comptime code: Code, params: Data(code)) error{OutOfMemory}!void { + const level = if (code.isError()) + ptk.Error.Level.@"error" + else if (code.isWarning()) + ptk.Error.Level.warning + else if (code.isNote()) + ptk.Error.Level.info + else + unreachable; + + const fmt_string = @field(intl.localization.diagnostics, @tagName(code)); + + var stack_fallback = std.heap.stackFallback(1024, diag.inner.memory.allocator()); + const stack_fallback_allocator = stack_fallback.get(); + + const formatted_params = formatData(code, params); + + const message_text = try std.fmt.allocPrint(stack_fallback_allocator, fmt_string, formatted_params); + defer stack_fallback_allocator.free(message_text); + + const code_prefix = switch (level) { + .@"error" => "E", + .warning => "W", + .info => "D", + }; + + try diag.inner.emit(location, level, "{s}{d:0>4}: {s}", .{ code_prefix, @intFromEnum(code), message_text }); +} + +pub fn render(diag: Diagnostics, stream: anytype) !void { + try diag.inner.print(stream); +} diff --git a/src/ptkgen/intl.zig b/src/ptkgen/intl.zig new file mode 100644 index 0000000..51623eb --- /dev/null +++ b/src/ptkgen/intl.zig @@ -0,0 +1,73 @@ +const std = @import("std"); + +pub const Language = enum { + en, +}; + +pub const language: Language = .en; + +pub const localization = @field(localizations, @tagName(language)); + +pub const localizations = struct { + pub const en = Localization.generate(@embedFile("intl/en.json")); +}; + +pub const FormattableError = blk: { + const list = @typeInfo(std.meta.fieldInfo(Localization, .errors).type).Struct.fields; + + var errors: [list.len]std.builtin.Type.Error = undefined; + for (&errors, list) |*dst, src| { + dst.* = .{ .name = src.name }; + } + + break :blk @Type(.{ + .ErrorSet = &errors, + }); +}; + +pub const Localization = struct { + diagnostics: struct { + out_of_memory: []const u8, + file_limit_exceeded: []const u8, + io_error: []const u8, + invalid_source_encoding: []const u8, + unexpected_token_eof: []const u8, + unexpected_token: []const u8, + unexpected_character: []const u8, + unexpected_eof: []const u8, + bad_string_escape: []const u8, + invalid_string_escape: []const u8, + excess_tokens: []const u8, + }, + + errors: struct { + 
Unexpected: []const u8, + + OutOfMemory: []const u8, + + InputOutput: []const u8, + AccessDenied: []const u8, + BrokenPipe: []const u8, + SystemResources: []const u8, + OperationAborted: []const u8, + WouldBlock: []const u8, + ConnectionResetByPeer: []const u8, + IsDir: []const u8, + ConnectionTimedOut: []const u8, + NotOpenForReading: []const u8, + NetNameDeleted: []const u8, + + StreamTooLong: []const u8, + SyntaxError: []const u8, + InvalidSourceEncoding: []const u8, + }, + + pub fn generate(comptime buffer: []const u8) Localization { + @setEvalBranchQuota(1_000_000); + + var alloc_buf: [buffer.len]u8 = undefined; + var fba = std.heap.FixedBufferAllocator.init(&alloc_buf); + + return std.json.parseFromSliceLeaky(Localization, fba.allocator(), buffer, .{}) catch @compileError("failed to parse json"); + } +}; diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json new file mode 100644 index 0000000..72ed39a --- /dev/null +++ b/src/ptkgen/intl/en.json @@ -0,0 +1,33 @@ +{ + "diagnostics": { + "out_of_memory": "Out of memory", + "file_limit_exceeded": "Input file exceeds maximum file size", + "io_error": "I/O error: {[error_code]}", + "invalid_source_encoding": "Invalid source code encoding detected", + "unexpected_token_eof": "Expected token {[expected_type]}, but end of file was discovered", + "unexpected_token": "Expected token {[expected_type]}, but discovered token {[actual_type]} ('{[actual_text]}')", + "unexpected_character": "Unexpected character: '{[character]}'", + "unexpected_eof": "Unexpected end of file", + "bad_string_escape": "Invalid string escape: Escape sequence at the end of string", + "invalid_string_escape": "Invalid string escape \\{[escape]}", + "excess_tokens": "Excess token at the end of the file: {[token_type]}" + }, + "errors": { + "Unexpected": "unexpected error encountered", + "OutOfMemory": "out of memory", + "InputOutput": "input output", + "AccessDenied": "access denied", + "BrokenPipe": "broken pipe", + "SystemResources": "system resources", + "OperationAborted": "operation aborted", + "WouldBlock": "would block", + "ConnectionResetByPeer": "connection reset by peer", + "IsDir": "path points to directory", + "ConnectionTimedOut": "connection timed out", + "NotOpenForReading": "not open for reading", + "NetNameDeleted": "net name deleted", + "StreamTooLong": "stream too long", + "SyntaxError": "syntax error", + "InvalidSourceEncoding": "invalid source encoding" + } +} \ No newline at end of file diff --git a/src/ptkgen/main.zig b/src/ptkgen/main.zig index 15316ac..b5e2741 100644 --- a/src/ptkgen/main.zig +++ b/src/ptkgen/main.zig @@ -10,6 +10,8 @@ const ast = @import("ast.zig"); const parser = @import("parser.zig"); const ast_dump = @import("ast_dump.zig"); +const Diagnostics = @import("Diagnostics.zig"); + comptime { // reference for unit tests: _ = parser; @@ -45,6 +47,8 @@ const TestMode = enum { }; pub fn main() !u8 { + // errdefer |e| @compileLog(@TypeOf(e)); + var stdout = std.io.getStdOut(); var stdin = std.io.getStdIn(); var stderr = std.io.getStdErr(); @@ -69,7 +73,7 @@ pub fn main() !u8 { var string_pool = try ptk.strings.Pool.init(dynamic_allocator); defer string_pool.deinit(); - var diagnostics = ptk.Diagnostics.init(dynamic_allocator); + var diagnostics = Diagnostics.init(dynamic_allocator); defer diagnostics.deinit(); var input_file = switch (cli.positionals.len) { @@ -102,14 +106,14 @@ pub fn main() !u8 { cli.options.test_mode, ) catch |err| switch (err) { // syntax errors must produce diagnostics: - error.SyntaxError => 
std.debug.assert(diagnostics.hasErrors()), + error.SyntaxError, error.InvalidSourceEncoding => std.debug.assert(diagnostics.hasErrors()), error.OutOfMemory => { try diagnostics.emit(.{ .source = file_name, .line = 1, .column = 1, - }, .@"error", "out of memory", .{}); + }, .out_of_memory, .{}); }, error.StreamTooLong => { @@ -117,7 +121,7 @@ pub fn main() !u8 { .source = file_name, .line = 1, .column = 1, - }, .@"error", "input file too large", .{}); + }, .file_limit_exceeded, .{}); }, error.InputOutput, @@ -137,11 +141,11 @@ pub fn main() !u8 { .source = file_name, .line = 1, .column = 1, - }, .@"error", "i/o error: {s}", .{@errorName(err)}); + }, .io_error, .{ .error_code = err }); }, }; - try diagnostics.print(stderr.writer()); + try diagnostics.render(stderr.writer()); return if (diagnostics.hasErrors()) 1 @@ -151,7 +155,7 @@ pub fn main() !u8 { fn compileFile( allocator: std.mem.Allocator, - diagnostics: *ptk.Diagnostics, + diagnostics: *Diagnostics, string_pool: *ptk.strings.Pool, input_file: std.fs.File, file_name: []const u8, diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index 3d6bfe0..0b462c3 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -2,6 +2,8 @@ const std = @import("std"); const ptk = @import("parser-toolkit"); const ast = @import("ast.zig"); +const Diagnostics = @import("Diagnostics.zig"); + const fmtEscapes = std.zig.fmtEscapes; pub const Document = struct { @@ -18,7 +20,7 @@ pub const Document = struct { } }; -pub fn parse(allocator: std.mem.Allocator, diagnostics: *ptk.Diagnostics, string_pool: *ptk.strings.Pool, file_name: []const u8, stream: anytype) !Document { +pub fn parse(allocator: std.mem.Allocator, diagnostics: *Diagnostics, string_pool: *ptk.strings.Pool, file_name: []const u8, stream: anytype) !Document { var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); @@ -38,7 +40,7 @@ pub fn parse(allocator: std.mem.Allocator, diagnostics: *ptk.Diagnostics, string const document_node = parser.acceptDocument() catch |err| switch (err) { // Unrecoverable syntax error, must have created diagnostics already - error.SyntaxError => |e| { + error.SyntaxError, error.InvalidSourceEncoding => |e| { std.debug.assert(diagnostics.hasErrors()); return e; }, @@ -48,13 +50,11 @@ pub fn parse(allocator: std.mem.Allocator, diagnostics: *ptk.Diagnostics, string if (tokenizer.next()) |token_or_null| { if (token_or_null) |token| { - try diagnostics.emit(token.location, .@"error", "Excess token at the end of the file: {s}", .{@tagName(token.type)}); + try diagnostics.emit(token.location, .excess_tokens, .{ .token_type = token.type }); return error.SyntaxError; } } else |_| { - try diagnostics.emit(tokenizer.current_location, .@"error", "Unexpected character: '{}'", .{ - fmtEscapes(tokenizer.source[tokenizer.offset..][0..1]), - }); + try parser.emitUnexpectedCharacter(tokenizer.current_location, tokenizer.offset); return error.SyntaxError; } @@ -133,7 +133,7 @@ const Parser = struct { core: ParserCore, arena: std.mem.Allocator, pool: *ptk.strings.Pool, - diagnostics: *ptk.Diagnostics, + diagnostics: *Diagnostics, pub fn acceptDocument(parser: *Parser) !ast.Document { var doc = ast.Document{}; @@ -149,9 +149,23 @@ const Parser = struct { return doc; } - fn emitDiagnostic(parser: *Parser, loc: ?ptk.Location, comptime fmt: []const u8, args: anytype) !void { + fn emitDiagnostic(parser: Parser, loc: ?ptk.Location, comptime code: Diagnostics.Code, data: Diagnostics.Data(code)) !void { // Anything detected here is always an error - try 
parser.diagnostics.emit(loc orelse parser.core.tokenizer.current_location, .@"error", fmt, args); + std.debug.assert(code.isError()); + try parser.diagnostics.emit(loc orelse parser.core.tokenizer.current_location, code, data); + } + + fn emitUnexpectedCharacter(parser: Parser, location: ptk.Location, source_offset: usize) !void { + var utf8_view = std.unicode.Utf8View.init(parser.core.tokenizer.source[source_offset..]) catch { + try parser.emitDiagnostic(location, .invalid_source_encoding, .{}); + return error.InvalidSourceEncoding; + }; + + var iter = utf8_view.iterator(); + + try parser.emitDiagnostic(location, .unexpected_character, .{ + .character = iter.nextCodepoint() orelse @panic("very unexpected end of file"), + }); } fn acceptTopLevelDecl(parser: *Parser) !?ast.TopLevelDeclaration { @@ -166,7 +180,7 @@ const Parser = struct { error.UnexpectedCharacter => true, }; if (excess_tokens) { - try parser.emitDiagnostic(null, "Unexpected end of file", .{}); + try parser.emitDiagnostic(null, .unexpected_eof, .{}); return error.SyntaxError; } @@ -235,7 +249,7 @@ const Parser = struct { try parser.append(ast.Production, &list, prod); } else |err| switch (err) { error.UnexpectedTokenRecoverable => break, - error.OutOfMemory, error.SyntaxError => |e| return e, + error.OutOfMemory, error.InvalidSourceEncoding, error.SyntaxError => |e| return e, } } @@ -287,13 +301,13 @@ const Parser = struct { fn tryAcceptLiteral(parser: *Parser, comptime token_type: TokenType) !bool { _ = parser.acceptToken(token_type, .recover) catch |err| switch (err) { error.UnexpectedTokenRecoverable => return false, - error.OutOfMemory, error.SyntaxError => |e| return e, + error.OutOfMemory, error.InvalidSourceEncoding, error.SyntaxError => |e| return e, }; return true; } /// Tries to accept a given token and will emit a diagnostic if it fails. 
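+    /// With `accept_mode == .fail`, a mismatch emits a diagnostic and returns
+    /// `error.SyntaxError`; with `.recover`, the parser state is rolled back and
+    /// `error.UnexpectedTokenRecoverable` is returned so the caller can try an alternative.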
- fn acceptToken(parser: *Parser, comptime token_type: TokenType, accept_mode: AcceptMode) !Token { + fn acceptToken(parser: *Parser, comptime token_type: TokenType, accept_mode: AcceptMode) AcceptError!Token { const saved_state = parser.save(); errdefer parser.restore(saved_state); @@ -306,10 +320,10 @@ const Parser = struct { if (token.type != token_type) { switch (accept_mode) { .fail => { - try parser.emitDiagnostic(location, "Expected token {s}, but discovered token {s} ('{}')", .{ - @tagName(token_type), - @tagName(token.type), - std.zig.fmtEscapes(token.text), + try parser.emitDiagnostic(location, .unexpected_token, .{ + .expected_type = token_type, + .actual_type = token.type, + .actual_text = token.text, }); return error.SyntaxError; }, @@ -321,15 +335,13 @@ const Parser = struct { error.UnexpectedToken => unreachable, // RS.any will always accept the token error.EndOfStream => switch (accept_mode) { .fail => { - try parser.emitDiagnostic(location, "Expected token {s}, but end of file was discovered", .{@tagName(token_type)}); + try parser.emitDiagnostic(location, .unexpected_token_eof, .{ .expected_type = token_type }); return error.SyntaxError; }, .recover => return error.UnexpectedTokenRecoverable, }, error.UnexpectedCharacter => { - try parser.emitDiagnostic(location, "Unexpected character: '{}'", .{ - fmtEscapes(parser.core.tokenizer.source[source_offset..][0..1]), - }); + try parser.emitUnexpectedCharacter(location, source_offset); return error.SyntaxError; }, } @@ -369,7 +381,7 @@ const Parser = struct { if (c == '\\') { i += 1; if (i >= raw.len) { - try parser.emitDiagnostic(loc, "Invalid string escape: Missing escaped character!", .{}); + try parser.emitDiagnostic(loc, .bad_string_escape, .{}); return error.SyntaxError; } const escape = raw[i]; @@ -387,11 +399,7 @@ const Parser = struct { '0'...'3' => @panic("Implement octal escape \\???"), else => { - if (std.ascii.isPrint(c)) { - try parser.emitDiagnostic(loc, "Invalid string escape \\{c}", .{escape}); - } else { - try parser.emitDiagnostic(loc, "Invalid string escape \\x{X:0>2}", .{escape}); - } + try parser.emitDiagnostic(loc, .invalid_string_escape, .{ .escape = escape }); return error.SyntaxError; }, }; @@ -432,25 +440,23 @@ const Parser = struct { // Something could not be accepted. 
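+        // Whenever this is returned, a diagnostic has already been emitted.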
SyntaxError, + + // The source code contained invalid bytes + InvalidSourceEncoding, }; pub const AcceptError = FatalAcceptError || error{ - - // The token stream is too short to accept this rule - EndOfStream, - // The token stream contains an unexpected token, this is a syntax error UnexpectedTokenRecoverable, }; fn filterAcceptError(err: AcceptError) FatalAcceptError!void { return switch (err) { - error.EndOfStream, - error.UnexpectedTokenRecoverable, - => {}, + error.UnexpectedTokenRecoverable => {}, error.OutOfMemory, error.SyntaxError, + error.InvalidSourceEncoding, => |e| return e, }; } From d7b0050adf3ffdd0feedc48d817905105a5662eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 2 Nov 2023 15:20:11 +0100 Subject: [PATCH 05/20] Starts creating ptkgen grammar grammar file, adds support for basic group constructs --- build.zig | 9 ++- examples/ptkgen/grammar.ptk | 38 ++++++++++ src/ptkgen/ast.zig | 8 +- src/ptkgen/ast_dump.zig | 6 +- src/ptkgen/parser.zig | 76 +++++++++++-------- .../analysis/accept/match-group-many-item.ptk | 1 + .../accept/match-group-many-sequence.ptk | 1 + test/analysis/accept/match-group-nested.ptk | 1 + test/analysis/accept/match-group-one-item.ptk | 1 + .../accept/match-group-one-sequence.ptk | 1 + test/analysis/accept/match-literal-rule.ptk | 1 - .../accept/match-literal-sequence-variant.ptk | 4 + .../accept/match-literal-sequence.ptk | 1 - .../accept/match-literal-variants.ptk | 1 + test/parser/accept/identifiers.ptk | 14 ++-- 15 files changed, 116 insertions(+), 47 deletions(-) create mode 100644 examples/ptkgen/grammar.ptk create mode 100644 test/analysis/accept/match-group-many-item.ptk create mode 100644 test/analysis/accept/match-group-many-sequence.ptk create mode 100644 test/analysis/accept/match-group-nested.ptk create mode 100644 test/analysis/accept/match-group-one-item.ptk create mode 100644 test/analysis/accept/match-group-one-sequence.ptk create mode 100644 test/analysis/accept/match-literal-sequence-variant.ptk create mode 100644 test/analysis/accept/match-literal-variants.ptk diff --git a/build.zig b/build.zig index d09acee..c6b0c55 100644 --- a/build.zig +++ b/build.zig @@ -94,11 +94,18 @@ const parser_ok_files = [_][]const u8{ "test/parser/accept/empty.ptk", "test/parser/accept/empty-with-comment-linefeed.ptk", "test/parser/accept/empty-with-comment.ptk", - // "test/parser/accept/identifiers.ptk", + "test/parser/accept/identifiers.ptk", // "examples/ptkgen/ast-with-unions.ptk", // TODO: Move to examples } ++ analyis_ok_files; const analyis_ok_files = [_][]const u8{ "test/analysis/accept/match-literal-rule.ptk", "test/analysis/accept/match-literal-sequence.ptk", + "test/analysis/accept/match-literal-variants.ptk", + "test/analysis/accept/match-literal-sequence-variant.ptk", + "test/analysis/accept/match-group-one-item.ptk", + "test/analysis/accept/match-group-one-sequence.ptk", + "test/analysis/accept/match-group-many-item.ptk", + "test/analysis/accept/match-group-many-sequence.ptk", + "test/analysis/accept/match-group-nested.ptk", }; diff --git a/examples/ptkgen/grammar.ptk b/examples/ptkgen/grammar.ptk new file mode 100644 index 0000000..2177628 --- /dev/null +++ b/examples/ptkgen/grammar.ptk @@ -0,0 +1,38 @@ + + + +rule document = [ ]* ; + +rule top_level = + + | + | + | +; + +# rule start_decl = "start" $rule_ref ";" ; + +# rule token_decl = "token" $identifier "=" ";" ; + +# rule node_decl = "node" $identifier "=" ";" ; + +rule rule_decl = "rule" $identifier ( ":" )? 
"=" ";" ; + +rule mapped_productions = ( "|" )* ; + +rule mapped_production = ( "=>" )? ; + +rule production_sequence = ( )+; + +rule production = + $string_literal + | $rule_ref + | "(" ")" "?" + | "(" ")" "*" + | "(" ")" "+" + | "(" ")" +; + +rule mapping = + # TODO +; \ No newline at end of file diff --git a/src/ptkgen/ast.zig b/src/ptkgen/ast.zig index 0bda109..821e13e 100644 --- a/src/ptkgen/ast.zig +++ b/src/ptkgen/ast.zig @@ -108,10 +108,10 @@ pub const Production = union(enum) { literal: StringLiteral, // "text" terminal: TokenRef, // $token recursion: RuleRef, // - sequence: List(Production), // ( ... ) - optional: *Production, // ...? - repetition_zero: *Production, // [ ... ]* - repetition_one: *Production, // [ ... ]+ + sequence: List(Production), // ... + optional: List(Production), // ( ... )? + repetition_zero: List(Production), // [ ... ]* + repetition_one: List(Production), // [ ... ]+ }; pub const AstMapping = union(enum) { diff --git a/src/ptkgen/ast_dump.zig b/src/ptkgen/ast_dump.zig index 835dadd..2b8f08a 100644 --- a/src/ptkgen/ast_dump.zig +++ b/src/ptkgen/ast_dump.zig @@ -40,7 +40,7 @@ const AstPrinter = struct { while (prods.next()) |prod| { defer first = false; if (!first) { - print(" | ", .{}); + print("\n | ", .{}); } else { print(" ", .{}); } @@ -119,8 +119,8 @@ const AstPrinter = struct { const text = strpr.printer.strings.get(strpr.str); switch (strpr.mode) { - .id => try writer.print("\"{}\"", .{std.zig.fmtId(text)}), - .text => try writer.print("\"{}\"", .{std.zig.fmtEscapes(text)}), + .id => try writer.print("{}", .{std.zig.fmtId(text)}), + .text => try writer.print("{}", .{std.zig.fmtEscapes(text)}), } } }; diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index 0b462c3..fd8665b 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -135,7 +135,7 @@ const Parser = struct { pool: *ptk.strings.Pool, diagnostics: *Diagnostics, - pub fn acceptDocument(parser: *Parser) !ast.Document { + pub fn acceptDocument(parser: *Parser) FatalAcceptError!ast.Document { var doc = ast.Document{}; while (true) { @@ -149,26 +149,7 @@ const Parser = struct { return doc; } - fn emitDiagnostic(parser: Parser, loc: ?ptk.Location, comptime code: Diagnostics.Code, data: Diagnostics.Data(code)) !void { - // Anything detected here is always an error - std.debug.assert(code.isError()); - try parser.diagnostics.emit(loc orelse parser.core.tokenizer.current_location, code, data); - } - - fn emitUnexpectedCharacter(parser: Parser, location: ptk.Location, source_offset: usize) !void { - var utf8_view = std.unicode.Utf8View.init(parser.core.tokenizer.source[source_offset..]) catch { - try parser.emitDiagnostic(location, .invalid_source_encoding, .{}); - return error.InvalidSourceEncoding; - }; - - var iter = utf8_view.iterator(); - - try parser.emitDiagnostic(location, .unexpected_character, .{ - .character = iter.nextCodepoint() orelse @panic("very unexpected end of file"), - }); - } - - fn acceptTopLevelDecl(parser: *Parser) !?ast.TopLevelDeclaration { + fn acceptTopLevelDecl(parser: *Parser) FatalAcceptError!?ast.TopLevelDeclaration { if (parser.acceptRule()) |rule| { return .{ .rule = rule }; } else |err| try filterAcceptError(err); @@ -187,7 +168,7 @@ const Parser = struct { return null; } - fn acceptRule(parser: *Parser) !ast.Rule { + fn acceptRule(parser: *Parser) AcceptError!ast.Rule { var state = parser.save(); errdefer parser.restore(state); @@ -224,7 +205,7 @@ const Parser = struct { }; } - fn acceptMappedProduction(parser: *Parser) !ast.MappedProduction { + 
fn acceptMappedProduction(parser: *Parser) AcceptError!ast.MappedProduction { var sequence = try parser.acceptProductionSequence(); const mapping = if (try parser.tryAcceptLiteral(.@"=>")) @@ -233,6 +214,7 @@ const Parser = struct { null; return ast.MappedProduction{ + // Auto-flatten the "tree" here if the top level production is a "sequence" of one .production = if (sequence.only()) |item| item else @@ -241,7 +223,7 @@ const Parser = struct { }; } - fn acceptProductionSequence(parser: *Parser) !ast.List(ast.Production) { + fn acceptProductionSequence(parser: *Parser) AcceptError!ast.List(ast.Production) { var list: ast.List(ast.Production) = .{}; while (true) { @@ -256,7 +238,22 @@ const Parser = struct { return list; } - fn acceptProduction(parser: *Parser) !ast.Production { + fn acceptProduction(parser: *Parser) AcceptError!ast.Production { + if (try parser.tryAcceptLiteral(.@"(")) { + var sequence = try parser.acceptProductionSequence(); + try parser.acceptLiteral(.@")", .fail); + + if (try parser.tryAcceptLiteral(.@"?")) { + return .{ .optional = sequence }; + } else if (try parser.tryAcceptLiteral(.@"+")) { + return .{ .repetition_one = sequence }; + } else if (try parser.tryAcceptLiteral(.@"*")) { + return .{ .repetition_zero = sequence }; + } else { + return .{ .sequence = sequence }; + } + } + const str = try parser.acceptStringLiteral(.recover); return ast.Production{ @@ -264,17 +261,17 @@ const Parser = struct { }; } - fn acceptAstMapping(parser: *Parser) !ast.AstMapping { + fn acceptAstMapping(parser: *Parser) AcceptError!ast.AstMapping { _ = parser; @panic("not implemented yet"); } - fn acceptTypeSpec(parser: *Parser) !ast.TypeSpec { + fn acceptTypeSpec(parser: *Parser) AcceptError!ast.TypeSpec { _ = parser; @panic("not implemented yet"); } - fn acceptStringLiteral(parser: *Parser, accept_mode: AcceptMode) !ast.StringLiteral { + fn acceptStringLiteral(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.StringLiteral { const token = try parser.acceptToken(.string_literal, accept_mode); std.debug.assert(token.text.len >= 2); @@ -285,7 +282,7 @@ const Parser = struct { }; } - fn acceptIdentifier(parser: *Parser, accept_mode: AcceptMode) !ast.Identifier { + fn acceptIdentifier(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.Identifier { const token = try parser.acceptToken(.identifier, accept_mode); return ast.Identifier{ @@ -294,7 +291,7 @@ const Parser = struct { }; } - fn acceptLiteral(parser: *Parser, comptime token_type: TokenType, accept_mode: AcceptMode) !void { + fn acceptLiteral(parser: *Parser, comptime token_type: TokenType, accept_mode: AcceptMode) AcceptError!void { _ = try parser.acceptToken(token_type, accept_mode); } @@ -357,6 +354,25 @@ const Parser = struct { // management: + fn emitDiagnostic(parser: Parser, loc: ?ptk.Location, comptime code: Diagnostics.Code, data: Diagnostics.Data(code)) !void { + // Anything detected here is always an error + std.debug.assert(code.isError()); + try parser.diagnostics.emit(loc orelse parser.core.tokenizer.current_location, code, data); + } + + fn emitUnexpectedCharacter(parser: Parser, location: ptk.Location, source_offset: usize) !void { + var utf8_view = std.unicode.Utf8View.init(parser.core.tokenizer.source[source_offset..]) catch { + try parser.emitDiagnostic(location, .invalid_source_encoding, .{}); + return error.InvalidSourceEncoding; + }; + + var iter = utf8_view.iterator(); + + try parser.emitDiagnostic(location, .unexpected_character, .{ + .character = iter.nextCodepoint() orelse @panic("very 
unexpected end of file"), + }); + } + fn unwrapIdentifierString(parser: *Parser, loc: ptk.Location, raw: []const u8) !ptk.strings.String { std.debug.assert(raw.len > 0); if (raw[0] == '@') { diff --git a/test/analysis/accept/match-group-many-item.ptk b/test/analysis/accept/match-group-many-item.ptk new file mode 100644 index 0000000..5e1e31f --- /dev/null +++ b/test/analysis/accept/match-group-many-item.ptk @@ -0,0 +1 @@ +rule mode = ( "first" "second" "third" ); \ No newline at end of file diff --git a/test/analysis/accept/match-group-many-sequence.ptk b/test/analysis/accept/match-group-many-sequence.ptk new file mode 100644 index 0000000..40902e7 --- /dev/null +++ b/test/analysis/accept/match-group-many-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "one" "two" "three" ) "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-group-nested.ptk b/test/analysis/accept/match-group-nested.ptk new file mode 100644 index 0000000..d35091c --- /dev/null +++ b/test/analysis/accept/match-group-nested.ptk @@ -0,0 +1 @@ +rule mode = "L0:0" ( "L1:0" ( "L2:0" "L2:1" "L2:2" ) "L1:2" ) "L0:2"; \ No newline at end of file diff --git a/test/analysis/accept/match-group-one-item.ptk b/test/analysis/accept/match-group-one-item.ptk new file mode 100644 index 0000000..faa24e7 --- /dev/null +++ b/test/analysis/accept/match-group-one-item.ptk @@ -0,0 +1 @@ +rule mode = ( "item" ); \ No newline at end of file diff --git a/test/analysis/accept/match-group-one-sequence.ptk b/test/analysis/accept/match-group-one-sequence.ptk new file mode 100644 index 0000000..e34f909 --- /dev/null +++ b/test/analysis/accept/match-group-one-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "second" ) "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-rule.ptk b/test/analysis/accept/match-literal-rule.ptk index 3cda9a8..a0b8dc0 100644 --- a/test/analysis/accept/match-literal-rule.ptk +++ b/test/analysis/accept/match-literal-rule.ptk @@ -1,2 +1 @@ -# This file contains a single rule with no well-defined start point: rule basic = "basic"; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-sequence-variant.ptk b/test/analysis/accept/match-literal-sequence-variant.ptk new file mode 100644 index 0000000..842274e --- /dev/null +++ b/test/analysis/accept/match-literal-sequence-variant.ptk @@ -0,0 +1,4 @@ +rule mode = + "basic" "item" + | "extended" "item" +; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-sequence.ptk b/test/analysis/accept/match-literal-sequence.ptk index 555a2dc..245add7 100644 --- a/test/analysis/accept/match-literal-sequence.ptk +++ b/test/analysis/accept/match-literal-sequence.ptk @@ -1,2 +1 @@ -# This file contains a single rule with no well-defined start point: rule basic = "basic" "words" "after" "another"; \ No newline at end of file diff --git a/test/analysis/accept/match-literal-variants.ptk b/test/analysis/accept/match-literal-variants.ptk new file mode 100644 index 0000000..28ff569 --- /dev/null +++ b/test/analysis/accept/match-literal-variants.ptk @@ -0,0 +1 @@ +rule mode = "basic" | "extended"; \ No newline at end of file diff --git a/test/parser/accept/identifiers.ptk b/test/parser/accept/identifiers.ptk index 521db6f..3c4baaa 100644 --- a/test/parser/accept/identifiers.ptk +++ b/test/parser/accept/identifiers.ptk @@ -1,8 +1,8 @@ -rule a = literal `whatever`; -rule _ = literal `whatever`; -rule a0 = literal `whatever`; -rule a-z = literal `whatever`; -rule _10 = literal `whatever`; -rule @"x" = 
literal `whatever`; -rule @"hello, world!" = literal `whatever`; +rule a = "whatever"; +rule _ = "whatever"; +rule a0 = "whatever"; +rule a-z = "whatever"; +rule _10 = "whatever"; +rule @"x" = "whatever"; +rule @"hello, world!" = "whatever"; From d0a08c7c3d4f6118c7e058d23fcbdb5631279dde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 2 Nov 2023 16:42:57 +0100 Subject: [PATCH 06/20] Implements more tests, adds support for parser rejection tests --- build.zig | 38 ++++++ src/ptkgen/Diagnostics.zig | 27 +++-- src/ptkgen/ast_dump.zig | 12 +- src/ptkgen/intl.zig | 3 +- src/ptkgen/intl/en.json | 5 +- src/ptkgen/main.zig | 108 ++++++++++++++++-- src/ptkgen/parser.zig | 16 ++- .../accept/match-optional-many-item.ptk | 1 + .../accept/match-optional-many-sequence.ptk | 1 + .../analysis/accept/match-optional-nested.ptk | 1 + .../accept/match-optional-one-item.ptk | 1 + .../accept/match-optional-one-sequence.ptk | 1 + .../accept/match-rep_one-many-item.ptk | 1 + .../accept/match-rep_one-many-sequence.ptk | 1 + test/analysis/accept/match-rep_one-nested.ptk | 1 + .../accept/match-rep_one-one-item.ptk | 1 + .../accept/match-rep_one-one-sequence.ptk | 1 + .../accept/match-rep_zero-many-item.ptk | 1 + .../accept/match-rep_zero-many-sequence.ptk | 1 + .../analysis/accept/match-rep_zero-nested.ptk | 1 + .../accept/match-rep_zero-one-item.ptk | 1 + .../accept/match-rep_zero-one-sequence.ptk | 1 + test/parser/accept/optional-nospace.ptk | 1 + test/parser/accept/optional-space.ptk | 1 + test/parser/accept/rep_one-nospace.ptk | 1 + test/parser/accept/rep_one-space.ptk | 1 + test/parser/accept/rep_zero-nospace.ptk | 1 + test/parser/accept/rep_zero-space.ptk | 1 + test/parser/reject/empty-group.rule | 2 + test/parser/reject/empty-optional.rule | 2 + test/parser/reject/empty-rep_one.rule | 2 + test/parser/reject/empty-rep_zero.rule | 2 + test/parser/reject/empty-rule.rule | 2 + 33 files changed, 204 insertions(+), 36 deletions(-) create mode 100644 test/analysis/accept/match-optional-many-item.ptk create mode 100644 test/analysis/accept/match-optional-many-sequence.ptk create mode 100644 test/analysis/accept/match-optional-nested.ptk create mode 100644 test/analysis/accept/match-optional-one-item.ptk create mode 100644 test/analysis/accept/match-optional-one-sequence.ptk create mode 100644 test/analysis/accept/match-rep_one-many-item.ptk create mode 100644 test/analysis/accept/match-rep_one-many-sequence.ptk create mode 100644 test/analysis/accept/match-rep_one-nested.ptk create mode 100644 test/analysis/accept/match-rep_one-one-item.ptk create mode 100644 test/analysis/accept/match-rep_one-one-sequence.ptk create mode 100644 test/analysis/accept/match-rep_zero-many-item.ptk create mode 100644 test/analysis/accept/match-rep_zero-many-sequence.ptk create mode 100644 test/analysis/accept/match-rep_zero-nested.ptk create mode 100644 test/analysis/accept/match-rep_zero-one-item.ptk create mode 100644 test/analysis/accept/match-rep_zero-one-sequence.ptk create mode 100644 test/parser/accept/optional-nospace.ptk create mode 100644 test/parser/accept/optional-space.ptk create mode 100644 test/parser/accept/rep_one-nospace.ptk create mode 100644 test/parser/accept/rep_one-space.ptk create mode 100644 test/parser/accept/rep_zero-nospace.ptk create mode 100644 test/parser/accept/rep_zero-space.ptk create mode 100644 test/parser/reject/empty-group.rule create mode 100644 test/parser/reject/empty-optional.rule create mode 100644 test/parser/reject/empty-rep_one.rule create mode 100644 
test/parser/reject/empty-rep_zero.rule create mode 100644 test/parser/reject/empty-rule.rule diff --git a/build.zig b/build.zig index c6b0c55..54c2ae2 100644 --- a/build.zig +++ b/build.zig @@ -74,6 +74,13 @@ pub fn build(b: *std.build.Builder) void { run.addFileArg(.{ .path = file }); test_step.dependOn(&run.step); } + + for (parser_reject_files) |file| { + const run = b.addRunArtifact(ptkdef_exe); + run.addArg("--test_mode=parse_only"); + run.addFileArg(.{ .path = file }); + test_step.dependOn(&run.step); + } } // examples @@ -95,6 +102,14 @@ const parser_ok_files = [_][]const u8{ "test/parser/accept/empty-with-comment-linefeed.ptk", "test/parser/accept/empty-with-comment.ptk", "test/parser/accept/identifiers.ptk", + + "test/parser/accept/optional-nospace.ptk", + "test/parser/accept/optional-space.ptk", + "test/parser/accept/rep_one-nospace.ptk", + "test/parser/accept/rep_one-space.ptk", + "test/parser/accept/rep_zero-nospace.ptk", + "test/parser/accept/rep_zero-space.ptk", + // "examples/ptkgen/ast-with-unions.ptk", // TODO: Move to examples } ++ analyis_ok_files; @@ -108,4 +123,27 @@ const analyis_ok_files = [_][]const u8{ "test/analysis/accept/match-group-many-item.ptk", "test/analysis/accept/match-group-many-sequence.ptk", "test/analysis/accept/match-group-nested.ptk", + "test/analysis/accept/match-optional-one-item.ptk", + "test/analysis/accept/match-optional-one-sequence.ptk", + "test/analysis/accept/match-optional-many-item.ptk", + "test/analysis/accept/match-optional-many-sequence.ptk", + "test/analysis/accept/match-optional-nested.ptk", + "test/analysis/accept/match-rep_zero-one-item.ptk", + "test/analysis/accept/match-rep_zero-one-sequence.ptk", + "test/analysis/accept/match-rep_zero-many-item.ptk", + "test/analysis/accept/match-rep_zero-many-sequence.ptk", + "test/analysis/accept/match-rep_zero-nested.ptk", + "test/analysis/accept/match-rep_one-one-item.ptk", + "test/analysis/accept/match-rep_one-one-sequence.ptk", + "test/analysis/accept/match-rep_one-many-item.ptk", + "test/analysis/accept/match-rep_one-many-sequence.ptk", + "test/analysis/accept/match-rep_one-nested.ptk", +}; + +const parser_reject_files = [_][]const u8{ + "test/parser/reject/empty-rule.rule", + "test/parser/reject/empty-group.rule", + "test/parser/reject/empty-optional.rule", + "test/parser/reject/empty-rep_one.rule", + "test/parser/reject/empty-rep_zero.rule", }; diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig index d5b75f5..12b8fa1 100644 --- a/src/ptkgen/Diagnostics.zig +++ b/src/ptkgen/Diagnostics.zig @@ -12,21 +12,24 @@ pub const Code = enum(u16) { pub const first_note = 8000; pub const last_item = 10000; + // generic failures: out_of_memory = 1000, file_limit_exceeded = 1001, io_error = 1002, - invalid_source_encoding = 1003, - unexpected_token_eof = 1004, - unexpected_token = 1005, - unexpected_character = 1006, - unexpected_eof = 1007, + // non-recoverable syntax errors: - bad_string_escape = 1008, + invalid_source_encoding = 1100, + unexpected_token_eof = 1101, + unexpected_token = 1102, + unexpected_character = 1103, + unexpected_eof = 1104, + bad_string_escape = 1105, + invalid_string_escape = 1106, + excess_tokens = 1107, - invalid_string_escape = 1009, - - excess_tokens = 1010, + // recoverable syntax errors: + illegal_empty_group = 1200, comptime { std.debug.assert(first_error < first_warning); @@ -74,6 +77,8 @@ pub fn Data(comptime code: Code) type { .invalid_string_escape => struct { escape: u21 }, .excess_tokens => struct { token_type: parser.TokenType }, + 
.illegal_empty_group => NoDiagnosticData, + // else => @compileError(std.fmt.comptimePrint("Code {} has no diagnostic type associated!", .{code})), }; } @@ -85,14 +90,17 @@ pub const Message = struct { }; inner: ptk.Diagnostics, +codes: std.ArrayList(Code), pub fn init(allocator: std.mem.Allocator) Diagnostics { return Diagnostics{ .inner = ptk.Diagnostics.init(allocator), + .codes = std.ArrayList(Code).init(allocator), }; } pub fn deinit(diag: *Diagnostics) void { + diag.codes.deinit(); diag.inner.deinit(); diag.* = undefined; } @@ -242,6 +250,7 @@ pub fn emit(diag: *Diagnostics, location: ptk.Location, comptime code: Code, par }; try diag.inner.emit(location, level, "{s}{d:0>4}: {s}", .{ code_prefix, @intFromEnum(code), message_text }); + try diag.codes.append(code); } pub fn render(diag: Diagnostics, stream: anytype) !void { diff --git a/src/ptkgen/ast_dump.zig b/src/ptkgen/ast_dump.zig index 2b8f08a..9bff39e 100644 --- a/src/ptkgen/ast_dump.zig +++ b/src/ptkgen/ast_dump.zig @@ -77,7 +77,7 @@ const AstPrinter = struct { .literal => |lit| print("\"{}\"", .{printer.fmtString(lit.value)}), .terminal => |term| print("<{}>", .{printer.fmtId(term.identifier)}), .recursion => print("", .{}), - .sequence => |seq| { + .sequence, .optional, .repetition_zero, .repetition_one => |seq| { print("(", .{}); var iter = ast.iterate(seq); @@ -87,10 +87,14 @@ const AstPrinter = struct { } print(" )", .{}); + switch (production) { + .sequence => {}, + .optional => print("?", .{}), + .repetition_zero => print("*", .{}), + .repetition_one => print("+", .{}), + else => unreachable, + } }, - .optional => print("", .{}), - .repetition_zero => print("", .{}), - .repetition_one => print("", .{}), } } diff --git a/src/ptkgen/intl.zig b/src/ptkgen/intl.zig index 51623eb..f6c2705 100644 --- a/src/ptkgen/intl.zig +++ b/src/ptkgen/intl.zig @@ -38,6 +38,7 @@ pub const Localization = struct { bad_string_escape: []const u8, invalid_string_escape: []const u8, excess_tokens: []const u8, + illegal_empty_group: []const u8, }, errors: struct { @@ -57,7 +58,7 @@ pub const Localization = struct { NotOpenForReading: []const u8, NetNameDeleted: []const u8, - StreamTooLong: []const u8, + FileTooBig: []const u8, SyntaxError: []const u8, InvalidSourceEncoding: []const u8, }, diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json index 72ed39a..762bb4c 100644 --- a/src/ptkgen/intl/en.json +++ b/src/ptkgen/intl/en.json @@ -10,7 +10,8 @@ "unexpected_eof": "Unexpected end of file", "bad_string_escape": "Invalid string escape: Escape sequence at the end of string", "invalid_string_escape": "Invalid string escape \\{[escape]}", - "excess_tokens": "Excess token at the end of the file: {[token_type]}" + "excess_tokens": "Excess token at the end of the file: {[token_type]}", + "illegal_empty_group": "Production sequence may not be empty" }, "errors": { "Unexpected": "unexpected error encountered", @@ -26,7 +27,7 @@ "ConnectionTimedOut": "connection timed out", "NotOpenForReading": "not open for reading", "NetNameDeleted": "net name deleted", - "StreamTooLong": "stream too long", + "FileTooBig": "Input file exceeds resources", "SyntaxError": "syntax error", "InvalidSourceEncoding": "invalid source encoding" } diff --git a/src/ptkgen/main.zig b/src/ptkgen/main.zig index b5e2741..169f457 100644 --- a/src/ptkgen/main.zig +++ b/src/ptkgen/main.zig @@ -116,7 +116,7 @@ pub fn main() !u8 { }, .out_of_memory, .{}); }, - error.StreamTooLong => { + error.FileTooBig => { try diagnostics.emit(.{ .source = file_name, .line = 1, @@ -136,21 +136,74 
@@ pub fn main() !u8 { error.ConnectionTimedOut, error.NotOpenForReading, error.NetNameDeleted, - => { + => |e| { try diagnostics.emit(.{ .source = file_name, .line = 1, .column = 1, - }, .io_error, .{ .error_code = err }); + }, .io_error, .{ .error_code = e }); }, + + error.TestExpectationMismatched => return 1, // this is a shortcut we can take to not render the diagnostics on test failure }; - try diagnostics.render(stderr.writer()); + if (cli.options.test_mode == .none) { + try diagnostics.render(stderr.writer()); - return if (diagnostics.hasErrors()) - 1 - else - 0; + return if (diagnostics.hasErrors()) + 1 + else + 0; + } else { + // test fails through `error.TestExpectationMismatched`, not through diagnostics + return 0; + } +} + +const TestExpectation = struct { + code: Diagnostics.Code, +}; + +fn validateDiagnostics(allocator: std.mem.Allocator, diagnostics: Diagnostics, expectations: []const TestExpectation) !void { + var available = std.ArrayList(Diagnostics.Code).init(allocator); + defer available.deinit(); + + var expected = std.ArrayList(Diagnostics.Code).init(allocator); + defer expected.deinit(); + + try available.appendSlice(diagnostics.codes.items); + try expected.resize(expectations.len); + + for (expected.items, expectations) |*dst, src| { + dst.* = src.code; + } + + // Remove everything from expected and available that is present in both: + { + var i: usize = 0; + while (i < expected.items.len) { + const e = expected.items[i]; + + if (std.mem.indexOfScalar(Diagnostics.Code, available.items, e)) |index| { + _ = available.swapRemove(index); + _ = expected.swapRemove(i); + } else { + i += 1; + } + } + } + + const ok = (available.items.len == 0) and (expected.items.len == 0); + + for (available.items) |code| { + std.log.err("unexpected diagnostic: {0}", .{code}); + } + for (expected.items) |code| { + std.log.err("unmatched diagnostic: {0}", .{code}); + } + + if (!ok) + return error.TestExpectationMismatched; } fn compileFile( @@ -161,19 +214,52 @@ fn compileFile( file_name: []const u8, mode: TestMode, ) !void { + var source_code = try input_file.readToEndAlloc(allocator, 4 << 20); // 4 MB should be enough for now... 
+ defer allocator.free(source_code); + + var expectations = std.ArrayList(TestExpectation).init(allocator); + defer expectations.deinit(); + + if (mode != .none) { + // parse expectations from source code: + var lines = std.mem.tokenize(u8, source_code, "\n"); + while (lines.next()) |line| { + const prefix = "# expected:"; + if (std.mem.startsWith(u8, line, prefix)) { + var items = std.mem.tokenize(u8, line[prefix.len..], " \t,"); + while (items.next()) |error_code| { + if (error_code.len == 0 or (error_code[0] != 'E' and error_code[0] != 'W' and error_code[0] != 'D')) + @panic("invalid error code!"); + const id = std.fmt.parseInt(u16, error_code[1..], 10) catch @panic("bad integer"); + const code = std.meta.intToEnum(Diagnostics.Code, id) catch @panic("bad diagnostic code"); + try expectations.append(.{ .code = code }); + } + } + } + } + var tree = try parser.parse( allocator, diagnostics, string_pool, file_name, - input_file.reader(), + source_code, ); defer tree.deinit(); if (mode == .parse_only) { - // we're done if we're here + try validateDiagnostics(allocator, diagnostics.*, expectations.items); return; } - ast_dump.dump(string_pool, tree); + // TODO: Implement sema + + // TODO: Implement parsergen / tablegen / highlightergen + + if (mode == .none) { + ast_dump.dump(string_pool, tree); + } else { + // we need to validate against test expectations when doing *any* test mode + try validateDiagnostics(allocator, diagnostics.*, expectations.items); + } } diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index fd8665b..c586b67 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -8,10 +8,7 @@ const fmtEscapes = std.zig.fmtEscapes; pub const Document = struct { arena: std.heap.ArenaAllocator, - file_name: []const u8, - source_text: []const u8, - top_level_declarations: ast.Document, pub fn deinit(ts: *Document) void { @@ -20,15 +17,13 @@ pub const Document = struct { } }; -pub fn parse(allocator: std.mem.Allocator, diagnostics: *Diagnostics, string_pool: *ptk.strings.Pool, file_name: []const u8, stream: anytype) !Document { +pub fn parse(allocator: std.mem.Allocator, diagnostics: *Diagnostics, string_pool: *ptk.strings.Pool, file_name: []const u8, source_code: []const u8) !Document { var arena = std.heap.ArenaAllocator.init(allocator); errdefer arena.deinit(); const file_name_copy = try arena.allocator().dupe(u8, file_name); - const text = try stream.readAllAlloc(arena.allocator(), 4 << 20); // 4 MB should be enough for now... 
- - var tokenizer = Tokenizer.init(text, file_name_copy); + var tokenizer = Tokenizer.init(source_code, file_name_copy); var parser = Parser{ .core = ParserCore.init(&tokenizer), @@ -61,8 +56,6 @@ pub fn parse(allocator: std.mem.Allocator, diagnostics: *Diagnostics, string_poo return Document{ .arena = arena, .file_name = file_name_copy, - .source_text = text, - .top_level_declarations = document_node, }; } @@ -235,6 +228,11 @@ const Parser = struct { } } + if (list.len() == 0) { + // Empty list is a non-recoverable syntax error: + try parser.emitDiagnostic(null, .illegal_empty_group, .{}); + } + return list; } diff --git a/test/analysis/accept/match-optional-many-item.ptk b/test/analysis/accept/match-optional-many-item.ptk new file mode 100644 index 0000000..fb4b409 --- /dev/null +++ b/test/analysis/accept/match-optional-many-item.ptk @@ -0,0 +1 @@ +rule mode = ( "first" "second" "third" )?; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-many-sequence.ptk b/test/analysis/accept/match-optional-many-sequence.ptk new file mode 100644 index 0000000..2c49812 --- /dev/null +++ b/test/analysis/accept/match-optional-many-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "one" "two" "three" )? "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-nested.ptk b/test/analysis/accept/match-optional-nested.ptk new file mode 100644 index 0000000..18bf0d9 --- /dev/null +++ b/test/analysis/accept/match-optional-nested.ptk @@ -0,0 +1 @@ +rule mode = "L0:0" ( "L1:0" ( "L2:0" "L2:1" "L2:2" )? "L1:2" )? "L0:2"; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-one-item.ptk b/test/analysis/accept/match-optional-one-item.ptk new file mode 100644 index 0000000..3c5ccc0 --- /dev/null +++ b/test/analysis/accept/match-optional-one-item.ptk @@ -0,0 +1 @@ +rule mode = ( "item" )?; \ No newline at end of file diff --git a/test/analysis/accept/match-optional-one-sequence.ptk b/test/analysis/accept/match-optional-one-sequence.ptk new file mode 100644 index 0000000..c5fd167 --- /dev/null +++ b/test/analysis/accept/match-optional-one-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "second" )? 
"third"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-many-item.ptk b/test/analysis/accept/match-rep_one-many-item.ptk new file mode 100644 index 0000000..89961d7 --- /dev/null +++ b/test/analysis/accept/match-rep_one-many-item.ptk @@ -0,0 +1 @@ +rule mode = ( "first" "second" "third" )+; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-many-sequence.ptk b/test/analysis/accept/match-rep_one-many-sequence.ptk new file mode 100644 index 0000000..0568546 --- /dev/null +++ b/test/analysis/accept/match-rep_one-many-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "one" "two" "three" )+ "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-nested.ptk b/test/analysis/accept/match-rep_one-nested.ptk new file mode 100644 index 0000000..99fbc2f --- /dev/null +++ b/test/analysis/accept/match-rep_one-nested.ptk @@ -0,0 +1 @@ +rule mode = "L0:0" ( "L1:0" ( "L2:0" "L2:1" "L2:2" )+ "L1:2" )+ "L0:2"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-one-item.ptk b/test/analysis/accept/match-rep_one-one-item.ptk new file mode 100644 index 0000000..7f273d5 --- /dev/null +++ b/test/analysis/accept/match-rep_one-one-item.ptk @@ -0,0 +1 @@ +rule mode = ( "item" )+; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_one-one-sequence.ptk b/test/analysis/accept/match-rep_one-one-sequence.ptk new file mode 100644 index 0000000..64af460 --- /dev/null +++ b/test/analysis/accept/match-rep_one-one-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "second" )+ "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-many-item.ptk b/test/analysis/accept/match-rep_zero-many-item.ptk new file mode 100644 index 0000000..5d9b366 --- /dev/null +++ b/test/analysis/accept/match-rep_zero-many-item.ptk @@ -0,0 +1 @@ +rule mode = ( "first" "second" "third" )*; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-many-sequence.ptk b/test/analysis/accept/match-rep_zero-many-sequence.ptk new file mode 100644 index 0000000..cadf2c5 --- /dev/null +++ b/test/analysis/accept/match-rep_zero-many-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "one" "two" "three" )* "third"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-nested.ptk b/test/analysis/accept/match-rep_zero-nested.ptk new file mode 100644 index 0000000..fee0799 --- /dev/null +++ b/test/analysis/accept/match-rep_zero-nested.ptk @@ -0,0 +1 @@ +rule mode = "L0:0" ( "L1:0" ( "L2:0" "L2:1" "L2:2" )* "L1:2" )* "L0:2"; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-one-item.ptk b/test/analysis/accept/match-rep_zero-one-item.ptk new file mode 100644 index 0000000..d058aee --- /dev/null +++ b/test/analysis/accept/match-rep_zero-one-item.ptk @@ -0,0 +1 @@ +rule mode = ( "item" )*; \ No newline at end of file diff --git a/test/analysis/accept/match-rep_zero-one-sequence.ptk b/test/analysis/accept/match-rep_zero-one-sequence.ptk new file mode 100644 index 0000000..34e3a06 --- /dev/null +++ b/test/analysis/accept/match-rep_zero-one-sequence.ptk @@ -0,0 +1 @@ +rule mode = "first" ( "second" )* "third"; \ No newline at end of file diff --git a/test/parser/accept/optional-nospace.ptk b/test/parser/accept/optional-nospace.ptk new file mode 100644 index 0000000..c72723f --- /dev/null +++ b/test/parser/accept/optional-nospace.ptk @@ -0,0 +1 @@ +rule group=("word")?; \ No newline at end of file diff --git a/test/parser/accept/optional-space.ptk 
b/test/parser/accept/optional-space.ptk new file mode 100644 index 0000000..b95fdab --- /dev/null +++ b/test/parser/accept/optional-space.ptk @@ -0,0 +1 @@ +rule group = ( "word" ) ? ; \ No newline at end of file diff --git a/test/parser/accept/rep_one-nospace.ptk b/test/parser/accept/rep_one-nospace.ptk new file mode 100644 index 0000000..9a8646d --- /dev/null +++ b/test/parser/accept/rep_one-nospace.ptk @@ -0,0 +1 @@ +rule group=("word")+; \ No newline at end of file diff --git a/test/parser/accept/rep_one-space.ptk b/test/parser/accept/rep_one-space.ptk new file mode 100644 index 0000000..c624039 --- /dev/null +++ b/test/parser/accept/rep_one-space.ptk @@ -0,0 +1 @@ +rule group = ( "word" ) + ; \ No newline at end of file diff --git a/test/parser/accept/rep_zero-nospace.ptk b/test/parser/accept/rep_zero-nospace.ptk new file mode 100644 index 0000000..3bfb157 --- /dev/null +++ b/test/parser/accept/rep_zero-nospace.ptk @@ -0,0 +1 @@ +rule group=("word")*; \ No newline at end of file diff --git a/test/parser/accept/rep_zero-space.ptk b/test/parser/accept/rep_zero-space.ptk new file mode 100644 index 0000000..3696d95 --- /dev/null +++ b/test/parser/accept/rep_zero-space.ptk @@ -0,0 +1 @@ +rule group = ( "word" ) * ; \ No newline at end of file diff --git a/test/parser/reject/empty-group.rule b/test/parser/reject/empty-group.rule new file mode 100644 index 0000000..2860712 --- /dev/null +++ b/test/parser/reject/empty-group.rule @@ -0,0 +1,2 @@ +# expected: E1200 +rule group = ( ); \ No newline at end of file diff --git a/test/parser/reject/empty-optional.rule b/test/parser/reject/empty-optional.rule new file mode 100644 index 0000000..82ac677 --- /dev/null +++ b/test/parser/reject/empty-optional.rule @@ -0,0 +1,2 @@ +# expected: E1200 +rule group = ( )?; \ No newline at end of file diff --git a/test/parser/reject/empty-rep_one.rule b/test/parser/reject/empty-rep_one.rule new file mode 100644 index 0000000..82ac677 --- /dev/null +++ b/test/parser/reject/empty-rep_one.rule @@ -0,0 +1,2 @@ +# expected: E1200 +rule group = ( )?; \ No newline at end of file diff --git a/test/parser/reject/empty-rep_zero.rule b/test/parser/reject/empty-rep_zero.rule new file mode 100644 index 0000000..82ac677 --- /dev/null +++ b/test/parser/reject/empty-rep_zero.rule @@ -0,0 +1,2 @@ +# expected: E1200 +rule group = ( )?; \ No newline at end of file diff --git a/test/parser/reject/empty-rule.rule b/test/parser/reject/empty-rule.rule new file mode 100644 index 0000000..8d32fe9 --- /dev/null +++ b/test/parser/reject/empty-rule.rule @@ -0,0 +1,2 @@ +# expected: E1200 +rule group = ; \ No newline at end of file From 66527580a7e62cb44e8260dcedf93385bb6e6e31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Thu, 2 Nov 2023 17:43:54 +0100 Subject: [PATCH 07/20] Implements parsing of start rule. 
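
This patch starts accepting `start <rule>;` declarations as well as rule
references (<name>) and token references ($name) inside productions. As a
rough sketch, a grammar the parser accepts after this change could look
like the following (rule and token names are made up for the example; the
new files under test/parser/accept/ contain the actual minimal cases):

    start <output>;
    rule output = "literal" $terminal <other>;
    rule other  = $terminal;

Reject tests keep documenting the diagnostics they must trigger in a
leading comment, e.g. `# expected: E1108` for an unexpected top-level
token.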
--- build.zig | 44 +++--- examples/ptkgen/grammar.ptk | 11 +- src/ptkgen/Diagnostics.zig | 5 + src/ptkgen/ast.zig | 3 +- src/ptkgen/ast_dump.zig | 12 +- src/ptkgen/intl.zig | 1 + src/ptkgen/intl/en.json | 1 + src/ptkgen/main.zig | 133 ++++++++++-------- src/ptkgen/parser.zig | 87 ++++++++++-- test/parser/accept/basic-rule-ref.ptk | 1 + test/parser/accept/basic-token-ref.ptk | 1 + test/parser/accept/document-start.ptk | 1 + .../parser/accept/rule-primitive-sequence.ptk | 1 + .../reject/unexpected-token-string.rule | 2 + 14 files changed, 207 insertions(+), 96 deletions(-) create mode 100644 test/parser/accept/basic-rule-ref.ptk create mode 100644 test/parser/accept/basic-token-ref.ptk create mode 100644 test/parser/accept/document-start.ptk create mode 100644 test/parser/accept/rule-primitive-sequence.ptk create mode 100644 test/parser/reject/unexpected-token-string.rule diff --git a/build.zig b/build.zig index 54c2ae2..e2ba020 100644 --- a/build.zig +++ b/build.zig @@ -97,21 +97,9 @@ pub fn build(b: *std.build.Builder) void { } } -const parser_ok_files = [_][]const u8{ - "test/parser/accept/empty.ptk", - "test/parser/accept/empty-with-comment-linefeed.ptk", - "test/parser/accept/empty-with-comment.ptk", - "test/parser/accept/identifiers.ptk", - - "test/parser/accept/optional-nospace.ptk", - "test/parser/accept/optional-space.ptk", - "test/parser/accept/rep_one-nospace.ptk", - "test/parser/accept/rep_one-space.ptk", - "test/parser/accept/rep_zero-nospace.ptk", - "test/parser/accept/rep_zero-space.ptk", - - // "examples/ptkgen/ast-with-unions.ptk", // TODO: Move to examples -} ++ analyis_ok_files; +const example_files = [_][]const u8{ + "/home/felix/projects/parser-toolkit/examples/ptkgen/grammar.ptk", +}; const analyis_ok_files = [_][]const u8{ "test/analysis/accept/match-literal-rule.ptk", @@ -138,7 +126,29 @@ const analyis_ok_files = [_][]const u8{ "test/analysis/accept/match-rep_one-many-item.ptk", "test/analysis/accept/match-rep_one-many-sequence.ptk", "test/analysis/accept/match-rep_one-nested.ptk", -}; +} ++ example_files; + +const parser_ok_files = [_][]const u8{ + "test/parser/accept/empty.ptk", + "test/parser/accept/empty-with-comment-linefeed.ptk", + "test/parser/accept/empty-with-comment.ptk", + "test/parser/accept/identifiers.ptk", + + "test/parser/accept/optional-nospace.ptk", + "test/parser/accept/optional-space.ptk", + "test/parser/accept/rep_one-nospace.ptk", + "test/parser/accept/rep_one-space.ptk", + "test/parser/accept/rep_zero-nospace.ptk", + "test/parser/accept/rep_zero-space.ptk", + + "test/parser/accept/basic-rule-ref.ptk", + "test/parser/accept/basic-token-ref.ptk", + "test/parser/accept/rule-primitive-sequence.ptk", + + "test/parser/accept/document-start.ptk", + + // "examples/ptkgen/ast-with-unions.ptk", // TODO: Move to examples +} ++ analyis_ok_files; const parser_reject_files = [_][]const u8{ "test/parser/reject/empty-rule.rule", @@ -146,4 +156,6 @@ const parser_reject_files = [_][]const u8{ "test/parser/reject/empty-optional.rule", "test/parser/reject/empty-rep_one.rule", "test/parser/reject/empty-rep_zero.rule", + + "test/parser/reject/unexpected-token-string.rule", }; diff --git a/examples/ptkgen/grammar.ptk b/examples/ptkgen/grammar.ptk index 2177628..07edaec 100644 --- a/examples/ptkgen/grammar.ptk +++ b/examples/ptkgen/grammar.ptk @@ -1,7 +1,7 @@ +start ; - -rule document = [ ]* ; +rule document = ( )* ; rule top_level = @@ -27,12 +27,13 @@ rule production_sequence = ( )+; rule production = $string_literal | $rule_ref + | $token_ref | "(" ")" "?" 
| "(" ")" "*" | "(" ")" "+" | "(" ")" ; -rule mapping = - # TODO -; \ No newline at end of file +# rule mapping = +# # TODO +# ; \ No newline at end of file diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig index 12b8fa1..555ac8d 100644 --- a/src/ptkgen/Diagnostics.zig +++ b/src/ptkgen/Diagnostics.zig @@ -27,6 +27,7 @@ pub const Code = enum(u16) { bad_string_escape = 1105, invalid_string_escape = 1106, excess_tokens = 1107, + unexpected_toplevel_token = 1108, // recoverable syntax errors: illegal_empty_group = 1200, @@ -68,6 +69,10 @@ pub fn Data(comptime code: Code) type { actual_type: parser.TokenType, actual_text: []const u8, }, + .unexpected_toplevel_token => struct { + actual_type: parser.TokenType, + actual_text: []const u8, + }, .unexpected_eof => NoDiagnosticData, .invalid_source_encoding => NoDiagnosticData, diff --git a/src/ptkgen/ast.zig b/src/ptkgen/ast.zig index 821e13e..1bfac2a 100644 --- a/src/ptkgen/ast.zig +++ b/src/ptkgen/ast.zig @@ -70,9 +70,10 @@ pub const BuiltinLiteral = String(.builtin); pub const Document = List(TopLevelDeclaration); pub const TopLevelDeclaration = union(enum) { - start: NodeRef, + start: RuleRef, rule: Rule, node: Node, + token: Token, }; pub const NodeRef = Reference(Node); // !mynode diff --git a/src/ptkgen/ast_dump.zig b/src/ptkgen/ast_dump.zig index 9bff39e..166b173 100644 --- a/src/ptkgen/ast_dump.zig +++ b/src/ptkgen/ast_dump.zig @@ -23,7 +23,7 @@ const AstPrinter = struct { var iter = ast.iterate(decls); while (iter.next()) |decl| { switch (decl) { - .start => |item| print("start {}\n", .{printer.fmtId(item.identifier)}), + .start => |item| print("start <{}>;\n", .{printer.fmtId(item.identifier)}), .rule => |rule| { print("rule {s}", .{printer.fmtId(rule.name.value)}); @@ -54,6 +54,11 @@ const AstPrinter = struct { print("node {s}", .{printer.fmtId(node.name.value)}); print(";\n", .{}); }, + + .token => |token| { + print("token {s}", .{printer.fmtId(token.name.value)}); + print(";\n", .{}); + }, } } } @@ -75,8 +80,9 @@ const AstPrinter = struct { fn dumpProd(printer: AstPrinter, production: ast.Production) void { switch (production) { .literal => |lit| print("\"{}\"", .{printer.fmtString(lit.value)}), - .terminal => |term| print("<{}>", .{printer.fmtId(term.identifier)}), - .recursion => print("", .{}), + .terminal => |term| print("${}", .{printer.fmtId(term.identifier)}), + .recursion => |term| print("<{}>", .{printer.fmtId(term.identifier)}), + .sequence, .optional, .repetition_zero, .repetition_one => |seq| { print("(", .{}); diff --git a/src/ptkgen/intl.zig b/src/ptkgen/intl.zig index f6c2705..981120c 100644 --- a/src/ptkgen/intl.zig +++ b/src/ptkgen/intl.zig @@ -39,6 +39,7 @@ pub const Localization = struct { invalid_string_escape: []const u8, excess_tokens: []const u8, illegal_empty_group: []const u8, + unexpected_toplevel_token: []const u8, }, errors: struct { diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json index 762bb4c..2fb57b2 100644 --- a/src/ptkgen/intl/en.json +++ b/src/ptkgen/intl/en.json @@ -6,6 +6,7 @@ "invalid_source_encoding": "Invalid source code encoding detected", "unexpected_token_eof": "Expected token {[expected_type]}, but end of file was discovered", "unexpected_token": "Expected token {[expected_type]}, but discovered token {[actual_type]} ('{[actual_text]}')", + "unexpected_toplevel_token": "Expected token 'start', 'rule', 'node' or 'token', but discovered token {[actual_type]} ('{[actual_text]}')", "unexpected_character": "Unexpected character: '{[character]}'", "unexpected_eof": 
"Unexpected end of file", "bad_string_escape": "Invalid string escape: Escape sequence at the end of string", diff --git a/src/ptkgen/main.zig b/src/ptkgen/main.zig index 169f457..151ea33 100644 --- a/src/ptkgen/main.zig +++ b/src/ptkgen/main.zig @@ -7,6 +7,7 @@ const args_parser = @import("args"); const ptk = @import("parser-toolkit"); const ast = @import("ast.zig"); +const intl = @import("intl.zig"); const parser = @import("parser.zig"); const ast_dump = @import("ast_dump.zig"); @@ -22,6 +23,8 @@ pub const CliOptions = struct { output: ?[]const u8 = null, test_mode: TestMode = .none, + @"max-file-size": u32 = 4 * 1024, // 4 MB of source code is a lot! + pub const shorthands = .{ .h = "help", .o = "output", @@ -37,6 +40,8 @@ pub const CliOptions = struct { .output = "If given, will print the generated code into ", .test_mode = "(internal use only, required for testing)", + + .@"max-file-size" = "Maximum input file size in KiB (default: 4096)", }, }; }; @@ -46,7 +51,8 @@ const TestMode = enum { parse_only, }; -pub fn main() !u8 { +const AppError = error{OutOfMemory} || std.fs.File.WriteError; +pub fn main() AppError!u8 { // errdefer |e| @compileLog(@TypeOf(e)); var stdout = std.io.getStdOut(); @@ -97,14 +103,74 @@ pub fn main() !u8 { else "stdint"; - compileFile( - dynamic_allocator, - &diagnostics, - &string_pool, - input_file, - file_name, - cli.options.test_mode, - ) catch |err| switch (err) { + var expectations = std.ArrayList(TestExpectation).init(dynamic_allocator); + defer expectations.deinit(); + + const processing_ok = process_file: { + // 4 MB should be enough for now... + var source_code = input_file.readToEndAlloc(static_allocator, 1024 * cli.options.@"max-file-size") catch |err| { + try convertErrorToDiagnostics(&diagnostics, file_name, err); + break :process_file false; + }; + + defer static_allocator.free(source_code); + + if (cli.options.test_mode != .none) { + // in test mode, parse expectations from source code: + var lines = std.mem.tokenize(u8, source_code, "\n"); + while (lines.next()) |line| { + const prefix = "# expected:"; + if (std.mem.startsWith(u8, line, prefix)) { + var items = std.mem.tokenize(u8, line[prefix.len..], " \t,"); + while (items.next()) |error_code| { + if (error_code.len == 0 or (error_code[0] != 'E' and error_code[0] != 'W' and error_code[0] != 'D')) + @panic("invalid error code!"); + const id = std.fmt.parseInt(u16, error_code[1..], 10) catch @panic("bad integer"); + const code = std.meta.intToEnum(Diagnostics.Code, id) catch @panic("bad diagnostic code"); + try expectations.append(.{ .code = code }); + } + } + } + } + + compileFile( + dynamic_allocator, + &diagnostics, + &string_pool, + source_code, + file_name, + cli.options.test_mode, + ) catch |err| { + try convertErrorToDiagnostics(&diagnostics, file_name, err); + break :process_file false; + }; + + // Todo: continue from here? 
+ + break :process_file true; + }; + + if (cli.options.test_mode == .none) { + try diagnostics.render(stderr.writer()); + + return if (processing_ok and !diagnostics.hasErrors()) + 0 // exit code for success + else + 1; // exit code for failure + } else { + // test fails through `error.TestExpectationMismatched`, not through diagnostics: + validateDiagnostics(dynamic_allocator, diagnostics, expectations.items) catch { + try stderr.writeAll("Full diagnostics:\n"); + try diagnostics.render(stderr.writer()); + + return 1; + }; + return 0; + } +} + +fn convertErrorToDiagnostics(diagnostics: *Diagnostics, file_name: []const u8, err: intl.FormattableError) error{OutOfMemory}!void { + switch (err) { // syntax errors must produce diagnostics: error.SyntaxError, error.InvalidSourceEncoding => std.debug.assert(diagnostics.hasErrors()), @@ -143,20 +209,6 @@ pub fn main() !u8 { .column = 1, }, .io_error, .{ .error_code = e }); }, - - error.TestExpectationMismatched => return 1, // this is a shortcut we can take to not render the diagnostics on test failure - }; - - if (cli.options.test_mode == .none) { - try diagnostics.render(stderr.writer()); - - return if (diagnostics.hasErrors()) - 1 - else - 0; - } else { - // test fails through `error.TestExpectationMismatched`, not through diagnostics - return 0; } } @@ -187,6 +239,7 @@ fn validateDiagnostics(allocator: std.mem.Allocator, diagnostics: Diagnostics, e if (std.mem.indexOfScalar(Diagnostics.Code, available.items, e)) |index| { _ = available.swapRemove(index); _ = expected.swapRemove(i); + // std.log.info("found matching diagnostic {s}", .{@tagName(e)}); } else { i += 1; } @@ -210,34 +263,10 @@ fn compileFile( allocator: std.mem.Allocator, diagnostics: *Diagnostics, string_pool: *ptk.strings.Pool, - input_file: std.fs.File, + source_code: []const u8, file_name: []const u8, mode: TestMode, ) !void { - var source_code = try input_file.readToEndAlloc(allocator, 4 << 20); // 4 MB should be enough for now... 
- defer allocator.free(source_code); - - var expectations = std.ArrayList(TestExpectation).init(allocator); - defer expectations.deinit(); - - if (mode != .none) { - // parse expectations from source code: - var lines = std.mem.tokenize(u8, source_code, "\n"); - while (lines.next()) |line| { - const prefix = "# expected:"; - if (std.mem.startsWith(u8, line, prefix)) { - var items = std.mem.tokenize(u8, line[prefix.len..], " \t,"); - while (items.next()) |error_code| { - if (error_code.len == 0 or (error_code[0] != 'E' and error_code[0] != 'W' and error_code[0] != 'D')) - @panic("invalid error code!"); - const id = std.fmt.parseInt(u16, error_code[1..], 10) catch @panic("bad integer"); - const code = std.meta.intToEnum(Diagnostics.Code, id) catch @panic("bad diagnostic code"); - try expectations.append(.{ .code = code }); - } - } - } - } - var tree = try parser.parse( allocator, diagnostics, @@ -247,19 +276,11 @@ fn compileFile( ); defer tree.deinit(); - if (mode == .parse_only) { - try validateDiagnostics(allocator, diagnostics.*, expectations.items); - return; - } - // TODO: Implement sema // TODO: Implement parsergen / tablegen / highlightergen if (mode == .none) { ast_dump.dump(string_pool, tree); - } else { - // we need to validate against test expectations when doing *any* test mode - try validateDiagnostics(allocator, diagnostics.*, expectations.items); } } diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index c586b67..f739f85 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -143,24 +143,45 @@ const Parser = struct { } fn acceptTopLevelDecl(parser: *Parser) FatalAcceptError!?ast.TopLevelDeclaration { + if (parser.acceptStartDecl()) |root_rule| { + return .{ .start = root_rule }; + } else |err| try filterAcceptError(err); + if (parser.acceptRule()) |rule| { return .{ .rule = rule }; } else |err| try filterAcceptError(err); // Detect any excess tokens on the top level: - const excess_tokens = if (parser.core.nextToken()) |token| - (token != null) - else |err| switch (err) { - error.UnexpectedCharacter => true, - }; - if (excess_tokens) { - try parser.emitDiagnostic(null, .unexpected_eof, .{}); - return error.SyntaxError; + if (parser.core.nextToken()) |maybe_token| { + if (maybe_token) |token| { + try parser.emitDiagnostic(token.location, .unexpected_toplevel_token, .{ + .actual_type = token.type, + .actual_text = token.text, + }); + return error.SyntaxError; + } else { + // This is actually the good path here, as only if we don't find any token or tokenization error, + // we reached the end of the file. 
+ } + } else |err| switch (err) { + error.UnexpectedCharacter => { + try parser.emitUnexpectedCharacter(parser.core.tokenizer.current_location, parser.core.tokenizer.offset); + return error.SyntaxError; + }, } return null; } + fn acceptStartDecl(parser: *Parser) AcceptError!ast.RuleRef { + try parser.acceptLiteral(.start, .recover); + const init_rule = try parser.acceptRuleReference(.fail); + + try parser.acceptLiteral(.@";", .fail); + + return init_rule; + } + fn acceptRule(parser: *Parser) AcceptError!ast.Rule { var state = parser.save(); errdefer parser.restore(state); @@ -229,7 +250,7 @@ const Parser = struct { } if (list.len() == 0) { - // Empty list is a non-recoverable syntax error: + // Empty list is a recoverable syntax error: try parser.emitDiagnostic(null, .illegal_empty_group, .{}); } @@ -252,11 +273,20 @@ const Parser = struct { } } - const str = try parser.acceptStringLiteral(.recover); + if (parser.acceptStringLiteral(.recover)) |str| { + return ast.Production{ .literal = str }; + } else |err| try filterAcceptError(err); - return ast.Production{ - .literal = str, - }; + if (parser.acceptTokenReference(.recover)) |ref| { + return ast.Production{ .terminal = ref }; + } else |err| try filterAcceptError(err); + + if (parser.acceptRuleReference(.recover)) |ref| { + return ast.Production{ .recursion = ref }; + } else |err| try filterAcceptError(err); + + // We're done with out list + return error.UnexpectedTokenRecoverable; } fn acceptAstMapping(parser: *Parser) AcceptError!ast.AstMapping { @@ -282,18 +312,45 @@ const Parser = struct { fn acceptIdentifier(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.Identifier { const token = try parser.acceptToken(.identifier, accept_mode); - return ast.Identifier{ .location = token.location, .value = try parser.unwrapIdentifierString(token.location, token.text), }; } + fn acceptRuleReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.RuleRef { + const token = try parser.acceptToken(.rule_ref, accept_mode); + std.debug.assert(std.mem.startsWith(u8, token.text, "<")); + std.debug.assert(std.mem.endsWith(u8, token.text, ">")); + return ast.RuleRef{ + .location = token.location, + .identifier = try parser.unwrapIdentifierString(token.location, token.text[1 .. 
token.text.len - 1]), + }; + } + + fn acceptTokenReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.TokenRef { + const token = try parser.acceptToken(.token_ref, accept_mode); + std.debug.assert(std.mem.startsWith(u8, token.text, "$")); + return ast.TokenRef{ + .location = token.location, + .identifier = try parser.unwrapIdentifierString(token.location, token.text[1..]), + }; + } + + fn acceptNodeReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.NodeRef { + const token = try parser.acceptToken(.node_ref, accept_mode); + std.debug.assert(std.mem.startsWith(u8, token.text, "!")); + return ast.NodeRef{ + .location = token.location, + .identifier = try parser.unwrapIdentifierString(token.location, token.text[1..]), + }; + } + fn acceptLiteral(parser: *Parser, comptime token_type: TokenType, accept_mode: AcceptMode) AcceptError!void { _ = try parser.acceptToken(token_type, accept_mode); } - fn tryAcceptLiteral(parser: *Parser, comptime token_type: TokenType) !bool { + fn tryAcceptLiteral(parser: *Parser, comptime token_type: TokenType) FatalAcceptError!bool { _ = parser.acceptToken(token_type, .recover) catch |err| switch (err) { error.UnexpectedTokenRecoverable => return false, error.OutOfMemory, error.InvalidSourceEncoding, error.SyntaxError => |e| return e, diff --git a/test/parser/accept/basic-rule-ref.ptk b/test/parser/accept/basic-rule-ref.ptk new file mode 100644 index 0000000..e31192c --- /dev/null +++ b/test/parser/accept/basic-rule-ref.ptk @@ -0,0 +1 @@ +rule output = ; \ No newline at end of file diff --git a/test/parser/accept/basic-token-ref.ptk b/test/parser/accept/basic-token-ref.ptk new file mode 100644 index 0000000..29f9ce7 --- /dev/null +++ b/test/parser/accept/basic-token-ref.ptk @@ -0,0 +1 @@ +rule output = $terminal; \ No newline at end of file diff --git a/test/parser/accept/document-start.ptk b/test/parser/accept/document-start.ptk new file mode 100644 index 0000000..0623db6 --- /dev/null +++ b/test/parser/accept/document-start.ptk @@ -0,0 +1 @@ +start ; \ No newline at end of file diff --git a/test/parser/accept/rule-primitive-sequence.ptk b/test/parser/accept/rule-primitive-sequence.ptk new file mode 100644 index 0000000..0067313 --- /dev/null +++ b/test/parser/accept/rule-primitive-sequence.ptk @@ -0,0 +1 @@ +rule sequence = "literal" $terminal "literal" $terminal ; \ No newline at end of file diff --git a/test/parser/reject/unexpected-token-string.rule b/test/parser/reject/unexpected-token-string.rule new file mode 100644 index 0000000..4848c41 --- /dev/null +++ b/test/parser/reject/unexpected-token-string.rule @@ -0,0 +1,2 @@ +# expected: E1108 +"bad toplevel token!" \ No newline at end of file From 45a467f038a9e9c8f6f4dfca0186574cb5ab5ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 3 Nov 2023 12:53:20 +0100 Subject: [PATCH 08/20] Starts parsing of ast mappings. 
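
Productions can now be followed by `=>` and a first set of mapping values.
A sketch of the accepted forms (rule names and mapped values are
placeholders, mirroring the new test/parser/accept/mapping-*.ptk files):

    rule r = "word" => $0;       # value reference
    rule s = "word" => `.word`;  # code literal
    rule t = "word" => @word;    # user-defined value

An empty mapping after `=>` is rejected with E1201; a token that cannot
start a mapping value is rejected with E1109.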
--- build.zig | 20 ++- examples/ptkgen/ast-with-unions.ptk | 6 +- examples/ptkgen/grammar.ptk | 30 ++++- src/ptkgen/Diagnostics.zig | 13 ++ src/ptkgen/ast.zig | 17 ++- src/ptkgen/intl.zig | 44 ++++--- src/ptkgen/intl/en.json | 5 +- src/ptkgen/parser.zig | 121 +++++++++++++++++- test/parser/accept/mapping-code-literal.ptk | 1 + test/parser/accept/mapping-user-value.ptk | 1 + test/parser/accept/mapping-value-ref.ptk | 1 + .../reject/bad-mapping-invalid-token.ptk | 2 + test/parser/reject/bad-mapping-too-long.ptk | 2 + .../{empty-group.rule => empty-group.ptk} | 0 test/parser/reject/empty-mapping.ptk | 2 + ...empty-optional.rule => empty-optional.ptk} | 0 .../{empty-rep_one.rule => empty-rep_one.ptk} | 0 ...empty-rep_zero.rule => empty-rep_zero.ptk} | 0 .../{empty-rule.rule => empty-rule.ptk} | 0 ...tring.rule => unexpected-token-string.ptk} | 0 20 files changed, 226 insertions(+), 39 deletions(-) create mode 100644 test/parser/accept/mapping-code-literal.ptk create mode 100644 test/parser/accept/mapping-user-value.ptk create mode 100644 test/parser/accept/mapping-value-ref.ptk create mode 100644 test/parser/reject/bad-mapping-invalid-token.ptk create mode 100644 test/parser/reject/bad-mapping-too-long.ptk rename test/parser/reject/{empty-group.rule => empty-group.ptk} (100%) create mode 100644 test/parser/reject/empty-mapping.ptk rename test/parser/reject/{empty-optional.rule => empty-optional.ptk} (100%) rename test/parser/reject/{empty-rep_one.rule => empty-rep_one.ptk} (100%) rename test/parser/reject/{empty-rep_zero.rule => empty-rep_zero.ptk} (100%) rename test/parser/reject/{empty-rule.rule => empty-rule.ptk} (100%) rename test/parser/reject/{unexpected-token-string.rule => unexpected-token-string.ptk} (100%) diff --git a/build.zig b/build.zig index e2ba020..05e948e 100644 --- a/build.zig +++ b/build.zig @@ -147,15 +147,23 @@ const parser_ok_files = [_][]const u8{ "test/parser/accept/document-start.ptk", + "test/parser/accept/mapping-value-ref.ptk", + "test/parser/accept/mapping-code-literal.ptk", + "test/parser/accept/mapping-user-value.ptk", + // "examples/ptkgen/ast-with-unions.ptk", // TODO: Move to examples } ++ analyis_ok_files; const parser_reject_files = [_][]const u8{ - "test/parser/reject/empty-rule.rule", - "test/parser/reject/empty-group.rule", - "test/parser/reject/empty-optional.rule", - "test/parser/reject/empty-rep_one.rule", - "test/parser/reject/empty-rep_zero.rule", + "test/parser/reject/empty-rule.ptk", + "test/parser/reject/empty-group.ptk", + "test/parser/reject/empty-optional.ptk", + "test/parser/reject/empty-rep_one.ptk", + "test/parser/reject/empty-rep_zero.ptk", + + "test/parser/reject/unexpected-token-string.ptk", - "test/parser/reject/unexpected-token-string.rule", + "test/parser/reject/empty-mapping.ptk", + "test/parser/reject/bad-mapping-invalid-token.ptk", + "test/parser/reject/bad-mapping-too-long.ptk", }; diff --git a/examples/ptkgen/ast-with-unions.ptk b/examples/ptkgen/ast-with-unions.ptk index fa170fc..1b041d7 100644 --- a/examples/ptkgen/ast-with-unions.ptk +++ b/examples/ptkgen/ast-with-unions.ptk @@ -56,7 +56,7 @@ node TLDeclaration = union ; rule toplevel-decl : !TLDeclaration = - => ns: $0, # this is syntax for a union field selector as unions are not compounds - | => interface: $0, - | => module: $0, + => ns: $0 # this is syntax for a union field selector as unions are not compounds + | => interface: $0 + | => module: $0 ; diff --git a/examples/ptkgen/grammar.ptk b/examples/ptkgen/grammar.ptk index 07edaec..03caa88 100644 --- 
a/examples/ptkgen/grammar.ptk +++ b/examples/ptkgen/grammar.ptk @@ -34,6 +34,30 @@ rule production = | "(" ")" ; -# rule mapping = -# # TODO -# ; \ No newline at end of file +rule mapping = + $identifier ":" + | +; + +rule mapped_value = + # { field = , field = , ... } + | # { , , ... } + | $code_literal # `code` + | $value_ref # $0 + | $userval "(" ")" # @func(...) + | $userval # @value +; + +rule struct_ctor = + "{" ( "," )* "}" +; + +rule assign_field = + $identifier "=" $mapped_value +; + +rule list_ctor = "{" "}"; + +rule value_list = + ( "," )* +; \ No newline at end of file diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig index 555ac8d..e9de60f 100644 --- a/src/ptkgen/Diagnostics.zig +++ b/src/ptkgen/Diagnostics.zig @@ -28,9 +28,12 @@ pub const Code = enum(u16) { invalid_string_escape = 1106, excess_tokens = 1107, unexpected_toplevel_token = 1108, + unexpected_token_no_context = 1109, // recoverable syntax errors: illegal_empty_group = 1200, + empty_mapping = 1201, + integer_overflow = 1202, comptime { std.debug.assert(first_error < first_warning); @@ -73,6 +76,9 @@ pub fn Data(comptime code: Code) type { actual_type: parser.TokenType, actual_text: []const u8, }, + .unexpected_token_no_context => struct { + actual_type: parser.TokenType, + }, .unexpected_eof => NoDiagnosticData, .invalid_source_encoding => NoDiagnosticData, @@ -83,6 +89,13 @@ pub fn Data(comptime code: Code) type { .excess_tokens => struct { token_type: parser.TokenType }, .illegal_empty_group => NoDiagnosticData, + .empty_mapping => NoDiagnosticData, + + .integer_overflow => struct { + min: []const u8, + max: []const u8, + actual: []const u8, + }, // else => @compileError(std.fmt.comptimePrint("Code {} has no diagnostic type associated!", .{code})), }; diff --git a/src/ptkgen/ast.zig b/src/ptkgen/ast.zig index 1bfac2a..163d94b 100644 --- a/src/ptkgen/ast.zig +++ b/src/ptkgen/ast.zig @@ -65,7 +65,7 @@ fn String(comptime Tag: anytype) type { pub const Identifier = String(.identifier); pub const StringLiteral = String(.string); pub const CodeLiteral = String(.code); -pub const BuiltinLiteral = String(.builtin); +pub const UserDefinedIdentifier = String(.user_defined); pub const Document = List(TopLevelDeclaration); @@ -119,8 +119,9 @@ pub const AstMapping = union(enum) { constructor: List(FieldAssignment), // { field = ..., field = ... 
} literal: CodeLiteral, // field: value context_reference: ValueRef, // $0 - user_reference: BuiltinLiteral, // @field - function_call: FunctionCall, // ...(a,b,c) + user_reference: UserDefinedIdentifier, // @field + user_function_call: FunctionCall(UserDefinedIdentifier), // @builtin(a,b,c) + function_call: FunctionCall(Identifier), // identifier(a,b,c) union_init: UnionInitializer, }; @@ -129,10 +130,12 @@ pub const UnionInitializer = struct { value: *AstMapping, }; -pub const FunctionCall = struct { - function: *AstMapping, - arguments: List(AstMapping), -}; +pub fn FunctionCall(comptime Name: type) type { + return struct { + function: Name, + arguments: List(AstMapping), + }; +} pub const FieldAssignment = struct { location: Location, diff --git a/src/ptkgen/intl.zig b/src/ptkgen/intl.zig index 981120c..70af58f 100644 --- a/src/ptkgen/intl.zig +++ b/src/ptkgen/intl.zig @@ -1,5 +1,7 @@ const std = @import("std"); +const Diagnostics = @import("Diagnostics.zig"); + pub const Language = enum { en, }; @@ -12,7 +14,7 @@ pub const localizations = struct { pub const en = Localization.generate(@embedFile("intl/en.json")); }; -pub const FormattableError = blk: { +pub const FormattableError: type = blk: { const list = @typeInfo(std.meta.fieldInfo(Localization, .errors).type).Struct.fields; var errors: [list.len]std.builtin.Type.Error = undefined; @@ -25,22 +27,32 @@ pub const FormattableError = blk: { }); }; +pub const DiagnosticStrings: type = blk: { + const list = @typeInfo(Diagnostics.Code).Enum.fields; + + var dst_fields: [list.len]std.builtin.Type.StructField = undefined; + for (&dst_fields, list) |*dst, src| { + dst.* = .{ + .name = src.name, + .type = []const u8, + .default_value = null, + .is_comptime = false, + .alignment = @alignOf([]const u8), + }; + } + + break :blk @Type(.{ + .Struct = .{ + .layout = .Auto, + .fields = &dst_fields, + .decls = &.{}, + .is_tuple = false, + }, + }); +}; + pub const Localization = struct { - diagnostics: struct { - out_of_memory: []const u8, - file_limit_exceeded: []const u8, - io_error: []const u8, - invalid_source_encoding: []const u8, - unexpected_token_eof: []const u8, - unexpected_token: []const u8, - unexpected_character: []const u8, - unexpected_eof: []const u8, - bad_string_escape: []const u8, - invalid_string_escape: []const u8, - excess_tokens: []const u8, - illegal_empty_group: []const u8, - unexpected_toplevel_token: []const u8, - }, + diagnostics: DiagnosticStrings, errors: struct { Unexpected: []const u8, diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json index 2fb57b2..dd32bbe 100644 --- a/src/ptkgen/intl/en.json +++ b/src/ptkgen/intl/en.json @@ -12,7 +12,10 @@ "bad_string_escape": "Invalid string escape: Escape sequence at the end of string", "invalid_string_escape": "Invalid string escape \\{[escape]}", "excess_tokens": "Excess token at the end of the file: {[token_type]}", - "illegal_empty_group": "Production sequence may not be empty" + "illegal_empty_group": "Production sequence may not be empty", + "unexpected_token_no_context": "Unexpected token '{[actual_type]}'", + "empty_mapping": "Empty mappings are not allowed", + "integer_overflow": "Integer value {[actual]} out of range. 
Values must be between {[min]} and {[max]}" }, "errors": { "Unexpected": "unexpected error encountered", diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index f739f85..1f7b0bd 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -82,7 +82,7 @@ pub const TokenType = enum { rule_ref, // token_ref, // $token value_ref, // $0 - builtin_ref, // @builtin + userval_ref, // @userval // values @@ -290,8 +290,98 @@ const Parser = struct { } fn acceptAstMapping(parser: *Parser) AcceptError!ast.AstMapping { + const position = parser.core.tokenizer.current_location; + + if (parser.acceptUnionInit()) |init| { + return .{ .union_init = init }; + } else |err| try filterAcceptError(err); + + if (parser.acceptCodeLiteral()) |literal| { + return .{ .literal = literal }; + } else |err| try filterAcceptError(err); + + if (parser.acceptValueReference()) |literal| { + return .{ .context_reference = literal }; + } else |err| try filterAcceptError(err); + + if (parser.acceptBuiltinCall()) |call| { + return .{ .function_call = call }; + } else |err| try filterAcceptError(err); + + if (parser.acceptUserCall()) |call| { + return .{ .user_function_call = call }; + } else |err| try filterAcceptError(err); + + if (parser.acceptUserReference()) |ref| { + return .{ .user_reference = ref }; + } else |err| try filterAcceptError(err); + + if (try parser.tryAcceptLiteral(.@";") or try parser.tryAcceptLiteral(.@"|")) { + try parser.emitDiagnostic(position, .empty_mapping, .{}); + return error.SyntaxError; + } + + return parser.emitUnexpectedToken(); + } + + fn acceptUnionInit(parser: *Parser) AcceptError!ast.UnionInitializer { _ = parser; - @panic("not implemented yet"); + return error.UnexpectedTokenRecoverable; + } + + fn acceptCodeLiteral(parser: *Parser) AcceptError!ast.CodeLiteral { + const token = try parser.acceptToken(.code_literal, .recover); + + std.debug.assert(std.mem.startsWith(u8, token.text, "`")); + std.debug.assert(std.mem.endsWith(u8, token.text, "`")); + + var prefix_len: usize = 0; + while (token.text[prefix_len] == '`') { + prefix_len += 1; + } + + return ast.CodeLiteral{ + .location = token.location, + .value = try parser.pool.insert(token.text[prefix_len .. 
token.text.len - prefix_len]), + }; + } + + fn acceptValueReference(parser: *Parser) AcceptError!ast.ValueRef { + const token = try parser.acceptToken(.value_ref, .recover); + std.debug.assert(std.mem.startsWith(u8, token.text, "$")); + return ast.ValueRef{ + .location = token.location, + .index = std.fmt.parseInt(u32, token.text[1..], 10) catch |err| switch (err) { + error.InvalidCharacter => unreachable, // ensured by tokenizer, + error.Overflow => blk: { + try parser.emitDiagnostic(token.location, .integer_overflow, .{ + .min = comptime std.fmt.comptimePrint("{}", .{std.math.minInt(u32)}), + .max = comptime std.fmt.comptimePrint("{}", .{std.math.maxInt(u32)}), + .actual = token.text[1..], + }); + break :blk 0; + }, + }, + }; + } + + fn acceptBuiltinCall(parser: *Parser) AcceptError!ast.FunctionCall(ast.Identifier) { + _ = parser; + return error.UnexpectedTokenRecoverable; + } + + fn acceptUserCall(parser: *Parser) AcceptError!ast.FunctionCall(ast.UserDefinedIdentifier) { + _ = parser; + return error.UnexpectedTokenRecoverable; + } + + fn acceptUserReference(parser: *Parser) AcceptError!ast.UserDefinedIdentifier { + const token = try parser.acceptToken(.userval_ref, .recover); + std.debug.assert(std.mem.startsWith(u8, token.text, "@")); + return ast.UserDefinedIdentifier{ + .location = token.location, + .value = try parser.pool.insert(token.text[1..]), + }; } fn acceptTypeSpec(parser: *Parser) AcceptError!ast.TypeSpec { @@ -415,6 +505,31 @@ const Parser = struct { try parser.diagnostics.emit(loc orelse parser.core.tokenizer.current_location, code, data); } + fn emitUnexpectedToken(parser: *Parser) AcceptError { + const state = parser.save(); + defer parser.restore(state); + + const location = parser.core.tokenizer.current_location; + const offset = parser.core.tokenizer.offset; + + const token_or_null = parser.core.nextToken() catch |err| switch (err) { + error.UnexpectedCharacter => { + try parser.emitUnexpectedCharacter(location, offset); + return error.SyntaxError; + }, + }; + + const token = token_or_null orelse { + try parser.emitDiagnostic(location, .unexpected_eof, .{}); + return error.SyntaxError; + }; + + try parser.emitDiagnostic(location, .unexpected_token_no_context, .{ + .actual_type = token.type, + }); + return error.SyntaxError; + } + fn emitUnexpectedCharacter(parser: Parser, location: ptk.Location, source_offset: usize) !void { var utf8_view = std.unicode.Utf8View.init(parser.core.tokenizer.source[source_offset..]) catch { try parser.emitDiagnostic(location, .invalid_source_encoding, .{}); @@ -577,7 +692,7 @@ const Tokenizer = ptk.Tokenizer(TokenType, &.{ Pattern.create(.rule_ref, matchRuleRef), Pattern.create(.token_ref, matchTokenRef), Pattern.create(.value_ref, matchValueRef), - Pattern.create(.builtin_ref, matchBuiltinRef), + Pattern.create(.userval_ref, matchBuiltinRef), // Whitespace is the "kitchen sink" at the end: Pattern.create(.whitespace, match.takeAnyOf(" \r\n\t")), diff --git a/test/parser/accept/mapping-code-literal.ptk b/test/parser/accept/mapping-code-literal.ptk new file mode 100644 index 0000000..b18e2b9 --- /dev/null +++ b/test/parser/accept/mapping-code-literal.ptk @@ -0,0 +1 @@ +rule r = "" => `.item`; \ No newline at end of file diff --git a/test/parser/accept/mapping-user-value.ptk b/test/parser/accept/mapping-user-value.ptk new file mode 100644 index 0000000..2183ab2 --- /dev/null +++ b/test/parser/accept/mapping-user-value.ptk @@ -0,0 +1 @@ +rule r = "" => @value; \ No newline at end of file diff --git a/test/parser/accept/mapping-value-ref.ptk 
b/test/parser/accept/mapping-value-ref.ptk new file mode 100644 index 0000000..b2293b8 --- /dev/null +++ b/test/parser/accept/mapping-value-ref.ptk @@ -0,0 +1 @@ +rule r = "" => $0; \ No newline at end of file diff --git a/test/parser/reject/bad-mapping-invalid-token.ptk b/test/parser/reject/bad-mapping-invalid-token.ptk new file mode 100644 index 0000000..5d783df --- /dev/null +++ b/test/parser/reject/bad-mapping-invalid-token.ptk @@ -0,0 +1,2 @@ +# expected: E1109 +rule group = "value" => "bad" ; \ No newline at end of file diff --git a/test/parser/reject/bad-mapping-too-long.ptk b/test/parser/reject/bad-mapping-too-long.ptk new file mode 100644 index 0000000..1ecf764 --- /dev/null +++ b/test/parser/reject/bad-mapping-too-long.ptk @@ -0,0 +1,2 @@ +# expected: E1102 +rule group = "value" => $0 whatever ; \ No newline at end of file diff --git a/test/parser/reject/empty-group.rule b/test/parser/reject/empty-group.ptk similarity index 100% rename from test/parser/reject/empty-group.rule rename to test/parser/reject/empty-group.ptk diff --git a/test/parser/reject/empty-mapping.ptk b/test/parser/reject/empty-mapping.ptk new file mode 100644 index 0000000..6479ae9 --- /dev/null +++ b/test/parser/reject/empty-mapping.ptk @@ -0,0 +1,2 @@ +# expected: E1201 +rule group = "value" => ; \ No newline at end of file diff --git a/test/parser/reject/empty-optional.rule b/test/parser/reject/empty-optional.ptk similarity index 100% rename from test/parser/reject/empty-optional.rule rename to test/parser/reject/empty-optional.ptk diff --git a/test/parser/reject/empty-rep_one.rule b/test/parser/reject/empty-rep_one.ptk similarity index 100% rename from test/parser/reject/empty-rep_one.rule rename to test/parser/reject/empty-rep_one.ptk diff --git a/test/parser/reject/empty-rep_zero.rule b/test/parser/reject/empty-rep_zero.ptk similarity index 100% rename from test/parser/reject/empty-rep_zero.rule rename to test/parser/reject/empty-rep_zero.ptk diff --git a/test/parser/reject/empty-rule.rule b/test/parser/reject/empty-rule.ptk similarity index 100% rename from test/parser/reject/empty-rule.rule rename to test/parser/reject/empty-rule.ptk diff --git a/test/parser/reject/unexpected-token-string.rule b/test/parser/reject/unexpected-token-string.ptk similarity index 100% rename from test/parser/reject/unexpected-token-string.rule rename to test/parser/reject/unexpected-token-string.ptk From 73b5c85ea7a003f820c87c852dc4c30380a33021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 3 Nov 2023 19:01:39 +0100 Subject: [PATCH 09/20] Implements a good amount of ast mappings --- build.zig | 17 +++ examples/ptkgen/grammar.ptk | 6 +- src/ptkgen/ast.zig | 8 +- src/ptkgen/ast_dump.zig | 58 +++++++- src/ptkgen/parser.zig | 126 ++++++++++++++++-- test/parser/accept/mapping-array-a0.ptk | 1 + test/parser/accept/mapping-array-a1.ptk | 1 + test/parser/accept/mapping-array-a5.ptk | 1 + test/parser/accept/mapping-array-nested.ptk | 1 + .../accept/mapping-builtin-function-a0.ptk | 1 + .../accept/mapping-builtin-function-a1.ptk | 1 + .../accept/mapping-builtin-function-a5.ptk | 1 + .../accept/mapping-builtin-function-nest.ptk | 1 + .../accept/mapping-user-function-a0.ptk | 1 + .../accept/mapping-user-function-a1.ptk | 1 + .../accept/mapping-user-function-a5.ptk | 1 + .../accept/mapping-user-function-nest.ptk | 1 + test/parser/accept/mapping-variant-init.ptk | 1 + 18 files changed, 207 insertions(+), 21 deletions(-) create mode 100644 test/parser/accept/mapping-array-a0.ptk create mode 100644 
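
Besides the simple values, list initializers, variant initializers and
function calls are now parsed behind `=>`. A rough sketch of the accepted
forms (rule, field and function names are placeholders):

    rule a = "x" "y" => { $0, $1 };      # list initializer
    rule b = "x"     => variant: $0;     # variant initializer
    rule c = "x"     => @make($0, `1`);  # user function call
    rule d = "x"     => builtin($0);     # builtin function call

Record initializers are modeled in the AST already, but acceptRecordInit
is still a stub that only returns a recoverable error.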
test/parser/accept/mapping-array-a1.ptk create mode 100644 test/parser/accept/mapping-array-a5.ptk create mode 100644 test/parser/accept/mapping-array-nested.ptk create mode 100644 test/parser/accept/mapping-builtin-function-a0.ptk create mode 100644 test/parser/accept/mapping-builtin-function-a1.ptk create mode 100644 test/parser/accept/mapping-builtin-function-a5.ptk create mode 100644 test/parser/accept/mapping-builtin-function-nest.ptk create mode 100644 test/parser/accept/mapping-user-function-a0.ptk create mode 100644 test/parser/accept/mapping-user-function-a1.ptk create mode 100644 test/parser/accept/mapping-user-function-a5.ptk create mode 100644 test/parser/accept/mapping-user-function-nest.ptk create mode 100644 test/parser/accept/mapping-variant-init.ptk diff --git a/build.zig b/build.zig index 05e948e..9cfc229 100644 --- a/build.zig +++ b/build.zig @@ -151,6 +151,23 @@ const parser_ok_files = [_][]const u8{ "test/parser/accept/mapping-code-literal.ptk", "test/parser/accept/mapping-user-value.ptk", + "test/parser/accept/mapping-builtin-function-a0.ptk", + "test/parser/accept/mapping-builtin-function-a1.ptk", + "test/parser/accept/mapping-builtin-function-a5.ptk", + "test/parser/accept/mapping-builtin-function-nest.ptk", + + "test/parser/accept/mapping-user-function-a0.ptk", + "test/parser/accept/mapping-user-function-a1.ptk", + "test/parser/accept/mapping-user-function-a5.ptk", + "test/parser/accept/mapping-user-function-nest.ptk", + + "test/parser/accept/mapping-array-a0.ptk", + "test/parser/accept/mapping-array-a1.ptk", + "test/parser/accept/mapping-array-a5.ptk", + "test/parser/accept/mapping-array-nested.ptk", + + "test/parser/accept/mapping-variant-init.ptk", + // "examples/ptkgen/ast-with-unions.ptk", // TODO: Move to examples } ++ analyis_ok_files; diff --git a/examples/ptkgen/grammar.ptk b/examples/ptkgen/grammar.ptk index 03caa88..30b0a2a 100644 --- a/examples/ptkgen/grammar.ptk +++ b/examples/ptkgen/grammar.ptk @@ -35,12 +35,12 @@ rule production = ; rule mapping = - $identifier ":" - | + $identifier ":" # variant init + | # regular init ; rule mapped_value = - # { field = , field = , ... } + # { field = , field = , ... } | # { , , ... } | $code_literal # `code` | $value_ref # $0 diff --git a/src/ptkgen/ast.zig b/src/ptkgen/ast.zig index 163d94b..5b7c715 100644 --- a/src/ptkgen/ast.zig +++ b/src/ptkgen/ast.zig @@ -116,16 +116,18 @@ pub const Production = union(enum) { }; pub const AstMapping = union(enum) { - constructor: List(FieldAssignment), // { field = ..., field = ... } + record: List(FieldAssignment), // { field = ..., field = ... } + list: List(AstMapping), // { ..., ..., ... } + variant: VariantInitializer, // field: ... 
+ literal: CodeLiteral, // field: value context_reference: ValueRef, // $0 user_reference: UserDefinedIdentifier, // @field user_function_call: FunctionCall(UserDefinedIdentifier), // @builtin(a,b,c) function_call: FunctionCall(Identifier), // identifier(a,b,c) - union_init: UnionInitializer, }; -pub const UnionInitializer = struct { +pub const VariantInitializer = struct { field: Identifier, value: *AstMapping, }; diff --git a/src/ptkgen/ast_dump.zig b/src/ptkgen/ast_dump.zig index 166b173..64571a1 100644 --- a/src/ptkgen/ast_dump.zig +++ b/src/ptkgen/ast_dump.zig @@ -66,13 +66,14 @@ const AstPrinter = struct { fn dumpAstType(printer: AstPrinter, typespec: ast.TypeSpec) void { _ = printer; _ = typespec; - std.debug.print("", .{}); + print("", .{}); } fn dumpMappedProd(printer: AstPrinter, mapped_prod: ast.MappedProduction) void { printer.dumpProd(mapped_prod.production); if (mapped_prod.mapping) |mapping| { + print(" => ", .{}); printer.dumpMapping(mapping); } } @@ -105,9 +106,58 @@ const AstPrinter = struct { } fn dumpMapping(printer: AstPrinter, mapping: ast.AstMapping) void { - _ = printer; - _ = mapping; - print("", .{}); + switch (mapping) { + .record => |record| { + _ = record; + @panic("printing not implemented yet"); + }, + + .list => |list| { + if (list.len() > 0) { + print("{{ ", .{}); + printer.dumpMappingList(list); + print(" }}", .{}); + } else { + print("{{}}", .{}); + } + }, + + .variant => |variant| { + print("{}: ", .{printer.fmtId(variant.field.value)}); + printer.dumpMapping(variant.value.*); + }, + + .literal => |literal| print("`{s}`", .{printer.strings.get(literal.value)}), + + .context_reference => |context_reference| print("${}", .{context_reference.index}), + + .user_reference => |user_reference| print("@{}", .{printer.fmtId(user_reference.value)}), + + .user_function_call => |user_function_call| { + print("@{}(", .{printer.fmtId(user_function_call.function.value)}); + printer.dumpMappingList(user_function_call.arguments); + print(")", .{}); + }, + + .function_call => |function_call| { + print("{}(", .{printer.fmtId(function_call.function.value)}); + printer.dumpMappingList(function_call.arguments); + print(")", .{}); + }, + } + } + + fn dumpMappingList(printer: AstPrinter, list: ast.List(ast.AstMapping)) void { + var first = true; + var iter = ast.iterate(list); + while (iter.next()) |arg| { + if (!first) { + print(", ", .{}); + } + first = false; + + printer.dumpMapping(arg); + } } fn fmtString(printer: AstPrinter, str: ptk.strings.String) StringPrinter { diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index 1f7b0bd..f47e7cc 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -223,7 +223,7 @@ const Parser = struct { var sequence = try parser.acceptProductionSequence(); const mapping = if (try parser.tryAcceptLiteral(.@"=>")) - try parser.acceptAstMapping() + try parser.acceptAstMapping(.fail) else null; @@ -289,11 +289,22 @@ const Parser = struct { return error.UnexpectedTokenRecoverable; } - fn acceptAstMapping(parser: *Parser) AcceptError!ast.AstMapping { + fn acceptAstMapping(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.AstMapping { + const state = parser.save(); + errdefer parser.restore(state); + const position = parser.core.tokenizer.current_location; - if (parser.acceptUnionInit()) |init| { - return .{ .union_init = init }; + if (parser.acceptVariantInit()) |init| { + return .{ .variant = init }; + } else |err| try filterAcceptError(err); + + if (parser.acceptRecordInit()) |init| { + return .{ .record = init }; + } 
else |err| try filterAcceptError(err); + + if (parser.acceptListInit()) |init| { + return .{ .list = init }; } else |err| try filterAcceptError(err); if (parser.acceptCodeLiteral()) |literal| { @@ -321,14 +332,51 @@ const Parser = struct { return error.SyntaxError; } - return parser.emitUnexpectedToken(); + switch (accept_mode) { + .recover => return error.UnexpectedTokenRecoverable, + .fail => return parser.emitUnexpectedToken(), + } } - fn acceptUnionInit(parser: *Parser) AcceptError!ast.UnionInitializer { - _ = parser; + fn acceptVariantInit(parser: *Parser) AcceptError!ast.VariantInitializer { + const state = parser.save(); + errdefer parser.restore(state); + + const field = try parser.acceptIdentifier(.recover); + + try parser.acceptLiteral(.@":", .recover); + + const value = try parser.acceptAstMapping(.fail); + + const clone = try parser.arena.create(ast.AstMapping); + clone.* = value; + + return .{ + .field = field, + .value = clone, + }; + } + + fn acceptRecordInit(parser: *Parser) AcceptError!ast.List(ast.FieldAssignment) { + const state = parser.save(); + errdefer parser.restore(state); + return error.UnexpectedTokenRecoverable; } + fn acceptListInit(parser: *Parser) AcceptError!ast.List(ast.AstMapping) { + const state = parser.save(); + errdefer parser.restore(state); + + try parser.acceptLiteral(.@"{", .recover); + + var items = try parser.acceptMappingList(); + + try parser.acceptLiteral(.@"}", .fail); + + return items; + } + fn acceptCodeLiteral(parser: *Parser) AcceptError!ast.CodeLiteral { const token = try parser.acceptToken(.code_literal, .recover); @@ -366,13 +414,40 @@ const Parser = struct { } fn acceptBuiltinCall(parser: *Parser) AcceptError!ast.FunctionCall(ast.Identifier) { - _ = parser; - return error.UnexpectedTokenRecoverable; + const state = parser.save(); + errdefer parser.restore(state); + + const id = try parser.acceptIdentifier(.recover); + + try parser.acceptLiteral(.@"(", .fail); // a builtin function is the only legal way to use an identifier here, so we fail unrecoverably + + const list = try parser.acceptMappingList(); + + try parser.acceptLiteral(.@")", .fail); + + return .{ + .function = id, + .arguments = list, + }; } fn acceptUserCall(parser: *Parser) AcceptError!ast.FunctionCall(ast.UserDefinedIdentifier) { - _ = parser; - return error.UnexpectedTokenRecoverable; + const state = parser.save(); + errdefer parser.restore(state); + + const id = try parser.acceptUserReference(); + + // If we only accept a user value, fail and fall back to regular user value acceptance later + try parser.acceptLiteral(.@"(", .recover); + + const list = try parser.acceptMappingList(); + + try parser.acceptLiteral(.@")", .fail); + + return .{ + .function = id, + .arguments = list, + }; } fn acceptUserReference(parser: *Parser) AcceptError!ast.UserDefinedIdentifier { @@ -384,6 +459,35 @@ const Parser = struct { }; } + fn acceptMappingList(parser: *Parser) AcceptError!ast.List(ast.AstMapping) { + const list_state = parser.save(); + errdefer parser.restore(list_state); + + var list = ast.List(ast.AstMapping){}; + + var accept_mode: AcceptMode = .recover; + while (true) { + // first item is allowed to be failing, otherwise comma separation must be done! 
+ defer accept_mode = .fail; + + const item_state = parser.save(); + + if (parser.acceptAstMapping(accept_mode)) |mapping| { + try parser.append(ast.AstMapping, &list, mapping); + } else |err| { + try filterAcceptError(err); + parser.restore(item_state); // rollback to the previous item + break; + } + + if (!try parser.tryAcceptLiteral(.@",")) { + break; + } + } + + return list; + } + fn acceptTypeSpec(parser: *Parser) AcceptError!ast.TypeSpec { _ = parser; @panic("not implemented yet"); diff --git a/test/parser/accept/mapping-array-a0.ptk b/test/parser/accept/mapping-array-a0.ptk new file mode 100644 index 0000000..3ef8c33 --- /dev/null +++ b/test/parser/accept/mapping-array-a0.ptk @@ -0,0 +1 @@ +rule r = "" => { }; \ No newline at end of file diff --git a/test/parser/accept/mapping-array-a1.ptk b/test/parser/accept/mapping-array-a1.ptk new file mode 100644 index 0000000..48a6912 --- /dev/null +++ b/test/parser/accept/mapping-array-a1.ptk @@ -0,0 +1 @@ +rule r = "" => { $0 }; \ No newline at end of file diff --git a/test/parser/accept/mapping-array-a5.ptk b/test/parser/accept/mapping-array-a5.ptk new file mode 100644 index 0000000..a46ab16 --- /dev/null +++ b/test/parser/accept/mapping-array-a5.ptk @@ -0,0 +1 @@ +rule r = "" => { $0, $1, $2, $3, $4 }; \ No newline at end of file diff --git a/test/parser/accept/mapping-array-nested.ptk b/test/parser/accept/mapping-array-nested.ptk new file mode 100644 index 0000000..be8a59a --- /dev/null +++ b/test/parser/accept/mapping-array-nested.ptk @@ -0,0 +1 @@ +rule r = "" => { $0, { $10, $11, $12 }, $2 }; \ No newline at end of file diff --git a/test/parser/accept/mapping-builtin-function-a0.ptk b/test/parser/accept/mapping-builtin-function-a0.ptk new file mode 100644 index 0000000..478e220 --- /dev/null +++ b/test/parser/accept/mapping-builtin-function-a0.ptk @@ -0,0 +1 @@ +rule r = "" => tostring(); \ No newline at end of file diff --git a/test/parser/accept/mapping-builtin-function-a1.ptk b/test/parser/accept/mapping-builtin-function-a1.ptk new file mode 100644 index 0000000..58e9623 --- /dev/null +++ b/test/parser/accept/mapping-builtin-function-a1.ptk @@ -0,0 +1 @@ +rule r = "" => tostring($0); \ No newline at end of file diff --git a/test/parser/accept/mapping-builtin-function-a5.ptk b/test/parser/accept/mapping-builtin-function-a5.ptk new file mode 100644 index 0000000..acf6f75 --- /dev/null +++ b/test/parser/accept/mapping-builtin-function-a5.ptk @@ -0,0 +1 @@ +rule r = "" => tostring($0, $1, $2, $3, $4); \ No newline at end of file diff --git a/test/parser/accept/mapping-builtin-function-nest.ptk b/test/parser/accept/mapping-builtin-function-nest.ptk new file mode 100644 index 0000000..c7457fe --- /dev/null +++ b/test/parser/accept/mapping-builtin-function-nest.ptk @@ -0,0 +1 @@ +rule r = "" => tostring($0, tostring($1), $4); \ No newline at end of file diff --git a/test/parser/accept/mapping-user-function-a0.ptk b/test/parser/accept/mapping-user-function-a0.ptk new file mode 100644 index 0000000..12d6fce --- /dev/null +++ b/test/parser/accept/mapping-user-function-a0.ptk @@ -0,0 +1 @@ +rule r = "" => @convert(); \ No newline at end of file diff --git a/test/parser/accept/mapping-user-function-a1.ptk b/test/parser/accept/mapping-user-function-a1.ptk new file mode 100644 index 0000000..0c51664 --- /dev/null +++ b/test/parser/accept/mapping-user-function-a1.ptk @@ -0,0 +1 @@ +rule r = "" => @convert($0); \ No newline at end of file diff --git a/test/parser/accept/mapping-user-function-a5.ptk b/test/parser/accept/mapping-user-function-a5.ptk 
new file mode 100644 index 0000000..684e3f3 --- /dev/null +++ b/test/parser/accept/mapping-user-function-a5.ptk @@ -0,0 +1 @@ +rule r = "" => @convert($0, $1, $2, $3, $4); \ No newline at end of file diff --git a/test/parser/accept/mapping-user-function-nest.ptk b/test/parser/accept/mapping-user-function-nest.ptk new file mode 100644 index 0000000..f78963b --- /dev/null +++ b/test/parser/accept/mapping-user-function-nest.ptk @@ -0,0 +1 @@ +rule r = "" => @convert($0, tostring($1), $4); \ No newline at end of file diff --git a/test/parser/accept/mapping-variant-init.ptk b/test/parser/accept/mapping-variant-init.ptk new file mode 100644 index 0000000..0fc50e8 --- /dev/null +++ b/test/parser/accept/mapping-variant-init.ptk @@ -0,0 +1 @@ +rule r = "" => child: $0; \ No newline at end of file From a7e57259ce61aab61c30b6693f5060f5ad62bcf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 3 Nov 2023 19:52:38 +0100 Subject: [PATCH 10/20] Implements acceptance of record rules. --- build.zig | 3 + src/ptkgen/ast_dump.zig | 20 +- src/ptkgen/main.zig | 18 +- src/ptkgen/parser.zig | 204 ++++++++++++++++-- test/parser/accept/mapping-record-init-f1.ptk | 1 + test/parser/accept/mapping-record-init-f3.ptk | 1 + 6 files changed, 227 insertions(+), 20 deletions(-) create mode 100644 test/parser/accept/mapping-record-init-f1.ptk create mode 100644 test/parser/accept/mapping-record-init-f3.ptk diff --git a/build.zig b/build.zig index 9cfc229..60a0d97 100644 --- a/build.zig +++ b/build.zig @@ -168,6 +168,9 @@ const parser_ok_files = [_][]const u8{ "test/parser/accept/mapping-variant-init.ptk", + "test/parser/accept/mapping-record-init-f1.ptk", + "test/parser/accept/mapping-record-init-f3.ptk", + // "examples/ptkgen/ast-with-unions.ptk", // TODO: Move to examples } ++ analyis_ok_files; diff --git a/src/ptkgen/ast_dump.zig b/src/ptkgen/ast_dump.zig index 64571a1..c554abf 100644 --- a/src/ptkgen/ast_dump.zig +++ b/src/ptkgen/ast_dump.zig @@ -108,8 +108,24 @@ const AstPrinter = struct { fn dumpMapping(printer: AstPrinter, mapping: ast.AstMapping) void { switch (mapping) { .record => |record| { - _ = record; - @panic("printing not implemented yet"); + std.debug.assert(record.len() > 0); + + print("{{ ", .{}); + + var first = true; + var iter = ast.iterate(record); + while (iter.next()) |arg| { + if (!first) { + print(", ", .{}); + } + first = false; + + print("{} = ", .{printer.fmtId(arg.field.value)}); + + printer.dumpMapping(arg.value.*); + } + + print(" }}", .{}); }, .list => |list| { diff --git a/src/ptkgen/main.zig b/src/ptkgen/main.zig index 151ea33..bfb273c 100644 --- a/src/ptkgen/main.zig +++ b/src/ptkgen/main.zig @@ -22,6 +22,7 @@ pub const CliOptions = struct { help: bool = false, output: ?[]const u8 = null, test_mode: TestMode = .none, + trace: bool = false, @"max-file-size": u32 = 4 * 1024, // 4 MB of source code is a lot! 
@@ -42,6 +43,8 @@ pub const CliOptions = struct { .test_mode = "(internal use only, required for testing)", .@"max-file-size" = "Maximum input file size in KiB (default: 4096)", + + .trace = "Prints a parse trace", }, }; }; @@ -140,6 +143,7 @@ pub fn main() AppError!u8 { source_code, file_name, cli.options.test_mode, + cli.options.trace, ) catch |err| { try convertErrorToDiagnostics(&diagnostics, file_name, err); break :process_file false; @@ -266,13 +270,17 @@ fn compileFile( source_code: []const u8, file_name: []const u8, mode: TestMode, + trace_enabled: bool, ) !void { var tree = try parser.parse( - allocator, - diagnostics, - string_pool, - file_name, - source_code, + .{ + .allocator = allocator, + .diagnostics = diagnostics, + .string_pool = string_pool, + .file_name = file_name, + .source_code = source_code, + .trace_enabled = trace_enabled, + }, ); defer tree.deinit(); diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index f47e7cc..852355a 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -17,26 +17,46 @@ pub const Document = struct { } }; -pub fn parse(allocator: std.mem.Allocator, diagnostics: *Diagnostics, string_pool: *ptk.strings.Pool, file_name: []const u8, source_code: []const u8) !Document { - var arena = std.heap.ArenaAllocator.init(allocator); +pub fn parse(opt: struct { + allocator: std.mem.Allocator, + diagnostics: *Diagnostics, + string_pool: *ptk.strings.Pool, + file_name: []const u8, + source_code: []const u8, + trace_enabled: bool, +}) !Document { + var arena = std.heap.ArenaAllocator.init(opt.allocator); errdefer arena.deinit(); - const file_name_copy = try arena.allocator().dupe(u8, file_name); + const file_name_copy = try arena.allocator().dupe(u8, opt.file_name); - var tokenizer = Tokenizer.init(source_code, file_name_copy); + var tokenizer = Tokenizer.init(opt.source_code, file_name_copy); var parser = Parser{ .core = ParserCore.init(&tokenizer), .arena = arena.allocator(), - .pool = string_pool, - .diagnostics = diagnostics, + .pool = opt.string_pool, + .diagnostics = opt.diagnostics, + .trace_enabled = opt.trace_enabled, }; const document_node = parser.acceptDocument() catch |err| switch (err) { // Unrecoverable syntax error, must have created diagnostics already - error.SyntaxError, error.InvalidSourceEncoding => |e| { - std.debug.assert(diagnostics.hasErrors()); + error.SyntaxError => |e| { + std.debug.assert(opt.diagnostics.hasErrors()); + + if (opt.trace_enabled) { + if (@errorReturnTrace()) |trace| { + std.debug.dumpStackTrace(trace.*); + } + } + + return e; + }, + error.InvalidSourceEncoding => |e| { + std.debug.assert(opt.diagnostics.hasErrors()); + return e; }, @@ -45,7 +65,7 @@ pub fn parse(allocator: std.mem.Allocator, diagnostics: *Diagnostics, string_poo if (tokenizer.next()) |token_or_null| { if (token_or_null) |token| { - try diagnostics.emit(token.location, .excess_tokens, .{ .token_type = token.type }); + try opt.diagnostics.emit(token.location, .excess_tokens, .{ .token_type = token.type }); return error.SyntaxError; } } else |_| { @@ -128,7 +148,13 @@ const Parser = struct { pool: *ptk.strings.Pool, diagnostics: *Diagnostics, + trace_enabled: bool, + trace_depth: u32 = 0, + pub fn acceptDocument(parser: *Parser) FatalAcceptError!ast.Document { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + var doc = ast.Document{}; while (true) { @@ -143,6 +169,9 @@ const Parser = struct { } fn acceptTopLevelDecl(parser: *Parser) FatalAcceptError!?ast.TopLevelDeclaration { + parser.traceEnterRule(@src()); + defer 
parser.popTrace(); + if (parser.acceptStartDecl()) |root_rule| { return .{ .start = root_rule }; } else |err| try filterAcceptError(err); @@ -174,6 +203,9 @@ const Parser = struct { } fn acceptStartDecl(parser: *Parser) AcceptError!ast.RuleRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + try parser.acceptLiteral(.start, .recover); const init_rule = try parser.acceptRuleReference(.fail); @@ -183,6 +215,9 @@ const Parser = struct { } fn acceptRule(parser: *Parser) AcceptError!ast.Rule { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + var state = parser.save(); errdefer parser.restore(state); @@ -220,6 +255,9 @@ const Parser = struct { } fn acceptMappedProduction(parser: *Parser) AcceptError!ast.MappedProduction { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + var sequence = try parser.acceptProductionSequence(); const mapping = if (try parser.tryAcceptLiteral(.@"=>")) @@ -238,6 +276,9 @@ const Parser = struct { } fn acceptProductionSequence(parser: *Parser) AcceptError!ast.List(ast.Production) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + var list: ast.List(ast.Production) = .{}; while (true) { @@ -258,6 +299,9 @@ const Parser = struct { } fn acceptProduction(parser: *Parser) AcceptError!ast.Production { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + if (try parser.tryAcceptLiteral(.@"(")) { var sequence = try parser.acceptProductionSequence(); try parser.acceptLiteral(.@")", .fail); @@ -290,6 +334,9 @@ const Parser = struct { } fn acceptAstMapping(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.AstMapping { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const state = parser.save(); errdefer parser.restore(state); @@ -339,6 +386,9 @@ const Parser = struct { } fn acceptVariantInit(parser: *Parser) AcceptError!ast.VariantInitializer { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const state = parser.save(); errdefer parser.restore(state); @@ -358,13 +408,65 @@ const Parser = struct { } fn acceptRecordInit(parser: *Parser) AcceptError!ast.List(ast.FieldAssignment) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const state = parser.save(); errdefer parser.restore(state); - return error.UnexpectedTokenRecoverable; + try parser.acceptLiteral(.@"{", .recover); + + var mode: AcceptMode = .recover; + + var list = ast.List(ast.FieldAssignment){}; + while (true) { + // First item might fail, then it's not a record initializer, but + // afterwards, all fields must comply + defer mode = .fail; + + const node = try parser.acceptFieldInit(mode); + + try parser.append(ast.FieldAssignment, &list, node); + + if (!try parser.tryAcceptLiteral(.@",")) { + break; + } + } + + try parser.acceptLiteral(.@"}", .fail); + + return list; + } + + fn acceptFieldInit(parser: *Parser, mode: AcceptMode) AcceptError!ast.FieldAssignment { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const state = parser.save(); + errdefer parser.restore(state); + + const location = parser.core.tokenizer.current_location; + + const field = try parser.acceptIdentifier(mode); + + try parser.acceptLiteral(.@"=", .fail); + + const value = try parser.acceptAstMapping(.fail); + + const clone = try parser.arena.create(ast.AstMapping); + clone.* = value; + + return .{ + .location = location, + .field = field, + .value = clone, + }; } fn acceptListInit(parser: *Parser) AcceptError!ast.List(ast.AstMapping) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const state = parser.save(); 
errdefer parser.restore(state); @@ -378,6 +480,9 @@ const Parser = struct { } fn acceptCodeLiteral(parser: *Parser) AcceptError!ast.CodeLiteral { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const token = try parser.acceptToken(.code_literal, .recover); std.debug.assert(std.mem.startsWith(u8, token.text, "`")); @@ -395,6 +500,9 @@ const Parser = struct { } fn acceptValueReference(parser: *Parser) AcceptError!ast.ValueRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const token = try parser.acceptToken(.value_ref, .recover); std.debug.assert(std.mem.startsWith(u8, token.text, "$")); return ast.ValueRef{ @@ -414,6 +522,9 @@ const Parser = struct { } fn acceptBuiltinCall(parser: *Parser) AcceptError!ast.FunctionCall(ast.Identifier) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const state = parser.save(); errdefer parser.restore(state); @@ -432,6 +543,9 @@ const Parser = struct { } fn acceptUserCall(parser: *Parser) AcceptError!ast.FunctionCall(ast.UserDefinedIdentifier) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const state = parser.save(); errdefer parser.restore(state); @@ -451,6 +565,9 @@ const Parser = struct { } fn acceptUserReference(parser: *Parser) AcceptError!ast.UserDefinedIdentifier { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const token = try parser.acceptToken(.userval_ref, .recover); std.debug.assert(std.mem.startsWith(u8, token.text, "@")); return ast.UserDefinedIdentifier{ @@ -460,6 +577,9 @@ const Parser = struct { } fn acceptMappingList(parser: *Parser) AcceptError!ast.List(ast.AstMapping) { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const list_state = parser.save(); errdefer parser.restore(list_state); @@ -489,11 +609,16 @@ const Parser = struct { } fn acceptTypeSpec(parser: *Parser) AcceptError!ast.TypeSpec { - _ = parser; + parser.traceEnterRule(@src()); + defer parser.popTrace(); + @panic("not implemented yet"); } fn acceptStringLiteral(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.StringLiteral { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const token = try parser.acceptToken(.string_literal, accept_mode); std.debug.assert(token.text.len >= 2); @@ -505,6 +630,9 @@ const Parser = struct { } fn acceptIdentifier(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.Identifier { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const token = try parser.acceptToken(.identifier, accept_mode); return ast.Identifier{ .location = token.location, @@ -513,6 +641,9 @@ const Parser = struct { } fn acceptRuleReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.RuleRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const token = try parser.acceptToken(.rule_ref, accept_mode); std.debug.assert(std.mem.startsWith(u8, token.text, "<")); std.debug.assert(std.mem.endsWith(u8, token.text, ">")); @@ -523,6 +654,9 @@ const Parser = struct { } fn acceptTokenReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.TokenRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const token = try parser.acceptToken(.token_ref, accept_mode); std.debug.assert(std.mem.startsWith(u8, token.text, "$")); return ast.TokenRef{ @@ -532,6 +666,9 @@ const Parser = struct { } fn acceptNodeReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.NodeRef { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + const token = try parser.acceptToken(.node_ref, accept_mode); 
std.debug.assert(std.mem.startsWith(u8, token.text, "!")); return ast.NodeRef{ @@ -561,8 +698,7 @@ const Parser = struct { const location = parser.core.tokenizer.current_location; if (parser.core.accept(RS.any)) |token| { - // std.log.debug("token trace: {}", .{token}); - + errdefer parser.emitTrace(.{ .token_reject = .{ .actual = token, .expected = token_type } }); if (token.type != token_type) { switch (accept_mode) { .fail => { @@ -576,6 +712,7 @@ const Parser = struct { .recover => return error.UnexpectedTokenRecoverable, } } + parser.emitTrace(.{ .token_accept = token }); return token; } else |err| switch (err) { error.UnexpectedToken => unreachable, // RS.any will always accept the token @@ -602,6 +739,47 @@ const Parser = struct { }; // management: + const TraceKind = union(enum) { + token_accept: Token, + token_reject: struct { actual: Token, expected: TokenType }, + rule: []const u8, + }; + + const Trace = struct { + depth: u32, + kind: TraceKind, + + pub fn format(trace: Trace, fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = opt; + try writer.writeByteNTimes(' ', 4 * trace.depth); + try writer.print("{s}:", .{@tagName(trace.kind)}); + switch (trace.kind) { + .token_accept => |item| try writer.print("accept {}", .{item}), + .token_reject => |item| try writer.print("reject {}, expected '{s}'", .{ item.actual, @tagName(item.expected) }), + .rule => |item| try writer.print("{s}", .{item}), + } + } + }; + + fn traceEnterRule(parser: *Parser, loc: std.builtin.SourceLocation) void { + parser.emitTrace(.{ .rule = loc.fn_name }); + parser.trace_depth += 1; + } + + fn popTrace(parser: *Parser) void { + parser.trace_depth -= 1; + } + + fn emitTrace(parser: Parser, trace: TraceKind) void { + if (!parser.trace_enabled) { + return; + } + std.log.debug("rule trace: {}", .{Trace{ + .depth = parser.trace_depth, + .kind = trace, + }}); + } fn emitDiagnostic(parser: Parser, loc: ?ptk.Location, comptime code: Diagnostics.Code, data: Diagnostics.Data(code)) !void { // Anything detected here is always an error diff --git a/test/parser/accept/mapping-record-init-f1.ptk b/test/parser/accept/mapping-record-init-f1.ptk new file mode 100644 index 0000000..dcce273 --- /dev/null +++ b/test/parser/accept/mapping-record-init-f1.ptk @@ -0,0 +1 @@ +rule r = "" => { x = $0 }; \ No newline at end of file diff --git a/test/parser/accept/mapping-record-init-f3.ptk b/test/parser/accept/mapping-record-init-f3.ptk new file mode 100644 index 0000000..22d7640 --- /dev/null +++ b/test/parser/accept/mapping-record-init-f3.ptk @@ -0,0 +1 @@ +rule r = "" => { x = $0, y = $1, z = $2 }; \ No newline at end of file From 25220b2877e2524b40ea8c3087271a30a1f5a4e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 3 Nov 2023 20:11:42 +0100 Subject: [PATCH 11/20] Starts to implement typespec parsing. 
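For reference, the type specifier forms accepted after this change, mirroring
the new accept tests (compound record/variant specifiers are not parsed yet;
trailing comments added here for readability):

    rule r : !farpointer = "";   # node reference
    rule r : `bool` = "";        # literal Zig type
    rule r : @Point = "";        # custom user-defined type

The tokenizer keywords change accordingly: `struct` becomes `record`,
`variant` is added for the union-style compounds, and the `literal` keyword
is dropped, so literal types are written as a bare backtick code literal.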
--- build.zig | 4 ++ examples/ptkgen/ast-with-unions.ptk | 36 ++++++++--------- examples/ptkgen/grammar.ptk | 23 ++++++----- src/ptkgen/ast.zig | 6 +-- src/ptkgen/ast_dump.zig | 10 +++-- src/ptkgen/parser.zig | 42 +++++++++++++------- test/parser/accept/rule-typespec-custom.ptk | 1 + test/parser/accept/rule-typespec-literal.ptk | 1 + test/parser/accept/rule-typespec-ref.ptk | 1 + 9 files changed, 74 insertions(+), 50 deletions(-) create mode 100644 test/parser/accept/rule-typespec-custom.ptk create mode 100644 test/parser/accept/rule-typespec-literal.ptk create mode 100644 test/parser/accept/rule-typespec-ref.ptk diff --git a/build.zig b/build.zig index 60a0d97..f8528cd 100644 --- a/build.zig +++ b/build.zig @@ -171,6 +171,10 @@ const parser_ok_files = [_][]const u8{ "test/parser/accept/mapping-record-init-f1.ptk", "test/parser/accept/mapping-record-init-f3.ptk", + "test/parser/accept/rule-typespec-custom.ptk", + "test/parser/accept/rule-typespec-ref.ptk", + "test/parser/accept/rule-typespec-literal.ptk", + // "examples/ptkgen/ast-with-unions.ptk", // TODO: Move to examples } ++ analyis_ok_files; diff --git a/examples/ptkgen/ast-with-unions.ptk b/examples/ptkgen/ast-with-unions.ptk index 1b041d7..1e8f226 100644 --- a/examples/ptkgen/ast-with-unions.ptk +++ b/examples/ptkgen/ast-with-unions.ptk @@ -4,16 +4,16 @@ # var name: type = value; # const name: type = value; -node declaration = struct - is_const: literal `bool`, - name: !identifier, - type: optional !type, - value: !value -; +# node declaration = struct +# is_const: `bool`, +# name: !identifier, +# type: optional !type, +# value: !value +# ; -node identifier = literal `[]const u8`; -node type = @TypeId; # enum { int, float, string } -node value = @Value; +# node identifier = `[]const u8`; +# node type = @TypeId; # enum { int, float, string } +# node value = @Value; start ; @@ -27,20 +27,20 @@ rule decl : !declaration = # $0_________ $1__ $2_____________ $3 $4_____ ; -rule : literal `bool` = +rule decl-type : `bool` = "var" => `false` | "const" => `true` ; -rule : !identifier = "name" => tostring($0); +rule id : !identifier = "name" => tostring($0); -rule : !type = +rule type : !type = "int" => `.int` | "float" => `.float` | "string" => `.string` ; -rule : !value = +rule value : !value = "10" => @parseInt($0) | "3.14" => @parseFloat($0) | "\"nice\"" => @parseStringLiteral($0) @@ -49,11 +49,11 @@ rule : !value = # Unions have can only have a single option active at a time -node TLDeclaration = union - ns : !namespace, - interface : !interface, - module : !module, -; +# node TLDeclaration = union +# ns : !namespace, +# interface : !interface, +# module : !module, +# ; rule toplevel-decl : !TLDeclaration = => ns: $0 # this is syntax for a union field selector as unions are not compounds diff --git a/examples/ptkgen/grammar.ptk b/examples/ptkgen/grammar.ptk index 30b0a2a..7dff596 100644 --- a/examples/ptkgen/grammar.ptk +++ b/examples/ptkgen/grammar.ptk @@ -10,11 +10,11 @@ rule top_level = | ; -# rule start_decl = "start" $rule_ref ";" ; +rule start_decl = "start" $rule_ref ";" ; -# rule token_decl = "token" $identifier "=" ";" ; +rule token_decl = "token" $identifier "=" ";" ; -# rule node_decl = "node" $identifier "=" ";" ; +rule node_decl = "node" $identifier "=" ";" ; rule rule_decl = "rule" $identifier ( ":" )? "=" ";" ; @@ -40,12 +40,13 @@ rule mapping = ; rule mapped_value = - # { field = , field = , ... } - | # { , , ... } - | $code_literal # `code` - | $value_ref # $0 - | $userval "(" ")" # @func(...) 
- | $userval # @value + # { field = , field = , ... } + | # { , , ... } + | $code_literal # `code` + | $value_ref # $0 + | $identifier "(" ")" # builtin(...) + | $userval "(" ")" # @func(...) + | $userval # @value ; rule struct_ctor = @@ -56,8 +57,8 @@ rule assign_field = $identifier "=" $mapped_value ; -rule list_ctor = "{" "}"; +rule list_ctor = "{" ( )? "}"; rule value_list = ( "," )* -; \ No newline at end of file +; diff --git a/src/ptkgen/ast.zig b/src/ptkgen/ast.zig index 5b7c715..e62d664 100644 --- a/src/ptkgen/ast.zig +++ b/src/ptkgen/ast.zig @@ -155,9 +155,9 @@ pub const Pattern = union(enum) { pub const TypeSpec = union(enum) { reference: NodeRef, // !type literal: CodeLiteral, // literal `bool` - custom: CodeLiteral, // custom `Custom` - @"struct": CompoundType, // struct - @"union": CompoundType, // union + custom: UserDefinedIdentifier, // custom `Custom` + record: CompoundType, // struct + variant: CompoundType, // union }; pub const CompoundType = struct { diff --git a/src/ptkgen/ast_dump.zig b/src/ptkgen/ast_dump.zig index c554abf..6b2451b 100644 --- a/src/ptkgen/ast_dump.zig +++ b/src/ptkgen/ast_dump.zig @@ -64,9 +64,13 @@ const AstPrinter = struct { } fn dumpAstType(printer: AstPrinter, typespec: ast.TypeSpec) void { - _ = printer; - _ = typespec; - print("", .{}); + switch (typespec) { + .reference => |ref| print("!{}", .{printer.fmtId(ref.identifier)}), + .literal => |lit| print("literal `{s}`", .{printer.strings.get(lit.value)}), + .custom => @panic("not done yet"), + .record => @panic("not done yet"), + .variant => @panic("not done yet"), + } } fn dumpMappedProd(printer: AstPrinter, mapped_prod: ast.MappedProduction) void { diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index 852355a..ef45197 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -84,13 +84,13 @@ pub const TokenType = enum { // keywords node, - @"struct", + record, + variant, optional, start, rule, token, - literal, custom, regex, skip, @@ -612,6 +612,18 @@ const Parser = struct { parser.traceEnterRule(@src()); defer parser.popTrace(); + if (parser.acceptCodeLiteral()) |code| { + return .{ .literal = code }; + } else |err| try filterAcceptError(err); + + if (parser.acceptUserReference()) |ref| { + return .{ .custom = ref }; + } else |err| try filterAcceptError(err); + + if (parser.acceptNodeReference(.fail)) |ref| { + return .{ .reference = ref }; + } else |err| try filterAcceptError(err); + @panic("not implemented yet"); } @@ -936,16 +948,27 @@ const Tokenizer = ptk.Tokenizer(TokenType, &.{ Pattern.create(.line_comment, match.sequenceOf(.{ match.literal("#"), match.takeNoneOf("\r\n") })), Pattern.create(.node, match.word("node")), - Pattern.create(.@"struct", match.word("struct")), + Pattern.create(.record, match.word("record")), + Pattern.create(.variant, match.word("variant")), Pattern.create(.optional, match.word("optional")), Pattern.create(.start, match.word("start")), Pattern.create(.rule, match.word("rule")), Pattern.create(.token, match.word("token")), - Pattern.create(.literal, match.word("literal")), Pattern.create(.custom, match.word("custom")), Pattern.create(.regex, match.word("regex")), Pattern.create(.skip, match.word("skip")), + Pattern.create(.string_literal, matchStringLiteral), + Pattern.create(.code_literal, matchCodeLiteral), + + // identifiers must come after keywords: + Pattern.create(.identifier, matchRawIdentifier), + Pattern.create(.node_ref, matchNodeRef), + Pattern.create(.rule_ref, matchRuleRef), + Pattern.create(.token_ref, matchTokenRef), + 
Pattern.create(.value_ref, matchValueRef), + Pattern.create(.userval_ref, matchBuiltinRef), + Pattern.create(.@"=>", match.literal("=>")), Pattern.create(.@"=", match.literal("=")), @@ -965,17 +988,6 @@ const Tokenizer = ptk.Tokenizer(TokenType, &.{ Pattern.create(.@"{", match.literal("{")), Pattern.create(.@"}", match.literal("}")), - Pattern.create(.string_literal, matchStringLiteral), - Pattern.create(.code_literal, matchCodeLiteral), - - // identifiers must come after keywords: - Pattern.create(.identifier, matchRawIdentifier), - Pattern.create(.node_ref, matchNodeRef), - Pattern.create(.rule_ref, matchRuleRef), - Pattern.create(.token_ref, matchTokenRef), - Pattern.create(.value_ref, matchValueRef), - Pattern.create(.userval_ref, matchBuiltinRef), - // Whitespace is the "kitchen sink" at the end: Pattern.create(.whitespace, match.takeAnyOf(" \r\n\t")), }); diff --git a/test/parser/accept/rule-typespec-custom.ptk b/test/parser/accept/rule-typespec-custom.ptk new file mode 100644 index 0000000..3df8de4 --- /dev/null +++ b/test/parser/accept/rule-typespec-custom.ptk @@ -0,0 +1 @@ +rule r : @Point = ""; \ No newline at end of file diff --git a/test/parser/accept/rule-typespec-literal.ptk b/test/parser/accept/rule-typespec-literal.ptk new file mode 100644 index 0000000..7a700d7 --- /dev/null +++ b/test/parser/accept/rule-typespec-literal.ptk @@ -0,0 +1 @@ +rule r : `bool` = ""; \ No newline at end of file diff --git a/test/parser/accept/rule-typespec-ref.ptk b/test/parser/accept/rule-typespec-ref.ptk new file mode 100644 index 0000000..1af0072 --- /dev/null +++ b/test/parser/accept/rule-typespec-ref.ptk @@ -0,0 +1 @@ +rule r : !farpointer = ""; \ No newline at end of file From ce25d3c6a0297504f52969e49ff797d80c199838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 6 Nov 2023 10:59:20 +0100 Subject: [PATCH 12/20] Adds declaration of nodes, and parsing of compound types. 
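For reference, the node declaration forms covered by the new accept tests
(each example is its own file under test/parser/accept/; trailing comments
added here for readability):

    node Alias = !OtherType;              # alias of another node
    node String = `[]const u8`;           # literal Zig type
    node String = @StringIdentifier;      # custom user-defined type
    node Struct = record field: `bool`;   # single-field record

A multi-field compound may span several lines:

    node Struct = variant
        x: `i32`,
        y: `i32`,
        z: `i32`,
        location: !Location
    ;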
--- build.zig | 12 +- examples/ptkgen/ast-with-unions.ptk | 28 ++-- src/ptkgen/Diagnostics.zig | 40 ++++-- src/ptkgen/ast_dump.zig | 33 ++++- src/ptkgen/intl/en.json | 27 ++-- src/ptkgen/parser.zig | 136 ++++++++++++++++-- test/parser/accept/node-alias.ptk | 1 + test/parser/accept/node-custom.ptk | 1 + test/parser/accept/node-literal.ptk | 1 + test/parser/accept/node-record-f1.ptk | 1 + test/parser/accept/node-record-f4.ptk | 6 + test/parser/accept/node-variant-f1.ptk | 1 + test/parser/accept/node-variant-f4.ptk | 6 + .../reject/bad-mapping-invalid-token.ptk | 2 +- 14 files changed, 242 insertions(+), 53 deletions(-) create mode 100644 test/parser/accept/node-alias.ptk create mode 100644 test/parser/accept/node-custom.ptk create mode 100644 test/parser/accept/node-literal.ptk create mode 100644 test/parser/accept/node-record-f1.ptk create mode 100644 test/parser/accept/node-record-f4.ptk create mode 100644 test/parser/accept/node-variant-f1.ptk create mode 100644 test/parser/accept/node-variant-f4.ptk diff --git a/build.zig b/build.zig index f8528cd..562412c 100644 --- a/build.zig +++ b/build.zig @@ -126,6 +126,8 @@ const analyis_ok_files = [_][]const u8{ "test/analysis/accept/match-rep_one-many-item.ptk", "test/analysis/accept/match-rep_one-many-sequence.ptk", "test/analysis/accept/match-rep_one-nested.ptk", + + "examples/ptkgen/ast-with-unions.ptk", } ++ example_files; const parser_ok_files = [_][]const u8{ @@ -175,7 +177,15 @@ const parser_ok_files = [_][]const u8{ "test/parser/accept/rule-typespec-ref.ptk", "test/parser/accept/rule-typespec-literal.ptk", - // "examples/ptkgen/ast-with-unions.ptk", // TODO: Move to examples + "test/parser/accept/node-alias.ptk", + "test/parser/accept/node-custom.ptk", + "test/parser/accept/node-literal.ptk", + + "test/parser/accept/node-record-f1.ptk", + "test/parser/accept/node-record-f4.ptk", + + "test/parser/accept/node-variant-f4.ptk", + "test/parser/accept/node-variant-f1.ptk", } ++ analyis_ok_files; const parser_reject_files = [_][]const u8{ diff --git a/examples/ptkgen/ast-with-unions.ptk b/examples/ptkgen/ast-with-unions.ptk index 1e8f226..9435a51 100644 --- a/examples/ptkgen/ast-with-unions.ptk +++ b/examples/ptkgen/ast-with-unions.ptk @@ -4,16 +4,16 @@ # var name: type = value; # const name: type = value; -# node declaration = struct -# is_const: `bool`, -# name: !identifier, -# type: optional !type, -# value: !value -# ; +node declaration = record + is_const: `bool`, + name: !identifier, + # type: optional !type, + value: !value +; -# node identifier = `[]const u8`; -# node type = @TypeId; # enum { int, float, string } -# node value = @Value; +node identifier = `[]const u8`; +node type = @TypeId; # enum { int, float, string } +node value = @Value; start ; @@ -49,11 +49,11 @@ rule value : !value = # Unions have can only have a single option active at a time -# node TLDeclaration = union -# ns : !namespace, -# interface : !interface, -# module : !module, -# ; +node TLDeclaration = variant + ns : !namespace, + interface : !interface, + module : !module +; rule toplevel-decl : !TLDeclaration = => ns: $0 # this is syntax for a union field selector as unions are not compounds diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig index e9de60f..ecccd61 100644 --- a/src/ptkgen/Diagnostics.zig +++ b/src/ptkgen/Diagnostics.zig @@ -29,11 +29,14 @@ pub const Code = enum(u16) { excess_tokens = 1107, unexpected_toplevel_token = 1108, unexpected_token_no_context = 1109, + unexpected_token_type_spec = 1110, + unexpected_token_mapping = 1111, 
// recoverable syntax errors: illegal_empty_group = 1200, empty_mapping = 1201, integer_overflow = 1202, + empty_typespec = 1203, comptime { std.debug.assert(first_error < first_warning); @@ -58,6 +61,11 @@ pub const Code = enum(u16) { }; const NoDiagnosticData = struct {}; + +const UnexpectedTokenMessage = struct { + actual: parser.Token, +}; + pub fn Data(comptime code: Code) type { return switch (code) { .out_of_memory => NoDiagnosticData, @@ -69,16 +77,14 @@ pub fn Data(comptime code: Code) type { }, .unexpected_token => struct { expected_type: parser.TokenType, - actual_type: parser.TokenType, - actual_text: []const u8, - }, - .unexpected_toplevel_token => struct { - actual_type: parser.TokenType, - actual_text: []const u8, - }, - .unexpected_token_no_context => struct { - actual_type: parser.TokenType, + actual: parser.Token, }, + + .unexpected_toplevel_token => UnexpectedTokenMessage, + .unexpected_token_no_context => UnexpectedTokenMessage, + .unexpected_token_type_spec => UnexpectedTokenMessage, + .unexpected_token_mapping => UnexpectedTokenMessage, + .unexpected_eof => NoDiagnosticData, .invalid_source_encoding => NoDiagnosticData, @@ -97,6 +103,8 @@ pub fn Data(comptime code: Code) type { actual: []const u8, }, + .empty_typespec => NoDiagnosticData, + // else => @compileError(std.fmt.comptimePrint("Code {} has no diagnostic type associated!", .{code})), }; } @@ -173,7 +181,7 @@ fn Formatter(comptime T: type) type { // enums: parser.TokenType => struct { - value: T, + value: parser.TokenType, pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { _ = options; _ = fmt; @@ -181,6 +189,18 @@ fn Formatter(comptime T: type) type { } }, + parser.Token => struct { + value: parser.Token, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + try writer.print("{s} ('{}')", .{ + @tagName(item.value.type), + std.zig.fmtEscapes(item.value.text), + }); + } + }, + intl.FormattableError => struct { value: T, diff --git a/src/ptkgen/ast_dump.zig b/src/ptkgen/ast_dump.zig index 6b2451b..eabfa11 100644 --- a/src/ptkgen/ast_dump.zig +++ b/src/ptkgen/ast_dump.zig @@ -51,7 +51,8 @@ const AstPrinter = struct { }, .node => |node| { - print("node {s}", .{printer.fmtId(node.name.value)}); + print("node {s} = ", .{printer.fmtId(node.name.value)}); + printer.dumpAstType(node.value); print(";\n", .{}); }, @@ -66,10 +67,32 @@ const AstPrinter = struct { fn dumpAstType(printer: AstPrinter, typespec: ast.TypeSpec) void { switch (typespec) { .reference => |ref| print("!{}", .{printer.fmtId(ref.identifier)}), - .literal => |lit| print("literal `{s}`", .{printer.strings.get(lit.value)}), - .custom => @panic("not done yet"), - .record => @panic("not done yet"), - .variant => @panic("not done yet"), + .literal => |lit| print("`{s}`", .{printer.strings.get(lit.value)}), + .custom => |custom| print("@{}", .{printer.fmtId(custom.value)}), + .record, .variant => |compound| { + const multi_field = compound.fields.len() > 1; + + print("{s} ", .{@tagName(typespec)}); + var iter = ast.iterate(compound.fields); + + if (multi_field) { + var line_prefix: []const u8 = "\n "; + while (iter.next()) |field| { + print("{s}{}: ", .{ line_prefix, printer.fmtId(field.name.value) }); + printer.dumpAstType(field.type); + + if (multi_field) { + line_prefix = ",\n "; + } + } + print("\n", .{}); + } else { + const field = iter.next().?; + + print("{}: ", .{printer.fmtId(field.name.value)}); + 
printer.dumpAstType(field.type); + } + }, } } diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json index dd32bbe..0275d39 100644 --- a/src/ptkgen/intl/en.json +++ b/src/ptkgen/intl/en.json @@ -4,18 +4,21 @@ "file_limit_exceeded": "Input file exceeds maximum file size", "io_error": "I/O error: {[error_code]}", "invalid_source_encoding": "Invalid source code encoding detected", - "unexpected_token_eof": "Expected token {[expected_type]}, but end of file was discovered", - "unexpected_token": "Expected token {[expected_type]}, but discovered token {[actual_type]} ('{[actual_text]}')", - "unexpected_toplevel_token": "Expected token 'start', 'rule', 'node' or 'token', but discovered token {[actual_type]} ('{[actual_text]}')", - "unexpected_character": "Unexpected character: '{[character]}'", - "unexpected_eof": "Unexpected end of file", - "bad_string_escape": "Invalid string escape: Escape sequence at the end of string", - "invalid_string_escape": "Invalid string escape \\{[escape]}", - "excess_tokens": "Excess token at the end of the file: {[token_type]}", - "illegal_empty_group": "Production sequence may not be empty", - "unexpected_token_no_context": "Unexpected token '{[actual_type]}'", - "empty_mapping": "Empty mappings are not allowed", - "integer_overflow": "Integer value {[actual]} out of range. Values must be between {[min]} and {[max]}" + "bad_string_escape": "Invalid string escape: Escape sequence at the end of string.", + "invalid_string_escape": "Invalid string escape '\\{[escape]}'.", + "excess_tokens": "Excess token at the end of the file: {[token_type]}-", + "illegal_empty_group": "Production sequence may not be empty.", + "integer_overflow": "Integer value {[actual]} out of range. Values must be between {[min]} and {[max]}.", + "empty_mapping": "Empty mappings are not allowed.", + "empty_typespec": "A type specifier is missing.", + "unexpected_token_eof": "Expected a token of type '{[expected_type]}', but the end of file was discovered.", + "unexpected_token": "Expected a token of type '{[expected_type]}', but found token {[actual]}.", + "unexpected_character": "Unexpected character '{[character]}' found.", + "unexpected_eof": "Unexpected end of file.", + "unexpected_toplevel_token": "Expected a top level declaration ('start', 'rule', 'node' or 'token'), but found token {[actual]}", + "unexpected_token_no_context": "Unexpected token '{[actual]}'.", + "unexpected_token_type_spec": "Expected a type specifier, but found token '{[actual]}'.", + "unexpected_token_mapping": "Expected an AST mapping, but found token '{[actual]}'." 
}, "errors": { "Unexpected": "unexpected error encountered", diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index ef45197..e33e692 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -6,6 +6,8 @@ const Diagnostics = @import("Diagnostics.zig"); const fmtEscapes = std.zig.fmtEscapes; +const BAD_TYPE_SPEC: ast.TypeSpec = undefined; + pub const Document = struct { arena: std.heap.ArenaAllocator, file_name: []const u8, @@ -180,12 +182,15 @@ const Parser = struct { return .{ .rule = rule }; } else |err| try filterAcceptError(err); + if (parser.acceptNode()) |node| { + return .{ .node = node }; + } else |err| try filterAcceptError(err); + // Detect any excess tokens on the top level: if (parser.core.nextToken()) |maybe_token| { if (maybe_token) |token| { try parser.emitDiagnostic(token.location, .unexpected_toplevel_token, .{ - .actual_type = token.type, - .actual_text = token.text, + .actual = token, }); return error.SyntaxError; } else { @@ -214,6 +219,29 @@ const Parser = struct { return init_rule; } + fn acceptNode(parser: *Parser) AcceptError!ast.Node { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var state = parser.save(); + errdefer parser.restore(state); + + try parser.acceptLiteral(.node, .recover); + + const identifier = try parser.acceptIdentifier(.fail); + + try parser.acceptLiteral(.@"=", .fail); + + const value = try parser.acceptTypeSpec(); + + try parser.acceptLiteral(.@";", .fail); + + return .{ + .name = identifier, + .value = value, + }; + } + fn acceptRule(parser: *Parser) AcceptError!ast.Rule { parser.traceEnterRule(@src()); defer parser.popTrace(); @@ -381,7 +409,9 @@ const Parser = struct { switch (accept_mode) { .recover => return error.UnexpectedTokenRecoverable, - .fail => return parser.emitUnexpectedToken(), + .fail => return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_mapping, + }), } } @@ -612,6 +642,11 @@ const Parser = struct { parser.traceEnterRule(@src()); defer parser.popTrace(); + const list_state = parser.save(); + errdefer parser.restore(list_state); + + const position = parser.core.tokenizer.current_location; + if (parser.acceptCodeLiteral()) |code| { return .{ .literal = code }; } else |err| try filterAcceptError(err); @@ -620,11 +655,86 @@ const Parser = struct { return .{ .custom = ref }; } else |err| try filterAcceptError(err); - if (parser.acceptNodeReference(.fail)) |ref| { + if (parser.acceptNodeReference(.recover)) |ref| { return .{ .reference = ref }; } else |err| try filterAcceptError(err); - @panic("not implemented yet"); + if (parser.acceptCompoundType(.record)) |record| { + return .{ .record = record }; + } else |err| try filterAcceptError(err); + + if (parser.acceptCompoundType(.variant)) |variant| { + return .{ .variant = variant }; + } else |err| try filterAcceptError(err); + + if (try parser.tryAcceptLiteral(.@";") or try parser.tryAcceptLiteral(.@"|") or try parser.tryAcceptLiteral(.@"=")) { + try parser.emitDiagnostic(position, .empty_typespec, .{}); + return BAD_TYPE_SPEC; + } + + // switch (accept_mode) { + // .recover => return error.UnexpectedTokenRecoverable, + // .fail => + return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_type_spec, + }); + // } + } + + fn acceptCompoundType(parser: *Parser, comptime designator: TokenType) AcceptError!ast.CompoundType { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const list_state = parser.save(); + errdefer parser.restore(list_state); + + const current_location = 
parser.core.tokenizer.current_location; + + // we can recover "struct"/"record", afterwards you must follow the rules + try parser.acceptLiteral(designator, .recover); + + var fields = ast.List(ast.Field){}; + + while (true) { + const field = try parser.acceptField(); + + try parser.append(ast.Field, &fields, field); + + if (try parser.tryAcceptLiteral(.@",")) { + // Comma means we're having another field + continue; + } else { + // Otherwise, the list is over. + break; + } + } + + return .{ + .location = current_location, + .fields = fields, + }; + } + + fn acceptField(parser: *Parser) AcceptError!ast.Field { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + const list_state = parser.save(); + errdefer parser.restore(list_state); + + const current_location = parser.core.tokenizer.current_location; + + const name = try parser.acceptIdentifier(.fail); + + try parser.acceptLiteral(.@":", .fail); + + const type_spec = try parser.acceptTypeSpec(); + + return .{ + .location = current_location, + .name = name, + .type = type_spec, + }; } fn acceptStringLiteral(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.StringLiteral { @@ -716,8 +826,7 @@ const Parser = struct { .fail => { try parser.emitDiagnostic(location, .unexpected_token, .{ .expected_type = token_type, - .actual_type = token.type, - .actual_text = token.text, + .actual = token, }); return error.SyntaxError; }, @@ -799,7 +908,14 @@ const Parser = struct { try parser.diagnostics.emit(loc orelse parser.core.tokenizer.current_location, code, data); } - fn emitUnexpectedToken(parser: *Parser) AcceptError { + const UnexpectedTokenOptions = struct { + unexpected_token: Diagnostics.Code = .unexpected_token_no_context, + }; + fn emitUnexpectedToken(parser: *Parser, comptime opt: UnexpectedTokenOptions) AcceptError { + if (Diagnostics.Data(opt.unexpected_token) != Diagnostics.Data(.unexpected_token_no_context)) { + @compileError("Generic unexpected token must use the same type as 'unexpected_token_no_context' diagnostic."); + } + const state = parser.save(); defer parser.restore(state); @@ -818,8 +934,8 @@ const Parser = struct { return error.SyntaxError; }; - try parser.emitDiagnostic(location, .unexpected_token_no_context, .{ - .actual_type = token.type, + try parser.emitDiagnostic(location, opt.unexpected_token, .{ + .actual = token, }); return error.SyntaxError; } diff --git a/test/parser/accept/node-alias.ptk b/test/parser/accept/node-alias.ptk new file mode 100644 index 0000000..468dbc0 --- /dev/null +++ b/test/parser/accept/node-alias.ptk @@ -0,0 +1 @@ +node Alias = !OtherType; \ No newline at end of file diff --git a/test/parser/accept/node-custom.ptk b/test/parser/accept/node-custom.ptk new file mode 100644 index 0000000..da3a508 --- /dev/null +++ b/test/parser/accept/node-custom.ptk @@ -0,0 +1 @@ +node String = @StringIdentifier; \ No newline at end of file diff --git a/test/parser/accept/node-literal.ptk b/test/parser/accept/node-literal.ptk new file mode 100644 index 0000000..d2e3530 --- /dev/null +++ b/test/parser/accept/node-literal.ptk @@ -0,0 +1 @@ +node String = `[]const u8`; \ No newline at end of file diff --git a/test/parser/accept/node-record-f1.ptk b/test/parser/accept/node-record-f1.ptk new file mode 100644 index 0000000..8b8db7d --- /dev/null +++ b/test/parser/accept/node-record-f1.ptk @@ -0,0 +1 @@ +node Struct = record field: `bool`; \ No newline at end of file diff --git a/test/parser/accept/node-record-f4.ptk b/test/parser/accept/node-record-f4.ptk new file mode 100644 index 0000000..28b3356 
--- /dev/null +++ b/test/parser/accept/node-record-f4.ptk @@ -0,0 +1,6 @@ +node Struct = record + x: `i32`, + y: `i32`, + z: `i32`, + location: !Location +; \ No newline at end of file diff --git a/test/parser/accept/node-variant-f1.ptk b/test/parser/accept/node-variant-f1.ptk new file mode 100644 index 0000000..0f675d8 --- /dev/null +++ b/test/parser/accept/node-variant-f1.ptk @@ -0,0 +1 @@ +node Struct = variant field: `bool`; \ No newline at end of file diff --git a/test/parser/accept/node-variant-f4.ptk b/test/parser/accept/node-variant-f4.ptk new file mode 100644 index 0000000..e346aea --- /dev/null +++ b/test/parser/accept/node-variant-f4.ptk @@ -0,0 +1,6 @@ +node Struct = variant + x: `i32`, + y: `i32`, + z: `i32`, + location: !Location +; \ No newline at end of file diff --git a/test/parser/reject/bad-mapping-invalid-token.ptk b/test/parser/reject/bad-mapping-invalid-token.ptk index 5d783df..aada416 100644 --- a/test/parser/reject/bad-mapping-invalid-token.ptk +++ b/test/parser/reject/bad-mapping-invalid-token.ptk @@ -1,2 +1,2 @@ -# expected: E1109 +# expected: E1111 rule group = "value" => "bad" ; \ No newline at end of file From 808720823665735f6f4f19d460f9f95ddb73ddac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 6 Nov 2023 11:17:15 +0100 Subject: [PATCH 13/20] Adds more diagnostics and tests for those --- build.zig | 5 +++ src/ptkgen/Diagnostics.zig | 4 ++ src/ptkgen/intl/en.json | 6 ++- src/ptkgen/parser.zig | 41 +++++++++++++++++++-- test/parser/reject/bad-mapping-too-long.ptk | 2 +- test/parser/reject/node-no-type.ptk | 2 + test/parser/reject/rule-bad-prod.ptk | 2 + test/parser/reject/rule-no-type-no-prod.ptk | 2 + test/parser/reject/rule-no-type.ptk | 2 + 9 files changed, 59 insertions(+), 7 deletions(-) create mode 100644 test/parser/reject/node-no-type.ptk create mode 100644 test/parser/reject/rule-bad-prod.ptk create mode 100644 test/parser/reject/rule-no-type-no-prod.ptk create mode 100644 test/parser/reject/rule-no-type.ptk diff --git a/build.zig b/build.zig index 562412c..d7e3bd2 100644 --- a/build.zig +++ b/build.zig @@ -200,4 +200,9 @@ const parser_reject_files = [_][]const u8{ "test/parser/reject/empty-mapping.ptk", "test/parser/reject/bad-mapping-invalid-token.ptk", "test/parser/reject/bad-mapping-too-long.ptk", + + "test/parser/reject/node-no-type.ptk", + "test/parser/reject/rule-no-type.ptk", + "test/parser/reject/rule-no-type-no-prod.ptk", + "test/parser/reject/rule-bad-prod.ptk", }; diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig index ecccd61..014542a 100644 --- a/src/ptkgen/Diagnostics.zig +++ b/src/ptkgen/Diagnostics.zig @@ -31,6 +31,8 @@ pub const Code = enum(u16) { unexpected_token_no_context = 1109, unexpected_token_type_spec = 1110, unexpected_token_mapping = 1111, + unexpected_token_production_list = 1112, + unexpected_token_production = 1113, // recoverable syntax errors: illegal_empty_group = 1200, @@ -84,6 +86,8 @@ pub fn Data(comptime code: Code) type { .unexpected_token_no_context => UnexpectedTokenMessage, .unexpected_token_type_spec => UnexpectedTokenMessage, .unexpected_token_mapping => UnexpectedTokenMessage, + .unexpected_token_production_list => UnexpectedTokenMessage, + .unexpected_token_production => UnexpectedTokenMessage, .unexpected_eof => NoDiagnosticData, diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json index 0275d39..360b5c1 100644 --- a/src/ptkgen/intl/en.json +++ b/src/ptkgen/intl/en.json @@ -17,8 +17,10 @@ "unexpected_eof": "Unexpected end of file.", 
"unexpected_toplevel_token": "Expected a top level declaration ('start', 'rule', 'node' or 'token'), but found token {[actual]}", "unexpected_token_no_context": "Unexpected token '{[actual]}'.", - "unexpected_token_type_spec": "Expected a type specifier, but found token '{[actual]}'.", - "unexpected_token_mapping": "Expected an AST mapping, but found token '{[actual]}'." + "unexpected_token_type_spec": "Expected a type specifier, but found '{[actual]}'.", + "unexpected_token_mapping": "Expected an AST mapping, but found '{[actual]}'.", + "unexpected_token_production_list": "Expected ';' or '|', but found '{[actual]}'.", + "unexpected_token_production": "Expected a production, but found '{[actual]}'." }, "errors": { "Unexpected": "unexpected error encountered", diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index e33e692..0cf12fc 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -267,10 +267,20 @@ const Parser = struct { try parser.append(ast.MappedProduction, &list, production); - // TODO: Improve error reporting here + // if a semicolon follows, we're done if (try parser.tryAcceptLiteral(.@";")) { break; } + // if a pipe follows, we got more rules + else if (try parser.tryAcceptLiteral(.@"|")) { + continue; + } + // otherwise, it's a syntax error: + else { + return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_production_list, + }); + } try parser.acceptLiteral(.@"|", .fail); } @@ -309,11 +319,28 @@ const Parser = struct { var list: ast.List(ast.Production) = .{}; - while (true) { + sequence_loop: while (true) { if (parser.acceptProduction()) |prod| { try parser.append(ast.Production, &list, prod); } else |err| switch (err) { - error.UnexpectedTokenRecoverable => break, + error.UnexpectedTokenRecoverable => { + // we couldn't accept a production, so let's see if we're in a legal state here: + + const seekahead_reset = parser.save(); + + // all of the following might allow to terminate a list: + inline for (.{ .@")", .@";", .@"=>", .@"|" }) |legal_terminator| { + if (try parser.tryAcceptLiteral(legal_terminator)) { + // All of the above tokens + parser.restore(seekahead_reset); + break :sequence_loop; + } + } + + return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_production, + }); + }, error.OutOfMemory, error.InvalidSourceEncoding, error.SyntaxError => |e| return e, } } @@ -667,8 +694,14 @@ const Parser = struct { return .{ .variant = variant }; } else |err| try filterAcceptError(err); + const contiuation_pos = parser.save(); if (try parser.tryAcceptLiteral(.@";") or try parser.tryAcceptLiteral(.@"|") or try parser.tryAcceptLiteral(.@"=")) { try parser.emitDiagnostic(position, .empty_typespec, .{}); + + // restore the previous position, we just seeked a bit forward to make better + // errors here: + parser.restore(contiuation_pos); + return BAD_TYPE_SPEC; } @@ -909,7 +942,7 @@ const Parser = struct { } const UnexpectedTokenOptions = struct { - unexpected_token: Diagnostics.Code = .unexpected_token_no_context, + unexpected_token: Diagnostics.Code, }; fn emitUnexpectedToken(parser: *Parser, comptime opt: UnexpectedTokenOptions) AcceptError { if (Diagnostics.Data(opt.unexpected_token) != Diagnostics.Data(.unexpected_token_no_context)) { diff --git a/test/parser/reject/bad-mapping-too-long.ptk b/test/parser/reject/bad-mapping-too-long.ptk index 1ecf764..057dcd5 100644 --- a/test/parser/reject/bad-mapping-too-long.ptk +++ b/test/parser/reject/bad-mapping-too-long.ptk @@ -1,2 +1,2 @@ -# expected: E1102 +# expected: 
E1112 rule group = "value" => $0 whatever ; \ No newline at end of file diff --git a/test/parser/reject/node-no-type.ptk b/test/parser/reject/node-no-type.ptk new file mode 100644 index 0000000..9a6b774 --- /dev/null +++ b/test/parser/reject/node-no-type.ptk @@ -0,0 +1,2 @@ +# expected: E1203 +node foo = ; \ No newline at end of file diff --git a/test/parser/reject/rule-bad-prod.ptk b/test/parser/reject/rule-bad-prod.ptk new file mode 100644 index 0000000..f5bf832 --- /dev/null +++ b/test/parser/reject/rule-bad-prod.ptk @@ -0,0 +1,2 @@ +# expected: E1113 +rule foo = `illegal here`; \ No newline at end of file diff --git a/test/parser/reject/rule-no-type-no-prod.ptk b/test/parser/reject/rule-no-type-no-prod.ptk new file mode 100644 index 0000000..bbd4401 --- /dev/null +++ b/test/parser/reject/rule-no-type-no-prod.ptk @@ -0,0 +1,2 @@ +# expected: E1203, E1200 +rule foo : = ; \ No newline at end of file diff --git a/test/parser/reject/rule-no-type.ptk b/test/parser/reject/rule-no-type.ptk new file mode 100644 index 0000000..6ab328d --- /dev/null +++ b/test/parser/reject/rule-no-type.ptk @@ -0,0 +1,2 @@ +# expected: E1203 +rule foo : = "code"; \ No newline at end of file From a2ffdf4a6f07c65cffe161858273be4155bf24cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 6 Nov 2023 14:30:06 +0100 Subject: [PATCH 14/20] Implements some tests for string escaping. --- docs/grammar.md | 5 ++- examples/ptkgen/grammar.ptk | 2 +- src/ptkgen/Diagnostics.zig | 7 ++- src/ptkgen/parser.zig | 86 ++++++++++++++++++++++++++++++++++++- 4 files changed, 91 insertions(+), 9 deletions(-) diff --git a/docs/grammar.md b/docs/grammar.md index 031d096..0d3d2b5 100644 --- a/docs/grammar.md +++ b/docs/grammar.md @@ -36,5 +36,6 @@ union # constructs a type for alternatives, here with two variants: - `\'` => single quote (0x27) - `\"` => double quote (0x22) - `\\` => back slash (0x5C) -- `\u????` => UTF-16 -- `\U????????` => UTF-32 +- `\u{????}` => UTF-8 encoded codepoint + + diff --git a/examples/ptkgen/grammar.ptk b/examples/ptkgen/grammar.ptk index 7dff596..6cdf958 100644 --- a/examples/ptkgen/grammar.ptk +++ b/examples/ptkgen/grammar.ptk @@ -49,7 +49,7 @@ rule mapped_value = | $userval # @value ; -rule struct_ctor = +rule record_ctor = "{" ( "," )* "}" ; diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig index 014542a..4514708 100644 --- a/src/ptkgen/Diagnostics.zig +++ b/src/ptkgen/Diagnostics.zig @@ -12,13 +12,12 @@ pub const Code = enum(u16) { pub const first_note = 8000; pub const last_item = 10000; - // generic failures: + // generic failures (1000-1099): out_of_memory = 1000, file_limit_exceeded = 1001, io_error = 1002, - // non-recoverable syntax errors: - + // non-recoverable syntax errors (1100-1199): invalid_source_encoding = 1100, unexpected_token_eof = 1101, unexpected_token = 1102, @@ -34,7 +33,7 @@ pub const Code = enum(u16) { unexpected_token_production_list = 1112, unexpected_token_production = 1113, - // recoverable syntax errors: + // recoverable syntax errors (1200-1299): illegal_empty_group = 1200, empty_mapping = 1201, integer_overflow = 1202, diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index 0cf12fc..9803b79 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -1022,8 +1022,7 @@ const Parser = struct { '\\' => "\\", 'x' => @panic("Implement hex escape \\x??"), - 'u' => @panic("Implement utf-16 \\u????"), - 'U' => @panic("Implement utf-32 \\U????????"), + 'u' => @panic("Implement unicode utf-8 escapes \\u{????}"), 
'0'...'3' => @panic("Implement octal escape \\???"), @@ -1351,3 +1350,86 @@ test matchCodeLiteral { "```hello, world!``", }); } + +test "parser string literal" { + const Test = struct { + pub fn run(expected: []const u8, code: []const u8) !void { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + + var diag = Diagnostics.init(std.testing.allocator); + defer diag.deinit(); + + var strings = try ptk.strings.Pool.init(std.testing.allocator); + defer strings.deinit(); + + var tokenizer = Tokenizer.init(code, "unittest"); + + var parser = Parser{ + .diagnostics = &diag, + .pool = &strings, + .core = ParserCore.init(&tokenizer), + .arena = arena.allocator(), + .trace_enabled = false, + }; + + const literal = try parser.acceptStringLiteral(.fail); + + const actual = strings.get(literal.value); + + try std.testing.expectEqualStrings(expected, actual); + } + }; + + // Empty string: + try Test.run("", + \\"" + ); + + // Regular string + try Test.run("hello, world!", + \\"hello, world!" + ); + + // Validate escape sequences: + try Test.run("\r", + \\"\r" + ); + try Test.run("\n", + \\"\n" + ); + try Test.run("\\", + \\"\\" + ); + try Test.run("\"", + \\"\"" + ); + try Test.run("\"hello, world!\"", + \\"\"hello, world!\"" + ); + try Test.run("A\'B", + \\"A\'B" + ); + // TODO: enable those tests for escape sequences! + // try Test.run("\x34", + // \\"\x34" + // ); + // try Test.run("A\xFFB", + // \\"A\xFFB" + // ); + // try Test.run("\x10\x22", + // \\"\x10\x22" + // ); + // try Test.run("A\x1BB", + // \\"A\033B" + // ); + // try Test.run("A\xFFB", + // \\"A\377B" + // ); + // try Test.run("A\x01B", + // \\"A\001B" + // ); + // try Test.run("[\u{1F4A9}]", + // \\"[\u{1F4A9}]" + // ); +} From 490c537fbb9b4b66f3cd5d52cc4d14646dab6e5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Tue, 7 Nov 2023 14:17:30 +0100 Subject: [PATCH 15/20] Implements start rule analysis. 
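
This adds src/ptkgen/sema.zig and a first analysis pass over the parsed
grammar: top-level declarations are collected into lookup tables (rules,
nodes, patterns), duplicate identifiers are reported as E1300..E1302, and
the start declaration is resolved against the declared rules, reporting
E1303 for a reference to an undeclared rule, E1306 for multiple start
symbols, and the new warning W4000 when no start symbol is declared.

A condensed sketch of how the new pass is wired into compileFile() in
src/ptkgen/main.zig (error handling and the surrounding test-mode checks
are elided here; the full version is in the diff below):

    // parse first, then hand the AST to the semantic analyzer
    var grammar = try sema.analyze(
        allocator,
        diagnostics,
        string_pool,
        tree.top_level_declarations,
    );
    defer grammar.deinit();
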
--- build.zig | 28 +- examples/ptkgen/grammar.ptk | 4 +- src/ptkgen/Diagnostics.zig | 125 +++++- src/ptkgen/ast.zig | 29 +- src/ptkgen/ast_dump.zig | 12 +- src/ptkgen/intl.zig | 4 +- src/ptkgen/intl/en.json | 13 +- src/ptkgen/main.zig | 51 ++- src/ptkgen/parser.zig | 13 +- src/ptkgen/sema.zig | 368 ++++++++++++++++++ .../accept/expect-warn-missing-start.ptk | 1 + test/analysis/accept/start-decl.ptk | 7 + test/analysis/reject/duplicate-node.ptk | 3 + test/analysis/reject/duplicate-pattern.ptk | 3 + test/analysis/reject/duplicate-rule.ptk | 3 + test/analysis/reject/duplicate-start.ptk | 10 + .../reject/duplicate-undeclared-start.ptk | 8 + test/analysis/reject/undeclared-start.ptk | 2 + 18 files changed, 624 insertions(+), 60 deletions(-) create mode 100644 src/ptkgen/sema.zig create mode 100644 test/analysis/accept/expect-warn-missing-start.ptk create mode 100644 test/analysis/accept/start-decl.ptk create mode 100644 test/analysis/reject/duplicate-node.ptk create mode 100644 test/analysis/reject/duplicate-pattern.ptk create mode 100644 test/analysis/reject/duplicate-rule.ptk create mode 100644 test/analysis/reject/duplicate-start.ptk create mode 100644 test/analysis/reject/duplicate-undeclared-start.ptk create mode 100644 test/analysis/reject/undeclared-start.ptk diff --git a/build.zig b/build.zig index d7e3bd2..c8d2c43 100644 --- a/build.zig +++ b/build.zig @@ -68,16 +68,17 @@ pub fn build(b: *std.build.Builder) void { test_step.dependOn(&b.addRunArtifact(ptkgen_tests).step); // Integration tests for ptkgen: - for (parser_ok_files) |file| { + for (parser_accept_files ++ parser_reject_files) |file| { const run = b.addRunArtifact(ptkdef_exe); run.addArg("--test_mode=parse_only"); run.addFileArg(.{ .path = file }); test_step.dependOn(&run.step); } - for (parser_reject_files) |file| { + // Integration tests for ptkgen: + for (analyis_accept_files ++ analyis_reject_files) |file| { const run = b.addRunArtifact(ptkdef_exe); - run.addArg("--test_mode=parse_only"); + run.addArg("--test_mode=no_codegen"); run.addFileArg(.{ .path = file }); test_step.dependOn(&run.step); } @@ -99,9 +100,10 @@ pub fn build(b: *std.build.Builder) void { const example_files = [_][]const u8{ "/home/felix/projects/parser-toolkit/examples/ptkgen/grammar.ptk", + "examples/ptkgen/ast-with-unions.ptk", }; -const analyis_ok_files = [_][]const u8{ +const analyis_accept_files = [_][]const u8{ "test/analysis/accept/match-literal-rule.ptk", "test/analysis/accept/match-literal-sequence.ptk", "test/analysis/accept/match-literal-variants.ptk", @@ -127,10 +129,22 @@ const analyis_ok_files = [_][]const u8{ "test/analysis/accept/match-rep_one-many-sequence.ptk", "test/analysis/accept/match-rep_one-nested.ptk", - "examples/ptkgen/ast-with-unions.ptk", + "test/analysis/accept/start-decl.ptk", } ++ example_files; -const parser_ok_files = [_][]const u8{ +const analyis_reject_files = [_][]const u8{ + "test/analysis/reject/duplicate-node.ptk", + // "test/analysis/reject/duplicate-pattern.ptk", // TODO: Implement pattern support in parser + "test/analysis/reject/duplicate-rule.ptk", + + "test/analysis/accept/expect-warn-missing-start.ptk", + + "test/analysis/reject/undeclared-start.ptk", + "test/analysis/reject/duplicate-undeclared-start.ptk", + "test/analysis/reject/duplicate-start.ptk", +}; + +const parser_accept_files = [_][]const u8{ "test/parser/accept/empty.ptk", "test/parser/accept/empty-with-comment-linefeed.ptk", "test/parser/accept/empty-with-comment.ptk", @@ -186,7 +200,7 @@ const parser_ok_files = [_][]const u8{ 
"test/parser/accept/node-variant-f4.ptk", "test/parser/accept/node-variant-f1.ptk", -} ++ analyis_ok_files; +} ++ analyis_accept_files; const parser_reject_files = [_][]const u8{ "test/parser/reject/empty-rule.ptk", diff --git a/examples/ptkgen/grammar.ptk b/examples/ptkgen/grammar.ptk index 6cdf958..bc098d1 100644 --- a/examples/ptkgen/grammar.ptk +++ b/examples/ptkgen/grammar.ptk @@ -5,14 +5,14 @@ rule document = ( )* ; rule top_level = - | + | | | ; rule start_decl = "start" $rule_ref ";" ; -rule token_decl = "token" $identifier "=" ";" ; +rule pattern_decl = "pattern" $identifier "=" ";" ; rule node_decl = "node" $identifier "=" ";" ; diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig index 4514708..7e37483 100644 --- a/src/ptkgen/Diagnostics.zig +++ b/src/ptkgen/Diagnostics.zig @@ -10,7 +10,7 @@ pub const Code = enum(u16) { pub const first_error = 1000; pub const first_warning = 4000; pub const first_note = 8000; - pub const last_item = 10000; + pub const last_item = 9999; // generic failures (1000-1099): out_of_memory = 1000, @@ -39,12 +39,56 @@ pub const Code = enum(u16) { integer_overflow = 1202, empty_typespec = 1203, + // semantic errors (1300-1399): + + duplicate_identifier_rule = 1300, + duplicate_identifier_node = 1301, + duplicate_identifier_pattern = 1302, + + reference_to_undeclared_rule = 1303, + reference_to_undeclared_node = 1304, + reference_to_undeclared_pattern = 1305, + + multiple_start_symbols = 1306, + + // semantic warnings (4000-4099): + + missing_start_symbol = 4000, + comptime { std.debug.assert(first_error < first_warning); std.debug.assert(first_warning < first_note); std.debug.assert(first_note < last_item); } + const max_item_len = blk: { + var len = 0; + for (@typeInfo(Code).Enum.fields) |fld| { + len = @max(len, fld.name); + } + break :blk len; + }; + + const code_strings = blk: { + @setEvalBranchQuota(10_000); + var map = std.EnumArray(Code, []const u8).initUndefined(); + + for (std.enums.values(Code)) |code| { + const tag = @tagName(code); + + // perform kebab conversion: + var buf: [tag.len]u8 = tag[0..tag.len].*; + for (&buf) |*c| { + if (c.* == '_') + c.* = '-'; + } + + map.set(code, &buf); + } + + break :blk map; + }; + pub fn isError(code: Code) bool { const int = @intFromEnum(code); return @intFromEnum(code) >= first_error and int < first_warning; @@ -59,6 +103,45 @@ pub const Code = enum(u16) { const int = @intFromEnum(code); return int >= first_note and int < last_item; } + + pub fn parse(string: []const u8) error{ + /// Format is not recognized + InvalidFormat, + /// Numeric error code is out of range. + OutOfRange, + /// Numeric error code does not exist. 
+ InvalidId, + }!Code { + if (string.len == 0 or (string[0] != 'E' and string[0] != 'W' and string[0] != 'D')) + return error.InvalidFormat; + const id = std.fmt.parseInt(u16, string[1..], 10) catch |err| switch (err) { + error.InvalidCharacter => return error.InvalidFormat, + error.Overflow => return error.OutOfRange, + }; + if (id > last_item) + return error.OutOfRange; + return std.meta.intToEnum(Diagnostics.Code, id) catch return error.InvalidId; + } + + pub fn format(code: Code, comptime fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = opt; + + if (comptime std.mem.eql(u8, fmt, "d")) { + const code_prefix = if (code.isError()) + "E" + else if (code.isWarning()) + "W" + else + "D"; + + try writer.print("{s}{d:0>4}", .{ code_prefix, @intFromEnum(code) }); + } else if (comptime std.mem.eql(u8, fmt, "s")) { + try writer.writeAll(code_strings.get(code)); + } else { + @compileError("Code fmt must be {s} (string variant) or {d} (numeric variant)!"); + } + // + } }; const NoDiagnosticData = struct {}; @@ -67,6 +150,14 @@ const UnexpectedTokenMessage = struct { actual: parser.Token, }; +const DuplicateIdentifier = struct { + identifier: []const u8, + previous_location: ptk.Location, +}; +const UndeclaredIdentifier = struct { + identifier: []const u8 +}; + pub fn Data(comptime code: Code) type { return switch (code) { .out_of_memory => NoDiagnosticData, @@ -108,6 +199,21 @@ pub fn Data(comptime code: Code) type { .empty_typespec => NoDiagnosticData, + .duplicate_identifier_rule => DuplicateIdentifier, + .duplicate_identifier_node => DuplicateIdentifier, + .duplicate_identifier_pattern => DuplicateIdentifier, + + .reference_to_undeclared_rule => UndeclaredIdentifier, + .reference_to_undeclared_node => UndeclaredIdentifier, + .reference_to_undeclared_pattern => UndeclaredIdentifier, + + .multiple_start_symbols => struct { + identifier: []const u8, + previous_location: ptk.Location, + }, + + .missing_start_symbol => NoDiagnosticData, + // else => @compileError(std.fmt.comptimePrint("Code {} has no diagnostic type associated!", .{code})), }; } @@ -204,6 +310,15 @@ fn Formatter(comptime T: type) type { } }, + ptk.Location => struct { + value: ptk.Location, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + try writer.print("{}", .{item.value}); + } + }, + intl.FormattableError => struct { value: T, @@ -284,13 +399,7 @@ pub fn emit(diag: *Diagnostics, location: ptk.Location, comptime code: Code, par const message_text = try std.fmt.allocPrint(stack_fallback_allocator, fmt_string, formatted_params); defer stack_fallback_allocator.free(message_text); - const code_prefix = switch (level) { - .@"error" => "E", - .warning => "W", - .info => "D", - }; - - try diag.inner.emit(location, level, "{s}{d:0>4}: {s}", .{ code_prefix, @intFromEnum(code), message_text }); + try diag.inner.emit(location, level, "{d}: {s}", .{ code, message_text }); try diag.codes.append(code); } diff --git a/src/ptkgen/ast.zig b/src/ptkgen/ast.zig index e62d664..f269552 100644 --- a/src/ptkgen/ast.zig +++ b/src/ptkgen/ast.zig @@ -32,10 +32,10 @@ pub fn Iterator(comptime T: type) type { return struct { node: ?*List(T).Node, - pub fn next(iter: *@This()) ?T { + pub fn next(iter: *@This()) ?*T { const current = iter.node orelse return null; iter.node = current.next; - return current.data; + return ¤t.data; } }; } @@ -73,12 +73,13 @@ pub const TopLevelDeclaration = union(enum) { start: RuleRef, rule: Rule, node: Node, - token: 
Token, + pattern: Pattern, }; pub const NodeRef = Reference(Node); // !mynode pub const RuleRef = Reference(Rule); // -pub const TokenRef = Reference(Token); // $mytoken +pub const PatternRef = Reference(Pattern); // $mytoken + pub const ValueRef = struct { // $0 location: Location, index: u32, @@ -95,9 +96,16 @@ pub const Rule = struct { // rule ( : )? = ...; productions: List(MappedProduction), // all alternatives of the rule }; -pub const Token = struct { // token = ...; +pub const Pattern = struct { // token = ...; name: Identifier, - pattern: Pattern, + pattern: Data, + + pub const Data = union(enum) { + literal: StringLiteral, // literal "+" + word: StringLiteral, // word "while" + regex: StringLiteral, // regex "string" + external: CodeLiteral, // custom `matchMe` + }; }; pub const MappedProduction = struct { // ... => value @@ -107,7 +115,7 @@ pub const MappedProduction = struct { // ... => value pub const Production = union(enum) { literal: StringLiteral, // "text" - terminal: TokenRef, // $token + terminal: PatternRef, // $token recursion: RuleRef, // sequence: List(Production), // ... optional: List(Production), // ( ... )? @@ -145,13 +153,6 @@ pub const FieldAssignment = struct { value: *AstMapping, }; -pub const Pattern = union(enum) { - literal: StringLiteral, // literal "+" - word: StringLiteral, // word "while" - regex: StringLiteral, // regex "string" - external: CodeLiteral, // custom `matchMe` -}; - pub const TypeSpec = union(enum) { reference: NodeRef, // !type literal: CodeLiteral, // literal `bool` diff --git a/src/ptkgen/ast_dump.zig b/src/ptkgen/ast_dump.zig index eabfa11..468d789 100644 --- a/src/ptkgen/ast_dump.zig +++ b/src/ptkgen/ast_dump.zig @@ -22,7 +22,7 @@ const AstPrinter = struct { var iter = ast.iterate(decls); while (iter.next()) |decl| { - switch (decl) { + switch (decl.*) { .start => |item| print("start <{}>;\n", .{printer.fmtId(item.identifier)}), .rule => |rule| { @@ -44,7 +44,7 @@ const AstPrinter = struct { } else { print(" ", .{}); } - printer.dumpMappedProd(prod); + printer.dumpMappedProd(prod.*); } print("\n;\n", .{}); @@ -56,8 +56,8 @@ const AstPrinter = struct { print(";\n", .{}); }, - .token => |token| { - print("token {s}", .{printer.fmtId(token.name.value)}); + .pattern => |pattern| { + print("pattern {s}", .{printer.fmtId(pattern.name.value)}); print(";\n", .{}); }, } @@ -117,7 +117,7 @@ const AstPrinter = struct { var iter = ast.iterate(seq); while (iter.next()) |item| { print(" ", .{}); - printer.dumpProd(item); + printer.dumpProd(item.*); } print(" )", .{}); @@ -199,7 +199,7 @@ const AstPrinter = struct { } first = false; - printer.dumpMapping(arg); + printer.dumpMapping(arg.*); } } diff --git a/src/ptkgen/intl.zig b/src/ptkgen/intl.zig index 70af58f..13ff049 100644 --- a/src/ptkgen/intl.zig +++ b/src/ptkgen/intl.zig @@ -57,6 +57,9 @@ pub const Localization = struct { errors: struct { Unexpected: []const u8, + SyntaxError: []const u8, + SemanticError: []const u8, + OutOfMemory: []const u8, InputOutput: []const u8, @@ -72,7 +75,6 @@ pub const Localization = struct { NetNameDeleted: []const u8, FileTooBig: []const u8, - SyntaxError: []const u8, InvalidSourceEncoding: []const u8, }, diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json index 360b5c1..95db0b7 100644 --- a/src/ptkgen/intl/en.json +++ b/src/ptkgen/intl/en.json @@ -20,9 +20,19 @@ "unexpected_token_type_spec": "Expected a type specifier, but found '{[actual]}'.", "unexpected_token_mapping": "Expected an AST mapping, but found '{[actual]}'.", 
"unexpected_token_production_list": "Expected ';' or '|', but found '{[actual]}'.", - "unexpected_token_production": "Expected a production, but found '{[actual]}'." + "unexpected_token_production": "Expected a production, but found '{[actual]}'.", + "duplicate_identifier_rule": "Rule {[identifier]} already defined here: {[previous_location]}", + "duplicate_identifier_node": "Node {[identifier]} already defined here: {[previous_location]}", + "duplicate_identifier_pattern": "Pattern {[identifier]} already defined here: {[previous_location]}", + "reference_to_undeclared_rule": "Reference to undeclared rule {[identifier]}.", + "reference_to_undeclared_node": "Reference to undeclared node {[identifier]}.", + "reference_to_undeclared_pattern": "Reference to undeclared pattern {[identifier]}.", + "missing_start_symbol": "Grammar file has no start symbol declared.", + "multiple_start_symbols": "Another start rule ({[identifier]}) was already declared here: {[previous_location]}" }, "errors": { + "SyntaxError": "syntax error", + "SemanticError": "semantic error", "Unexpected": "unexpected error encountered", "OutOfMemory": "out of memory", "InputOutput": "input output", @@ -37,7 +47,6 @@ "NotOpenForReading": "not open for reading", "NetNameDeleted": "net name deleted", "FileTooBig": "Input file exceeds resources", - "SyntaxError": "syntax error", "InvalidSourceEncoding": "invalid source encoding" } } \ No newline at end of file diff --git a/src/ptkgen/main.zig b/src/ptkgen/main.zig index bfb273c..4384d44 100644 --- a/src/ptkgen/main.zig +++ b/src/ptkgen/main.zig @@ -7,6 +7,7 @@ const args_parser = @import("args"); const ptk = @import("parser-toolkit"); const ast = @import("ast.zig"); +const sema = @import("sema.zig"); const intl = @import("intl.zig"); const parser = @import("parser.zig"); const ast_dump = @import("ast_dump.zig"); @@ -52,6 +53,7 @@ pub const CliOptions = struct { const TestMode = enum { none, parse_only, + no_codegen, }; const AppError = error{OutOfMemory} || std.fs.File.WriteError; @@ -126,10 +128,10 @@ pub fn main() AppError!u8 { if (std.mem.startsWith(u8, line, prefix)) { var items = std.mem.tokenize(u8, line[prefix.len..], " \t,"); while (items.next()) |error_code| { - if (error_code.len == 0 or (error_code[0] != 'E' and error_code[0] != 'W' and error_code[0] != 'D')) - @panic("invalid error code!"); - const id = std.fmt.parseInt(u16, error_code[1..], 10) catch @panic("bad integer"); - const code = std.meta.intToEnum(Diagnostics.Code, id) catch @panic("bad diagnostic code"); + const code = Diagnostics.Code.parse( + error_code, + ) catch @panic("invalid error code!"); + try expectations.append(.{ .code = code }); } } @@ -142,8 +144,7 @@ pub fn main() AppError!u8 { &string_pool, source_code, file_name, - cli.options.test_mode, - cli.options.trace, + cli.options, ) catch |err| { try convertErrorToDiagnostics(&diagnostics, file_name, err); break :process_file false; @@ -176,7 +177,7 @@ pub fn main() AppError!u8 { fn convertErrorToDiagnostics(diagnostics: *Diagnostics, file_name: []const u8, err: intl.FormattableError) error{OutOfMemory}!void { switch (err) { // syntax errors must produce diagnostics: - error.SyntaxError, error.InvalidSourceEncoding => std.debug.assert(diagnostics.hasErrors()), + error.SyntaxError, error.SemanticError, error.InvalidSourceEncoding => std.debug.assert(diagnostics.hasErrors()), error.OutOfMemory => { try diagnostics.emit(.{ @@ -250,13 +251,26 @@ fn validateDiagnostics(allocator: std.mem.Allocator, diagnostics: Diagnostics, e } } + // Remove all 
non-errors from available, we do match on them with "-W4000" instead of forcing a expected W4000 into all files without start rules (or similar) + { + var i: usize = 0; + while (i < available.items.len) { + const code = available.items[i]; + if (!code.isError()) { + _ = available.swapRemove(i); + } else { + i += 1; + } + } + } + const ok = (available.items.len == 0) and (expected.items.len == 0); for (available.items) |code| { - std.log.err("unexpected diagnostic: {0}", .{code}); + std.log.err("unexpected diagnostic: {s} ({d})", .{ code, code }); } for (expected.items) |code| { - std.log.err("unmatched diagnostic: {0}", .{code}); + std.log.err("unmatched diagnostic: {s} ({d})", .{ code, code }); } if (!ok) @@ -269,8 +283,7 @@ fn compileFile( string_pool: *ptk.strings.Pool, source_code: []const u8, file_name: []const u8, - mode: TestMode, - trace_enabled: bool, + options: CliOptions, ) !void { var tree = try parser.parse( .{ @@ -279,16 +292,26 @@ fn compileFile( .string_pool = string_pool, .file_name = file_name, .source_code = source_code, - .trace_enabled = trace_enabled, + .trace_enabled = options.trace, }, ); defer tree.deinit(); - // TODO: Implement sema + if (options.test_mode == .parse_only) { + return; + } + + var grammar = try sema.analyze( + allocator, + diagnostics, + string_pool, + tree.top_level_declarations, + ); + defer grammar.deinit(); // TODO: Implement parsergen / tablegen / highlightergen - if (mode == .none) { + if (options.test_mode == .none) { ast_dump.dump(string_pool, tree); } } diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index 9803b79..cb2a445 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -85,13 +85,14 @@ pub fn parse(opt: struct { pub const TokenType = enum { // keywords + start, node, + rule, + pattern, + record, variant, optional, - start, - rule, - token, custom, regex, @@ -808,13 +809,13 @@ const Parser = struct { }; } - fn acceptTokenReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.TokenRef { + fn acceptTokenReference(parser: *Parser, accept_mode: AcceptMode) AcceptError!ast.PatternRef { parser.traceEnterRule(@src()); defer parser.popTrace(); const token = try parser.acceptToken(.token_ref, accept_mode); std.debug.assert(std.mem.startsWith(u8, token.text, "$")); - return ast.TokenRef{ + return ast.PatternRef{ .location = token.location, .identifier = try parser.unwrapIdentifierString(token.location, token.text[1..]), }; @@ -1101,7 +1102,7 @@ const Tokenizer = ptk.Tokenizer(TokenType, &.{ Pattern.create(.optional, match.word("optional")), Pattern.create(.start, match.word("start")), Pattern.create(.rule, match.word("rule")), - Pattern.create(.token, match.word("token")), + Pattern.create(.pattern, match.word("pattern")), Pattern.create(.custom, match.word("custom")), Pattern.create(.regex, match.word("regex")), Pattern.create(.skip, match.word("skip")), diff --git a/src/ptkgen/sema.zig b/src/ptkgen/sema.zig new file mode 100644 index 0000000..cfa7562 --- /dev/null +++ b/src/ptkgen/sema.zig @@ -0,0 +1,368 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const ast = @import("ast.zig"); +const Diagnostics = @import("Diagnostics.zig"); + +pub const AnalyzeError = error{ OutOfMemory, SemanticError }; + +const String = ptk.strings.String; + +pub fn StringHashMap(comptime T: type) type { + return std.AutoArrayHashMap(String, T); +} + +pub const Grammar = struct { + arena: std.heap.ArenaAllocator, + + start: ?StartDeclaration, + + rules: StringHashMap(*Rule), + nodes: StringHashMap(*Node), 
+ patterns: StringHashMap(*Pattern), + + pub fn deinit(grammar: *Grammar) void { + grammar.rules.deinit(); + grammar.nodes.deinit(); + grammar.patterns.deinit(); + grammar.arena.deinit(); + grammar.* = undefined; + } +}; + +pub const StartDeclaration = struct { + rule: *Rule, + location: ptk.Location, +}; + +pub const Rule = struct { + location: ptk.Location, + name: String, + + type: ?*Type, + production: *Production, +}; + +pub const Production = union(enum) { + terminal: *Pattern, // literal and terminal ast nodes are wrapped to this + recursion: *Rule, // + sequence: []Production, // ... + optional: *Production, // ( ... )? + repetition_zero: *Production, // [ ... ]* + repetition_one: *Production, // [ ... ]+ +}; + +pub const Node = struct { + location: ptk.Location, + name: String, + + type: *Type, +}; + +pub const Pattern = struct { + location: ptk.Location, + name: String, + + data: Data, + + pub const Data = union(enum) { + literal_match: String, + word: String, + regex: String, + external: String, + }; +}; + +pub const Type = union(enum) { + code_literal: String, + user_value: String, + + optional: *Type, + record: *CompoundType, + variant: *CompoundType, +}; + +pub const CompoundType = struct { + fields: StringHashMap(Field), +}; + +pub const Field = struct { + name: String, + type: *Type, +}; + +pub fn analyze(allocator: std.mem.Allocator, diagnostics: *Diagnostics, strings: *const ptk.strings.Pool, document: ast.Document) AnalyzeError!Grammar { + std.debug.assert(diagnostics.hasErrors() == false); + errdefer |err| if (err == error.SemanticError) + std.debug.assert(diagnostics.hasErrors()); + + var grammar = Grammar{ + .arena = std.heap.ArenaAllocator.init(allocator), + + .rules = StringHashMap(*Rule).init(allocator), + .nodes = StringHashMap(*Node).init(allocator), + .patterns = StringHashMap(*Pattern).init(allocator), + + .start = null, + }; + errdefer grammar.deinit(); + + var analyzer = Analyzer{ + .arena = grammar.arena.allocator(), + .diagnostics = diagnostics, + .strings = strings, + + .rule_to_ast = std.AutoHashMap(*Rule, *ast.Rule).init(allocator), + .node_to_ast = std.AutoHashMap(*Node, *ast.Node).init(allocator), + .pattern_to_ast = std.AutoHashMap(*Pattern, *ast.Pattern).init(allocator), + + .document = document, + + .target = &grammar, + }; + defer analyzer.deinit(); + + try innerAnalysis(&analyzer); + + if (grammar.start == null) { + try analyzer.emitDiagnostic(ptk.Location{ + .line = 0, + .column = 0, + .source = null, + }, .missing_start_symbol, .{}); + } + + return grammar; +} + +fn innerAnalysis(analyzer: *Analyzer) AnalyzeError!void { + // Phase 0: Validate productions on legality (coarse error checking) + // - Generates errors for badly constructed elements + try analyzer.validateAstRulesCoarse(); + + // Phase 1: Create all global declarations + // - Populates the declaration lookups + // - Generates errors for duplicate identifiers + try analyzer.createDeclarations(); + + // Phase 2: Instantiate all node types and patterns, determine start symbol + + try analyzer.iterateOn(.start, Analyzer.instantiateStartSymbol); + try analyzer.iterateOn(.node, Analyzer.instantiatePatterns); + try analyzer.iterateOn(.node, Analyzer.instantiateNodeTypes); + + // Phase 3: Validate generated types + + // Phase 4: Instantiate AST productions + + // Phase 5: Instantiate and validate AST mappings + +} + +const Analyzer = struct { + arena: std.mem.Allocator, + diagnostics: *Diagnostics, + strings: *const ptk.strings.Pool, + target: *Grammar, + + document: ast.Document, + + 
rule_to_ast: std.AutoHashMap(*Rule, *ast.Rule), + node_to_ast: std.AutoHashMap(*Node, *ast.Node), + pattern_to_ast: std.AutoHashMap(*Pattern, *ast.Pattern), + + fn deinit(analyzer: *Analyzer) void { + analyzer.rule_to_ast.deinit(); + analyzer.node_to_ast.deinit(); + analyzer.pattern_to_ast.deinit(); + analyzer.* = undefined; + } + + const IterativeAnalysisError = error{RecoverableSemanticError} || AnalyzeError; + + fn iterateOn( + analyzer: *Analyzer, + comptime node_type: std.meta.FieldEnum(ast.TopLevelDeclaration), + comptime functor: fn (*Analyzer, *std.meta.FieldType(ast.TopLevelDeclaration, node_type)) IterativeAnalysisError!void, + ) AnalyzeError!void { + var iter = ast.iterate(analyzer.document); + while (iter.next()) |item| { + switch (item.*) { + @field(std.meta.Tag(ast.TopLevelDeclaration), @tagName(node_type)) => |*node| { + functor(analyzer, node) catch |err| switch (err) { + error.RecoverableSemanticError => {}, + else => |e| return e, + }; + }, + else => {}, + } + } + } + + fn validateAstRulesCoarse(analyzer: *Analyzer) !void { + var iter = ast.iterate(analyzer.document); + while (iter.next()) |item| { + switch (item.*) { + .start => |start| { + _ = start; + }, + + .rule => |rule| { + _ = rule; + }, + + .node => |node| { + _ = node; + }, + + .pattern => |pattern| { + _ = pattern; + }, + } + } + } + + fn createDeclarations(analyzer: *Analyzer) !void { + var iter = ast.iterate(analyzer.document); + while (iter.next()) |item| { + switch (item.*) { + .start => {}, + + .rule => |*rule| { + const instance = try analyzer.declareElement( + Rule, + ast.Rule, + &analyzer.target.rules, + &analyzer.rule_to_ast, + rule, + rule.name, + .duplicate_identifier_rule, + ); + instance.* = .{ + .location = rule.name.location, + .name = rule.name.value, + + .type = undefined, // created in phase 4 + .production = undefined, // created in phase 5 + }; + }, + + .node => |*node| { + const instance = try analyzer.declareElement( + Node, + ast.Node, + &analyzer.target.nodes, + &analyzer.node_to_ast, + node, + node.name, + .duplicate_identifier_node, + ); + instance.* = .{ + .location = node.name.location, + .name = node.name.value, + + .type = undefined, // created in phase 2 + }; + }, + + .pattern => |*pattern| { + const instance = try analyzer.declareElement( + Pattern, + ast.Pattern, + &analyzer.target.patterns, + &analyzer.pattern_to_ast, + pattern, + pattern.name, + .duplicate_identifier_pattern, + ); + instance.* = .{ + .location = pattern.name.location, + .name = pattern.name.value, + + .data = undefined, // created in phase 2 + }; + }, + } + } + } + + fn instantiateStartSymbol(analyzer: *Analyzer, start: *ast.RuleRef) !void { + if (analyzer.target.start) |old_start| { + try analyzer.emitDiagnostic(start.location, .multiple_start_symbols, .{ + .identifier = analyzer.strings.get(old_start.rule.name), + .previous_location = old_start.location, + }); + // error return is further down below so we can also catch the undefined reference error + } + + const rule = analyzer.target.rules.get(start.identifier) orelse { + try analyzer.emitDiagnostic(start.location, .reference_to_undeclared_rule, .{ + .identifier = analyzer.strings.get(start.identifier), + }); + return error.RecoverableSemanticError; + }; + + if (analyzer.target.start != null) { + // return for the first if block + return error.RecoverableSemanticError; + } + + analyzer.target.start = .{ + .rule = rule, + .location = start.location, + }; + } + + fn instantiatePatterns(analyzer: *Analyzer, node: *ast.Node) !void { + _ = analyzer; + _ = node; 
+ // + } + + fn instantiateNodeTypes(analyzer: *Analyzer, node: *ast.Node) !void { + _ = analyzer; + _ = node; + // + } + + const DeclarationError = error{ + OutOfMemory, + SemanticError, + }; + fn declareElement( + analyzer: *Analyzer, + comptime Element: type, + comptime AstNode: type, + set: *StringHashMap(*Element), + ast_map: *std.AutoHashMap(*Element, *AstNode), + ast_node: *AstNode, + name: ast.Identifier, + comptime diagnostic: Diagnostics.Code, + ) DeclarationError!*Element { + const gop = try set.getOrPut(name.value); + if (gop.found_existing) { + // emit diagnostic here + try analyzer.emitDiagnostic(name.location, diagnostic, .{ + .identifier = analyzer.strings.get(name.value), + .previous_location = gop.value_ptr.*.*.location, + }); + return error.SemanticError; + } + errdefer _ = set.swapRemove(name.value); + + const item = try analyzer.arena.create(Element); + errdefer analyzer.arena.destroy(item); + + item.* = undefined; + + gop.value_ptr.* = item; + + try ast_map.putNoClobber(item, ast_node); + + return item; + } + + fn emitDiagnostic(analyzer: *Analyzer, location: ptk.Location, comptime code: Diagnostics.Code, params: Diagnostics.Data(code)) !void { + try analyzer.diagnostics.emit(location, code, params); + } +}; diff --git a/test/analysis/accept/expect-warn-missing-start.ptk b/test/analysis/accept/expect-warn-missing-start.ptk new file mode 100644 index 0000000..f31365e --- /dev/null +++ b/test/analysis/accept/expect-warn-missing-start.ptk @@ -0,0 +1 @@ +# expected: W4000 diff --git a/test/analysis/accept/start-decl.ptk b/test/analysis/accept/start-decl.ptk new file mode 100644 index 0000000..404f545 --- /dev/null +++ b/test/analysis/accept/start-decl.ptk @@ -0,0 +1,7 @@ + + + +start ; + +rule magic = "magic"; + diff --git a/test/analysis/reject/duplicate-node.ptk b/test/analysis/reject/duplicate-node.ptk new file mode 100644 index 0000000..0f67291 --- /dev/null +++ b/test/analysis/reject/duplicate-node.ptk @@ -0,0 +1,3 @@ +# expected: E1301 +node foo = `bool`; +node foo = `bool`; \ No newline at end of file diff --git a/test/analysis/reject/duplicate-pattern.ptk b/test/analysis/reject/duplicate-pattern.ptk new file mode 100644 index 0000000..4302396 --- /dev/null +++ b/test/analysis/reject/duplicate-pattern.ptk @@ -0,0 +1,3 @@ +# expected: E1302 +pattern foo = literal "bla"; +pattern foo = literal "bla"; \ No newline at end of file diff --git a/test/analysis/reject/duplicate-rule.ptk b/test/analysis/reject/duplicate-rule.ptk new file mode 100644 index 0000000..eff3ee6 --- /dev/null +++ b/test/analysis/reject/duplicate-rule.ptk @@ -0,0 +1,3 @@ +# expected: E1300 +rule foo = ""; +rule foo = ""; \ No newline at end of file diff --git a/test/analysis/reject/duplicate-start.ptk b/test/analysis/reject/duplicate-start.ptk new file mode 100644 index 0000000..52c55cf --- /dev/null +++ b/test/analysis/reject/duplicate-start.ptk @@ -0,0 +1,10 @@ +# expected: E1306 + +start ; + +rule magic = "magic"; + +rule disco = "disco"; + +start ; + diff --git a/test/analysis/reject/duplicate-undeclared-start.ptk b/test/analysis/reject/duplicate-undeclared-start.ptk new file mode 100644 index 0000000..8b53833 --- /dev/null +++ b/test/analysis/reject/duplicate-undeclared-start.ptk @@ -0,0 +1,8 @@ +# expected: E1303, E1306 + +start ; + +rule magic = "magic"; + +start ; + diff --git a/test/analysis/reject/undeclared-start.ptk b/test/analysis/reject/undeclared-start.ptk new file mode 100644 index 0000000..5a97c96 --- /dev/null +++ b/test/analysis/reject/undeclared-start.ptk @@ -0,0 +1,2 @@ +# 
expected: E1303, W4000 +start ; \ No newline at end of file From 05c58f0c0552f99b595ad2c56acba575e7e3a99f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Wed, 15 Nov 2023 15:01:24 +0100 Subject: [PATCH 16/20] More semantic analysis --- build.zig | 12 ++ examples/ptkgen/ast-with-unions.ptk | 4 + examples/ptkgen/grammar.ptk | 9 +- src/ptkgen/Diagnostics.zig | 6 +- src/ptkgen/ast.zig | 5 +- src/ptkgen/ast_dump.zig | 13 +- src/ptkgen/intl/en.json | 11 +- src/ptkgen/parser.zig | 63 ++++++- src/ptkgen/sema.zig | 173 +++++++++++++++++- test/analysis/accept/pattern-custom-skip.ptk | 4 + test/analysis/accept/pattern-custom.ptk | 4 + test/analysis/accept/pattern-literal-skip.ptk | 4 + test/analysis/accept/pattern-literal.ptk | 4 + test/analysis/accept/pattern-regex-skip.ptk | 4 + test/analysis/accept/pattern-regex.ptk | 4 + test/analysis/accept/pattern-word-skip.ptk | 4 + test/analysis/accept/pattern-word.ptk | 4 + .../reject/pattern-unexpected-token.ptk | 4 + 18 files changed, 308 insertions(+), 24 deletions(-) create mode 100644 test/analysis/accept/pattern-custom-skip.ptk create mode 100644 test/analysis/accept/pattern-custom.ptk create mode 100644 test/analysis/accept/pattern-literal-skip.ptk create mode 100644 test/analysis/accept/pattern-literal.ptk create mode 100644 test/analysis/accept/pattern-regex-skip.ptk create mode 100644 test/analysis/accept/pattern-regex.ptk create mode 100644 test/analysis/accept/pattern-word-skip.ptk create mode 100644 test/analysis/accept/pattern-word.ptk create mode 100644 test/parser/reject/pattern-unexpected-token.ptk diff --git a/build.zig b/build.zig index c8d2c43..dcade1e 100644 --- a/build.zig +++ b/build.zig @@ -130,6 +130,16 @@ const analyis_accept_files = [_][]const u8{ "test/analysis/accept/match-rep_one-nested.ptk", "test/analysis/accept/start-decl.ptk", + + "test/analysis/accept/pattern-custom.ptk", + "test/analysis/accept/pattern-literal.ptk", + "test/analysis/accept/pattern-regex.ptk", + "test/analysis/accept/pattern-word.ptk", + + "test/analysis/accept/pattern-word-skip.ptk", + "test/analysis/accept/pattern-regex-skip.ptk", + "test/analysis/accept/pattern-literal-skip.ptk", + "test/analysis/accept/pattern-custom-skip.ptk", } ++ example_files; const analyis_reject_files = [_][]const u8{ @@ -219,4 +229,6 @@ const parser_reject_files = [_][]const u8{ "test/parser/reject/rule-no-type.ptk", "test/parser/reject/rule-no-type-no-prod.ptk", "test/parser/reject/rule-bad-prod.ptk", + + "test/parser/reject/pattern-unexpected-token.ptk", }; diff --git a/examples/ptkgen/ast-with-unions.ptk b/examples/ptkgen/ast-with-unions.ptk index 9435a51..6da295d 100644 --- a/examples/ptkgen/ast-with-unions.ptk +++ b/examples/ptkgen/ast-with-unions.ptk @@ -55,6 +55,10 @@ node TLDeclaration = variant module : !module ; +node namespace = @extern; +node interface = @extern; +node module = @extern; + rule toplevel-decl : !TLDeclaration = => ns: $0 # this is syntax for a union field selector as unions are not compounds | => interface: $0 diff --git a/examples/ptkgen/grammar.ptk b/examples/ptkgen/grammar.ptk index bc098d1..d788e27 100644 --- a/examples/ptkgen/grammar.ptk +++ b/examples/ptkgen/grammar.ptk @@ -12,7 +12,14 @@ rule top_level = rule start_decl = "start" $rule_ref ";" ; -rule pattern_decl = "pattern" $identifier "=" ";" ; +rule pattern_decl = "pattern" $identifier "=" ( "skip" )? 
";" ; + +rule pattern_spec = + "literal" $string_literal + | "word" $string_literal + | "regex" $string_literal + | $userval +; rule node_decl = "node" $identifier "=" ";" ; diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig index 7e37483..56aeea2 100644 --- a/src/ptkgen/Diagnostics.zig +++ b/src/ptkgen/Diagnostics.zig @@ -32,6 +32,7 @@ pub const Code = enum(u16) { unexpected_token_mapping = 1111, unexpected_token_production_list = 1112, unexpected_token_production = 1113, + unexpected_token_pattern = 1114, // recoverable syntax errors (1200-1299): illegal_empty_group = 1200, @@ -154,9 +155,7 @@ const DuplicateIdentifier = struct { identifier: []const u8, previous_location: ptk.Location, }; -const UndeclaredIdentifier = struct { - identifier: []const u8 -}; +const UndeclaredIdentifier = struct { identifier: []const u8 }; pub fn Data(comptime code: Code) type { return switch (code) { @@ -178,6 +177,7 @@ pub fn Data(comptime code: Code) type { .unexpected_token_mapping => UnexpectedTokenMessage, .unexpected_token_production_list => UnexpectedTokenMessage, .unexpected_token_production => UnexpectedTokenMessage, + .unexpected_token_pattern => UnexpectedTokenMessage, .unexpected_eof => NoDiagnosticData, diff --git a/src/ptkgen/ast.zig b/src/ptkgen/ast.zig index f269552..a650c35 100644 --- a/src/ptkgen/ast.zig +++ b/src/ptkgen/ast.zig @@ -98,13 +98,14 @@ pub const Rule = struct { // rule ( : )? = ...; pub const Pattern = struct { // token = ...; name: Identifier, - pattern: Data, + data: Data, + invisible: bool, pub const Data = union(enum) { literal: StringLiteral, // literal "+" word: StringLiteral, // word "while" regex: StringLiteral, // regex "string" - external: CodeLiteral, // custom `matchMe` + external: UserDefinedIdentifier, // @matchMe }; }; diff --git a/src/ptkgen/ast_dump.zig b/src/ptkgen/ast_dump.zig index 468d789..226b324 100644 --- a/src/ptkgen/ast_dump.zig +++ b/src/ptkgen/ast_dump.zig @@ -57,7 +57,18 @@ const AstPrinter = struct { }, .pattern => |pattern| { - print("pattern {s}", .{printer.fmtId(pattern.name.value)}); + print("pattern {s} = ", .{printer.fmtId(pattern.name.value)}); + + switch (pattern.data) { + .literal => |value| print("literal \"{}\"", .{printer.fmtString(value.value)}), + .word => |value| print("word \"{}\"", .{printer.fmtString(value.value)}), + .regex => |value| print("regex \"{}\"", .{printer.fmtString(value.value)}), + .external => |value| print("@{}", .{printer.fmtId(value.value)}), + } + + if (pattern.invisible) { + print(" skip", .{}); + } print(";\n", .{}); }, } diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json index 95db0b7..c98d0c3 100644 --- a/src/ptkgen/intl/en.json +++ b/src/ptkgen/intl/en.json @@ -15,20 +15,21 @@ "unexpected_token": "Expected a token of type '{[expected_type]}', but found token {[actual]}.", "unexpected_character": "Unexpected character '{[character]}' found.", "unexpected_eof": "Unexpected end of file.", - "unexpected_toplevel_token": "Expected a top level declaration ('start', 'rule', 'node' or 'token'), but found token {[actual]}", + "unexpected_toplevel_token": "Expected a top level declaration ('start', 'rule', 'node' or 'pattern'), but found token {[actual]}", "unexpected_token_no_context": "Unexpected token '{[actual]}'.", "unexpected_token_type_spec": "Expected a type specifier, but found '{[actual]}'.", "unexpected_token_mapping": "Expected an AST mapping, but found '{[actual]}'.", "unexpected_token_production_list": "Expected ';' or '|', but found '{[actual]}'.", 
"unexpected_token_production": "Expected a production, but found '{[actual]}'.", + "unexpected_token_pattern": "Expected a pattern definition, but found '{[actual]}'.", "duplicate_identifier_rule": "Rule {[identifier]} already defined here: {[previous_location]}", "duplicate_identifier_node": "Node {[identifier]} already defined here: {[previous_location]}", "duplicate_identifier_pattern": "Pattern {[identifier]} already defined here: {[previous_location]}", - "reference_to_undeclared_rule": "Reference to undeclared rule {[identifier]}.", - "reference_to_undeclared_node": "Reference to undeclared node {[identifier]}.", - "reference_to_undeclared_pattern": "Reference to undeclared pattern {[identifier]}.", + "reference_to_undeclared_rule": "Reference to undeclared rule '{[identifier]}'.", + "reference_to_undeclared_node": "Reference to undeclared node '{[identifier]}'.", + "reference_to_undeclared_pattern": "Reference to undeclared pattern '{[identifier]}'.", "missing_start_symbol": "Grammar file has no start symbol declared.", - "multiple_start_symbols": "Another start rule ({[identifier]}) was already declared here: {[previous_location]}" + "multiple_start_symbols": "Another start rule '({[identifier]})' was already declared here: {[previous_location]}" }, "errors": { "SyntaxError": "syntax error", diff --git a/src/ptkgen/parser.zig b/src/ptkgen/parser.zig index cb2a445..7d26a61 100644 --- a/src/ptkgen/parser.zig +++ b/src/ptkgen/parser.zig @@ -94,7 +94,8 @@ pub const TokenType = enum { variant, optional, - custom, + literal, + word, regex, skip, @@ -187,6 +188,10 @@ const Parser = struct { return .{ .node = node }; } else |err| try filterAcceptError(err); + if (parser.acceptPatternDefinition()) |pattern| { + return .{ .pattern = pattern }; + } else |err| try filterAcceptError(err); + // Detect any excess tokens on the top level: if (parser.core.nextToken()) |maybe_token| { if (maybe_token) |token| { @@ -220,6 +225,59 @@ const Parser = struct { return init_rule; } + fn acceptPatternDefinition(parser: *Parser) AcceptError!ast.Pattern { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + try parser.acceptLiteral(.pattern, .recover); + + const name = try parser.acceptIdentifier(.fail); + try parser.acceptLiteral(.@"=", .fail); + + const data = try parser.acceptPatternSpec(); + + const invisible = try parser.tryAcceptLiteral(.skip); + + try parser.acceptLiteral(.@";", .fail); + + return .{ + .name = name, + .data = data, + .invisible = invisible, + }; + } + + fn acceptPatternSpec(parser: *Parser) AcceptError!ast.Pattern.Data { + parser.traceEnterRule(@src()); + defer parser.popTrace(); + + var state = parser.save(); + errdefer parser.restore(state); + + if (try parser.tryAcceptLiteral(.literal)) { + const string = try parser.acceptStringLiteral(.fail); + return .{ .literal = string }; + } + + if (try parser.tryAcceptLiteral(.word)) { + const string = try parser.acceptStringLiteral(.fail); + return .{ .word = string }; + } + + if (try parser.tryAcceptLiteral(.regex)) { + const string = try parser.acceptStringLiteral(.fail); + return .{ .regex = string }; + } + + if (parser.acceptUserReference()) |ref| { + return .{ .external = ref }; + } else |err| try filterAcceptError(err); + + return parser.emitUnexpectedToken(.{ + .unexpected_token = .unexpected_token_pattern, + }); + } + fn acceptNode(parser: *Parser) AcceptError!ast.Node { parser.traceEnterRule(@src()); defer parser.popTrace(); @@ -1103,7 +1161,8 @@ const Tokenizer = ptk.Tokenizer(TokenType, &.{ Pattern.create(.start, 
match.word("start")), Pattern.create(.rule, match.word("rule")), Pattern.create(.pattern, match.word("pattern")), - Pattern.create(.custom, match.word("custom")), + Pattern.create(.literal, match.word("literal")), + Pattern.create(.word, match.word("word")), Pattern.create(.regex, match.word("regex")), Pattern.create(.skip, match.word("skip")), diff --git a/src/ptkgen/sema.zig b/src/ptkgen/sema.zig index cfa7562..fd60ca1 100644 --- a/src/ptkgen/sema.zig +++ b/src/ptkgen/sema.zig @@ -1,6 +1,8 @@ const std = @import("std"); const ptk = @import("parser-toolkit"); +const logger = std.log.scoped(.ptk_sema); + const ast = @import("ast.zig"); const Diagnostics = @import("Diagnostics.zig"); @@ -74,20 +76,31 @@ pub const Pattern = struct { }; pub const Type = union(enum) { + // trivial types: code_literal: String, - user_value: String, + user_type: String, + // anonymous compound types: optional: *Type, record: *CompoundType, variant: *CompoundType, + + // ast nodes are basically "named types" and must be handled as such + named: *Node, + + pub fn id(t: *const Type) TypeId { + return @as(TypeId, t.*); + } }; +pub const TypeId: type = std.meta.Tag(Type); + pub const CompoundType = struct { fields: StringHashMap(Field), }; pub const Field = struct { - name: String, + // name: String, type: *Type, }; @@ -116,6 +129,8 @@ pub fn analyze(allocator: std.mem.Allocator, diagnostics: *Diagnostics, strings: .node_to_ast = std.AutoHashMap(*Node, *ast.Node).init(allocator), .pattern_to_ast = std.AutoHashMap(*Pattern, *ast.Pattern).init(allocator), + .type_stash = Analyzer.TypeStash.init(allocator), + .document = document, .target = &grammar, @@ -135,6 +150,10 @@ pub fn analyze(allocator: std.mem.Allocator, diagnostics: *Diagnostics, strings: return grammar; } +var BAD_TYPE_SENTINEL: Type = undefined; +var BAD_NODE_SENTINEL: Node = undefined; +var BAD_RULE_SENTINEL: Rule = undefined; + fn innerAnalysis(analyzer: *Analyzer) AnalyzeError!void { // Phase 0: Validate productions on legality (coarse error checking) // - Generates errors for badly constructed elements @@ -148,10 +167,11 @@ fn innerAnalysis(analyzer: *Analyzer) AnalyzeError!void { // Phase 2: Instantiate all node types and patterns, determine start symbol try analyzer.iterateOn(.start, Analyzer.instantiateStartSymbol); - try analyzer.iterateOn(.node, Analyzer.instantiatePatterns); + try analyzer.iterateOn(.pattern, Analyzer.instantiatePatterns); try analyzer.iterateOn(.node, Analyzer.instantiateNodeTypes); // Phase 3: Validate generated types + try analyzer.iterateOn(.node, Analyzer.validateNodes); // Phase 4: Instantiate AST productions @@ -160,6 +180,8 @@ fn innerAnalysis(analyzer: *Analyzer) AnalyzeError!void { } const Analyzer = struct { + const TypeStash = std.HashMap(*Type, void, TypeContext, std.hash_map.default_max_load_percentage); + arena: std.mem.Allocator, diagnostics: *Diagnostics, strings: *const ptk.strings.Pool, @@ -171,10 +193,15 @@ const Analyzer = struct { node_to_ast: std.AutoHashMap(*Node, *ast.Node), pattern_to_ast: std.AutoHashMap(*Pattern, *ast.Pattern), + type_stash: TypeStash, + + deduplicated_type_count: usize = 0, + fn deinit(analyzer: *Analyzer) void { analyzer.rule_to_ast.deinit(); analyzer.node_to_ast.deinit(); analyzer.pattern_to_ast.deinit(); + analyzer.type_stash.deinit(); analyzer.* = undefined; } @@ -313,16 +340,112 @@ const Analyzer = struct { }; } - fn instantiatePatterns(analyzer: *Analyzer, node: *ast.Node) !void { - _ = analyzer; - _ = node; - // + fn instantiatePatterns(analyzer: *Analyzer, ast_pattern: 
*ast.Pattern) !void { + const sema_pattern = analyzer.target.patterns.get(ast_pattern.name.value).?; + + sema_pattern.data = switch (ast_pattern.data) { + .literal => |value| .{ .literal_match = value.value }, + .word => |value| .{ .word = value.value }, + .regex => |value| .{ .regex = value.value }, + .external => |value| .{ .external = value.value }, + }; + + // TODO: Implement regex validation here! + } + + fn instantiateNodeTypes(analyzer: *Analyzer, ast_node: *ast.Node) !void { + const sema_node = analyzer.target.nodes.get(ast_node.name.value).?; + + sema_node.type = try analyzer.resolveType(&ast_node.value); + } + + fn validateNodes(analyzer: *Analyzer, ast_node: *ast.Node) !void { + const sema_node = analyzer.target.nodes.get(ast_node.name.value).?; + + try analyzer.validateType(sema_node.type); } - fn instantiateNodeTypes(analyzer: *Analyzer, node: *ast.Node) !void { + fn validateType(analyzer: *Analyzer, type_node: *Type) !void { _ = analyzer; - _ = node; - // + if (type_node == &BAD_TYPE_SENTINEL) { + @panic("bad sentinel"); + } + } + + fn createCompoundType(analyzer: *Analyzer, def: ast.CompoundType) !*CompoundType { + const ct = try analyzer.target.arena.allocator().create(CompoundType); + errdefer analyzer.target.arena.allocator().destroy(ct); + + ct.* = CompoundType{ + .fields = StringHashMap(Field).init(analyzer.target.arena.allocator()), + }; + errdefer ct.fields.deinit(); + + try ct.fields.ensureTotalCapacity(def.fields.len()); + + var iter = ast.iterate(def.fields); + while (iter.next()) |field_def| { + const field_type = try analyzer.resolveType(&field_def.type); + ct.fields.putAssumeCapacityNoClobber(field_def.name.value, .{ + .type = field_type, + }); + } + + return ct; + } + + fn destroyCompoundType(analyzer: *Analyzer, ct: *CompoundType) void { + ct.fields.deinit(); + analyzer.target.arena.allocator().destroy(ct); + ct.* = undefined; + } + + fn resolveType(analyzer: *Analyzer, type_node: *ast.TypeSpec) error{OutOfMemory}!*Type { + var compound_type: ?*CompoundType = null; + var proto_type: Type = switch (type_node.*) { + .reference => |def| .{ + .named = analyzer.target.nodes.get(def.identifier) orelse blk: { + try analyzer.emitDiagnostic(def.location, .reference_to_undeclared_node, .{ + .identifier = analyzer.strings.get(def.identifier), + }); + break :blk &BAD_NODE_SENTINEL; + }, + }, + .literal => |def| Type{ .code_literal = def.value }, + .custom => |def| Type{ .user_type = def.value }, + .record => |def| blk: { + compound_type = try analyzer.createCompoundType(def); + break :blk .{ .record = compound_type.? }; + }, + .variant => |def| blk: { + compound_type = try analyzer.createCompoundType(def); + break :blk .{ .record = compound_type.? 
}; + }, + }; + errdefer if (compound_type) |ct| + analyzer.destroyCompoundType(ct); + + if (analyzer.getUniqueTypeHandle(&proto_type)) |resolved_type| { + analyzer.deduplicated_type_count += 1; + // logger.debug("deduplicated a {s}", .{@tagName(resolved_type.*)}); + return resolved_type; + } + + const new_type = try analyzer.target.arena.allocator().create(Type); + errdefer analyzer.target.arena.allocator().destroy(new_type); + + new_type.* = proto_type; + + try analyzer.type_stash.putNoClobber(new_type, {}); + + return new_type; + } + + fn getUniqueTypeHandle(analyzer: Analyzer, proto_type: *Type) ?*Type { + if (analyzer.type_stash.getKey(proto_type)) |key| { + return key; + } + return null; } const DeclarationError = error{ @@ -366,3 +489,33 @@ const Analyzer = struct { try analyzer.diagnostics.emit(location, code, params); } }; + +const TypeContext = struct { + const HashFn = std.hash.Fnv1a_64; + + pub fn eql(ctx: TypeContext, lhs: *Type, rhs: *Type) bool { + _ = ctx; + if (lhs == rhs) + return true; + if (lhs.id() != rhs.id()) + return false; + switch (lhs.*) { + inline .code_literal, .user_type, .optional, .named => |val, tag| return val == @field(rhs, @tagName(tag)), + .record, .variant => return false, // they are same-by-identitiy + } + } + + pub fn hash(ctx: TypeContext, t: *Type) u64 { + _ = ctx; + var hasher = HashFn.init(); + hasher.update(@tagName(t.*)); + switch (t.*) { + .code_literal => |lit| hasher.update(&std.mem.toBytes(@intFromEnum(lit))), + .user_type => |lit| hasher.update(&std.mem.toBytes(@intFromEnum(lit))), + .optional => |child| hasher.update(&std.mem.toBytes(child)), + .named => |node| hasher.update(&std.mem.toBytes(node)), + .record, .variant => hasher.update(&std.mem.toBytes(t)), + } + return hasher.final(); + } +}; diff --git a/test/analysis/accept/pattern-custom-skip.ptk b/test/analysis/accept/pattern-custom-skip.ptk new file mode 100644 index 0000000..83f23c7 --- /dev/null +++ b/test/analysis/accept/pattern-custom-skip.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = @externalFunction; + diff --git a/test/analysis/accept/pattern-custom.ptk b/test/analysis/accept/pattern-custom.ptk new file mode 100644 index 0000000..83f23c7 --- /dev/null +++ b/test/analysis/accept/pattern-custom.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = @externalFunction; + diff --git a/test/analysis/accept/pattern-literal-skip.ptk b/test/analysis/accept/pattern-literal-skip.ptk new file mode 100644 index 0000000..a5efb6c --- /dev/null +++ b/test/analysis/accept/pattern-literal-skip.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = literal "a-word" skip; + diff --git a/test/analysis/accept/pattern-literal.ptk b/test/analysis/accept/pattern-literal.ptk new file mode 100644 index 0000000..4964d2c --- /dev/null +++ b/test/analysis/accept/pattern-literal.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = literal "a-word"; + diff --git a/test/analysis/accept/pattern-regex-skip.ptk b/test/analysis/accept/pattern-regex-skip.ptk new file mode 100644 index 0000000..b9e45ec --- /dev/null +++ b/test/analysis/accept/pattern-regex-skip.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = regex "a-word" skip; + diff --git a/test/analysis/accept/pattern-regex.ptk b/test/analysis/accept/pattern-regex.ptk new file mode 100644 index 0000000..4ec3715 --- /dev/null +++ b/test/analysis/accept/pattern-regex.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = regex "a-word"; + diff --git a/test/analysis/accept/pattern-word-skip.ptk b/test/analysis/accept/pattern-word-skip.ptk new file mode 100644 index 0000000..07a0e07 --- /dev/null +++ 
b/test/analysis/accept/pattern-word-skip.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = word "a-word" skip; + diff --git a/test/analysis/accept/pattern-word.ptk b/test/analysis/accept/pattern-word.ptk new file mode 100644 index 0000000..07a0e07 --- /dev/null +++ b/test/analysis/accept/pattern-word.ptk @@ -0,0 +1,4 @@ + + +pattern a_word = word "a-word" skip; + diff --git a/test/parser/reject/pattern-unexpected-token.ptk b/test/parser/reject/pattern-unexpected-token.ptk new file mode 100644 index 0000000..158522d --- /dev/null +++ b/test/parser/reject/pattern-unexpected-token.ptk @@ -0,0 +1,4 @@ +# expected: E1114 + +pattern a_word = `illegal`; + From 466a1a8148171a77323cfd73f6e9ebba7470f0c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 1 Dec 2023 10:01:54 +0100 Subject: [PATCH 17/20] Implements production instantiation --- build.zig | 3 + examples/ptkgen/ast-with-unions.ptk | 4 + examples/ptkgen/grammar.ptk | 14 ++ src/ptkgen/Diagnostics.zig | 7 + src/ptkgen/intl/en.json | 3 +- src/ptkgen/sema.zig | 172 +++++++++++++++++- .../reject/duplicate-field-record.ptk | 7 + .../reject/duplicate-field-variant.ptk | 7 + 8 files changed, 209 insertions(+), 8 deletions(-) create mode 100644 test/analysis/reject/duplicate-field-record.ptk create mode 100644 test/analysis/reject/duplicate-field-variant.ptk diff --git a/build.zig b/build.zig index dcade1e..7bcc743 100644 --- a/build.zig +++ b/build.zig @@ -152,6 +152,9 @@ const analyis_reject_files = [_][]const u8{ "test/analysis/reject/undeclared-start.ptk", "test/analysis/reject/duplicate-undeclared-start.ptk", "test/analysis/reject/duplicate-start.ptk", + + "test/analysis/reject/duplicate-field-record.ptk", + "test/analysis/reject/duplicate-field-variant.ptk", }; const parser_accept_files = [_][]const u8{ diff --git a/examples/ptkgen/ast-with-unions.ptk b/examples/ptkgen/ast-with-unions.ptk index 6da295d..0133fcd 100644 --- a/examples/ptkgen/ast-with-unions.ptk +++ b/examples/ptkgen/ast-with-unions.ptk @@ -64,3 +64,7 @@ rule toplevel-decl : !TLDeclaration = | => interface: $0 | => module: $0 ; + +rule namespace-group = "to be done"; +rule interface-decl = "to be done"; +rule module-decl = "to be done"; diff --git a/examples/ptkgen/grammar.ptk b/examples/ptkgen/grammar.ptk index d788e27..11c29f5 100644 --- a/examples/ptkgen/grammar.ptk +++ b/examples/ptkgen/grammar.ptk @@ -69,3 +69,17 @@ rule list_ctor = "{" ( )? 
"}"; rule value_list = ( "," )* ; + + +# TODO: + +pattern rule_ref = literal ""; +pattern identifier = literal ""; +pattern string_literal = literal ""; +pattern userval = literal ""; +pattern token_ref = literal ""; +pattern code_literal = literal ""; +pattern value_ref = literal ""; +pattern mapped_value = literal ""; + +rule type = "empty"; \ No newline at end of file diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig index 56aeea2..16677f5 100644 --- a/src/ptkgen/Diagnostics.zig +++ b/src/ptkgen/Diagnostics.zig @@ -52,6 +52,8 @@ pub const Code = enum(u16) { multiple_start_symbols = 1306, + duplicate_compound_field = 1307, + // semantic warnings (4000-4099): missing_start_symbol = 4000, @@ -214,6 +216,11 @@ pub fn Data(comptime code: Code) type { .missing_start_symbol => NoDiagnosticData, + .duplicate_compound_field => struct { + identifier: []const u8, + previous_location: ptk.Location, + }, + // else => @compileError(std.fmt.comptimePrint("Code {} has no diagnostic type associated!", .{code})), }; } diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json index c98d0c3..c0f5eb8 100644 --- a/src/ptkgen/intl/en.json +++ b/src/ptkgen/intl/en.json @@ -29,7 +29,8 @@ "reference_to_undeclared_node": "Reference to undeclared node '{[identifier]}'.", "reference_to_undeclared_pattern": "Reference to undeclared pattern '{[identifier]}'.", "missing_start_symbol": "Grammar file has no start symbol declared.", - "multiple_start_symbols": "Another start rule '({[identifier]})' was already declared here: {[previous_location]}" + "multiple_start_symbols": "Another start rule '({[identifier]})' was already declared here: {[previous_location]}", + "duplicate_compound_field": "Another field named '{s}' was already declared here: {[previous_location]}" }, "errors": { "SyntaxError": "syntax error", diff --git a/src/ptkgen/sema.zig b/src/ptkgen/sema.zig index fd60ca1..837b36b 100644 --- a/src/ptkgen/sema.zig +++ b/src/ptkgen/sema.zig @@ -22,11 +22,13 @@ pub const Grammar = struct { rules: StringHashMap(*Rule), nodes: StringHashMap(*Node), patterns: StringHashMap(*Pattern), + literal_patterns: StringHashMap(*Pattern), pub fn deinit(grammar: *Grammar) void { grammar.rules.deinit(); grammar.nodes.deinit(); grammar.patterns.deinit(); + grammar.literal_patterns.deinit(); grammar.arena.deinit(); grammar.* = undefined; } @@ -42,7 +44,14 @@ pub const Rule = struct { name: String, type: ?*Type, - production: *Production, + productions: []MappedProduction, +}; + +/// A production of a rule that is able to map the parsed structure +/// into an AST node. +pub const MappedProduction = struct { + production: Production, + mapping: ?Mapping, }; pub const Production = union(enum) { @@ -54,6 +63,10 @@ pub const Production = union(enum) { repetition_one: *Production, // [ ... 
]+ }; +pub const Mapping = struct { + // +}; + pub const Node = struct { location: ptk.Location, name: String, @@ -100,6 +113,7 @@ pub const CompoundType = struct { }; pub const Field = struct { + location: ptk.Location, // name: String, type: *Type, }; @@ -115,6 +129,7 @@ pub fn analyze(allocator: std.mem.Allocator, diagnostics: *Diagnostics, strings: .rules = StringHashMap(*Rule).init(allocator), .nodes = StringHashMap(*Node).init(allocator), .patterns = StringHashMap(*Pattern).init(allocator), + .literal_patterns = StringHashMap(*Pattern).init(allocator), .start = null, }; @@ -153,6 +168,7 @@ pub fn analyze(allocator: std.mem.Allocator, diagnostics: *Diagnostics, strings: var BAD_TYPE_SENTINEL: Type = undefined; var BAD_NODE_SENTINEL: Node = undefined; var BAD_RULE_SENTINEL: Rule = undefined; +var BAD_PATTERN_SENTINEL: Pattern = undefined; fn innerAnalysis(analyzer: *Analyzer) AnalyzeError!void { // Phase 0: Validate productions on legality (coarse error checking) @@ -174,6 +190,7 @@ fn innerAnalysis(analyzer: *Analyzer) AnalyzeError!void { try analyzer.iterateOn(.node, Analyzer.validateNodes); // Phase 4: Instantiate AST productions + try analyzer.iterateOn(.rule, Analyzer.instantiateRules); // Phase 5: Instantiate and validate AST mappings @@ -249,6 +266,8 @@ const Analyzer = struct { } } + /// Creates declarations in the target Grammar and makes sure all declared objects are reachable. + /// Emits diagnostics for duplicate declarations. fn createDeclarations(analyzer: *Analyzer) !void { var iter = ast.iterate(analyzer.document); while (iter.next()) |item| { @@ -270,7 +289,7 @@ const Analyzer = struct { .name = rule.name.value, .type = undefined, // created in phase 4 - .production = undefined, // created in phase 5 + .productions = &.{}, // created in phase 5 }; }, @@ -313,6 +332,8 @@ const Analyzer = struct { } } + /// Searches all start symbol declarations and stores a reference to the initial rule. + /// Will emit diagnostics for duplicate start symbol decls and invalid references. fn instantiateStartSymbol(analyzer: *Analyzer, start: *ast.RuleRef) !void { if (analyzer.target.start) |old_start| { try analyzer.emitDiagnostic(start.location, .multiple_start_symbols, .{ @@ -340,6 +361,7 @@ const Analyzer = struct { }; } + /// Fully populate all content of the pattern declarations. Emits diagnostics for invalid patterns. fn instantiatePatterns(analyzer: *Analyzer, ast_pattern: *ast.Pattern) !void { const sema_pattern = analyzer.target.patterns.get(ast_pattern.name.value).?; @@ -353,6 +375,8 @@ const Analyzer = struct { // TODO: Implement regex validation here! } + /// Instantiates and validates all node declarations. + /// Emits diagnostics for bad type declarations. 
fn instantiateNodeTypes(analyzer: *Analyzer, ast_node: *ast.Node) !void { const sema_node = analyzer.target.nodes.get(ast_node.name.value).?; @@ -365,13 +389,136 @@ const Analyzer = struct { try analyzer.validateType(sema_node.type); } - fn validateType(analyzer: *Analyzer, type_node: *Type) !void { - _ = analyzer; + fn instantiateRules(analyzer: *Analyzer, ast_rule: *ast.Rule) !void { + const sema_rule = analyzer.target.rules.get(ast_rule.name.value).?; + + sema_rule.type = if (ast_rule.ast_type) |ast_type| + try analyzer.resolveType(&ast_type) + else + null; + + sema_rule.productions = try analyzer.target.arena.allocator().alloc(MappedProduction, ast_rule.productions.len()); + errdefer { + analyzer.target.arena.allocator().free(sema_rule.productions); + sema_rule.productions = &.{}; + } + + if (sema_rule.productions.len == 0) { + @panic("empty sema rule!"); + } + + var iter = ast.iterate(ast_rule.productions); + var index: usize = 0; + while (iter.next()) |ast_production| : (index += 1) { + const sema_production = &sema_rule.productions[index]; + + sema_production.* = MappedProduction{ + .production = try analyzer.translateProduction(ast_production.production), + .mapping = null, // Will be instantiated later + }; + } + } + + fn translateProduction(analyzer: *Analyzer, ast_prod: ast.Production) error{OutOfMemory}!Production { + switch (ast_prod) { + .literal => |literal| { + const gop = try analyzer.target.literal_patterns.getOrPut(literal.value); + if (!gop.found_existing) { + gop.value_ptr.* = try analyzer.target.arena.allocator().create(Pattern); + gop.value_ptr.*.* = .{ + .location = literal.location, // place of first use + .name = literal.value, + .data = .{ .literal_match = literal.value }, + }; + } + return Production{ .terminal = gop.value_ptr.* }; + }, + .terminal => |terminal| { + if (analyzer.target.patterns.get(terminal.identifier)) |pattern| { + return Production{ .terminal = pattern }; + } else { + try analyzer.emitDiagnostic(terminal.location, .reference_to_undeclared_pattern, .{ + .identifier = analyzer.strings.get(terminal.identifier), + }); + return Production{ .terminal = &BAD_PATTERN_SENTINEL }; + } + }, + .recursion => |recursion| { + if (analyzer.target.rules.get(recursion.identifier)) |rule| { + return Production{ .recursion = rule }; + } else { + try analyzer.emitDiagnostic(recursion.location, .reference_to_undeclared_rule, .{ + .identifier = analyzer.strings.get(recursion.identifier), + }); + return Production{ .recursion = &BAD_RULE_SENTINEL }; + } + }, + .sequence => |sequence| { + if (sequence.len() == 0) + @panic("bad sequence: empty"); + + var seq = std.ArrayList(Production).init(analyzer.target.arena.allocator()); + defer seq.deinit(); + + try seq.ensureTotalCapacityPrecise(sequence.len()); + + var iter = ast.iterate(sequence); + while (iter.next()) |inner_prod| { + const inner_sema = try analyzer.translateProduction(inner_prod.*); + seq.appendAssumeCapacity(inner_sema); + } + + return Production{ + .sequence = seq.toOwnedSlice() catch @panic("bad capacity"), + }; + }, + .optional => |optional| { + const nested = try analyzer.target.arena.allocator().create(Production); + errdefer analyzer.target.arena.allocator().destroy(nested); + nested.* = try analyzer.translateProduction(.{ .sequence = optional }); + return .{ .optional = nested }; + }, + .repetition_zero => |repetition| { + const nested = try analyzer.target.arena.allocator().create(Production); + errdefer analyzer.target.arena.allocator().destroy(nested); + nested.* = try 
analyzer.translateProduction(.{ .sequence = repetition }); + return .{ .repetition_zero = nested }; + }, + .repetition_one => |repetition| { + const nested = try analyzer.target.arena.allocator().create(Production); + errdefer analyzer.target.arena.allocator().destroy(nested); + nested.* = try analyzer.translateProduction(.{ .sequence = repetition }); + return .{ .repetition_one = nested }; + }, + } + } + + /// Checks if the given type is semantically ok or emits compiler errors if not. + fn validateType(analyzer: *Analyzer, type_node: *Type) error{OutOfMemory}!void { if (type_node == &BAD_TYPE_SENTINEL) { @panic("bad sentinel"); } + + switch (type_node.*) { + .code_literal, .user_type => {}, // always fine + .optional => |child_type| try analyzer.validateType(child_type), + .record, .variant => |compound_type| { + var fields = compound_type.fields.iterator(); + while (fields.next()) |kv| { + const field_type = kv.value_ptr.type; + try analyzer.validateType(field_type); + } + }, + .named => |node| { + if (node == &BAD_NODE_SENTINEL) { + @panic("bad node!"); + } + }, + } } + /// Constructs a new compound type from the given AST declaration. Will emit diagnostics + /// on error and returns an incomplete type if errors happened. fn createCompoundType(analyzer: *Analyzer, def: ast.CompoundType) !*CompoundType { const ct = try analyzer.target.arena.allocator().create(CompoundType); errdefer analyzer.target.arena.allocator().destroy(ct); @@ -386,9 +533,20 @@ const Analyzer = struct { var iter = ast.iterate(def.fields); while (iter.next()) |field_def| { const field_type = try analyzer.resolveType(&field_def.type); - ct.fields.putAssumeCapacityNoClobber(field_def.name.value, .{ + const gop_result = ct.fields.getOrPutAssumeCapacity(field_def.name.value); + + if (gop_result.found_existing) { + try analyzer.emitDiagnostic(field_def.location, .duplicate_compound_field, .{ + .previous_location = gop_result.value_ptr.location, + .identifier = analyzer.strings.get(field_def.name.value), + }); + continue; + } + + gop_result.value_ptr.* = .{ .type = field_type, - }); + .location = field_def.location, + }; } return ct; @@ -400,7 +558,7 @@ const Analyzer = struct { ct.* = undefined; } - fn resolveType(analyzer: *Analyzer, type_node: *ast.TypeSpec) error{OutOfMemory}!*Type { + fn resolveType(analyzer: *Analyzer, type_node: *const ast.TypeSpec) error{OutOfMemory}!*Type { var compound_type: ?*CompoundType = null; var proto_type: Type = switch (type_node.*) { .reference => |def| .{ diff --git a/test/analysis/reject/duplicate-field-record.ptk b/test/analysis/reject/duplicate-field-record.ptk new file mode 100644 index 0000000..3a64f2a --- /dev/null +++ b/test/analysis/reject/duplicate-field-record.ptk @@ -0,0 +1,7 @@ +# expected: E1307 + +node bad = record + x: `bool`, + y: `bool`, + x: `bool` +; \ No newline at end of file diff --git a/test/analysis/reject/duplicate-field-variant.ptk b/test/analysis/reject/duplicate-field-variant.ptk new file mode 100644 index 0000000..377a38a --- /dev/null +++ b/test/analysis/reject/duplicate-field-variant.ptk @@ -0,0 +1,7 @@ +# expected: E1307 + +node bad = variant + x: `bool`, + y: `bool`, + x: `bool` +; \ No newline at end of file From a5a50422250e03b917edda4db129fcd98f558a63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 1 Dec 2023 14:39:23 +0100 Subject: [PATCH 18/20] Implements sema grammar dumper --- build.zig | 5 +- examples/ptkgen/ast-with-unions.ptk | 6 +- examples/ptkgen/grammar.ptk | 6 +- src/ptkgen/main.zig | 5 + 
src/ptkgen/sema.zig | 5 +- src/ptkgen/sema_dump.zig | 173 ++++++++++++++++++ .../production-undeclared-pattern-ref.ptk | 3 + .../reject/production-undeclared-rule-ref.ptk | 3 + 8 files changed, 197 insertions(+), 9 deletions(-) create mode 100644 src/ptkgen/sema_dump.zig create mode 100644 test/analysis/reject/production-undeclared-pattern-ref.ptk create mode 100644 test/analysis/reject/production-undeclared-rule-ref.ptk diff --git a/build.zig b/build.zig index 7bcc743..1b9b0ce 100644 --- a/build.zig +++ b/build.zig @@ -144,7 +144,7 @@ const analyis_accept_files = [_][]const u8{ const analyis_reject_files = [_][]const u8{ "test/analysis/reject/duplicate-node.ptk", - // "test/analysis/reject/duplicate-pattern.ptk", // TODO: Implement pattern support in parser + "test/analysis/reject/duplicate-pattern.ptk", "test/analysis/reject/duplicate-rule.ptk", "test/analysis/accept/expect-warn-missing-start.ptk", @@ -155,6 +155,9 @@ const analyis_reject_files = [_][]const u8{ "test/analysis/reject/duplicate-field-record.ptk", "test/analysis/reject/duplicate-field-variant.ptk", + + "test/analysis/reject/production-undeclared-pattern-ref.ptk", + "test/analysis/reject/production-undeclared-rule-ref.ptk", }; const parser_accept_files = [_][]const u8{ diff --git a/examples/ptkgen/ast-with-unions.ptk b/examples/ptkgen/ast-with-unions.ptk index 0133fcd..369c9c9 100644 --- a/examples/ptkgen/ast-with-unions.ptk +++ b/examples/ptkgen/ast-with-unions.ptk @@ -55,9 +55,9 @@ node TLDeclaration = variant module : !module ; -node namespace = @extern; -node interface = @extern; -node module = @extern; +node namespace = @Namespace; +node interface = @Interface; +node module = @Module; rule toplevel-decl : !TLDeclaration = => ns: $0 # this is syntax for a union field selector as unions are not compounds diff --git a/examples/ptkgen/grammar.ptk b/examples/ptkgen/grammar.ptk index 11c29f5..6a6d95a 100644 --- a/examples/ptkgen/grammar.ptk +++ b/examples/ptkgen/grammar.ptk @@ -70,11 +70,12 @@ rule value_list = ( "," )* ; - # TODO: +rule type = "empty"; + pattern rule_ref = literal ""; -pattern identifier = literal ""; +pattern identifier = regex "[A-Za-z_][A-Za-z0-9_]*"; pattern string_literal = literal ""; pattern userval = literal ""; pattern token_ref = literal ""; @@ -82,4 +83,3 @@ pattern code_literal = literal ""; pattern value_ref = literal ""; pattern mapped_value = literal ""; -rule type = "empty"; \ No newline at end of file diff --git a/src/ptkgen/main.zig b/src/ptkgen/main.zig index 4384d44..42ba2c0 100644 --- a/src/ptkgen/main.zig +++ b/src/ptkgen/main.zig @@ -11,6 +11,7 @@ const sema = @import("sema.zig"); const intl = @import("intl.zig"); const parser = @import("parser.zig"); const ast_dump = @import("ast_dump.zig"); +const sema_dump = @import("sema_dump.zig"); const Diagnostics = @import("Diagnostics.zig"); @@ -312,6 +313,10 @@ fn compileFile( // TODO: Implement parsergen / tablegen / highlightergen if (options.test_mode == .none) { + std.debug.print("ast dump:\n", .{}); ast_dump.dump(string_pool, tree); + + std.debug.print("\n\nsema dump:\n", .{}); + sema_dump.dump(string_pool, grammar); } } diff --git a/src/ptkgen/sema.zig b/src/ptkgen/sema.zig index 837b36b..ba18328 100644 --- a/src/ptkgen/sema.zig +++ b/src/ptkgen/sema.zig @@ -77,7 +77,7 @@ pub const Node = struct { pub const Pattern = struct { location: ptk.Location, name: String, - + is_literal: bool, data: Data, pub const Data = union(enum) { @@ -324,7 +324,7 @@ const Analyzer = struct { instance.* = .{ .location = pattern.name.location, .name = 
pattern.name.value, - + .is_literal = false, .data = undefined, // created in phase 2 }; }, @@ -429,6 +429,7 @@ const Analyzer = struct { .location = literal.location, // place of first use .name = literal.value, .data = .{ .literal_match = literal.value }, + .is_literal = true, }; } return Production{ .terminal = gop.value_ptr.* }; diff --git a/src/ptkgen/sema_dump.zig b/src/ptkgen/sema_dump.zig new file mode 100644 index 0000000..3fe684f --- /dev/null +++ b/src/ptkgen/sema_dump.zig @@ -0,0 +1,173 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const sema = @import("sema.zig"); +const parser = @import("parser.zig"); + +pub fn dump(strings: *const ptk.strings.Pool, grammar: sema.Grammar) void { + var printer = SemaPrinter{ + .strings = strings, + }; + + SemaPrinter.print("literal patterns:\n", .{}); + printer.dumpPatterns(grammar.literal_patterns); + + SemaPrinter.print("\nuser patterns:\n", .{}); + printer.dumpPatterns(grammar.patterns); + + SemaPrinter.print("\nstart rule: ", .{}); + if (grammar.start) |start| { + SemaPrinter.print("<{}>\n", .{printer.fmtId(start.rule.name)}); + } else { + SemaPrinter.print("-none-\n", .{}); + } + + SemaPrinter.print("\nast nodes:\n", .{}); + printer.dumpNodes(grammar.nodes); + + SemaPrinter.print("\nrules:\n", .{}); + printer.dumpRules(grammar.rules); +} + +const SemaPrinter = struct { + const print = std.debug.print; + + strings: *const ptk.strings.Pool, + + fn dumpPatterns(printer: SemaPrinter, patterns: sema.StringHashMap(*sema.Pattern)) void { + for (patterns.values()) |pattern| { + print("pattern {} = ", .{printer.fmtId(pattern.name)}); + + switch (pattern.data) { + inline else => |value, tag| print("{s} \"{}\"", .{ @tagName(tag), printer.fmtString(value) }), + } + + print(";\n", .{}); + } + } + + fn dumpNodes(printer: SemaPrinter, nodes: sema.StringHashMap(*sema.Node)) void { + for (nodes.values()) |node| { + print("node {} = ", .{printer.fmtId(node.name)}); + + printer.dumpType(node.type); + + print(";\n", .{}); + } + } + + fn dumpRules(printer: SemaPrinter, rules: sema.StringHashMap(*sema.Rule)) void { + for (rules.values()) |rule| { + print("rule {}", .{printer.fmtId(rule.name)}); + + if (rule.type) |rule_type| { + print(": ", .{}); + printer.dumpType(rule_type); + } + + print(" = ", .{}); + + for (rule.productions, 0..) |production, i| { + if (i > 0) print("\n | ", .{}); + printer.dumpMappedProduction(production); + } + + print(";\n", .{}); + } + } + + fn dumpMappedProduction(printer: SemaPrinter, mapped_prod: sema.MappedProduction) void { + printer.dumpProduction(mapped_prod.production); + + if (mapped_prod.mapping) |mapping| { + print(" -> ", .{}); + printer.dumpMapping(mapping); + } + } + + fn dumpProduction(printer: SemaPrinter, production: sema.Production) void { + switch (production) { + .terminal => |terminal| { + if (terminal.is_literal) { + print("\"{}\"", .{printer.fmtString(terminal.data.literal_match)}); + } else { + print("${}", .{printer.fmtId(terminal.name)}); + } + }, + .recursion => |recursion| print("<{}>", .{printer.fmtId(recursion.name)}), + .sequence => |sequence| { + for (sequence, 0..) 
|item, i| { + if (i > 0) + print(" ", .{}); + printer.dumpProduction(item); + } + }, + .optional => |optional| { + print("(", .{}); + printer.dumpProduction(optional.*); + print(")?", .{}); + }, + .repetition_zero => |repetition_zero| { + print("(", .{}); + printer.dumpProduction(repetition_zero.*); + print(")*", .{}); + }, + .repetition_one => |repetition_one| { + print("(", .{}); + printer.dumpProduction(repetition_one.*); + print(")+", .{}); + }, + } + } + + fn dumpMapping(printer: SemaPrinter, mapping: sema.Mapping) void { + _ = mapping; + _ = printer; + } + + fn dumpType(printer: SemaPrinter, stype: *sema.Type) void { + switch (stype.*) { + .code_literal => |literal| print("`{}`", .{printer.fmtString(literal)}), + .user_type => |literal| print("@{}", .{printer.fmtId(literal)}), + .optional => |inner| { + print("optional ", .{}); + printer.dumpType(inner); + }, + inline .record, .variant => |compound, tag| { + print("{s} ", .{@tagName(tag)}); + for (compound.fields.keys(), compound.fields.values(), 0..) |name, field, i| { + if (i > 0) + print(", ", .{}); + print("{}: ", .{printer.fmtId(name)}); + printer.dumpType(field.type); + } + }, + .named => |other| print("!{}", .{printer.fmtId(other.name)}), + } + } + + fn fmtString(printer: SemaPrinter, str: ptk.strings.String) StringPrinter { + return StringPrinter{ .printer = printer, .str = str, .mode = .text }; + } + + fn fmtId(printer: SemaPrinter, str: ptk.strings.String) StringPrinter { + return StringPrinter{ .printer = printer, .str = str, .mode = .id }; + } + + const StringPrinter = struct { + printer: SemaPrinter, + str: ptk.strings.String, + mode: enum { id, text }, + + pub fn format(strpr: StringPrinter, fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = opt; + _ = fmt; + + const text = strpr.printer.strings.get(strpr.str); + switch (strpr.mode) { + .id => try writer.print("{}", .{std.zig.fmtId(text)}), + .text => try writer.print("{}", .{std.zig.fmtEscapes(text)}), + } + } + }; +}; diff --git a/test/analysis/reject/production-undeclared-pattern-ref.ptk b/test/analysis/reject/production-undeclared-pattern-ref.ptk new file mode 100644 index 0000000..10e66f0 --- /dev/null +++ b/test/analysis/reject/production-undeclared-pattern-ref.ptk @@ -0,0 +1,3 @@ +# expected: E1305 + +rule foo = $pat; \ No newline at end of file diff --git a/test/analysis/reject/production-undeclared-rule-ref.ptk b/test/analysis/reject/production-undeclared-rule-ref.ptk new file mode 100644 index 0000000..a5525cc --- /dev/null +++ b/test/analysis/reject/production-undeclared-rule-ref.ptk @@ -0,0 +1,3 @@ +# expected: E1303 + +rule foo = ; \ No newline at end of file From 768cd58603801fbaf4f3fc7603e3ebe8fd8b11e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Fri, 1 Dec 2023 16:07:28 +0100 Subject: [PATCH 19/20] Adds basic json output of parsed grammar. 
--- src/ptkgen/{ast_dump.zig => dump/ast.zig} | 4 +- src/ptkgen/dump/json.zig | 231 ++++++++++++++++++++ src/ptkgen/{sema_dump.zig => dump/sema.zig} | 4 +- src/ptkgen/intl.zig | 30 ++- src/ptkgen/intl/en.json | 27 ++- src/ptkgen/main.zig | 80 ++++++- 6 files changed, 367 insertions(+), 9 deletions(-) rename src/ptkgen/{ast_dump.zig => dump/ast.zig} (99%) create mode 100644 src/ptkgen/dump/json.zig rename src/ptkgen/{sema_dump.zig => dump/sema.zig} (98%) diff --git a/src/ptkgen/ast_dump.zig b/src/ptkgen/dump/ast.zig similarity index 99% rename from src/ptkgen/ast_dump.zig rename to src/ptkgen/dump/ast.zig index 226b324..9c5d675 100644 --- a/src/ptkgen/ast_dump.zig +++ b/src/ptkgen/dump/ast.zig @@ -1,8 +1,8 @@ const std = @import("std"); const ptk = @import("parser-toolkit"); -const ast = @import("ast.zig"); -const parser = @import("parser.zig"); +const ast = @import("../ast.zig"); +const parser = @import("../parser.zig"); pub fn dump(strings: *const ptk.strings.Pool, decls: parser.Document) void { var printer = AstPrinter{ diff --git a/src/ptkgen/dump/json.zig b/src/ptkgen/dump/json.zig new file mode 100644 index 0000000..f777011 --- /dev/null +++ b/src/ptkgen/dump/json.zig @@ -0,0 +1,231 @@ +const std = @import("std"); +const ptk = @import("parser-toolkit"); + +const sema = @import("../sema.zig"); +const parser = @import("../parser.zig"); + +pub fn createJsonValue( + arena: *std.heap.ArenaAllocator, + strings: *const ptk.strings.Pool, + grammar: sema.Grammar, +) !std.json.Value { + const allocator = arena.allocator(); + + var mapper = JsonMapper{ + .allocator = allocator, + .strings = strings, + }; + + var root = std.json.ObjectMap.init(allocator); + errdefer root.deinit(); + + if (grammar.start) |start| { + try root.put("start", mapper.jsonString(start.rule.name)); + } else { + try root.put("start", .null); + } + + { + var list = mapper.newArray(); + errdefer list.deinit(); + + var iter = grammar.literal_patterns.iterator(); + while (iter.next()) |kvp| { + try list.append(mapper.jsonString(kvp.value_ptr.*.data.literal_match)); + } + + try root.put("literal_patterns", .{ .array = list }); + } + + { + var patterns = std.json.ObjectMap.init(allocator); + errdefer patterns.deinit(); + + var iter = grammar.patterns.iterator(); + while (iter.next()) |kvp| { + const spattern: *sema.Pattern = kvp.value_ptr.*; + + var jpattern = std.json.ObjectMap.init(allocator); + errdefer jpattern.deinit(); + + // try jpattern.put("name", .{ .string = strings.get(spattern.name) }); + try jpattern.put("kind", .{ .string = @tagName(spattern.data) }); + switch (spattern.data) { + inline else => |val| try jpattern.put("data", mapper.jsonString(val)), + } + + try patterns.putNoClobber( + strings.get(kvp.key_ptr.*), + .{ .object = jpattern }, + ); + } + + try root.put("patterns", .{ .object = patterns }); + } + + { + var nodes = std.json.ObjectMap.init(allocator); + errdefer nodes.deinit(); + + var iter = grammar.nodes.iterator(); + while (iter.next()) |kvp| { + const snode: *sema.Node = kvp.value_ptr.*; + + var jtype = try mapper.convertType(snode.type); + + try nodes.putNoClobber( + strings.get(kvp.key_ptr.*), + jtype, + ); + } + + try root.put("ast_nodes", .{ .object = nodes }); + } + + { + var rules = std.json.ObjectMap.init(allocator); + errdefer rules.deinit(); + + var iter = grammar.rules.iterator(); + while (iter.next()) |kvp| { + const srule: *sema.Rule = kvp.value_ptr.*; + + var jrule = mapper.newObject(); + errdefer jrule.deinit(); + + if (srule.type) |rule_type| { + var jtype = try 
mapper.convertType(rule_type); + try jrule.putNoClobber("type", jtype); + } else { + try jrule.putNoClobber("type", .null); + } + + { + var jprods = mapper.newArray(); + errdefer jprods.deinit(); + + try jprods.resize(srule.productions.len); + + for (jprods.items, srule.productions) |*jmprod_val, mapped_production| { + var jmprod = mapper.newObject(); + errdefer jmprod.deinit(); + + var jprod = try mapper.convertProduction(mapped_production.production); + + try jmprod.putNoClobber("production", jprod); + + if (mapped_production.mapping) |mapping| { + var jmap = try mapper.convertMapping(mapping); + try jmprod.putNoClobber("mapping", jmap); + } else { + try jmprod.putNoClobber("mapping", .null); + } + + jmprod_val.* = .{ .object = jmprod }; + } + + try jrule.putNoClobber("mapped_productions", .{ .array = jprods }); + } + + try rules.putNoClobber( + strings.get(kvp.key_ptr.*), + .{ .object = jrule }, + ); + } + + try root.put("rules", .{ .object = rules }); + } + + return std.json.Value{ .object = root }; +} + +const JsonMapper = struct { + allocator: std.mem.Allocator, + strings: *const ptk.strings.Pool, + + fn convertProduction(mapper: JsonMapper, production: sema.Production) error{OutOfMemory}!std.json.Value { + var jtype = mapper.newObject(); + errdefer jtype.deinit(); + + try jtype.putNoClobber("kind", .{ .string = @tagName(production) }); + + const data: std.json.Value = switch (production) { + .terminal => |terminal| blk: { + if (terminal.is_literal) { + try jtype.put("kind", .{ .string = "literal-terminal" }); + } + break :blk mapper.jsonString(terminal.name); + }, + .recursion => |recursion| mapper.jsonString(recursion.name), + + .sequence => |sequence| blk: { + var list = mapper.newArray(); + errdefer list.deinit(); + + try list.resize(sequence.len); + + for (list.items, sequence) |*dst, src| { + dst.* = try mapper.convertProduction(src); + } + + break :blk .{ .array = list }; + }, + + .optional, .repetition_zero, .repetition_one => |optional| try mapper.convertProduction(optional.*), + }; + try jtype.putNoClobber("data", data); + + return .{ .object = jtype }; + } + + fn convertMapping(mapper: JsonMapper, mapping: sema.Mapping) error{OutOfMemory}!std.json.Value { + _ = mapping; + _ = mapper; + + @panic("implement generation of mappings"); + } + + fn convertType(mapper: JsonMapper, stype: *sema.Type) error{OutOfMemory}!std.json.Value { + const data: std.json.Value = switch (stype.*) { + .code_literal, .user_type => |literal| mapper.jsonString(literal), + .named => |named| mapper.jsonString(named.name), + + .optional => |inner| try mapper.convertType(inner), + + .record, .variant => |compound| blk: { + var fields = mapper.newObject(); + errdefer fields.deinit(); + + for (compound.fields.keys(), compound.fields.values()) |name, field| { + var field_type = try mapper.convertType(field.type); + try fields.putNoClobber( + mapper.strings.get(name), + field_type, + ); + } + + break :blk .{ .object = fields }; + }, + }; + + var jtype = mapper.newObject(); + errdefer jtype.deinit(); + + try jtype.putNoClobber("kind", .{ .string = @tagName(stype.*) }); + try jtype.putNoClobber("data", data); + + return .{ .object = jtype }; + } + + fn jsonString(mapper: JsonMapper, string: ptk.strings.String) std.json.Value { + return .{ .string = mapper.strings.get(string) }; + } + + fn newObject(mapper: JsonMapper) std.json.ObjectMap { + return std.json.ObjectMap.init(mapper.allocator); + } + + fn newArray(mapper: JsonMapper) std.json.Array { + return std.json.Array.init(mapper.allocator); + } +}; diff 
--git a/src/ptkgen/sema_dump.zig b/src/ptkgen/dump/sema.zig similarity index 98% rename from src/ptkgen/sema_dump.zig rename to src/ptkgen/dump/sema.zig index 3fe684f..f338341 100644 --- a/src/ptkgen/sema_dump.zig +++ b/src/ptkgen/dump/sema.zig @@ -1,8 +1,8 @@ const std = @import("std"); const ptk = @import("parser-toolkit"); -const sema = @import("sema.zig"); -const parser = @import("parser.zig"); +const sema = @import("../sema.zig"); +const parser = @import("../parser.zig"); pub fn dump(strings: *const ptk.strings.Pool, grammar: sema.Grammar) void { var printer = SemaPrinter{ diff --git a/src/ptkgen/intl.zig b/src/ptkgen/intl.zig index 13ff049..fa0e3d5 100644 --- a/src/ptkgen/intl.zig +++ b/src/ptkgen/intl.zig @@ -76,14 +76,40 @@ pub const Localization = struct { FileTooBig: []const u8, InvalidSourceEncoding: []const u8, + + DiskQuota: []const u8, + NoSpaceLeft: []const u8, + DeviceBusy: []const u8, + InvalidArgument: []const u8, + NotOpenForWriting: []const u8, + LockViolation: []const u8, + ProcessFdQuotaExceeded: []const u8, + SystemFdQuotaExceeded: []const u8, + SharingViolation: []const u8, + PathAlreadyExists: []const u8, + FileNotFound: []const u8, + PipeBusy: []const u8, + NameTooLong: []const u8, + InvalidUtf8: []const u8, + BadPathName: []const u8, + NetworkNotFound: []const u8, + InvalidHandle: []const u8, + SymLinkLoop: []const u8, + NoDevice: []const u8, + NotDir: []const u8, + FileLocksNotSupported: []const u8, + FileBusy: []const u8, + LinkQuotaExceeded: []const u8, + ReadOnlyFileSystem: []const u8, + RenameAcrossMountPoints: []const u8, }, pub fn generate(comptime buffer: []const u8) Localization { @setEvalBranchQuota(1_000_000); - var alloc_buf: [buffer.len]u8 = undefined; + var alloc_buf: [4 * buffer.len]u8 = undefined; var fba = std.heap.FixedBufferAllocator.init(&alloc_buf); - return std.json.parseFromSliceLeaky(Localization, fba.allocator(), buffer, .{}) catch @compileError("failed to parse json"); + return std.json.parseFromSliceLeaky(Localization, fba.allocator(), buffer, .{}) catch |err| @compileError(std.fmt.comptimePrint("failed to parse json: {}", .{err})); } }; diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json index c0f5eb8..fe62e4d 100644 --- a/src/ptkgen/intl/en.json +++ b/src/ptkgen/intl/en.json @@ -49,6 +49,31 @@ "NotOpenForReading": "not open for reading", "NetNameDeleted": "net name deleted", "FileTooBig": "Input file exceeds resources", - "InvalidSourceEncoding": "invalid source encoding" + "InvalidSourceEncoding": "invalid source encoding", + "DiskQuota": "disk quota", + "NoSpaceLeft": "no space left", + "DeviceBusy": "device busy", + "InvalidArgument": "invalid argument", + "NotOpenForWriting": "not open for writing", + "LockViolation": "lock violation", + "ProcessFdQuotaExceeded": "process fd quota exceeded", + "SystemFdQuotaExceeded": "system fd quota exceeded", + "SharingViolation": "sharing violation", + "PathAlreadyExists": "path already exists", + "FileNotFound": "file not found", + "PipeBusy": "pipe busy", + "NameTooLong": "name too long", + "InvalidUtf8": "invalid utf8", + "BadPathName": "bad path name", + "NetworkNotFound": "network not found", + "InvalidHandle": "invalid handle", + "SymLinkLoop": "sym link loop", + "NoDevice": "no device", + "NotDir": "not dir", + "FileLocksNotSupported": "file locks not supported", + "FileBusy": "file busy", + "LinkQuotaExceeded": "link quota exceeded", + "ReadOnlyFileSystem": "read only file system", + "RenameAcrossMountPoints": "rename across mount points" } } \ No newline at end of file diff 
--git a/src/ptkgen/main.zig b/src/ptkgen/main.zig index 42ba2c0..2983bd1 100644 --- a/src/ptkgen/main.zig +++ b/src/ptkgen/main.zig @@ -10,8 +10,9 @@ const ast = @import("ast.zig"); const sema = @import("sema.zig"); const intl = @import("intl.zig"); const parser = @import("parser.zig"); -const ast_dump = @import("ast_dump.zig"); -const sema_dump = @import("sema_dump.zig"); +const ast_dump = @import("dump/ast.zig"); +const sema_dump = @import("dump/sema.zig"); +const json_dump = @import("dump/json.zig"); const Diagnostics = @import("Diagnostics.zig"); @@ -20,11 +21,17 @@ comptime { _ = parser; } +pub const Format = enum { + json, + // zig, +}; + pub const CliOptions = struct { help: bool = false, output: ?[]const u8 = null, test_mode: TestMode = .none, trace: bool = false, + format: Format = .json, @"max-file-size": u32 = 4 * 1024, // 4 MB of source code is a lot! @@ -47,6 +54,8 @@ pub const CliOptions = struct { .@"max-file-size" = "Maximum input file size in KiB (default: 4096)", .trace = "Prints a parse trace", + + .format = "Selects the output format of the grammar. Can be one of [ json, zig ]", }, }; }; @@ -196,6 +205,7 @@ fn convertErrorToDiagnostics(diagnostics: *Diagnostics, file_name: []const u8, e }, .file_limit_exceeded, .{}); }, + // input errors: error.InputOutput, error.AccessDenied, error.BrokenPipe, @@ -208,6 +218,33 @@ fn convertErrorToDiagnostics(diagnostics: *Diagnostics, file_name: []const u8, e error.ConnectionTimedOut, error.NotOpenForReading, error.NetNameDeleted, + + // output errors: + error.DiskQuota, + error.NoSpaceLeft, + error.DeviceBusy, + error.InvalidArgument, + error.NotOpenForWriting, + error.LockViolation, + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, + error.SharingViolation, + error.PathAlreadyExists, + error.FileNotFound, + error.PipeBusy, + error.NameTooLong, + error.InvalidUtf8, + error.BadPathName, + error.NetworkNotFound, + error.InvalidHandle, + error.SymLinkLoop, + error.NoDevice, + error.NotDir, + error.FileLocksNotSupported, + error.FileBusy, + error.LinkQuotaExceeded, + error.ReadOnlyFileSystem, + error.RenameAcrossMountPoints, => |e| { try diagnostics.emit(.{ .source = file_name, @@ -319,4 +356,43 @@ fn compileFile( std.debug.print("\n\nsema dump:\n", .{}); sema_dump.dump(string_pool, grammar); } + + if (options.test_mode != .none) + return; + + // Output generation: + { + const use_stdout = (options.output == null) or std.mem.eql(u8, options.output.?, "-"); + + var atomic_output_file: std.fs.AtomicFile = undefined; + if (!use_stdout) { + atomic_output_file = try std.fs.cwd().atomicFile(options.output.?, .{}); + } + defer if (!use_stdout) + atomic_output_file.deinit(); + + var output_file = if (use_stdout) + std.io.getStdOut() + else + atomic_output_file.file; + + // write to output_file here: + switch (options.format) { + .json => { + var arena = std.heap.ArenaAllocator.init(allocator); + defer arena.deinit(); + + var json_repr: std.json.Value = try json_dump.createJsonValue( + &arena, + string_pool, + grammar, + ); + + try std.json.stringify(json_repr, .{}, output_file.writer()); + }, + } + + if (!use_stdout) + try atomic_output_file.finish(); + } } From ee986c1c5daab34c3bf790476d475fa6b4f46d32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20=22xq=22=20Quei=C3=9Fner?= Date: Mon, 3 Jun 2024 20:11:39 +0200 Subject: [PATCH 20/20] Backup --- docs/semantics.md | 23 ++ src/ptkgen/Diagnostics.zig | 53 +++ src/ptkgen/dump/json.zig | 69 +++- src/ptkgen/intl/en.json | 9 +- src/ptkgen/main.zig | 7 +- src/ptkgen/sema.zig | 385 
+++++++++++++++++- .../accept/map-simple-builtin-fncall-0.ptk | 1 + .../accept/map-simple-builtin-fncall-1.ptk | 1 + .../accept/map-simple-builtin-fncall-4.ptk | 1 + .../accept/map-simple-code-literal.ptk | 1 + test/analysis/accept/map-simple-list-0.ptk | 1 + test/analysis/accept/map-simple-list-1.ptk | 1 + test/analysis/accept/map-simple-list-4.ptk | 1 + test/analysis/accept/map-simple-record-0.ptk | 1 + test/analysis/accept/map-simple-record-1.ptk | 1 + test/analysis/accept/map-simple-record-4.ptk | 1 + test/analysis/accept/map-simple-ruleref.ptk | 1 + .../accept/map-simple-user-fncall-0.ptk | 1 + .../accept/map-simple-user-fncall-1.ptk | 1 + .../accept/map-simple-user-fncall-4.ptk | 1 + .../accept/map-simple-user-literal.ptk | 1 + test/analysis/accept/map-simple-variant.ptk | 1 + test/analysis/reject/map-ruleref-oob.ptk | 2 + 23 files changed, 555 insertions(+), 9 deletions(-) create mode 100644 docs/semantics.md create mode 100644 test/analysis/accept/map-simple-builtin-fncall-0.ptk create mode 100644 test/analysis/accept/map-simple-builtin-fncall-1.ptk create mode 100644 test/analysis/accept/map-simple-builtin-fncall-4.ptk create mode 100644 test/analysis/accept/map-simple-code-literal.ptk create mode 100644 test/analysis/accept/map-simple-list-0.ptk create mode 100644 test/analysis/accept/map-simple-list-1.ptk create mode 100644 test/analysis/accept/map-simple-list-4.ptk create mode 100644 test/analysis/accept/map-simple-record-0.ptk create mode 100644 test/analysis/accept/map-simple-record-1.ptk create mode 100644 test/analysis/accept/map-simple-record-4.ptk create mode 100644 test/analysis/accept/map-simple-ruleref.ptk create mode 100644 test/analysis/accept/map-simple-user-fncall-0.ptk create mode 100644 test/analysis/accept/map-simple-user-fncall-1.ptk create mode 100644 test/analysis/accept/map-simple-user-fncall-4.ptk create mode 100644 test/analysis/accept/map-simple-user-literal.ptk create mode 100644 test/analysis/accept/map-simple-variant.ptk create mode 100644 test/analysis/reject/map-ruleref-oob.ptk diff --git a/docs/semantics.md b/docs/semantics.md new file mode 100644 index 0000000..7d23443 --- /dev/null +++ b/docs/semantics.md @@ -0,0 +1,23 @@ +# PtkGen Semantics + +## Context References + +tl;dr: `$n` can access the elements of the top-level productions of a rule. + +```rb +rule r = "hello" "world" => $0; # access "hello" +rule r = "hello" "world" => $1; # access "world" +``` + +### Index Resolution + +1. Flatten hierarchy +2. Use index in flattened list + +```rb +rule r = a b c d e f g h; # [ a b c d e f g h ] => flat sequence +rule r = a b ( c d e )? f g h; # [ a b c? d? e? 
f g h ] => `c`, `d`, `e` get promoted to optional) +rule r = a b ( c d e )* f g h; # [ a b [[c d e]] f g h ] => `c d e` get promoted to list of lists ([[c d e], [c d e], ...]) +rule r = a b ( c d e )+ f g h; # [ a b [[c d e]] f g h ] => `c d e` get promoted to list of lists ([[c d e], [c d e], ...]) +rule r = a b ( c d e ) f g h; # [ a b c d e f g h ] => `c d e` gets flattened into the master list +``` diff --git a/src/ptkgen/Diagnostics.zig b/src/ptkgen/Diagnostics.zig index 16677f5..6559663 100644 --- a/src/ptkgen/Diagnostics.zig +++ b/src/ptkgen/Diagnostics.zig @@ -54,6 +54,18 @@ pub const Code = enum(u16) { duplicate_compound_field = 1307, + context_reference_out_of_bounds = 1308, + + variant_does_not_exist = 1309, + + record_field_does_not_exist = 1310, + record_field_already_initialized = 1311, + record_field_not_initialized = 1312, + + mapping_requires_typed_rule = 1313, + + invalid_builtin_function = 1314, + // semantic warnings (4000-4099): missing_start_symbol = 4000, @@ -221,6 +233,35 @@ pub fn Data(comptime code: Code) type { previous_location: ptk.Location, }, + .context_reference_out_of_bounds => struct { + index: u32, + limit: u32, + }, + + .variant_does_not_exist => struct { + field: []const u8, + type_location: ptk.Location, + }, + + .record_field_does_not_exist => struct { + field: []const u8, + type_location: ptk.Location, + }, + .record_field_already_initialized => struct { + field: []const u8, + prev_init: ptk.Location, + }, + .record_field_not_initialized => struct { + field: []const u8, + field_location: ptk.Location, + }, + + .mapping_requires_typed_rule => NoDiagnosticData, + + .invalid_builtin_function => struct { + name: []const u8, + }, + // else => @compileError(std.fmt.comptimePrint("Code {} has no diagnostic type associated!", .{code})), }; } @@ -342,6 +383,18 @@ fn Formatter(comptime T: type) type { } }, + // integers: + + u32 => struct { + value: T, + pub fn format(item: @This(), fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = options; + _ = fmt; + + try writer.print("{}", .{item.value}); + } + }, + else => @compileError(std.fmt.comptimePrint("{s} is not a supported diagnostic type!", .{@typeName(T)})), }; } diff --git a/src/ptkgen/dump/json.zig b/src/ptkgen/dump/json.zig index f777011..0da58ee 100644 --- a/src/ptkgen/dump/json.zig +++ b/src/ptkgen/dump/json.zig @@ -179,10 +179,71 @@ const JsonMapper = struct { } fn convertMapping(mapper: JsonMapper, mapping: sema.Mapping) error{OutOfMemory}!std.json.Value { - _ = mapping; - _ = mapper; + var jtype = mapper.newObject(); + errdefer jtype.deinit(); + + try jtype.putNoClobber("kind", .{ .string = @tagName(mapping) }); + + switch (mapping) { + .record_initializer => |record_initializer| { + var list = mapper.newArray(); + errdefer list.deinit(); + + try list.resize(record_initializer.fields.len); + + for (list.items, record_initializer.fields) |*dst, src| { + var jfield = mapper.newObject(); + errdefer jfield.deinit(); + + try jfield.putNoClobber("field", mapper.jsonString(src.field.name)); + try jfield.putNoClobber("value", try mapper.convertMapping(src.value)); + + dst.* = .{ .object = jfield }; + } + + try jtype.putNoClobber("fields", .{ .array = list }); + }, + .list_initializer => |list_initializer| { + var list = mapper.newArray(); + errdefer list.deinit(); + + try list.resize(list_initializer.items.len); + + for (list.items, list_initializer.items) |*dst, src| { + dst.* = try mapper.convertMapping(src); + } - @panic("implement generation of mappings"); + try 
jtype.putNoClobber("items", .{ .array = list }); + }, + .variant_initializer => |variant_initializer| { + try jtype.putNoClobber("field", mapper.jsonString(variant_initializer.field.name)); + try jtype.putNoClobber("value", try mapper.convertMapping(variant_initializer.value.*)); + }, + .user_function_call, .builtin_function_call => |function_call| { + var list = mapper.newArray(); + errdefer list.deinit(); + + try list.resize(function_call.arguments.len); + + for (list.items, function_call.arguments) |*dst, src| { + dst.* = try mapper.convertMapping(src); + } + + try jtype.putNoClobber("arguments", .{ .array = list }); + + try jtype.putNoClobber("function", mapper.jsonString(function_call.function)); + }, + + .code_literal, .user_literal => |literal| { + try jtype.putNoClobber("literal", mapper.jsonString(literal)); + }, + + .context_reference => |context_reference| { + try jtype.putNoClobber("index", .{ .integer = context_reference.index }); + }, + } + + return .{ .object = jtype }; } fn convertType(mapper: JsonMapper, stype: *sema.Type) error{OutOfMemory}!std.json.Value { @@ -206,6 +267,8 @@ const JsonMapper = struct { break :blk .{ .object = fields }; }, + + .token => .null, }; var jtype = mapper.newObject(); diff --git a/src/ptkgen/intl/en.json b/src/ptkgen/intl/en.json index fe62e4d..00ec0ea 100644 --- a/src/ptkgen/intl/en.json +++ b/src/ptkgen/intl/en.json @@ -30,7 +30,14 @@ "reference_to_undeclared_pattern": "Reference to undeclared pattern '{[identifier]}'.", "missing_start_symbol": "Grammar file has no start symbol declared.", "multiple_start_symbols": "Another start rule '({[identifier]})' was already declared here: {[previous_location]}", - "duplicate_compound_field": "Another field named '{s}' was already declared here: {[previous_location]}" + "duplicate_compound_field": "Another field named '{[identifier]s}' was already declared here: {[previous_location]}", + "context_reference_out_of_bounds": "Context reference index out of bounds. {[index]} was given, but the highest possible index is {[limit]}.", + "variant_does_not_exist": "The variant field {[identifier]s} does not exist. The variant type is declared here: {[type_location]}", + "record_field_does_not_exist": "The record field {[field]s} does not exist. The record type is declared here: {[type_location]}", + "record_field_already_initialized": "The record field {[field]s} is already initialized. Previous initialization: {[prev_init]}", + "record_field_not_initialized": "The record field {[field]s} was not initialized. Field declared here: {[field_location]}", + "mapping_requires_typed_rule": "The use of a rule mapping requires that the rule has an explicitly declared type.", + "invalid_builtin_function": "The builtin function {[name]s} does not exist!" }, "errors": { "SyntaxError": "syntax error", diff --git a/src/ptkgen/main.zig b/src/ptkgen/main.zig index 2983bd1..699b991 100644 --- a/src/ptkgen/main.zig +++ b/src/ptkgen/main.zig @@ -35,9 +35,12 @@ pub const CliOptions = struct { @"max-file-size": u32 = 4 * 1024, // 4 MB of source code is a lot! + dump: bool = false, + pub const shorthands = .{ .h = "help", .o = "output", + .D = "dump", }; pub const meta = .{ @@ -56,6 +59,8 @@ pub const CliOptions = struct { .trace = "Prints a parse trace", .format = "Selects the output format of the grammar. 
Can be one of [ json, zig ]", + + .dump = "Dumps results from parser and sema to stderr.", }, }; }; @@ -349,7 +354,7 @@ fn compileFile( // TODO: Implement parsergen / tablegen / highlightergen - if (options.test_mode == .none) { + if (options.dump) { std.debug.print("ast dump:\n", .{}); ast_dump.dump(string_pool, tree); diff --git a/src/ptkgen/sema.zig b/src/ptkgen/sema.zig index ba18328..bd64364 100644 --- a/src/ptkgen/sema.zig +++ b/src/ptkgen/sema.zig @@ -54,6 +54,8 @@ pub const MappedProduction = struct { mapping: ?Mapping, }; +/// A production is a part of a grammar. Productions consume +/// tokens and generate structure from this. pub const Production = union(enum) { terminal: *Pattern, // literal and terminal ast nodes are wrapped to this recursion: *Rule, // @@ -63,8 +65,51 @@ pub const Production = union(enum) { repetition_one: *Production, // [ ... ]+ }; -pub const Mapping = struct { - // +pub const Mapping = union(enum) { + record_initializer: RecordInitializer, // { a = b, c = d, ... } + list_initializer: ListInitializer, // [ a, b, c, ... ] + variant_initializer: VariantInitializer, // field: ... + + user_function_call: FunctionCall, // @builtin(a,b,c) + builtin_function_call: FunctionCall, // identifier(a,b,c) + + code_literal: String, // `code` + user_literal: String, // @user_data + + context_reference: ContextReference, // $0 +}; + +pub const ContextReference = struct { + index: u32, + production: *Production, + type: *Type, +}; + +const RecordInitializer = struct { + type: *Type, + fields: []FieldInitializer, +}; + +const FieldInitializer = struct { + field: *Field, + value: Mapping, +}; + +const ListInitializer = struct { + type: *Type, + items: []Mapping, +}; + +const VariantInitializer = struct { + type: *Type, + field: *Field, + value: *Mapping, +}; + +const FunctionCall = struct { + return_type: ?*Type, + function: String, + arguments: []Mapping, }; pub const Node = struct { @@ -101,6 +146,9 @@ pub const Type = union(enum) { // ast nodes are basically "named types" and must be handled as such named: *Node, + // builtin types: + token, // points to a PTK token + pub fn id(t: *const Type) TypeId { return @as(TypeId, t.*); } @@ -114,7 +162,7 @@ pub const CompoundType = struct { pub const Field = struct { location: ptk.Location, - // name: String, + name: String, type: *Type, }; @@ -169,6 +217,8 @@ var BAD_TYPE_SENTINEL: Type = undefined; var BAD_NODE_SENTINEL: Node = undefined; var BAD_RULE_SENTINEL: Rule = undefined; var BAD_PATTERN_SENTINEL: Pattern = undefined; +var BAD_PRODUCTION_SENTINEL: Production = undefined; +var BAD_FIELD_SENTINEL: Field = undefined; fn innerAnalysis(analyzer: *Analyzer) AnalyzeError!void { // Phase 0: Validate productions on legality (coarse error checking) @@ -193,7 +243,8 @@ fn innerAnalysis(analyzer: *Analyzer) AnalyzeError!void { try analyzer.iterateOn(.rule, Analyzer.instantiateRules); // Phase 5: Instantiate and validate AST mappings - + try analyzer.iterateOn(.rule, Analyzer.instantiateMappings); // Create data structures + try analyzer.iterateOn(.rule, Analyzer.linkAndValidateMappedProductions); // Validate if data tr } const Analyzer = struct { @@ -494,6 +545,313 @@ const Analyzer = struct { } } + fn instantiateMappings(analyzer: *Analyzer, ast_rule: *ast.Rule) !void { + const sem_rule: *Rule = analyzer.target.rules.get(ast_rule.name.value).?; + + var iter = ast.iterate(ast_rule.productions); + + for (sem_rule.productions) |*sem_prod| { + const ast_prod = iter.next().?; + sem_prod.mapping = if (ast_prod.mapping) |src_mapping| + try 
analyzer.translateMapping(src_mapping) + else + null; + } + std.debug.assert(iter.next() == null); + } + + fn translateMapping(analyzer: *Analyzer, ast_mapping: ast.AstMapping) error{OutOfMemory}!Mapping { + switch (ast_mapping) { + .literal => |ref| return Mapping{ .code_literal = ref.value }, + .user_reference => |ref| return Mapping{ .code_literal = ref.value }, + + .context_reference => |ast_context_reference| { + return Mapping{ + .context_reference = .{ + .index = ast_context_reference.index, + .production = &BAD_PRODUCTION_SENTINEL, + .type = &BAD_TYPE_SENTINEL, + }, + }; + }, + + inline .user_function_call, .function_call => |function_call| { + const function_name = function_call.function.value; + + var args = try analyzer.target.arena.allocator().alloc(Mapping, function_call.arguments.len()); + errdefer analyzer.target.arena.allocator().free(args); + + var iter = ast.iterate(function_call.arguments); + for (args) |*item| { + const src = iter.next().?; + item.* = try analyzer.translateMapping(src.*); + } + std.debug.assert(iter.next() == null); + + const fncall = FunctionCall{ + .arguments = args, + .function = function_name, + .return_type = null, + }; + + return switch (ast_mapping) { + .user_function_call => Mapping{ .user_function_call = fncall }, + .function_call => Mapping{ .builtin_function_call = fncall }, + else => unreachable, + }; + }, + + .variant => |ast_variant| { + const init_expr = try analyzer.translateMapping(ast_variant.value.*); + + // ast_variant.field.value + return Mapping{ + .variant_initializer = .{ + .type = &BAD_TYPE_SENTINEL, + .field = &BAD_FIELD_SENTINEL, + .value = try moveToHeap(&analyzer.target.arena, Mapping, init_expr), + }, + }; + }, + + .list => |ast_list| { + var items = try analyzer.target.arena.allocator().alloc(Mapping, ast_list.len()); + errdefer analyzer.target.arena.allocator().free(items); + + var iter = ast.iterate(ast_list); + for (items) |*item| { + const src = iter.next().?; + item.* = try analyzer.translateMapping(src.*); + } + std.debug.assert(iter.next() == null); + + return Mapping{ + .list_initializer = .{ + .items = items, + .type = &BAD_TYPE_SENTINEL, + }, + }; + }, + + .record => |ast_record| { + var fields = try analyzer.target.arena.allocator().alloc(FieldInitializer, ast_record.len()); + errdefer analyzer.target.arena.allocator().free(fields); + + var iter = ast.iterate(ast_record); + for (fields) |*item| { + const src = iter.next().?; + const field_name = src.field.value; + _ = field_name; + item.* = .{ + .field = &BAD_FIELD_SENTINEL, + .value = try analyzer.translateMapping(src.value.*), + }; + } + std.debug.assert(iter.next() == null); + + return Mapping{ + .record_initializer = .{ + .fields = fields, + .type = &BAD_TYPE_SENTINEL, + }, + }; + }, + } + } + + const TypeTransform = struct { + optional: bool = false, + sequence: bool = false, + + pub fn add(tt: TypeTransform, comptime field: enum { optional, sequence }) TypeTransform { + var copy = tt; + @field(copy, @tagName(field)) = true; + return copy; + } + + pub fn format(tt: TypeTransform, fmt: []const u8, opt: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = opt; + var list = std.BoundedArray([]const u8, 2){}; + + if (tt.optional) list.appendAssumeCapacity("opt"); + if (tt.sequence) list.appendAssumeCapacity("seq"); + + try writer.writeAll("TypeTransform("); + + if (list.len == 0) { + try writer.writeAll("none"); + } else { + for (list.slice(), 0..) 
|item, i| { + if (i > 0) + try writer.writeAll(","); + try writer.writeAll(item); + } + } + + try writer.writeAll(")"); + } + }; + + const IndexedProd = struct { + transform: TypeTransform, + production: *Production, + }; + + const ProductionIndex = std.ArrayList(IndexedProd); + + fn linkAndValidateMappedProductions(analyzer: *Analyzer, ast_rule: *ast.Rule) !void { + const sem_rule: *Rule = analyzer.target.rules.get(ast_rule.name.value).?; + + const has_any_mapping = for (sem_rule.productions) |prod| { + if (prod.mapping != null) + break true; + } else false; + + if (has_any_mapping and sem_rule.type == null) { + try analyzer.emitDiagnostic(sem_rule.location, .mapping_requires_typed_rule, .{}); + return; + } + + if (!has_any_mapping) { + // We're done here, nothing to link and validate. + return; + } + + const rule_type = sem_rule.type.?; + + var iter = ast.iterate(ast_rule.productions); + + var prod_index = ProductionIndex.init(analyzer.arena); + defer prod_index.deinit(); + + for (sem_rule.productions) |*sem_prod| { + const ast_prod = iter.next().?; + + if (ast_prod.mapping) |src_mapping| { + const dst_mapping = &sem_prod.mapping.?; + + // Rebuild index: + prod_index.shrinkRetainingCapacity(0); + try analyzer.rebuildProductionIndex(&prod_index, &sem_prod.production, .{}); + + std.debug.print("index:\n", .{}); + for (0.., prod_index.items) |index, item| { + std.debug.print("[{}]: {} {s}\n", .{ index, item.transform, @tagName(item.production.*) }); + } + + try analyzer.linkAndValidateMapping( + rule_type, + dst_mapping, + src_mapping, + prod_index.items, + ); + } else { + std.debug.assert(sem_prod.mapping == null); + } + } + + std.debug.assert(iter.next() == null); + } + + fn rebuildProductionIndex(analyzer: *Analyzer, prod_index: *ProductionIndex, production: *Production, transform: TypeTransform) error{OutOfMemory}!void { + switch (production.*) { + // Those are terminals and will be appended as-is: + .terminal => try prod_index.append(.{ .production = production, .transform = transform }), + .recursion => try prod_index.append(.{ .production = production, .transform = transform }), + + // Sequences are unwrapped: + .sequence => |list| for (list) |*inner_prod| { + try analyzer.rebuildProductionIndex(prod_index, inner_prod, transform); + }, + + // They just "recurse" into their inner workings, but annotate type changes: + .optional => |inner_prod| { + try analyzer.rebuildProductionIndex(prod_index, inner_prod, transform.add(.optional)); + }, + + .repetition_zero => |inner_prod| { + try analyzer.rebuildProductionIndex(prod_index, inner_prod, transform.add(.sequence)); + }, + + .repetition_one => |inner_prod| { + try analyzer.rebuildProductionIndex(prod_index, inner_prod, transform.add(.sequence)); + }, + } + } + + fn linkAndValidateMapping(analyzer: *Analyzer, type_context: *Type, sem_map: *Mapping, ast_map: ast.AstMapping, production_index: []const IndexedProd) !void { + _ = type_context; + + switch (sem_map.*) { + // Always fine, and terminate recursion: + .code_literal, .user_literal => {}, + + // Rule refs: + + .context_reference => |*context_reference| { + if (context_reference.index >= production_index.len) { + context_reference.production = &BAD_PRODUCTION_SENTINEL; + try analyzer.emitDiagnostic(ast_map.context_reference.location, .context_reference_out_of_bounds, .{ + .index = context_reference.index, + .limit = @as(u32, @truncate(production_index.len - 1)), // should never underflow as empty rules are illegal + }); + return; + } + + context_reference.production = 
production_index[context_reference.index].production;

                const base_type: *Type = switch (context_reference.production.*) {
                    // Terminals always map to the canonical token type:
                    .terminal => blk: {
                        var proto: Type = .token;
                        const canon = try analyzer.getCanonicalType(&proto);
                        std.debug.assert(canon != &proto);
                        break :blk canon;
                    },

                    // Invocations of other rules take on the type of the referenced rule:
                    .recursion => |rule| rule.type,

                    .sequence,
                    .optional,
                    .repetition_zero,
                    .repetition_one,
                    => unreachable, // we should not be able to reach those

                };

                // TODO: Transform type for context reference

                context_reference.type = base_type;
            },

            // Calls:

            .user_function_call => |*user_function_call| {
                _ = user_function_call;
            },

            .builtin_function_call => |*builtin_function_call| {
                _ = builtin_function_call;
            },

            // Compounds:

            .record_initializer => |*record_initializer| {
                _ = record_initializer;
            },

            .list_initializer => |*list_initializer| {
                _ = list_initializer;
            },

            .variant_initializer => |*variant_initializer| {
                _ = variant_initializer;
            },
        }
    }

    /// Checks if the given type is semantically ok or emits compiler errors if not.
    fn validateType(analyzer: *Analyzer, type_node: *Type) error{OutOfMemory}!void {
        if (type_node == &BAD_TYPE_SENTINEL) {
@@ -547,6 +905,7 @@ const Analyzer = struct {
             gop_result.value_ptr.* = .{
                 .type = field_type,
                 .location = field_def.location,
+                .name = field_def.name.value,
             };
         }
 
@@ -584,6 +943,10 @@
 
         errdefer if (compound_type) |ct| analyzer.destroyCompoundType(ct);
 
+        return try analyzer.getCanonicalType(&proto_type);
+    }
+
+    fn getCanonicalType(analyzer: *Analyzer, proto_type: *Type) error{OutOfMemory}!*Type {
         if (analyzer.getUniqueTypeHandle(&proto_type)) |resolved_type| {
             analyzer.deduplicated_type_count += 1;
             // logger.debug("deduplicated a {s}", .{@tagName(resolved_type.*)});
@@ -678,3 +1041,17 @@ const TypeContext = struct {
         return hasher.final();
     }
 };
+
+fn moveToHeap(arena: *std.heap.ArenaAllocator, comptime T: type, template: T) error{OutOfMemory}!*T {
+    const dupe = try arena.allocator().create(T);
+    dupe.* = template;
+    return dupe;
+}
+
+pub const BuiltinFunction = struct {
+    name: []const u8,
+};
+
+pub const builtins = struct {
+    pub const foo = BuiltinFunction{ .name = "foo" };
+};
diff --git a/test/analysis/accept/map-simple-builtin-fncall-0.ptk b/test/analysis/accept/map-simple-builtin-fncall-0.ptk
new file mode 100644
index 0000000..b4d4eec
--- /dev/null
+++ b/test/analysis/accept/map-simple-builtin-fncall-0.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => builtin();
\ No newline at end of file
diff --git a/test/analysis/accept/map-simple-builtin-fncall-1.ptk b/test/analysis/accept/map-simple-builtin-fncall-1.ptk
new file mode 100644
index 0000000..21ebc7f
--- /dev/null
+++ b/test/analysis/accept/map-simple-builtin-fncall-1.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => builtin(`1`);
\ No newline at end of file
diff --git a/test/analysis/accept/map-simple-builtin-fncall-4.ptk b/test/analysis/accept/map-simple-builtin-fncall-4.ptk
new file mode 100644
index 0000000..09e4372
--- /dev/null
+++ b/test/analysis/accept/map-simple-builtin-fncall-4.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => builtin(`1`, `2`, `3`, `4`);
\ No newline at end of file
diff --git a/test/analysis/accept/map-simple-code-literal.ptk b/test/analysis/accept/map-simple-code-literal.ptk
new file mode 100644
index 0000000..475f0a4
--- /dev/null
+++ b/test/analysis/accept/map-simple-code-literal.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => `code`;
\ No newline at end of file
diff --git a/test/analysis/accept/map-simple-list-0.ptk b/test/analysis/accept/map-simple-list-0.ptk
new file mode 100644
index 0000000..dffe97f
--- /dev/null
+++ b/test/analysis/accept/map-simple-list-0.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => { };
\ No newline at end of file
diff --git a/test/analysis/accept/map-simple-list-1.ptk b/test/analysis/accept/map-simple-list-1.ptk
new file mode 100644
index 0000000..ab4e2c2
--- /dev/null
+++ b/test/analysis/accept/map-simple-list-1.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => { `1` };
\ No newline at end of file
diff --git a/test/analysis/accept/map-simple-list-4.ptk b/test/analysis/accept/map-simple-list-4.ptk
new file mode 100644
index 0000000..3f970b9
--- /dev/null
+++ b/test/analysis/accept/map-simple-list-4.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => { `1`, `2`, `3`, `4` };
\ No newline at end of file
diff --git a/test/analysis/accept/map-simple-record-0.ptk b/test/analysis/accept/map-simple-record-0.ptk
new file mode 100644
index 0000000..8f1a98c
--- /dev/null
+++ b/test/analysis/accept/map-simple-record-0.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => { };
diff --git a/test/analysis/accept/map-simple-record-1.ptk b/test/analysis/accept/map-simple-record-1.ptk
new file mode 100644
index 0000000..4cf6bfd
--- /dev/null
+++ b/test/analysis/accept/map-simple-record-1.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => { field = `1` };
diff --git a/test/analysis/accept/map-simple-record-4.ptk b/test/analysis/accept/map-simple-record-4.ptk
new file mode 100644
index 0000000..5f03773
--- /dev/null
+++ b/test/analysis/accept/map-simple-record-4.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => { x = `1`, y = `2`, z = `3`, w = `4` };
diff --git a/test/analysis/accept/map-simple-ruleref.ptk b/test/analysis/accept/map-simple-ruleref.ptk
new file mode 100644
index 0000000..4e0bc07
--- /dev/null
+++ b/test/analysis/accept/map-simple-ruleref.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => $0;
\ No newline at end of file
diff --git a/test/analysis/accept/map-simple-user-fncall-0.ptk b/test/analysis/accept/map-simple-user-fncall-0.ptk
new file mode 100644
index 0000000..82eb16e
--- /dev/null
+++ b/test/analysis/accept/map-simple-user-fncall-0.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => @userFn();
\ No newline at end of file
diff --git a/test/analysis/accept/map-simple-user-fncall-1.ptk b/test/analysis/accept/map-simple-user-fncall-1.ptk
new file mode 100644
index 0000000..b6b55fe
--- /dev/null
+++ b/test/analysis/accept/map-simple-user-fncall-1.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => @userFn(`1`);
\ No newline at end of file
diff --git a/test/analysis/accept/map-simple-user-fncall-4.ptk b/test/analysis/accept/map-simple-user-fncall-4.ptk
new file mode 100644
index 0000000..ab0bcb2
--- /dev/null
+++ b/test/analysis/accept/map-simple-user-fncall-4.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => @userFn(`1`, `2`, `3`, `4`);
\ No newline at end of file
diff --git a/test/analysis/accept/map-simple-user-literal.ptk b/test/analysis/accept/map-simple-user-literal.ptk
new file mode 100644
index 0000000..afef9ad
--- /dev/null
+++ b/test/analysis/accept/map-simple-user-literal.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => @externalThingy;
\ No newline at end of file
diff --git a/test/analysis/accept/map-simple-variant.ptk b/test/analysis/accept/map-simple-variant.ptk
new file mode 100644
index 0000000..229b3cb
--- /dev/null
+++ b/test/analysis/accept/map-simple-variant.ptk
@@ -0,0 +1 @@
+rule basic = "hello" => field: `code`;
\ No newline at end of file
diff --git a/test/analysis/reject/map-ruleref-oob.ptk b/test/analysis/reject/map-ruleref-oob.ptk
new file mode 100644
index 0000000..8af2ba4
--- /dev/null
+++ b/test/analysis/reject/map-ruleref-oob.ptk
@@ -0,0 +1,2 @@
+# expected: E1308
+rule basic = "hello" => $1;
\ No newline at end of file
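
Side note on the mapping analysis above: the standalone Zig sketch below is illustrative only and is not taken from the patch; the names Prod and flatten are made up stand-ins for the real Production union and rebuildProductionIndex. It shows the idea behind the production index that backs `$n` context references: terminals and rule invocations are appended as indexable items, sequences are unwrapped, and optionals/repetitions are recursed into, so a rule that matches a single terminal only exposes `$0`. That is presumably why the reject test above expects E1308 for `$1`.

const std = @import("std");

// Illustrative stand-ins; the real ptkgen `Production` union carries more data.
const Prod = union(enum) {
    terminal: []const u8,
    rule_ref: []const u8,
    sequence: []const Prod,
    optional: *const Prod,
    repetition_zero: *const Prod,
    repetition_one: *const Prod,
};

// Flattens a production into the list of items a `$n` reference can point at:
// terminals and rule invocations are appended, sequences are unwrapped,
// optionals and repetitions are recursed into.
fn flatten(prod: *const Prod, out: *std.ArrayList(*const Prod)) error{OutOfMemory}!void {
    switch (prod.*) {
        .terminal, .rule_ref => try out.append(prod),
        .sequence => |items| for (items) |*item| {
            try flatten(item, out);
        },
        .optional, .repetition_zero, .repetition_one => |inner| try flatten(inner, out),
    }
}

test "a rule matching a single terminal only exposes $0" {
    const body = Prod{ .terminal = "hello" };

    var index = std.ArrayList(*const Prod).init(std.testing.allocator);
    defer index.deinit();
    try flatten(&body, &index);

    try std.testing.expectEqual(@as(usize, 1), index.items.len);

    const bad_ref: usize = 1; // `$1`, as used in the reject test
    try std.testing.expect(bad_ref >= index.items.len); // out of bounds -> diagnostic
}

Saved to any file name (e.g. flatten_sketch.zig), this can be checked with `zig test flatten_sketch.zig`.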