From 6cedddd00873dc98317f5dad3cf1b5dfe51f0f14 Mon Sep 17 00:00:00 2001 From: Cameron Ross Date: Mon, 2 Mar 2020 22:08:55 -0400 Subject: [PATCH] initial implementation of YAML schemas --- examples/yaml_bench/yaml_bench.d | 2 +- source/dyaml/composer.d | 67 +++++++------ source/dyaml/dumper.d | 17 ++-- source/dyaml/loader.d | 25 ++--- source/dyaml/package.d | 1 + source/dyaml/resolver.d | 166 +++++++++---------------------- source/dyaml/schema.d | 89 +++++++++++++++++ 7 files changed, 196 insertions(+), 171 deletions(-) create mode 100644 source/dyaml/schema.d diff --git a/examples/yaml_bench/yaml_bench.d b/examples/yaml_bench/yaml_bench.d index bb8446e2..a140a518 100644 --- a/examples/yaml_bench/yaml_bench.d +++ b/examples/yaml_bench/yaml_bench.d @@ -101,7 +101,7 @@ void main(string[] args) //@safe { // Instead of constructing a resolver/constructor with each Loader, // construct them once to remove noise when profiling. - auto resolver = Resolver.withDefaultResolvers; + auto resolver = Resolver(DefaultSchema); auto constructTime = stopWatch.peek(); diff --git a/source/dyaml/composer.d b/source/dyaml/composer.d index c000b027..2a37239e 100644 --- a/source/dyaml/composer.d +++ b/source/dyaml/composer.d @@ -42,13 +42,8 @@ class ComposerException : MarkedYAMLException struct Composer { private: - ///Parser providing YAML events. - Parser parser_; - ///Resolver resolving tags (data types). - Resolver resolver_; ///Nodes associated with anchors. Used by YAML aliases. Node[string] anchors_; - ///Used to reduce allocations when creating pair arrays. /// ///We need one appender for each nesting level that involves @@ -62,7 +57,13 @@ struct Composer ///part of the outer levels. Used as a stack. Appender!(Node[])[] nodeAppenders_; + package: + ///Parser providing YAML events. + Parser parser; + ///Resolver resolving tags (data types). + Resolver resolver; public: + @disable this(); /** * Construct a composer. 
* @@ -71,8 +72,8 @@ struct Composer */ this(Parser parser, Resolver resolver) @safe { - parser_ = parser; - resolver_ = resolver; + this.parser = parser; + this.resolver = resolver; } /** @@ -83,17 +84,17 @@ struct Composer bool checkNode() @safe { // If next event is stream start, skip it - parser_.skipOver!"a.id == b"(EventID.streamStart); + parser.skipOver!"a.id == b"(EventID.streamStart); //True if there are more documents available. - return parser_.front.id != EventID.streamEnd; + return parser.front.id != EventID.streamEnd; } ///Get a YAML document as a node (the root of the document). Node getNode() @safe { //Get the root node of the next document. - assert(parser_.front.id != EventID.streamEnd, + assert(parser.front.id != EventID.streamEnd, "Trying to get a node from Composer when there is no node to " ~ "get. use checkNode() to determine if there is a node."); @@ -104,7 +105,7 @@ struct Composer void skipExpected(const EventID id) @safe { - const foundExpected = parser_.skipOver!"a.id == b"(id); + const foundExpected = parser.skipOver!"a.id == b"(id); assert(foundExpected, text("Expected ", id, " not found.")); } ///Ensure that appenders for specified nesting levels exist. @@ -144,10 +145,10 @@ struct Composer /// nodeAppenderLevel = Current level of the node appender stack. 
Node composeNode(const uint pairAppenderLevel, const uint nodeAppenderLevel) @safe { - if(parser_.front.id == EventID.alias_) + if(parser.front.id == EventID.alias_) { - const event = parser_.front; - parser_.popFront(); + const event = parser.front; + parser.popFront(); const anchor = event.anchor; enforce((anchor in anchors_) !is null, new ComposerException("Found undefined alias: " ~ anchor, @@ -163,7 +164,7 @@ struct Composer return anchors_[anchor]; } - const event = parser_.front; + const event = parser.front; const anchor = event.anchor; if((anchor !is null) && (anchor in anchors_) !is null) { @@ -179,7 +180,7 @@ struct Composer anchors_[anchor] = Node(); } - switch (parser_.front.id) + switch (parser.front.id) { case EventID.scalar: result = composeScalarNode(); @@ -203,9 +204,9 @@ struct Composer ///Compose a scalar node. Node composeScalarNode() @safe { - const event = parser_.front; - parser_.popFront(); - const tag = resolver_.resolve(NodeID.scalar, event.tag, event.value, + const event = parser.front; + parser.popFront(); + const tag = resolver.resolve(NodeID.scalar, event.tag, event.value, event.implicit); Node node = constructNode(event.startMark, event.endMark, tag, @@ -225,20 +226,20 @@ struct Composer ensureAppendersExist(pairAppenderLevel, nodeAppenderLevel); auto nodeAppender = &(nodeAppenders_[nodeAppenderLevel]); - const startEvent = parser_.front; - parser_.popFront(); - const tag = resolver_.resolve(NodeID.sequence, startEvent.tag, null, + const startEvent = parser.front; + parser.popFront(); + const tag = resolver.resolve(NodeID.sequence, startEvent.tag, null, startEvent.implicit); - while(parser_.front.id != EventID.sequenceEnd) + while(parser.front.id != EventID.sequenceEnd) { nodeAppender.put(composeNode(pairAppenderLevel, nodeAppenderLevel + 1)); } - Node node = constructNode(startEvent.startMark, parser_.front.endMark, + Node node = constructNode(startEvent.startMark, parser.front.endMark, tag, nodeAppender.data.dup); 
node.collectionStyle = startEvent.collectionStyle; - parser_.popFront(); + parser.popFront(); nodeAppender.clear(); return node; @@ -329,14 +330,14 @@ struct Composer @safe { ensureAppendersExist(pairAppenderLevel, nodeAppenderLevel); - const startEvent = parser_.front; - parser_.popFront(); - const tag = resolver_.resolve(NodeID.mapping, startEvent.tag, null, + const startEvent = parser.front; + parser.popFront(); + const tag = resolver.resolve(NodeID.mapping, startEvent.tag, null, startEvent.implicit); auto pairAppender = &(pairAppenders_[pairAppenderLevel]); Tuple!(Node, Mark)[] toMerge; - while(parser_.front.id != EventID.mappingEnd) + while(parser.front.id != EventID.mappingEnd) { auto pair = Node.Pair(composeNode(pairAppenderLevel + 1, nodeAppenderLevel), composeNode(pairAppenderLevel + 1, nodeAppenderLevel)); @@ -344,7 +345,7 @@ struct Composer //Need to flatten and merge the node referred by YAMLMerge. if(pair.key.type == NodeType.merge) { - toMerge ~= tuple(pair.value, cast(Mark)parser_.front.endMark); + toMerge ~= tuple(pair.value, cast(Mark)parser.front.endMark); } //Not YAMLMerge, just add the pair. 
else @@ -362,12 +363,12 @@ struct Composer .uniq!((x,y) => x.key == y.key) .walkLength; enforce(numUnique == pairAppender.data.length, - new ComposerException("Duplicate key found in mapping", parser_.front.startMark)); + new ComposerException("Duplicate key found in mapping", parser.front.startMark)); - Node node = constructNode(startEvent.startMark, parser_.front.endMark, + Node node = constructNode(startEvent.startMark, parser.front.endMark, tag, pairAppender.data.dup); node.collectionStyle = startEvent.collectionStyle; - parser_.popFront(); + parser.popFront(); pairAppender.clear(); return node; diff --git a/source/dyaml/dumper.d b/source/dyaml/dumper.d index 51f232fe..b456342a 100644 --- a/source/dyaml/dumper.d +++ b/source/dyaml/dumper.d @@ -22,6 +22,7 @@ import dyaml.linebreak; import dyaml.node; import dyaml.representer; import dyaml.resolver; +import dyaml.schema; import dyaml.serializer; import dyaml.style; import dyaml.tagdirective; @@ -37,9 +38,11 @@ import dyaml.tagdirective; */ auto dumper() { - auto dumper = Dumper(); - dumper.resolver = Resolver.withDefaultResolvers; - return dumper; + return dumper(DefaultSchema); +} +auto dumper(Schema schema) +{ + return Dumper(Resolver(schema)); } struct Dumper @@ -73,8 +76,10 @@ struct Dumper // Default style for collection nodes. If style is $(D CollectionStyle.invalid), the _style is chosen automatically. CollectionStyle defaultCollectionStyle = CollectionStyle.invalid; - @disable bool opEquals(ref Dumper); - @disable int opCmp(ref Dumper); + @disable this(); + this(Resolver resolver) pure @safe{ + this.resolver = resolver; + } ///Set indentation width. 2 by default. Must not be zero. 
@property void indent(uint indent) pure @safe nothrow @@ -195,7 +200,7 @@ struct Dumper import std.regex : regex; auto node = Node([1, 2, 3, 4, 5]); auto dumper = dumper(); - dumper.resolver.addImplicitResolver("!tag", regex("A.*"), "A"); + dumper.resolver.addRule(SchemaRule("!tag", regex("A.*"), "A")); dumper.dump(new Appender!string(), node); } /// Set default scalar style diff --git a/source/dyaml/loader.d b/source/dyaml/loader.d index c7703e66..3a705711 100644 --- a/source/dyaml/loader.d +++ b/source/dyaml/loader.d @@ -22,6 +22,7 @@ import dyaml.parser; import dyaml.reader; import dyaml.resolver; import dyaml.scanner; +import dyaml.schema; import dyaml.token; @@ -36,9 +37,9 @@ struct Loader // Processes character data to YAML tokens. Scanner scanner_; // Processes tokens to YAML events. - Parser parser_; - // Resolves tags (data types). - Resolver resolver_; + //Parser parser_; + // + Composer composer_; // Name of the input file or stream, used in error messages. string name_ = ""; // Are we done loading? @@ -147,12 +148,12 @@ struct Loader /// Ditto private this(ubyte[] yamlData) @safe { - resolver_ = Resolver.withDefaultResolvers; + auto resolver = Resolver(DefaultSchema); try { auto reader_ = new Reader(yamlData); scanner_ = Scanner(reader_); - parser_ = new Parser(scanner_); + composer_ = Composer(new Parser(scanner_), resolver); } catch(YAMLException e) { @@ -171,7 +172,7 @@ struct Loader /// Specify custom Resolver to use. auto ref resolver() pure @safe nothrow @nogc { - return resolver_; + return composer_.resolver; } /** Load single YAML document. @@ -213,20 +214,16 @@ struct Loader * * Reads the next document from the stream, if possible. */ - void popFront() @safe + void popFront() @trusted { - // Composer initialization is done here in case the constructor is - // modified, which is a pretty common case. 
- static Composer composer; if (!rangeInitialized) { - composer = Composer(parser_, resolver_); rangeInitialized = true; } assert(!done_, "Loader.popFront called on empty range"); - if (composer.checkNode()) + if (composer_.checkNode()) { - currentNode = composer.getNode(); + currentNode = composer_.getNode(); } else { @@ -268,7 +265,7 @@ struct Loader // Parse and return all events. Used for debugging. auto parse() @safe { - return parser_; + return composer_.parser; } } /// Load single YAML document from a file: diff --git a/source/dyaml/package.d b/source/dyaml/package.d index e61b716a..2cf79342 100644 --- a/source/dyaml/package.d +++ b/source/dyaml/package.d @@ -11,5 +11,6 @@ public import dyaml.exception; public import dyaml.linebreak; public import dyaml.loader; public import dyaml.resolver; +public import dyaml.schema; public import dyaml.style; public import dyaml.node; diff --git a/source/dyaml/resolver.d b/source/dyaml/resolver.d index ceed1e51..8c6984ca 100644 --- a/source/dyaml/resolver.d +++ b/source/dyaml/resolver.d @@ -1,5 +1,6 @@ // Copyright Ferdinand Majerech 2011. +// Copyright Cameron Ross 2020. // Distributed under the Boost Software License, Version 1.0. 
// (See accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) @@ -15,128 +16,47 @@ module dyaml.resolver; -import std.conv; import std.regex; -import std.typecons; -import std.utf; import dyaml.node; -import dyaml.exception; +import dyaml.schema; -/// Type of `regexes` -private alias RegexType = Tuple!(string, "tag", const Regex!char, "regexp", string, "chars"); - -private immutable RegexType[] regexes; - -shared static this() @safe -{ - RegexType[] tmp; - tmp ~= RegexType("tag:yaml.org,2002:bool", - regex(r"^(?:yes|Yes|YES|no|No|NO|true|True|TRUE" ~ - "|false|False|FALSE|on|On|ON|off|Off|OFF)$"), - "yYnNtTfFoO"); - tmp ~= RegexType("tag:yaml.org,2002:float", - regex(r"^(?:[-+]?([0-9][0-9_]*)\\.[0-9_]*" ~ - "(?:[eE][-+][0-9]+)?|[-+]?(?:[0-9][0-9_]" ~ - "*)?\\.[0-9_]+(?:[eE][-+][0-9]+)?|[-+]?" ~ - "[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]" ~ - "*|[-+]?\\.(?:inf|Inf|INF)|\\." ~ - "(?:nan|NaN|NAN))$"), - "-+0123456789."); - tmp ~= RegexType("tag:yaml.org,2002:int", - regex(r"^(?:[-+]?0b[0-1_]+" ~ - "|[-+]?0[0-7_]+" ~ - "|[-+]?(?:0|[1-9][0-9_]*)" ~ - "|[-+]?0x[0-9a-fA-F_]+" ~ - "|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$"), - "-+0123456789"); - tmp ~= RegexType("tag:yaml.org,2002:merge", regex(r"^<<$"), "<"); - tmp ~= RegexType("tag:yaml.org,2002:null", - regex(r"^$|^(?:~|null|Null|NULL)$"), "~nN\0"); - tmp ~= RegexType("tag:yaml.org,2002:timestamp", - regex(r"^[0-9][0-9][0-9][0-9]-[0-9][0-9]-" ~ - "[0-9][0-9]|[0-9][0-9][0-9][0-9]-[0-9]" ~ - "[0-9]?-[0-9][0-9]?[Tt]|[ \t]+[0-9]" ~ - "[0-9]?:[0-9][0-9]:[0-9][0-9]" ~ - "(?:\\.[0-9]*)?(?:[ \t]*Z|[-+][0-9]" ~ - "[0-9]?(?::[0-9][0-9])?)?$"), - "0123456789"); - tmp ~= RegexType("tag:yaml.org,2002:value", regex(r"^=$"), "="); - - - //The following resolver is only for documentation purposes. It cannot work - //because plain scalars cannot start with '!', '&', or '*'. 
- tmp ~= RegexType("tag:yaml.org,2002:yaml", regex(r"^(?:!|&|\*)$"), "!&*"); - - regexes = () @trusted { return cast(immutable)tmp; }(); -} - /** - * Resolves YAML tags (data types). - * - * Can be used to implicitly resolve custom data types of scalar values. + * Used to implicitly resolve tags of scalar values, according to sets of rules + * known as schemas. */ struct Resolver { private: - // Default tag to use for scalars. - string defaultScalarTag_ = "tag:yaml.org,2002:str"; - // Default tag to use for sequences. - string defaultSequenceTag_ = "tag:yaml.org,2002:seq"; - // Default tag to use for mappings. - string defaultMappingTag_ = "tag:yaml.org,2002:map"; + Schema schema; + /// Arrays of schema rules indexed by their starting characters. + const(SchemaRule)[][dchar] yamlImplicitResolvers_; - /* - * Arrays of scalar resolver tuples indexed by starting character of a scalar. - * - * Each tuple stores regular expression the scalar must match, - * and tag to assign to it if it matches. - */ - Tuple!(string, const Regex!char)[][dchar] yamlImplicitResolvers_; - - package: - static auto withDefaultResolvers() @safe - { - Resolver resolver; - foreach(pair; regexes) + @disable this(); + public: + this(Schema schema) @safe pure { + this.schema = schema; + foreach(tagResolver; schema.rules) { - resolver.addImplicitResolver(pair.tag, pair.regexp, pair.chars); + addRule(tagResolver); } - return resolver; } - - public: - @disable bool opEquals(ref Resolver); - @disable int opCmp(ref Resolver); - /** - * Add an implicit scalar resolver. + * Add a rule. * * If a scalar matches regexp and starts with any character in first, - * its _tag is set to tag. If it matches more than one resolver _regexp - * resolvers added _first override ones added later. Default resolvers - * override any user specified resolvers, but they can be disabled in - * Resolver constructor. - * - * If a scalar is not resolved to anything, it is assigned the default - * YAML _tag for strings. 
- * - * Params: tag = Tag to resolve to. - * regexp = Regular expression the scalar must match to have this _tag. - * first = String of possible starting characters of the scalar. + * its tag is set to the _rule's tag. In case of multiple rules + * matching, the first specified _rule has higher priority. * + * Params: rule = The rule to add. */ - void addImplicitResolver(string tag, const Regex!char regexp, string first) + void addRule(const SchemaRule rule) pure @safe { - foreach(const dchar c; first) + foreach(const dchar c; rule.chars) { - if((c in yamlImplicitResolvers_) is null) - { - yamlImplicitResolvers_[c] = []; - } - yamlImplicitResolvers_[c] ~= tuple(tag, regexp); + yamlImplicitResolvers_.require(c, []) ~= rule; } } /// Resolve scalars starting with 'A' to !_tag @@ -150,12 +70,19 @@ struct Resolver write("example.yaml", "A"); auto loader = Loader.fromFile("example.yaml"); - loader.resolver.addImplicitResolver("!tag", regex("A.*"), "A"); + loader.resolver.addRule(SchemaRule("!tag", regex("A.*"), "A")); auto node = loader.load(); assert(node.tag == "!tag"); } + deprecated("Use addRule(SchemaRule) instead") + void addImplicitResolver(string tag, Regex!char regexp, string first) + pure @safe + { + addRule(SchemaRule(tag, regexp, first)); + } + package: /** * Resolve tag of a node. @@ -165,8 +92,7 @@ struct Resolver * value = Value of the node, if any. * implicit = Should the node be implicitly resolved? * - * If the tag is already specified and not non-specific, that tag will - * be returned. + * If the node has an explicit specific tag, that tag will be returned. * * Returns: Resolved tag. */ @@ -184,35 +110,32 @@ struct Resolver case NodeID.scalar: if(!implicit) { - return defaultScalarTag_; + return schema.defaultScalarTag; } //Get the first char of the value. const dchar first = value.empty ? '\0' : value.front; - auto resolvers = (first in yamlImplicitResolvers_) is null ? - [] : yamlImplicitResolvers_[first]; - //If regexp matches, return tag. 
- foreach(resolver; resolvers) + foreach(rule; yamlImplicitResolvers_.get(first, [])) { - if(!(match(value, resolver[1]).empty)) + if(!(match(value, rule.regexp).empty)) { - return resolver[0]; + return rule.tag; } } - return defaultScalarTag_; + return schema.defaultScalarTag; case NodeID.sequence: - return defaultSequenceTag_; + return schema.defaultSequenceTag; case NodeID.mapping: - return defaultMappingTag_; + return schema.defaultMappingTag; case NodeID.invalid: assert(false, "Cannot resolve an invalid node"); } } @safe unittest { - auto resolver = Resolver.withDefaultResolvers; + auto resolver = Resolver(YAML11Schema); bool tagMatch(string tag, string[] values) @safe { @@ -251,11 +174,20 @@ struct Resolver } ///Returns: Default scalar tag. - @property string defaultScalarTag() const pure @safe nothrow {return defaultScalarTag_;} + @property string defaultScalarTag() const pure @safe nothrow + { + return schema.defaultScalarTag; + } ///Returns: Default sequence tag. - @property string defaultSequenceTag() const pure @safe nothrow {return defaultSequenceTag_;} + @property string defaultSequenceTag() const pure @safe nothrow + { + return schema.defaultSequenceTag; + } ///Returns: Default mapping tag. - @property string defaultMappingTag() const pure @safe nothrow {return defaultMappingTag_;} + @property string defaultMappingTag() const pure @safe nothrow + { + return schema.defaultMappingTag; + } } diff --git a/source/dyaml/schema.d b/source/dyaml/schema.d new file mode 100644 index 00000000..ecc7eba8 --- /dev/null +++ b/source/dyaml/schema.d @@ -0,0 +1,89 @@ +// Copyright Cameron Ross 2020. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) + +/// Definitions for YAML schemas. Used to define sets of rules to resolve tags +/// when not explicitly specified. 
+module dyaml.schema; + +import std.regex; + +/// A single schema rule +struct SchemaRule { + /// Tag this rule will be resolved to + string tag; + /// The regular expression + Regex!char regexp; + /// The character(s) that strings matching this rule will start with. + /// This is not optional. + string chars; +} + +struct Schema { + SchemaRule[] rules; + // Default tag to use for scalars. + string defaultScalarTag = "tag:yaml.org,2002:str"; + // Default tag to use for sequences. + string defaultSequenceTag = "tag:yaml.org,2002:seq"; + // Default tag to use for mappings. + string defaultMappingTag = "tag:yaml.org,2002:map"; +} + +/// Schema for YAML 1.1 documents +enum YAML11Schema = Schema([ + SchemaRule("tag:yaml.org,2002:bool", + regex(r"^(?:yes|Yes|YES|no|No|NO|true|True|TRUE" ~ + "|false|False|FALSE|on|On|ON|off|Off|OFF)$"), + "yYnNtTfFoO" + ), + SchemaRule("tag:yaml.org,2002:float", + regex(r"^(?:[-+]?([0-9][0-9_]*)\\.[0-9_]*" ~ + "(?:[eE][-+][0-9]+)?|[-+]?(?:[0-9][0-9_]" ~ + "*)?\\.[0-9_]+(?:[eE][-+][0-9]+)?|[-+]?" ~ + "[0-9][0-9_]*(?::[0-5]?[0-9])+\\.[0-9_]" ~ + "*|[-+]?\\.(?:inf|Inf|INF)|\\." ~ + "(?:nan|NaN|NAN))$"), + "-+0123456789." + ), + SchemaRule("tag:yaml.org,2002:int", + regex(r"^(?:[-+]?0b[0-1_]+" ~ + "|[-+]?0[0-7_]+" ~ + "|[-+]?(?:0|[1-9][0-9_]*)" ~ + "|[-+]?0x[0-9a-fA-F_]+" ~ + "|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$"), + "-+0123456789" + ), + SchemaRule("tag:yaml.org,2002:merge", + regex(r"^<<$"), + "<" + ), + SchemaRule("tag:yaml.org,2002:null", + regex(r"^$|^(?:~|null|Null|NULL)$"), + "~nN\0" + ), + SchemaRule("tag:yaml.org,2002:timestamp", + regex(r"^[0-9][0-9][0-9][0-9]-[0-9][0-9]-" ~ + "[0-9][0-9]|[0-9][0-9][0-9][0-9]-[0-9]" ~ + "[0-9]?-[0-9][0-9]?[Tt]|[ \t]+[0-9]" ~ + "[0-9]?:[0-9][0-9]:[0-9][0-9]" ~ + "(?:\\.[0-9]*)?(?:[ \t]*Z|[-+][0-9]" ~ + "[0-9]?(?::[0-9][0-9])?)?$"), + "0123456789" + ), + SchemaRule("tag:yaml.org,2002:value", + regex(r"^=$"), + "=" + ), + //The following resolver is only for documentation purposes. 
It cannot work + //because plain scalars cannot start with '!', '&', or '*'. + SchemaRule("tag:yaml.org,2002:yaml", + regex(r"^(?:!|&|\*)$"), + "!&*" + ) +]); + +/// Schema with no implicit rules: untagged nodes resolve only to the default !!str, !!map and !!seq tags +enum NullSchema = Schema(); + +alias DefaultSchema = YAML11Schema;