diff --git a/.travis.yml b/.travis.yml index 004172a2..7348c50c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,9 +1,17 @@ language: go go: - - 1.4 - - 1.5 - - 1.6 - - tip + - "1.4.x" + - "1.5.x" + - "1.6.x" + - "1.7.x" + - "1.8.x" + - "1.9.x" + - "1.10.x" + - "1.11.x" + - "1.12.x" + - "1.13.x" + - "1.14.x" + - "tip" go_import_path: gopkg.in/yaml.v2 diff --git a/LICENSE b/LICENSE index 866d74a7..8dada3ed 100644 --- a/LICENSE +++ b/LICENSE @@ -1,13 +1,201 @@ -Copyright 2011-2016 Canonical Ltd. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - http://www.apache.org/licenses/LICENSE-2.0 + 1. Definitions. -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/NOTICE b/NOTICE new file mode 100644 index 00000000..866d74a7 --- /dev/null +++ b/NOTICE @@ -0,0 +1,13 @@ +Copyright 2011-2016 Canonical Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/README.md b/README.md index 7b8bd867..c9388da4 100644 --- a/README.md +++ b/README.md @@ -20,18 +20,16 @@ supported since they're a poor design and are gone in YAML 1.2. Installation and usage ---------------------- -The import path for the package is *gopkg.in/yaml.v2*. +The import path for the package is *go.yaml.in/yaml/v2*. To install it, run: - go get gopkg.in/yaml.v2 + go get go.yaml.in/yaml/v2 API documentation ----------------- -If opened in a browser, the import path itself leads to the API documentation: - - * [https://gopkg.in/yaml.v2](https://gopkg.in/yaml.v2) +See: API stability ------------- @@ -42,7 +40,7 @@ The package API for yaml v2 will remain stable as described in [gopkg.in](https: License ------- -The yaml package is licensed under the LGPL with an exception that allows it to be linked statically. Please see the LICENSE file for details. +The yaml package is licensed under the Apache License 2.0. Please see the LICENSE file for details. Example @@ -55,7 +53,7 @@ import ( "fmt" "log" - "gopkg.in/yaml.v2" + "go.yaml.in/yaml/v2" ) var data = ` @@ -65,6 +63,8 @@ b: d: [3, 4] ` +// Note: struct fields must be public in order for unmarshal to +// correctly populate the data. type T struct { A string B struct { diff --git a/apic.go b/apic.go index 95ec014e..acf71402 100644 --- a/apic.go +++ b/apic.go @@ -2,7 +2,6 @@ package yaml import ( "io" - "os" ) func yaml_insert_token(parser *yaml_parser_t, pos int, token *yaml_token_t) { @@ -48,9 +47,9 @@ func yaml_string_read_handler(parser *yaml_parser_t, buffer []byte) (n int, err return n, nil } -// File read handler. -func yaml_file_read_handler(parser *yaml_parser_t, buffer []byte) (n int, err error) { - return parser.input_file.Read(buffer) +// Reader read handler. +func yaml_reader_read_handler(parser *yaml_parser_t, buffer []byte) (n int, err error) { + return parser.input_reader.Read(buffer) } // Set a string input. @@ -64,12 +63,12 @@ func yaml_parser_set_input_string(parser *yaml_parser_t, input []byte) { } // Set a file input. -func yaml_parser_set_input_file(parser *yaml_parser_t, file *os.File) { +func yaml_parser_set_input_reader(parser *yaml_parser_t, r io.Reader) { if parser.read_handler != nil { panic("must set the input source only once") } - parser.read_handler = yaml_file_read_handler - parser.input_file = file + parser.read_handler = yaml_reader_read_handler + parser.input_reader = r } // Set the source encoding. @@ -80,15 +79,19 @@ func yaml_parser_set_encoding(parser *yaml_parser_t, encoding yaml_encoding_t) { parser.encoding = encoding } +var disableLineWrapping = false + // Create a new emitter object. -func yaml_emitter_initialize(emitter *yaml_emitter_t) bool { +func yaml_emitter_initialize(emitter *yaml_emitter_t) { *emitter = yaml_emitter_t{ buffer: make([]byte, output_buffer_size), raw_buffer: make([]byte, 0, output_raw_buffer_size), states: make([]yaml_emitter_state_t, 0, initial_stack_size), events: make([]yaml_event_t, 0, initial_queue_size), } - return true + if disableLineWrapping { + emitter.best_width = -1 + } } // Destroy an emitter object. @@ -102,9 +105,10 @@ func yaml_string_write_handler(emitter *yaml_emitter_t, buffer []byte) error { return nil } -// File write handler. -func yaml_file_write_handler(emitter *yaml_emitter_t, buffer []byte) error { - _, err := emitter.output_file.Write(buffer) +// yaml_writer_write_handler uses emitter.output_writer to write the +// emitted text. +func yaml_writer_write_handler(emitter *yaml_emitter_t, buffer []byte) error { + _, err := emitter.output_writer.Write(buffer) return err } @@ -118,12 +122,12 @@ func yaml_emitter_set_output_string(emitter *yaml_emitter_t, output_buffer *[]by } // Set a file output. -func yaml_emitter_set_output_file(emitter *yaml_emitter_t, file io.Writer) { +func yaml_emitter_set_output_writer(emitter *yaml_emitter_t, w io.Writer) { if emitter.write_handler != nil { panic("must set the output target only once") } - emitter.write_handler = yaml_file_write_handler - emitter.output_file = file + emitter.write_handler = yaml_writer_write_handler + emitter.output_writer = w } // Set the output encoding. @@ -252,41 +256,41 @@ func yaml_emitter_set_break(emitter *yaml_emitter_t, line_break yaml_break_t) { // // Create STREAM-START. -func yaml_stream_start_event_initialize(event *yaml_event_t, encoding yaml_encoding_t) bool { +func yaml_stream_start_event_initialize(event *yaml_event_t, encoding yaml_encoding_t) { *event = yaml_event_t{ typ: yaml_STREAM_START_EVENT, encoding: encoding, } - return true } // Create STREAM-END. -func yaml_stream_end_event_initialize(event *yaml_event_t) bool { +func yaml_stream_end_event_initialize(event *yaml_event_t) { *event = yaml_event_t{ typ: yaml_STREAM_END_EVENT, } - return true } // Create DOCUMENT-START. -func yaml_document_start_event_initialize(event *yaml_event_t, version_directive *yaml_version_directive_t, - tag_directives []yaml_tag_directive_t, implicit bool) bool { +func yaml_document_start_event_initialize( + event *yaml_event_t, + version_directive *yaml_version_directive_t, + tag_directives []yaml_tag_directive_t, + implicit bool, +) { *event = yaml_event_t{ typ: yaml_DOCUMENT_START_EVENT, version_directive: version_directive, tag_directives: tag_directives, implicit: implicit, } - return true } // Create DOCUMENT-END. -func yaml_document_end_event_initialize(event *yaml_event_t, implicit bool) bool { +func yaml_document_end_event_initialize(event *yaml_event_t, implicit bool) { *event = yaml_event_t{ typ: yaml_DOCUMENT_END_EVENT, implicit: implicit, } - return true } ///* @@ -348,7 +352,7 @@ func yaml_sequence_end_event_initialize(event *yaml_event_t) bool { } // Create MAPPING-START. -func yaml_mapping_start_event_initialize(event *yaml_event_t, anchor, tag []byte, implicit bool, style yaml_mapping_style_t) bool { +func yaml_mapping_start_event_initialize(event *yaml_event_t, anchor, tag []byte, implicit bool, style yaml_mapping_style_t) { *event = yaml_event_t{ typ: yaml_MAPPING_START_EVENT, anchor: anchor, @@ -356,15 +360,13 @@ func yaml_mapping_start_event_initialize(event *yaml_event_t, anchor, tag []byte implicit: implicit, style: yaml_style_t(style), } - return true } // Create MAPPING-END. -func yaml_mapping_end_event_initialize(event *yaml_event_t) bool { +func yaml_mapping_end_event_initialize(event *yaml_event_t) { *event = yaml_event_t{ typ: yaml_MAPPING_END_EVENT, } - return true } // Destroy an event object. @@ -471,7 +473,7 @@ func yaml_event_delete(event *yaml_event_t) { // } context // tag_directive *yaml_tag_directive_t // -// context.error = YAML_NO_ERROR // Eliminate a compliler warning. +// context.error = YAML_NO_ERROR // Eliminate a compiler warning. // // assert(document) // Non-NULL document object is expected. // diff --git a/decode.go b/decode.go index 085cddc4..a34c62c7 100644 --- a/decode.go +++ b/decode.go @@ -4,6 +4,7 @@ import ( "encoding" "encoding/base64" "fmt" + "io" "math" "reflect" "strconv" @@ -22,19 +23,22 @@ type node struct { kind int line, column int tag string - value string - implicit bool - children []*node - anchors map[string]*node + // For an alias node, alias holds the resolved alias. + alias *node + value string + implicit bool + children []*node + anchors map[string]*node } // ---------------------------------------------------------------------------- // Parser, produces a node tree out of a libyaml event stream. type parser struct { - parser yaml_parser_t - event yaml_event_t - doc *node + parser yaml_parser_t + event yaml_event_t + doc *node + doneInit bool } func newParser(b []byte) *parser { @@ -42,21 +46,30 @@ func newParser(b []byte) *parser { if !yaml_parser_initialize(&p.parser) { panic("failed to initialize YAML emitter") } - if len(b) == 0 { b = []byte{'\n'} } - yaml_parser_set_input_string(&p.parser, b) + return &p +} - p.skip() - if p.event.typ != yaml_STREAM_START_EVENT { - panic("expected stream start event, got " + strconv.Itoa(int(p.event.typ))) +func newParserFromReader(r io.Reader) *parser { + p := parser{} + if !yaml_parser_initialize(&p.parser) { + panic("failed to initialize YAML emitter") } - p.skip() + yaml_parser_set_input_reader(&p.parser, r) return &p } +func (p *parser) init() { + if p.doneInit { + return + } + p.expect(yaml_STREAM_START_EVENT) + p.doneInit = true +} + func (p *parser) destroy() { if p.event.typ != yaml_NO_EVENT { yaml_event_delete(&p.event) @@ -64,16 +77,35 @@ func (p *parser) destroy() { yaml_parser_delete(&p.parser) } -func (p *parser) skip() { - if p.event.typ != yaml_NO_EVENT { - if p.event.typ == yaml_STREAM_END_EVENT { - failf("attempted to go past the end of stream; corrupted value?") +// expect consumes an event from the event stream and +// checks that it's of the expected type. +func (p *parser) expect(e yaml_event_type_t) { + if p.event.typ == yaml_NO_EVENT { + if !yaml_parser_parse(&p.parser, &p.event) { + p.fail() } - yaml_event_delete(&p.event) + } + if p.event.typ == yaml_STREAM_END_EVENT { + failf("attempted to go past the end of stream; corrupted value?") + } + if p.event.typ != e { + p.parser.problem = fmt.Sprintf("expected %s event but got %s", e, p.event.typ) + p.fail() + } + yaml_event_delete(&p.event) + p.event.typ = yaml_NO_EVENT +} + +// peek peeks at the next event in the event stream, +// puts the results into p.event and returns the event type. +func (p *parser) peek() yaml_event_type_t { + if p.event.typ != yaml_NO_EVENT { + return p.event.typ } if !yaml_parser_parse(&p.parser, &p.event) { p.fail() } + return p.event.typ } func (p *parser) fail() { @@ -81,6 +113,10 @@ func (p *parser) fail() { var line int if p.parser.problem_mark.line != 0 { line = p.parser.problem_mark.line + // Scanner errors don't iterate line before returning error + if p.parser.error == yaml_SCANNER_ERROR { + line++ + } } else if p.parser.context_mark.line != 0 { line = p.parser.context_mark.line } @@ -103,7 +139,8 @@ func (p *parser) anchor(n *node, anchor []byte) { } func (p *parser) parse() *node { - switch p.event.typ { + p.init() + switch p.peek() { case yaml_SCALAR_EVENT: return p.scalar() case yaml_ALIAS_EVENT: @@ -118,9 +155,8 @@ func (p *parser) parse() *node { // Happens when attempting to decode an empty buffer. return nil default: - panic("attempted to parse unknown event: " + strconv.Itoa(int(p.event.typ))) + panic("attempted to parse unknown event: " + p.event.typ.String()) } - panic("unreachable") } func (p *parser) node(kind int) *node { @@ -135,19 +171,20 @@ func (p *parser) document() *node { n := p.node(documentNode) n.anchors = make(map[string]*node) p.doc = n - p.skip() + p.expect(yaml_DOCUMENT_START_EVENT) n.children = append(n.children, p.parse()) - if p.event.typ != yaml_DOCUMENT_END_EVENT { - panic("expected end of document event but got " + strconv.Itoa(int(p.event.typ))) - } - p.skip() + p.expect(yaml_DOCUMENT_END_EVENT) return n } func (p *parser) alias() *node { n := p.node(aliasNode) n.value = string(p.event.anchor) - p.skip() + n.alias = p.doc.anchors[n.value] + if n.alias == nil { + failf("unknown anchor '%s' referenced", n.value) + } + p.expect(yaml_ALIAS_EVENT) return n } @@ -157,29 +194,29 @@ func (p *parser) scalar() *node { n.tag = string(p.event.tag) n.implicit = p.event.implicit p.anchor(n, p.event.anchor) - p.skip() + p.expect(yaml_SCALAR_EVENT) return n } func (p *parser) sequence() *node { n := p.node(sequenceNode) p.anchor(n, p.event.anchor) - p.skip() - for p.event.typ != yaml_SEQUENCE_END_EVENT { + p.expect(yaml_SEQUENCE_START_EVENT) + for p.peek() != yaml_SEQUENCE_END_EVENT { n.children = append(n.children, p.parse()) } - p.skip() + p.expect(yaml_SEQUENCE_END_EVENT) return n } func (p *parser) mapping() *node { n := p.node(mappingNode) p.anchor(n, p.event.anchor) - p.skip() - for p.event.typ != yaml_MAPPING_END_EVENT { + p.expect(yaml_MAPPING_START_EVENT) + for p.peek() != yaml_MAPPING_END_EVENT { n.children = append(n.children, p.parse(), p.parse()) } - p.skip() + p.expect(yaml_MAPPING_END_EVENT) return n } @@ -188,9 +225,14 @@ func (p *parser) mapping() *node { type decoder struct { doc *node - aliases map[string]bool + aliases map[*node]bool mapType reflect.Type terrors []string + strict bool + + decodeCount int + aliasCount int + aliasDepth int } var ( @@ -198,11 +240,13 @@ var ( durationType = reflect.TypeOf(time.Duration(0)) defaultMapType = reflect.TypeOf(map[interface{}]interface{}{}) ifaceType = defaultMapType.Elem() + timeType = reflect.TypeOf(time.Time{}) + ptrTimeType = reflect.TypeOf(&time.Time{}) ) -func newDecoder() *decoder { - d := &decoder{mapType: defaultMapType} - d.aliases = make(map[string]bool) +func newDecoder(strict bool) *decoder { + d := &decoder{mapType: defaultMapType, strict: strict} + d.aliases = make(map[*node]bool) return d } @@ -249,9 +293,10 @@ func (d *decoder) callUnmarshaler(n *node, u Unmarshaler) (good bool) { // unmarshalling was already done by UnmarshalYAML, and if so whether // its types unmarshalled appropriately. // -// If n holds a null value, prepare returns before doing anything. +// If n holds a null value, prepare returns before doing anything. However, if +// n holds a literal empty string (i.e.: ""), prepare runs as usual. func (d *decoder) prepare(n *node, out reflect.Value) (newout reflect.Value, unmarshaled, good bool) { - if n.tag == yaml_NULL_TAG || n.kind == scalarNode && n.tag == "" && (n.value == "null" || n.value == "") { + if n.tag == yaml_NULL_TAG || n.kind == scalarNode && n.tag == "" && (n.value == "null" || n.value == "~" || n.value == "" && n.implicit) { return out, false, false } again := true @@ -274,7 +319,43 @@ func (d *decoder) prepare(n *node, out reflect.Value) (newout reflect.Value, unm return out, false, false } +const ( + // 400,000 decode operations is ~500kb of dense object declarations, or + // ~5kb of dense object declarations with 10000% alias expansion + alias_ratio_range_low = 400000 + + // 4,000,000 decode operations is ~5MB of dense object declarations, or + // ~4.5MB of dense object declarations with 10% alias expansion + alias_ratio_range_high = 4000000 + + // alias_ratio_range is the range over which we scale allowed alias ratios + alias_ratio_range = float64(alias_ratio_range_high - alias_ratio_range_low) +) + +func allowedAliasRatio(decodeCount int) float64 { + switch { + case decodeCount <= alias_ratio_range_low: + // allow 99% to come from alias expansion for small-to-medium documents + return 0.99 + case decodeCount >= alias_ratio_range_high: + // allow 10% to come from alias expansion for very large documents + return 0.10 + default: + // scale smoothly from 99% down to 10% over the range. + // this maps to 396,000 - 400,000 allowed alias-driven decodes over the range. + // 400,000 decode operations is ~100MB of allocations in worst-case scenarios (single-item maps). + return 0.99 - 0.89*(float64(decodeCount-alias_ratio_range_low)/alias_ratio_range) + } +} + func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) { + d.decodeCount++ + if d.aliasDepth > 0 { + d.aliasCount++ + } + if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > allowedAliasRatio(d.decodeCount) { + failf("document contains excessive aliasing") + } switch n.kind { case documentNode: return d.document(n, out) @@ -308,16 +389,15 @@ func (d *decoder) document(n *node, out reflect.Value) (good bool) { } func (d *decoder) alias(n *node, out reflect.Value) (good bool) { - an, ok := d.doc.anchors[n.value] - if !ok { - failf("unknown anchor '%s' referenced", n.value) - } - if d.aliases[n.value] { + if d.aliases[n] { + // TODO this could actually be allowed in some circumstances. failf("anchor '%s' value contains itself", n.value) } - d.aliases[n.value] = true - good = d.unmarshal(an, out) - delete(d.aliases, n.value) + d.aliases[n] = true + d.aliasDepth++ + good = d.unmarshal(n.alias, out) + d.aliasDepth-- + delete(d.aliases, n) return good } @@ -329,7 +409,7 @@ func resetMap(out reflect.Value) { } } -func (d *decoder) scalar(n *node, out reflect.Value) (good bool) { +func (d *decoder) scalar(n *node, out reflect.Value) bool { var tag string var resolved interface{} if n.tag == "" && !n.implicit { @@ -353,9 +433,26 @@ func (d *decoder) scalar(n *node, out reflect.Value) (good bool) { } return true } - if s, ok := resolved.(string); ok && out.CanAddr() { - if u, ok := out.Addr().Interface().(encoding.TextUnmarshaler); ok { - err := u.UnmarshalText([]byte(s)) + if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() { + // We've resolved to exactly the type we want, so use that. + out.Set(resolvedv) + return true + } + // Perhaps we can use the value as a TextUnmarshaler to + // set its value. + if out.CanAddr() { + u, ok := out.Addr().Interface().(encoding.TextUnmarshaler) + if ok { + var text []byte + if tag == yaml_BINARY_TAG { + text = []byte(resolved.(string)) + } else { + // We let any value be unmarshaled into TextUnmarshaler. + // That might be more lax than we'd like, but the + // TextUnmarshaler itself should bowl out any dubious values. + text = []byte(n.value) + } + err := u.UnmarshalText(text) if err != nil { fail(err) } @@ -366,46 +463,54 @@ func (d *decoder) scalar(n *node, out reflect.Value) (good bool) { case reflect.String: if tag == yaml_BINARY_TAG { out.SetString(resolved.(string)) - good = true - } else if resolved != nil { + return true + } + if resolved != nil { out.SetString(n.value) - good = true + return true } case reflect.Interface: if resolved == nil { out.Set(reflect.Zero(out.Type())) + } else if tag == yaml_TIMESTAMP_TAG { + // It looks like a timestamp but for backward compatibility + // reasons we set it as a string, so that code that unmarshals + // timestamp-like values into interface{} will continue to + // see a string and not a time.Time. + // TODO(v3) Drop this. + out.Set(reflect.ValueOf(n.value)) } else { out.Set(reflect.ValueOf(resolved)) } - good = true + return true case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: switch resolved := resolved.(type) { case int: if !out.OverflowInt(int64(resolved)) { out.SetInt(int64(resolved)) - good = true + return true } case int64: if !out.OverflowInt(resolved) { out.SetInt(resolved) - good = true + return true } case uint64: if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) { out.SetInt(int64(resolved)) - good = true + return true } case float64: if resolved <= math.MaxInt64 && !out.OverflowInt(int64(resolved)) { out.SetInt(int64(resolved)) - good = true + return true } case string: if out.Type() == durationType { d, err := time.ParseDuration(resolved) if err == nil { out.SetInt(int64(d)) - good = true + return true } } } @@ -414,44 +519,49 @@ func (d *decoder) scalar(n *node, out reflect.Value) (good bool) { case int: if resolved >= 0 && !out.OverflowUint(uint64(resolved)) { out.SetUint(uint64(resolved)) - good = true + return true } case int64: if resolved >= 0 && !out.OverflowUint(uint64(resolved)) { out.SetUint(uint64(resolved)) - good = true + return true } case uint64: if !out.OverflowUint(uint64(resolved)) { out.SetUint(uint64(resolved)) - good = true + return true } case float64: if resolved <= math.MaxUint64 && !out.OverflowUint(uint64(resolved)) { out.SetUint(uint64(resolved)) - good = true + return true } } case reflect.Bool: switch resolved := resolved.(type) { case bool: out.SetBool(resolved) - good = true + return true } case reflect.Float32, reflect.Float64: switch resolved := resolved.(type) { case int: out.SetFloat(float64(resolved)) - good = true + return true case int64: out.SetFloat(float64(resolved)) - good = true + return true case uint64: out.SetFloat(float64(resolved)) - good = true + return true case float64: out.SetFloat(resolved) - good = true + return true + } + case reflect.Struct: + if resolvedv := reflect.ValueOf(resolved); out.Type() == resolvedv.Type() { + out.Set(resolvedv) + return true } case reflect.Ptr: if out.Type().Elem() == reflect.TypeOf(resolved) { @@ -459,13 +569,11 @@ func (d *decoder) scalar(n *node, out reflect.Value) (good bool) { elem := reflect.New(out.Type().Elem()) elem.Elem().Set(reflect.ValueOf(resolved)) out.Set(elem) - good = true + return true } } - if !good { - d.terror(n, tag, out) - } - return good + d.terror(n, tag, out) + return false } func settableValueOf(i interface{}) reflect.Value { @@ -482,6 +590,10 @@ func (d *decoder) sequence(n *node, out reflect.Value) (good bool) { switch out.Kind() { case reflect.Slice: out.Set(reflect.MakeSlice(out.Type(), l, l)) + case reflect.Array: + if l != out.Len() { + failf("invalid array: want %d elements but got %d", out.Len(), l) + } case reflect.Interface: // No type hints. Will have to use a generic sequence. iface = out @@ -500,7 +612,9 @@ func (d *decoder) sequence(n *node, out reflect.Value) (good bool) { j++ } } - out.Set(out.Slice(0, j)) + if out.Kind() != reflect.Array { + out.Set(out.Slice(0, j)) + } if iface.IsValid() { iface.Set(out) } @@ -561,7 +675,7 @@ func (d *decoder) mapping(n *node, out reflect.Value) (good bool) { } e := reflect.New(et).Elem() if d.unmarshal(n.children[i+1], e) { - out.SetMapIndex(k, e) + d.setMapIndex(n.children[i+1], out, k, e) } } } @@ -569,6 +683,14 @@ func (d *decoder) mapping(n *node, out reflect.Value) (good bool) { return true } +func (d *decoder) setMapIndex(n *node, out, k, v reflect.Value) { + if d.strict && out.MapIndex(k) != zeroValue { + d.terrors = append(d.terrors, fmt.Sprintf("line %d: key %#v already set in map", n.line+1, k.Interface())) + return + } + out.SetMapIndex(k, v) +} + func (d *decoder) mappingSlice(n *node, out reflect.Value) (good bool) { outt := out.Type() if outt.Elem() != mapItemType { @@ -616,6 +738,10 @@ func (d *decoder) mappingStruct(n *node, out reflect.Value) (good bool) { elemType = inlineMap.Type().Elem() } + var doneFields []bool + if d.strict { + doneFields = make([]bool, len(sinfo.FieldsList)) + } for i := 0; i < l; i += 2 { ni := n.children[i] if isMerge(ni) { @@ -626,6 +752,13 @@ func (d *decoder) mappingStruct(n *node, out reflect.Value) (good bool) { continue } if info, ok := sinfo.FieldsMap[name.String()]; ok { + if d.strict { + if doneFields[info.Id] { + d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s already set in type %s", ni.line+1, name.String(), out.Type())) + continue + } + doneFields[info.Id] = true + } var field reflect.Value if info.Inline == nil { field = out.Field(info.Num) @@ -639,7 +772,9 @@ func (d *decoder) mappingStruct(n *node, out reflect.Value) (good bool) { } value := reflect.New(elemType).Elem() d.unmarshal(n.children[i+1], value) - inlineMap.SetMapIndex(name, value) + d.setMapIndex(n.children[i+1], inlineMap, name, value) + } else if d.strict { + d.terrors = append(d.terrors, fmt.Sprintf("line %d: field %s not found in type %s", ni.line+1, name.String(), out.Type())) } } return true @@ -654,8 +789,7 @@ func (d *decoder) merge(n *node, out reflect.Value) { case mappingNode: d.unmarshal(n, out) case aliasNode: - an, ok := d.doc.anchors[n.value] - if ok && an.kind != mappingNode { + if n.alias != nil && n.alias.kind != mappingNode { failWantMap() } d.unmarshal(n, out) @@ -664,8 +798,7 @@ func (d *decoder) merge(n *node, out reflect.Value) { for i := len(n.children) - 1; i >= 0; i-- { ni := n.children[i] if ni.kind == aliasNode { - an, ok := d.doc.anchors[ni.value] - if ok && an.kind != mappingNode { + if ni.alias != nil && ni.alias.kind != mappingNode { failWantMap() } } else if ni.kind != mappingNode { diff --git a/decode_test.go b/decode_test.go index c159760b..b49578ac 100644 --- a/decode_test.go +++ b/decode_test.go @@ -2,13 +2,14 @@ package yaml_test import ( "errors" - . "gopkg.in/check.v1" - "gopkg.in/yaml.v2" + "io" "math" - "net" "reflect" "strings" "time" + + "go.yaml.in/yaml/v2" + . "gopkg.in/check.v1" ) var unmarshalIntTest = 123 @@ -19,8 +20,9 @@ var unmarshalTests = []struct { }{ { "", - &struct{}{}, - }, { + (*struct{})(nil), + }, + { "{}", &struct{}{}, }, { "v: hi", @@ -127,6 +129,9 @@ var unmarshalTests = []struct { }, { "bin: -0b101010", map[string]interface{}{"bin": -42}, + }, { + "bin: -0b1000000000000000000000000000000000000000000000000000000000000000", + map[string]interface{}{"bin": -9223372036854775808}, }, { "decimal: +685_230", map[string]int{"decimal": 685230}, @@ -239,6 +244,9 @@ var unmarshalTests = []struct { }, { "a: [1, 2]", &struct{ A []int }{[]int{1, 2}}, + }, { + "a: [1, 2]", + &struct{ A [2]int }{[2]int{1, 2}}, }, { "a: 1", &struct{ B int }{0}, @@ -397,12 +405,30 @@ var unmarshalTests = []struct { { "v: !!float '1.1'", map[string]interface{}{"v": 1.1}, + }, { + "v: !!float 0", + map[string]interface{}{"v": float64(0)}, + }, { + "v: !!float -1", + map[string]interface{}{"v": float64(-1)}, }, { "v: !!null ''", map[string]interface{}{"v": nil}, }, { "%TAG !y! tag:yaml.org,2002:\n---\nv: !y!int '1'", map[string]interface{}{"v": 1}, + }, { + "v: ! 12", + map[string]interface{}{"v": "12"}, + }, { + "seq: ! [A, B]", + map[string]interface{}{"seq": []interface{}{"A", "B"}}, + }, + + // Non-specific tag (Issue #75) + { + "v: ! test", + map[string]interface{}{"v": "test"}, }, // Anchors and aliases. @@ -419,19 +445,15 @@ var unmarshalTests = []struct { }, { "a: &a [1, 2]\nb: *a", &struct{ B []int }{[]int{1, 2}}, - }, { - "b: *a\na: &a {c: 1}", - &struct { - A, B struct { - C int - } - }{struct{ C int }{1}, struct{ C int }{1}}, }, // Bug #1133337 { "foo: ''", map[string]*string{"foo": new(string)}, + }, { + "foo: null", + map[string]*string{"foo": nil}, }, { "foo: null", map[string]string{"foo": ""}, @@ -440,6 +462,18 @@ var unmarshalTests = []struct { map[string]interface{}{"foo": nil}, }, + // Support for ~ + { + "foo: ~", + map[string]*string{"foo": nil}, + }, { + "foo: ~", + map[string]string{"foo": ""}, + }, { + "foo: ~", + map[string]interface{}{"foo": nil}, + }, + // Ignored field { "a: 1\nb: 2\n", @@ -496,6 +530,18 @@ var unmarshalTests = []struct { map[string]interface{}{"a": "50cent_of_dollar"}, }, + // issue #295 (allow scalars with colons in flow mappings and sequences) + { + "a: {b: https://github.com/go-yaml/yaml}", + map[string]interface{}{"a": map[interface{}]interface{}{ + "b": "https://github.com/go-yaml/yaml", + }}, + }, + { + "a: [https://github.com/go-yaml/yaml]", + map[string]interface{}{"a": []interface{}{"https://github.com/go-yaml/yaml"}}, + }, + // Duration { "a: 3s", @@ -547,11 +593,80 @@ var unmarshalTests = []struct { // Support encoding.TextUnmarshaler. { "a: 1.2.3.4\n", - map[string]net.IP{"a": net.IPv4(1, 2, 3, 4)}, + map[string]textUnmarshaler{"a": textUnmarshaler{S: "1.2.3.4"}}, }, { "a: 2015-02-24T18:19:39Z\n", - map[string]time.Time{"a": time.Unix(1424801979, 0)}, + map[string]textUnmarshaler{"a": textUnmarshaler{"2015-02-24T18:19:39Z"}}, + }, + + // Timestamps + { + // Date only. + "a: 2015-01-01\n", + map[string]time.Time{"a": time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC)}, + }, + { + // RFC3339 + "a: 2015-02-24T18:19:39.12Z\n", + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, .12e9, time.UTC)}, + }, + { + // RFC3339 with short dates. + "a: 2015-2-3T3:4:5Z", + map[string]time.Time{"a": time.Date(2015, 2, 3, 3, 4, 5, 0, time.UTC)}, + }, + { + // ISO8601 lower case t + "a: 2015-02-24t18:19:39Z\n", + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, 0, time.UTC)}, + }, + { + // space separate, no time zone + "a: 2015-02-24 18:19:39\n", + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, 0, time.UTC)}, + }, + // Some cases not currently handled. Uncomment these when + // the code is fixed. + // { + // // space separated with time zone + // "a: 2001-12-14 21:59:43.10 -5", + // map[string]interface{}{"a": time.Date(2001, 12, 14, 21, 59, 43, .1e9, time.UTC)}, + // }, + // { + // // arbitrary whitespace between fields + // "a: 2001-12-14 \t\t \t21:59:43.10 \t Z", + // map[string]interface{}{"a": time.Date(2001, 12, 14, 21, 59, 43, .1e9, time.UTC)}, + // }, + { + // explicit string tag + "a: !!str 2015-01-01", + map[string]interface{}{"a": "2015-01-01"}, + }, + { + // explicit timestamp tag on quoted string + "a: !!timestamp \"2015-01-01\"", + map[string]time.Time{"a": time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC)}, + }, + { + // explicit timestamp tag on unquoted string + "a: !!timestamp 2015-01-01", + map[string]time.Time{"a": time.Date(2015, 1, 1, 0, 0, 0, 0, time.UTC)}, + }, + { + // quoted string that's a valid timestamp + "a: \"2015-01-01\"", + map[string]interface{}{"a": "2015-01-01"}, + }, + { + // explicit timestamp tag into interface. + "a: !!timestamp \"2015-01-01\"", + map[string]interface{}{"a": "2015-01-01"}, + }, + { + // implicit timestamp tag into interface. + "a: 2015-01-01", + map[string]interface{}{"a": "2015-01-01"}, }, // Encode empty lists as zero-length slices. @@ -581,6 +696,50 @@ var unmarshalTests = []struct { "\xfe\xff\x00\xf1\x00o\x00\xf1\x00o\x00:\x00 \x00v\x00e\x00r\x00y\x00 \x00y\x00e\x00s\x00 \xd8=\xdf\xd4\x00\n", M{"ñoño": "very yes 🟔"}, }, + + // This *is* in fact a float number, per the spec. #171 was a mistake. + { + "a: 123456e1\n", + M{"a": 123456e1}, + }, { + "a: 123456E1\n", + M{"a": 123456e1}, + }, + // yaml-test-suite 3GZX: Spec Example 7.1. Alias Nodes + { + "First occurrence: &anchor Foo\nSecond occurrence: *anchor\nOverride anchor: &anchor Bar\nReuse anchor: *anchor\n", + map[interface{}]interface{}{ + "Reuse anchor": "Bar", + "First occurrence": "Foo", + "Second occurrence": "Foo", + "Override anchor": "Bar", + }, + }, + // Single document with garbage following it. + { + "---\nhello\n...\n}not yaml", + "hello", + }, + { + "a: 5\n", + &struct{ A jsonNumberT }{"5"}, + }, + { + "a: 5.5\n", + &struct{ A jsonNumberT }{"5.5"}, + }, + { + ` +a: + b +b: + ? a + : a`, + &M{"a": "b", + "b": M{ + "a": "a", + }}, + }, } type M map[interface{}]interface{} @@ -595,31 +754,102 @@ type inlineC struct { } func (s *S) TestUnmarshal(c *C) { - for _, item := range unmarshalTests { + for i, item := range unmarshalTests { + c.Logf("test %d: %q", i, item.data) t := reflect.ValueOf(item.value).Type() - var value interface{} - switch t.Kind() { - case reflect.Map: - value = reflect.MakeMap(t).Interface() - case reflect.String: - value = reflect.New(t).Interface() - case reflect.Ptr: - value = reflect.New(t.Elem()).Interface() - default: - c.Fatalf("missing case for %s", t) + value := reflect.New(t) + err := yaml.Unmarshal([]byte(item.data), value.Interface()) + if _, ok := err.(*yaml.TypeError); !ok { + c.Assert(err, IsNil) } - err := yaml.Unmarshal([]byte(item.data), value) + c.Assert(value.Elem().Interface(), DeepEquals, item.value, Commentf("error: %v", err)) + } +} + +// TODO(v3): This test should also work when unmarshaling onto an interface{}. +func (s *S) TestUnmarshalFullTimestamp(c *C) { + // Full timestamp in same format as encoded. This is confirmed to be + // properly decoded by Python as a timestamp as well. + var str = "2015-02-24T18:19:39.123456789-03:00" + var t time.Time + err := yaml.Unmarshal([]byte(str), &t) + c.Assert(err, IsNil) + c.Assert(t, Equals, time.Date(2015, 2, 24, 18, 19, 39, 123456789, t.Location())) + c.Assert(t.In(time.UTC), Equals, time.Date(2015, 2, 24, 21, 19, 39, 123456789, time.UTC)) +} + +func (s *S) TestDecoderSingleDocument(c *C) { + // Test that Decoder.Decode works as expected on + // all the unmarshal tests. + for i, item := range unmarshalTests { + c.Logf("test %d: %q", i, item.data) + if item.data == "" { + // Behaviour differs when there's no YAML. + continue + } + t := reflect.ValueOf(item.value).Type() + value := reflect.New(t) + err := yaml.NewDecoder(strings.NewReader(item.data)).Decode(value.Interface()) if _, ok := err.(*yaml.TypeError); !ok { c.Assert(err, IsNil) } - if t.Kind() == reflect.String { - c.Assert(*value.(*string), Equals, item.value) - } else { - c.Assert(value, DeepEquals, item.value) + c.Assert(value.Elem().Interface(), DeepEquals, item.value) + } +} + +var decoderTests = []struct { + data string + values []interface{} +}{{ + "", + nil, +}, { + "a: b", + []interface{}{ + map[interface{}]interface{}{"a": "b"}, + }, +}, { + "---\na: b\n...\n", + []interface{}{ + map[interface{}]interface{}{"a": "b"}, + }, +}, { + "---\n'hello'\n...\n---\ngoodbye\n...\n", + []interface{}{ + "hello", + "goodbye", + }, +}} + +func (s *S) TestDecoder(c *C) { + for i, item := range decoderTests { + c.Logf("test %d: %q", i, item.data) + var values []interface{} + dec := yaml.NewDecoder(strings.NewReader(item.data)) + for { + var value interface{} + err := dec.Decode(&value) + if err == io.EOF { + break + } + c.Assert(err, IsNil) + values = append(values, value) } + c.Assert(values, DeepEquals, item.values) } } +type errReader struct{} + +func (errReader) Read([]byte) (int, error) { + return 0, errors.New("some read error") +} + +func (s *S) TestDecoderReadError(c *C) { + err := yaml.NewDecoder(errReader{}).Decode(&struct{}{}) + c.Assert(err, ErrorMatches, `yaml: input error: some read error`) +} + func (s *S) TestUnmarshalNaN(c *C) { value := map[string]interface{}{} err := yaml.Unmarshal([]byte("notanum: .NaN"), &value) @@ -633,19 +863,52 @@ var unmarshalErrorTests = []struct { {"v: !!float 'error'", "yaml: cannot decode !!str `error` as a !!float"}, {"v: [A,", "yaml: line 1: did not find expected node content"}, {"v:\n- [A,", "yaml: line 2: did not find expected node content"}, + {"a:\n- b: *,", "yaml: line 2: did not find expected alphabetic or numeric character"}, {"a: *b\n", "yaml: unknown anchor 'b' referenced"}, {"a: &a\n b: *a\n", "yaml: anchor 'a' value contains itself"}, + {"a: &x null\n<<:\n- *x\nb: &x {}\n", `yaml: map merge requires map or sequence of maps as the value`}, // Issue #529. {"value: -", "yaml: block sequence entries are not allowed in this context"}, {"a: !!binary ==", "yaml: !!binary value contains invalid base64 data"}, {"{[.]}", `yaml: invalid map key: \[\]interface \{\}\{"\."\}`}, {"{{.}}", `yaml: invalid map key: map\[interface\ \{\}\]interface \{\}\{".":interface \{\}\(nil\)\}`}, + {"b: *a\na: &a {c: 1}", `yaml: unknown anchor 'a' referenced`}, + {"%TAG !%79! tag:yaml.org,2002:\n---\nv: !%79!int '1'", "yaml: did not find expected whitespace"}, + {"a:\n 1:\nb\n 2:", ".*could not find expected ':'"}, + { + "a: &a [00,00,00,00,00,00,00,00,00]\n" + + "b: &b [*a,*a,*a,*a,*a,*a,*a,*a,*a]\n" + + "c: &c [*b,*b,*b,*b,*b,*b,*b,*b,*b]\n" + + "d: &d [*c,*c,*c,*c,*c,*c,*c,*c,*c]\n" + + "e: &e [*d,*d,*d,*d,*d,*d,*d,*d,*d]\n" + + "f: &f [*e,*e,*e,*e,*e,*e,*e,*e,*e]\n" + + "g: &g [*f,*f,*f,*f,*f,*f,*f,*f,*f]\n" + + "h: &h [*g,*g,*g,*g,*g,*g,*g,*g,*g]\n" + + "i: &i [*h,*h,*h,*h,*h,*h,*h,*h,*h]\n", + "yaml: document contains excessive aliasing", + }, } func (s *S) TestUnmarshalErrors(c *C) { - for _, item := range unmarshalErrorTests { + for i, item := range unmarshalErrorTests { + c.Logf("test %d: %q", i, item.data) var value interface{} err := yaml.Unmarshal([]byte(item.data), &value) c.Assert(err, ErrorMatches, item.error, Commentf("Partial unmarshal: %#v", value)) + + if strings.Contains(item.data, ":") { + // Repeat test with typed value. + var value map[string]interface{} + err := yaml.Unmarshal([]byte(item.data), &value) + c.Assert(err, ErrorMatches, item.error, Commentf("Partial unmarshal: %#v", value)) + } + } +} + +func (s *S) TestDecoderErrors(c *C) { + for _, item := range unmarshalErrorTests { + var value interface{} + err := yaml.NewDecoder(strings.NewReader(item.data)).Decode(&value) + c.Assert(err, ErrorMatches, item.error, Commentf("Partial unmarshal: %#v", value)) } } @@ -660,6 +923,7 @@ var unmarshalerTests = []struct { {`_: BAR!`, "!!str", "BAR!"}, {`_: "BAR!"`, "!!str", "BAR!"}, {"_: !!foo 'BAR!'", "!!foo", "BAR!"}, + {`_: ""`, "!!str", ""}, } var unmarshalerResult = map[int]error{} @@ -958,6 +1222,127 @@ func (s *S) TestUnmarshalSliceOnPreset(c *C) { c.Assert(v.A, DeepEquals, []int{2}) } +var unmarshalStrictTests = []struct { + data string + value interface{} + error string +}{{ + data: "a: 1\nc: 2\n", + value: struct{ A, B int }{A: 1}, + error: `yaml: unmarshal errors:\n line 2: field c not found in type struct { A int; B int }`, +}, { + data: "a: 1\nb: 2\na: 3\n", + value: struct{ A, B int }{A: 3, B: 2}, + error: `yaml: unmarshal errors:\n line 3: field a already set in type struct { A int; B int }`, +}, { + data: "c: 3\na: 1\nb: 2\nc: 4\n", + value: struct { + A int + inlineB `yaml:",inline"` + }{ + A: 1, + inlineB: inlineB{ + B: 2, + inlineC: inlineC{ + C: 4, + }, + }, + }, + error: `yaml: unmarshal errors:\n line 4: field c already set in type struct { A int; yaml_test.inlineB "yaml:\\",inline\\"" }`, +}, { + data: "c: 0\na: 1\nb: 2\nc: 1\n", + value: struct { + A int + inlineB `yaml:",inline"` + }{ + A: 1, + inlineB: inlineB{ + B: 2, + inlineC: inlineC{ + C: 1, + }, + }, + }, + error: `yaml: unmarshal errors:\n line 4: field c already set in type struct { A int; yaml_test.inlineB "yaml:\\",inline\\"" }`, +}, { + data: "c: 1\na: 1\nb: 2\nc: 3\n", + value: struct { + A int + M map[string]interface{} `yaml:",inline"` + }{ + A: 1, + M: map[string]interface{}{ + "b": 2, + "c": 3, + }, + }, + error: `yaml: unmarshal errors:\n line 4: key "c" already set in map`, +}, { + data: "a: 1\n9: 2\nnull: 3\n9: 4", + value: map[interface{}]interface{}{ + "a": 1, + nil: 3, + 9: 4, + }, + error: `yaml: unmarshal errors:\n line 4: key 9 already set in map`, +}} + +func (s *S) TestUnmarshalStrict(c *C) { + for i, item := range unmarshalStrictTests { + c.Logf("test %d: %q", i, item.data) + // First test that normal Unmarshal unmarshals to the expected value. + t := reflect.ValueOf(item.value).Type() + value := reflect.New(t) + err := yaml.Unmarshal([]byte(item.data), value.Interface()) + c.Assert(err, Equals, nil) + c.Assert(value.Elem().Interface(), DeepEquals, item.value) + + // Then test that UnmarshalStrict fails on the same thing. + t = reflect.ValueOf(item.value).Type() + value = reflect.New(t) + err = yaml.UnmarshalStrict([]byte(item.data), value.Interface()) + c.Assert(err, ErrorMatches, item.error) + } +} + +type textUnmarshaler struct { + S string +} + +func (t *textUnmarshaler) UnmarshalText(s []byte) error { + t.S = string(s) + return nil +} + +func (s *S) TestFuzzCrashers(c *C) { + cases := []string{ + // runtime error: index out of range + "\"\\0\\\r\n", + + // should not happen + " 0: [\n] 0", + "? ? \"\n\" 0", + " - {\n000}0", + "0:\n 0: [0\n] 0", + " - \"\n000\"0", + " - \"\n000\"\"", + "0:\n - {\n000}0", + "0:\n - \"\n000\"0", + "0:\n - \"\n000\"\"", + + // runtime error: index out of range + " \ufeff\n", + "? \ufeff\n", + "? \ufeff:\n", + "0: \ufeff\n", + "? \ufeff: \ufeff\n", + } + for _, data := range cases { + var v interface{} + _ = yaml.Unmarshal([]byte(data), &v) + } +} + //var data []byte //func init() { // var err error diff --git a/emitterc.go b/emitterc.go index 2befd553..a1c2cc52 100644 --- a/emitterc.go +++ b/emitterc.go @@ -2,6 +2,7 @@ package yaml import ( "bytes" + "fmt" ) // Flush the buffer if needed. @@ -664,9 +665,8 @@ func yaml_emitter_emit_node(emitter *yaml_emitter_t, event *yaml_event_t, return yaml_emitter_emit_mapping_start(emitter, event) default: return yaml_emitter_set_emitter_error(emitter, - "expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS") + fmt.Sprintf("expected SCALAR, SEQUENCE-START, MAPPING-START, or ALIAS, but got %v", event.typ)) } - return false } // Expect ALIAS. @@ -843,7 +843,7 @@ func yaml_emitter_select_scalar_style(emitter *yaml_emitter_t, event *yaml_event return true } -// Write an achor. +// Write an anchor. func yaml_emitter_process_anchor(emitter *yaml_emitter_t) bool { if emitter.anchor_data.anchor == nil { return true @@ -995,10 +995,10 @@ func yaml_emitter_analyze_scalar(emitter *yaml_emitter_t, value []byte) bool { break_space = false space_break = false - preceeded_by_whitespace = false - followed_by_whitespace = false - previous_space = false - previous_break = false + preceded_by_whitespace = false + followed_by_whitespace = false + previous_space = false + previous_break = false ) emitter.scalar_data.value = value @@ -1017,7 +1017,7 @@ func yaml_emitter_analyze_scalar(emitter *yaml_emitter_t, value []byte) bool { flow_indicators = true } - preceeded_by_whitespace = true + preceded_by_whitespace = true for i, w := 0, 0; i < len(value); i += w { w = width(value[i]) followed_by_whitespace = i+w >= len(value) || is_blank(value, i+w) @@ -1048,7 +1048,7 @@ func yaml_emitter_analyze_scalar(emitter *yaml_emitter_t, value []byte) bool { block_indicators = true } case '#': - if preceeded_by_whitespace { + if preceded_by_whitespace { flow_indicators = true block_indicators = true } @@ -1089,7 +1089,7 @@ func yaml_emitter_analyze_scalar(emitter *yaml_emitter_t, value []byte) bool { } // [Go]: Why 'z'? Couldn't be the end of the string as that's the loop condition. - preceeded_by_whitespace = is_blankz(value, i) + preceded_by_whitespace = is_blankz(value, i) } emitter.scalar_data.multiline = line_breaks diff --git a/encode.go b/encode.go index 84f84995..0ee738e1 100644 --- a/encode.go +++ b/encode.go @@ -3,38 +3,67 @@ package yaml import ( "encoding" "fmt" + "io" "reflect" "regexp" "sort" "strconv" "strings" "time" + "unicode/utf8" ) +// jsonNumber is the interface of the encoding/json.Number datatype. +// Repeating the interface here avoids a dependency on encoding/json, and also +// supports other libraries like jsoniter, which use a similar datatype with +// the same interface. Detecting this interface is useful when dealing with +// structures containing json.Number, which is a string under the hood. The +// encoder should prefer the use of Int64(), Float64() and string(), in that +// order, when encoding this type. +type jsonNumber interface { + Float64() (float64, error) + Int64() (int64, error) + String() string +} + type encoder struct { emitter yaml_emitter_t event yaml_event_t out []byte flow bool + // doneInit holds whether the initial stream_start_event has been + // emitted. + doneInit bool } -func newEncoder() (e *encoder) { - e = &encoder{} - e.must(yaml_emitter_initialize(&e.emitter)) +func newEncoder() *encoder { + e := &encoder{} + yaml_emitter_initialize(&e.emitter) yaml_emitter_set_output_string(&e.emitter, &e.out) yaml_emitter_set_unicode(&e.emitter, true) - e.must(yaml_stream_start_event_initialize(&e.event, yaml_UTF8_ENCODING)) - e.emit() - e.must(yaml_document_start_event_initialize(&e.event, nil, nil, true)) - e.emit() return e } -func (e *encoder) finish() { - e.must(yaml_document_end_event_initialize(&e.event, true)) +func newEncoderWithWriter(w io.Writer) *encoder { + e := &encoder{} + yaml_emitter_initialize(&e.emitter) + yaml_emitter_set_output_writer(&e.emitter, w) + yaml_emitter_set_unicode(&e.emitter, true) + return e +} + +func (e *encoder) init() { + if e.doneInit { + return + } + yaml_stream_start_event_initialize(&e.event, yaml_UTF8_ENCODING) e.emit() + e.doneInit = true +} + +func (e *encoder) finish() { e.emitter.open_ended = false - e.must(yaml_stream_end_event_initialize(&e.event)) + yaml_stream_end_event_initialize(&e.event) e.emit() } @@ -44,9 +73,7 @@ func (e *encoder) destroy() { func (e *encoder) emit() { // This will internally delete the e.event value. - if !yaml_emitter_emit(&e.emitter, &e.event) && e.event.typ != yaml_DOCUMENT_END_EVENT && e.event.typ != yaml_STREAM_END_EVENT { - e.must(false) - } + e.must(yaml_emitter_emit(&e.emitter, &e.event)) } func (e *encoder) must(ok bool) { @@ -59,13 +86,43 @@ func (e *encoder) must(ok bool) { } } +func (e *encoder) marshalDoc(tag string, in reflect.Value) { + e.init() + yaml_document_start_event_initialize(&e.event, nil, nil, true) + e.emit() + e.marshal(tag, in) + yaml_document_end_event_initialize(&e.event, true) + e.emit() +} + func (e *encoder) marshal(tag string, in reflect.Value) { - if !in.IsValid() { + if !in.IsValid() || in.Kind() == reflect.Ptr && in.IsNil() { e.nilv() return } iface := in.Interface() - if m, ok := iface.(Marshaler); ok { + switch m := iface.(type) { + case jsonNumber: + integer, err := m.Int64() + if err == nil { + // In this case the json.Number is a valid int64 + in = reflect.ValueOf(integer) + break + } + float, err := m.Float64() + if err == nil { + // In this case the json.Number is a valid float64 + in = reflect.ValueOf(float) + break + } + // fallback case - no number could be obtained + in = reflect.ValueOf(m.String()) + case time.Time, *time.Time: + // Although time.Time implements TextMarshaler, + // we don't want to treat it as a string for YAML + // purposes because YAML has special support for + // timestamps. + case Marshaler: v, err := m.MarshalYAML() if err != nil { fail(err) @@ -75,31 +132,34 @@ func (e *encoder) marshal(tag string, in reflect.Value) { return } in = reflect.ValueOf(v) - } else if m, ok := iface.(encoding.TextMarshaler); ok { + case encoding.TextMarshaler: text, err := m.MarshalText() if err != nil { fail(err) } in = reflect.ValueOf(string(text)) + case nil: + e.nilv() + return } switch in.Kind() { case reflect.Interface: - if in.IsNil() { - e.nilv() - } else { - e.marshal(tag, in.Elem()) - } + e.marshal(tag, in.Elem()) case reflect.Map: e.mapv(tag, in) case reflect.Ptr: - if in.IsNil() { - e.nilv() + if in.Type() == ptrTimeType { + e.timev(tag, in.Elem()) } else { e.marshal(tag, in.Elem()) } case reflect.Struct: - e.structv(tag, in) - case reflect.Slice: + if in.Type() == timeType { + e.timev(tag, in) + } else { + e.structv(tag, in) + } + case reflect.Slice, reflect.Array: if in.Type().Elem() == mapItemType { e.itemsv(tag, in) } else { @@ -191,10 +251,10 @@ func (e *encoder) mappingv(tag string, f func()) { e.flow = false style = yaml_FLOW_MAPPING_STYLE } - e.must(yaml_mapping_start_event_initialize(&e.event, nil, []byte(tag), implicit, style)) + yaml_mapping_start_event_initialize(&e.event, nil, []byte(tag), implicit, style) e.emit() f() - e.must(yaml_mapping_end_event_initialize(&e.event)) + yaml_mapping_end_event_initialize(&e.event) e.emit() } @@ -240,23 +300,36 @@ var base60float = regexp.MustCompile(`^[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+(?:\.[0 func (e *encoder) stringv(tag string, in reflect.Value) { var style yaml_scalar_style_t s := in.String() - rtag, rs := resolve("", s) - if rtag == yaml_BINARY_TAG { - if tag == "" || tag == yaml_STR_TAG { - tag = rtag - s = rs.(string) - } else if tag == yaml_BINARY_TAG { + canUsePlain := true + switch { + case !utf8.ValidString(s): + if tag == yaml_BINARY_TAG { failf("explicitly tagged !!binary data must be base64-encoded") - } else { + } + if tag != "" { failf("cannot marshal invalid UTF-8 data as %s", shortTag(tag)) } + // It can't be encoded directly as YAML so use a binary tag + // and encode it as base64. + tag = yaml_BINARY_TAG + s = encodeBase64(s) + case tag == "": + // Check to see if it would resolve to a specific + // tag when encoded unquoted. If it doesn't, + // there's no need to quote it. + rtag, _ := resolve("", s) + canUsePlain = rtag == yaml_STR_TAG && !isBase60Float(s) } - if tag == "" && (rtag != yaml_STR_TAG || isBase60Float(s)) { - style = yaml_DOUBLE_QUOTED_SCALAR_STYLE - } else if strings.Contains(s, "\n") { + // Note: it's possible for user code to emit invalid YAML + // if they explicitly specify a tag and a string containing + // text that's incompatible with that tag. + switch { + case strings.Contains(s, "\n"): style = yaml_LITERAL_SCALAR_STYLE - } else { + case canUsePlain: style = yaml_PLAIN_SCALAR_STYLE + default: + style = yaml_DOUBLE_QUOTED_SCALAR_STYLE } e.emitScalar(s, "", tag, style) } @@ -281,9 +354,20 @@ func (e *encoder) uintv(tag string, in reflect.Value) { e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE) } +func (e *encoder) timev(tag string, in reflect.Value) { + t := in.Interface().(time.Time) + s := t.Format(time.RFC3339Nano) + e.emitScalar(s, "", tag, yaml_PLAIN_SCALAR_STYLE) +} + func (e *encoder) floatv(tag string, in reflect.Value) { - // FIXME: Handle 64 bits here. - s := strconv.FormatFloat(float64(in.Float()), 'g', -1, 32) + // Issue #352: When formatting, use the precision of the underlying value + precision := 64 + if in.Kind() == reflect.Float32 { + precision = 32 + } + + s := strconv.FormatFloat(in.Float(), 'g', -1, precision) switch s { case "+Inf": s = ".inf" diff --git a/encode_test.go b/encode_test.go index 84099bd3..00344430 100644 --- a/encode_test.go +++ b/encode_test.go @@ -1,18 +1,38 @@ package yaml_test import ( + "bytes" "fmt" "math" "strconv" "strings" "time" - . "gopkg.in/check.v1" - "gopkg.in/yaml.v2" "net" "os" + + . "gopkg.in/check.v1" + "go.yaml.in/yaml/v2" ) +type jsonNumberT string + +func (j jsonNumberT) Int64() (int64, error) { + val, err := strconv.Atoi(string(j)) + if err != nil { + return 0, err + } + return int64(val), nil +} + +func (j jsonNumberT) Float64() (float64, error) { + return strconv.ParseFloat(string(j), 64) +} + +func (j jsonNumberT) String() string { + return string(j) +} + var marshalIntTest = 123 var marshalTests = []struct { @@ -22,6 +42,9 @@ var marshalTests = []struct { { nil, "null\n", + }, { + (*marshalerType)(nil), + "null\n", }, { &struct{}{}, "{}\n", @@ -70,6 +93,9 @@ var marshalTests = []struct { }, { map[string]interface{}{"v": float64(0.1)}, "v: 0.1\n", + }, { + map[string]interface{}{"v": float32(0.99)}, + "v: 0.99\n", }, { map[string]interface{}{"v": -0.1}, "v: -0.1\n", @@ -142,6 +168,9 @@ var marshalTests = []struct { }, { &struct{ A []int }{[]int{1, 2}}, "a:\n- 1\n- 2\n", + }, { + &struct{ A [2]int }{[2]int{1, 2}}, + "a:\n- 1\n- 2\n", }, { &struct { B int "a" @@ -197,6 +226,25 @@ var marshalTests = []struct { }{1, 0}, "a: 1\n", }, + { + &struct { + T1 time.Time "t1,omitempty" + T2 time.Time "t2,omitempty" + T3 *time.Time "t3,omitempty" + T4 *time.Time "t4,omitempty" + }{ + T2: time.Date(2018, 1, 9, 10, 40, 47, 0, time.UTC), + T4: newTime(time.Date(2098, 1, 9, 10, 40, 47, 0, time.UTC)), + }, + "t2: 2018-01-09T10:40:47Z\nt4: 2098-01-09T10:40:47Z\n", + }, + // Nil interface that implements Marshaler. + { + map[string]yaml.Marshaler{ + "a": nil, + }, + "a: null\n", + }, // Flow flag { @@ -302,10 +350,25 @@ var marshalTests = []struct { map[string]net.IP{"a": net.IPv4(1, 2, 3, 4)}, "a: 1.2.3.4\n", }, + // time.Time gets a timestamp tag. + { + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, 0, time.UTC)}, + "a: 2015-02-24T18:19:39Z\n", + }, { - map[string]time.Time{"a": time.Unix(1424801979, 0)}, + map[string]*time.Time{"a": newTime(time.Date(2015, 2, 24, 18, 19, 39, 0, time.UTC))}, "a: 2015-02-24T18:19:39Z\n", }, + { + // This is confirmed to be properly decoded in Python (libyaml) without a timestamp tag. + map[string]time.Time{"a": time.Date(2015, 2, 24, 18, 19, 39, 123456789, time.FixedZone("FOO", -3*60*60))}, + "a: 2015-02-24T18:19:39.123456789-03:00\n", + }, + // Ensure timestamp-like strings are quoted. + { + map[string]string{"a": "2015-02-24T18:19:39Z"}, + "a: \"2015-02-24T18:19:39Z\"\n", + }, // Ensure strings containing ": " are quoted (reported as PR #43, but not reproducible). { @@ -322,18 +385,89 @@ var marshalTests = []struct { map[string]string{"a": "你好 #comment"}, "a: '你好 #comment'\n", }, + { + map[string]interface{}{"a": jsonNumberT("5")}, + "a: 5\n", + }, + { + map[string]interface{}{"a": jsonNumberT("100.5")}, + "a: 100.5\n", + }, + { + map[string]interface{}{"a": jsonNumberT("bogus")}, + "a: bogus\n", + }, +} + +func (s *S) TestLineWrapping(c *C) { + var v = map[string]string{ + "a": "abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 ", + } + data, err := yaml.Marshal(v) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, + "a: 'abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 abcdefghijklmnopqrstuvwxyz\n" + + " ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 '\n") + + // The API does not allow this process to be reversed as it's intended + // for migration only. v3 drops this method and instead offers more + // control on a per encoding basis. + yaml.FutureLineWrap() + + data, err = yaml.Marshal(v) + c.Assert(err, IsNil) + c.Assert(string(data), Equals, + "a: 'abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890 '\n") } func (s *S) TestMarshal(c *C) { defer os.Setenv("TZ", os.Getenv("TZ")) os.Setenv("TZ", "UTC") - for _, item := range marshalTests { + for i, item := range marshalTests { + c.Logf("test %d: %q", i, item.data) data, err := yaml.Marshal(item.value) c.Assert(err, IsNil) c.Assert(string(data), Equals, item.data) } } +func (s *S) TestEncoderSingleDocument(c *C) { + for i, item := range marshalTests { + c.Logf("test %d. %q", i, item.data) + var buf bytes.Buffer + enc := yaml.NewEncoder(&buf) + err := enc.Encode(item.value) + c.Assert(err, Equals, nil) + err = enc.Close() + c.Assert(err, Equals, nil) + c.Assert(buf.String(), Equals, item.data) + } +} + +func (s *S) TestEncoderMultipleDocuments(c *C) { + var buf bytes.Buffer + enc := yaml.NewEncoder(&buf) + err := enc.Encode(map[string]string{"a": "b"}) + c.Assert(err, Equals, nil) + err = enc.Encode(map[string]string{"c": "d"}) + c.Assert(err, Equals, nil) + err = enc.Close() + c.Assert(err, Equals, nil) + c.Assert(buf.String(), Equals, "a: b\n---\nc: d\n") +} + +func (s *S) TestEncoderWriteError(c *C) { + enc := yaml.NewEncoder(errorWriter{}) + err := enc.Encode(map[string]string{"a": "b"}) + c.Assert(err, ErrorMatches, `yaml: write error: some write error`) // Data not flushed yet +} + +type errorWriter struct{} + +func (errorWriter) Write([]byte) (int, error) { + return 0, fmt.Errorf("some write error") +} + var marshalErrorTests = []struct { value interface{} error string @@ -455,8 +589,13 @@ func (s *S) TestSortedOutput(c *C) { "1", "2", "a!10", - "a/2", + "a/0001", + "a/002", + "a/3", "a/10", + "a/11", + "a/0012", + "a/100", "a~10", "ab/1", "b/1", @@ -471,6 +610,8 @@ func (s *S) TestSortedOutput(c *C) { "c2.10", "c10.2", "d1", + "d7", + "d7abc", "d12", "d12a", } @@ -499,3 +640,7 @@ func (s *S) TestSortedOutput(c *C) { last = index } } + +func newTime(t time.Time) *time.Time { + return &t +} diff --git a/example_embedded_test.go b/example_embedded_test.go new file mode 100644 index 00000000..1d17efd4 --- /dev/null +++ b/example_embedded_test.go @@ -0,0 +1,41 @@ +package yaml_test + +import ( + "fmt" + "log" + + "go.yaml.in/yaml/v2" +) + +// An example showing how to unmarshal embedded +// structs from YAML. + +type StructA struct { + A string `yaml:"a"` +} + +type StructB struct { + // Embedded structs are not treated as embedded in YAML by default. To do that, + // add the ",inline" annotation below + StructA `yaml:",inline"` + B string `yaml:"b"` +} + +var data = ` +a: a string from struct A +b: a string from struct B +` + +func ExampleUnmarshal_embedded() { + var b StructB + + err := yaml.Unmarshal([]byte(data), &b) + if err != nil { + log.Fatalf("cannot unmarshal data: %v", err) + } + fmt.Println(b.A) + fmt.Println(b.B) + // Output: + // a string from struct A + // a string from struct B +} diff --git a/go.mod b/go.mod new file mode 100644 index 00000000..5d1f0ea8 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module go.yaml.in/yaml/v2 + +go 1.15 + +require gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 diff --git a/limit_test.go b/limit_test.go new file mode 100644 index 00000000..109b110f --- /dev/null +++ b/limit_test.go @@ -0,0 +1,128 @@ +package yaml_test + +import ( + "strings" + "testing" + + . "gopkg.in/check.v1" + "go.yaml.in/yaml/v2" +) + +var limitTests = []struct { + name string + data []byte + error string +}{ + { + name: "1000kb of maps with 100 aliases", + data: []byte(`{a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-100) + `], b: &b [*a` + strings.Repeat(`,*a`, 99) + `]}`), + error: "yaml: document contains excessive aliasing", + }, { + name: "1000kb of deeply nested slices", + data: []byte(strings.Repeat(`[`, 1000*1024)), + error: "yaml: exceeded max depth of 10000", + }, { + name: "1000kb of deeply nested maps", + data: []byte("x: " + strings.Repeat(`{`, 1000*1024)), + error: "yaml: exceeded max depth of 10000", + }, { + name: "1000kb of deeply nested indents", + data: []byte(strings.Repeat(`- `, 1000*1024)), + error: "yaml: exceeded max depth of 10000", + }, { + name: "1000kb of 1000-indent lines", + data: []byte(strings.Repeat(strings.Repeat(`- `, 1000)+"\n", 1024/2)), + }, + {name: "1kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1*1024/4-1) + `]`)}, + {name: "10kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 10*1024/4-1) + `]`)}, + {name: "100kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 100*1024/4-1) + `]`)}, + {name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)}, + {name: "1000kb slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))}, + {name: "1000kb slice nested in maps at max-depth", data: []byte("{a,b:\n" + strings.Repeat(" {a,b:", 10000-2) + ` [1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + `]` + strings.Repeat(`}`, 10000-1))}, + {name: "1000kb of 10000-nested lines", data: []byte(strings.Repeat(`- `+strings.Repeat(`[`, 10000)+strings.Repeat(`]`, 10000)+"\n", 1000*1024/20000))}, +} + +func (s *S) TestLimits(c *C) { + if testing.Short() { + return + } + for _, tc := range limitTests { + var v interface{} + err := yaml.Unmarshal(tc.data, &v) + if len(tc.error) > 0 { + c.Assert(err, ErrorMatches, tc.error, Commentf("testcase: %s", tc.name)) + } else { + c.Assert(err, IsNil, Commentf("testcase: %s", tc.name)) + } + } +} + +func Benchmark1000KB100Aliases(b *testing.B) { + benchmark(b, "1000kb of maps with 100 aliases") +} +func Benchmark1000KBDeeplyNestedSlices(b *testing.B) { + benchmark(b, "1000kb of deeply nested slices") +} +func Benchmark1000KBDeeplyNestedMaps(b *testing.B) { + benchmark(b, "1000kb of deeply nested maps") +} +func Benchmark1000KBDeeplyNestedIndents(b *testing.B) { + benchmark(b, "1000kb of deeply nested indents") +} +func Benchmark1000KB1000IndentLines(b *testing.B) { + benchmark(b, "1000kb of 1000-indent lines") +} +func Benchmark1KBMaps(b *testing.B) { + benchmark(b, "1kb of maps") +} +func Benchmark10KBMaps(b *testing.B) { + benchmark(b, "10kb of maps") +} +func Benchmark100KBMaps(b *testing.B) { + benchmark(b, "100kb of maps") +} +func Benchmark1000KBMaps(b *testing.B) { + benchmark(b, "1000kb of maps") +} + +func BenchmarkDeepSlice(b *testing.B) { + benchmark(b, "1000kb slice nested at max-depth") +} + +func BenchmarkDeepFlow(b *testing.B) { + benchmark(b, "1000kb slice nested in maps at max-depth") +} + +func Benchmark1000KBMaxDepthNested(b *testing.B) { + benchmark(b, "1000kb of 10000-nested lines") +} + +func benchmark(b *testing.B, name string) { + for _, t := range limitTests { + if t.name != name { + continue + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + var v interface{} + err := yaml.Unmarshal(t.data, &v) + if len(t.error) > 0 { + if err == nil { + b.Errorf("expected error, got none") + } else if err.Error() != t.error { + b.Errorf("expected error '%s', got '%s'", t.error, err.Error()) + } + } else { + if err != nil { + b.Errorf("unexpected error: %v", err) + } + } + } + + return + } + + b.Errorf("testcase %q not found", name) +} diff --git a/parserc.go b/parserc.go index 0a7037ad..81d05dfe 100644 --- a/parserc.go +++ b/parserc.go @@ -166,7 +166,6 @@ func yaml_parser_state_machine(parser *yaml_parser_t, event *yaml_event_t) bool default: panic("invalid parser state") } - return false } // Parse the production: diff --git a/readerc.go b/readerc.go index f4507917..7c1f5fac 100644 --- a/readerc.go +++ b/readerc.go @@ -93,9 +93,18 @@ func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool { panic("read handler must be set") } + // [Go] This function was changed to guarantee the requested length size at EOF. + // The fact we need to do this is pretty awful, but the description above implies + // for that to be the case, and there are tests + // If the EOF flag is set and the raw buffer is empty, do nothing. if parser.eof && parser.raw_buffer_pos == len(parser.raw_buffer) { - return true + // [Go] ACTUALLY! Read the documentation of this function above. + // This is just broken. To return true, we need to have the + // given length in the buffer. Not doing that means every single + // check that calls this function to make sure the buffer has a + // given length is Go) panicking; or C) accessing invalid memory. + //return true } // Return if the buffer contains enough characters. @@ -389,6 +398,15 @@ func yaml_parser_update_buffer(parser *yaml_parser_t, length int) bool { break } } + // [Go] Read the documentation of this function above. To return true, + // we need to have the given length in the buffer. Not doing that means + // every single check that calls this function to make sure the buffer + // has a given length is Go) panicking; or C) accessing invalid memory. + // This happens here due to the EOF above breaking early. + for buffer_len < length { + parser.buffer[buffer_len] = 0 + buffer_len++ + } parser.buffer = parser.buffer[:buffer_len] return true } diff --git a/resolve.go b/resolve.go index 93a86327..4120e0c9 100644 --- a/resolve.go +++ b/resolve.go @@ -3,9 +3,10 @@ package yaml import ( "encoding/base64" "math" + "regexp" "strconv" "strings" - "unicode/utf8" + "time" ) type resolveMapItem struct { @@ -74,12 +75,14 @@ func longTag(tag string) string { func resolvableTag(tag string) bool { switch tag { - case "", yaml_STR_TAG, yaml_BOOL_TAG, yaml_INT_TAG, yaml_FLOAT_TAG, yaml_NULL_TAG: + case "", yaml_STR_TAG, yaml_BOOL_TAG, yaml_INT_TAG, yaml_FLOAT_TAG, yaml_NULL_TAG, yaml_TIMESTAMP_TAG: return true } return false } +var yamlStyleFloat = regexp.MustCompile(`^[-+]?(\.[0-9]+|[0-9]+(\.[0-9]*)?)([eE][-+]?[0-9]+)?$`) + func resolve(tag string, in string) (rtag string, out interface{}) { if !resolvableTag(tag) { return tag, in @@ -89,6 +92,19 @@ func resolve(tag string, in string) (rtag string, out interface{}) { switch tag { case "", rtag, yaml_STR_TAG, yaml_BINARY_TAG: return + case yaml_FLOAT_TAG: + if rtag == yaml_INT_TAG { + switch v := out.(type) { + case int64: + rtag = yaml_FLOAT_TAG + out = float64(v) + return + case int: + rtag = yaml_FLOAT_TAG + out = float64(v) + return + } + } } failf("cannot decode %s `%s` as a %s", shortTag(rtag), in, shortTag(tag)) }() @@ -122,6 +138,15 @@ func resolve(tag string, in string) (rtag string, out interface{}) { case 'D', 'S': // Int, float, or timestamp. + // Only try values as a timestamp if the value is unquoted or there's an explicit + // !!timestamp tag. + if tag == "" || tag == yaml_TIMESTAMP_TAG { + t, ok := parseTimestamp(in) + if ok { + return yaml_TIMESTAMP_TAG, t + } + } + plain := strings.Replace(in, "_", "", -1) intv, err := strconv.ParseInt(plain, 0, 64) if err == nil { @@ -135,9 +160,11 @@ func resolve(tag string, in string) (rtag string, out interface{}) { if err == nil { return yaml_INT_TAG, uintv } - floatv, err := strconv.ParseFloat(plain, 64) - if err == nil { - return yaml_FLOAT_TAG, floatv + if yamlStyleFloat.MatchString(plain) { + floatv, err := strconv.ParseFloat(plain, 64) + if err == nil { + return yaml_FLOAT_TAG, floatv + } } if strings.HasPrefix(plain, "0b") { intv, err := strconv.ParseInt(plain[2:], 2, 64) @@ -153,28 +180,20 @@ func resolve(tag string, in string) (rtag string, out interface{}) { return yaml_INT_TAG, uintv } } else if strings.HasPrefix(plain, "-0b") { - intv, err := strconv.ParseInt(plain[3:], 2, 64) + intv, err := strconv.ParseInt("-" + plain[3:], 2, 64) if err == nil { - if intv == int64(int(intv)) { - return yaml_INT_TAG, -int(intv) + if true || intv == int64(int(intv)) { + return yaml_INT_TAG, int(intv) } else { - return yaml_INT_TAG, -intv + return yaml_INT_TAG, intv } } } - // XXX Handle timestamps here. - default: panic("resolveTable item not yet handled: " + string(rune(hint)) + " (with " + in + ")") } } - if tag == yaml_BINARY_TAG { - return yaml_BINARY_TAG, in - } - if utf8.ValidString(in) { - return yaml_STR_TAG, in - } - return yaml_BINARY_TAG, encodeBase64(in) + return yaml_STR_TAG, in } // encodeBase64 encodes s as base64 that is broken up into multiple lines @@ -201,3 +220,39 @@ func encodeBase64(s string) string { } return string(out[:k]) } + +// This is a subset of the formats allowed by the regular expression +// defined at http://yaml.org/type/timestamp.html. +var allowedTimestampFormats = []string{ + "2006-1-2T15:4:5.999999999Z07:00", // RCF3339Nano with short date fields. + "2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t". + "2006-1-2 15:4:5.999999999", // space separated with no time zone + "2006-1-2", // date only + // Notable exception: time.Parse cannot handle: "2001-12-14 21:59:43.10 -5" + // from the set of examples. +} + +// parseTimestamp parses s as a timestamp string and +// returns the timestamp and reports whether it succeeded. +// Timestamp formats are defined at http://yaml.org/type/timestamp.html +func parseTimestamp(s string) (time.Time, bool) { + // TODO write code to check all the formats supported by + // http://yaml.org/type/timestamp.html instead of using time.Parse. + + // Quick check: all date formats start with YYYY-. + i := 0 + for ; i < len(s); i++ { + if c := s[i]; c < '0' || c > '9' { + break + } + } + if i != 4 || i == len(s) || s[i] != '-' { + return time.Time{}, false + } + for _, format := range allowedTimestampFormats { + if t, err := time.Parse(format, s); err == nil { + return t, true + } + } + return time.Time{}, false +} diff --git a/scannerc.go b/scannerc.go index 25808000..d634dca4 100644 --- a/scannerc.go +++ b/scannerc.go @@ -9,7 +9,7 @@ import ( // ************ // // The following notes assume that you are familiar with the YAML specification -// (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in +// (http://yaml.org/spec/1.2/spec.html). We mostly follow it, although in // some cases we are less restrictive that it requires. // // The process of transforming a YAML stream into a sequence of events is @@ -611,7 +611,7 @@ func yaml_parser_set_scanner_tag_error(parser *yaml_parser_t, directive bool, co if directive { context = "while parsing a %TAG directive" } - return yaml_parser_set_scanner_error(parser, context, context_mark, "did not find URI escaped octet") + return yaml_parser_set_scanner_error(parser, context, context_mark, problem) } func trace(args ...interface{}) func() { @@ -626,30 +626,17 @@ func trace(args ...interface{}) func() { func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { // While we need more tokens to fetch, do it. for { - // Check if we really need to fetch more tokens. - need_more_tokens := false - - if parser.tokens_head == len(parser.tokens) { - // Queue is empty. - need_more_tokens = true - } else { - // Check if any potential simple key may occupy the head position. - if !yaml_parser_stale_simple_keys(parser) { + if parser.tokens_head != len(parser.tokens) { + // If queue is non-empty, check if any potential simple key may + // occupy the head position. + head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed] + if !ok { + break + } else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok { return false + } else if !valid { + break } - - for i := range parser.simple_keys { - simple_key := &parser.simple_keys[i] - if simple_key.possible && simple_key.token_number == parser.tokens_parsed { - need_more_tokens = true - break - } - } - } - - // We are finished. - if !need_more_tokens { - break } // Fetch the next token. if !yaml_parser_fetch_next_token(parser) { @@ -678,11 +665,6 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { return false } - // Remove obsolete potential simple keys. - if !yaml_parser_stale_simple_keys(parser) { - return false - } - // Check the indentation level against the current column. if !yaml_parser_unroll_indent(parser, parser.mark.column) { return false @@ -837,29 +819,30 @@ func yaml_parser_fetch_next_token(parser *yaml_parser_t) bool { "found character that cannot start any token") } -// Check the list of potential simple keys and remove the positions that -// cannot contain simple keys anymore. -func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool { - // Check for a potential simple key for each flow level. - for i := range parser.simple_keys { - simple_key := &parser.simple_keys[i] - - // The specification requires that a simple key - // - // - is limited to a single line, - // - is shorter than 1024 characters. - if simple_key.possible && (simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index) { - - // Check if the potential simple key to be removed is required. - if simple_key.required { - return yaml_parser_set_scanner_error(parser, - "while scanning a simple key", simple_key.mark, - "could not find expected ':'") - } - simple_key.possible = false +func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key_t) (valid, ok bool) { + if !simple_key.possible { + return false, true + } + + // The 1.2 specification says: + // + // "If the ? indicator is omitted, parsing needs to see past the + // implicit key to recognize it as such. To limit the amount of + // lookahead required, the “:” indicator must appear at most 1024 + // Unicode characters beyond the start of the key. In addition, the key + // is restricted to a single line." + // + if simple_key.mark.line < parser.mark.line || simple_key.mark.index+1024 < parser.mark.index { + // Check if the potential simple key to be removed is required. + if simple_key.required { + return false, yaml_parser_set_scanner_error(parser, + "while scanning a simple key", simple_key.mark, + "could not find expected ':'") } + simple_key.possible = false + return false, true } - return true + return true, true } // Check if a simple key may start at the current position and add it if @@ -871,12 +854,6 @@ func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { required := parser.flow_level == 0 && parser.indent == parser.mark.column - // A simple key is required only when it is the first token in the current - // line. Therefore it is always allowed. But we add a check anyway. - if required && !parser.simple_key_allowed { - panic("should not happen") - } - // // If the current position may start a simple key, save it. // @@ -885,13 +862,14 @@ func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { possible: true, required: required, token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), + mark: parser.mark, } - simple_key.mark = parser.mark if !yaml_parser_remove_simple_key(parser) { return false } parser.simple_keys[len(parser.simple_keys)-1] = simple_key + parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1 } return true } @@ -906,19 +884,33 @@ func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { "while scanning a simple key", parser.simple_keys[i].mark, "could not find expected ':'") } + // Remove the key from the stack. + parser.simple_keys[i].possible = false + delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number) } - // Remove the key from the stack. - parser.simple_keys[i].possible = false return true } +// max_flow_level limits the flow_level +const max_flow_level = 10000 + // Increase the flow level and resize the simple key list if needed. func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { // Reset the simple key on the next level. - parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) + parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{ + possible: false, + required: false, + token_number: parser.tokens_parsed + (len(parser.tokens) - parser.tokens_head), + mark: parser.mark, + }) // Increase the flow level. parser.flow_level++ + if parser.flow_level > max_flow_level { + return yaml_parser_set_scanner_error(parser, + "while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark, + fmt.Sprintf("exceeded max depth of %d", max_flow_level)) + } return true } @@ -926,11 +918,16 @@ func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { if parser.flow_level > 0 { parser.flow_level-- - parser.simple_keys = parser.simple_keys[:len(parser.simple_keys)-1] + last := len(parser.simple_keys) - 1 + delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number) + parser.simple_keys = parser.simple_keys[:last] } return true } +// max_indents limits the indents stack size +const max_indents = 10000 + // Push the current indentation level to the stack and set the new level // the current column is greater than the indentation level. In this case, // append or insert the specified token into the token queue. @@ -945,6 +942,11 @@ func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml // indentation level. parser.indents = append(parser.indents, parser.indent) parser.indent = column + if len(parser.indents) > max_indents { + return yaml_parser_set_scanner_error(parser, + "while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark, + fmt.Sprintf("exceeded max depth of %d", max_indents)) + } // Create a token and insert it into the queue. token := yaml_token_t{ @@ -995,6 +997,8 @@ func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool { // Initialize the simple key stack. parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) + parser.simple_keys_by_tok = make(map[int]int) + // A simple key is allowed at the beginning of the stream. parser.simple_key_allowed = true @@ -1276,7 +1280,11 @@ func yaml_parser_fetch_value(parser *yaml_parser_t) bool { simple_key := &parser.simple_keys[len(parser.simple_keys)-1] // Have we found a simple key? - if simple_key.possible { + if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok { + return false + + } else if valid { + // Create the KEY token and insert it into the queue. token := yaml_token_t{ typ: yaml_KEY_TOKEN, @@ -1294,6 +1302,7 @@ func yaml_parser_fetch_value(parser *yaml_parser_t) bool { // Remove the simple key. simple_key.possible = false + delete(parser.simple_keys_by_tok, simple_key.token_number) // A simple key cannot follow another simple key. parser.simple_key_allowed = false @@ -1491,11 +1500,11 @@ func yaml_parser_scan_to_next_token(parser *yaml_parser_t) bool { // Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token. // // Scope: -// %YAML 1.1 # a comment \n -// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -// %TAG !yaml! tag:yaml.org,2002: \n -// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ // +// %YAML 1.1 # a comment \n +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// %TAG !yaml! tag:yaml.org,2002: \n +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool { // Eat '%'. start_mark := parser.mark @@ -1592,11 +1601,11 @@ func yaml_parser_scan_directive(parser *yaml_parser_t, token *yaml_token_t) bool // Scan the directive name. // // Scope: -// %YAML 1.1 # a comment \n -// ^^^^ -// %TAG !yaml! tag:yaml.org,2002: \n -// ^^^ // +// %YAML 1.1 # a comment \n +// ^^^^ +// %TAG !yaml! tag:yaml.org,2002: \n +// ^^^ func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark_t, name *[]byte) bool { // Consume the directive name. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { @@ -1631,8 +1640,9 @@ func yaml_parser_scan_directive_name(parser *yaml_parser_t, start_mark yaml_mark // Scan the value of VERSION-DIRECTIVE. // // Scope: -// %YAML 1.1 # a comment \n -// ^^^^^^ +// +// %YAML 1.1 # a comment \n +// ^^^^^^ func yaml_parser_scan_version_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, major, minor *int8) bool { // Eat whitespaces. if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { @@ -1670,10 +1680,11 @@ const max_number_length = 2 // Scan the version number of VERSION-DIRECTIVE. // // Scope: -// %YAML 1.1 # a comment \n -// ^ -// %YAML 1.1 # a comment \n -// ^ +// +// %YAML 1.1 # a comment \n +// ^ +// %YAML 1.1 # a comment \n +// ^ func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark yaml_mark_t, number *int8) bool { // Repeat while the next character is digit. @@ -1707,9 +1718,9 @@ func yaml_parser_scan_version_directive_number(parser *yaml_parser_t, start_mark // Scan the value of a TAG-DIRECTIVE token. // // Scope: -// %TAG !yaml! tag:yaml.org,2002: \n -// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ // +// %TAG !yaml! tag:yaml.org,2002: \n +// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ func yaml_parser_scan_tag_directive_value(parser *yaml_parser_t, start_mark yaml_mark_t, handle, prefix *[]byte) bool { var handle_value, prefix_value []byte @@ -1944,7 +1955,7 @@ func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_ma } else { // It's either the '!' tag or not really a tag handle. If it's a %TAG // directive, it's an error. If it's a tag token, it must be a part of URI. - if directive && !(s[0] == '!' && s[1] == 0) { + if directive && string(s) != "!" { yaml_parser_set_scanner_tag_error(parser, directive, start_mark, "did not find expected '!'") return false @@ -1959,6 +1970,7 @@ func yaml_parser_scan_tag_handle(parser *yaml_parser_t, directive bool, start_ma func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte, start_mark yaml_mark_t, uri *[]byte) bool { //size_t length = head ? strlen((char *)head) : 0 var s []byte + hasTag := len(head) > 0 // Copy the head if needed. // @@ -2000,10 +2012,10 @@ func yaml_parser_scan_tag_uri(parser *yaml_parser_t, directive bool, head []byte if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { return false } + hasTag = true } - // Check if the tag is non-empty. - if len(s) == 0 { + if !hasTag { yaml_parser_set_scanner_tag_error(parser, directive, start_mark, "did not find expected tag URI") return false @@ -2474,6 +2486,10 @@ func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, si } } + if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { + return false + } + // Check if we are at the end of the scalar. if single { if parser.buffer[parser.buffer_pos] == '\'' { @@ -2486,10 +2502,6 @@ func yaml_parser_scan_flow_scalar(parser *yaml_parser_t, token *yaml_token_t, si } // Consume blank characters. - if parser.unread < 1 && !yaml_parser_update_buffer(parser, 1) { - return false - } - for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { if is_blank(parser.buffer, parser.buffer_pos) { // Consume a space or a tab character. @@ -2591,19 +2603,10 @@ func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) b // Consume non-blank characters. for !is_blankz(parser.buffer, parser.buffer_pos) { - // Check for 'x:x' in the flow context. TODO: Fix the test "spec-08-13". - if parser.flow_level > 0 && - parser.buffer[parser.buffer_pos] == ':' && - !is_blankz(parser.buffer, parser.buffer_pos+1) { - yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", - start_mark, "found unexpected ':'") - return false - } - // Check for indicators that may end a plain scalar. if (parser.buffer[parser.buffer_pos] == ':' && is_blankz(parser.buffer, parser.buffer_pos+1)) || (parser.flow_level > 0 && - (parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == ':' || + (parser.buffer[parser.buffer_pos] == ',' || parser.buffer[parser.buffer_pos] == '?' || parser.buffer[parser.buffer_pos] == '[' || parser.buffer[parser.buffer_pos] == ']' || parser.buffer[parser.buffer_pos] == '{' || parser.buffer[parser.buffer_pos] == '}')) { @@ -2655,10 +2658,10 @@ func yaml_parser_scan_plain_scalar(parser *yaml_parser_t, token *yaml_token_t) b for is_blank(parser.buffer, parser.buffer_pos) || is_break(parser.buffer, parser.buffer_pos) { if is_blank(parser.buffer, parser.buffer_pos) { - // Check for tab character that abuse indentation. + // Check for tab characters that abuse indentation. if leading_blanks && parser.mark.column < indent && is_tab(parser.buffer, parser.buffer_pos) { yaml_parser_set_scanner_error(parser, "while scanning a plain scalar", - start_mark, "found a tab character that violate indentation") + start_mark, "found a tab character that violates indentation") return false } diff --git a/sorter.go b/sorter.go index 5958822f..4c45e660 100644 --- a/sorter.go +++ b/sorter.go @@ -51,6 +51,15 @@ func (l keyList) Less(i, j int) bool { } var ai, bi int var an, bn int64 + if ar[i] == '0' || br[i] == '0' { + for j := i-1; j >= 0 && unicode.IsDigit(ar[j]); j-- { + if ar[j] != '0' { + an = 1 + bn = 1 + break + } + } + } for ai = i; ai < len(ar) && unicode.IsDigit(ar[ai]); ai++ { an = an*10 + int64(ar[ai]-'0') } diff --git a/writerc.go b/writerc.go index 190362f2..a2dde608 100644 --- a/writerc.go +++ b/writerc.go @@ -18,72 +18,9 @@ func yaml_emitter_flush(emitter *yaml_emitter_t) bool { return true } - // If the output encoding is UTF-8, we don't need to recode the buffer. - if emitter.encoding == yaml_UTF8_ENCODING { - if err := emitter.write_handler(emitter, emitter.buffer[:emitter.buffer_pos]); err != nil { - return yaml_emitter_set_writer_error(emitter, "write error: "+err.Error()) - } - emitter.buffer_pos = 0 - return true - } - - // Recode the buffer into the raw buffer. - var low, high int - if emitter.encoding == yaml_UTF16LE_ENCODING { - low, high = 0, 1 - } else { - high, low = 1, 0 - } - - pos := 0 - for pos < emitter.buffer_pos { - // See the "reader.c" code for more details on UTF-8 encoding. Note - // that we assume that the buffer contains a valid UTF-8 sequence. - - // Read the next UTF-8 character. - octet := emitter.buffer[pos] - - var w int - var value rune - switch { - case octet&0x80 == 0x00: - w, value = 1, rune(octet&0x7F) - case octet&0xE0 == 0xC0: - w, value = 2, rune(octet&0x1F) - case octet&0xF0 == 0xE0: - w, value = 3, rune(octet&0x0F) - case octet&0xF8 == 0xF0: - w, value = 4, rune(octet&0x07) - } - for k := 1; k < w; k++ { - octet = emitter.buffer[pos+k] - value = (value << 6) + (rune(octet) & 0x3F) - } - pos += w - - // Write the character. - if value < 0x10000 { - var b [2]byte - b[high] = byte(value >> 8) - b[low] = byte(value & 0xFF) - emitter.raw_buffer = append(emitter.raw_buffer, b[0], b[1]) - } else { - // Write the character using a surrogate pair (check "reader.c"). - var b [4]byte - value -= 0x10000 - b[high] = byte(0xD8 + (value >> 18)) - b[low] = byte((value >> 10) & 0xFF) - b[high+2] = byte(0xDC + ((value >> 8) & 0xFF)) - b[low+2] = byte(value & 0xFF) - emitter.raw_buffer = append(emitter.raw_buffer, b[0], b[1], b[2], b[3]) - } - } - - // Write the raw buffer. - if err := emitter.write_handler(emitter, emitter.raw_buffer); err != nil { + if err := emitter.write_handler(emitter, emitter.buffer[:emitter.buffer_pos]); err != nil { return yaml_emitter_set_writer_error(emitter, "write error: "+err.Error()) } emitter.buffer_pos = 0 - emitter.raw_buffer = emitter.raw_buffer[:0] return true } diff --git a/yaml.go b/yaml.go index 36d6b883..5248e126 100644 --- a/yaml.go +++ b/yaml.go @@ -2,13 +2,14 @@ // // Source code and other details for the project are available at GitHub: // -// https://github.com/go-yaml/yaml +// https://github.com/yaml/go-yaml // package yaml import ( "errors" "fmt" + "io" "reflect" "strings" "sync" @@ -77,8 +78,65 @@ type Marshaler interface { // supported tag options. // func Unmarshal(in []byte, out interface{}) (err error) { + return unmarshal(in, out, false) +} + +// UnmarshalStrict is like Unmarshal except that any fields that are found +// in the data that do not have corresponding struct members, or mapping +// keys that are duplicates, will result in +// an error. +func UnmarshalStrict(in []byte, out interface{}) (err error) { + return unmarshal(in, out, true) +} + +// A Decoder reads and decodes YAML values from an input stream. +type Decoder struct { + strict bool + parser *parser +} + +// NewDecoder returns a new decoder that reads from r. +// +// The decoder introduces its own buffering and may read +// data from r beyond the YAML values requested. +func NewDecoder(r io.Reader) *Decoder { + return &Decoder{ + parser: newParserFromReader(r), + } +} + +// SetStrict sets whether strict decoding behaviour is enabled when +// decoding items in the data (see UnmarshalStrict). By default, decoding is not strict. +func (dec *Decoder) SetStrict(strict bool) { + dec.strict = strict +} + +// Decode reads the next YAML-encoded value from its input +// and stores it in the value pointed to by v. +// +// See the documentation for Unmarshal for details about the +// conversion of YAML into a Go value. +func (dec *Decoder) Decode(v interface{}) (err error) { + d := newDecoder(dec.strict) defer handleErr(&err) - d := newDecoder() + node := dec.parser.parse() + if node == nil { + return io.EOF + } + out := reflect.ValueOf(v) + if out.Kind() == reflect.Ptr && !out.IsNil() { + out = out.Elem() + } + d.unmarshal(node, out) + if len(d.terrors) > 0 { + return &TypeError{d.terrors} + } + return nil +} + +func unmarshal(in []byte, out interface{}, strict bool) (err error) { + defer handleErr(&err) + d := newDecoder(strict) p := newParser(in) defer p.destroy() node := p.parse() @@ -99,8 +157,8 @@ func Unmarshal(in []byte, out interface{}) (err error) { // of the generated document will reflect the structure of the value itself. // Maps and pointers (to struct, string, int, etc) are accepted as the in value. // -// Struct fields are only unmarshalled if they are exported (have an upper case -// first letter), and are unmarshalled using the field name lowercased as the +// Struct fields are only marshalled if they are exported (have an upper case +// first letter), and are marshalled using the field name lowercased as the // default key. Custom keys may be defined via the "yaml" name in the field // tag: the content preceding the first comma is used as the key, and the // following comma-separated options are used to tweak the marshalling process. @@ -114,7 +172,10 @@ func Unmarshal(in []byte, out interface{}) (err error) { // // omitempty Only include the field if it's not set to the zero // value for the type or to empty slices or maps. -// Does not apply to zero valued structs. +// Zero valued structs will be omitted if all their public +// fields are zero, unless they implement an IsZero +// method (see the IsZeroer interface type), in which +// case the field will be excluded if IsZero returns true. // // flow Marshal using a flow style (useful for structs, // sequences and maps). @@ -129,7 +190,7 @@ func Unmarshal(in []byte, out interface{}) (err error) { // For example: // // type T struct { -// F int "a,omitempty" +// F int `yaml:"a,omitempty"` // B int // } // yaml.Marshal(&T{B: 2}) // Returns "b: 2\n" @@ -139,12 +200,47 @@ func Marshal(in interface{}) (out []byte, err error) { defer handleErr(&err) e := newEncoder() defer e.destroy() - e.marshal("", reflect.ValueOf(in)) + e.marshalDoc("", reflect.ValueOf(in)) e.finish() out = e.out return } +// An Encoder writes YAML values to an output stream. +type Encoder struct { + encoder *encoder +} + +// NewEncoder returns a new encoder that writes to w. +// The Encoder should be closed after use to flush all data +// to w. +func NewEncoder(w io.Writer) *Encoder { + return &Encoder{ + encoder: newEncoderWithWriter(w), + } +} + +// Encode writes the YAML encoding of v to the stream. +// If multiple items are encoded to the stream, the +// second and subsequent document will be preceded +// with a "---" document separator, but the first will not. +// +// See the documentation for Marshal for details about the conversion of Go +// values to YAML. +func (e *Encoder) Encode(v interface{}) (err error) { + defer handleErr(&err) + e.encoder.marshalDoc("", reflect.ValueOf(v)) + return nil +} + +// Close closes the encoder by writing any remaining data. +// It does not write a stream terminating string "...". +func (e *Encoder) Close() (err error) { + defer handleErr(&err) + e.encoder.finish() + return nil +} + func handleErr(err *error) { if v := recover(); v != nil { if e, ok := v.(yamlError); ok { @@ -200,6 +296,9 @@ type fieldInfo struct { Num int OmitEmpty bool Flow bool + // Id holds the unique field identifier, so we can cheaply + // check for field duplicates without maintaining an extra map. + Id int // Inline holds the field index if the field is part of an inlined struct. Inline []int @@ -279,6 +378,7 @@ func getStructInfo(st reflect.Type) (*structInfo, error) { } else { finfo.Inline = append([]int{i}, finfo.Inline...) } + finfo.Id = len(fieldsList) fieldsMap[finfo.Key] = finfo fieldsList = append(fieldsList, finfo) } @@ -300,11 +400,16 @@ func getStructInfo(st reflect.Type) (*structInfo, error) { return nil, errors.New(msg) } + info.Id = len(fieldsList) fieldsList = append(fieldsList, info) fieldsMap[info.Key] = info } - sinfo = &structInfo{fieldsMap, fieldsList, inlineMap} + sinfo = &structInfo{ + FieldsMap: fieldsMap, + FieldsList: fieldsList, + InlineMap: inlineMap, + } fieldMapMutex.Lock() structMap[st] = sinfo @@ -312,8 +417,23 @@ func getStructInfo(st reflect.Type) (*structInfo, error) { return sinfo, nil } +// IsZeroer is used to check whether an object is zero to +// determine whether it should be omitted when marshaling +// with the omitempty flag. One notable implementation +// is time.Time. +type IsZeroer interface { + IsZero() bool +} + func isZero(v reflect.Value) bool { - switch v.Kind() { + kind := v.Kind() + if z, ok := v.Interface().(IsZeroer); ok { + if (kind == reflect.Ptr || kind == reflect.Interface) && v.IsNil() { + return true + } + return z.IsZero() + } + switch kind { case reflect.String: return len(v.String()) == 0 case reflect.Interface, reflect.Ptr: @@ -344,3 +464,15 @@ func isZero(v reflect.Value) bool { } return false } + +// FutureLineWrap globally disables line wrapping when encoding long strings. +// This is a temporary and thus deprecated method introduced to faciliate +// migration towards v3, which offers more control of line lengths on +// individual encodings, and has a default matching the behavior introduced +// by this function. +// +// The default formatting of v2 was erroneously changed in v2.3.0 and reverted +// in v2.4.0, at which point this function was introduced to help migration. +func FutureLineWrap() { + disableLineWrapping = true +} diff --git a/yamlh.go b/yamlh.go index d60a6b6b..f6a9c8e3 100644 --- a/yamlh.go +++ b/yamlh.go @@ -1,6 +1,7 @@ package yaml import ( + "fmt" "io" ) @@ -239,6 +240,27 @@ const ( yaml_MAPPING_END_EVENT // A MAPPING-END event. ) +var eventStrings = []string{ + yaml_NO_EVENT: "none", + yaml_STREAM_START_EVENT: "stream start", + yaml_STREAM_END_EVENT: "stream end", + yaml_DOCUMENT_START_EVENT: "document start", + yaml_DOCUMENT_END_EVENT: "document end", + yaml_ALIAS_EVENT: "alias", + yaml_SCALAR_EVENT: "scalar", + yaml_SEQUENCE_START_EVENT: "sequence start", + yaml_SEQUENCE_END_EVENT: "sequence end", + yaml_MAPPING_START_EVENT: "mapping start", + yaml_MAPPING_END_EVENT: "mapping end", +} + +func (e yaml_event_type_t) String() string { + if e < 0 || int(e) >= len(eventStrings) { + return fmt.Sprintf("unknown event %d", e) + } + return eventStrings[e] +} + // The event structure. type yaml_event_t struct { @@ -508,7 +530,7 @@ type yaml_parser_t struct { problem string // Error description. - // The byte about which the problem occured. + // The byte about which the problem occurred. problem_offset int problem_value int problem_mark yaml_mark_t @@ -521,9 +543,9 @@ type yaml_parser_t struct { read_handler yaml_read_handler_t // Read handler. - input_file io.Reader // File input data. - input []byte // String input data. - input_pos int + input_reader io.Reader // File input data. + input []byte // String input data. + input_pos int eof bool // EOF flag @@ -557,6 +579,7 @@ type yaml_parser_t struct { simple_key_allowed bool // May a simple key occur at the current position? simple_keys []yaml_simple_key_t // The stack of simple keys. + simple_keys_by_tok map[int]int // possible simple_key indexes indexed by token_number // Parser stuff @@ -632,7 +655,7 @@ type yaml_emitter_t struct { write_handler yaml_write_handler_t // Write handler. output_buffer *[]byte // String output data. - output_file io.Writer // File output data. + output_writer io.Writer // File output data. buffer []byte // The working buffer. buffer_pos int // The current position of the buffer.