diff --git a/src/wasm-lib/kcl/src/parsing/parser.rs b/src/wasm-lib/kcl/src/parsing/parser.rs index 76dd46b789..d9c870814b 100644 --- a/src/wasm-lib/kcl/src/parsing/parser.rs +++ b/src/wasm-lib/kcl/src/parsing/parser.rs @@ -4,17 +4,17 @@ use std::{cell::RefCell, collections::BTreeMap}; use winnow::{ - combinator::{alt, delimited, opt, peek, preceded, repeat, separated, separated_pair, terminated}, + combinator::{alt, delimited, opt, peek, preceded, repeat, repeat_till, separated, separated_pair, terminated}, dispatch, error::{ErrMode, StrContext, StrContextValue}, prelude::*, stream::Stream, - token::{any, one_of, take_till}, + token::{any, none_of, one_of, take_till}, }; use super::{ ast::types::{ImportPath, LabelledExpression}, - token::NumericSuffix, + token::{NumericSuffix, RESERVED_WORDS}, }; use crate::{ docs::StdLibFn, @@ -742,21 +742,58 @@ pub(crate) fn array_elem_by_elem(i: &mut TokenSlice) -> PResult = peek(repeat_till( + 0.., + none_of(|token: Token| { + // bail out early if we encounter something that is for sure not allowed in an + // array, otherwise we could seek to find a closing bracket until the end of the + // file + RESERVED_WORDS + .keys() + .chain([",,", "{", "}", "["].iter()) + .any(|word| *word == token.value) + }) + .void(), + one_of(|term: Token| term.value == "]"), + )) + .parse_next(i); + let has_closing_bracket = maybe_closing_bracket.is_ok(); + if has_closing_bracket { + let start_range = i.as_source_range(); + // safe to unwrap here because we checked it was Ok above + let end_range = maybe_closing_bracket.unwrap().1.as_source_range(); + let e = ContextError { + context: vec![], + cause: Some(CompilationError::fatal( + SourceRange::from([start_range.start(), end_range.end(), end_range.module_id().as_usize()]), + "Unexpected character encountered. You might be missing a comma in between elements.", + )), + }; + return Err(ErrMode::Cut(e)); + } + } + let end = maybe_end?.end; // Sort the array's elements (i.e. expression nodes) from the noncode nodes. let (elements, non_code_nodes): (Vec<_>, BTreeMap) = elements.into_iter().enumerate().fold( @@ -815,7 +852,7 @@ fn array_end_start(i: &mut TokenSlice) -> PResult> { } fn object_property_same_key_and_val(i: &mut TokenSlice) -> PResult> { - let key = nameable_identifier.context(expected("the property's key (the name or identifier of the property), e.g. in 'height: 4', 'height' is the property key")).parse_next(i)?; + let key = nameable_identifier.context(expected("the property's key (the name or identifier of the property), e.g. in 'height = 4', 'height' is the property key")).parse_next(i)?; ignore_whitespace(i); Ok(Node { start: key.start, @@ -842,7 +879,7 @@ fn object_property(i: &mut TokenSlice) -> PResult> { ignore_whitespace(i); let expr = expression_but_not_ascription .context(expected( - "the value which you're setting the property to, e.g. in 'height: 4', the value is 4", + "the value which you're setting the property to, e.g. in 'height = 4', the value is 4", )) .parse_next(i)?; @@ -876,7 +913,7 @@ fn property_separator(i: &mut TokenSlice) -> PResult<()> { alt(( // Normally you need a comma. comma_sep, - // But, if the array is ending, no need for a comma. + // But, if the object is ending, no need for a comma. peek(preceded(opt(whitespace), close_brace)).void(), )) .parse_next(i) @@ -910,10 +947,62 @@ pub(crate) fn object(i: &mut TokenSlice) -> PResult> { )), ) .context(expected( - "a comma-separated list of key-value pairs, e.g. 'height: 4, width: 3'", + "a comma-separated list of key-value pairs, e.g. 'height = 4, width = 3'", )) .parse_next(i)?; + ignore_trailing_comma(i); + ignore_whitespace(i); + let maybe_end = close_brace(i).map_err(|e| { + if let Some(mut err) = e.clone().into_inner() { + let start_range = open.as_source_range(); + let end_range = i.as_source_range(); + err.cause = Some(CompilationError::fatal( + SourceRange::from([start_range.start(), end_range.start(), end_range.module_id().as_usize()]), + "Encountered an unexpected character(s) before finding a closing brace(`}`) for the object", + )); + ErrMode::Cut(err) + } else { + // ErrMode::Incomplete, not sure if it's actually possible to end up with this here + e + } + }); + if maybe_end.is_err() { + // if there is a closing brace at some point, but it wasn't the next token, it's likely that they forgot a comma between some + // of the properties + let maybe_closing_brace: PResult<((), Token)> = peek(repeat_till( + 0.., + none_of(|token: Token| { + // bail out early if we encounter something that is for sure not allowed in an + // object, otherwise we could seek to find a closing brace until the end of the + // file + RESERVED_WORDS + .keys() + .chain([",,", "[", "]", "{"].iter()) + .any(|word| *word == token.value) + }) + .void(), + one_of(|c: Token| c.value == "}"), + )) + .parse_next(i); + let has_closing_brace = maybe_closing_brace.is_ok(); + if has_closing_brace { + let start_range = i.as_source_range(); + // okay to unwrap here because we checked it was Ok above + let end_range = maybe_closing_brace.unwrap().1.as_source_range(); + + let e = ContextError { + context: vec![], + cause: Some(CompilationError::fatal( + SourceRange::from([start_range.start(), end_range.end(), end_range.module_id().as_usize()]), + "Unexpected character encountered. You might be missing a comma in between properties.", + )), + }; + return Err(ErrMode::Cut(e)); + } + } + + let end = maybe_end?.end; // Sort the object's properties from the noncode nodes. let (properties, non_code_nodes): (Vec<_>, BTreeMap) = properties.into_iter().enumerate().fold( (Vec::new(), BTreeMap::new()), @@ -929,9 +1018,7 @@ pub(crate) fn object(i: &mut TokenSlice) -> PResult> { (properties, non_code_nodes) }, ); - ignore_trailing_comma(i); - ignore_whitespace(i); - let end = close_brace(i)?.end; + let non_code_meta = NonCodeMeta { non_code_nodes, ..Default::default() @@ -3803,7 +3890,7 @@ mySk1 = startSketchAt([0, 0])"#; assert_eq!( src_expected, src_actual, - "expected error would highlight {} but it actually highlighted {}", + "expected error would highlight `{}` but it actually highlighted `{}`", &p[src_expected[0]..src_expected[1]], &p[src_actual[0]..src_actual[1]], ); @@ -3994,7 +4081,11 @@ z(-[["#, #[test] fn test_parse_weird_lots_of_fancy_brackets() { - assert_err(r#"zz({{{{{{{{)iegAng{{{{{{{##"#, "Unexpected token: (", [2, 3]); + assert_err( + r#"zz({{{{{{{{)iegAng{{{{{{{##"#, + "Encountered an unexpected character(s) before finding a closing brace(`}`) for the object", + [3, 4], + ); } #[test] @@ -4547,10 +4638,123 @@ let myBox = box([0,0], -3, -16, -10) } #[test] - fn test_parse_missing_closing_bracket() { + fn test_parse_array_missing_closing_bracket() { let some_program_string = r#" sketch001 = startSketchOn('XZ') |> startProfileAt([90.45, 119.09, %)"#; - assert_err(some_program_string, "Array is missing a closing bracket(`]`)", [51, 52]); + assert_err( + some_program_string, + "Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array", + [51, 67], + ); + } + #[test] + fn test_parse_array_missing_comma() { + let some_program_string = r#" +sketch001 = startSketchOn('XZ') |> startProfileAt([90.45 119.09], %)"#; + assert_err( + some_program_string, + "Unexpected character encountered. You might be missing a comma in between elements.", + [52, 65], + ); + } + #[test] + fn test_parse_array_reserved_word_early_exit() { + // since there is an early exit if encountering a reserved word, the error should be about + // that and not the missing comma + let some_program_string = r#" +sketch001 = startSketchOn('XZ') |> startProfileAt([90.45 $sketch], %)"#; + assert_err( + some_program_string, + "Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array", + [51, 52], + ); + } + #[test] + fn test_parse_array_random_brace() { + let some_program_string = r#" +sketch001 = startSketchOn('XZ') |> startProfileAt([}], %)"#; + assert_err( + some_program_string, + "Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array", + [51, 52], + ); + } + + #[test] + fn test_parse_object_missing_closing_brace() { + let some_program_string = r#"{ + foo = bar,"#; + + assert_err( + some_program_string, + "Encountered an unexpected character(s) before finding a closing brace(`}`) for the object", + [0, 23], + ); + } + #[test] + fn test_parse_object_reserved_word_early_exit() { + // since there is an early exit if encountering a reserved word, the error should be about + // that and not the missing comma + let some_program_string = r#"{bar = foo sketch = man}"#; + + assert_err( + some_program_string, + "Encountered an unexpected character(s) before finding a closing brace(`}`) for the object", + [0, 1], + ); + } + #[test] + fn test_parse_object_missing_comma() { + let some_program_string = r#"{ + foo = bar, + bar = foo + bat = man + }"#; + + assert_err( + some_program_string, + "Unexpected character encountered. You might be missing a comma in between properties.", + [37, 78], + ); + } + + #[test] + fn test_parse_object_missing_comma_one_line() { + let some_program_string = r#"{bar = foo bat = man}"#; + + assert_err( + some_program_string, + "Unexpected character encountered. You might be missing a comma in between properties.", + [1, 21], + ); + } + + #[test] + fn test_parse_object_random_bracket() { + let some_program_string = r#"{]}"#; + + assert_err( + some_program_string, + "Encountered an unexpected character(s) before finding a closing brace(`}`) for the object", + [0, 1], + ); + } + + #[test] + fn test_parse_object_shorthand_missing_comma() { + let some_program_string = r#" +bar = 1 + { + foo = bar, + bar + bat = man + }"#; + + assert_err( + some_program_string, + "Unexpected character encountered. You might be missing a comma in between properties.", + [54, 89], + ); } #[test] diff --git a/src/wasm-lib/kcl/src/parsing/token/mod.rs b/src/wasm-lib/kcl/src/parsing/token/mod.rs index 154b5e0ebb..3c26bee067 100644 --- a/src/wasm-lib/kcl/src/parsing/token/mod.rs +++ b/src/wasm-lib/kcl/src/parsing/token/mod.rs @@ -24,7 +24,6 @@ use crate::{ mod tokeniser; -#[cfg(test)] pub(crate) use tokeniser::RESERVED_WORDS; // Note the ordering, it's important that `m` comes after `mm` and `cm`. @@ -162,7 +161,9 @@ impl IntoIterator for TokenStream { #[derive(Debug, Clone)] pub(crate) struct TokenSlice<'a> { stream: &'a TokenStream, + /// Current position of the leading Token in the stream start: usize, + /// The number of total Tokens in the stream end: usize, } @@ -190,6 +191,21 @@ impl<'a> TokenSlice<'a> { stream: self.stream, } } + + pub fn as_source_range(&self) -> SourceRange { + let stream_len = self.stream.tokens.len(); + let first_token = if stream_len == self.start { + &self.stream.tokens[self.start - 1] + } else { + self.token(0) + }; + let last_token = if stream_len == self.end { + &self.stream.tokens[stream_len - 1] + } else { + self.token(self.end - self.start) + }; + SourceRange::new(first_token.start, last_token.end, last_token.module_id) + } } impl<'a> IntoIterator for TokenSlice<'a> { @@ -294,6 +310,14 @@ impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> { } } +impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> { + fn find_slice(&self, substr: &str) -> Option> { + self.iter() + .enumerate() + .find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None }) + } +} + #[derive(Clone, Debug)] pub struct Checkpoint(usize, usize);