Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: add better errors for missing commas in arrays and objects #5079

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
260 changes: 232 additions & 28 deletions src/wasm-lib/kcl/src/parsing/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
use std::{cell::RefCell, collections::BTreeMap};

use winnow::{
combinator::{alt, delimited, opt, peek, preceded, repeat, separated, separated_pair, terminated},
combinator::{alt, delimited, opt, peek, preceded, repeat, repeat_till, separated, separated_pair, terminated},
dispatch,
error::{ErrMode, StrContext, StrContextValue},
prelude::*,
stream::Stream,
token::{any, one_of, take_till},
token::{any, none_of, one_of, take_till},
};

use super::{
ast::types::{ImportPath, LabelledExpression},
token::NumericSuffix,
token::{NumericSuffix, RESERVED_WORDS},
};
use crate::{
docs::StdLibFn,
Expand Down Expand Up @@ -742,21 +742,58 @@ pub(crate) fn array_elem_by_elem(i: &mut TokenSlice) -> PResult<Node<ArrayExpres
)
.context(expected("array contents, a list of elements (like [1, 2, 3])"))
.parse_next(i)?;
ignore_trailing_comma(i);
ignore_whitespace(i);
let end = close_bracket(i)
.map_err(|e| {
if let Some(mut err) = e.clone().into_inner() {
err.cause = Some(CompilationError::fatal(
open.as_source_range(),
"Array is missing a closing bracket(`]`)",
));
ErrMode::Cut(err)
} else {
// ErrMode::Incomplete, not sure if it's actually possible to end up with this here
e
}
})?
.end;

let maybe_end = close_bracket(i).map_err(|e| {
if let Some(mut err) = e.clone().into_inner() {
let start_range = open.as_source_range();
let end_range = i.as_source_range();
err.cause = Some(CompilationError::fatal(
SourceRange::from([start_range.start(), end_range.start(), end_range.module_id().as_usize()]),
"Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array",
));
ErrMode::Cut(err)
} else {
// ErrMode::Incomplete, not sure if it's actually possible to end up with this here
e
}
});

if maybe_end.is_err() {
// if there is a closing bracket at some point, but it wasn't the next token, it's likely that they forgot a comma between some
// of the elements
let maybe_closing_bracket: PResult<((), Token)> = peek(repeat_till(
0..,
none_of(|token: Token| {
// bail out early if we encounter something that is for sure not allowed in an
// array, otherwise we could seek to find a closing bracket until the end of the
// file
RESERVED_WORDS
.keys()
.chain([",,", "{", "}", "["].iter())
.any(|word| *word == token.value)
})
.void(),
one_of(|term: Token| term.value == "]"),
))
.parse_next(i);
let has_closing_bracket = maybe_closing_bracket.is_ok();
if has_closing_bracket {
let start_range = i.as_source_range();
// safe to unwrap here because we checked it was Ok above
let end_range = maybe_closing_bracket.unwrap().1.as_source_range();
let e = ContextError {
context: vec![],
cause: Some(CompilationError::fatal(
SourceRange::from([start_range.start(), end_range.end(), end_range.module_id().as_usize()]),
"Unexpected character encountered. You might be missing a comma in between elements.",
)),
};
return Err(ErrMode::Cut(e));
}
}
let end = maybe_end?.end;

// Sort the array's elements (i.e. expression nodes) from the noncode nodes.
let (elements, non_code_nodes): (Vec<_>, BTreeMap<usize, _>) = elements.into_iter().enumerate().fold(
Expand Down Expand Up @@ -815,7 +852,7 @@ fn array_end_start(i: &mut TokenSlice) -> PResult<Node<ArrayRangeExpression>> {
}

fn object_property_same_key_and_val(i: &mut TokenSlice) -> PResult<Node<ObjectProperty>> {
let key = nameable_identifier.context(expected("the property's key (the name or identifier of the property), e.g. in 'height: 4', 'height' is the property key")).parse_next(i)?;
let key = nameable_identifier.context(expected("the property's key (the name or identifier of the property), e.g. in 'height = 4', 'height' is the property key")).parse_next(i)?;
ignore_whitespace(i);
Ok(Node {
start: key.start,
Expand All @@ -842,7 +879,7 @@ fn object_property(i: &mut TokenSlice) -> PResult<Node<ObjectProperty>> {
ignore_whitespace(i);
let expr = expression_but_not_ascription
.context(expected(
"the value which you're setting the property to, e.g. in 'height: 4', the value is 4",
"the value which you're setting the property to, e.g. in 'height = 4', the value is 4",
))
.parse_next(i)?;

Expand Down Expand Up @@ -876,7 +913,7 @@ fn property_separator(i: &mut TokenSlice) -> PResult<()> {
alt((
// Normally you need a comma.
comma_sep,
// But, if the array is ending, no need for a comma.
// But, if the object is ending, no need for a comma.
peek(preceded(opt(whitespace), close_brace)).void(),
))
.parse_next(i)
Expand All @@ -899,10 +936,62 @@ pub(crate) fn object(i: &mut TokenSlice) -> PResult<Node<ObjectExpression>> {
)),
)
.context(expected(
"a comma-separated list of key-value pairs, e.g. 'height: 4, width: 3'",
"a comma-separated list of key-value pairs, e.g. 'height = 4, width = 3'",
))
.parse_next(i)?;
ignore_trailing_comma(i);
ignore_whitespace(i);

let maybe_end = close_brace(i).map_err(|e| {
if let Some(mut err) = e.clone().into_inner() {
let start_range = open.as_source_range();
let end_range = i.as_source_range();
err.cause = Some(CompilationError::fatal(
SourceRange::from([start_range.start(), end_range.start(), end_range.module_id().as_usize()]),
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
));
ErrMode::Cut(err)
} else {
// ErrMode::Incomplete, not sure if it's actually possible to end up with this here
e
}
});
if maybe_end.is_err() {
// if there is a closing brace at some point, but it wasn't the next token, it's likely that they forgot a comma between some
// of the properties
let maybe_closing_brace: PResult<((), Token)> = peek(repeat_till(
0..,
none_of(|token: Token| {
// bail out early if we encounter something that is for sure not allowed in an
// object, otherwise we could seek to find a closing brace until the end of the
// file
RESERVED_WORDS
.keys()
.chain([",,", "[", "]", "{"].iter())
.any(|word| *word == token.value)
})
.void(),
one_of(|c: Token| c.value == "}"),
))
.parse_next(i);
let has_closing_brace = maybe_closing_brace.is_ok();
if has_closing_brace {
let start_range = i.as_source_range();
// okay to unwrap here because we checked it was Ok above
let end_range = maybe_closing_brace.unwrap().1.as_source_range();

let e = ContextError {
context: vec![],
cause: Some(CompilationError::fatal(
SourceRange::from([start_range.start(), end_range.end(), end_range.module_id().as_usize()]),
"Unexpected character encountered. You might be missing a comma in between properties.",
)),
};
return Err(ErrMode::Cut(e));
}
}

let end = maybe_end?.end;
// Sort the object's properties from the noncode nodes.
let (properties, non_code_nodes): (Vec<_>, BTreeMap<usize, _>) = properties.into_iter().enumerate().fold(
(Vec::new(), BTreeMap::new()),
Expand All @@ -918,9 +1007,7 @@ pub(crate) fn object(i: &mut TokenSlice) -> PResult<Node<ObjectExpression>> {
(properties, non_code_nodes)
},
);
ignore_trailing_comma(i);
ignore_whitespace(i);
let end = close_brace(i)?.end;

let non_code_meta = NonCodeMeta {
non_code_nodes,
..Default::default()
Expand Down Expand Up @@ -3774,7 +3861,7 @@ mySk1 = startSketchAt([0, 0])"#;
assert_eq!(
src_expected,
src_actual,
"expected error would highlight {} but it actually highlighted {}",
"expected error would highlight `{}` but it actually highlighted `{}`",
&p[src_expected[0]..src_expected[1]],
&p[src_actual[0]..src_actual[1]],
);
Expand Down Expand Up @@ -3965,7 +4052,11 @@ z(-[["#,

#[test]
fn test_parse_weird_lots_of_fancy_brackets() {
assert_err(r#"zz({{{{{{{{)iegAng{{{{{{{##"#, "Unexpected token: (", [2, 3]);
assert_err(
r#"zz({{{{{{{{)iegAng{{{{{{{##"#,
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
[3, 4],
);
}

#[test]
Expand Down Expand Up @@ -4526,10 +4617,123 @@ let myBox = box([0,0], -3, -16, -10)
}

#[test]
fn test_parse_missing_closing_bracket() {
fn test_parse_array_missing_closing_bracket() {
let some_program_string = r#"
sketch001 = startSketchOn('XZ') |> startProfileAt([90.45, 119.09, %)"#;
assert_err(some_program_string, "Array is missing a closing bracket(`]`)", [51, 52]);
assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array",
[51, 67],
);
}
#[test]
fn test_parse_array_missing_comma() {
let some_program_string = r#"
sketch001 = startSketchOn('XZ') |> startProfileAt([90.45 119.09], %)"#;
assert_err(
some_program_string,
"Unexpected character encountered. You might be missing a comma in between elements.",
[52, 65],
);
}
#[test]
fn test_parse_array_reserved_word_early_exit() {
// since there is an early exit if encountering a reserved word, the error should be about
// that and not the missing comma
let some_program_string = r#"
sketch001 = startSketchOn('XZ') |> startProfileAt([90.45 $sketch], %)"#;
assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array",
[51, 52],
);
}
#[test]
fn test_parse_array_random_brace() {
let some_program_string = r#"
sketch001 = startSketchOn('XZ') |> startProfileAt([}], %)"#;
assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array",
[51, 52],
);
}

#[test]
fn test_parse_object_missing_closing_brace() {
let some_program_string = r#"{
foo = bar,"#;

assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
[0, 23],
);
}
#[test]
fn test_parse_object_reserved_word_early_exit() {
// since there is an early exit if encountering a reserved word, the error should be about
// that and not the missing comma
let some_program_string = r#"{bar = foo sketch = man}"#;

assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
[0, 1],
);
}
#[test]
fn test_parse_object_missing_comma() {
let some_program_string = r#"{
foo = bar,
bar = foo
bat = man
}"#;

assert_err(
some_program_string,
"Unexpected character encountered. You might be missing a comma in between properties.",
[37, 78],
);
}

#[test]
fn test_parse_object_missing_comma_one_line() {
let some_program_string = r#"{bar = foo bat = man}"#;

assert_err(
some_program_string,
"Unexpected character encountered. You might be missing a comma in between properties.",
[1, 21],
);
}

#[test]
fn test_parse_object_random_bracket() {
let some_program_string = r#"{]}"#;

assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
[0, 1],
);
}

#[test]
fn test_parse_object_shorthand_missing_comma() {
let some_program_string = r#"
bar = 1
{
foo = bar,
bar
bat = man
}"#;

assert_err(
some_program_string,
"Unexpected character encountered. You might be missing a comma in between properties.",
[54, 89],
);
}

#[test]
Expand Down
26 changes: 25 additions & 1 deletion src/wasm-lib/kcl/src/parsing/token/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use crate::{

mod tokeniser;

#[cfg(test)]
pub(crate) use tokeniser::RESERVED_WORDS;

// Note the ordering, it's important that `m` comes after `mm` and `cm`.
Expand Down Expand Up @@ -162,7 +161,9 @@ impl IntoIterator for TokenStream {
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
stream: &'a TokenStream,
/// Current position of the leading Token in the stream
start: usize,
/// The number of total Tokens in the stream
end: usize,
}

Expand Down Expand Up @@ -190,6 +191,21 @@ impl<'a> TokenSlice<'a> {
stream: self.stream,
}
}

pub fn as_source_range(&self) -> SourceRange {
let stream_len = self.stream.tokens.len();
let first_token = if stream_len == self.start {
&self.stream.tokens[self.start - 1]
} else {
self.token(0)
};
let last_token = if stream_len == self.end {
&self.stream.tokens[stream_len - 1]
} else {
self.token(self.end - self.start)
};
SourceRange::new(first_token.start, last_token.end, last_token.module_id)
}
}

impl<'a> IntoIterator for TokenSlice<'a> {
Expand Down Expand Up @@ -294,6 +310,14 @@ impl<'a> winnow::stream::StreamIsPartial for TokenSlice<'a> {
}
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had to implement this to use take_until. I wasn't sure if there was a better way to check and see if there was a closing bracket or not. Using take_till always returned Ok even if the token wasn't present

fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
self.iter()
.enumerate()
.find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
}
}

#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

Expand Down
Loading