Skip to content

fix: add better errors for missing commas in arrays and objects #5210

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
260 changes: 232 additions & 28 deletions rust/kcl-lib/src/parsing/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
use std::{cell::RefCell, collections::BTreeMap};

use winnow::{
combinator::{alt, delimited, opt, peek, preceded, repeat, separated, separated_pair, terminated},
combinator::{alt, delimited, opt, peek, preceded, repeat, repeat_till, separated, separated_pair, terminated},
dispatch,
error::{ErrMode, StrContext, StrContextValue},
prelude::*,
stream::Stream,
token::{any, one_of, take_till},
token::{any, none_of, one_of, take_till},
};

use super::{
ast::types::{Ascription, ImportPath, LabelledExpression},
token::NumericSuffix,
token::{NumericSuffix, RESERVED_WORDS},
};
use crate::{
docs::StdLibFn,
Expand Down Expand Up @@ -746,21 +746,58 @@
)
.context(expected("array contents, a list of elements (like [1, 2, 3])"))
.parse_next(i)?;
ignore_trailing_comma(i);
ignore_whitespace(i);
let end = close_bracket(i)
.map_err(|e| {
if let Some(mut err) = e.clone().into_inner() {
err.cause = Some(CompilationError::fatal(
open.as_source_range(),
"Array is missing a closing bracket(`]`)",
));
ErrMode::Cut(err)
} else {
// ErrMode::Incomplete, not sure if it's actually possible to end up with this here
e
}
})?
.end;

let maybe_end = close_bracket(i).map_err(|e| {
if let Some(mut err) = e.clone().into_inner() {
let start_range = open.as_source_range();
let end_range = i.as_source_range();
err.cause = Some(CompilationError::fatal(
SourceRange::from([start_range.start(), end_range.start(), end_range.module_id().as_usize()]),
"Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array",
));
ErrMode::Cut(err)
} else {
// ErrMode::Incomplete, not sure if it's actually possible to end up with this here
e

Check warning on line 763 in rust/kcl-lib/src/parsing/parser.rs

View check run for this annotation

Codecov / codecov/patch

rust/kcl-lib/src/parsing/parser.rs#L763

Added line #L763 was not covered by tests
}
});

if maybe_end.is_err() {
// if there is a closing bracket at some point, but it wasn't the next token, it's likely that they forgot a comma between some
// of the elements
let maybe_closing_bracket: PResult<((), Token)> = peek(repeat_till(
0..,
none_of(|token: Token| {
// bail out early if we encounter something that is for sure not allowed in an
// array, otherwise we could seek to find a closing bracket until the end of the
// file
RESERVED_WORDS
.keys()
.chain([",,", "{", "}", "["].iter())
.any(|word| *word == token.value)
})
.void(),
one_of(|term: Token| term.value == "]"),
))
.parse_next(i);
let has_closing_bracket = maybe_closing_bracket.is_ok();
if has_closing_bracket {
let start_range = i.as_source_range();
// safe to unwrap here because we checked it was Ok above
let end_range = maybe_closing_bracket.unwrap().1.as_source_range();
let e = ContextError {
context: vec![],
cause: Some(CompilationError::fatal(
SourceRange::from([start_range.start(), end_range.end(), end_range.module_id().as_usize()]),
"Unexpected character encountered. You might be missing a comma in between elements.",
)),
};
return Err(ErrMode::Cut(e));
}
}
let end = maybe_end?.end;

// Sort the array's elements (i.e. expression nodes) from the noncode nodes.
let (elements, non_code_nodes): (Vec<_>, BTreeMap<usize, _>) = elements.into_iter().enumerate().fold(
Expand Down Expand Up @@ -819,7 +856,7 @@
}

fn object_property_same_key_and_val(i: &mut TokenSlice) -> PResult<Node<ObjectProperty>> {
let key = nameable_identifier.context(expected("the property's key (the name or identifier of the property), e.g. in 'height: 4', 'height' is the property key")).parse_next(i)?;
let key = nameable_identifier.context(expected("the property's key (the name or identifier of the property), e.g. in 'height = 4', 'height' is the property key")).parse_next(i)?;
ignore_whitespace(i);
Ok(Node {
start: key.start,
Expand All @@ -846,7 +883,7 @@
ignore_whitespace(i);
let expr = match expression
.context(expected(
"the value which you're setting the property to, e.g. in 'height: 4', the value is 4",
"the value which you're setting the property to, e.g. in 'height = 4', the value is 4",
))
.parse_next(i)
{
Expand Down Expand Up @@ -892,7 +929,7 @@
alt((
// Normally you need a comma.
comma_sep,
// But, if the array is ending, no need for a comma.
// But, if the object is ending, no need for a comma.
peek(preceded(opt(whitespace), close_brace)).void(),
))
.parse_next(i)
Expand Down Expand Up @@ -926,10 +963,62 @@
)),
)
.context(expected(
"a comma-separated list of key-value pairs, e.g. 'height: 4, width: 3'",
"a comma-separated list of key-value pairs, e.g. 'height = 4, width = 3'",
))
.parse_next(i)?;
ignore_trailing_comma(i);
ignore_whitespace(i);

let maybe_end = close_brace(i).map_err(|e| {
if let Some(mut err) = e.clone().into_inner() {
let start_range = open.as_source_range();
let end_range = i.as_source_range();
err.cause = Some(CompilationError::fatal(
SourceRange::from([start_range.start(), end_range.start(), end_range.module_id().as_usize()]),
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
));
ErrMode::Cut(err)
} else {
// ErrMode::Incomplete, not sure if it's actually possible to end up with this here
e

Check warning on line 983 in rust/kcl-lib/src/parsing/parser.rs

View check run for this annotation

Codecov / codecov/patch

rust/kcl-lib/src/parsing/parser.rs#L983

Added line #L983 was not covered by tests
}
});
if maybe_end.is_err() {
// if there is a closing brace at some point, but it wasn't the next token, it's likely that they forgot a comma between some
// of the properties
let maybe_closing_brace: PResult<((), Token)> = peek(repeat_till(
0..,
none_of(|token: Token| {
// bail out early if we encounter something that is for sure not allowed in an
// object, otherwise we could seek to find a closing brace until the end of the
// file
RESERVED_WORDS
.keys()
.chain([",,", "[", "]", "{"].iter())
.any(|word| *word == token.value)
})
.void(),
one_of(|c: Token| c.value == "}"),
))
.parse_next(i);
let has_closing_brace = maybe_closing_brace.is_ok();
if has_closing_brace {
let start_range = i.as_source_range();
// okay to unwrap here because we checked it was Ok above
let end_range = maybe_closing_brace.unwrap().1.as_source_range();

let e = ContextError {
context: vec![],
cause: Some(CompilationError::fatal(
SourceRange::from([start_range.start(), end_range.end(), end_range.module_id().as_usize()]),
"Unexpected character encountered. You might be missing a comma in between properties.",
)),
};
return Err(ErrMode::Cut(e));
}
}

let end = maybe_end?.end;
// Sort the object's properties from the noncode nodes.
let (properties, non_code_nodes): (Vec<_>, BTreeMap<usize, _>) = properties.into_iter().enumerate().fold(
(Vec::new(), BTreeMap::new()),
Expand All @@ -945,9 +1034,7 @@
(properties, non_code_nodes)
},
);
ignore_trailing_comma(i);
ignore_whitespace(i);
let end = close_brace(i)?.end;

let non_code_meta = NonCodeMeta {
non_code_nodes,
..Default::default()
Expand Down Expand Up @@ -3869,7 +3956,7 @@
assert_eq!(
src_expected,
src_actual,
"expected error would highlight {} but it actually highlighted {}",
"expected error would highlight `{}` but it actually highlighted `{}`",
&p[src_expected[0]..src_expected[1]],
&p[src_actual[0]..src_actual[1]],
);
Expand Down Expand Up @@ -4060,7 +4147,11 @@

#[test]
fn test_parse_weird_lots_of_fancy_brackets() {
assert_err(r#"zz({{{{{{{{)iegAng{{{{{{{##"#, "Unexpected token: (", [2, 3]);
assert_err(
r#"zz({{{{{{{{)iegAng{{{{{{{##"#,
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
[3, 4],
);
}

#[test]
Expand Down Expand Up @@ -4601,10 +4692,123 @@
}

#[test]
fn test_parse_missing_closing_bracket() {
fn test_parse_array_missing_closing_bracket() {
let some_program_string = r#"
sketch001 = startSketchOn('XZ') |> startProfileAt([90.45, 119.09, %)"#;
assert_err(some_program_string, "Array is missing a closing bracket(`]`)", [51, 52]);
assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array",
[51, 67],
);
}
#[test]
fn test_parse_array_missing_comma() {
let some_program_string = r#"
sketch001 = startSketchOn('XZ') |> startProfileAt([90.45 119.09], %)"#;
assert_err(
some_program_string,
"Unexpected character encountered. You might be missing a comma in between elements.",
[52, 65],
);
}
#[test]
fn test_parse_array_reserved_word_early_exit() {
// since there is an early exit if encountering a reserved word, the error should be about
// that and not the missing comma
let some_program_string = r#"
sketch001 = startSketchOn('XZ') |> startProfileAt([90.45 $struct], %)"#;
assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array",
[51, 52],
);
}
#[test]
fn test_parse_array_random_brace() {
let some_program_string = r#"
sketch001 = startSketchOn('XZ') |> startProfileAt([}], %)"#;
assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing bracket(`]`) for the array",
[51, 52],
);
}

#[test]
fn test_parse_object_missing_closing_brace() {
let some_program_string = r#"{
foo = bar,"#;

assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
[0, 23],
);
}
#[test]
fn test_parse_object_reserved_word_early_exit() {
// since there is an early exit if encountering a reserved word, the error should be about
// that and not the missing comma
let some_program_string = r#"{bar = foo struct = man}"#;

assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
[0, 1],
);
}
#[test]
fn test_parse_object_missing_comma() {
let some_program_string = r#"{
foo = bar,
bar = foo
bat = man
}"#;

assert_err(
some_program_string,
"Unexpected character encountered. You might be missing a comma in between properties.",
[37, 78],
);
}

#[test]
fn test_parse_object_missing_comma_one_line() {
let some_program_string = r#"{bar = foo bat = man}"#;

assert_err(
some_program_string,
"Unexpected character encountered. You might be missing a comma in between properties.",
[1, 21],
);
}

#[test]
fn test_parse_object_random_bracket() {
let some_program_string = r#"{]}"#;

assert_err(
some_program_string,
"Encountered an unexpected character(s) before finding a closing brace(`}`) for the object",
[0, 1],
);
}

#[test]
fn test_parse_object_shorthand_missing_comma() {
let some_program_string = r#"
bar = 1
{
foo = bar,
bar
bat = man
}"#;

assert_err(
some_program_string,
"Unexpected character encountered. You might be missing a comma in between properties.",
[54, 89],
);
}

#[test]
Expand Down
26 changes: 25 additions & 1 deletion rust/kcl-lib/src/parsing/token/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

mod tokeniser;

#[cfg(test)]
pub(crate) use tokeniser::RESERVED_WORDS;

// Note the ordering, it's important that `m` comes after `mm` and `cm`.
Expand Down Expand Up @@ -162,7 +161,9 @@
#[derive(Debug, Clone)]
pub(crate) struct TokenSlice<'a> {
stream: &'a TokenStream,
/// Current position of the leading Token in the stream
start: usize,
/// The number of total Tokens in the stream
end: usize,
}

Expand Down Expand Up @@ -190,6 +191,21 @@
stream: self.stream,
}
}

pub fn as_source_range(&self) -> SourceRange {
let stream_len = self.stream.tokens.len();
let first_token = if stream_len == self.start {
&self.stream.tokens[self.start - 1]
} else {
self.token(0)
};
let last_token = if stream_len == self.end {
&self.stream.tokens[stream_len - 1]
} else {
self.token(self.end - self.start)

Check warning on line 205 in rust/kcl-lib/src/parsing/token/mod.rs

View check run for this annotation

Codecov / codecov/patch

rust/kcl-lib/src/parsing/token/mod.rs#L205

Added line #L205 was not covered by tests
};
SourceRange::new(first_token.start, last_token.end, last_token.module_id)
}
}

impl<'a> IntoIterator for TokenSlice<'a> {
Expand Down Expand Up @@ -294,6 +310,14 @@
}
}

impl<'a> winnow::stream::FindSlice<&str> for TokenSlice<'a> {
fn find_slice(&self, substr: &str) -> Option<std::ops::Range<usize>> {
self.iter()
.enumerate()
.find_map(|(i, b)| if b.value == substr { Some(i..self.end) } else { None })
}

Check warning on line 318 in rust/kcl-lib/src/parsing/token/mod.rs

View check run for this annotation

Codecov / codecov/patch

rust/kcl-lib/src/parsing/token/mod.rs#L314-L318

Added lines #L314 - L318 were not covered by tests
}

#[derive(Clone, Debug)]
pub struct Checkpoint(usize, usize);

Expand Down
Loading