Skip to content

Commit ee95225

Browse files
committed
Better recover invalid regex in expression position
We can confidently lex unless we have a previous token that indicates that we're in an argument list. In such a case, an unapplied operator can be parsed, and we want to ensure we maintain that behavior.
1 parent c2b7ad2 commit ee95225

File tree

5 files changed

+255
-36
lines changed

5 files changed

+255
-36
lines changed

Sources/SwiftParser/Lexer/RegexLiteralLexer.swift

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ fileprivate struct RegexLiteralLexer {
232232
}
233233

234234
// Try to lex the opening delimiter.
235+
let openSlash = cursor
235236
guard cursor.advance(matching: "/") else {
236237
return .notARegex
237238
}
@@ -263,6 +264,10 @@ fileprivate struct RegexLiteralLexer {
263264
break
264265
}
265266
}
267+
if openSlash.previous == UInt8(ascii: "*") {
268+
// End of block comment, not a regex.
269+
return .notARegex
270+
}
266271
}
267272

268273
// If the delimiter allows multi-line, try skipping over any whitespace to a
@@ -719,15 +724,17 @@ extension Lexer.Cursor {
719724
if !mustBeRegex && !operatorStart.isInRegexLiteralPosition() {
720725
return nil
721726
}
722-
// For better recovery, we can confidently lex a regex literal if the
723-
// previous token was a binary operator and we're in a binary operator
724-
// position, as that would otherwise be illegal.
725-
// TODO: We could probably expand this to other token kinds (or rely on
726-
// `isInRegexLiteralPosition`) for better recovery, but we'd need to be
727-
// wary not to interfere with unapplied operator parsing, which are binary
728-
// operators in expression position. For now, this handles the most common
729-
// case.
730-
if self.previousTokenKind?.is(.binaryOperator, .equal) == true {
727+
// For better recovery, we can confidently lex a regex literal if we're in
728+
// regex literal position, and the '/' is part of what looks like a binary
729+
// operator. This would otherwise be illegal code, as binary operators
730+
// cannot appear in expression position. The only exception to this is if
731+
// the previous token indicates we're in an argument list, in which case
732+
// an unapplied operator is legal, and we should prefer to lex as that
733+
// instead.
734+
switch previousTokenKind {
735+
case .leftParen, .leftSquareBracket, .comma, .colon:
736+
break
737+
default:
731738
mustBeRegex = true
732739
}
733740
}

Tests/SwiftParserTest/DeclarationTests.swift

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -741,16 +741,15 @@ final class DeclarationTests: XCTestCase {
741741
func testExpressionMember() {
742742
assertParse(
743743
"""
744-
struct S {
745-
1️⃣/ 2️⃣#3️⃣#4️⃣#line 5️⃣25 "line-directive.swift"
746-
}
744+
struct S {1️⃣
745+
/2️⃣ ###line 25 "line-directive.swift"3️⃣
746+
4️⃣}
747747
""",
748748
diagnostics: [
749-
DiagnosticSpec(locationMarker: "1️⃣", message: "expected 'func' in function"),
750-
DiagnosticSpec(locationMarker: "2️⃣", message: "expected parameter clause in function signature"),
751-
DiagnosticSpec(locationMarker: "3️⃣", message: "expected identifier in macro expansion"),
752-
DiagnosticSpec(locationMarker: "4️⃣", message: "expected identifier in macro expansion"),
753-
DiagnosticSpec(locationMarker: "5️⃣", message: #"unexpected code '25 "line-directive.swift"' in struct"#),
749+
DiagnosticSpec(locationMarker: "1️⃣", message: "expected '}' to end struct"),
750+
DiagnosticSpec(locationMarker: "2️⃣", message: "bare slash regex literal may not start with space"),
751+
DiagnosticSpec(locationMarker: "3️⃣", message: "expected '/' to end regex literal"),
752+
DiagnosticSpec(locationMarker: "4️⃣", message: "extraneous brace at top level"),
754753
]
755754
)
756755
}

Tests/SwiftParserTest/RegexLiteralTests.swift

Lines changed: 223 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,10 @@ final class RegexLiteralTests: XCTestCase {
7272
func testUnterminated2() {
7373
assertParse(
7474
#"""
75-
1️⃣/
75+
/1️⃣
7676
"""#,
7777
diagnostics: [
78-
DiagnosticSpec(message: "extraneous code '/' at top level")
78+
DiagnosticSpec(message: "expected '/' to end regex literal")
7979
]
8080
)
8181
}
@@ -358,10 +358,10 @@ final class RegexLiteralTests: XCTestCase {
358358
func testOpeningSpace1() {
359359
assertParse(
360360
"""
361-
1️⃣/ a/
361+
/1️⃣ a/
362362
""",
363363
diagnostics: [
364-
DiagnosticSpec(message: "extraneous code '/ a/' at top level")
364+
DiagnosticSpec(message: "bare slash regex literal may not start with space")
365365
]
366366
)
367367
}
@@ -418,10 +418,10 @@ final class RegexLiteralTests: XCTestCase {
418418
func testOpeningAndClosingSpace1() {
419419
assertParse(
420420
"""
421-
1️⃣/ /
421+
/1️⃣ /
422422
""",
423423
diagnostics: [
424-
DiagnosticSpec(message: "extraneous code '/ /' at top level")
424+
DiagnosticSpec(message: "bare slash regex literal may not start with space")
425425
]
426426
)
427427
}
@@ -448,10 +448,10 @@ final class RegexLiteralTests: XCTestCase {
448448
func testOpeningAndClosingSpace4() {
449449
assertParse(
450450
"""
451-
1️⃣/ /
451+
/1️⃣ /
452452
""",
453453
diagnostics: [
454-
DiagnosticSpec(message: "extraneous code '/ /' at top level")
454+
DiagnosticSpec(message: "bare slash regex literal may not start with space")
455455
]
456456
)
457457
}
@@ -911,6 +911,221 @@ final class RegexLiteralTests: XCTestCase {
911911
)
912912
}
913913

914+
func testBinOpDisambiguation51() {
915+
// Unapplied operators, not regex.
916+
assertParse(
917+
"""
918+
foo(a: /, /)
919+
""",
920+
substructure: Syntax(
921+
TupleExprElementListSyntax([
922+
.init(
923+
label: "a",
924+
colon: .colonToken(),
925+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/")),
926+
trailingComma: .commaToken()
927+
),
928+
.init(
929+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))
930+
),
931+
])
932+
)
933+
)
934+
}
935+
936+
func testBinOpDisambiguation52() {
937+
// Unapplied operators, not regex.
938+
assertParse(
939+
"""
940+
foo(a, /, /)
941+
""",
942+
substructure: Syntax(
943+
TupleExprElementListSyntax([
944+
.init(
945+
expression: IdentifierExprSyntax(identifier: "a"),
946+
trailingComma: .commaToken()
947+
),
948+
.init(
949+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/")),
950+
trailingComma: .commaToken()
951+
),
952+
.init(
953+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))
954+
),
955+
])
956+
)
957+
)
958+
}
959+
960+
func testBinOpDisambiguation53() {
961+
// Unapplied operators, not regex.
962+
assertParse(
963+
"""
964+
foo(a, ^/, /)
965+
""",
966+
substructure: Syntax(
967+
TupleExprElementListSyntax([
968+
.init(
969+
expression: IdentifierExprSyntax(identifier: "a"),
970+
trailingComma: .commaToken()
971+
),
972+
.init(
973+
expression: IdentifierExprSyntax(identifier: .binaryOperator("^/")),
974+
trailingComma: .commaToken()
975+
),
976+
.init(
977+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))
978+
),
979+
])
980+
)
981+
)
982+
}
983+
984+
func testBinOpDisambiguation54() {
985+
// Unapplied operators, not regex.
986+
assertParse(
987+
"""
988+
foo(a: ^/, /)
989+
""",
990+
substructure: Syntax(
991+
TupleExprElementListSyntax([
992+
.init(
993+
label: "a",
994+
colon: .colonToken(),
995+
expression: IdentifierExprSyntax(identifier: .binaryOperator("^/")),
996+
trailingComma: .commaToken()
997+
),
998+
.init(
999+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))
1000+
),
1001+
])
1002+
)
1003+
)
1004+
}
1005+
1006+
func testBinOpDisambiguation55() {
1007+
// Unapplied operators, not regex.
1008+
assertParse(
1009+
"""
1010+
foo(^/, /)
1011+
""",
1012+
substructure: Syntax(
1013+
TupleExprElementListSyntax([
1014+
.init(
1015+
expression: IdentifierExprSyntax(identifier: .binaryOperator("^/")),
1016+
trailingComma: .commaToken()
1017+
),
1018+
.init(
1019+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))
1020+
),
1021+
])
1022+
)
1023+
)
1024+
}
1025+
1026+
func testBinOpDisambiguation56() {
1027+
// Unapplied operators, not regex.
1028+
assertParse(
1029+
"""
1030+
(^/, /)
1031+
""",
1032+
substructure: Syntax(
1033+
TupleExprElementListSyntax([
1034+
.init(
1035+
expression: IdentifierExprSyntax(identifier: .binaryOperator("^/")),
1036+
trailingComma: .commaToken()
1037+
),
1038+
.init(
1039+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))
1040+
),
1041+
])
1042+
)
1043+
)
1044+
}
1045+
1046+
func testBinOpDisambiguation57() {
1047+
// Unapplied operators, not regex.
1048+
assertParse(
1049+
"""
1050+
(/, /)
1051+
""",
1052+
substructure: Syntax(
1053+
TupleExprElementListSyntax([
1054+
.init(
1055+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/")),
1056+
trailingComma: .commaToken()
1057+
),
1058+
.init(
1059+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))
1060+
),
1061+
])
1062+
)
1063+
)
1064+
}
1065+
1066+
func testBinOpDisambiguation58() {
1067+
// Unapplied operators, not regex.
1068+
assertParse(
1069+
"""
1070+
x[/, /]
1071+
""",
1072+
substructure: Syntax(
1073+
TupleExprElementListSyntax([
1074+
.init(
1075+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/")),
1076+
trailingComma: .commaToken()
1077+
),
1078+
.init(
1079+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))
1080+
),
1081+
])
1082+
)
1083+
)
1084+
}
1085+
1086+
func testBinOpDisambiguation59() {
1087+
// Unapplied operators, not regex.
1088+
assertParse(
1089+
"""
1090+
x[^/, /]
1091+
""",
1092+
substructure: Syntax(
1093+
TupleExprElementListSyntax([
1094+
.init(
1095+
expression: IdentifierExprSyntax(identifier: .binaryOperator("^/")),
1096+
trailingComma: .commaToken()
1097+
),
1098+
.init(
1099+
expression: IdentifierExprSyntax(identifier: .binaryOperator("/"))
1100+
),
1101+
])
1102+
)
1103+
)
1104+
}
1105+
1106+
func testBinOpDisambiguation60() {
1107+
// Invalid. We can't confidently lex as a regex (as the lexer thinks it
1108+
// could be a subscript), so we get a parser error.
1109+
assertParse(
1110+
"""
1111+
[1️⃣/, /]
1112+
""",
1113+
diagnostics: [
1114+
DiagnosticSpec(message: "unexpected code '/, /' in array")
1115+
]
1116+
)
1117+
}
1118+
1119+
func testBinOpDisambiguation61() {
1120+
// Fine if there's no trailing space though.
1121+
assertParse(
1122+
"""
1123+
[/,/]
1124+
""",
1125+
substructure: Syntax(RegexLiteralExprSyntax(regexPattern: .regexLiteralPattern(",")))
1126+
)
1127+
}
1128+
9141129
func testPrefixOpSplitting1() {
9151130
assertParse(
9161131
"""

Tests/SwiftParserTest/translated/ForwardSlashRegexSkippingAllowedTests.swift

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,32 +31,32 @@ final class ForwardSlashRegexSkippingAllowedTests: XCTestCase {
3131
// Balanced `{}`, so okay.
3232
assertParse(
3333
"""
34-
func a() { 1️⃣/ {}/ }
34+
func a() { /1️⃣ {}/ }
3535
""",
3636
diagnostics: [
37-
DiagnosticSpec(message: "unexpected code '/ {}/' in function")
37+
DiagnosticSpec(message: "bare slash regex literal may not start with space")
3838
]
3939
)
4040
}
4141

4242
func testForwardSlashRegexSkippingAllowed5() {
4343
assertParse(
4444
#"""
45-
func b() { 1️⃣/ \{}/ }
45+
func b() { /1️⃣ \{}/ }
4646
"""#,
4747
diagnostics: [
48-
DiagnosticSpec(message: #"unexpected code '/ \{}/' in function"#)
48+
DiagnosticSpec(message: "bare slash regex literal may not start with space")
4949
]
5050
)
5151
}
5252

5353
func testForwardSlashRegexSkippingAllowed6() {
5454
assertParse(
5555
#"""
56-
func c() { 1️⃣/ {"{"}/ }
56+
func c() { /1️⃣ {"{"}/ }
5757
"""#,
5858
diagnostics: [
59-
DiagnosticSpec(message: #"unexpected code '/ {"{"}/' in function"#)
59+
DiagnosticSpec(message: "bare slash regex literal may not start with space")
6060
]
6161
)
6262
}

0 commit comments

Comments
 (0)