Skip to content

Commit 92178c0

Browse files
authored
Merge pull request swiftlang#1463 from hamishknight/lets-go-back-to-the-island
Complete regex literal lexing
2 parents ffcbc91 + b2a2e75 commit 92178c0

36 files changed

+5810
-375
lines changed

CodeGeneration/Sources/SyntaxSupport/Classification.swift

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ public let SYNTAX_CLASSIFICATIONS: [SyntaxClassification] = [
5858
SyntaxClassification(name: "ObjectLiteral", description: "An image, color, etc. literal."),
5959
SyntaxClassification(name: "OperatorIdentifier", description: "An identifier referring to an operator."),
6060
SyntaxClassification(name: "PoundDirectiveKeyword", description: "A `#` keyword like `#warning`."),
61+
SyntaxClassification(name: "RegexLiteral", description: "A regex literal, including multiline regex literals."),
6162
SyntaxClassification(name: "StringInterpolationAnchor", description: "The opening and closing parenthesis of string interpolation."),
6263
SyntaxClassification(name: "StringLiteral", description: "A string literal including multiline string literals."),
6364
SyntaxClassification(name: "TypeIdentifier", description: "An identifier referring to a type."),

CodeGeneration/Sources/SyntaxSupport/ExprNodes.swift

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1199,9 +1199,27 @@ public let EXPR_NODES: [Node] = [
11991199
kind: "Expr",
12001200
children: [
12011201
Child(
1202-
name: "Regex",
1203-
kind: .token(choices: [.token(tokenKind: "RegexLiteralToken")])
1204-
)
1202+
name: "OpeningPounds",
1203+
kind: .token(choices: [.token(tokenKind: "ExtendedRegexDelimiterToken")]),
1204+
isOptional: true
1205+
),
1206+
Child(
1207+
name: "OpenSlash",
1208+
kind: .token(choices: [.token(tokenKind: "RegexSlashToken")])
1209+
),
1210+
Child(
1211+
name: "RegexPattern",
1212+
kind: .token(choices: [.token(tokenKind: "RegexLiteralPatternToken")])
1213+
),
1214+
Child(
1215+
name: "CloseSlash",
1216+
kind: .token(choices: [.token(tokenKind: "RegexSlashToken")])
1217+
),
1218+
Child(
1219+
name: "ClosingPounds",
1220+
kind: .token(choices: [.token(tokenKind: "ExtendedRegexDelimiterToken")]),
1221+
isOptional: true
1222+
),
12051223
]
12061224
),
12071225

CodeGeneration/Sources/SyntaxSupport/TokenSpec.swift

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ public let SYNTAX_TOKENS: [TokenSpec] = [
178178
PunctuatorSpec(name: "Ellipsis", kind: "ellipsis", text: "..."),
179179
PunctuatorSpec(name: "Equal", kind: "equal", text: "=", requiresLeadingSpace: true, requiresTrailingSpace: true),
180180
PunctuatorSpec(name: "ExclamationMark", kind: "exclaim_postfix", text: "!"),
181+
MiscSpec(name: "ExtendedRegexDelimiter", kind: "extended_regex_delimiter", nameForDiagnostics: "extended delimiter", classification: "RegexLiteral"),
181182
LiteralSpec(name: "FloatingLiteral", kind: "floating_literal", nameForDiagnostics: "floating literal", classification: "FloatingLiteral"),
182183
MiscSpec(name: "Identifier", kind: "identifier", nameForDiagnostics: "identifier", classification: "Identifier"),
183184
PunctuatorSpec(name: "InfixQuestionMark", kind: "question_infix", text: "?"),
@@ -202,7 +203,8 @@ public let SYNTAX_TOKENS: [TokenSpec] = [
202203
PunctuatorSpec(name: "PrefixAmpersand", kind: "amp_prefix", text: "&"),
203204
MiscSpec(name: "PrefixOperator", kind: "oper_prefix", nameForDiagnostics: "prefix operator", classification: "OperatorIdentifier"),
204205
MiscSpec(name: "RawStringDelimiter", kind: "raw_string_delimiter", nameForDiagnostics: "raw string delimiter"),
205-
LiteralSpec(name: "RegexLiteral", kind: "regex_literal", nameForDiagnostics: "regex literal"),
206+
MiscSpec(name: "RegexLiteralPattern", kind: "regex_literal_pattern", nameForDiagnostics: "regex pattern", classification: "RegexLiteral"),
207+
PunctuatorSpec(name: "RegexSlash", kind: "regex_slash", text: "/", classification: "RegexLiteral"),
206208
PunctuatorSpec(name: "RightAngle", kind: "r_angle", text: ">"),
207209
PunctuatorSpec(name: "RightBrace", kind: "r_brace", text: "}"),
208210
PunctuatorSpec(name: "RightParen", kind: "r_paren", text: ")"),

Sources/IDEUtils/generated/SyntaxClassification.swift

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ public enum SyntaxClassification {
4747
case operatorIdentifier
4848
/// A `#` keyword like `#warning`.
4949
case poundDirectiveKeyword
50+
/// A regex literal, including multiline regex literals.
51+
case regexLiteral
5052
/// The opening and closing parenthesis of string interpolation.
5153
case stringInterpolationAnchor
5254
/// A string literal including multiline string literals.
@@ -138,6 +140,8 @@ extension RawTokenKind {
138140
return .none
139141
case .exclamationMark:
140142
return .none
143+
case .extendedRegexDelimiter:
144+
return .regexLiteral
141145
case .floatingLiteral:
142146
return .floatingLiteral
143147
case .identifier:
@@ -186,8 +190,10 @@ extension RawTokenKind {
186190
return .operatorIdentifier
187191
case .rawStringDelimiter:
188192
return .none
189-
case .regexLiteral:
190-
return .none
193+
case .regexLiteralPattern:
194+
return .regexLiteral
195+
case .regexSlash:
196+
return .regexLiteral
191197
case .rightAngle:
192198
return .none
193199
case .rightBrace:

Sources/SwiftParser/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ add_swift_host_library(SwiftParser
4242
Lexer/Lexeme.swift
4343
Lexer/LexemeSequence.swift
4444
Lexer/Lexer.swift
45+
Lexer/RegexLiteralLexer.swift
4546
Lexer/UnicodeScalarExtensions.swift
4647
)
4748

Sources/SwiftParser/Declarations.swift

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1315,28 +1315,6 @@ extension Parser {
13151315
}
13161316

13171317
extension Parser {
1318-
/// Are we at a regular expression literal that could act as an operator?
1319-
private mutating func atRegexLiteralThatCouldBeAnOperator() -> Bool {
1320-
guard self.at(.regexLiteral) else {
1321-
return false
1322-
}
1323-
1324-
/// Try to re-lex the regex literal as an operator. If it succeeds and
1325-
/// consumes the entire regex literal, we're done.
1326-
return self.currentToken.tokenText.withBuffer {
1327-
(buffer: UnsafeBufferPointer<UInt8>) -> Bool in
1328-
var cursor = Lexer.Cursor(input: buffer, previous: 0)
1329-
guard buffer[0] == UInt8(ascii: "/") else { return false }
1330-
switch cursor.lexOperatorIdentifier(sourceBufferStart: cursor).tokenKind {
1331-
case .unknown:
1332-
return false
1333-
1334-
default:
1335-
return cursor.input.isEmpty
1336-
}
1337-
}
1338-
}
1339-
13401318
@_spi(RawSyntax)
13411319
public mutating func parseFuncDeclaration(
13421320
_ attrs: DeclAttributes,
@@ -1345,7 +1323,7 @@ extension Parser {
13451323
let (unexpectedBeforeFuncKeyword, funcKeyword) = self.eat(handle)
13461324
let unexpectedBeforeIdentifier: RawUnexpectedNodesSyntax?
13471325
let identifier: RawTokenSyntax
1348-
if self.at(anyIn: Operator.self) != nil || self.at(.exclamationMark, .prefixAmpersand) || self.atRegexLiteralThatCouldBeAnOperator() {
1326+
if self.at(anyIn: Operator.self) != nil || self.at(.exclamationMark, .prefixAmpersand) {
13491327
var name = self.currentToken.tokenText
13501328
if name.count > 1 && name.hasSuffix("<") && self.peek().rawTokenKind == .identifier {
13511329
name = SyntaxText(rebasing: name.dropLast())

Sources/SwiftParser/Expressions.swift

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,7 +1170,7 @@ extension Parser {
11701170
)
11711171
case (.rawStringDelimiter, _)?, (.stringQuote, _)?, (.multilineStringQuote, _)?, (.singleQuote, _)?:
11721172
return RawExprSyntax(self.parseStringLiteral())
1173-
case (.regexLiteral, _)?:
1173+
case (.extendedRegexDelimiter, _)?, (.regexSlash, _)?:
11741174
return RawExprSyntax(self.parseRegexLiteral())
11751175
case (.nilKeyword, let handle)?:
11761176
let nilKeyword = self.eat(handle)
@@ -1433,13 +1433,37 @@ extension Parser {
14331433
/// Grammar
14341434
/// =======
14351435
///
1436-
/// regular-expression-literal → '\' `Any valid regular expression characters` '\'
1436+
/// regular-expression-literal → '#'* '/' `Any valid regular expression characters` '/' '#'*
14371437
@_spi(RawSyntax)
14381438
public mutating func parseRegexLiteral() -> RawRegexLiteralExprSyntax {
1439-
let (unexpectedBeforeLiteral, literal) = self.expect(.regexLiteral)
1439+
// See if we have an opening set of pounds.
1440+
let openPounds = self.consume(if: .extendedRegexDelimiter)
1441+
1442+
// Parse the opening slash.
1443+
let (unexpectedBeforeSlash, openSlash) = self.expect(.regexSlash)
1444+
1445+
// If we had opening pounds, there should be no trivia for the slash.
1446+
if let openPounds = openPounds {
1447+
precondition(openPounds.trailingTriviaByteLength == 0 && openSlash.leadingTriviaByteLength == 0)
1448+
}
1449+
1450+
// Parse the pattern and closing slash, avoiding recovery or leading trivia
1451+
// as the lexer should provide the tokens exactly in order without trivia,
1452+
// otherwise they should be treated as missing.
1453+
let pattern = self.expectWithoutRecoveryOrLeadingTrivia(.regexLiteralPattern)
1454+
let closeSlash = self.expectWithoutRecoveryOrLeadingTrivia(.regexSlash)
1455+
1456+
// Finally, parse a closing set of pounds.
1457+
let (unexpectedBeforeClosePounds, closePounds) = parsePoundDelimiter(.extendedRegexDelimiter, matching: openPounds)
1458+
14401459
return RawRegexLiteralExprSyntax(
1441-
unexpectedBeforeLiteral,
1442-
regex: literal,
1460+
openingPounds: openPounds,
1461+
unexpectedBeforeSlash,
1462+
openSlash: openSlash,
1463+
regexPattern: pattern,
1464+
closeSlash: closeSlash,
1465+
unexpectedBeforeClosePounds,
1466+
closingPounds: closePounds,
14431467
arena: self.arena
14441468
)
14451469
}

0 commit comments

Comments
 (0)