-
Couldn't load subscription status.
- Fork 1.1k
SIP-72: dedented triple-quoted string literals #24185
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 18 commits
9eb87a2
5109265
ab9a589
00f04b8
40f397f
48680cc
3a36a0f
b181814
7e8e5a7
c9fbf70
aa18b7e
17205d9
300f300
5c8c892
b687fcc
3ea3e7e
b1613c7
2fd9e0e
f83defe
ac4c475
4f2c7f4
606e37a
02f9cf2
8862764
3aefb59
c288c38
68e1742
062b3ef
3ea6416
049b6cb
f4a507f
3475c58
c22a2b1
b4a134b
c96428f
9ef2e0b
b753383
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1349,6 +1349,93 @@ object Parsers { | |
| else | ||
| literal(inTypeOrSingleton = true) | ||
|
|
||
/** Dedent one part of a dedented (`'''`) string literal by stripping the
 *  closing delimiter's indentation from the start of its lines.
 *
 *  A line that starts with `closingIndent` has that prefix removed, a
 *  whitespace-only line becomes the empty string, and any other line is
 *  reported as a syntax error and kept unchanged. The resulting lines are
 *  joined with `"\n"`, whatever separator the source used.
 *
 *  @param str           The string content of this part
 *  @param offset        The source offset used as the base position for error reports
 *  @param closingIndent The whitespace that precedes the closing delimiter;
 *                       when empty, `str` is returned untouched
 *  @param isFirstPart   Whether this part directly follows the opening delimiter;
 *                       if so its first line is dropped, otherwise the first line
 *                       is kept raw
 *  @param isLastPart    Whether this part directly precedes the closing delimiter;
 *                       if so its last line (the indentation-only line) is dropped,
 *                       otherwise the last line is dedented like any other
 *  @return The dedented string, or `str` if `closingIndent` is empty, mixes
 *          tabs and spaces, or `str` contains no lines
 */
private def dedentString(str: String,
                         offset: Offset,
                         closingIndent: String,
                         isFirstPart: Boolean,
                         isLastPart: Boolean): String =
  val hasTabs = closingIndent.contains('\t')
  val hasSpaces = closingIndent.contains(' ')

  if closingIndent.isEmpty then str
  else if hasTabs && hasSpaces then
    // The closing indent itself is inconsistent, so there is nothing sensible to strip.
    syntaxError(
      em"dedented string literal cannot mix tabs and spaces in indentation",
      offset
    )
    str
  else
    // Pair every line with its separator-including form so that offsets can
    // be advanced by the exact number of source characters consumed.
    val linesAndWithSeps = str.linesIterator.zip(str.linesWithSeparators).toVector

    // An empty part (e.g. between two adjacent interpolations) has no lines;
    // taking `head`/`last` below would throw.
    if linesAndWithSeps.isEmpty then str
    else
      var lineOffset = offset

      def dedentLine(line: String, lineWithSep: String): String =
        val result =
          if line.startsWith(closingIndent) then line.substring(closingIndent.length)
          else if line.trim.isEmpty then "" // Empty or whitespace-only lines
          else
            // Distinguish "indented with the other whitespace character" from
            // "not indented far enough" to give a more helpful error.
            val lineIndent = line.takeWhile(_.isWhitespace)
            val lineHasTabs = lineIndent.contains('\t')
            val lineHasSpaces = lineIndent.contains(' ')
            val usesOtherIndentChar =
              (hasTabs && lineHasSpaces && !lineHasTabs) ||
              (hasSpaces && lineHasTabs && !lineHasSpaces)
            if usesOtherIndentChar then
              syntaxError(
                em"dedented string literal cannot mix tabs and spaces in indentation",
                offset
              )
            else
              syntaxError(
                em"line in dedented string literal must be indented at least as much as the closing delimiter",
                lineOffset
              )
            line
        lineOffset += lineWithSep.length // Make sure to include any line separator characters
        result

      // If this is the first part of a string, then the first line is the empty string following
      // the opening `'''` delimiter, so we skip it. If not, then the first line is immediately
      // following an interpolated value, and should be used raw without dedenting.
      val firstLine =
        if isFirstPart then Nil
        else
          val (line, lineWithSep) = linesAndWithSeps.head
          lineOffset += lineWithSep.length
          Seq(line)

      // Process all lines except the first and last, which require special handling.
      val dedented = linesAndWithSeps.drop(1).dropRight(1).map { (line, lineWithSep) =>
        dedentLine(line, lineWithSep)
      }

      // A single-line part that follows an interpolation was already emitted raw
      // as `firstLine`; it must not be emitted a second time as the last line.
      val onlyLineAlreadyEmitted = !isFirstPart && linesAndWithSeps.length == 1

      // If this is the last part of the string, then the last line is the indentation-only
      // line preceding the closing delimiter, and should be ignored. If not, then the last line
      // also needs to be dedented.
      val lastLine =
        if isLastPart || onlyLineAlreadyEmitted then Nil
        else
          val (line, lineWithSep) = linesAndWithSeps.last
          Seq(dedentLine(line, lineWithSep))

      (firstLine ++ dedented ++ lastLine).mkString("\n")
|
|
||
| /** Literal ::= SimpleLiteral | ||
| * | processedStringLiteral | ||
| * | symbolLiteral | ||
|
|
@@ -1357,7 +1444,10 @@ object Parsers { | |
| * @param negOffset The offset of a preceding `-' sign, if any. | ||
| * If the literal is not negated, negOffset == in.offset. | ||
| */ | ||
| def literal(negOffset: Int = in.offset, inPattern: Boolean = false, inTypeOrSingleton: Boolean = false, inStringInterpolation: Boolean = false): Tree = { | ||
| def literal(negOffset: Int = in.offset, | ||
| inPattern: Boolean = false, | ||
| inTypeOrSingleton: Boolean = false, | ||
| inStringInterpolation: Boolean = false): Tree = { | ||
| def literalOf(token: Token): Tree = { | ||
| val isNegated = negOffset < in.offset | ||
| def digits0 = in.removeNumberSeparators(in.strVal) | ||
|
|
@@ -1377,7 +1467,13 @@ object Parsers { | |
| case FLOATLIT => floatFromDigits(digits) | ||
| case DOUBLELIT | DECILIT | EXPOLIT => doubleFromDigits(digits) | ||
| case CHARLIT => in.strVal.head | ||
| case STRINGLIT | STRINGPART => in.strVal | ||
| case STRINGLIT | STRINGPART => | ||
| // Check if this is a dedented string (non-interpolated) | ||
| // For non-interpolated dedented strings, check if the token starts with ''' | ||
| val str = in.strVal | ||
| if (token == STRINGLIT && !inStringInterpolation && isDedentedStringLiteral(negOffset)) { | ||
| dedentString(str, negOffset, extractClosingIndent(str, negOffset), true, true) | ||
| } else str | ||
| case TRUE => true | ||
| case FALSE => false | ||
| case NULL => null | ||
|
|
@@ -1391,6 +1487,15 @@ object Parsers { | |
| Literal(Constant(value)) | ||
| } | ||
|
|
||
/** Tell whether the source buffer holds the three single quotes `'''` at
 *  positions `offset`, `offset + 1` and `offset + 2`, i.e. whether a literal
 *  starting at `offset` opens with the dedented-string delimiter.
 *
 *  @param offset The buffer position at which the literal starts
 *  @return `true` if all three characters exist and are `'`
 */
def isDedentedStringLiteral(offset: Int): Boolean =
  val chars = in.buf
  val delimiterLength = 3
  // Check the bound first so the character look-ups never run past the buffer end.
  offset + delimiterLength <= chars.length &&
    (0 until delimiterLength).forall(i => chars(offset + i) == '\'')
|
|
||
| if (inStringInterpolation) { | ||
| val t = in.token match { | ||
| case STRINGLIT | STRINGPART => | ||
|
|
@@ -1447,40 +1552,109 @@ object Parsers { | |
| in.charOffset + 1 < in.buf.length && | ||
| in.buf(in.charOffset) == '"' && | ||
| in.buf(in.charOffset + 1) == '"' | ||
| val isDedented = | ||
| in.charOffset + 2 < in.buf.length && | ||
| in.buf(in.charOffset - 1) == '\'' && | ||
lihaoyi marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| in.buf(in.charOffset) == '\'' && | ||
| in.buf(in.charOffset + 1) == '\'' | ||
| in.nextToken() | ||
| def nextSegment(literalOffset: Offset) = | ||
| segmentBuf += Thicket( | ||
| literal(literalOffset, inPattern = inPattern, inStringInterpolation = true), | ||
| atSpan(in.offset) { | ||
| if (in.token == IDENTIFIER) | ||
| termIdent() | ||
| else if (in.token == USCORE && inPattern) { | ||
| in.nextToken() | ||
| Ident(nme.WILDCARD) | ||
| } | ||
| else if (in.token == THIS) { | ||
| in.nextToken() | ||
| This(EmptyTypeIdent) | ||
| } | ||
| else if (in.token == LBRACE) | ||
| if (inPattern) Block(Nil, inBraces(pattern())) | ||
| else expr() | ||
| else { | ||
| report.error(InterpolatedStringError(), source.atSpan(Span(in.offset))) | ||
| EmptyTree | ||
| } | ||
| }) | ||
|
|
||
| var offsetCorrection = if isTripleQuoted then 3 else 1 | ||
| while (in.token == STRINGPART) | ||
| nextSegment(in.offset + offsetCorrection) | ||
| // Collect all string parts and their offsets | ||
| val stringParts = new ListBuffer[(String, Offset)] | ||
| val interpolatedExprs = new ListBuffer[Tree] | ||
|
|
||
| var offsetCorrection = if (isDedented) 3 else if (isTripleQuoted) 3 else 1 | ||
|
||
| while (in.token == STRINGPART) { | ||
| val literalOffset = in.offset + offsetCorrection | ||
| stringParts += ((in.strVal, literalOffset)) | ||
| offsetCorrection = 0 | ||
| if (in.token == STRINGLIT) | ||
| segmentBuf += literal(inPattern = inPattern, negOffset = in.offset + offsetCorrection, inStringInterpolation = true) | ||
| in.nextToken() | ||
|
|
||
| // Collect the interpolated expression | ||
| interpolatedExprs += atSpan(in.offset) { | ||
| if (in.token == IDENTIFIER) | ||
| termIdent() | ||
| else if (in.token == USCORE && inPattern) { | ||
| in.nextToken() | ||
| Ident(nme.WILDCARD) | ||
| } | ||
| else if (in.token == THIS) { | ||
| in.nextToken() | ||
| This(EmptyTypeIdent) | ||
| } | ||
| else if (in.token == LBRACE) | ||
| if (inPattern) Block(Nil, inBraces(pattern())) | ||
| else expr() | ||
| else { | ||
| report.error(InterpolatedStringError(), source.atSpan(Span(in.offset))) | ||
| EmptyTree | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Get the final STRINGLIT | ||
| val finalLiteral = if (in.token == STRINGLIT) { | ||
| val s = in.strVal | ||
| val off = in.offset + offsetCorrection | ||
| stringParts += ((s, off)) | ||
| in.nextToken() | ||
| true | ||
| } else false | ||
|
|
||
| val dedentedParts = | ||
| if (!isDedented || stringParts.isEmpty) stringParts | ||
| else { | ||
| val lastPart = stringParts.last._1 | ||
| val closingIndent = extractClosingIndent(lastPart, in.offset) | ||
| stringParts.zipWithIndex.map { case ((str, offset), index) => | ||
| val dedented = dedentString(str, in.offset, closingIndent, index == 0, index == stringParts.length - 1) | ||
| (dedented, offset) | ||
| } | ||
| } | ||
|
|
||
| // Build the segments with dedented strings | ||
| for ((str, expr) <- dedentedParts.zip(interpolatedExprs)) { | ||
| val (dedentedStr, offset) = str | ||
| segmentBuf += Thicket( | ||
| atSpan(offset, offset, offset + dedentedStr.length) { Literal(Constant(dedentedStr)) }, | ||
| expr | ||
| ) | ||
| } | ||
|
|
||
| // Add the final literal if present | ||
| if (finalLiteral) { | ||
| val (dedentedStr, offset) = dedentedParts.last | ||
| segmentBuf += atSpan(offset, offset, offset + dedentedStr.length) { Literal(Constant(dedentedStr)) } | ||
| } | ||
|
|
||
| InterpolatedString(interpolator, segmentBuf.toList) | ||
| } | ||
|
|
||
/** Extract the closing indentation from the last line of a string.
 *
 *  The closing indentation is the content of the final line of `str`,
 *  provided that line is not terminated by a line separator. It is what sits
 *  between the last line break and the closing delimiter.
 *
 *  @param str    The string content whose last line is inspected
 *  @param offset The source offset at which an error is reported
 *  @return The whitespace-only last line. The empty string is returned when
 *          `str` is empty or ends with a line separator, and also when the
 *          last line contains non-whitespace characters (after reporting a
 *          syntax error), so that callers skip dedenting instead of using
 *          string content as an indentation prefix.
 */
private def extractClosingIndent(str: String, offset: Offset): String =
  // If the last line is empty, `linesIterator` and `linesWithSeparators` skip
  // the empty string, so the last yielded line still carries its separator.
  // That case is recognized by comparing both forms and defaults to "".
  val closingIndent = str
    .linesIterator
    .zip(str.linesWithSeparators)
    .toSeq
    .lastOption
    .collect { case (line, lineWithSep) if line == lineWithSep => line }
    .getOrElse("")

  if closingIndent.forall(_.isWhitespace) then closingIndent
  else
    syntaxError(
      em"last line of dedented string literal must contain only whitespace before closing delimiter",
      offset
    )
    // Do not hand the offending text back as an "indent": that would make
    // every line fail the prefix check and produce follow-up errors.
    ""
|
|
||
| /* ------------- NEW LINES ------------------------------------------------- */ | ||
|
|
||
| def newLineOpt(): Unit = | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For new code in the compiler we use indentation syntax and the new conditional
`if / then / else` syntax. The old Java conditional syntax is already disabled under `-language.future`.