Skip to content

lexer: Treat more floats with empty exponent as valid tokens #131656

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 71 additions & 29 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
@@ -194,7 +194,7 @@ pub enum DocStyle {
pub enum LiteralKind {
/// `12_u8`, `0o100`, `0b120i99`, `1f32`.
Int { base: Base, empty_int: bool },
/// `12.34f32`, `1e3`, but not `1f32`.
/// `12.34f32`, `1e3` and `1e+`, but not `1f32` or `1em`.
Float { base: Base, empty_exponent: bool },
/// `'a'`, `'\\'`, `'''`, `';`
Char { terminated: bool },
@@ -409,8 +409,8 @@ impl Cursor<'_> {

// Numeric literal.
c @ '0'..='9' => {
let literal_kind = self.number(c);
let suffix_start = self.pos_within_token();
let (literal_kind, suffix_start) = self.number(c);
let suffix_start = suffix_start.unwrap_or(self.pos_within_token());
self.eat_literal_suffix();
TokenKind::Literal { kind: literal_kind, suffix_start }
}
@@ -606,7 +606,9 @@ impl Cursor<'_> {
}
}

fn number(&mut self, first_digit: char) -> LiteralKind {
/// Parses a number literal. The second element of the returned tuple is the
/// offset of the literal suffix when it differs from the cursor position on
/// return, and `None` otherwise.
fn number(&mut self, first_digit: char) -> (LiteralKind, Option<u32>) {
debug_assert!('0' <= self.prev() && self.prev() <= '9');
let mut base = Base::Decimal;
if first_digit == '0' {
@@ -616,21 +618,21 @@ impl Cursor<'_> {
base = Base::Binary;
self.bump();
if !self.eat_decimal_digits() {
return Int { base, empty_int: true };
return (Int { base, empty_int: true }, None);
}
}
'o' => {
base = Base::Octal;
self.bump();
if !self.eat_decimal_digits() {
return Int { base, empty_int: true };
return (Int { base, empty_int: true }, None);
}
}
'x' => {
base = Base::Hexadecimal;
self.bump();
if !self.eat_hexadecimal_digits() {
return Int { base, empty_int: true };
return (Int { base, empty_int: true }, None);
}
}
// Not a base prefix; consume additional digits.
@@ -642,40 +644,79 @@ impl Cursor<'_> {
'.' | 'e' | 'E' => {}

// Just a 0.
_ => return Int { base, empty_int: false },
_ => return (Int { base, empty_int: false }, None),
}
} else {
// No base prefix, parse number in the usual way.
self.eat_decimal_digits();
};

match self.first() {
match (self.first(), self.second()) {
// Don't be greedy if this is actually an
// integer literal followed by field/method access or a range pattern
// (`0..2` and `12.foo()`)
'.' if self.second() != '.' && !is_id_start(self.second()) => {
// might have stuff after the ., and if it does, it needs to start
// with a number
('.', second) if second != '.' && !is_id_start(second) => {
self.bump();
self.eat_decimal_digits();

let mut empty_exponent = false;
if self.first().is_ascii_digit() {
self.eat_decimal_digits();
match self.first() {
'e' | 'E' => {
self.bump();
empty_exponent = !self.eat_float_exponent();
}
_ => (),
let suffix_start = match (self.first(), self.second()) {
('e' | 'E', '_') => self.eat_underscore_exponent(),
('e' | 'E', '0'..='9' | '+' | '-') => {
// Definitely an exponent (which still can be empty).
self.bump();
empty_exponent = !self.eat_float_exponent();
None
}
_ => None,
};
(Float { base, empty_exponent }, suffix_start)
}
('e' | 'E', '_') => {
match self.eat_underscore_exponent() {
Some(suffix_start) => {
// The suffix begins at `e`, meaning the number is an integer.
(Int { base, empty_int: false }, Some(suffix_start))
}
None => (Float { base, empty_exponent: false }, None),
}
Float { base, empty_exponent }
}
'e' | 'E' => {
('e' | 'E', '0'..='9' | '+' | '-') => {
// Definitely an exponent (which still can be empty).
self.bump();
let empty_exponent = !self.eat_float_exponent();
Float { base, empty_exponent }
(Float { base, empty_exponent }, None)
}
_ => Int { base, empty_int: false },
_ => (Int { base, empty_int: false }, None),
}
}

/// Attempts to eat an exponent whose digits are preceded by underscores
/// (e.g. the `e___6` in `1.0e___6`).
///
/// Expects the cursor to sit on `e`/`E` with `_` right behind it. The `e`/`E`
/// and every consecutive `_` after it are always consumed.
///
/// Returns `None` when a digit follows the underscores: the digits are eaten
/// as the exponent and any suffix starts at the cursor, as usual. Returns
/// `Some(offset)` when no digit follows: there is no exponent, and the
/// already partially consumed suffix starts at `offset`, the position of the
/// `e`/`E`.
fn eat_underscore_exponent(&mut self) -> Option<u32> {
    debug_assert!(matches!(self.first(), 'e' | 'E'));
    debug_assert!(matches!(self.second(), '_'));

    // Remember where the `e`/`E` sits before consuming anything, in case it
    // turns out to be the first character of the suffix.
    let exponent_marker_pos = self.pos_within_token();

    // Step over the `e`/`E` itself, then over the run of underscores.
    self.bump();
    while self.first() == '_' {
        self.bump();
    }

    // Without a digit after the underscores this was never an exponent:
    // everything from the `e`/`E` onwards belongs to the suffix.
    if !self.first().is_ascii_digit() {
        return Some(exponent_marker_pos);
    }

    // A digit makes the exponent valid; swallow the rest of it so that the
    // suffix starts at the cursor.
    self.eat_decimal_digits();
    None
}

@@ -924,6 +965,7 @@ impl Cursor<'_> {
}
}

/// Returns `true` if a digit was consumed (rather than just '_'s).
fn eat_decimal_digits(&mut self) -> bool {
let mut has_digits = false;
loop {
@@ -961,20 +1003,20 @@ impl Cursor<'_> {
/// Eats the float exponent. Returns true if at least one digit was met,
/// and returns false otherwise.
fn eat_float_exponent(&mut self) -> bool {
debug_assert!(self.prev() == 'e' || self.prev() == 'E');
debug_assert!(matches!(self.prev(), 'e' | 'E'));
if self.first() == '-' || self.first() == '+' {
self.bump();
}
self.eat_decimal_digits()
}

// Eats the suffix of the literal, e.g. "u8".
/// Eats the suffix of the literal, e.g. "u8".
fn eat_literal_suffix(&mut self) {
self.eat_identifier();
self.eat_identifier()
}

// Eats the identifier. Note: succeeds on `_`, which isn't a valid
// identifier.
/// Eats the identifier. Note: succeeds on `_`, which isn't a valid
/// identifier.
fn eat_identifier(&mut self) {
if !is_id_start(self.first()) {
return;
2 changes: 2 additions & 0 deletions compiler/rustc_session/messages.ftl
Original file line number Diff line number Diff line change
@@ -14,6 +14,8 @@ session_embed_source_insufficient_dwarf_version = `-Zembed-source=y` requires at

session_embed_source_requires_debug_info = `-Zembed-source=y` requires debug information to be enabled

session_empty_float_exponent = expected at least one digit in exponent

session_expr_parentheses_needed = parentheses are required to parse this as an expression

session_failed_to_create_profiler = failed to create profiler: {$err}
15 changes: 15 additions & 0 deletions compiler/rustc_session/src/errors.rs
Original file line number Diff line number Diff line change
@@ -377,6 +377,10 @@ pub fn report_lit_error(
s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit())
}

/// Returns `true` when the suffix is exactly `e` or `E`, i.e. an exponent
/// marker with nothing after it.
fn looks_like_empty_exponent(s: &str) -> bool {
    matches!(s, "e" | "E")
}

// Try to lowercase the prefix if the prefix and suffix are valid.
fn fix_base_capitalisation(prefix: &str, suffix: &str) -> Option<String> {
let mut chars = suffix.chars();
@@ -409,6 +413,8 @@ pub fn report_lit_error(
if looks_like_width_suffix(&['i', 'u'], suf) {
// If it looks like a width, try to be helpful.
dcx.emit_err(InvalidIntLiteralWidth { span, width: suf[1..].into() })
} else if looks_like_empty_exponent(suf) {
dcx.emit_err(EmptyFloatExponent { span })
} else if let Some(fixed) = fix_base_capitalisation(lit.symbol.as_str(), suf) {
dcx.emit_err(InvalidNumLiteralBasePrefix { span, fixed })
} else {
@@ -420,6 +426,8 @@ pub fn report_lit_error(
if looks_like_width_suffix(&['f'], suf) {
// If it looks like a width, try to be helpful.
dcx.emit_err(InvalidFloatLiteralWidth { span, width: suf[1..].to_string() })
} else if looks_like_empty_exponent(suf) {
dcx.emit_err(EmptyFloatExponent { span })
} else {
dcx.emit_err(InvalidFloatLiteralSuffix { span, suffix: suf.to_string() })
}
@@ -489,3 +497,10 @@ pub(crate) struct SoftFloatIgnored;
#[note]
#[note(session_soft_float_deprecated_issue)]
pub(crate) struct SoftFloatDeprecated;

/// Error reported for a numeric literal whose suffix is a bare `e`/`E`, i.e.
/// an exponent with no digits. Renders the `session_empty_float_exponent`
/// message ("expected at least one digit in exponent").
#[derive(Diagnostic)]
#[diag(session_empty_float_exponent)]
pub(crate) struct EmptyFloatExponent {
// Primary location the error is attached to.
#[primary_span]
pub span: Span,
}
72 changes: 36 additions & 36 deletions tests/ui/consts/const-eval/issue-104390.stderr
Original file line number Diff line number Diff line change
@@ -1,39 +1,3 @@
error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:1:27
|
LL | fn f1() -> impl Sized { & 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:2:28
|
LL | fn f2() -> impl Sized { && 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:3:29
|
LL | fn f3() -> impl Sized { &'a 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:5:34
|
LL | fn f4() -> impl Sized { &'static 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:7:28
|
LL | fn f5() -> impl Sized { *& 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:8:29
|
LL | fn f6() -> impl Sized { &'_ 2E }
| ^^

error: borrow expressions cannot be annotated with lifetimes
--> $DIR/issue-104390.rs:3:25
|
@@ -76,5 +40,41 @@ LL - fn f6() -> impl Sized { &'_ 2E }
LL + fn f6() -> impl Sized { &2E }
|

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:1:27
|
LL | fn f1() -> impl Sized { & 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:2:28
|
LL | fn f2() -> impl Sized { && 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:3:29
|
LL | fn f3() -> impl Sized { &'a 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:5:34
|
LL | fn f4() -> impl Sized { &'static 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:7:28
|
LL | fn f5() -> impl Sized { *& 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:8:29
|
LL | fn f6() -> impl Sized { &'_ 2E }
| ^^

error: aborting due to 9 previous errors

12 changes: 6 additions & 6 deletions tests/ui/consts/issue-91434.stderr
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
error: expected at least one digit in exponent
--> $DIR/issue-91434.rs:2:11
|
LL | [9; [[9E; h]]];
| ^^

error[E0425]: cannot find value `h` in this scope
--> $DIR/issue-91434.rs:2:15
|
LL | [9; [[9E; h]]];
| ^ not found in this scope

error: expected at least one digit in exponent
--> $DIR/issue-91434.rs:2:11
|
LL | [9; [[9E; h]]];
| ^^

error: aborting due to 2 previous errors

For more information about this error, try `rustc --explain E0425`.
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
error: expected at least one digit in exponent
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:47
|
LL | const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹⋅s⁻²
| ^^^^^^

error: unknown start of token: \u{2212}
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:53
|
@@ -16,5 +10,11 @@ LL - const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹
LL + const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e-11; // m³⋅kg⁻¹⋅s⁻²
|

error: expected at least one digit in exponent
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:47
|
LL | const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹⋅s⁻²
| ^^^^^^

error: aborting due to 2 previous errors

16 changes: 16 additions & 0 deletions tests/ui/lexer/custom-suffixes-exponent-like.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
const _A: f64 = 1em;
//~^ ERROR invalid suffix `em` for number literal
const _B: f64 = 1e0m;
//~^ ERROR invalid suffix `m` for float literal
const _C: f64 = 1e_______________0m;
//~^ ERROR invalid suffix `m` for float literal
const _D: f64 = 1e_______________m;
//~^ ERROR invalid suffix `e_______________m` for number literal

// All the above patterns should not generate an error when used in a macro
macro_rules! do_nothing {
($($toks:tt)*) => {};
}
do_nothing!(1em 1e0m 1e_______________0m 1e_______________m);

fn main() {}
34 changes: 34 additions & 0 deletions tests/ui/lexer/custom-suffixes-exponent-like.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
error: invalid suffix `em` for number literal
--> $DIR/custom-suffixes-exponent-like.rs:1:17
|
LL | const _A: f64 = 1em;
| ^^^ invalid suffix `em`
|
= help: the suffix must be one of the numeric types (`u32`, `isize`, `f32`, etc.)

error: invalid suffix `m` for float literal
--> $DIR/custom-suffixes-exponent-like.rs:3:17
|
LL | const _B: f64 = 1e0m;
| ^^^^ invalid suffix `m`
|
= help: valid suffixes are `f32` and `f64`

error: invalid suffix `m` for float literal
--> $DIR/custom-suffixes-exponent-like.rs:5:17
|
LL | const _C: f64 = 1e_______________0m;
| ^^^^^^^^^^^^^^^^^^^ invalid suffix `m`
|
= help: valid suffixes are `f32` and `f64`

error: invalid suffix `e_______________m` for number literal
--> $DIR/custom-suffixes-exponent-like.rs:7:17
|
LL | const _D: f64 = 1e_______________m;
| ^^^^^^^^^^^^^^^^^^ invalid suffix `e_______________m`
|
= help: the suffix must be one of the numeric types (`u32`, `isize`, `f32`, etc.)

error: aborting due to 4 previous errors

22 changes: 22 additions & 0 deletions tests/ui/lexer/custom-suffixes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//@ check-pass

// This tests different kinds of valid suffixes.

fn main() {
const _A: f64 = 1.;
const _B: f64 = 1f64;
const _C: f64 = 1.0f64;
const _D: f64 = 1e6;
const _E: f64 = 1.0e9;
const _F: f64 = 1e-6;
const _G: f64 = 1.0e-6;
const _H: f64 = 1.0e06;
const _I: f64 = 1.0e+6;
// These ones are perhaps more surprising: underscores are accepted between
// the exponent marker (or its sign) and the exponent digits, as well as
// inside the digits themselves.
const _J: f64 = 1.0e0________________________6;
const _K: f64 = 1.0e________________________6;
const _L: f64 = 1.0e+________________________6;
const _M: f64 = 1.0e-________________________6;
const _N: f64 = 1.0e-________________________9;
const _O: f64 = 1e_______________0f64;
}