rust-lang · Jan 25, 2024
diff --git a/‎compiler/rustc_ast/src/util/literal.rs
Lines changed: 10 additions & 6 deletions b/‎compiler/rustc_ast/src/util/literal.rs
Lines changed: 10 additions & 6 deletions
diff --git a/‎compiler/rustc_ast_passes/src/feature_gate.rs
Lines changed: 1 addition & 0 deletions b/‎compiler/rustc_ast_passes/src/feature_gate.rs
Lines changed: 1 addition & 0 deletions
diff --git a/‎compiler/rustc_feature/src/unstable.rs
Lines changed: 2 additions & 0 deletions b/‎compiler/rustc_feature/src/unstable.rs
Lines changed: 2 additions & 0 deletions
diff --git a/‎compiler/rustc_lexer/src/unescape.rs
Lines changed: 64 additions & 25 deletions b/‎compiler/rustc_lexer/src/unescape.rs
Lines changed: 64 additions & 25 deletions
diff --git a/‎compiler/rustc_lexer/src/unescape/tests.rs
Lines changed: 41 additions & 20 deletions b/‎compiler/rustc_lexer/src/unescape/tests.rs
Lines changed: 41 additions & 20 deletions
diff --git a/‎compiler/rustc_parse/src/lexer/mod.rs
Lines changed: 8 additions & 6 deletions b/‎compiler/rustc_parse/src/lexer/mod.rs
Lines changed: 8 additions & 6 deletions
diff --git a/‎compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Lines changed: 2 additions & 1 deletion b/‎compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Lines changed: 2 additions & 1 deletion
diff --git a/‎compiler/rustc_parse_format/src/lib.rs
Lines changed: 8 additions & 7 deletions b/‎compiler/rustc_parse_format/src/lib.rs
Lines changed: 8 additions & 7 deletions
diff --git a/‎compiler/rustc_span/src/symbol.rs
Lines changed: 1 addition & 0 deletions b/‎compiler/rustc_span/src/symbol.rs
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/doc/unstable-book/src/language-features/mixed-utf8-literals.md
Lines changed: 16 additions & 0 deletions b/‎src/doc/unstable-book/src/language-features/mixed-utf8-literals.md
Lines changed: 16 additions & 0 deletions
diff --git a/‎src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
Lines changed: 8 additions & 4 deletions b/‎src/tools/rust-analyzer/crates/parser/src/lexed_str.rs
Lines changed: 8 additions & 4 deletions
diff --git a/‎src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs
Lines changed: 79 additions & 90 deletions b/‎src/tools/rust-analyzer/crates/syntax/src/ast/token_ext.rs
Lines changed: 79 additions & 90 deletions
diff --git a/‎src/tools/rust-analyzer/crates/syntax/src/validation.rs
Lines changed: 7 additions & 5 deletions b/‎src/tools/rust-analyzer/crates/syntax/src/validation.rs
Lines changed: 7 additions & 5 deletions
diff --git a/‎src/tools/tidy/src/ui_tests.rs
Lines changed: 1 addition & 1 deletion b/‎src/tools/tidy/src/ui_tests.rs
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/ui/attributes/key-value-non-ascii.rs
Lines changed: 1 addition & 1 deletion b/‎tests/ui/attributes/key-value-non-ascii.rs
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/ui/attributes/key-value-non-ascii.stderr
Lines changed: 6 additions & 8 deletions b/‎tests/ui/attributes/key-value-non-ascii.stderr
Lines changed: 6 additions & 8 deletions
diff --git a/‎tests/ui/feature-gates/feature-gate-mixed-utf8-literals.rs
Lines changed: 5 additions & 0 deletions b/‎tests/ui/feature-gates/feature-gate-mixed-utf8-literals.rs
Lines changed: 5 additions & 0 deletions
diff --git a/‎tests/ui/feature-gates/feature-gate-mixed-utf8-literals.stderr
Lines changed: 33 additions & 0 deletions b/‎tests/ui/feature-gates/feature-gate-mixed-utf8-literals.stderr
Lines changed: 33 additions & 0 deletions
diff --git a/‎tests/ui/mixed-utf8-literals/basic.rs
Lines changed: 19 additions & 0 deletions b/‎tests/ui/mixed-utf8-literals/basic.rs
Lines changed: 19 additions & 0 deletions
diff --git a/‎tests/ui/parser/byte-string-literals.rs
Lines changed: 0 additions & 2 deletions b/‎tests/ui/parser/byte-string-literals.rs
Lines changed: 0 additions & 2 deletions
diff --git a/‎tests/ui/parser/byte-string-literals.stderr
Lines changed: 2 additions & 19 deletions b/‎tests/ui/parser/byte-string-literals.stderr
Lines changed: 2 additions & 19 deletions
diff --git a/‎tests/ui/parser/issues/issue-23620-invalid-escapes.rs
Lines changed: 3 additions & 7 deletions b/‎tests/ui/parser/issues/issue-23620-invalid-escapes.rs
Lines changed: 3 additions & 7 deletions
diff --git a/‎tests/ui/parser/issues/issue-23620-invalid-escapes.stderr
Lines changed: 15 additions & 31 deletions b/‎tests/ui/parser/issues/issue-23620-invalid-escapes.stderr
Lines changed: 15 additions & 31 deletions
diff --git a/‎tests/ui/parser/raw/raw-byte-string-literals.rs
Lines changed: 0 additions & 1 deletion b/‎tests/ui/parser/raw/raw-byte-string-literals.rs
Lines changed: 0 additions & 1 deletion
diff --git a/‎tests/ui/parser/raw/raw-byte-string-literals.stderr
Lines changed: 2 additions & 8 deletions b/‎tests/ui/parser/raw/raw-byte-string-literals.stderr
Lines changed: 2 additions & 8 deletions
diff --git a/‎tests/ui/parser/unicode-control-codepoints.rs
Lines changed: 3 additions & 10 deletions b/‎tests/ui/parser/unicode-control-codepoints.rs
Lines changed: 3 additions & 10 deletions
diff --git a/‎tests/ui/parser/unicode-control-codepoints.stderr
Lines changed: 26 additions & 79 deletions b/‎tests/ui/parser/unicode-control-codepoints.stderr
Lines changed: 26 additions & 79 deletions
diff --git a/‎tests/ui/suggestions/multibyte-escapes.rs
Lines changed: 0 additions & 5 deletions b/‎tests/ui/suggestions/multibyte-escapes.rs
Lines changed: 0 additions & 5 deletions
diff --git a/‎tests/ui/suggestions/multibyte-escapes.stderr
Lines changed: 1 addition & 12 deletions b/‎tests/ui/suggestions/multibyte-escapes.stderr
Lines changed: 1 addition & 12 deletions
@@ -3,7 +3,7 @@
 use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
 use crate::token::{self, Token};
 use rustc_lexer::unescape::{
-    byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode, MixedUnit, Mode,
+    unescape_byte, unescape_char, unescape_mixed, unescape_unicode, MixedUnit, Mode,
 };
 use rustc_span::symbol::{kw, sym, Symbol};
 use rustc_span::Span;
@@ -49,7 +49,8 @@ impl LitKind {
 
         // For byte/char/string literals, chars and escapes have already been
         // checked in the lexer (in `cook_lexer_literal`). So we can assume all
-        // chars and escapes are valid here.
+        // chars and escapes are valid here, and ignore `Rfc3349` return
+        // values.
         Ok(match kind {
             token::Bool => {
                 assert!(symbol.is_bool_lit());
@@ -84,7 +85,7 @@ impl LitKind {
                     // Force-inlining here is aggressive but the closure is
                     // called on every char in the string, so it can be hot in
                     // programs with many long strings containing escapes.
-                    unescape_unicode(
+                    _ = unescape_unicode(
                         s,
                         Mode::Str,
                         &mut #[inline(always)]
@@ -108,8 +109,11 @@ impl LitKind {
             token::ByteStr => {
                 let s = symbol.as_str();
                 let mut buf = Vec::with_capacity(s.len());
-                unescape_unicode(s, Mode::ByteStr, &mut |_, c| match c {
-                    Ok(c) => buf.push(byte_from_char(c)),
+                _ = unescape_mixed(s, Mode::ByteStr, &mut |_, c| match c {
+                    Ok(MixedUnit::Char(c)) => {
+                        buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
+                    }
+                    Ok(MixedUnit::HighByte(b)) => buf.push(b),
                     Err(err) => {
                         assert!(!err.is_fatal(), "failed to unescape string literal")
                     }
@@ -125,7 +129,7 @@ impl LitKind {
             token::CStr => {
                 let s = symbol.as_str();
                 let mut buf = Vec::with_capacity(s.len());
-                unescape_mixed(s, Mode::CStr, &mut |_span, c| match c {
+                _ = unescape_mixed(s, Mode::CStr, &mut |_span, c| match c {
                     Ok(MixedUnit::Char(c)) => {
                         buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
                     }
 
@@ -508,6 +508,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session, features: &Features) {
             }
         };
     }
+    gate_all!(mixed_utf8_literals, r#"mixed utf8 b"" and br"" literals are experimental"#);
     gate_all!(
         if_let_guard,
         "`if let` guards are experimental",
 
@@ -520,6 +520,8 @@ declare_features! (
     /// standard library until the soundness issues with specialization
     /// are fixed.
     (unstable, min_specialization, "1.7.0", Some(31844)),
+    /// Allows mixed utf8 b"" and br"" literals.
+    (unstable, mixed_utf8_literals, "CURRENT_RUSTC_VERSION", Some(116907)),
     /// Allows qualified paths in struct expressions, struct patterns and tuple struct patterns.
     (unstable, more_qualified_paths, "1.54.0", Some(86935)),
     /// Allows the `#[must_not_suspend]` attribute.
 
@@ -85,7 +85,7 @@ impl EscapeError {
 ///
 /// Values are returned by invoking `callback`. For `Char` and `Byte` modes,
 /// the callback will be called exactly once.
-pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
+pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F) -> Rfc3349
 where
     F: FnMut(Range<usize>, Result<char, EscapeError>),
 {
@@ -94,16 +94,17 @@ where
             let mut chars = src.chars();
             let res = unescape_char_or_byte(&mut chars, mode);
             callback(0..(src.len() - chars.as_str().len()), res);
+            Rfc3349::Unused // rfc3349 not relevant for `Mode::{Char,Byte}`
         }
-        Str | ByteStr => unescape_non_raw_common(src, mode, callback),
+        Str => unescape_non_raw_common(src, mode, callback),
         RawStr | RawByteStr => check_raw_common(src, mode, callback),
         RawCStr => check_raw_common(src, mode, &mut |r, mut result| {
             if let Ok('\0') = result {
                 result = Err(EscapeError::NulInCStr);
             }
             callback(r, result)
         }),
-        CStr => unreachable!(),
+        ByteStr | CStr => unreachable!(),
     }
 }
 
@@ -142,18 +143,19 @@ impl From<u8> for MixedUnit {
 /// a sequence of escaped characters or errors.
 ///
 /// Values are returned by invoking `callback`.
-pub fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F)
+pub fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F) -> Rfc3349
 where
     F: FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
 {
     match mode {
+        ByteStr => unescape_non_raw_common(src, mode, callback),
         CStr => unescape_non_raw_common(src, mode, &mut |r, mut result| {
             if let Ok(MixedUnit::Char('\0')) = result {
                 result = Err(EscapeError::NulInCStr);
             }
             callback(r, result)
         }),
-        Char | Byte | Str | RawStr | ByteStr | RawByteStr | RawCStr => unreachable!(),
+        Char | Byte | Str | RawStr | RawByteStr | RawCStr => unreachable!(),
     }
 }
 
@@ -169,6 +171,15 @@ pub fn unescape_byte(src: &str) -> Result<u8, EscapeError> {
     unescape_char_or_byte(&mut src.chars(), Byte).map(byte_from_char)
 }
 
+/// Used to indicate if rfc3349 (mixed-utf8-literals) was required for the
+/// literal to be valid. Once rfc3349 is stabilized this type can be removed.
+#[derive(Debug, PartialEq)]
+#[must_use]
+pub enum Rfc3349 {
+    Used,
+    Unused,
+}
+
 /// What kind of literal do we parse.
 #[derive(Debug, Clone, Copy, PartialEq)]
 pub enum Mode {
@@ -205,17 +216,25 @@ impl Mode {
 
     /// Are unicode (non-ASCII) chars allowed?
     #[inline]
-    fn allow_unicode_chars(self) -> bool {
+    fn allow_unicode_chars(self, rfc3349: &mut Rfc3349) -> bool {
         match self {
-            Byte | ByteStr | RawByteStr => false,
+            Byte => false,
+            ByteStr | RawByteStr => {
+                *rfc3349 = Rfc3349::Used;
+                true
+            }
             Char | Str | RawStr | CStr | RawCStr => true,
         }
     }
 
     /// Are unicode escapes (`\u`) allowed?
-    fn allow_unicode_escapes(self) -> bool {
+    fn allow_unicode_escapes(self, rfc3349: &mut Rfc3349) -> bool {
         match self {
-            Byte | ByteStr => false,
+            Byte => false,
+            ByteStr => {
+                *rfc3349 = Rfc3349::Used;
+                true
+            }
             Char | Str | CStr => true,
             RawByteStr | RawStr | RawCStr => unreachable!(),
         }
@@ -233,6 +252,7 @@ impl Mode {
 fn scan_escape<T: From<char> + From<u8>>(
     chars: &mut Chars<'_>,
     mode: Mode,
+    rfc3349: &mut Rfc3349,
 ) -> Result<T, EscapeError> {
     // Previous character was '\\', unescape what follows.
     let res: char = match chars.next().ok_or(EscapeError::LoneSlash)? {
@@ -262,13 +282,17 @@ fn scan_escape<T: From<char> + From<u8>>(
                 Ok(T::from(value as u8))
             };
         }
-        'u' => return scan_unicode(chars, mode.allow_unicode_escapes()).map(T::from),
+        'u' => return scan_unicode(chars, mode, rfc3349).map(T::from),
         _ => return Err(EscapeError::InvalidEscape),
     };
     Ok(T::from(res))
 }
 
-fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result<char, EscapeError> {
+fn scan_unicode(
+    chars: &mut Chars<'_>,
+    mode: Mode,
+    rfc3349: &mut Rfc3349,
+) -> Result<char, EscapeError> {
     // We've parsed '\u', now we have to parse '{..}'.
 
     if chars.next() != Some('{') {
@@ -296,7 +320,7 @@ fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result<ch
 
                 // Incorrect syntax has higher priority for error reporting
                 // than unallowed value for a literal.
-                if !allow_unicode_escapes {
+                if !mode.allow_unicode_escapes(rfc3349) {
                     return Err(EscapeError::UnicodeEscapeInByte);
                 }
 
@@ -322,18 +346,27 @@ fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result<ch
 }
 
 #[inline]
-fn ascii_check(c: char, allow_unicode_chars: bool) -> Result<char, EscapeError> {
-    if allow_unicode_chars || c.is_ascii() { Ok(c) } else { Err(EscapeError::NonAsciiCharInByte) }
+fn ascii_check(c: char, mode: Mode, rfc3349: &mut Rfc3349) -> Result<char, EscapeError> {
+    // We must check `is_ascii` first, to avoid setting `rfc3349` unnecessarily.
+    if c.is_ascii() || mode.allow_unicode_chars(rfc3349) {
+        Ok(c)
+    } else {
+        Err(EscapeError::NonAsciiCharInByte)
+    }
 }
 
 fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
     let c = chars.next().ok_or(EscapeError::ZeroChars)?;
+    let mut rfc3349 = Rfc3349::Unused;
     let res = match c {
-        '\\' => scan_escape(chars, mode),
+        '\\' => scan_escape(chars, mode, &mut rfc3349),
         '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar),
         '\r' => Err(EscapeError::BareCarriageReturn),
-        _ => ascii_check(c, mode.allow_unicode_chars()),
+        _ => ascii_check(c, mode, &mut rfc3349),
     }?;
+
+    assert_eq!(rfc3349, Rfc3349::Unused); // rfc3349 not relevant for `Mode::{Char,Byte}`
+
     if chars.next().is_some() {
         return Err(EscapeError::MoreThanOneChar);
     }
@@ -342,12 +375,16 @@ fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, Esca
 
 /// Takes a contents of a string literal (without quotes) and produces a
 /// sequence of escaped characters or errors.
-fn unescape_non_raw_common<F, T: From<char> + From<u8>>(src: &str, mode: Mode, callback: &mut F)
+fn unescape_non_raw_common<F, T: From<char> + From<u8>>(
+    src: &str,
+    mode: Mode,
+    callback: &mut F,
+) -> Rfc3349
 where
     F: FnMut(Range<usize>, Result<T, EscapeError>),
 {
     let mut chars = src.chars();
-    let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop
+    let mut rfc3349 = Rfc3349::Unused;
 
     // The `start` and `end` computation here is complicated because
     // `skip_ascii_whitespace` makes us to skip over chars without counting
@@ -367,16 +404,17 @@ where
                         });
                         continue;
                     }
-                    _ => scan_escape::<T>(&mut chars, mode),
+                    _ => scan_escape::<T>(&mut chars, mode, &mut rfc3349),
                 }
             }
             '"' => Err(EscapeError::EscapeOnlyChar),
             '\r' => Err(EscapeError::BareCarriageReturn),
-            _ => ascii_check(c, allow_unicode_chars).map(T::from),
+            _ => ascii_check(c, mode, &mut rfc3349).map(T::from),
         };
         let end = src.len() - chars.as_str().len();
         callback(start..end, res);
     }
+    rfc3349
 }
 
 fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
@@ -409,12 +447,12 @@ where
 /// sequence of characters or errors.
 /// NOTE: Raw strings do not perform any explicit character escaping, here we
 /// only produce errors on bare CR.
-fn check_raw_common<F>(src: &str, mode: Mode, callback: &mut F)
+fn check_raw_common<F>(src: &str, mode: Mode, callback: &mut F) -> Rfc3349
 where
     F: FnMut(Range<usize>, Result<char, EscapeError>),
 {
     let mut chars = src.chars();
-    let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop
+    let mut rfc3349 = Rfc3349::Unused;
 
     // The `start` and `end` computation here matches the one in
     // `unescape_non_raw_common` for consistency, even though this function
@@ -423,16 +461,17 @@ where
         let start = src.len() - chars.as_str().len() - c.len_utf8();
         let res = match c {
             '\r' => Err(EscapeError::BareCarriageReturnInRawString),
-            _ => ascii_check(c, allow_unicode_chars),
+            _ => ascii_check(c, mode, &mut rfc3349),
         };
         let end = src.len() - chars.as_str().len();
         callback(start..end, res);
     }
+    rfc3349
 }
 
 #[inline]
-pub fn byte_from_char(c: char) -> u8 {
+pub(crate) fn byte_from_char(c: char) -> u8 {
     let res = c as u32;
-    debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr");
+    debug_assert!(res <= u8::MAX as u32, "guaranteed because of Byte");
     res as u8
 }
@@ -100,7 +100,9 @@ fn test_unescape_char_good() {
 fn test_unescape_str_warn() {
     fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
         let mut unescaped = Vec::with_capacity(literal.len());
-        unescape_unicode(literal, Mode::Str, &mut |range, res| unescaped.push((range, res)));
+        let rfc3349 =
+            unescape_unicode(literal, Mode::Str, &mut |range, res| unescaped.push((range, res)));
+        assert_eq!(rfc3349, Rfc3349::Unused); // rfc3349 not relevant for `Mode::Str`
         assert_eq!(unescaped, expected);
     }
 
@@ -124,14 +126,15 @@ fn test_unescape_str_warn() {
 fn test_unescape_str_good() {
     fn check(literal_text: &str, expected: &str) {
         let mut buf = Ok(String::with_capacity(literal_text.len()));
-        unescape_unicode(literal_text, Mode::Str, &mut |range, c| {
+        let rfc3349 = unescape_unicode(literal_text, Mode::Str, &mut |range, c| {
             if let Ok(b) = &mut buf {
                 match c {
                     Ok(c) => b.push(c),
                     Err(e) => buf = Err((range, e)),
                 }
             }
         });
+        assert_eq!(rfc3349, Rfc3349::Unused); // rfc3349 not relevant for `Mode::Str`
         assert_eq!(buf.as_deref(), Ok(expected))
     }
 
@@ -239,32 +242,43 @@ fn test_unescape_byte_good() {
 
 #[test]
 fn test_unescape_byte_str_good() {
-    fn check(literal_text: &str, expected: &[u8]) {
-        let mut buf = Ok(Vec::with_capacity(literal_text.len()));
-        unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| {
-            if let Ok(b) = &mut buf {
+    fn check(literal_text: &str, expected: &[u8], rfc3349_expected: Rfc3349) {
+        let mut buf_res = Ok(Vec::with_capacity(literal_text.len()));
+        let rfc3349_actual = unescape_mixed(literal_text, Mode::ByteStr, &mut |range, c| {
+            if let Ok(buf) = &mut buf_res {
                 match c {
-                    Ok(c) => b.push(byte_from_char(c)),
-                    Err(e) => buf = Err((range, e)),
+                    Ok(MixedUnit::Char(c)) => {
+                        buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
+                    }
+                    Ok(MixedUnit::HighByte(b)) => buf.push(b),
+                    Err(e) => buf_res = Err((range, e)),
                 }
             }
         });
-        assert_eq!(buf.as_deref(), Ok(expected))
+        assert_eq!(rfc3349_actual, rfc3349_expected);
+        assert_eq!(buf_res.as_deref(), Ok(expected))
     }
 
-    check("foo", b"foo");
-    check("", b"");
-    check(" \t\n", b" \t\n");
+    check("foo", b"foo", Rfc3349::Unused);
+    check("", b"", Rfc3349::Unused);
+    check(" \t\n", b" \t\n", Rfc3349::Unused);
+
+    check("hello \\\n     world", b"hello world", Rfc3349::Unused);
+    check("thread's", b"thread's", Rfc3349::Unused);
 
-    check("hello \\\n     world", b"hello world");
-    check("thread's", b"thread's")
+    let a_pound_up_smiley = &[0x61, 0xc2, 0xa3, 0xe2, 0x86, 0x91, 0xf0, 0x9f, 0x98, 0x80];
+    check("a£↑😀", a_pound_up_smiley, Rfc3349::Used);
+    check(r"\u{61}\u{a3}\u{2191}\u{1f600}", a_pound_up_smiley, Rfc3349::Used);
+    check(r"\x00\x7f\x80\xffa¥", &[0, 0x7f, 0x80, 0xff, 0x61, 0xc2, 0xa5], Rfc3349::Used);
 }
 
 #[test]
 fn test_unescape_raw_str() {
     fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
         let mut unescaped = Vec::with_capacity(literal.len());
-        unescape_unicode(literal, Mode::RawStr, &mut |range, res| unescaped.push((range, res)));
+        let rfc3349 =
+            unescape_unicode(literal, Mode::RawStr, &mut |range, res| unescaped.push((range, res)));
+        assert_eq!(rfc3349, Rfc3349::Unused); // rfc3349 not relevant for `Mode::RawStr`
         assert_eq!(unescaped, expected);
     }
 
@@ -274,13 +288,20 @@ fn test_unescape_raw_str() {
 
 #[test]
 fn test_unescape_raw_byte_str() {
-    fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
+    fn check(
+        literal: &str,
+        expected: &[(Range<usize>, Result<char, EscapeError>)],
+        rfc3349_expected: Rfc3349,
+    ) {
         let mut unescaped = Vec::with_capacity(literal.len());
-        unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| unescaped.push((range, res)));
+        let rfc3349_actual = unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| {
+            unescaped.push((range, res))
+        });
+        assert_eq!(rfc3349_actual, rfc3349_expected);
         assert_eq!(unescaped, expected);
     }
 
-    check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
-    check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByte))]);
-    check("🦀a", &[(0..4, Err(EscapeError::NonAsciiCharInByte)), (4..5, Ok('a'))]);
+    check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))], Rfc3349::Unused);
+    check("🦀", &[(0..4, Ok('🦀'))], Rfc3349::Used);
+    check("¥a", &[(0..2, Ok('¥')), (2..3, Ok('a'))], Rfc3349::Used);
 }
@@ -8,9 +8,8 @@ use rustc_ast::token::{self, CommentKind, Delimiter, Token, TokenKind};
 use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
 use rustc_errors::{error_code, Applicability, DiagCtxt, DiagnosticBuilder, StashKey};
-use rustc_lexer::unescape::{self, EscapeError, Mode};
-use rustc_lexer::{Base, DocStyle, RawStrError};
-use rustc_lexer::{Cursor, LiteralKind};
+use rustc_lexer::unescape::{self, EscapeError, Mode, Rfc3349};
+use rustc_lexer::{Base, Cursor, DocStyle, LiteralKind, RawStrError};
 use rustc_session::lint::builtin::{
     RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
 };
@@ -436,7 +435,7 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
                         .with_code(error_code!(E0766))
                         .emit()
                 }
-                self.cook_unicode(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
+                self.cook_mixed(token::ByteStr, Mode::ByteStr, start, end, 2, 1) // b" "
             }
             rustc_lexer::LiteralKind::CStr { terminated } => {
                 if !terminated {
@@ -697,13 +696,13 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
         end: BytePos,
         prefix_len: u32,
         postfix_len: u32,
-        unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)),
+        unescape: fn(&str, Mode, &mut dyn FnMut(Range<usize>, Result<(), EscapeError>)) -> Rfc3349,
     ) -> (token::LitKind, Symbol) {
         let mut has_fatal_err = false;
         let content_start = start + BytePos(prefix_len);
         let content_end = end - BytePos(postfix_len);
         let lit_content = self.str_from_to(content_start, content_end);
-        unescape(lit_content, mode, &mut |range, result| {
+        let rfc3349 = unescape(lit_content, mode, &mut |range, result| {
             // Here we only check for errors. The actual unescaping is done later.
             if let Err(err) = result {
                 let span_with_quotes = self.mk_sp(start, end);
@@ -725,6 +724,9 @@ impl<'sess, 'src> StringReader<'sess, 'src> {
                 );
             }
         });
+        if rfc3349 == Rfc3349::Used {
+            self.sess.gated_spans.gate(sym::mixed_utf8_literals, self.mk_sp(start, end));
+        }
 
         // We normally exclude the quotes for the symbol, but for errors we
         // include it because it results in clearer error messages.
 
@@ -175,6 +175,7 @@ pub(crate) fn emit_unescape_error(
         EscapeError::NonAsciiCharInByte => {
             let (c, span) = last_char();
             let desc = match mode {
+                // Note: once rfc3349 stabilizes, only `Mode::Byte` will be reachable here.
                 Mode::Byte => "byte literal",
                 Mode::ByteStr => "byte string literal",
                 Mode::RawByteStr => "raw byte string literal",
@@ -188,7 +189,7 @@ pub(crate) fn emit_unescape_error(
             };
             err.span_label(span, format!("must be ASCII{postfix}"));
             // Note: the \\xHH suggestions are not given for raw byte string
-            // literals, because they are araw and so cannot use any escapes.
+            // literals, because they cannot use escapes.
             if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
                 err.span_suggestion(
                     span,
 
@@ -1056,13 +1056,14 @@ fn find_width_map_from_snippet(
 fn unescape_string(string: &str) -> Option<string::String> {
     let mut buf = string::String::new();
     let mut ok = true;
-    unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| {
-        match unescaped_char {
-            Ok(c) => buf.push(c),
-            Err(_) => ok = false,
-        }
-    });
-
+    let rfc3349 =
+        unescape::unescape_unicode(string, unescape::Mode::Str, &mut |_, unescaped_char| {
+            match unescaped_char {
+                Ok(c) => buf.push(c),
+                Err(_) => ok = false,
+            }
+        });
+    assert_eq!(rfc3349, unescape::Rfc3349::Unused); // rfc3349 not relevant for `Mode::Str`
     ok.then_some(buf)
 }
 
 
@@ -1060,6 +1060,7 @@ symbols! {
         mir_unwind_unreachable,
         mir_variant,
         miri,
+        mixed_utf8_literals,
         mmx_reg,
         modifiers,
         module,
 
@@ -0,0 +1,16 @@
+# `mixed_utf8_literals`
+
+The tracking issue for this feature is: [#116907]
+
+[#116907]: https://github.com/rust-lang/rust/issues/116907
+
+------------------------
+
+This feature extends the syntax of string literals in the following ways.
+- Byte string literals can contain unicode chars (e.g. `b"🦀"`) and unicode
+  escapes (e.g. `b"\u{1f980}"`).
+- Raw byte string literals can contain unicode chars (e.g. `br"🦀"`).
+
+This makes it easier to work with strings that are mostly UTF-8 encoded but
+also contain some non-UTF-8 bytes, which are sometimes called "conventionally
+UTF-8" strings.
@@ -365,9 +365,11 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
         EscapeError::NonAsciiCharInByte if mode == Mode::Byte => {
             "non-ASCII character in byte literal"
         }
+        // Note: once rfc3349 stabilizes, this arm will be unreachable.
         EscapeError::NonAsciiCharInByte if mode == Mode::ByteStr => {
             "non-ASCII character in byte string literal"
         }
+        // Note: once rfc3349 stabilizes, this arm will be unreachable.
         EscapeError::NonAsciiCharInByte => "non-ASCII character in raw byte string literal",
         EscapeError::NulInCStr => "null character in C string literal",
         EscapeError::UnskippedWhitespaceWarning => "",
@@ -378,15 +380,17 @@ fn error_to_diagnostic_message(error: EscapeError, mode: Mode) -> &'static str {
 fn unescape_string_error_message(text: &str, mode: Mode) -> &'static str {
     let mut error_message = "";
     match mode {
-        Mode::CStr => {
-            rustc_lexer::unescape::unescape_mixed(text, mode, &mut |_, res| {
+        Mode::ByteStr | Mode::CStr => {
+            // Can ignore the `Rfc3349` return value.
+            _ = rustc_lexer::unescape::unescape_mixed(text, mode, &mut |_, res| {
                 if let Err(e) = res {
                     error_message = error_to_diagnostic_message(e, mode);
                 }
             });
         }
-        Mode::ByteStr | Mode::Str => {
-            rustc_lexer::unescape::unescape_unicode(text, mode, &mut |_, res| {
+        Mode::Str => {
+            // Can ignore the `Rfc3349` return value.
+            _ = rustc_lexer::unescape::unescape_unicode(text, mode, &mut |_, res| {
                 if let Err(e) = res {
                     error_message = error_to_diagnostic_message(e, mode);
                 }
 
@@ -193,7 +193,8 @@ pub trait IsString: AstToken {
         let text = &self.text()[text_range_no_quotes - start];
         let offset = text_range_no_quotes.start() - start;
 
-        unescape_unicode(text, Self::MODE, &mut |range, unescaped_char| {
+        // Ignores the `Rfc3349` return value, thus permitting mixed utf8 literals.
+        _ = unescape_unicode(text, Self::MODE, &mut |range, unescaped_char| {
             let text_range =
                 TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
             cb(text_range + offset, unescaped_char);
@@ -226,7 +227,8 @@ impl ast::String {
         let mut buf = String::new();
         let mut prev_end = 0;
         let mut has_error = false;
-        unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match (
+        // Ignores the `Rfc3349` return value, thus permitting mixed utf8 literals.
+        _ = unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match (
             unescaped_char,
             buf.capacity() == 0,
         ) {
@@ -253,44 +255,18 @@ impl ast::String {
 impl IsString for ast::ByteString {
     const RAW_PREFIX: &'static str = "br";
     const MODE: Mode = Mode::ByteStr;
+
+    fn escaped_char_ranges(
+        &self,
+        cb: &mut dyn FnMut(TextRange, Result<char, rustc_lexer::unescape::EscapeError>),
+    ) {
+        escaped_char_ranges_impl(self, cb);
+    }
 }
 
 impl ast::ByteString {
     pub fn value(&self) -> Option<Cow<'_, [u8]>> {
-        if self.is_raw() {
-            let text = self.text();
-            let text =
-                &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
-            return Some(Cow::Borrowed(text.as_bytes()));
-        }
-
-        let text = self.text();
-        let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
-
-        let mut buf: Vec<u8> = Vec::new();
-        let mut prev_end = 0;
-        let mut has_error = false;
-        unescape_unicode(text, Self::MODE, &mut |char_range, unescaped_char| match (
-            unescaped_char,
-            buf.capacity() == 0,
-        ) {
-            (Ok(c), false) => buf.push(c as u8),
-            (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
-                prev_end = char_range.end
-            }
-            (Ok(c), true) => {
-                buf.reserve_exact(text.len());
-                buf.extend_from_slice(text[..prev_end].as_bytes());
-                buf.push(c as u8);
-            }
-            (Err(_), _) => has_error = true,
-        });
-
-        match (has_error, buf.capacity() == 0) {
-            (true, _) => None,
-            (false, true) => Some(Cow::Borrowed(text.as_bytes())),
-            (false, false) => Some(Cow::Owned(buf)),
-        }
+        value_impl(self)
     }
 }
 
@@ -302,65 +278,13 @@ impl IsString for ast::CString {
         &self,
         cb: &mut dyn FnMut(TextRange, Result<char, rustc_lexer::unescape::EscapeError>),
     ) {
-        let text_range_no_quotes = match self.text_range_between_quotes() {
-            Some(it) => it,
-            None => return,
-        };
-
-        let start = self.syntax().text_range().start();
-        let text = &self.text()[text_range_no_quotes - start];
-        let offset = text_range_no_quotes.start() - start;
-
-        unescape_mixed(text, Self::MODE, &mut |range, unescaped_char| {
-            let text_range =
-                TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
-            // XXX: This method should only be used for highlighting ranges. The unescaped
-            // char/byte is not used. For simplicity, we return an arbitrary placeholder char.
-            cb(text_range + offset, unescaped_char.map(|_| ' '));
-        });
+        escaped_char_ranges_impl(self, cb);
     }
 }
 
 impl ast::CString {
     pub fn value(&self) -> Option<Cow<'_, [u8]>> {
-        if self.is_raw() {
-            let text = self.text();
-            let text =
-                &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
-            return Some(Cow::Borrowed(text.as_bytes()));
-        }
-
-        let text = self.text();
-        let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
-
-        let mut buf = Vec::new();
-        let mut prev_end = 0;
-        let mut has_error = false;
-        let extend_unit = |buf: &mut Vec<u8>, unit: MixedUnit| match unit {
-            MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()),
-            MixedUnit::HighByte(b) => buf.push(b),
-        };
-        unescape_mixed(text, Self::MODE, &mut |char_range, unescaped| match (
-            unescaped,
-            buf.capacity() == 0,
-        ) {
-            (Ok(u), false) => extend_unit(&mut buf, u),
-            (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
-                prev_end = char_range.end
-            }
-            (Ok(u), true) => {
-                buf.reserve_exact(text.len());
-                buf.extend(text[..prev_end].as_bytes());
-                extend_unit(&mut buf, u);
-            }
-            (Err(_), _) => has_error = true,
-        });
-
-        match (has_error, buf.capacity() == 0) {
-            (true, _) => None,
-            (false, true) => Some(Cow::Borrowed(text.as_bytes())),
-            (false, false) => Some(Cow::Owned(buf)),
-        }
+        value_impl(self)
     }
 }
 
@@ -457,6 +381,71 @@ impl ast::FloatNumber {
     }
 }
 
+fn escaped_char_ranges_impl<I: IsString>(
+    this: &I,
+    cb: &mut dyn FnMut(TextRange, Result<char, rustc_lexer::unescape::EscapeError>),
+) {
+    let text_range_no_quotes = match this.text_range_between_quotes() {
+        Some(it) => it,
+        None => return,
+    };
+
+    let start = this.syntax().text_range().start();
+    let text = &this.text()[text_range_no_quotes - start];
+    let offset = text_range_no_quotes.start() - start;
+
+    // Ignores the `Rfc3349` return value, thus permitting mixed utf8 literals.
+    _ = unescape_mixed(text, I::MODE, &mut |range, unescaped_char| {
+        let text_range =
+            TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
+        // XXX: This method should only be used for highlighting ranges. The unescaped
+        // char/byte is not used. For simplicity, we return an arbitrary placeholder char.
+        cb(text_range + offset, unescaped_char.map(|_| ' '));
+    });
+}
+
+fn value_impl<I: IsString>(this: &I) -> Option<Cow<'_, [u8]>> {
+    if this.is_raw() {
+        let text = this.text();
+        let text =
+            &text[this.text_range_between_quotes()? - this.syntax().text_range().start()];
+        return Some(Cow::Borrowed(text.as_bytes()));
+    }
+
+    let text = this.text();
+    let text = &text[this.text_range_between_quotes()? - this.syntax().text_range().start()];
+
+    let mut buf: Vec<u8> = Vec::new();
+    let mut prev_end = 0;
+    let mut has_error = false;
+    let extend_unit = |buf: &mut Vec<u8>, unit: MixedUnit| match unit {
+        MixedUnit::Char(c) => buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()),
+        MixedUnit::HighByte(b) => buf.push(b),
+    };
+    // Ignores the `Rfc3349` return value, thus permitting mixed utf8 literals.
+    _ = unescape_mixed(text, I::MODE, &mut |char_range, unescaped_char| match (
+        unescaped_char,
+        buf.capacity() == 0,
+    ) {
+        (Ok(u), false) => extend_unit(&mut buf, u),
+        (Ok(_), true) if char_range.len() == 1 && char_range.start == prev_end => {
+            prev_end = char_range.end
+        }
+        (Ok(u), true) => {
+            buf.reserve_exact(text.len());
+            buf.extend(text[..prev_end].as_bytes());
+            extend_unit(&mut buf, u);
+        }
+        (Err(_), _) => has_error = true,
+    });
+
+    match (has_error, buf.capacity() == 0) {
+        (true, _) => None,
+        (false, true) => Some(Cow::Borrowed(text.as_bytes())),
+        (false, false) => Some(Cow::Owned(buf)),
+    }
+}
+
 #[derive(Debug, PartialEq, Eq, Copy, Clone)]
 pub enum Radix {
     Binary = 2,
 
@@ -136,11 +136,13 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
         }
     };
 
+    // Ignores the `Rfc3349` return value from the `unescape_*` functions, thus
+    // permitting mixed utf8 literals.
     match literal.kind() {
         ast::LiteralKind::String(s) => {
             if !s.is_raw() {
                 if let Some(without_quotes) = unquote(text, 1, '"') {
-                    unescape_unicode(without_quotes, Mode::Str, &mut |range, char| {
+                    _ = unescape_unicode(without_quotes, Mode::Str, &mut |range, char| {
                         if let Err(err) = char {
                             push_err(1, range.start, err);
                         }
@@ -151,7 +153,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
         ast::LiteralKind::ByteString(s) => {
             if !s.is_raw() {
                 if let Some(without_quotes) = unquote(text, 2, '"') {
-                    unescape_unicode(without_quotes, Mode::ByteStr, &mut |range, char| {
+                    _ = unescape_mixed(without_quotes, Mode::ByteStr, &mut |range, char| {
                         if let Err(err) = char {
                             push_err(1, range.start, err);
                         }
@@ -162,7 +164,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
         ast::LiteralKind::CString(s) => {
             if !s.is_raw() {
                 if let Some(without_quotes) = unquote(text, 2, '"') {
-                    unescape_mixed(without_quotes, Mode::CStr, &mut |range, char| {
+                    _ = unescape_mixed(without_quotes, Mode::CStr, &mut |range, char| {
                         if let Err(err) = char {
                             push_err(1, range.start, err);
                         }
@@ -172,7 +174,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
         }
         ast::LiteralKind::Char(_) => {
             if let Some(without_quotes) = unquote(text, 1, '\'') {
-                unescape_unicode(without_quotes, Mode::Char, &mut |range, char| {
+                _ = unescape_unicode(without_quotes, Mode::Char, &mut |range, char| {
                     if let Err(err) = char {
                         push_err(1, range.start, err);
                     }
@@ -181,7 +183,7 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
         }
         ast::LiteralKind::Byte(_) => {
             if let Some(without_quotes) = unquote(text, 2, '\'') {
-                unescape_unicode(without_quotes, Mode::Byte, &mut |range, char| {
+                _ = unescape_unicode(without_quotes, Mode::Byte, &mut |range, char| {
                     if let Err(err) = char {
                         push_err(2, range.start, err);
                     }
 
@@ -11,7 +11,7 @@ use std::path::{Path, PathBuf};
 const ENTRY_LIMIT: usize = 900;
 // FIXME: The following limits should be reduced eventually.
 const ISSUES_ENTRY_LIMIT: usize = 1849;
-const ROOT_ENTRY_LIMIT: usize = 870;
+const ROOT_ENTRY_LIMIT: usize = 871;
 
 const EXPECTED_TEST_FILE_EXTENSIONS: &[&str] = &[
     "rs",     // test source files
 
@@ -1,4 +1,4 @@
 #![feature(rustc_attrs)]
 
-#[rustc_dummy = b"ﬃ.rs"] //~ ERROR non-ASCII character in byte string literal
+#[rustc_dummy = b'ﬃ'] //~ ERROR non-ASCII character in byte literal
 fn main() {}
@@ -1,13 +1,11 @@
-error: non-ASCII character in byte string literal
+error: non-ASCII character in byte literal
   --> $DIR/key-value-non-ascii.rs:3:19
    |
-LL | #[rustc_dummy = b"ﬃ.rs"]
-   |                   ^ must be ASCII
-   |
-help: if you meant to use the UTF-8 encoding of 'ﬃ', use \xHH escapes
-   |
-LL | #[rustc_dummy = b"/xEF/xAC/x83.rs"]
-   |                   ~~~~~~~~~~~~
+LL | #[rustc_dummy = b'ﬃ']
+   |                   ^
+   |                   |
+   |                   must be ASCII
+   |                   this multibyte character does not fit into a single byte
 
 error: aborting due to 1 previous error
 
@@ -0,0 +1,5 @@
+fn main() {
+    _ = b"a¥🦀"; //~ ERROR mixed utf8
+    _ = br"a¥🦀"; //~ ERROR mixed utf8
+    _ = b"a\u{a5}\u{1f980}"; //~ ERROR mixed utf8
+}
@@ -0,0 +1,33 @@
+error[E0658]: mixed utf8 b"" and br"" literals are experimental
+  --> $DIR/feature-gate-mixed-utf8-literals.rs:2:9
+   |
+LL |     _ = b"a¥🦀";
+   |         ^^^^^^^
+   |
+   = note: see issue #116907 <https://github.com/rust-lang/rust/issues/116907> for more information
+   = help: add `#![feature(mixed_utf8_literals)]` to the crate attributes to enable
+   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
+
+error[E0658]: mixed utf8 b"" and br"" literals are experimental
+  --> $DIR/feature-gate-mixed-utf8-literals.rs:3:9
+   |
+LL |     _ = br"a¥🦀";
+   |         ^^^^^^^^
+   |
+   = note: see issue #116907 <https://github.com/rust-lang/rust/issues/116907> for more information
+   = help: add `#![feature(mixed_utf8_literals)]` to the crate attributes to enable
+   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
+
+error[E0658]: mixed utf8 b"" and br"" literals are experimental
+  --> $DIR/feature-gate-mixed-utf8-literals.rs:4:9
+   |
+LL |     _ = b"a\u{a5}\u{1f980}";
+   |         ^^^^^^^^^^^^^^^^^^^
+   |
+   = note: see issue #116907 <https://github.com/rust-lang/rust/issues/116907> for more information
+   = help: add `#![feature(mixed_utf8_literals)]` to the crate attributes to enable
+   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
+
+error: aborting due to 3 previous errors
+
+For more information about this error, try `rustc --explain E0658`.
@@ -0,0 +1,19 @@
+// check-pass
+
+#![feature(mixed_utf8_literals)]
+
+fn main() {
+    b"a¥🦀";
+    b"é";
+    b"字";
+
+    br"a¥🦀";
+    br"é";
+    br##"é"##;
+
+    b"\u{a66e}";
+    b"a\u{a5}\u{1f980}";
+    b"\u{a4a4}";
+
+    b"hello\xff我叫\u{1F980}";
+}
@@ -3,7 +3,5 @@ static FOO: &'static [u8] = b"\f";  //~ ERROR unknown byte escape
 pub fn main() {
     b"\f";  //~ ERROR unknown byte escape
     b"\x0Z";  //~ ERROR invalid character in numeric character escape: `Z`
-    b"é";  //~ ERROR non-ASCII character in byte string literal
-    br##"é"##;  //~ ERROR non-ASCII character in raw byte string literal
     b"a  //~ ERROR unterminated double quote byte string
 }
@@ -20,31 +20,14 @@ error: invalid character in numeric character escape: `Z`
 LL |     b"\x0Z";
    |          ^ invalid character in numeric character escape
 
-error: non-ASCII character in byte string literal
-  --> $DIR/byte-string-literals.rs:6:7
-   |
-LL |     b"é";
-   |       ^ must be ASCII
-   |
-help: if you meant to use the unicode code point for 'é', use a \xHH escape
-   |
-LL |     b"\xE9";
-   |       ~~~~
-
-error: non-ASCII character in raw byte string literal
-  --> $DIR/byte-string-literals.rs:7:10
-   |
-LL |     br##"é"##;
-   |          ^ must be ASCII
-
 error[E0766]: unterminated double quote byte string
-  --> $DIR/byte-string-literals.rs:8:6
+  --> $DIR/byte-string-literals.rs:6:6
    |
 LL |       b"a
    |  ______^
 LL | | }
    | |__^
 
-error: aborting due to 6 previous errors
+error: aborting due to 4 previous errors
 
 For more information about this error, try `rustc --explain E0766`.
@@ -1,7 +1,4 @@
 fn main() {
-    let _ = b"\u{a66e}";
-    //~^ ERROR unicode escape in byte string
-
     let _ = b'\u{a66e}';
     //~^ ERROR unicode escape in byte string
 
@@ -20,10 +17,9 @@ fn main() {
     let _ = '\xxy';
     //~^ ERROR invalid character in numeric character escape: `x`
 
-    let _ = b"\u{a4a4} \xf \u";
-    //~^ ERROR unicode escape in byte string
-    //~^^ ERROR invalid character in numeric character escape: ` `
-    //~^^^ ERROR incorrect unicode escape sequence
+    let _ = b"\xf \u";
+    //~^ ERROR invalid character in numeric character escape: ` `
+    //~^^ ERROR incorrect unicode escape sequence
 
     let _ = "\xf \u";
     //~^ ERROR invalid character in numeric character escape: ` `
 
@@ -1,94 +1,78 @@
 error: unicode escape in byte string
   --> $DIR/issue-23620-invalid-escapes.rs:2:15
    |
-LL |     let _ = b"\u{a66e}";
-   |               ^^^^^^^^ unicode escape in byte string
-   |
-   = help: unicode escape sequences cannot be used as a byte or in a byte string
-
-error: unicode escape in byte string
-  --> $DIR/issue-23620-invalid-escapes.rs:5:15
-   |
 LL |     let _ = b'\u{a66e}';
    |               ^^^^^^^^ unicode escape in byte string
    |
    = help: unicode escape sequences cannot be used as a byte or in a byte string
 
 error: incorrect unicode escape sequence
-  --> $DIR/issue-23620-invalid-escapes.rs:8:15
+  --> $DIR/issue-23620-invalid-escapes.rs:5:15
    |
 LL |     let _ = b'\u';
    |               ^^ incorrect unicode escape sequence
    |
    = help: format of unicode escape sequences is `\u{...}`
 
 error: numeric character escape is too short
-  --> $DIR/issue-23620-invalid-escapes.rs:11:15
+  --> $DIR/issue-23620-invalid-escapes.rs:8:15
    |
 LL |     let _ = b'\x5';
    |               ^^^
 
 error: invalid character in numeric character escape: `x`
-  --> $DIR/issue-23620-invalid-escapes.rs:14:17
+  --> $DIR/issue-23620-invalid-escapes.rs:11:17
    |
 LL |     let _ = b'\xxy';
    |                 ^ invalid character in numeric character escape
 
 error: numeric character escape is too short
-  --> $DIR/issue-23620-invalid-escapes.rs:17:14
+  --> $DIR/issue-23620-invalid-escapes.rs:14:14
    |
 LL |     let _ = '\x5';
    |              ^^^
 
 error: invalid character in numeric character escape: `x`
-  --> $DIR/issue-23620-invalid-escapes.rs:20:16
+  --> $DIR/issue-23620-invalid-escapes.rs:17:16
    |
 LL |     let _ = '\xxy';
    |                ^ invalid character in numeric character escape
 
-error: unicode escape in byte string
-  --> $DIR/issue-23620-invalid-escapes.rs:23:15
-   |
-LL |     let _ = b"\u{a4a4} \xf \u";
-   |               ^^^^^^^^ unicode escape in byte string
-   |
-   = help: unicode escape sequences cannot be used as a byte or in a byte string
-
 error: invalid character in numeric character escape: ` `
-  --> $DIR/issue-23620-invalid-escapes.rs:23:27
+  --> $DIR/issue-23620-invalid-escapes.rs:20:18
    |
-LL |     let _ = b"\u{a4a4} \xf \u";
-   |                           ^ invalid character in numeric character escape
+LL |     let _ = b"\xf \u";
+   |                  ^ invalid character in numeric character escape
 
 error: incorrect unicode escape sequence
-  --> $DIR/issue-23620-invalid-escapes.rs:23:28
+  --> $DIR/issue-23620-invalid-escapes.rs:20:19
    |
-LL |     let _ = b"\u{a4a4} \xf \u";
-   |                            ^^ incorrect unicode escape sequence
+LL |     let _ = b"\xf \u";
+   |                   ^^ incorrect unicode escape sequence
    |
    = help: format of unicode escape sequences is `\u{...}`
 
 error: invalid character in numeric character escape: ` `
-  --> $DIR/issue-23620-invalid-escapes.rs:28:17
+  --> $DIR/issue-23620-invalid-escapes.rs:24:17
    |
 LL |     let _ = "\xf \u";
    |                 ^ invalid character in numeric character escape
 
 error: incorrect unicode escape sequence
-  --> $DIR/issue-23620-invalid-escapes.rs:28:18
+  --> $DIR/issue-23620-invalid-escapes.rs:24:18
    |
 LL |     let _ = "\xf \u";
    |                  ^^ incorrect unicode escape sequence
    |
    = help: format of unicode escape sequences is `\u{...}`
 
 error: incorrect unicode escape sequence
-  --> $DIR/issue-23620-invalid-escapes.rs:32:14
+  --> $DIR/issue-23620-invalid-escapes.rs:28:14
    |
 LL |     let _ = "\u8f";
    |              ^^^-
    |              |
    |              help: format of unicode escape sequences uses braces: `\u{8f}`
 
-error: aborting due to 13 previous errors
+error: aborting due to 11 previous errors
 
@@ -2,6 +2,5 @@
 
 pub fn main() {
     br"a"; //~ ERROR bare CR not allowed in raw string
-    br"é";  //~ ERROR non-ASCII character in raw byte string literal
     br##~"a"~##;  //~ ERROR only `#` is allowed in raw string delimitation
 }
 
@@ -4,17 +4,11 @@ error: bare CR not allowed in raw string
 LL |     br"a";
    |         ^
 
-error: non-ASCII character in raw byte string literal
-  --> $DIR/raw-byte-string-literals.rs:5:8
-   |
-LL |     br"é";
-   |        ^ must be ASCII
-
 error: found invalid character; only `#` is allowed in raw string delimitation: ~
-  --> $DIR/raw-byte-string-literals.rs:6:5
+  --> $DIR/raw-byte-string-literals.rs:5:5
    |
 LL |     br##~"a"~##;
    |     ^^^^^
 
-error: aborting due to 3 previous errors
+error: aborting due to 2 previous errors
 
 
@@ -4,8 +4,7 @@ fn main() {
     println!("us\u{202B}e\u{202A}r");
     println!("{:?}", r#"us\u{202B}e\u{202A}r"#);
     println!("{:?}", b"us\u{202B}e\u{202A}r");
-    //~^ ERROR unicode escape in byte string
-    //~| ERROR unicode escape in byte string
+    //~^ ERROR mixed utf8 b"" and br"" literals are experimental
     println!("{:?}", br##"us\u{202B}e\u{202A}r"##);
 
     println!("{:?}", "/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only ");
@@ -14,15 +13,9 @@ fn main() {
     println!("{:?}", r##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##);
     //~^ ERROR unicode codepoint changing visible direction of text present in literal
     println!("{:?}", b"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only ");
-    //~^ ERROR non-ASCII character in byte string literal
-    //~| ERROR non-ASCII character in byte string literal
-    //~| ERROR non-ASCII character in byte string literal
-    //~| ERROR non-ASCII character in byte string literal
+    //~^ ERROR mixed utf8 b"" and br"" literals are experimental
     println!("{:?}", br##"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only "##);
-    //~^ ERROR non-ASCII character in raw byte string literal
-    //~| ERROR non-ASCII character in raw byte string literal
-    //~| ERROR non-ASCII character in raw byte string literal
-    //~| ERROR non-ASCII character in raw byte string literal
+    //~^ ERROR mixed utf8 b"" and br"" literals are experimental
     println!("{:?}", '‮');
     //~^ ERROR unicode codepoint changing visible direction of text present in literal
 }
 
@@ -1,86 +1,32 @@
-error: unicode escape in byte string
-  --> $DIR/unicode-control-codepoints.rs:6:26
+error[E0658]: mixed utf8 b"" and br"" literals are experimental
+  --> $DIR/unicode-control-codepoints.rs:6:22
    |
 LL |     println!("{:?}", b"us\u{202B}e\u{202A}r");
-   |                          ^^^^^^^^ unicode escape in byte string
+   |                      ^^^^^^^^^^^^^^^^^^^^^^^
    |
-   = help: unicode escape sequences cannot be used as a byte or in a byte string
+   = note: see issue #116907 <https://github.com/rust-lang/rust/issues/116907> for more information
+   = help: add `#![feature(mixed_utf8_literals)]` to the crate attributes to enable
+   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
 
-error: unicode escape in byte string
-  --> $DIR/unicode-control-codepoints.rs:6:35
-   |
-LL |     println!("{:?}", b"us\u{202B}e\u{202A}r");
-   |                                   ^^^^^^^^ unicode escape in byte string
-   |
-   = help: unicode escape sequences cannot be used as a byte or in a byte string
-
-error: non-ASCII character in byte string literal
-  --> $DIR/unicode-control-codepoints.rs:16:26
-   |
-LL |     println!("{:?}", b"/* } if isAdmin  begin admins only ");
-   |                          ^ must be ASCII but is '\u{202e}'
-   |
-help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes
-   |
-LL |     println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin  begin admins only ");
-   |                          ~~~~~~~~~~~~
-
-error: non-ASCII character in byte string literal
-  --> $DIR/unicode-control-codepoints.rs:16:30
-   |
-LL |     println!("{:?}", b"/* } if isAdmin  begin admins only ");
-   |                             ^ must be ASCII but is '\u{2066}'
-   |
-help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes
-   |
-LL |     println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin  begin admins only ");
-   |                             ~~~~~~~~~~~~
-
-error: non-ASCII character in byte string literal
-  --> $DIR/unicode-control-codepoints.rs:16:41
+error[E0658]: mixed utf8 b"" and br"" literals are experimental
+  --> $DIR/unicode-control-codepoints.rs:15:22
    |
 LL |     println!("{:?}", b"/* } if isAdmin  begin admins only ");
-   |                                       ^ must be ASCII but is '\u{2069}'
+   |                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    |
-help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes
-   |
-LL |     println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9  begin admins only ");
-   |                                       ~~~~~~~~~~~~
-
-error: non-ASCII character in byte string literal
-  --> $DIR/unicode-control-codepoints.rs:16:43
-   |
-LL |     println!("{:?}", b"/* } if isAdmin  begin admins only ");
-   |                                        ^ must be ASCII but is '\u{2066}'
-   |
-help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes
-   |
-LL |     println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only ");
-   |                                        ~~~~~~~~~~~~
-
-error: non-ASCII character in raw byte string literal
-  --> $DIR/unicode-control-codepoints.rs:21:29
-   |
-LL |     println!("{:?}", br##"/* } if isAdmin  begin admins only "##);
-   |                             ^ must be ASCII but is '\u{202e}'
+   = note: see issue #116907 <https://github.com/rust-lang/rust/issues/116907> for more information
+   = help: add `#![feature(mixed_utf8_literals)]` to the crate attributes to enable
+   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
 
-error: non-ASCII character in raw byte string literal
-  --> $DIR/unicode-control-codepoints.rs:21:33
+error[E0658]: mixed utf8 b"" and br"" literals are experimental
+  --> $DIR/unicode-control-codepoints.rs:17:22
    |
 LL |     println!("{:?}", br##"/* } if isAdmin  begin admins only "##);
-   |                                ^ must be ASCII but is '\u{2066}'
-
-error: non-ASCII character in raw byte string literal
-  --> $DIR/unicode-control-codepoints.rs:21:44
+   |                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    |
-LL |     println!("{:?}", br##"/* } if isAdmin  begin admins only "##);
-   |                                          ^ must be ASCII but is '\u{2069}'
-
-error: non-ASCII character in raw byte string literal
-  --> $DIR/unicode-control-codepoints.rs:21:46
-   |
-LL |     println!("{:?}", br##"/* } if isAdmin  begin admins only "##);
-   |                                           ^ must be ASCII but is '\u{2066}'
+   = note: see issue #116907 <https://github.com/rust-lang/rust/issues/116907> for more information
+   = help: add `#![feature(mixed_utf8_literals)]` to the crate attributes to enable
+   = note: this compiler was built on YYYY-MM-DD; consider upgrading it if it is out of date
 
 error: unicode codepoint changing visible direction of text present in comment
   --> $DIR/unicode-control-codepoints.rs:2:5
@@ -97,7 +43,7 @@ LL |     // if access_level != "user" { // Check if admin
    = help: if their presence wasn't intentional, you can remove them
 
 error: unicode codepoint changing visible direction of text present in comment
-  --> $DIR/unicode-control-codepoints.rs:30:1
+  --> $DIR/unicode-control-codepoints.rs:23:1
    |
 LL | //"/* } if isAdmin  begin admins only */"
    | ^^^^^-^^-^^^^^^^^^--^^^^^^^^^^^^^^^^^^^^^
@@ -112,7 +58,7 @@ LL | //"/* } if isAdmin  begin admins only */"
    = help: if their presence wasn't intentional, you can remove them
 
 error: unicode codepoint changing visible direction of text present in literal
-  --> $DIR/unicode-control-codepoints.rs:11:22
+  --> $DIR/unicode-control-codepoints.rs:10:22
    |
 LL |     println!("{:?}", "/* } if isAdmin  begin admins only ");
    |                      ^^^-^^-^^^^^^^^^--^^^^^^^^^^^^^^^^^^^
@@ -132,7 +78,7 @@ LL |     println!("{:?}", "/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begi
    |                         ~~~~~~~~   ~~~~~~~~          ~~~~~~~~ ~~~~~~~~
 
 error: unicode codepoint changing visible direction of text present in literal
-  --> $DIR/unicode-control-codepoints.rs:14:22
+  --> $DIR/unicode-control-codepoints.rs:13:22
    |
 LL |     println!("{:?}", r##"/* } if isAdmin  begin admins only "##);
    |                      ^^^^^^-^^-^^^^^^^^^--^^^^^^^^^^^^^^^^^^^^^
@@ -151,7 +97,7 @@ LL |     println!("{:?}", r##"/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} b
    |                            ~~~~~~~~   ~~~~~~~~          ~~~~~~~~ ~~~~~~~~
 
 error: unicode codepoint changing visible direction of text present in literal
-  --> $DIR/unicode-control-codepoints.rs:26:22
+  --> $DIR/unicode-control-codepoints.rs:19:22
    |
 LL |     println!("{:?}", '');
    |                      ^-
@@ -167,7 +113,7 @@ LL |     println!("{:?}", '\u{202e}');
    |                       ~~~~~~~~
 
 error: unicode codepoint changing visible direction of text present in doc comment
-  --> $DIR/unicode-control-codepoints.rs:33:1
+  --> $DIR/unicode-control-codepoints.rs:26:1
    |
 LL | /**  ''); */fn foo() {}
    | ^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint
@@ -177,7 +123,7 @@ LL | /**  ''); */fn foo() {}
    = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
 
 error: unicode codepoint changing visible direction of text present in doc comment
-  --> $DIR/unicode-control-codepoints.rs:36:1
+  --> $DIR/unicode-control-codepoints.rs:29:1
    |
 LL | / /**
 LL | |  *
@@ -188,5 +134,6 @@ LL | |  *  ''); */fn bar() {}
    = note: if their presence wasn't intentional, you can remove them
    = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}'
 
-error: aborting due to 17 previous errors
+error: aborting due to 10 previous errors
 
+For more information about this error, try `rustc --explain E0658`.
@@ -10,9 +10,4 @@ fn main() {
     //~^ ERROR: non-ASCII character in byte literal
     //~| NOTE: this multibyte character does not fit into a single byte
     //~| NOTE: must be ASCII
-
-    b"字";
-    //~^ ERROR: non-ASCII character in byte string literal
-    //~| HELP: if you meant to use the UTF-8 encoding of '字', use \xHH escapes
-    //~| NOTE: must be ASCII
 }
@@ -18,16 +18,5 @@ LL |     b'字';
    |       must be ASCII
    |       this multibyte character does not fit into a single byte
 
-error: non-ASCII character in byte string literal
-  --> $DIR/multibyte-escapes.rs:14:7
-   |
-LL |     b"字";
-   |       ^^ must be ASCII
-   |
-help: if you meant to use the UTF-8 encoding of '字', use \xHH escapes
-   |
-LL |     b"\xE5\xAD\x97";
-   |       ~~~~~~~~~~~~
-
-error: aborting due to 3 previous errors
+error: aborting due to 2 previous errors
Original file line number	Diff line number	Diff line change
`@@ -508,6 +508,7 @@ pub fn check_crate(krate: &ast::Crate, sess: &Session, features: &Features) {`
`508`	`508`	`}`
`509`	`509`	`};`
`510`	`510`	`}`
	`511`	`+ gate_all!(mixed_utf8_literals, r#"mixed utf8 b"" and br"" literals are experimental"#);`
`511`	`512`	`gate_all!(`
`512`	`513`	`if_let_guard,`
`513`	`514`	"`if let` guards are experimental",
Original file line number	Diff line number	Diff line change
`@@ -3,7 +3,5 @@ static FOO: &'static [u8] = b"\f"; //~ ERROR unknown byte escape`
`3`	`3`	`pub fn main() {`
`4`	`4`	`b"\f"; //~ ERROR unknown byte escape`
`5`	`5`	b"\x0Z"; //~ ERROR invalid character in numeric character escape: `Z`
`6`		`- b"é"; //~ ERROR non-ASCII character in byte string literal`
`7`		`- br##"é"##; //~ ERROR non-ASCII character in raw byte string literal`
`8`	`6`	`b"a //~ ERROR unterminated double quote byte string`
`9`	`7`	`}`
Original file line number	Diff line number	Diff line change
`@@ -2,6 +2,5 @@`
`2`	`2`
`3`	`3`	`pub fn main() {`
`4`	`4`	`br"a"; //~ ERROR bare CR not allowed in raw string`
`5`		`- br"é"; //~ ERROR non-ASCII character in raw byte string literal`
`6`	`5`	br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation
`7`	`6`	`}`