Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit f661a16

Browse files
committedJun 2, 2025·
Auto merge of #141875 - nnethercote:ByteSymbol, r=<try>
Introduce `ByteSymbol` r? `@ghost`
2 parents 2fc3dee + e83f532 commit f661a16

File tree

42 files changed

+482
-121
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+482
-121
lines changed
 

‎compiler/rustc_ast/src/ast.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use rustc_data_structures::tagged_ptr::Tag;
3232
use rustc_macros::{Decodable, Encodable, HashStable_Generic};
3333
pub use rustc_span::AttrId;
3434
use rustc_span::source_map::{Spanned, respan};
35-
use rustc_span::{DUMMY_SP, ErrorGuaranteed, Ident, Span, Symbol, kw, sym};
35+
use rustc_span::{ByteSymbol, DUMMY_SP, ErrorGuaranteed, Ident, Span, Symbol, kw, sym};
3636
use thin_vec::{ThinVec, thin_vec};
3737

3838
pub use crate::format::*;
@@ -1766,7 +1766,7 @@ pub enum ExprKind {
17661766
/// Added for optimization purposes to avoid the need to escape
17671767
/// large binary blobs - should always behave like [`ExprKind::Lit`]
17681768
/// with a `ByteStr` literal.
1769-
IncludedBytes(Arc<[u8]>),
1769+
IncludedBytes(Arc<[u8]>), // njn: change to ByteSymbol?
17701770

17711771
/// A `format_args!()` expression.
17721772
FormatArgs(P<FormatArgs>),
@@ -2024,7 +2024,8 @@ impl YieldKind {
20242024
}
20252025

20262026
/// A literal in a meta item.
2027-
#[derive(Clone, Encodable, Decodable, Debug, HashStable_Generic)]
2027+
// njn: look for clones
2028+
#[derive(Clone, Copy, Encodable, Decodable, Debug, HashStable_Generic)]
20282029
pub struct MetaItemLit {
20292030
/// The original literal as written in the source code.
20302031
pub symbol: Symbol,
@@ -2087,16 +2088,17 @@ pub enum LitFloatType {
20872088
/// deciding the `LitKind`. This means that float literals like `1f32` are
20882089
/// classified by this type as `Float`. This is different to `token::LitKind`
20892090
/// which does *not* consider the suffix.
2090-
#[derive(Clone, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)]
2091+
#[derive(Clone, Copy, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)]
2092+
// njn: look for clones
20912093
pub enum LitKind {
20922094
/// A string literal (`"foo"`). The symbol is unescaped, and so may differ
20932095
/// from the original token's symbol.
20942096
Str(Symbol, StrStyle),
20952097
/// A byte string (`b"foo"`). Stored as a `ByteSymbol` rather than a `Symbol`
20962098
/// because it might be non-utf8, and `Symbol`s only allow utf8 strings.
2097-
ByteStr(Arc<[u8]>, StrStyle),
2099+
ByteStr(ByteSymbol, StrStyle),
20982100
/// A C String (`c"foo"`). Guaranteed to only have `\0` at the end.
2099-
CStr(Arc<[u8]>, StrStyle),
2101+
CStr(ByteSymbol, StrStyle),
21002102
/// A byte char (`b'f'`).
21012103
Byte(u8),
21022104
/// A character literal (`'a'`).

‎compiler/rustc_ast/src/util/literal.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::{ascii, fmt, str};
55
use rustc_literal_escaper::{
66
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
77
};
8-
use rustc_span::{Span, Symbol, kw, sym};
8+
use rustc_span::{ByteSymbol, Span, Symbol, kw, sym};
99
use tracing::debug;
1010

1111
use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
@@ -117,13 +117,13 @@ impl LitKind {
117117
assert!(!err.is_fatal(), "failed to unescape string literal")
118118
}
119119
});
120-
LitKind::ByteStr(buf.into(), StrStyle::Cooked)
120+
LitKind::ByteStr(ByteSymbol::intern(&buf), StrStyle::Cooked)
121121
}
122122
token::ByteStrRaw(n) => {
123123
// Raw strings have no escapes so we can convert the symbol
124124
// directly to a `ByteSymbol`.
125125
let buf = symbol.as_str().to_owned().into_bytes();
126-
LitKind::ByteStr(buf.into(), StrStyle::Raw(n))
126+
LitKind::ByteStr(ByteSymbol::intern(&buf), StrStyle::Raw(n))
127127
}
128128
token::CStr => {
129129
let s = symbol.as_str();
@@ -138,15 +138,15 @@ impl LitKind {
138138
}
139139
});
140140
buf.push(0);
141-
LitKind::CStr(buf.into(), StrStyle::Cooked)
141+
LitKind::CStr(ByteSymbol::intern(&buf), StrStyle::Cooked)
142142
}
143143
token::CStrRaw(n) => {
144144
// Raw strings have no escapes so we can convert the symbol
145145
// directly to a `ByteSymbol` after appending the terminating NUL
146146
// char.
147147
let mut buf = symbol.as_str().to_owned().into_bytes();
148148
buf.push(0);
149-
LitKind::CStr(buf.into(), StrStyle::Raw(n))
149+
LitKind::CStr(ByteSymbol::intern(&buf), StrStyle::Raw(n))
150150
}
151151
token::Err(guar) => LitKind::Err(guar),
152152
})
@@ -169,11 +169,11 @@ impl fmt::Display for LitKind {
169169
string = sym
170170
)?,
171171
LitKind::ByteStr(ref bytes, StrStyle::Cooked) => {
172-
write!(f, "b\"{}\"", escape_byte_str_symbol(bytes))?
172+
write!(f, "b\"{}\"", escape_byte_str_symbol(bytes.as_byte_str()))?
173173
}
174174
LitKind::ByteStr(ref bytes, StrStyle::Raw(n)) => {
175175
// Unwrap because raw byte string literals can only contain ASCII.
176-
let symbol = str::from_utf8(bytes).unwrap();
176+
let symbol = str::from_utf8(bytes.as_byte_str()).unwrap();
177177
write!(
178178
f,
179179
"br{delim}\"{string}\"{delim}",
@@ -182,11 +182,11 @@ impl fmt::Display for LitKind {
182182
)?;
183183
}
184184
LitKind::CStr(ref bytes, StrStyle::Cooked) => {
185-
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
185+
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes.as_byte_str()))?
186186
}
187187
LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
188188
// This can only be valid UTF-8.
189-
let symbol = str::from_utf8(bytes).unwrap();
189+
let symbol = str::from_utf8(bytes.as_byte_str()).unwrap();
190190
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
191191
}
192192
LitKind::Int(n, ty) => {

0 commit comments

Comments
 (0)
Please sign in to comment.