From 1629aa3dc0dd74694a9d654e8607b8470084020c Mon Sep 17 00:00:00 2001 From: KingCol13 <48412633+KingCol13@users.noreply.github.com> Date: Wed, 8 Oct 2025 00:37:32 +0100 Subject: [PATCH 1/3] Split out samply-debugid --- Cargo.lock | 10 ++ Cargo.toml | 1 + samply-debugid/Cargo.toml | 21 ++++ samply-debugid/README.md | 5 + samply-debugid/src/codeid.rs | 148 +++++++++++++++++++++++++++ samply-debugid/src/debugid.rs | 115 +++++++++++++++++++++ samply-debugid/src/lib.rs | 5 + samply-symbols/Cargo.toml | 1 + samply-symbols/src/binary_image.rs | 5 +- samply-symbols/src/breakpad/index.rs | 4 +- samply-symbols/src/debugid_util.rs | 3 +- samply-symbols/src/elf.rs | 2 +- samply-symbols/src/error.rs | 3 +- samply-symbols/src/lib.rs | 11 +- samply-symbols/src/shared.rs | 148 +-------------------------- 15 files changed, 322 insertions(+), 160 deletions(-) create mode 100644 samply-debugid/Cargo.toml create mode 100644 samply-debugid/README.md create mode 100644 samply-debugid/src/codeid.rs create mode 100644 samply-debugid/src/debugid.rs create mode 100644 samply-debugid/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 429d5b2a..ec613568 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2277,6 +2277,15 @@ dependencies = [ "yaxpeax-x86", ] +[[package]] +name = "samply-debugid" +version = "0.1.0" +dependencies = [ + "debugid", + "object 0.37.3", + "uuid", +] + [[package]] name = "samply-quota-manager" version = "0.1.0" @@ -2318,6 +2327,7 @@ dependencies = [ "pdb-addr2line", "rangemap", "rustc-demangle", + "samply-debugid", "scala-native-demangle", "srcsrv", "thiserror 2.0.16", diff --git a/Cargo.toml b/Cargo.toml index aad478d0..42ddd0ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "fxprof-processed-profile", "gecko_profile", "samply-api", + "samply-debugid", "samply-quota-manager", "samply-symbols", "samply", diff --git a/samply-debugid/Cargo.toml b/samply-debugid/Cargo.toml new file mode 100644 index 00000000..714d6c5e --- /dev/null +++ 
b/samply-debugid/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "samply-debugid" +version = "0.1.0" +authors = ["Markus Stange "] +license = "MIT OR Apache-2.0" +edition = "2021" +description = "Samply compatible debugids." +repository = "https://github.com/mstange/samply/" +readme = "README.md" + +[dependencies.debugid] +default-features = false +version = "0.8.0" + +[dependencies.object] +default-features = false +version = "0.37" + +[dependencies.uuid] +default-features = false +version = "1" diff --git a/samply-debugid/README.md b/samply-debugid/README.md new file mode 100644 index 00000000..a3cc0846 --- /dev/null +++ b/samply-debugid/README.md @@ -0,0 +1,5 @@ +# samply-debugid + +This crate allows generating [`debugid`s](https://crates.io/crates/debugid) +that are compatible with `samply`. Useful for writing your own profiles to be +symbolicated and displayed with `samply load`. diff --git a/samply-debugid/src/codeid.rs b/samply-debugid/src/codeid.rs new file mode 100644 index 00000000..f54d48a0 --- /dev/null +++ b/samply-debugid/src/codeid.rs @@ -0,0 +1,148 @@ +use std::str::FromStr; + +use uuid::Uuid; + +/// An enum carrying an identifier for a binary. This is stores the same information +/// as a [`debugid::CodeId`], but without projecting it down to a string. +/// +/// All types need to be treated rather differently, see their respective documentation. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum CodeId { + /// The code ID for a Windows PE file. When combined with the binary name, + /// the code ID lets you obtain binaries from symbol servers. It is not useful + /// on its own, it has to be paired with the binary name. + /// + /// On Windows, a binary's code ID is distinct from its debug ID (= pdb GUID + age). + /// If you have a binary file, you can get both the code ID and the debug ID + /// from it. If you only have a PDB file, you usually *cannot* get the code ID of + /// the corresponding binary from it. 
+ PeCodeId(PeCodeId), + + /// The code ID for a macOS / iOS binary (mach-O). This is just the mach-O UUID. + /// The mach-O UUID is shared between both the binary file and the debug file (dSYM), + /// and it can be used on its own to find dSYMs using Spotlight. + /// + /// The debug ID and the code ID contain the same information; the debug ID + /// is literally just the UUID plus a zero at the end. + MachoUuid(Uuid), + + /// The code ID for a Linux ELF file. This is the "ELF build ID" (also called "GNU build ID"). + /// The build ID is usually 20 bytes, commonly written out as 40 hex chars. + /// + /// It can be used to find debug files on the local file system or to download + /// binaries or debug files from a `debuginfod` symbol server. It does not have to be + /// paired with the binary name. + /// + /// An ELF binary's code ID is more useful than its debug ID: The debug ID is truncated + /// to 16 bytes (32 hex characters), whereas the code ID is the full ELF build ID. + ElfBuildId(ElfBuildId), +} + +impl FromStr for CodeId { + type Err = (); + + fn from_str(s: &str) -> Result<Self, Self::Err> { + if s.len() <= 17 { + // 8 bytes timestamp + 1 to 8 bytes of image size + Ok(CodeId::PeCodeId(PeCodeId::from_str(s)?)) + } else if s.len() == 32 && is_uppercase_hex(s) { + // mach-O UUID + Ok(CodeId::MachoUuid(Uuid::from_str(s).map_err(|_| ())?)) + } else { + // ELF build ID. These are usually 40 hex characters (= 20 bytes). 
+ Ok(CodeId::ElfBuildId(ElfBuildId::from_str(s)?)) + } + } +} + +fn is_uppercase_hex(s: &str) -> bool { + s.chars() + .all(|c| c.is_ascii_hexdigit() && (c.is_ascii_digit() || c.is_ascii_uppercase())) +} + +impl std::fmt::Display for CodeId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + CodeId::PeCodeId(pe) => std::fmt::Display::fmt(pe, f), + CodeId::MachoUuid(uuid) => f.write_fmt(format_args!("{:X}", uuid.simple())), + CodeId::ElfBuildId(elf) => std::fmt::Display::fmt(elf, f), + } + } +} + +/// The code ID for a Windows PE file. +/// +/// When combined with the binary name, the `PeCodeId` lets you obtain binaries from +/// symbol servers. It is not useful on its own, it has to be paired with the binary name. +/// +/// A Windows binary's `PeCodeId` is distinct from its debug ID (= pdb GUID + age). +/// If you have a binary file, you can get both the `PeCodeId` and the debug ID +/// from it. If you only have a PDB file, you usually *cannot* get the `PeCodeId` of +/// the corresponding binary from it. +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct PeCodeId { + pub timestamp: u32, + pub image_size: u32, +} + +impl FromStr for PeCodeId { + type Err = (); + + fn from_str(s: &str) -> Result<Self, Self::Err> { + if s.len() < 9 || s.len() > 16 { + return Err(()); + } + let timestamp = u32::from_str_radix(&s[..8], 16).map_err(|_| ())?; + let image_size = u32::from_str_radix(&s[8..], 16).map_err(|_| ())?; + Ok(Self { + timestamp, + image_size, + }) + } +} + +impl std::fmt::Display for PeCodeId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{:08X}{:x}", self.timestamp, self.image_size)) + } +} + +/// The build ID for an ELF file (also called "GNU build ID"). +/// +/// The build ID can be used to find debug files on the local file system or to download +/// binaries or debug files from a `debuginfod` symbol server. It does not have to be +/// paired with the binary name. 
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct ElfBuildId(pub Vec<u8>); + +impl ElfBuildId { + /// Create a new `ElfBuildId` from a slice of bytes (commonly a sha1 hash + /// generated by the linker, i.e. 20 bytes). + pub fn from_bytes(bytes: &[u8]) -> Self { + Self(bytes.to_owned()) + } +} + +impl FromStr for ElfBuildId { + type Err = (); + + fn from_str(s: &str) -> Result<Self, Self::Err> { + let byte_count = s.len() / 2; + let mut bytes = Vec::with_capacity(byte_count); + for i in 0..byte_count { + let hex_byte = &s[i * 2..i * 2 + 2]; + let b = u8::from_str_radix(hex_byte, 16).map_err(|_| ())?; + bytes.push(b); + } + Ok(Self(bytes)) + } +} + +impl std::fmt::Display for ElfBuildId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for byte in &self.0 { + f.write_fmt(format_args!("{byte:02x}"))?; + } + Ok(()) + } +} diff --git a/samply-debugid/src/debugid.rs b/samply-debugid/src/debugid.rs new file mode 100644 index 00000000..eece29fd --- /dev/null +++ b/samply-debugid/src/debugid.rs @@ -0,0 +1,115 @@ +use debugid::DebugId; +use object::{Object, ObjectSection}; +use uuid::Uuid; + +use crate::{CodeId, ElfBuildId}; + +pub trait DebugIdExt { + /// Creates a DebugId from some identifier. The identifier could be + /// an ELF build ID, or a hash derived from the text section. + /// The `little_endian` argument specifies whether the object file + /// is targeting a little endian architecture. + fn from_identifier(identifier: &[u8], little_endian: bool) -> Self; + + /// Creates a DebugId from a hash of the first 4096 bytes of the .text section. + /// The `little_endian` argument specifies whether the object file + /// is targeting a little endian architecture. 
+ fn from_text_first_page(text_first_page: &[u8], little_endian: bool) -> Self; +} + +impl DebugIdExt for DebugId { + fn from_identifier(identifier: &[u8], little_endian: bool) -> Self { + // Make sure that we have exactly 16 bytes available, either truncate or fill + // the remainder with zeros. + // ELF build IDs are usually 20 bytes, so if the identifier is an ELF build ID + // then we're performing a lossy truncation. + let mut d = [0u8; 16]; + let shared_len = identifier.len().min(d.len()); + d[0..shared_len].copy_from_slice(&identifier[0..shared_len]); + + // Pretend that the build ID was stored as a UUID with u32 u16 u16 fields inside + // the file. Parse those fields in the endianness of the file. Then use + // Uuid::from_fields to serialize them as big endian. + // For ELF build IDs this is a bit silly, because ELF build IDs aren't actually + // field-based UUIDs, but this is what the tools in the breakpad and + // sentry/symbolic universe do, so we do the same for compatibility with those + // tools. + let (d1, d2, d3) = if little_endian { + ( + u32::from_le_bytes([d[0], d[1], d[2], d[3]]), + u16::from_le_bytes([d[4], d[5]]), + u16::from_le_bytes([d[6], d[7]]), + ) + } else { + ( + u32::from_be_bytes([d[0], d[1], d[2], d[3]]), + u16::from_be_bytes([d[4], d[5]]), + u16::from_be_bytes([d[6], d[7]]), + ) + }; + let uuid = Uuid::from_fields(d1, d2, d3, d[8..16].try_into().unwrap()); + DebugId::from_uuid(uuid) + } + + // This algorithm XORs 16-byte chunks directly into a 16-byte buffer. + fn from_text_first_page(text_first_page: &[u8], little_endian: bool) -> Self { + const UUID_SIZE: usize = 16; + const PAGE_SIZE: usize = 4096; + let mut hash = [0; UUID_SIZE]; + for (i, byte) in text_first_page.iter().cloned().take(PAGE_SIZE).enumerate() { + hash[i % UUID_SIZE] ^= byte; + } + DebugId::from_identifier(&hash, little_endian) + } +} + +/// Tries to obtain a DebugId for an object. 
This uses the build ID, if available, +/// and falls back to hashing the first page of the text section otherwise. +/// Returns None on failure. +pub fn debug_id_for_object<'data>(obj: &impl Object<'data>) -> Option<DebugId> { + // Windows + if let Ok(Some(pdb_info)) = obj.pdb_info() { + return Some(DebugId::from_guid_age(&pdb_info.guid(), pdb_info.age()).unwrap()); + } + + // ELF + if let Ok(Some(build_id)) = obj.build_id() { + return Some(DebugId::from_identifier(build_id, obj.is_little_endian())); + } + + // mach-O + if let Ok(Some(uuid)) = obj.mach_uuid() { + return Some(DebugId::from_uuid(Uuid::from_bytes(uuid))); + } + + // We were not able to locate a build ID, so fall back to creating a synthetic + // identifier from a hash of the first page of the ".text" (program code) section. + if let Some(section) = obj.section_by_name(".text") { + let data_len = section.size().min(4096); + if let Ok(Some(first_page_data)) = section.data_range(section.address(), data_len) { + return Some(DebugId::from_text_first_page( + first_page_data, + obj.is_little_endian(), + )); + } + } + + None +} + +/// Tries to obtain a CodeId for an object. +/// +/// This currently only handles mach-O and ELF. 
+pub fn code_id_for_object<'data>(obj: &impl Object<'data>) -> Option { + // ELF + if let Ok(Some(build_id)) = obj.build_id() { + return Some(CodeId::ElfBuildId(ElfBuildId::from_bytes(build_id))); + } + + // mach-O + if let Ok(Some(uuid)) = obj.mach_uuid() { + return Some(CodeId::MachoUuid(Uuid::from_bytes(uuid))); + } + + None +} diff --git a/samply-debugid/src/lib.rs b/samply-debugid/src/lib.rs new file mode 100644 index 00000000..d8449f4b --- /dev/null +++ b/samply-debugid/src/lib.rs @@ -0,0 +1,5 @@ +mod codeid; +mod debugid; + +pub use codeid::{CodeId, ElfBuildId, PeCodeId}; +pub use debugid::{code_id_for_object, debug_id_for_object, DebugIdExt}; diff --git a/samply-symbols/Cargo.toml b/samply-symbols/Cargo.toml index e0c6eb89..2b5e70e1 100644 --- a/samply-symbols/Cargo.toml +++ b/samply-symbols/Cargo.toml @@ -49,6 +49,7 @@ srcsrv = "0.2.2" lzma-rs = "0.3" macho-unwind-info = "0.5.0" debugid = "0.8.0" +samply-debugid = { version = "0.1.0", path = "../samply-debugid" } flate2 = "1" yoke = "0.8" yoke-derive = "0.8" diff --git a/samply-symbols/src/binary_image.rs b/samply-symbols/src/binary_image.rs index e862fabe..2633fa20 100644 --- a/samply-symbols/src/binary_image.rs +++ b/samply-symbols/src/binary_image.rs @@ -3,14 +3,15 @@ use linux_perf_data::jitdump::JitDumpHeader; use linux_perf_data::linux_perf_event_reader::RawData; use object::read::pe::{ImageNtHeaders, ImageOptionalHeader, PeFile, PeFile32, PeFile64}; use object::{FileKind, Object, ReadRef}; +use samply_debugid::{CodeId, ElfBuildId, PeCodeId}; use crate::debugid_util::{code_id_for_object, debug_id_for_object}; use crate::error::Error; use crate::jitdump::{debug_id_and_code_id_for_jitdump, JitDumpIndex}; use crate::macho::{DyldCacheFileData, MachOData, MachOFatArchiveMemberData}; use crate::shared::{ - relative_address_base, CodeId, ElfBuildId, FileAndPathHelperError, FileContents, - FileContentsWrapper, LibraryInfo, PeCodeId, RangeReadRef, + relative_address_base, FileAndPathHelperError, FileContents, 
FileContentsWrapper, LibraryInfo, + RangeReadRef, }; #[derive(thiserror::Error, Debug)] diff --git a/samply-symbols/src/breakpad/index.rs b/samply-symbols/src/breakpad/index.rs index 6eff9e86..f4ab7627 100644 --- a/samply-symbols/src/breakpad/index.rs +++ b/samply-symbols/src/breakpad/index.rs @@ -13,11 +13,11 @@ use nom::error::{Error, ErrorKind, ParseError}; use nom::sequence::{terminated, tuple}; use nom::{Err, IResult}; use object::ReadRef; +use samply_debugid::CodeId; use zerocopy::{IntoBytes, LittleEndian, Ref, U16, U32, U64}; use zerocopy_derive::*; use crate::source_file_path::SourceFilePathIndex; -use crate::CodeId; #[derive(Debug, Clone, PartialEq, Eq)] pub struct BreakpadIndex<'a> { @@ -1233,7 +1233,7 @@ mod test { use std::str::FromStr; use super::*; - use crate::{ElfBuildId, PeCodeId}; + use samply_debugid::{ElfBuildId, PeCodeId}; #[test] fn test1() { diff --git a/samply-symbols/src/debugid_util.rs b/samply-symbols/src/debugid_util.rs index 79b762ee..40d54631 100644 --- a/samply-symbols/src/debugid_util.rs +++ b/samply-symbols/src/debugid_util.rs @@ -1,9 +1,8 @@ use debugid::DebugId; use object::{Object, ObjectSection}; +use samply_debugid::{CodeId, ElfBuildId}; use uuid::Uuid; -use crate::shared::{CodeId, ElfBuildId}; - pub trait DebugIdExt { /// Creates a DebugId from some identifier. The identifier could be /// an ELF build ID, or a hash derived from the text section. 
diff --git a/samply-symbols/src/elf.rs b/samply-symbols/src/elf.rs index 733f3fac..53b0fd90 100644 --- a/samply-symbols/src/elf.rs +++ b/samply-symbols/src/elf.rs @@ -5,6 +5,7 @@ use debugid::DebugId; use elsa::sync::FrozenVec; use gimli::{CieOrFde, Dwarf, EhFrame, EndianSlice, RunTimeEndian, UnwindSection}; use object::{File, FileKind, Object, ObjectSection, ReadRef}; +use samply_debugid::{debug_id_for_object, ElfBuildId}; use yoke::Yoke; use yoke_derive::Yokeable; @@ -15,7 +16,6 @@ use crate::symbol_map::SymbolMap; use crate::symbol_map_object::{ DwoDwarfMaker, ObjectSymbolMap, ObjectSymbolMapInnerWrapper, ObjectSymbolMapOuter, }; -use crate::{debug_id_for_object, ElfBuildId}; pub async fn load_symbol_map_for_elf( file_location: H::FL, diff --git a/samply-symbols/src/error.rs b/samply-symbols/src/error.rs index 832a8905..e2ba5c46 100644 --- a/samply-symbols/src/error.rs +++ b/samply-symbols/src/error.rs @@ -4,10 +4,11 @@ use debugid::DebugId; use linux_perf_data::jitdump::JitDumpError; use object::FileKind; use pdb_addr2line::pdb::Error as PdbError; +use samply_debugid::CodeId; use thiserror::Error; use crate::breakpad::BreakpadParseError; -use crate::{CodeId, FatArchiveMember, LibraryInfo}; +use crate::{FatArchiveMember, LibraryInfo}; /// The error type used in this crate. 
#[derive(Error, Debug)] diff --git a/samply-symbols/src/lib.rs b/samply-symbols/src/lib.rs index 85cfe06e..1c1b4f04 100644 --- a/samply-symbols/src/lib.rs +++ b/samply-symbols/src/lib.rs @@ -211,6 +211,7 @@ use jitdump::JitDumpIndex; use linux_perf_data::jitdump::JitDumpReader; use object::read::FileKind; pub use pdb_addr2line::pdb; +pub use samply_debugid::{CodeId, ElfBuildId, PeCodeId}; use shared::FileContentsCursor; pub use {debugid, object}; @@ -254,11 +255,11 @@ pub use crate::macho::FatArchiveMember; pub use crate::mapped_path::MappedPath; pub use crate::path_interner::PathInterner; pub use crate::shared::{ - relative_address_base, AddressInfo, CandidatePathInfo, CodeId, ElfBuildId, - ExternalFileAddressInFileRef, ExternalFileAddressRef, ExternalFileRef, FileAndPathHelper, - FileAndPathHelperError, FileAndPathHelperResult, FileContents, FileContentsWrapper, - FileLocation, FrameDebugInfo, FramesLookupResult, LibraryInfo, LookupAddress, - MultiArchDisambiguator, OptionallySendFuture, PeCodeId, SymbolInfo, SyncAddressInfo, + relative_address_base, AddressInfo, CandidatePathInfo, ExternalFileAddressInFileRef, + ExternalFileAddressRef, ExternalFileRef, FileAndPathHelper, FileAndPathHelperError, + FileAndPathHelperResult, FileContents, FileContentsWrapper, FileLocation, FrameDebugInfo, + FramesLookupResult, LibraryInfo, LookupAddress, MultiArchDisambiguator, OptionallySendFuture, + SymbolInfo, SyncAddressInfo, }; pub use crate::source_file_path::{SourceFilePath, SourceFilePathHandle, SourceFilePathIndex}; pub use crate::symbol_map::{AccessPatternHint, SymbolMap, SymbolMapTrait}; diff --git a/samply-symbols/src/shared.rs b/samply-symbols/src/shared.rs index aef88c21..a93e5a7a 100644 --- a/samply-symbols/src/shared.rs +++ b/samply-symbols/src/shared.rs @@ -4,7 +4,6 @@ use std::fmt::{Debug, Display}; use std::future::Future; use std::marker::PhantomData; use std::ops::{Deref, Range}; -use std::str::FromStr; use std::sync::Arc; #[cfg(feature = "partial_read_stats")] 
@@ -12,7 +11,7 @@ use bitvec::{bitvec, prelude::BitVec}; use debugid::DebugId; use object::read::ReadRef; use object::FileFlags; -use uuid::Uuid; +use samply_debugid::{CodeId, ElfBuildId}; use crate::symbol_map::SymbolMapTrait; use crate::SourceFilePathHandle; @@ -146,151 +145,6 @@ pub enum MultiArchDisambiguator { DebugId(DebugId), } -/// An enum carrying an identifier for a binary. This is stores the same information -/// as a [`debugid::CodeId`], but without projecting it down to a string. -/// -/// All types need to be treated rather differently, see their respective documentation. -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub enum CodeId { - /// The code ID for a Windows PE file. When combined with the binary name, - /// the code ID lets you obtain binaries from symbol servers. It is not useful - /// on its own, it has to be paired with the binary name. - /// - /// On Windows, a binary's code ID is distinct from its debug ID (= pdb GUID + age). - /// If you have a binary file, you can get both the code ID and the debug ID - /// from it. If you only have a PDB file, you usually *cannot* get the code ID of - /// the corresponding binary from it. - PeCodeId(PeCodeId), - - /// The code ID for a macOS / iOS binary (mach-O). This is just the mach-O UUID. - /// The mach-O UUID is shared between both the binary file and the debug file (dSYM), - /// and it can be used on its own to find dSYMs using Spotlight. - /// - /// The debug ID and the code ID contain the same information; the debug ID - /// is literally just the UUID plus a zero at the end. - MachoUuid(Uuid), - - /// The code ID for a Linux ELF file. This is the "ELF build ID" (also called "GNU build ID"). - /// The build ID is usually 20 bytes, commonly written out as 40 hex chars. - /// - /// It can be used to find debug files on the local file system or to download - /// binaries or debug files from a `debuginfod` symbol server. it does not have to be - /// paired with the binary name. 
- /// - /// An ELF binary's code ID is more useful than its debug ID: The debug ID is truncated - /// to 16 bytes (32 hex characters), whereas the code ID is the full ELF build ID. - ElfBuildId(ElfBuildId), -} - -impl FromStr for CodeId { - type Err = (); - - fn from_str(s: &str) -> Result<Self, Self::Err> { - if s.len() <= 17 { - // 8 bytes timestamp + 1 to 8 bytes of image size - Ok(CodeId::PeCodeId(PeCodeId::from_str(s)?)) - } else if s.len() == 32 && is_uppercase_hex(s) { - // mach-O UUID - Ok(CodeId::MachoUuid(Uuid::from_str(s).map_err(|_| ())?)) - } else { - // ELF build ID. These are usually 40 hex characters (= 20 bytes). - Ok(CodeId::ElfBuildId(ElfBuildId::from_str(s)?)) - } - } -} - -fn is_uppercase_hex(s: &str) -> bool { - s.chars() - .all(|c| c.is_ascii_hexdigit() && (c.is_ascii_digit() || c.is_ascii_uppercase())) -} - -impl std::fmt::Display for CodeId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - CodeId::PeCodeId(pe) => std::fmt::Display::fmt(pe, f), - CodeId::MachoUuid(uuid) => f.write_fmt(format_args!("{:X}", uuid.simple())), - CodeId::ElfBuildId(elf) => std::fmt::Display::fmt(elf, f), - } - } -} - -/// The code ID for a Windows PE file. -/// -/// When combined with the binary name, the `PeCodeId` lets you obtain binaries from -/// symbol servers. It is not useful on its own, it has to be paired with the binary name. -/// -/// A Windows binary's `PeCodeId` is distinct from its debug ID (= pdb GUID + age). -/// If you have a binary file, you can get both the `PeCodeId` and the debug ID -/// from it. If you only have a PDB file, you usually *cannot* get the `PeCodeId` of -/// the corresponding binary from it. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct PeCodeId { - pub timestamp: u32, - pub image_size: u32, -} - -impl FromStr for PeCodeId { - type Err = (); - - fn from_str(s: &str) -> Result<Self, Self::Err> { - if s.len() < 9 || s.len() > 16 { - return Err(()); - } - let timestamp = u32::from_str_radix(&s[..8], 16).map_err(|_| ())?; - let image_size = u32::from_str_radix(&s[8..], 16).map_err(|_| ())?; - Ok(Self { - timestamp, - image_size, - }) - } -} - -impl std::fmt::Display for PeCodeId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_fmt(format_args!("{:08X}{:x}", self.timestamp, self.image_size)) - } -} - -/// The build ID for an ELF file (also called "GNU build ID"). -/// -/// The build ID can be used to find debug files on the local file system or to download -/// binaries or debug files from a `debuginfod` symbol server. it does not have to be -/// paired with the binary name. -#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct ElfBuildId(pub Vec<u8>); - -impl ElfBuildId { - /// Create a new `ElfBuildId` from a slice of bytes (commonly a sha1 hash - /// generated by the linker, i.e. 20 bytes). - pub fn from_bytes(bytes: &[u8]) -> Self { - Self(bytes.to_owned()) - } -} - -impl FromStr for ElfBuildId { - type Err = (); - - fn from_str(s: &str) -> Result<Self, Self::Err> { - let byte_count = s.len() / 2; - let mut bytes = Vec::with_capacity(byte_count); - for i in 0..byte_count { - let hex_byte = &s[i * 2..i * 2 + 2]; - let b = u8::from_str_radix(hex_byte, 16).map_err(|_| ())?; - bytes.push(b); - } - Ok(Self(bytes)) - } -} - -impl std::fmt::Display for ElfBuildId { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for byte in &self.0 { - f.write_fmt(format_args!("{byte:02x}"))?; - } - Ok(()) - } -} - /// Information about a library ("binary" / "module" / "DSO") which allows finding /// symbol files for it. The information can be partial. 
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash, PartialOrd, Ord)] From 28450a34fa95e11f8cb5d01c97e8ec3bbe6a9b03 Mon Sep 17 00:00:00 2001 From: KingCol13 <48412633+KingCol13@users.noreply.github.com> Date: Wed, 8 Oct 2025 00:48:49 +0100 Subject: [PATCH 2/3] Move code_id_for_object --- samply-debugid/src/codeid.rs | 18 ++++++++++++++++++ samply-debugid/src/debugid.rs | 19 ------------------- samply-debugid/src/lib.rs | 4 ++-- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/samply-debugid/src/codeid.rs b/samply-debugid/src/codeid.rs index f54d48a0..a3b9baee 100644 --- a/samply-debugid/src/codeid.rs +++ b/samply-debugid/src/codeid.rs @@ -1,5 +1,6 @@ use std::str::FromStr; +use object::Object; use uuid::Uuid; /// An enum carrying an identifier for a binary. This is stores the same information @@ -146,3 +147,20 @@ impl std::fmt::Display for ElfBuildId { Ok(()) } } + +/// Tries to obtain a CodeId for an object. +/// +/// This currently only handles mach-O and ELF. +pub fn code_id_for_object<'data>(obj: &impl Object<'data>) -> Option { + // ELF + if let Ok(Some(build_id)) = obj.build_id() { + return Some(CodeId::ElfBuildId(ElfBuildId::from_bytes(build_id))); + } + + // mach-O + if let Ok(Some(uuid)) = obj.mach_uuid() { + return Some(CodeId::MachoUuid(Uuid::from_bytes(uuid))); + } + + None +} diff --git a/samply-debugid/src/debugid.rs b/samply-debugid/src/debugid.rs index eece29fd..9f25cd66 100644 --- a/samply-debugid/src/debugid.rs +++ b/samply-debugid/src/debugid.rs @@ -2,8 +2,6 @@ use debugid::DebugId; use object::{Object, ObjectSection}; use uuid::Uuid; -use crate::{CodeId, ElfBuildId}; - pub trait DebugIdExt { /// Creates a DebugId from some identifier. The identifier could be /// an ELF build ID, or a hash derived from the text section. @@ -96,20 +94,3 @@ pub fn debug_id_for_object<'data>(obj: &impl Object<'data>) -> Option { None } - -/// Tries to obtain a CodeId for an object. -/// -/// This currently only handles mach-O and ELF. 
-pub fn code_id_for_object<'data>(obj: &impl Object<'data>) -> Option { - // ELF - if let Ok(Some(build_id)) = obj.build_id() { - return Some(CodeId::ElfBuildId(ElfBuildId::from_bytes(build_id))); - } - - // mach-O - if let Ok(Some(uuid)) = obj.mach_uuid() { - return Some(CodeId::MachoUuid(Uuid::from_bytes(uuid))); - } - - None -} diff --git a/samply-debugid/src/lib.rs b/samply-debugid/src/lib.rs index d8449f4b..5db8d1f5 100644 --- a/samply-debugid/src/lib.rs +++ b/samply-debugid/src/lib.rs @@ -1,5 +1,5 @@ mod codeid; mod debugid; -pub use codeid::{CodeId, ElfBuildId, PeCodeId}; -pub use debugid::{code_id_for_object, debug_id_for_object, DebugIdExt}; +pub use codeid::{code_id_for_object, CodeId, ElfBuildId, PeCodeId}; +pub use debugid::{debug_id_for_object, DebugIdExt}; From 127c0f83208e1d5ab65accbb93c3c5ef215d4225 Mon Sep 17 00:00:00 2001 From: KingCol13 <48412633+KingCol13@users.noreply.github.com> Date: Wed, 8 Oct 2025 01:22:46 +0100 Subject: [PATCH 3/3] Add read_core feature for object in samply-debugid --- samply-debugid/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/samply-debugid/Cargo.toml b/samply-debugid/Cargo.toml index 714d6c5e..853d8a4f 100644 --- a/samply-debugid/Cargo.toml +++ b/samply-debugid/Cargo.toml @@ -14,6 +14,7 @@ version = "0.8.0" [dependencies.object] default-features = false +features = ["read_core"] version = "0.37" [dependencies.uuid]