Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ members = [
"fxprof-processed-profile",
"gecko_profile",
"samply-api",
"samply-debugid",
"samply-quota-manager",
"samply-symbols",
"samply",
Expand Down
22 changes: 22 additions & 0 deletions samply-debugid/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
[package]
name = "samply-debugid"
version = "0.1.0"
authors = ["Markus Stange <[email protected]>"]
license = "MIT OR Apache-2.0"
edition = "2021"
description = "Samply compatible debugids."
repository = "https://github.com/mstange/samply/"
readme = "README.md"

[dependencies.debugid]
default-features = false
version = "0.8.0"

[dependencies.object]
default-features = false
features = ["read_core"]
version = "0.37"

[dependencies.uuid]
default-features = false
version = "1"
5 changes: 5 additions & 0 deletions samply-debugid/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# samply-debugid

This crate allows generating [`debugid`s](https://crates.io/crates/debugid)
that are compatible with `samply`. Useful for writing your own profiles to be
symbolicated and displayed with `samply load`.
166 changes: 166 additions & 0 deletions samply-debugid/src/codeid.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
use std::str::FromStr;

use object::Object;
use uuid::Uuid;

/// An enum carrying an identifier for a binary. This stores the same information
/// as a [`debugid::CodeId`], but without projecting it down to a string.
///
/// The variants need to be treated rather differently; see their respective
/// documentation.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum CodeId {
/// The code ID for a Windows PE file. When combined with the binary name,
/// the code ID lets you obtain binaries from symbol servers. It is not useful
/// on its own; it has to be paired with the binary name.
///
/// On Windows, a binary's code ID is distinct from its debug ID (= pdb GUID + age).
/// If you have a binary file, you can get both the code ID and the debug ID
/// from it. If you only have a PDB file, you usually *cannot* get the code ID of
/// the corresponding binary from it.
PeCodeId(PeCodeId),

/// The code ID for a macOS / iOS binary (mach-O). This is just the mach-O UUID.
/// The mach-O UUID is shared between both the binary file and the debug file (dSYM),
/// and it can be used on its own to find dSYMs using Spotlight.
///
/// The debug ID and the code ID contain the same information; the debug ID
/// is literally just the UUID plus a zero at the end.
MachoUuid(Uuid),

/// The code ID for a Linux ELF file. This is the "ELF build ID" (also called "GNU build ID").
/// The build ID is usually 20 bytes, commonly written out as 40 hex chars.
///
/// It can be used to find debug files on the local file system or to download
/// binaries or debug files from a `debuginfod` symbol server. It does not have to be
/// paired with the binary name.
///
/// An ELF binary's code ID is more useful than its debug ID: the debug ID is truncated
/// to 16 bytes (32 hex characters), whereas the code ID is the full ELF build ID.
ElfBuildId(ElfBuildId),
}

/// Parses the string form of a code ID, picking the variant from the shape of
/// the input.
impl FromStr for CodeId {
    type Err = ();

    /// Dispatches on the length of `s`:
    ///
    /// * up to 17 characters: parsed as a [`PeCodeId`] (8 hex characters of
    ///   timestamp followed by 1 to 8 hex characters of image size),
    /// * exactly 32 uppercase hex characters: parsed as a mach-O UUID,
    /// * anything else: parsed as an [`ElfBuildId`] (usually 40 hex
    ///   characters, i.e. 20 bytes).
    ///
    /// Returns `Err(())` if the selected parser rejects the input.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s.len() {
            0..=17 => PeCodeId::from_str(s).map(CodeId::PeCodeId),
            32 if is_uppercase_hex(s) => Uuid::from_str(s)
                .map(CodeId::MachoUuid)
                .map_err(|_| ()),
            _ => ElfBuildId::from_str(s).map(CodeId::ElfBuildId),
        }
    }
}

/// Returns `true` if every character of `s` is one of `0-9` or `A-F`.
///
/// An empty string trivially satisfies this and yields `true`.
fn is_uppercase_hex(s: &str) -> bool {
    // Any byte of a multi-byte UTF-8 sequence is >= 0x80 and therefore never
    // matches, so checking bytes is equivalent to checking chars here.
    s.bytes().all(|b| matches!(b, b'0'..=b'9' | b'A'..=b'F'))
}

impl std::fmt::Display for CodeId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
CodeId::PeCodeId(pe) => std::fmt::Display::fmt(pe, f),
CodeId::MachoUuid(uuid) => f.write_fmt(format_args!("{:X}", uuid.simple())),
CodeId::ElfBuildId(elf) => std::fmt::Display::fmt(elf, f),
}
}
}

/// The code ID for a Windows PE file.
///
/// When combined with the binary name, the `PeCodeId` lets you obtain binaries from
/// symbol servers. It is not useful on its own; it has to be paired with the binary name.
///
/// A Windows binary's `PeCodeId` is distinct from its debug ID (= pdb GUID + age).
/// If you have a binary file, you can get both the `PeCodeId` and the debug ID
/// from it. If you only have a PDB file, you usually *cannot* get the `PeCodeId` of
/// the corresponding binary from it.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct PeCodeId {
    pub timestamp: u32,
    pub image_size: u32,
}

impl FromStr for PeCodeId {
    type Err = ();

    /// Parses a PE code ID from its string form: exactly 8 hex characters of
    /// timestamp followed by 1 to 8 hex characters of image size.
    ///
    /// Returns `Err(())` if the length is outside `9..=16` or if any character
    /// is not an ASCII hex digit. This never panics, even for non-ASCII input.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if !(9..=16).contains(&s.len()) {
            return Err(());
        }
        // Validate up front: this guarantees that splitting at byte 8 lands on a
        // char boundary, and it rejects the leading `+` sign that
        // `from_str_radix` would otherwise accept.
        if !s.bytes().all(|b| b.is_ascii_hexdigit()) {
            return Err(());
        }
        let (timestamp_hex, image_size_hex) = s.split_at(8);
        let timestamp = u32::from_str_radix(timestamp_hex, 16).map_err(|_| ())?;
        let image_size = u32::from_str_radix(image_size_hex, 16).map_err(|_| ())?;
        Ok(Self {
            timestamp,
            image_size,
        })
    }
}

impl std::fmt::Display for PeCodeId {
    /// Writes the timestamp as 8 zero-padded uppercase hex digits, directly
    /// followed by the image size as unpadded lowercase hex digits.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:08X}{:x}", self.timestamp, self.image_size)
    }
}

/// The build ID for an ELF file (also called "GNU build ID").
///
/// The build ID can be used to find debug files on the local file system or to download
/// binaries or debug files from a `debuginfod` symbol server. It does not have to be
/// paired with the binary name.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ElfBuildId(pub Vec<u8>);

impl ElfBuildId {
    /// Create a new `ElfBuildId` from a slice of bytes (commonly a sha1 hash
    /// generated by the linker, i.e. 20 bytes).
    pub fn from_bytes(bytes: &[u8]) -> Self {
        Self(bytes.to_owned())
    }
}

impl FromStr for ElfBuildId {
    type Err = ();

    /// Parses a build ID from a string of hex digit pairs (upper or lower case).
    ///
    /// Each pair of characters becomes one byte. If `s` has an odd number of
    /// bytes, the trailing one is ignored. An empty string yields an empty
    /// build ID.
    ///
    /// Returns `Err(())` if any character inside a pair is not an ASCII hex
    /// digit. This never panics, even for non-ASCII input.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        /// Decodes a single ASCII hex digit into its 4-bit value.
        fn hex_nibble(b: u8) -> Result<u8, ()> {
            match b {
                b'0'..=b'9' => Ok(b - b'0'),
                b'a'..=b'f' => Ok(b - b'a' + 10),
                b'A'..=b'F' => Ok(b - b'A' + 10),
                _ => Err(()),
            }
        }

        // Operate on bytes rather than on `&str` sub-slices: slicing a `&str`
        // at a fixed stride panics when an index falls inside a multi-byte
        // character. `chunks_exact` drops an odd trailing byte.
        s.as_bytes()
            .chunks_exact(2)
            .map(|pair| Ok((hex_nibble(pair[0])? << 4) | hex_nibble(pair[1])?))
            .collect::<Result<Vec<u8>, ()>>()
            .map(Self)
    }
}

impl std::fmt::Display for ElfBuildId {
    /// Writes every byte as two lowercase hex digits, with no separators.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        for byte in &self.0 {
            write!(f, "{byte:02x}")?;
        }
        Ok(())
    }
}

/// Tries to obtain a [`CodeId`] for an object.
///
/// The ELF build ID is tried first, then the mach-O UUID. Returns `None` if
/// the object provides neither; other formats are currently not handled.
pub fn code_id_for_object<'data>(obj: &impl Object<'data>) -> Option<CodeId> {
    // ELF
    let from_elf = obj
        .build_id()
        .ok()
        .flatten()
        .map(|build_id| CodeId::ElfBuildId(ElfBuildId::from_bytes(build_id)));

    // mach-O, only consulted when there was no ELF build ID
    from_elf.or_else(|| {
        obj.mach_uuid()
            .ok()
            .flatten()
            .map(|uuid| CodeId::MachoUuid(Uuid::from_bytes(uuid)))
    })
}
96 changes: 96 additions & 0 deletions samply-debugid/src/debugid.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
use debugid::DebugId;
use object::{Object, ObjectSection};
use uuid::Uuid;

/// Extension trait with constructors that build a debug ID from raw
/// identifier bytes or from the start of the `.text` section.
///
/// This module implements it for [`DebugId`].
pub trait DebugIdExt {
/// Creates a DebugId from some identifier. The identifier could be
/// an ELF build ID, or a hash derived from the text section.
///
/// The implementation for `DebugId` uses at most the first 16 bytes of
/// `identifier` and pads shorter identifiers with zeros.
///
/// The `little_endian` argument specifies whether the object file
/// is targeting a little endian architecture.
fn from_identifier(identifier: &[u8], little_endian: bool) -> Self;

/// Creates a DebugId from a hash of the first 4096 bytes of the .text section.
///
/// `text_first_page` may be shorter than 4096 bytes; the implementation for
/// `DebugId` ignores any bytes beyond the first 4096.
///
/// The `little_endian` argument specifies whether the object file
/// is targeting a little endian architecture.
fn from_text_first_page(text_first_page: &[u8], little_endian: bool) -> Self;
}

impl DebugIdExt for DebugId {
    // Builds a DebugId from up to 16 bytes of `identifier`.
    fn from_identifier(identifier: &[u8], little_endian: bool) -> Self {
        // We need exactly 16 bytes: longer identifiers are truncated, shorter
        // ones are padded with zeros. ELF build IDs are usually 20 bytes, so
        // for those this is a lossy truncation.
        let mut padded = [0u8; 16];
        let copy_len = padded.len().min(identifier.len());
        padded[..copy_len].copy_from_slice(&identifier[..copy_len]);

        // Treat the first 8 bytes as if they were the u32 / u16 / u16 fields of
        // a UUID stored in the file, and read them in the file's endianness.
        // `Uuid::from_fields` then serializes them as big endian.
        // ELF build IDs aren't really field-based UUIDs, so this is a bit silly
        // for them, but the tools in the breakpad and sentry/symbolic universe
        // do it this way and we want to stay compatible with them.
        let first = [padded[0], padded[1], padded[2], padded[3]];
        let second = [padded[4], padded[5]];
        let third = [padded[6], padded[7]];
        let (d1, d2, d3) = if little_endian {
            (
                u32::from_le_bytes(first),
                u16::from_le_bytes(second),
                u16::from_le_bytes(third),
            )
        } else {
            (
                u32::from_be_bytes(first),
                u16::from_be_bytes(second),
                u16::from_be_bytes(third),
            )
        };

        // The remaining 8 bytes are passed through unchanged.
        let mut tail = [0u8; 8];
        tail.copy_from_slice(&padded[8..]);

        DebugId::from_uuid(Uuid::from_fields(d1, d2, d3, &tail))
    }

    // Folds the first page of the text section into 16 bytes by XORing each
    // byte into the slot given by its offset modulo 16.
    fn from_text_first_page(text_first_page: &[u8], little_endian: bool) -> Self {
        const UUID_SIZE: usize = 16;
        const PAGE_SIZE: usize = 4096;

        // Only the first PAGE_SIZE bytes take part in the hash.
        let page = &text_first_page[..text_first_page.len().min(PAGE_SIZE)];

        let mut hash = [0u8; UUID_SIZE];
        for chunk in page.chunks(UUID_SIZE) {
            // The last chunk may be shorter than UUID_SIZE; `zip` stops early.
            for (slot, byte) in hash.iter_mut().zip(chunk) {
                *slot ^= *byte;
            }
        }
        DebugId::from_identifier(&hash, little_endian)
    }
}

/// Tries to obtain a DebugId for an object.
///
/// The following sources are tried in order, and the first one that is
/// available wins:
///
/// 1. the PDB GUID and age (Windows),
/// 2. the ELF build ID,
/// 3. the mach-O UUID,
/// 4. a hash of the first 4096 bytes of the `.text` section.
///
/// Returns None if none of these could be obtained.
pub fn debug_id_for_object<'data>(obj: &impl Object<'data>) -> Option<DebugId> {
    // Windows
    if let Ok(Some(pdb_info)) = obj.pdb_info() {
        // NOTE(review): the unwrap presumably cannot fail because the GUID has
        // a fixed size — confirm against the `debugid` crate.
        let debug_id = DebugId::from_guid_age(&pdb_info.guid(), pdb_info.age()).unwrap();
        return Some(debug_id);
    }

    // ELF
    if let Ok(Some(build_id)) = obj.build_id() {
        let little_endian = obj.is_little_endian();
        return Some(DebugId::from_identifier(build_id, little_endian));
    }

    // mach-O
    if let Ok(Some(uuid)) = obj.mach_uuid() {
        return Some(DebugId::from_uuid(Uuid::from_bytes(uuid)));
    }

    // No build ID of any kind was found. Fall back to a synthetic identifier
    // made from a hash of the first page of the ".text" (program code) section.
    let text_section = obj.section_by_name(".text")?;
    let first_page_len = text_section.size().min(4096);
    match text_section.data_range(text_section.address(), first_page_len) {
        Ok(Some(first_page_data)) => Some(DebugId::from_text_first_page(
            first_page_data,
            obj.is_little_endian(),
        )),
        _ => None,
    }
}
5 changes: 5 additions & 0 deletions samply-debugid/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
mod codeid;
mod debugid;

pub use codeid::{code_id_for_object, CodeId, ElfBuildId, PeCodeId};
pub use debugid::{debug_id_for_object, DebugIdExt};
1 change: 1 addition & 0 deletions samply-symbols/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ srcsrv = "0.2.2"
lzma-rs = "0.3"
macho-unwind-info = "0.5.0"
debugid = "0.8.0"
samply-debugid = { version = "0.1.0", path = "../samply-debugid" }
flate2 = "1"
yoke = "0.8"
yoke-derive = "0.8"
Expand Down
5 changes: 3 additions & 2 deletions samply-symbols/src/binary_image.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@ use linux_perf_data::jitdump::JitDumpHeader;
use linux_perf_data::linux_perf_event_reader::RawData;
use object::read::pe::{ImageNtHeaders, ImageOptionalHeader, PeFile, PeFile32, PeFile64};
use object::{FileKind, Object, ReadRef};
use samply_debugid::{CodeId, ElfBuildId, PeCodeId};

use crate::debugid_util::{code_id_for_object, debug_id_for_object};
use crate::error::Error;
use crate::jitdump::{debug_id_and_code_id_for_jitdump, JitDumpIndex};
use crate::macho::{DyldCacheFileData, MachOData, MachOFatArchiveMemberData};
use crate::shared::{
relative_address_base, CodeId, ElfBuildId, FileAndPathHelperError, FileContents,
FileContentsWrapper, LibraryInfo, PeCodeId, RangeReadRef,
relative_address_base, FileAndPathHelperError, FileContents, FileContentsWrapper, LibraryInfo,
RangeReadRef,
};

#[derive(thiserror::Error, Debug)]
Expand Down
4 changes: 2 additions & 2 deletions samply-symbols/src/breakpad/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ use nom::error::{Error, ErrorKind, ParseError};
use nom::sequence::{terminated, tuple};
use nom::{Err, IResult};
use object::ReadRef;
use samply_debugid::CodeId;
use zerocopy::{IntoBytes, LittleEndian, Ref, U16, U32, U64};
use zerocopy_derive::*;

use crate::source_file_path::SourceFilePathIndex;
use crate::CodeId;

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BreakpadIndex<'a> {
Expand Down Expand Up @@ -1233,7 +1233,7 @@ mod test {
use std::str::FromStr;

use super::*;
use crate::{ElfBuildId, PeCodeId};
use samply_debugid::{ElfBuildId, PeCodeId};

#[test]
fn test1() {
Expand Down
3 changes: 1 addition & 2 deletions samply-symbols/src/debugid_util.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
use debugid::DebugId;
use object::{Object, ObjectSection};
use samply_debugid::{CodeId, ElfBuildId};
use uuid::Uuid;

use crate::shared::{CodeId, ElfBuildId};

pub trait DebugIdExt {
/// Creates a DebugId from some identifier. The identifier could be
/// an ELF build ID, or a hash derived from the text section.
Expand Down
Loading
Loading