diff --git a/Cargo.toml b/Cargo.toml index 9b07523a2..1f468fefb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,12 @@ cpp_demangle = { default-features = false, version = "0.2.3", optional = true } # Optional dependencies enabled through the `gimli-symbolize` feature addr2line = { version = "0.11.0", optional = true, default-features = false, features = ['std'] } -goblin = { version = "0.2", optional = true, default-features = false, features = ['elf32', 'elf64', 'mach32', 'mach64', 'pe32', 'pe64', 'std'] } + +[dependencies.object] +version = "0.19" +optional = true +default-features = false +features = ['read_core', 'elf', 'macho', 'pe'] [target.'cfg(windows)'.dependencies] winapi = { version = "0.3.3", optional = true } @@ -49,7 +54,7 @@ winapi = { version = "0.3.3", optional = true } # Note that not all features are available on all platforms, so even though a # feature is enabled some other feature may be used instead. [features] -default = ["std", "libunwind", "libbacktrace", "dladdr", "dbghelp"] +default = ["std", "libunwind", "libbacktrace", "dbghelp"] # Include std support. std = [] @@ -81,17 +86,13 @@ kernel32 = [] # can also provide filename/line number information if debuginfo is # compiled in. This library currently only primarily works on unixes that # are not OSX, however. -# - dladdr: this feature uses the dladdr(3) function (a glibc extension) to -# resolve symbol names. This is fairly unreliable on linux, but works well -# enough on OSX. # - gimli-symbolize: use the `gimli-rs/addr2line` crate to symbolicate # addresses into file, line, and name using DWARF debug information. At # the moment, this is only possible when targetting Linux, since macOS # splits DWARF out into a separate object file. Enabling this feature # means one less C dependency. libbacktrace = ["backtrace-sys/backtrace-sys"] -dladdr = [] -gimli-symbolize = ["addr2line", "goblin"] +gimli-symbolize = ["addr2line", "object"] #======================================= # Methods of serialization @@ -105,6 +106,7 @@ serialize-serde = ["serde"] # # Only here for backwards compatibility purposes, they do nothing now. coresymbolication = [] +dladdr = [] #======================================= # Internal features for testing and such. diff --git a/crates/without_debuginfo/Cargo.toml b/crates/without_debuginfo/Cargo.toml index 44b0948db..5d9bd85c4 100644 --- a/crates/without_debuginfo/Cargo.toml +++ b/crates/without_debuginfo/Cargo.toml @@ -12,9 +12,6 @@ features = [ 'libunwind', 'dbghelp', - # Allow fallback to dladdr - 'dladdr', - # Yes, we have `std` 'std', ] diff --git a/src/symbolize/dladdr.rs b/src/symbolize/dladdr.rs deleted file mode 100644 index 9509e9986..000000000 --- a/src/symbolize/dladdr.rs +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! Common support for resolving with `dladdr`, often used as a fallback if -//! other strategies don't work. - -#![allow(dead_code)] - -cfg_if::cfg_if! { - if #[cfg(all(unix, not(target_os = "emscripten"), feature = "dladdr"))] { - use core::ffi::c_void; - use core::marker; - use core::{mem, slice}; - use crate::SymbolName; - use crate::types::BytesOrWideString; - use libc::{self, Dl_info}; - - pub struct Symbol<'a> { - inner: Dl_info, - _marker: marker::PhantomData<&'a i32>, - } - - impl Symbol<'_> { - pub fn name(&self) -> Option { - if self.inner.dli_sname.is_null() { - None - } else { - let ptr = self.inner.dli_sname as *const u8; - unsafe { - let len = libc::strlen(self.inner.dli_sname); - Some(SymbolName::new(slice::from_raw_parts(ptr, len))) - } - } - } - - pub fn addr(&self) -> Option<*mut c_void> { - Some(self.inner.dli_saddr as *mut _) - } - - pub fn filename_raw(&self) -> Option { - None - } - - #[cfg(feature = "std")] - pub fn filename(&self) -> Option<&::std::path::Path> { - None - } - - pub fn lineno(&self) -> Option { - None - } - } - - pub unsafe fn resolve(addr: *mut c_void, cb: &mut FnMut(Symbol<'static>)) { - let mut info = Symbol { - inner: mem::zeroed(), - _marker: marker::PhantomData, - }; - // Skip null addresses to avoid calling into libc and having it do - // things with the dynamic symbol table for no reason. - if !addr.is_null() && libc::dladdr(addr as *mut _, &mut info.inner) != 0 { - cb(info) - } - } - } else { - use core::ffi::c_void; - use core::marker; - use crate::symbolize::SymbolName; - use crate::types::BytesOrWideString; - - pub struct Symbol<'a> { - a: Void, - _b: marker::PhantomData<&'a i32>, - } - - enum Void {} - - impl Symbol<'_> { - pub fn name(&self) -> Option { - match self.a {} - } - - pub fn addr(&self) -> Option<*mut c_void> { - match self.a {} - } - - pub fn filename_raw(&self) -> Option { - match self.a {} - } - - #[cfg(feature = "std")] - pub fn filename(&self) -> Option<&::std::path::Path> { - match self.a {} - } - - pub fn lineno(&self) -> Option { - match self.a {} - } - } - - pub unsafe fn resolve(addr: *mut c_void, cb: &mut FnMut(Symbol<'static>)) { - drop((addr, cb)); - } - } -} diff --git a/src/symbolize/dladdr_resolve.rs b/src/symbolize/dladdr_resolve.rs deleted file mode 100644 index 7790703ff..000000000 --- a/src/symbolize/dladdr_resolve.rs +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! Symbolication strategy using `dladdr` -//! -//! The `dladdr` API is available on most Unix implementations but it's quite -//! basic, not handling inline frame information at all. Since it's so prevalent -//! though we have an option to use it! - -use crate::symbolize::{dladdr, ResolveWhat, SymbolName}; -use crate::types::BytesOrWideString; -use core::ffi::c_void; - -pub struct Symbol<'a>(dladdr::Symbol<'a>); - -impl Symbol<'_> { - pub fn name(&self) -> Option { - self.0.name() - } - - pub fn addr(&self) -> Option<*mut c_void> { - self.0.addr() - } - - pub fn filename_raw(&self) -> Option { - self.0.filename_raw() - } - - #[cfg(feature = "std")] - pub fn filename(&self) -> Option<&::std::path::Path> { - self.0.filename() - } - - pub fn lineno(&self) -> Option { - self.0.lineno() - } -} - -pub unsafe fn resolve(what: ResolveWhat, cb: &mut FnMut(&super::Symbol)) { - dladdr::resolve(what.address_or_ip(), &mut |sym| { - cb(&super::Symbol { inner: Symbol(sym) }) - }); -} diff --git a/src/symbolize/gimli.rs b/src/symbolize/gimli.rs index a13657a4a..886e13d51 100644 --- a/src/symbolize/gimli.rs +++ b/src/symbolize/gimli.rs @@ -7,7 +7,6 @@ use self::gimli::read::EndianSlice; use self::gimli::LittleEndian as Endian; use self::mmap::Mmap; -use crate::symbolize::dladdr; use crate::symbolize::ResolveWhat; use crate::types::BytesOrWideString; use crate::SymbolName; @@ -89,23 +88,32 @@ fn mmap(path: &Path) -> Option { cfg_if::cfg_if! { if #[cfg(windows)] { - use goblin::pe::{self, PE}; - use goblin::strtab::Strtab; - use std::cmp; + use object::{Bytes, LittleEndian as LE}; + use object::pe::{ImageDosHeader, ImageSymbol}; + use object::read::StringTable; + use object::read::pe::{ImageNtHeaders, ImageOptionalHeader, SectionTable}; + #[cfg(target_pointer_width = "32")] + type Pe = object::pe::ImageNtHeaders32; + #[cfg(target_pointer_width = "64")] + type Pe = object::pe::ImageNtHeaders64; use std::convert::TryFrom; struct Object<'a> { - pe: PE<'a>, - data: &'a [u8], - symbols: Vec<(usize, pe::symbol::Symbol)>, - strtab: Strtab<'a>, + data: Bytes<'a>, + sections: SectionTable<'a>, + symbols: Vec<(usize, &'a ImageSymbol)>, + strings: StringTable<'a>, } impl<'a> Object<'a> { fn parse(data: &'a [u8]) -> Option> { - let pe = PE::parse(data).ok()?; - let syms = pe.header.coff_header.symbols(data).ok()?; - let strtab = pe.header.coff_header.strings(data).ok()?; + let data = Bytes(data); + let dos_header = ImageDosHeader::parse(data).ok()?; + let (nt_headers, _, nt_tail) = dos_header.nt_headers::(data).ok()?; + let sections = nt_headers.sections(nt_tail).ok()?; + let symtab = nt_headers.symbols(data).ok()?; + let strings = symtab.strings(); + let image_base = usize::try_from(nt_headers.optional_header().image_base()).ok()?; // Collect all the symbols into a local vector which is sorted // by address and contains enough data to learn about the symbol @@ -113,32 +121,33 @@ cfg_if::cfg_if! { // note that the sections are 1-indexed because the zero section // is special (apparently). let mut symbols = Vec::new(); - for (_, _, sym) in syms.iter() { - if sym.derived_type() != pe::symbol::IMAGE_SYM_DTYPE_FUNCTION - || sym.section_number == 0 + let mut i = 0; + let len = symtab.len(); + while i < len { + let sym = symtab.symbol(i)?; + i += 1 + sym.number_of_aux_symbols as usize; + let section_number = sym.section_number.get(LE); + if sym.derived_type() != object::pe::IMAGE_SYM_DTYPE_FUNCTION + || section_number == 0 { continue; } - let addr = usize::try_from(sym.value).ok()?; - let section = pe.sections.get(usize::try_from(sym.section_number).ok()? - 1)?; - let va = usize::try_from(section.virtual_address).ok()?; - symbols.push((addr + va + pe.image_base, sym)); + let addr = usize::try_from(sym.value.get(LE)).ok()?; + let section = sections.section(usize::try_from(section_number).ok()?).ok()?; + let va = usize::try_from(section.virtual_address.get(LE)).ok()?; + symbols.push((addr + va + image_base, sym)); } symbols.sort_unstable_by_key(|x| x.0); - Some(Object { pe, data, symbols, strtab }) + Some(Object { data, sections, strings, symbols }) } fn section(&self, name: &str) -> Option<&'a [u8]> { - let section = self.pe - .sections - .iter() - .find(|section| section.name().ok() == Some(name)); - section - .and_then(|section| { - let offset = section.pointer_to_raw_data as usize; - let size = cmp::min(section.virtual_size, section.size_of_raw_data) as usize; - self.data.get(offset..).and_then(|data| data.get(..size)) - }) + Some(self.sections + .section_by_name(self.strings, name.as_bytes())? + .1 + .pe_data(self.data) + .ok()? + .0) } fn search_symtab<'b>(&'b self, addr: u64) -> Option<&'b [u8]> { @@ -156,7 +165,7 @@ cfg_if::cfg_if! { // greatest less than `addr` Err(i) => i.checked_sub(1)?, }; - Some(self.symbols[i].1.name(&self.strtab).ok()?.as_bytes()) + self.symbols[i].1.name(self.strings).ok() } } @@ -164,57 +173,70 @@ cfg_if::cfg_if! { Vec::new() } } else if #[cfg(target_os = "macos")] { - use goblin::mach::MachO; use std::os::unix::prelude::*; use std::ffi::{OsStr, CStr}; + use object::{Bytes, NativeEndian}; + use object::macho; + use object::read::macho::{MachHeader, Section, Segment as _, Nlist}; + + #[cfg(target_pointer_width = "32")] + type Mach = object::macho::MachHeader32; + #[cfg(target_pointer_width = "64")] + type Mach = object::macho::MachHeader64; + type MachSegment = ::Segment; + type MachSection = ::Section; + type MachNlist = ::Nlist; struct Object<'a> { - macho: MachO<'a>, - dwarf: Option, - syms: Vec<(&'a str, u64)>, + endian: NativeEndian, + data: Bytes<'a>, + dwarf: Option<&'a [MachSection]>, + syms: Vec<(&'a [u8], u64)>, } impl<'a> Object<'a> { - fn parse(macho: MachO<'a>) -> Option> { - if !macho.little_endian { - return None; - } - let dwarf = macho - .segments - .iter() - .enumerate() - .find(|(_, segment)| segment.name().ok() == Some("__DWARF")) - .map(|p| p.0); + fn parse(mach: &'a Mach, endian: NativeEndian, data: Bytes<'a>) -> Option> { + let mut dwarf = None; let mut syms = Vec::new(); - if let Some(s) = &macho.symbols { - syms = s.iter() - .filter_map(|e| e.ok()) - .filter(|(name, nlist)| name.len() > 0 && !nlist.is_undefined()) - .map(|(name, nlist)| (name, nlist.n_value)) - .collect(); + let mut commands = mach.load_commands(endian, data).ok()?; + while let Ok(Some(command)) = commands.next() { + if let Some((segment, section_data)) = MachSegment::from_command(command).ok()? { + if segment.name() == b"__DWARF" { + dwarf = segment.sections(endian, section_data).ok(); + } + } else if let Some(symtab) = command.symtab().ok()? { + let symbols = symtab.symbols::(endian, data).ok()?; + syms = symbols.iter() + .filter_map(|nlist: &MachNlist| { + let name = nlist.name(endian, symbols.strings()).ok()?; + if name.len() > 0 && !nlist.is_undefined() { + Some((name, nlist.n_value(endian))) + } else { + None + } + }) + .collect(); + syms.sort_unstable_by_key(|(_, addr)| *addr); + } } - syms.sort_unstable_by_key(|(_, addr)| *addr); - Some(Object { macho, dwarf, syms }) + + Some(Object { endian, data, dwarf, syms }) } fn section(&self, name: &str) -> Option<&'a [u8]> { + let name = name.as_bytes(); let dwarf = self.dwarf?; - let dwarf = &self.macho.segments[dwarf]; - dwarf + let section = dwarf .into_iter() - .filter_map(|s| s.ok()) - .find(|(section, _data)| { - let section_name = match section.name() { - Ok(s) => s, - Err(_) => return false, - }; - §ion_name[..] == name || { - section_name.starts_with("__") - && name.starts_with(".") + .find(|section| { + let section_name = section.name(); + section_name == name || { + section_name.starts_with(b"__") + && name.starts_with(b".") && §ion_name[2..] == &name[1..] } - }) - .map(|p| p.1) + })?; + Some(section.data(self.endian, self.data).ok()?.0) } fn search_symtab<'b>(&'b self, addr: u64) -> Option<&'b [u8]> { @@ -223,153 +245,304 @@ cfg_if::cfg_if! { Err(i) => i.checked_sub(1)?, }; let (sym, _addr) = self.syms.get(i)?; - Some(sym.as_bytes()) + Some(sym) + } + } + + fn find_header(mut data: Bytes<'_>) -> Option<(&'_ Mach, Bytes<'_>)> { + use object::endian::BigEndian; + + let desired_cpu = || { + if cfg!(target_arch = "x86") { + Some(macho::CPU_TYPE_X86) + } else if cfg!(target_arch = "x86_64") { + Some(macho::CPU_TYPE_X86_64) + } else if cfg!(target_arch = "arm") { + Some(macho::CPU_TYPE_ARM) + } else if cfg!(target_arch = "aarch64") { + Some(macho::CPU_TYPE_ARM64) + } else { + None + } + }; + + match data.clone().read::>().ok()?.get(NativeEndian) { + macho::MH_MAGIC_64 + | macho::MH_CIGAM_64 + | macho::MH_MAGIC + | macho::MH_CIGAM => {} + + macho::FAT_MAGIC | macho::FAT_CIGAM => { + let mut header_data = data; + let endian = BigEndian; + let header = header_data.read::().ok()?; + let nfat = header.nfat_arch.get(endian); + let arch = (0..nfat) + .filter_map(|_| header_data.read::().ok()) + .find(|arch| desired_cpu() == Some(arch.cputype.get(endian)))?; + let offset = arch.offset.get(endian); + let size = arch.size.get(endian); + data = data.read_bytes_at( + offset.try_into().ok()?, + size.try_into().ok()?, + ).ok()?; + } + + macho::FAT_MAGIC_64 | macho::FAT_CIGAM_64 => { + let mut header_data = data; + let endian = BigEndian; + let header = header_data.read::().ok()?; + let nfat = header.nfat_arch.get(endian); + let arch = (0..nfat) + .filter_map(|_| header_data.read::().ok()) + .find(|arch| desired_cpu() == Some(arch.cputype.get(endian)))?; + let offset = arch.offset.get(endian); + let size = arch.size.get(endian); + data = data.read_bytes_at( + offset.try_into().ok()?, + size.try_into().ok()?, + ).ok()?; + } + + _ => return None, } + + Mach::parse(data).ok().map(|h| (h, data)) } #[allow(deprecated)] fn native_libraries() -> Vec { let mut ret = Vec::new(); - unsafe { - for i in 0..libc::_dyld_image_count() { - ret.extend(native_library(i)); - } + let images = unsafe { libc::_dyld_image_count() }; + for i in 0..images { + ret.extend(native_library(i)); } return ret; } #[allow(deprecated)] - unsafe fn native_library(i: u32) -> Option { - let name = libc::_dyld_get_image_name(i); - if name.is_null() { - return None; - } - let name = CStr::from_ptr(name); - let header = libc::_dyld_get_image_header(i); - if header.is_null() { - return None; - } + fn native_library(i: u32) -> Option { + // Fetch the name of this library which corresponds to the path of + // where to load it as well. + let name = unsafe { + let name = libc::_dyld_get_image_name(i); + if name.is_null() { + return None; + } + CStr::from_ptr(name) + }; + + // Load the image header of this library and delegate to `object` to + // parse all the load commands so we can figure out all the segments + // involved here. + let (mut load_commands, endian) = unsafe { + let header = libc::_dyld_get_image_header(i); + if header.is_null() { + return None; + } + match (*header).magic { + macho::MH_MAGIC => { + let endian = NativeEndian; + let header = &*(header as *const macho::MachHeader32); + let data = core::slice::from_raw_parts( + header as *const _ as *const u8, + mem::size_of_val(header) + header.sizeofcmds.get(endian) as usize + ); + (header.load_commands(endian, Bytes(data)).ok()?, endian) + } + macho::MH_MAGIC_64 => { + let endian = NativeEndian; + let header = &*(header as *const macho::MachHeader64); + let data = core::slice::from_raw_parts( + header as *const _ as *const u8, + mem::size_of_val(header) + header.sizeofcmds.get(endian) as usize + ); + (header.load_commands(endian, Bytes(data)).ok()?, endian) + } + _ => return None, + } + }; + + // Iterate over the segments and register known regions for segments + // that we find. Additionally record information bout text segments + // for processing later, see comments below. let mut segments = Vec::new(); - match (*header).magic { - libc::MH_MAGIC => { - let mut next_cmd = header.offset(1) as *const libc::load_command; - for _ in 0..(*header).ncmds { - segments.extend(segment(next_cmd)); - next_cmd = (next_cmd as usize + (*next_cmd).cmdsize as usize) as *const _; + let mut first_text = 0; + let mut text_fileoff_zero = false; + while let Some(cmd) = load_commands.next().ok()? { + if let Some((seg, _)) = cmd.segment_32().ok()? { + if seg.name() == b"__TEXT" { + first_text = segments.len(); + if seg.fileoff(endian) == 0 && seg.filesize(endian) > 0 { + text_fileoff_zero = true; + } } + segments.push(LibrarySegment { + len: seg.vmsize(endian).try_into().ok()?, + stated_virtual_memory_address: seg.vmaddr(endian).try_into().ok()?, + }); } - libc::MH_MAGIC_64 => { - let header = header as *const libc::mach_header_64; - let mut next_cmd = header.offset(1) as *const libc::load_command; - for _ in 0..(*header).ncmds { - segments.extend(segment(next_cmd)); - next_cmd = (next_cmd as usize + (*next_cmd).cmdsize as usize) as *const _; + if let Some((seg, _)) = cmd.segment_64().ok()? { + if seg.name() == b"__TEXT" { + first_text = segments.len(); + if seg.fileoff(endian) == 0 && seg.filesize(endian) > 0 { + text_fileoff_zero = true; + } } + segments.push(LibrarySegment { + len: seg.vmsize(endian).try_into().ok()?, + stated_virtual_memory_address: seg.vmaddr(endian).try_into().ok()?, + }); } - _ => return None, } + + // Determine the "slide" for this library which ends up being the + // bias we use to figure out where in memory objects are loaded. + // This is a bit of a weird computation though and is the result of + // trying a few things in the wild and seeing what sticks. + // + // The general idea is that the `bias` plus a segment's + // `stated_virtual_memory_address` is going to be where in the + // actual address space the segment resides. The other thing we rely + // on though is that a real address minus the `bias` is the index to + // look up in the symbol table and debuginfo. + // + // It turns out, though, that for system loaded libraries these + // calculations are incorrect. For native executables, however, it + // appears correct. Lifting some logic from LLDB's source it has + // some special-casing for the first `__TEXT` section loaded from + // file offset 0 with a nonzero size. For whatever reason when this + // is present it appears to mean that the symbol table is relative + // to just the vmaddr slide for the library. If it's *not* present + // then the symbol table is relative to the the vmaddr slide plus + // the segment's stated address. + // + // To handle this situation if we *don't* find a text section at + // file offset zero then we increase the bias by the first text + // sections's stated address and decrease all stated addresses by + // that amount as well. That way the symbol table is always appears + // relative to the library's bias amount. This appears to have the + // right results for symbolizing via the symbol table. + // + // Honestly I'm not entirely sure whether this is right or if + // there's something else that should indicate how to do this. For + // now though this seems to work well enough (?) and we should + // always be able to tweak this over time if necessary. + // + // For some more information see #318 + let mut slide = unsafe { libc::_dyld_get_image_vmaddr_slide(i) as usize }; + if !text_fileoff_zero { + let adjust = segments[first_text].stated_virtual_memory_address; + for segment in segments.iter_mut() { + segment.stated_virtual_memory_address -= adjust; + } + slide += adjust; + } + Some(Library { name: OsStr::from_bytes(name.to_bytes()).to_owned(), segments, - bias: libc::_dyld_get_image_vmaddr_slide(i) as *const u8, - }) - } - - unsafe fn segment(cmd: *const libc::load_command) -> Option { - Some(match (*cmd).cmd { - libc::LC_SEGMENT => { - let cmd = cmd as *const libc::segment_command; - LibrarySegment { - len: (*cmd).vmsize as usize, - stated_virtual_memory_address: (*cmd).vmaddr as *const u8, - } - } - libc::LC_SEGMENT_64 => { - let cmd = cmd as *const libc::segment_command_64; - LibrarySegment { - len: (*cmd).vmsize as usize, - stated_virtual_memory_address: (*cmd).vmaddr as *const u8, - } - } - _ => return None, + bias: slide, }) } } else { - use goblin::elf::Elf; use std::os::unix::prelude::*; use std::ffi::{OsStr, CStr}; + use object::{Bytes, NativeEndian}; + use object::read::StringTable; + use object::read::elf::{FileHeader, SectionHeader, SectionTable, Sym}; + #[cfg(target_pointer_width = "32")] + type Elf = object::elf::FileHeader32; + #[cfg(target_pointer_width = "64")] + type Elf = object::elf::FileHeader64; + + struct ParsedSym { + address: u64, + size: u64, + name: u32, + } struct Object<'a> { - elf: Elf<'a>, - data: &'a [u8], - // List of pre-parsed and sorted symbols by base address. The - // boolean indicates whether it comes from the dynamic symbol table - // or the normal symbol table, affecting where it's symbolicated. - syms: Vec<(goblin::elf::Sym, bool)>, + /// Zero-sized type representing the native endianness. + /// + /// We could use a literal instead, but this helps ensure correctness. + endian: NativeEndian, + /// The entire file data. + data: Bytes<'a>, + sections: SectionTable<'a, Elf>, + strings: StringTable<'a>, + /// List of pre-parsed and sorted symbols by base address. + syms: Vec, } impl<'a> Object<'a> { fn parse(data: &'a [u8]) -> Option> { + let data = object::Bytes(data); let elf = Elf::parse(data).ok()?; - if !elf.little_endian { - return None; + let endian = elf.endian().ok()?; + let sections = elf.sections(endian, data).ok()?; + let mut syms = sections.symbols(endian, data, object::elf::SHT_SYMTAB).ok()?; + if syms.is_empty() { + syms = sections.symbols(endian, data, object::elf::SHT_DYNSYM).ok()?; } - let mut syms = elf - .syms + let strings = syms.strings(); + + let mut syms = syms .iter() - .map(|s| (s, false)) - .chain(elf.dynsyms.iter().map(|s| (s, true))) // Only look at function/object symbols. This mirrors what // libbacktrace does and in general we're only symbolicating // function addresses in theory. Object symbols correspond // to data, and maybe someone's crazy enough to have a // function go into static data? - .filter(|(s, _)| { - s.is_function() || s.st_type() == goblin::elf::sym::STT_OBJECT + .filter(|sym| { + let st_type = sym.st_type(); + st_type == object::elf::STT_FUNC || st_type == object::elf::STT_OBJECT }) // skip anything that's in an undefined section header, // since it means it's an imported function and we're only // symbolicating with locally defined functions. - .filter(|(s, _)| { - s.st_shndx != goblin::elf::section_header::SHN_UNDEF as usize + .filter(|sym| { + sym.st_shndx(endian) != object::elf::SHN_UNDEF + }) + .map(|sym| { + let address = sym.st_value(endian); + let size = sym.st_size(endian); + let name = sym.st_name(endian); + ParsedSym { + address, + size, + name, + } }) .collect::>(); - syms.sort_unstable_by_key(|s| s.0.st_value); + syms.sort_unstable_by_key(|s| s.address); Some(Object { - syms, - elf, + endian, data, + sections, + strings, + syms, }) } fn section(&self, name: &str) -> Option<&'a [u8]> { - let section = self.elf.section_headers.iter().find(|section| { - match self.elf.shdr_strtab.get(section.sh_name) { - Some(Ok(section_name)) => section_name == name, - _ => false, - } - }); - section - .and_then(|section| { - self.data.get(section.sh_offset as usize..) - .and_then(|data| data.get(..section.sh_size as usize)) - }) + Some(self.sections + .section_by_name(self.endian, name.as_bytes())? + .1 + .data(self.endian, self.data) + .ok()? + .0) } fn search_symtab<'b>(&'b self, addr: u64) -> Option<&'b [u8]> { // Same sort of binary search as Windows above - let i = match self.syms.binary_search_by_key(&addr, |s| s.0.st_value) { + let i = match self.syms.binary_search_by_key(&addr, |sym| sym.address) { Ok(i) => i, Err(i) => i.checked_sub(1)?, }; - let (sym, dynamic) = self.syms.get(i)?; - if sym.st_value <= addr && addr <= sym.st_value + sym.st_size { - let strtab = if *dynamic { - &self.elf.dynstrtab - } else { - &self.elf.strtab - }; - Some(strtab.get(sym.st_name)?.ok()?.as_bytes()) + let sym = self.syms.get(i)?; + if sym.address <= addr && addr <= sym.address + sym.size { + self.strings.get(sym.name).ok() } else { None } @@ -407,10 +580,10 @@ cfg_if::cfg_if! { .iter() .map(|header| LibrarySegment { len: (*header).p_memsz as usize, - stated_virtual_memory_address: (*header).p_vaddr as *const u8, + stated_virtual_memory_address: (*header).p_vaddr as usize, }) .collect(), - bias: (*info).dlpi_addr as *const u8, + bias: (*info).dlpi_addr as usize, }); 0 } @@ -433,8 +606,9 @@ impl Mapping { // First up we need to load the unique UUID which is stored in the macho // header of the file we're reading, specified at `path`. let map = mmap(path)?; - let macho = MachO::parse(&map, 0).ok()?; - let uuid = find_uuid(&macho)?; + let (macho, data) = find_header(Bytes(&map))?; + let endian = macho.endian().ok()?; + let uuid = macho.uuid(endian, data).ok()??; // Next we need to look for a `*.dSYM` file. For now we just probe the // containing directory and look around for something that matches @@ -453,7 +627,7 @@ impl Mapping { continue; } let candidates = entry.path().join("Contents/Resources/DWARF"); - if let Some(mapping) = load_dsym(&candidates, &uuid) { + if let Some(mapping) = load_dsym(&candidates, uuid) { return Some(mapping); } } @@ -461,38 +635,26 @@ impl Mapping { // Looks like nothing matched our UUID, so let's at least return our own // file. This should have the symbol table for at least some // symbolication purposes. - let inner = cx(Object::parse(macho)?)?; + let inner = cx(Object::parse(macho, endian, data)?)?; return Some(mk!(Mapping { map, inner })); - fn load_dsym(dir: &Path, uuid: &[u8; 16]) -> Option { + fn load_dsym(dir: &Path, uuid: [u8; 16]) -> Option { for entry in dir.read_dir().ok()? { let entry = entry.ok()?; let map = mmap(&entry.path())?; - let macho = MachO::parse(&map, 0).ok()?; - let entry_uuid = find_uuid(&macho)?; + let (macho, data) = find_header(Bytes(&map))?; + let endian = macho.endian().ok()?; + let entry_uuid = macho.uuid(endian, data).ok()??; if entry_uuid != uuid { continue; } - if let Some(cx) = Object::parse(macho).and_then(cx) { + if let Some(cx) = Object::parse(macho, endian, data).and_then(cx) { return Some(mk!(Mapping { map, cx })); } } None } - - fn find_uuid<'a>(object: &'a MachO) -> Option<&'a [u8; 16]> { - use goblin::mach::load_command::CommandVariant; - - object - .load_commands - .iter() - .filter_map(|cmd| match &cmd.command { - CommandVariant::Uuid(u) => Some(&u.uuid), - _ => None, - }) - .next() - } } } @@ -515,13 +677,23 @@ struct Cache { struct Library { name: OsString, + /// Segments of this library loaded into memory, and where they're loaded. segments: Vec, - bias: *const u8, + /// The "bias" of this library, typically where it's loaded into memory. + /// This value is added to each segment's stated address to get the actual + /// virtual memory address that the segment is loaded into. Additionally + /// this bias is subtracted from real virtual memory addresses to index into + /// debuginfo and the symbol table. + bias: usize, } struct LibrarySegment { + /// The stated address of this segment in the object file. This is not + /// actually where the segment is loaded, but rather this address plus the + /// containing library's `bias` is where to find it. + stated_virtual_memory_address: usize, + /// The size of ths segment in memory. len: usize, - stated_virtual_memory_address: *const u8, } // unsafe because this is required to be externally synchronized @@ -632,10 +804,12 @@ impl Cache { pub unsafe fn resolve(what: ResolveWhat, cb: &mut FnMut(&super::Symbol)) { let addr = what.address_or_ip(); - let mut cb = DladdrFallback { - cb, - addr, - called: false, + let mut call = |sym: Symbol| { + // Extend the lifetime of `sym` to `'static` since we are unfortunately + // required to here, but it's ony ever going out as a reference so no + // reference to it should be persisted beyond this frame anyway. + let sym = mem::transmute::>(sym); + (cb)(&super::Symbol { inner: sym }); }; Cache::with_global(|cache| { @@ -650,9 +824,11 @@ pub unsafe fn resolve(what: ResolveWhat, cb: &mut FnMut(&super::Symbol)) { Some(cx) => cx, None => return, }; + let mut any_frames = false; if let Ok(mut frames) = cx.dwarf.find_frames(addr as u64) { while let Ok(Some(frame)) = frames.next() { - cb.call(Symbol::Frame { + any_frames = true; + call(Symbol::Frame { addr: addr as *mut c_void, location: frame.location, name: frame.function.map(|f| f.name.slice()), @@ -660,50 +836,15 @@ pub unsafe fn resolve(what: ResolveWhat, cb: &mut FnMut(&super::Symbol)) { } } - if !cb.called { + if !any_frames { if let Some(name) = cx.object.search_symtab(addr as u64) { - cb.call(Symbol::Symtab { + call(Symbol::Symtab { addr: addr as *mut c_void, name, }); } } }); - - drop(cb); -} - -struct DladdrFallback<'a, 'b> { - addr: *mut c_void, - called: bool, - cb: &'a mut (FnMut(&super::Symbol) + 'b), -} - -impl DladdrFallback<'_, '_> { - fn call(&mut self, sym: Symbol) { - self.called = true; - - // Extend the lifetime of `sym` to `'static` since we are unfortunately - // required to here, but it's ony ever going out as a reference so no - // reference to it should be persisted beyond this frame anyway. - let sym = unsafe { mem::transmute::>(sym) }; - (self.cb)(&super::Symbol { inner: sym }); - } -} - -impl Drop for DladdrFallback<'_, '_> { - fn drop(&mut self) { - if self.called { - return; - } - unsafe { - dladdr::resolve(self.addr, &mut |sym| { - (self.cb)(&super::Symbol { - inner: Symbol::Dladdr(sym), - }) - }); - } - } } pub enum Symbol<'a> { @@ -717,15 +858,11 @@ pub enum Symbol<'a> { /// Couldn't find debug information, but we found it in the symbol table of /// the elf executable. Symtab { addr: *mut c_void, name: &'a [u8] }, - /// We weren't able to find anything in the original file, so we had to fall - /// back to using `dladdr` which had a hit. - Dladdr(dladdr::Symbol<'a>), } impl Symbol<'_> { pub fn name(&self) -> Option { match self { - Symbol::Dladdr(s) => s.name(), Symbol::Frame { name, .. } => { let name = name.as_ref()?; Some(SymbolName::new(name)) @@ -736,7 +873,6 @@ impl Symbol<'_> { pub fn addr(&self) -> Option<*mut c_void> { match self { - Symbol::Dladdr(s) => s.addr(), Symbol::Frame { addr, .. } => Some(*addr), Symbol::Symtab { .. } => None, } @@ -744,7 +880,6 @@ impl Symbol<'_> { pub fn filename_raw(&self) -> Option { match self { - Symbol::Dladdr(s) => return s.filename_raw(), Symbol::Frame { location, .. } => { let file = location.as_ref()?.file?; Some(BytesOrWideString::Bytes(file.as_bytes())) @@ -755,7 +890,6 @@ impl Symbol<'_> { pub fn filename(&self) -> Option<&Path> { match self { - Symbol::Dladdr(s) => return s.filename(), Symbol::Frame { location, .. } => { let file = location.as_ref()?.file?; Some(Path::new(file)) @@ -766,7 +900,6 @@ impl Symbol<'_> { pub fn lineno(&self) -> Option { match self { - Symbol::Dladdr(s) => return s.lineno(), Symbol::Frame { location, .. } => location.as_ref()?.line, Symbol::Symtab { .. } => None, } diff --git a/src/symbolize/libbacktrace.rs b/src/symbolize/libbacktrace.rs index 0a4afdec4..ac1101de8 100644 --- a/src/symbolize/libbacktrace.rs +++ b/src/symbolize/libbacktrace.rs @@ -35,10 +35,9 @@ extern crate backtrace_sys as bt; -use core::{ptr, slice}; +use core::{marker, ptr, slice}; use libc::{self, c_char, c_int, c_void, uintptr_t}; -use crate::symbolize::dladdr; use crate::symbolize::{ResolveWhat, SymbolName}; use crate::types::BytesOrWideString; @@ -46,6 +45,7 @@ pub enum Symbol<'a> { Syminfo { pc: uintptr_t, symname: *const c_char, + _marker: marker::PhantomData<&'a ()>, }, Pcinfo { pc: uintptr_t, @@ -54,7 +54,6 @@ pub enum Symbol<'a> { function: *const c_char, symname: *const c_char, }, - Dladdr(dladdr::Symbol<'a>), } impl Symbol<'_> { @@ -89,7 +88,6 @@ impl Symbol<'_> { } symbol(symname) } - Symbol::Dladdr(ref s) => s.name(), } } @@ -97,7 +95,6 @@ impl Symbol<'_> { let pc = match *self { Symbol::Syminfo { pc, .. } => pc, Symbol::Pcinfo { pc, .. } => pc, - Symbol::Dladdr(ref s) => return s.addr(), }; if pc == 0 { None @@ -119,7 +116,6 @@ impl Symbol<'_> { Some(slice::from_raw_parts(ptr, len)) } } - Symbol::Dladdr(_) => None, } } @@ -151,7 +147,6 @@ impl Symbol<'_> { match *self { Symbol::Syminfo { .. } => None, Symbol::Pcinfo { lineno, .. } => Some(lineno as u32), - Symbol::Dladdr(ref s) => s.lineno(), } } } @@ -201,6 +196,7 @@ extern "C" fn syminfo_cb( inner: Symbol::Syminfo { pc: pc, symname: symname, + _marker: marker::PhantomData, }, }); } @@ -448,42 +444,21 @@ pub unsafe fn resolve(what: ResolveWhat, cb: &mut FnMut(&super::Symbol)) { // backtrace errors are currently swept under the rug let state = init_state(); if state.is_null() { - return dladdr_fallback(what.address_or_ip(), cb); + return; } - // Call the `backtrace_syminfo` API first. This is (from reading the code) - // guaranteed to call `syminfo_cb` exactly once (or fail with an error + // Call the `backtrace_syminfo` API which (from reading the code) + // should call `syminfo_cb` exactly once (or fail with an error // presumably). We then handle more within the `syminfo_cb`. // // Note that we do this since `syminfo` will consult the symbol table, // finding symbol names even if there's no debug information in the binary. - let mut called = false; - { - let mut syminfo_state = SyminfoState { - pc: symaddr, - cb: &mut |sym| { - called = true; - cb(sym); - }, - }; - bt::backtrace_syminfo( - state, - symaddr as uintptr_t, - syminfo_cb, - error_cb, - &mut syminfo_state as *mut _ as *mut _, - ); - } - - if !called { - dladdr_fallback(what.address_or_ip(), cb); - } -} - -unsafe fn dladdr_fallback(addr: *mut c_void, cb: &mut FnMut(&super::Symbol)) { - dladdr::resolve(addr, &mut |sym| { - cb(&super::Symbol { - inner: Symbol::Dladdr(sym), - }) - }); + let mut syminfo_state = SyminfoState { pc: symaddr, cb }; + bt::backtrace_syminfo( + state, + symaddr as uintptr_t, + syminfo_cb, + error_cb, + &mut syminfo_state as *mut _ as *mut _, + ); } diff --git a/src/symbolize/mod.rs b/src/symbolize/mod.rs index f3e9d3649..793b2a897 100644 --- a/src/symbolize/mod.rs +++ b/src/symbolize/mod.rs @@ -455,8 +455,6 @@ pub fn clear_symbol_cache() { } } -mod dladdr; - cfg_if::cfg_if! { if #[cfg(all(windows, target_env = "msvc", feature = "dbghelp", not(target_vendor = "uwp")))] { mod dbghelp; @@ -485,15 +483,6 @@ cfg_if::cfg_if! { use self::libbacktrace::resolve as resolve_imp; use self::libbacktrace::Symbol as SymbolImp; unsafe fn clear_symbol_cache_imp() {} - } else if #[cfg(all(unix, - not(target_os = "emscripten"), - not(target_os = "fuchsia"), - not(target_env = "uclibc"), - feature = "dladdr"))] { - mod dladdr_resolve; - use self::dladdr_resolve::resolve as resolve_imp; - use self::dladdr_resolve::Symbol as SymbolImp; - unsafe fn clear_symbol_cache_imp() {} } else { mod noop; use self::noop::resolve as resolve_imp; diff --git a/tests/smoke.rs b/tests/smoke.rs index 5e01ab6b4..f2ccd0079 100644 --- a/tests/smoke.rs +++ b/tests/smoke.rs @@ -6,7 +6,6 @@ use std::thread; static LIBUNWIND: bool = cfg!(all(unix, feature = "libunwind")); static UNIX_BACKTRACE: bool = cfg!(all(unix, feature = "unix-backtrace")); static LIBBACKTRACE: bool = cfg!(feature = "libbacktrace") && !cfg!(target_os = "fuchsia"); -static DLADDR: bool = cfg!(all(unix, feature = "dladdr")) && !cfg!(target_os = "fuchsia"); static DBGHELP: bool = cfg!(all(windows, feature = "dbghelp")); static MSVC: bool = cfg!(target_env = "msvc"); static GIMLI_SYMBOLIZE: bool = cfg!(all(feature = "gimli-symbolize", unix, target_os = "linux")); @@ -134,7 +133,7 @@ fn smoke_test_frames() { } let mut resolved = 0; - let can_resolve = DLADDR || LIBBACKTRACE || DBGHELP || GIMLI_SYMBOLIZE; + let can_resolve = LIBBACKTRACE || DBGHELP || GIMLI_SYMBOLIZE; let mut name = None; let mut addr = None; @@ -154,9 +153,8 @@ fn smoke_test_frames() { _ => {} } - // * linux dladdr doesn't work (only consults local symbol table) // * windows dbghelp isn't great for GNU - if can_resolve && !(cfg!(target_os = "linux") && DLADDR) && !(DBGHELP && !MSVC) { + if can_resolve && !(DBGHELP && !MSVC) { let name = name.expect("didn't find a name"); // in release mode names get weird as functions can get merged