diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3def0a2d4..1d4120571 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,14 +35,12 @@ concurrency: jobs: test: - # We're stuck on Ubuntu 20.04 as long as we want to keep testing on Python - # 3.6 due to actions/setup-python#544. - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 strategy: matrix: python-version: ${{ (github.event_name == 'push' || inputs.test_all_python_versions) - && fromJSON('["3.13", "3.12", "3.11", "3.10", "3.9", "3.8", "3.7", "3.6"]') - || fromJSON('["3.12", "3.6"]')}} + && fromJSON('["3.14", "3.13", "3.12", "3.11", "3.10", "3.9", "3.8"]') + || fromJSON('["3.13", "3.8"]')}} cc: [gcc, clang] fail-fast: false env: @@ -55,21 +53,18 @@ jobs: with: python-version: ${{ matrix.python-version }} allow-prereleases: true - - name: Check Python version for pre-commit - # Only run pre-commit / mypy on upstream supported Python versions - run: | - if [[ "${{ matrix.python-version }}" =~ ^3\.([89]|[0-9][0-9])$ ]]; then - echo USE_PRE_COMMIT=1 >> $GITHUB_ENV - fi - name: Install dependencies run: | sudo apt-get update -y sudo apt-get install -y btrfs-progs check dwarves libelf-dev libdw-dev qemu-kvm zstd ${{ matrix.cc == 'clang' && 'libomp-$(clang --version | sed -rn "s/.*clang version ([0-9]+).*/\\1/p")-dev' || '' }} - pip install pyroute2 setuptools ${USE_PRE_COMMIT/1/pre-commit} + # pyroute2 0.9.1 dropped support for Python < 3.9. 
+ if [[ "${{ matrix.python-version }}" =~ ^3\.[678]$ ]]; then + pyroute2_version="<0.9.1" + fi + pip install "pyroute2$pyroute2_version" setuptools pre-commit - name: Generate version.py run: python setup.py --version - name: Check with mypy - if: ${{ env.USE_PRE_COMMIT == '1' }} run: pre-commit run --all-files mypy - name: Build and test with ${{ matrix.cc }} run: CONFIGURE_FLAGS="--enable-compiler-warnings=error" python setup.py test -K ${{ inputs.test_all_kernel_flavors && '-F' || '' }} diff --git a/.github/workflows/dco-check.yml b/.github/workflows/dco-check.yml index 06822fe41..6ab2bba39 100644 --- a/.github/workflows/dco-check.yml +++ b/.github/workflows/dco-check.yml @@ -18,14 +18,22 @@ jobs: git init git fetch --filter=blob:none "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" "$GITHUB_BASE_REF" "$GITHUB_REF" - name: Check for DCO sign-offs + shell: bash run: | - no_sign_off="$(git log --no-merges --grep=Signed-off-by --invert-grep "FETCH_HEAD..$GITHUB_SHA")" - if [ -z "$no_sign_off" ]; then + status=0 + while read -r commit; do + author="$(git show --no-patch --pretty='format:%an <%ae>' "$commit")" + if ! 
git show --no-patch --pretty='format:%(trailers:key=Signed-off-by,valueonly)' "$commit" | grep -Fxq "$author"; then + if [ $status -eq 0 ]; then + echo "The following commits are missing a Developer Certificate of Origin sign-off;" + echo "see https://github.com/osandov/drgn/blob/main/CONTRIBUTING.rst#signing-off" + echo + fi + status=1 + git show --no-patch "$commit" + fi + done < <(git rev-list --no-merges "FETCH_HEAD..$GITHUB_SHA") + if [ $status -eq 0 ]; then echo "All commits have a Developer Certificate of Origin sign-off" - else - echo "The following commits are missing a Developer Certificate of Origin sign-off;" - echo "see https://github.com/osandov/drgn/blob/main/CONTRIBUTING.rst#signing-off" - echo - echo "$no_sign_off" - exit 1 fi + exit $status diff --git a/.github/workflows/vmtest-build.yml b/.github/workflows/vmtest-build.yml index e442240be..010ec0f15 100644 --- a/.github/workflows/vmtest-build.yml +++ b/.github/workflows/vmtest-build.yml @@ -13,9 +13,7 @@ jobs: arch: [x86_64, aarch64, ppc64, s390x, arm] fail-fast: false max-parallel: 5 - # Build on 20.04 so that we don't get host binaries (e.g., objtool) that - # depend on libraries too new for other distros. 
- runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 permissions: contents: write env: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..86b14d7b8 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,38 @@ +exclude: ^contrib/ +repos: +- repo: https://github.com/pycqa/isort + rev: 5.13.2 + hooks: + - id: isort + name: isort (python) +- repo: https://github.com/psf/black + rev: 24.8.0 + hooks: + - id: black + exclude: ^docs/exts/details\.py$ +- repo: https://github.com/pycqa/flake8 + rev: 7.1.2 + hooks: + - id: flake8 +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.14.1 + hooks: + - id: mypy # run by id from the CI workflow: pre-commit run --all-files mypy + args: [--show-error-codes, --strict, --no-warn-return-any] + files: ^(drgn/.*\.py|_drgn.pyi|_drgn_util/.*\.py|tools/.*\.py)$ +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + exclude_types: [diff] + - id: end-of-file-fixer + exclude_types: [diff] + - id: check-yaml + - id: check-added-large-files + - id: debug-statements + - id: check-merge-conflict +- repo: https://github.com/netromdk/vermin + rev: v1.6.0 + hooks: + - id: vermin + args: ['-t=3.8-', '--violations', '--eval-annotations'] diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 03853730e..14fff4e2e 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,6 +3,8 @@ build: os: ubuntu-22.04 tools: python: "3" + apt_packages: + - graphviz sphinx: configuration: docs/conf.py python: diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index dc7630412..809f5000e 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -18,12 +18,11 @@ instructions `_, then run: $ CONFIGURE_FLAGS="--enable-compiler-warnings=error" python3 setup.py build_ext -i $ python3 -m drgn --help -Drgn can build, run, and pass its test suite on Python 3.6 or later. However, +Drgn can build, run, and pass its test suite on Python 3.8 or later.
However, many of the tools used as part of the development workflow do not support Python versions once they have reached their end-of-life. Thus, your main drgn development environment should use a Python version which is actively supported -upstream. In particular, the drgn development workflow no longer supported on -Python 3.6. +upstream. Testing ------- @@ -74,9 +73,6 @@ Or you can run them manually: $ pre-commit run --all-files -Please remember that these pre-commit hooks do not support Python 3.6; they -require a Python major version which is actively supported upstream. - Coding Guidelines ----------------- @@ -189,7 +185,7 @@ drgn assumes some `implementation-defined behavior Python ^^^^^^ -Python code in drgn should be compatible with Python 3.6 and newer. +Python code in drgn should be compatible with Python 3.8 and newer. Python code is formatted with `Black `_ and `isort `_. diff --git a/README.rst b/README.rst index 1bd7c2a34..f5ec2447c 100644 --- a/README.rst +++ b/README.rst @@ -66,17 +66,17 @@ Package Manager drgn can be installed using the package manager on some Linux distributions. -.. image:: https://repology.org/badge/vertical-allrepos/drgn.svg +.. image:: https://repology.org/badge/vertical-allrepos/drgn.svg?exclude_unsupported=1 :target: https://repology.org/project/drgn/versions :alt: Packaging Status -* Fedora >= 32 +* Fedora, RHEL/CentOS Stream >= 9 .. code-block:: console $ sudo dnf install drgn -* RHEL/CentOS >= 8 +* RHEL/CentOS < 9 `Enable EPEL `_. Then: @@ -86,14 +86,14 @@ drgn can be installed using the package manager on some Linux distributions. * Oracle Linux >= 8 - Enable the ``ol8_addons`` or ``ol9_addons`` repository and install drgn: + Enable the ``ol8_addons`` or ``ol9_addons`` repository. Then: .. code-block:: console $ sudo dnf config-manager --enable ol8_addons # OR: ol9_addons $ sudo dnf install drgn - Drgn is also available for Python versions in application streams. 
For + drgn is also available for Python versions in application streams. For example, use ``dnf install python3.12-drgn`` to install drgn for Python 3.12. See the documentation for drgn in `Oracle Linux 9 `_ @@ -101,17 +101,20 @@ drgn can be installed using the package manager on some Linux distributions. `_ for more information. -* Arch Linux +* Debian >= 12 (Bookworm)/Ubuntu >= 24.04 (Noble Numbat) .. code-block:: console - $ sudo pacman -S drgn + $ sudo apt install python3-drgn -* Debian >= 12 (Bookworm) + To get the latest version on Ubuntu, enable the `michel-slm/kernel-utils PPA + `_ first. + +* Arch Linux .. code-block:: console - $ sudo apt install python3-drgn + $ sudo pacman -S drgn * Gentoo @@ -125,15 +128,6 @@ drgn can be installed using the package manager on some Linux distributions. $ sudo zypper install python3-drgn -* Ubuntu - - Enable the `michel-slm/kernel-utils PPA `_. - Then: - - .. code-block:: console - - $ sudo apt install python3-drgn - pip ^^^ @@ -152,9 +146,11 @@ This will install a binary wheel by default. If you get a build error, then pip wasn't able to use the binary wheel. Install the dependencies listed `below <#from-source>`_ and try again. -Note that RHEL/CentOS 6, Debian Stretch, Ubuntu Trusty, and Ubuntu Xenial (and -older) ship Python versions which are too old. Python 3.6 or newer must be -installed. +Note that RHEL/CentOS 7, Debian 10 ("buster"), and Ubuntu 18.04 ("Bionic +Beaver") (and older) ship Python versions which are too old. Python 3.8 or +newer must be installed. + +.. _installation-from-source: From Source ^^^^^^^^^^^ @@ -162,17 +158,17 @@ From Source To get the development version of drgn, you will need to build it from source. First, install dependencies: -* Fedora +* Fedora, RHEL/CentOS Stream >= 9 .. 
code-block:: console - $ sudo dnf install autoconf automake check-devel elfutils-devel gcc git libkdumpfile-devel libtool make pkgconf python3 python3-devel python3-pip python3-setuptools + $ sudo dnf install autoconf automake check-devel elfutils-debuginfod-client-devel elfutils-devel gcc git libkdumpfile-devel libtool make pkgconf python3 python3-devel python3-pip python3-setuptools xz-devel -* RHEL/CentOS/Oracle Linux +* RHEL/CentOS < 9, Oracle Linux .. code-block:: console - $ sudo dnf install autoconf automake check-devel elfutils-devel gcc git libtool make pkgconf python3 python3-devel python3-pip python3-setuptools + $ sudo dnf install autoconf automake check-devel elfutils-devel gcc git libtool make pkgconf python3 python3-devel python3-pip python3-setuptools xz-devel Optionally, install ``libkdumpfile-devel`` from EPEL on RHEL/CentOS >= 8 or install `libkdumpfile `_ from @@ -191,28 +187,29 @@ First, install dependencies: .. code-block:: console - $ sudo apt install autoconf automake check gcc git liblzma-dev libelf-dev libdw-dev libtool make pkgconf python3 python3-dev python3-pip python3-setuptools zlib1g-dev + $ sudo apt install autoconf automake check gcc git libdebuginfod-dev libkdumpfile-dev liblzma-dev libelf-dev libdw-dev libtool make pkgconf python3 python3-dev python3-pip python3-setuptools zlib1g-dev - Optionally, install libkdumpfile from source if you want support for the - makedumpfile format. + On Debian <= 11 (Bullseye) and Ubuntu <= 22.04 (Jammy Jellyfish), + ``libkdumpfile-dev`` is not available, so you must install libkdumpfile from + source if you want support for the makedumpfile format. * Arch Linux .. code-block:: console - $ sudo pacman -S --needed autoconf automake check gcc git libelf libkdumpfile libtool make pkgconf python python-pip python-setuptools + $ sudo pacman -S --needed autoconf automake check gcc git libelf libkdumpfile libtool make pkgconf python python-pip python-setuptools xz * Gentoo .. 
code-block:: console - $ sudo emerge --noreplace --oneshot dev-build/autoconf dev-build/automake dev-libs/check dev-libs/elfutils sys-devel/gcc dev-vcs/git dev-libs/libkdumpfile dev-build/libtool dev-build/make dev-python/pip virtual/pkgconfig dev-lang/python dev-python/setuptools + $ sudo emerge --noreplace --oneshot dev-build/autoconf dev-build/automake dev-libs/check dev-libs/elfutils sys-devel/gcc dev-vcs/git dev-libs/libkdumpfile dev-build/libtool dev-build/make dev-python/pip virtual/pkgconfig dev-lang/python dev-python/setuptools app-arch/xz-utils * openSUSE .. code-block:: console - $ sudo zypper install autoconf automake check-devel gcc git libdw-devel libelf-devel libkdumpfile-devel libtool make pkgconf python3 python3-devel python3-pip python3-setuptools + $ sudo zypper install autoconf automake check-devel gcc git libdebuginfod-devel libdw-devel libelf-devel libkdumpfile-devel libtool make pkgconf python3 python3-devel python3-pip python3-setuptools xz-devel Then, run: @@ -233,20 +230,20 @@ Quick Start .. start-quick-start -drgn debugs the running kernel by default; run ``sudo drgn``. To debug a -running program, run ``sudo drgn -p $PID``. To debug a core dump (either a -kernel vmcore or a userspace core dump), run ``drgn -c $PATH``. Make sure to -`install debugging symbols +drgn debugs the running kernel by default; simply run ``drgn``. To debug a +running program, run ``drgn -p $PID``. To debug a core dump (either a kernel +vmcore or a userspace core dump), run ``drgn -c $PATH``. Make sure to `install +debugging symbols `_ for whatever you are debugging. -Then, you can access variables in the program with ``prog['name']`` and access +Then, you can access variables in the program with ``prog["name"]`` and access structure members with ``.``: .. 
code-block:: pycon - $ sudo drgn - >>> prog['init_task'].comm + $ drgn + >>> prog["init_task"].comm (char [16])"swapper/0" You can use various predefined helpers: @@ -260,14 +257,14 @@ You can use various predefined helpers: [b'findmnt', b'-p'] You can get stack traces with ``stack_trace()`` and access parameters or local -variables with ``trace['name']``: +variables with ``trace["name"]``: .. code-block:: pycon >>> trace = stack_trace(task) >>> trace[5] #5 at 0xffffffff8a5a32d0 (do_sys_poll+0x400/0x578) in do_poll at ./fs/select.c:961:8 (inlined) - >>> poll_list = trace[5]['list'] + >>> poll_list = trace[5]["list"] >>> file = fget(task, poll_list.entries[0].fd) >>> d_path(file.f_path.address_of_()) b'/proc/115/mountinfo' diff --git a/_drgn.pyi b/_drgn.pyi index 9fcd8e75d..569c696cc 100644 --- a/_drgn.pyi +++ b/_drgn.pyi @@ -16,23 +16,23 @@ from typing import ( Callable, ClassVar, Dict, + Final, Iterable, Iterator, List, Mapping, + MutableMapping, + NamedTuple, Optional, + Protocol, Sequence, Set, + SupportsIndex, Tuple, Union, overload, ) -if sys.version_info < (3, 8): - from typing_extensions import Final, Protocol -else: - from typing import Final, Protocol - if sys.version_info < (3, 10): from typing_extensions import TypeAlias else: @@ -43,18 +43,15 @@ if sys.version_info < (3, 12): else: from collections.abc import Buffer -# This is effectively typing.SupportsIndex without @typing.runtime_checkable -# (both of which are only available since Python 3.8), with a more -# self-explanatory name. -class IntegerLike(Protocol): - """ - An :class:`int` or integer-like object. +IntegerLike: TypeAlias = SupportsIndex +""" +An :class:`int` or integer-like object. - Parameters annotated with this type expect an integer which may be given as - a Python :class:`int` or an :class:`Object` with integer type. - """ +Parameters annotated with this type expect an integer which may be given as a +Python :class:`int` or an :class:`Object` with integer type. 
- def __index__(self) -> int: ... +This is equivalent to :class:`typing.SupportsIndex`. +""" Path: TypeAlias = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] """ @@ -229,14 +226,17 @@ class Program: Get a symbol containing the given address, or a symbol with the given name. - Global symbols are preferred over weak symbols, and weak symbols are - preferred over other symbols. In other words: if a matching - :attr:`SymbolBinding.GLOBAL` or :attr:`SymbolBinding.UNIQUE` symbol is - found, it is returned. Otherwise, if a matching - :attr:`SymbolBinding.WEAK` symbol is found, it is returned. Otherwise, - any matching symbol (e.g., :attr:`SymbolBinding.LOCAL`) is returned. If - there are multiple matching symbols with the same binding, one is - returned arbitrarily. To retrieve all matching symbols, use + If there are multiple symbols containing a given address, then this + will attempt to find the closest match. + + If searching by name or if there is a tie, global symbols are preferred + over weak symbols, and weak symbols are preferred over other symbols. + In other words: if a matching :attr:`SymbolBinding.GLOBAL` or + :attr:`SymbolBinding.UNIQUE` symbol is found, it is returned. + Otherwise, if a matching :attr:`SymbolBinding.WEAK` symbol is found, it + is returned. Otherwise, any matching symbol (e.g., + :attr:`SymbolBinding.LOCAL`) is returned. If there is still a tie, one + is returned arbitrarily. To retrieve all matching symbols, use :meth:`symbols()`. :param address_or_name: Address or name to search for. @@ -379,7 +379,7 @@ class Program: def read( self, address: IntegerLike, size: IntegerLike, physical: bool = False ) -> bytes: - """ + r""" Read *size* bytes of memory starting at *address* in the program. The address may be virtual (the default) or physical if the program supports it. @@ -691,34 +691,262 @@ class Program: """ ... 
+ def modules(self) -> Iterator[Module]: + """Get an iterator over all of the created modules in the program.""" + + def loaded_modules(self) -> Iterator[Tuple[Module, bool]]: + """ + Get an iterator over executables, libraries, etc. that are loaded in + the program, creating modules to represent them. + + Modules are created lazily as items are consumed. + + This may automatically load some debugging information necessary to + enumerate the modules. Other than that, it does not load debugging + information. + + See :meth:`load_debug_info()` for a higher-level interface that does + load debugging information. + + :return: Iterator of module and ``True`` if it was newly created + or ``False`` if it was previously found. + """ + ... + + def create_loaded_modules(self) -> None: + """ + Determine what executables, libraries, etc. are loaded in the program + and create modules to represent them. + + This is a shortcut for exhausting a :meth:`loaded_modules()` iterator. + It is equivalent to: + + .. code-block:: python3 + + for _ in prog.loaded_modules(): + pass + """ + + @overload + def main_module(self) -> MainModule: + """ + Find the main module. + + :raises LookupError: if the main module has not been created + """ + ... + + @overload + def main_module(self, name: Path, *, create: bool = False) -> MainModule: + """ + Find the main module. + + :param name: :attr:`Module.name` + :param create: Create the module if it doesn't exist. + :raises LookupError: if the main module has not been created and + *create* is ``False``, or if the main module has already been + created with a different name + """ + ... + + def shared_library_module( + self, + name: Path, + dynamic_address: IntegerLike, + *, + create: bool = False, + ) -> SharedLibraryModule: + """ + Find a shared library module. + + :param name: :attr:`Module.name` + :param dynamic_address: :attr:`SharedLibraryModule.dynamic_address` + :param create: Create the module if it doesn't exist. 
+ :return: Shared library module with the given name and dynamic address. + :raises LookupError: if no matching module has been created and + *create* is ``False`` + """ + ... + + def vdso_module( + self, + name: Path, + dynamic_address: IntegerLike, + *, + create: bool = False, + ) -> VdsoModule: + """ + Find a vDSO module. + + :param name: :attr:`Module.name` + :param dynamic_address: :attr:`VdsoModule.dynamic_address` + :param create: Create the module if it doesn't exist. + :return: vDSO module with the given name and dynamic address. + :raises LookupError: if no matching module has been created and + *create* is ``False`` + """ + ... + + def relocatable_module( + self, name: Path, address: IntegerLike, *, create: bool = False + ) -> RelocatableModule: + """ + Find a relocatable module. + + :param name: :attr:`Module.name` + :param address: :attr:`RelocatableModule.address` + :param create: Create the module if it doesn't exist. + :return: Relocatable module with the given name and address. + :raises LookupError: if no matching module has been created and + *create* is ``False`` + """ + ... + + def linux_kernel_loadable_module( + self, module_obj: Object, *, create: bool = False + ) -> RelocatableModule: + """ + Find a Linux kernel loadable module from a ``struct module *`` object. + + Note that kernel modules are represented as relocatable modules. + + :param module_obj: ``struct module *`` object for the kernel module. + :param create: Create the module if it doesn't exist. + :return: Relocatable module with a name and address matching + *module_obj*. + :raises LookupError: if no matching module has been created and + *create* is ``False`` + """ + ... + + def extra_module( + self, name: Path, id: IntegerLike = 0, *, create: bool = False + ) -> ExtraModule: + """ + Find an extra module. + + :param name: :attr:`Module.name` + :param id: :attr:`ExtraModule.id` + :param create: Create the module if it doesn't exist. 
+ :return: Extra module with the given name and ID number. + :raises LookupError: if no matching module has been created and + *create* is ``False`` + """ + ... + + def module(self, __address_or_name: Union[IntegerLike, str]) -> Module: + """ + Find the module containing the given address, or the module with the + given name. + + Addresses are matched based on :attr:`Module.address_ranges`. + + If there are multiple modules with the given name, one is returned + arbitrarily. + + :param address_or_name: Address or name to search for. + :raises LookupError: if no module contains the given address or has the + given name + """ + ... + + def register_debug_info_finder( + self, + name: str, + fn: Callable[[Sequence[Module]], None], + *, + enable_index: Optional[int] = None, + ) -> None: + """ + Register a callback for finding debugging information. + + This does not enable the finder unless *enable_index* is given. + + :param name: Finder name. + :param fn: Callable taking a list of :class:`Module`\\ s that want + debugging information. + + This should check :meth:`Module.wants_loaded_file()` and + :meth:`Module.wants_debug_file()` and do one of the following for + each module: + + * Obtain and/or locate a file wanted by the module and call + :meth:`Module.try_file()`. + * Install files for a later finder to use. + * Set :attr:`Module.loaded_file_status` or + :attr:`Module.debug_file_status` to + :attr:`ModuleFileStatus.DONT_NEED` if the finder believes that + the file is not needed. + * Ignore it, for example if the finder doesn't know how to find the + wanted files for the module. + :param enable_index: Insert the finder into the list of enabled + debugging information finders at the given index. If -1 or greater + than the number of enabled finders, insert it at the end. If + ``None`` or not given, don't enable the finder. + :raises ValueError: if there is already a finder with the given name + """ + ...
+ + def registered_debug_info_finders(self) -> Set[str]: + """Return the names of all registered debugging information finders.""" + ... + + def set_enabled_debug_info_finders(self, names: Sequence[str]) -> None: + """ + Set the list of enabled debugging information finders. + + Finders are called in the same order as the list until all wanted files + have been found. + + Finders that are not in the list are not called. + + :param names: Names of finders to enable, in order. + :raises ValueError: if no finder has a given name or the same name is + given more than once + """ + ... + + def enabled_debug_info_finders(self) -> List[str]: + """ + Return the names of enabled debugging information finders, in order. + """ + ... + debug_info_options: DebugInfoOptions + """Default options for debugging information searches.""" + def load_debug_info( self, - paths: Optional[Iterable[Path]] = None, + paths: Optional[Iterable[Path]] = (), default: bool = False, main: bool = False, ) -> None: """ - Load debugging information for a list of executable or library files. + Load debugging information for the given set of files and/or modules. - Note that this is parallelized, so it is usually faster to load - multiple files at once rather than one by one. + This determines what executables, libraries, etc. are loaded in the + program (see :meth:`loaded_modules()`) and tries to load their + debugging information from the given *paths*. - :param paths: Paths of binary files. - :param default: Also load debugging information which can automatically - be determined from the program. + .. note:: + It is much more efficient to load multiple files at once rather + than one by one when possible. - For the Linux kernel, this tries to load ``vmlinux`` and any loaded - kernel modules from a few standard locations. + :param paths: Paths of binary files to try. - For userspace programs, this tries to load the executable and any - loaded libraries. 
+ Files that don't correspond to any loaded modules are ignored. See + :class:`ExtraModule` for a way to provide arbitrary debugging + information. + :param default: Try to load all debugging information for all loaded + modules. - This implies ``main=True``. - :param main: Also load debugging information for the main executable. + The files in *paths* are tried first before falling back to the + enabled debugging information finders. - For the Linux kernel, this tries to load ``vmlinux``. + This implies ``main=True``. + :param main: Try to load all debugging information for the main module. - This is currently ignored for userspace programs. + The files in *paths* are tried first before falling back to the + enabled debugging information finders. :raises MissingDebugInfoError: if debugging information was not available for some files; other files with debugging information are still loaded @@ -727,12 +955,40 @@ class Program: def load_default_debug_info(self) -> None: """ - Load debugging information which can automatically be determined from - the program. + Load all debugging information that can automatically be determined + from the program. - This is equivalent to ``load_debug_info(None, True)``. + This is equivalent to ``load_debug_info(default=True)``. """ ... + + def load_module_debug_info(self, *modules: Module) -> None: + """ + Load debugging information for the given modules using the enabled + debugging information finders. + + The files to search for are controlled by + :attr:`Module.loaded_file_status` and :attr:`Module.debug_file_status`. + """ + ... + + def find_standard_debug_info( + self, modules: Iterable[Module], options: Optional[DebugInfoOptions] = None + ) -> None: + """ + Load debugging information for the given modules from the standard + locations. + + This is equivalent to the ``standard`` debugging information finder + that is registered by default. 
It is intended for use by other + debugging information finders that need a variation of the standard + finder (e.g., after installing something or setting specific options). + + :param modules: Modules to load debugging information for. + :param options: Options to use when searching for debugging + information. If ``None`` or not given, this uses + :attr:`self.debug_info_options <Program.debug_info_options>`. + """ cache: Dict[Any, Any] """ Dictionary for caching program metadata. @@ -1082,6 +1338,194 @@ class FindObjectFlags(enum.Flag): ANY = ... "" +class DebugInfoOptions: + """ + Options for debugging information searches. + + All of these options can be reassigned. + """ + + def __init__( + self, + __options: Optional[DebugInfoOptions] = None, + *, + directories: Iterable[Path] = ..., + try_module_name: bool = ..., + try_build_id: bool = ..., + try_debug_link: bool = ..., + try_procfs: bool = ..., + try_embedded_vdso: bool = ..., + try_reuse: bool = ..., + try_supplementary: bool = ..., + kernel_directories: Iterable[Path] = ..., + try_kmod: KmodSearchMethod = ..., + ) -> None: + """ + Create a ``DebugInfoOptions``. + + :param options: If given, create a copy of the given options. + Otherwise, use the default options. + + Any remaining arguments override the copied/default options. + """ + ... + directories: Tuple[str, ...] + """ + Directories to search for debugging information files. + + Defaults to ``("/usr/lib/debug",)``, which should work out of the box on + most Linux distributions. Empty strings are not allowed. + + By default, this is used for searches by build ID (see + :attr:`try_build_id`), debug link (see :attr:`debug_link_directories`), for + supplementary files (see :attr:`try_supplementary`), and for kernel files + (see :attr:`kernel_directories`). + """ + try_module_name: bool + """ + If the name of a module resembles a filesystem path, try the file at that + path. + + Defaults to ``True``. + """ + try_build_id: bool + """ + Try finding files using build IDs.
+ + Defaults to ``True``. + + A *build ID* is a unique byte string present in a module's :ref:`loaded + file <module-loaded-file>` and :ref:`debug file <module-debug-file>`. If + configured correctly, it is also present in core dumps and provides a + reliable way to identify the correct files for a module. + + Searches by build ID check under each path in :attr:`directories` for a + file named ``.build-id/xx/yyyy`` (for loaded files) or + ``.build-id/xx/yyyy.debug`` (for debug files), where ``xxyyyy`` is the + lowercase hexadecimal representation of the build ID. + """ + debug_link_directories: Tuple[str, ...] + """ + Directories to search for debug links. + + Defaults to ``("$ORIGIN", "$ORIGIN/.debug", "")``, which should work out of + the box on most Linux distributions. + + ``$ORIGIN`` (or ``${ORIGIN}``) is replaced with the absolute path of the + directory containing the loaded file. An empty string means to check under + each path in :attr:`directories` (i.e., ``path$ORIGIN`` for each path in + :attr:`directories`). + + See :attr:`try_debug_link`. + """ + try_debug_link: bool + """ + Try finding files using debug links. + + Defaults to ``True``. + + A *debug link* is a pointer in a module's :ref:`loaded file + <module-loaded-file>` to its :ref:`debug file <module-debug-file>`. It + consists of a name and a checksum. + + Searches by debug link check every path in :attr:`debug_link_directories` + for a file with a matching name and checksum. + """ + try_procfs: bool + """ + For local processes, try getting files via the ``proc`` filesystem (e.g., + :manpage:`proc_pid_exe(5)`, :manpage:`proc_pid_map_files(5)`). + + Defaults to ``True``. + """ + try_embedded_vdso: bool + """ + Try reading the vDSO embedded in a process's memory/core dump. + + Defaults to ``True``. + + The entire (stripped) vDSO is included in core dumps, so this is a reliable + way to get it. + """ + try_reuse: bool + """ + Try reusing a module's loaded file as its debug file and vice versa. + + Defaults to ``True``.
+ """ + try_supplementary: bool + """ + Try finding :ref:`supplementary files <module-supplementary-debug-file>`. + + Defaults to ``True``. + """ + kernel_directories: Tuple[str, ...] + """ + Directories to search for the kernel image and loadable kernel modules. + + Defaults to ``("",)``. + + An empty string means to check standard paths (e.g., + :file:`/boot/vmlinux-{release}`, :file:`/lib/modules/{release}`) absolutely + and under each path in :attr:`directories`. + """ + try_kmod: KmodSearchMethod + """ + How to search for loadable kernel modules. + + Defaults to :attr:`KmodSearchMethod.DEPMOD_OR_WALK`. + """ + +class KmodSearchMethod(enum.Enum): + """ + Methods of searching for loadable kernel module debugging information. + + In addition to searching by build ID, there are currently two methods of + searching for debugging information specific to loadable kernel modules: + + 1. Using :manpage:`depmod(8)` metadata. This looks for :command:`depmod` + metadata (specifically, :file:`modules.dep.bin`) at the top level of + each directory in :attr:`DebugInfoOptions.kernel_directories` (an empty + path means :file:`/lib/modules/{release}`). The metadata is used to + quickly find the path of each module, which is then checked relative to + each directory specified by :attr:`DebugInfoOptions.kernel_directories`. + + This method is faster but typically only applicable to installed + kernels. + 2. Walking kernel directories. This traverses each directory specified by + :attr:`DebugInfoOptions.kernel_directories` looking for ``.ko`` files. + Module names are matched to filenames before the ``.ko`` extension and + with dashes (``-``) replaced with underscores (``_``). + + This method is slower but not limited to installed kernels. + + Debugging information searches can be configured to use one, both, or + neither method. + """ + + NONE = ... + """Don't search using kernel module-specific methods.""" + DEPMOD = ... + """Search using :command:`depmod` metadata.""" + WALK = ...
+ """Search by walking kernel directories.""" + DEPMOD_OR_WALK = ... + """ + Search using :command:`depmod` metadata, falling back to walking kernel + directories only if no :command:`depmod` metadata is found. + + Since :command:`depmod` metadata is expected to be reliable if present, + this is the default. + """ + DEPMOD_AND_WALK = ... + """ + Search using :command:`depmod` metadata and by walking kernel directories. + + Unlike :attr:`DEPMOD_OR_WALK`, if :command:`depmod` metadata is found but + doesn't result in the desired debugging information, this will still walk + kernel directories. + """ + def get_default_prog() -> Program: """ Get the default program for the current thread. @@ -1105,6 +1549,412 @@ class NoDefaultProgramError(Exception): ... +class Module: + """ + A ``Module`` represents an executable, library, or other binary file used + by a program. It has several subclasses representing specific types of + modules. + + Modules are uniquely identified by their type, name, and a type-specific + value. + + Modules have several attributes that are determined automatically whenever + possible but may be overridden manually if needed. + + Modules can be assigned files that provide debugging and runtime + information: + + * .. _module-loaded-file: + + The "loaded file" is the file containing the executable code, data, etc. + used by the program at runtime. + + + * .. _module-debug-file: + + The "debug file" is the file containing debugging information (e.g., + `DWARF `_). + + The loaded file and debug file may be the same file, for example, an + unstripped binary. They may be different files if the binary was stripped + and its debugging information was split into a separate file. + + + * .. _module-supplementary-debug-file: + + The debug file may depend on a "supplementary debug file" such as one + generated by `dwz(1) `_. If so, + then the supplementary debug file must be found before the debug file can + be used. 
+ """ + + prog: Final[Program] + """Program that this module is from.""" + name: Final[str] + """ + Name of this module. + + Its exact meaning varies by module type. + """ + address_ranges: Optional[Sequence[Tuple[int, int]]] + """ + Address ranges where this module is loaded. + + This is a sequence of tuples of the start (inclusive) and end (exclusive) + addresses. For each range, the start address is strictly less than the end + address. If the module is not loaded in memory, then the sequence is empty. + If not known yet, then this is ``None``. + + :meth:`Program.loaded_modules()` sets this automatically from the program + state/core dump when possible. Otherwise, for :class:`MainModule`, + :class:`SharedLibraryModule`, and :class:`VdsoModule`, it may be set + automatically when a file is assigned to the module. It is never set + automatically for :class:`ExtraModule`. It can also be set manually. + + Other than Linux kernel loadable modules, most modules have only one + address range. See :attr:`address_range`. + """ + address_range: Optional[Tuple[int, int]] + """ + Address range where this module is loaded. + + This is an alias of :attr:`address_ranges[0] ` with a + couple of small differences: + + * If the module has more than one address range, then reading this raises a + :class:`ValueError`. + * If the module is not loaded in memory, then this is ``(0, 0)``. + """ + build_id: Optional[bytes] + """ + Unique byte string (e.g., GNU build ID) identifying files used by this + module. + + If not known, then this is ``None``. + + :meth:`Program.loaded_modules()` sets this automatically from the program + state/core dump when possible. Otherwise, when a file is assigned to the + module, it is set to the file's build ID if it is not already set. It can + also be set manually. + """ + object: Object + """ + The object associated with this module. + + For Linux kernel loadable modules, this is the ``struct module *`` + associated with the kernel module. 
For other kinds, this is currently an + absent object. The object may be set manually. + """ + loaded_file_status: ModuleFileStatus + """Status of the module's :ref:`loaded file `.""" + loaded_file_path: Optional[str] + """ + Absolute path of the module's :ref:`loaded file `, or + ``None`` if not known. + """ + loaded_file_bias: Optional[int] + """ + Difference between the load address in the program and addresses in the + :ref:`loaded file ` itself. + + This is often non-zero due to address space layout randomization (ASLR). + + It is set automatically based on the module type when the loaded file is + added: + + * For :class:`MainModule`, it is set based on metadata from the process or + core dump (the `auxiliary vector + `_ for userspace + programs, the ``VMCOREINFO`` note for the Linux kernel). + * For :class:`SharedLibraryModule` and :class:`VdsoModule`, it is set to + :attr:`~SharedLibraryModule.dynamic_address` minus the address of the + dynamic section in the file. + * For :class:`RelocatableModule`, it is set to zero. Addresses are adjusted + according to :attr:`~RelocatableModule.section_addresses` instead. + * For :class:`ExtraModule`, if :attr:`~Module.address_ranges` is set to a + single range before the file is added, then the bias is set to + :attr:`address_ranges[0][0] ` (i.e., the module's + start address) minus the file's start address. If + :attr:`~Module.address_ranges` is not set when the file is added, is + empty, or comprises more than one range, then the bias is set to zero. + + This cannot be set manually. + """ + debug_file_status: ModuleFileStatus + """Status of the module's :ref:`debug file `.""" + debug_file_path: Optional[str] + """ + Absolute path of the module's :ref:`debug file `, or + ``None`` if not known. + """ + debug_file_bias: Optional[int] + """ + Difference between the load address in the program and addresses in the + :ref:`debug file `. + + See :attr:`loaded_file_bias`. 
+ """ + supplementary_debug_file_kind: Optional[SupplementaryFileKind] + """ + Kind of the module's :ref:`supplementary debug file + `, or ``None`` if not known or not needed. + """ + supplementary_debug_file_path: Optional[str] + """ + Absolute path of the module's :ref:`supplementary debug file + `, or ``None`` if not known or not needed. + """ + + def wants_loaded_file(self) -> bool: + """ + Return whether this module wants a :ref:`loaded file + `. + + This should be preferred over checking :attr:`loaded_file_status` + directly since this is future-proof against new status types being + added. It is currently equivalent to ``module.loaded_file_status == + ModuleFileStatus.WANT``. + """ + ... + + def wants_debug_file(self) -> bool: + """ + Return whether this module wants a :ref:`debug file + `. + + This should be preferred over checking :attr:`debug_file_status` + directly since this is future-proof against new status types being + added. It is currently equivalent to ``module.debug_file_status == + ModuleFileStatus.WANT or module.debug_file_status == + ModuleFileStatus.WANT_SUPPLEMENTARY``. + """ + ... + + def wanted_supplementary_debug_file(self) -> WantedSupplementaryFile: + """ + Return information about the :ref:`supplementary debug file + ` that this module currently wants. + + :raises ValueError: if the module doesn't currently want a + supplementary debug file (i.e., ``module.debug_file_status != + ModuleFileStatus.WANT_SUPPLEMENTARY``) + """ + ... + + def try_file( + self, + path: Path, + *, + fd: int = -1, + force: bool = False, + ) -> None: + """ + Try to use the given file for this module. + + If the file does not appear to belong to this module, then it is + ignored. This currently checks that the file and the module have the + same build ID. 
+ + If :attr:`loaded_file_status` is :attr:`~ModuleFileStatus.WANT` and the + file is loadable, then it is used as the :ref:`loaded file + ` and :attr:`loaded_file_status` is set to + :attr:`~ModuleFileStatus.HAVE`. + + If :attr:`debug_file_status` is :attr:`~ModuleFileStatus.WANT` or + :attr:`~ModuleFileStatus.WANT_SUPPLEMENTARY` and the file provides + debugging information, then it is used as the :ref:`debug file + ` and :attr:`debug_file_status` is set to + :attr:`~ModuleFileStatus.HAVE`. However, if the file requires a + supplementary debug file, then it is not used as the debug file yet and + :attr:`debug_file_status` is set to + :attr:`~ModuleFileStatus.WANT_SUPPLEMENTARY` instead. + + If :attr:`debug_file_status` is + :attr:`~ModuleFileStatus.WANT_SUPPLEMENTARY` and the file matches + :meth:`wanted_supplementary_debug_file()`, then the previously found + file is used as the debug file, the given file is used as the + :ref:`supplementary debug file `, and + :attr:`debug_file_status` is set to :attr:`~ModuleFileStatus.HAVE`. + + The file may be used as both the loaded file and debug file if + applicable. + + :param path: Path to file. + :param fd: If nonnegative, an open file descriptor referring to the + file. This always takes ownership of the file descriptor even if + the file is not used or on error, so the caller must not close it. + :param force: If ``True``, then don't check whether the file matches + the module. + """ + ... + +class MainModule(Module): + """ + Main module. + + There is only one main module in a program. For userspace programs, it is + the executable, and its name is usually the absolute path of the + executable. For the Linux kernel, it is the kernel image, a.k.a. + ``vmlinux``, and its name is "kernel". + """ + +class SharedLibraryModule(Module): + """ + Shared library (a.k.a. dynamic library, dynamic shared object, or ``.so``) + module. 
+ + Shared libraries are uniquely identified by their name (usually the + absolute path of the shared object file) and dynamic address. + """ + + dynamic_address: Final[int] + """Address of the shared object's dynamic section.""" + +class VdsoModule(Module): + """ + Virtual dynamic shared object (vDSO) module. + + The vDSO is a special shared library automatically loaded into a process by + the kernel; see :manpage:`vdso(7)`. It is uniquely identified by its name + (the ``SONAME`` field of the shared object file) and dynamic address. + """ + + dynamic_address: Final[int] + """Address of the shared object's dynamic section.""" + +class RelocatableModule(Module): + """ + Relocatable object module. + + A relocatable object is an object file requiring a linking step to assign + section addresses and adjust the file to reference those addresses. + + Linux kernel loadable modules (``.ko`` files) are a special kind of + relocatable object. + + For userspace programs, relocatable objects are usually intermediate + products of the compilation process (``.o`` files). They are not typically + loaded at runtime. However, drgn allows manually defining a relocatable + module and assigning its section addresses if needed. + + Relocatable modules are uniquely identified by a name and address. + """ + + address: Final[int] + """ + Address identifying the module. + + For Linux kernel loadable modules, this is the module base address. + """ + + section_addresses: MutableMapping[str, int] + """ + Mapping from section names to assigned addresses. + + Once a file has been assigned to the module, this can no longer be + modified. + + :meth:`Program.linux_kernel_loadable_module()` and + :meth:`Program.loaded_modules()` prepopulate this for Linux kernel loadable + modules. + """ + +class ExtraModule(Module): + """ + Module with extra debugging information. 
+ + For advanced use cases, it may be necessary to manually add debugging + information that does not fit into any of the categories above. + ``ExtraModule`` is intended for these use cases. For example, it can be + used to add debugging information from a standalone file that is not in use + by a particular program. + + Extra modules are uniquely identified by a name and ID number. Both the + name and ID number are arbitrary. + """ + + id: Final[int] + """Arbitrary identification number.""" + +class ModuleFileStatus(enum.Enum): + """ + Status of a file in a :class:`Module`. + + This is usually used to communicate with debugging information finders; see + :meth:`Program.register_debug_info_finder()`. + """ + + WANT = ... + """File has not been found and should be searched for.""" + + HAVE = ... + """File has already been found and assigned.""" + + DONT_WANT = ... + """ + File has not been found, but it should not be searched for. + + :meth:`Module.try_file()` and debugging information finders are required to + honor this and will never change it. However, other operations may reset + this to :attr:`WANT` when they load debugging information automatically. + """ + + DONT_NEED = ... + """ + File has not been found and is not needed (e.g., because its debugging + information is not applicable or is provided through another mechanism). + + In contrast to :attr:`DONT_WANT`, drgn itself will never change this to + :attr:`WANT`. + """ + + WANT_SUPPLEMENTARY = ... + """ + File has been found, but it requires a supplementary file before it can be + used. See :meth:`Module.wanted_supplementary_debug_file()`. + """ + +class WantedSupplementaryFile(NamedTuple): + """Information about a wanted supplementary file.""" + + kind: SupplementaryFileKind + """Kind of supplementary file.""" + path: str + """Path of main file that wants the supplementary file.""" + supplementary_path: str + """ + Path to the supplementary file. + + This may be absolute or relative to :attr:`path`. 
+ """ + checksum: bytes + """ + Unique identifier of the supplementary file. + + The interpretation depends on :attr:`kind`. + """ + +class SupplementaryFileKind(enum.Enum): + """ + Kind of supplementary file. + + .. note:: + DWARF 5 supplementary files are not currently supported but may be in + the future. + + DWARF package files are not considered supplementary files. They are + considered part of the debug file and must have the same path as the + debug file plus a ".dwp" extension. + """ + + GNU_DEBUGALTLINK = ... + """ + GNU-style supplementary debug file referred to by a ``.gnu_debugaltlink`` + section. + + Its :attr:`~WantedSupplementaryFile.checksum` is the file's GNU build ID. + """ + class Thread: """A thread in a program.""" @@ -1426,6 +2276,7 @@ class Object: prog: Program, type: Union[str, Type], *, + absence_reason: AbsenceReason = AbsenceReason.OTHER, bit_field_size: Optional[IntegerLike] = None, ) -> None: """Create an absent object.""" @@ -1436,6 +2287,12 @@ class Object: type_: Final[Type] """Type of this object.""" + address_: Final[Optional[int]] + """ + Address of this object if it is a reference, ``None`` if it is a value or + absent. + """ + absent_: Final[bool] """ Whether this object is absent. @@ -1444,10 +2301,11 @@ class Object: an invalid address). """ - address_: Final[Optional[int]] + absence_reason_: Final[Optional[AbsenceReason]] """ - Address of this object if it is a reference, ``None`` if it is a value or - absent. + Reason that this object is absent. + + This is ``None`` for all values and references. """ bit_offset_: Final[Optional[int]] @@ -1531,6 +2389,20 @@ class Object: returns a ``dict`` of members. For arrays, this returns a ``list`` of values. + .. note:: + Helpers that wish to accept an argument that may be an + :class:`Object` or an :class:`int` should use + :func:`operator.index()` and :class:`IntegerLike` instead: + + .. 
code-block:: python3 + + import operator + from drgn import IntegerLike + + def my_helper(i: IntegerLike) -> ...: + value = operator.index(i) # Returns an int + ... + :raises FaultError: if reading the object causes a bad memory access :raises TypeError: if this object has an unreadable type (e.g., ``void``) @@ -1621,7 +2493,7 @@ class Object: bit_offset: IntegerLike = 0, bit_field_size: Optional[IntegerLike] = None, ) -> Object: - """ + r""" Return a value object from its binary representation. >>> print(Object.from_bytes_(prog, "int", b"\x10\x00\x00\x00")) @@ -1756,6 +2628,16 @@ class Object: def __ceil__(self) -> int: ... def _repr_pretty_(self, p: Any, cycle: bool) -> None: ... +class AbsenceReason(enum.Enum): + """Reason an object is :ref:absent `.""" + + OTHER = ... + """Another reason not listed below.""" + OPTIMIZED_OUT = ... + """Object was optimized out by the compiler.""" + NOT_IMPLEMENTED = ... + """Encountered unknown debugging information.""" + def NULL(prog: Program, type: Union[str, Type]) -> Object: """ Get an object representing ``NULL`` casted to the given type. @@ -2079,23 +2961,28 @@ class StackFrame: (int)1 """ - name: Final[Optional[str]] + name: Final[str] + """ + Name of the function or symbol at this frame. + + This tries to get the best available name for this frame in the following + order: + + 1. The name of the function in the source code based on debugging + information (:attr:`frame.function_name `). + 2. The name of the symbol in the binary (:meth:`frame.symbol().name + `). + 3. The program counter in hexadecimal (:attr:`hex(frame.pc) `). + 4. The string "???". + """ + + function_name: Final[Optional[str]] """ Name of the function at this frame, or ``None`` if it could not be determined. The name cannot be determined if debugging information is not available for - the function, e.g., because it is implemented in assembly. It may be - desirable to use the symbol name or program counter as a fallback: - - .. 
code-block:: python3 - - name = frame.name - if name is None: - try: - name = frame.symbol().name - except LookupError: - name = hex(frame.pc) + the function, e.g., because it is implemented in assembly. """ is_inline: Final[bool] @@ -2778,7 +3665,10 @@ class OutOfBoundsError(Exception): ... _elfutils_version: str +_have_debuginfod: bool +_enable_dlopen_debuginfod: bool _with_libkdumpfile: bool +_with_lzma: bool def _linux_helper_direct_mapping_offset(__prog: Program) -> int: ... def _linux_helper_read_vm( diff --git a/_drgn_util/plugins.py b/_drgn_util/plugins.py new file mode 100644 index 000000000..6b5ec4903 --- /dev/null +++ b/_drgn_util/plugins.py @@ -0,0 +1,122 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +import fnmatch +from importlib import import_module +import logging +import os +import runpy +import sys +from types import SimpleNamespace +from typing import Any, Callable, Dict, List, Tuple + +logger = logging.getLogger("drgn.plugins") + +_plugins = None +_hooks: Dict[str, List[Tuple[str, Callable[..., Any]]]] = {} + + +def _load_plugins() -> List[Tuple[str, object]]: + plugins = [] + # Mapping from plugin name requested with DRGN_PLUGINS to whether we found + # an entry point with that name. + enabled_entry_points = {} + + env = os.getenv("DRGN_PLUGINS") + if env: + for item in env.split(","): + if not item: + # Ignore empty items for convenience. + continue + name, sep, value = item.partition("=") + if sep: + try: + if "/" in value: + plugin: object = SimpleNamespace(**runpy.run_path(value)) + else: + plugin = import_module(value) + except Exception: + logger.warning("failed to load %r:", item, exc_info=True) + else: + plugins.append((name, plugin)) + logger.debug("loaded %r", item) + else: + enabled_entry_points[name] = False + + env = os.getenv("DRGN_DISABLE_PLUGINS") + # If all plugins are disabled, avoid the entry point machinery entirely. 
+ if env != "*" or enabled_entry_points: + disable_plugins = env.split(",") if env else [] + + import importlib.metadata + + group = "drgn.plugins" + if sys.version_info >= (3, 10): + entry_points = importlib.metadata.entry_points(group=group) + else: + entry_points = importlib.metadata.entry_points().get(group, ()) + + for entry_point in entry_points: + if entry_point.name in enabled_entry_points: + enabled_entry_points[entry_point.name] = True + elif any( + fnmatch.fnmatch(entry_point.name, disable) + for disable in disable_plugins + ): + continue + try: + plugin = entry_point.load() + except Exception: + logger.warning( + "failed to load %r:", + f"{entry_point.name} = {entry_point.value}", + exc_info=True, + ) + else: + plugins.append((entry_point.name, plugin)) + logger.debug( + "loaded entry point %r", + f"{entry_point.name} = {entry_point.value}", + ) + + missing_entry_points = [ + key for key, value in enabled_entry_points.items() if not value + ] + if missing_entry_points: + missing_entry_points.sort() + logger.warning( + "not found: %s", + ", ".join([repr(name) for name in missing_entry_points]), + ) + + return plugins + + +def _load_hook(hook_name: str) -> List[Tuple[str, Callable[..., Any]]]: + global _plugins + if _plugins is None: + _plugins = _load_plugins() + + hooks = [] + for name, plugin in _plugins: + try: + hook = getattr(plugin, hook_name) + except AttributeError: + continue + hooks.append((name, hook)) + + hooks.sort(key=lambda hook: (getattr(hook[1], "drgn_priority", 50), hook[0])) + return hooks + + +def call_plugins(hook_name: str, *args: object) -> None: + try: + hooks = _hooks[hook_name] + except KeyError: + _hooks[hook_name] = hooks = _load_hook(hook_name) + + for name, hook in hooks: + try: + hook(*args) + except Exception: + logger.warning("%r %s failed:", name, hook_name, exc_info=True) diff --git a/contrib/btrfs_tree.py b/contrib/btrfs_tree.py index ca8349be4..a2896b44d 100644 --- a/contrib/btrfs_tree.py +++ b/contrib/btrfs_tree.py 
@@ -369,13 +369,9 @@ def from_bytes(b: bytes) -> "BtrfsKeyPtr": return BtrfsKeyPtr(BtrfsKey._make(t[:3]), *t[3:]) -# class _BtrfsItemHandler(NamedTuple, Generic[_T]) and replacing Any with _T -# would be more accurate, but that fails at runtime on Python 3.6; see -# python/typing#449. This is good enough since it's checked more strictly -# through _register_item_handler(). -class _BtrfsItemHandler(NamedTuple): - parse: Callable[[BtrfsKey, bytes], Any] - print: Callable[[BtrfsKey, bytes, Any, str, "Optional[SupportsWrite[str]]"], None] +class _BtrfsItemHandler(NamedTuple, Generic[_T]): + parse: Callable[[BtrfsKey, bytes], _T] + print: Callable[[BtrfsKey, bytes, _T, str, "Optional[SupportsWrite[str]]"], None] _btrfs_item_handlers = {} diff --git a/contrib/negdentdelete.py b/contrib/negdentdelete.py index 889a2ebb1..186faa1e2 100644 --- a/contrib/negdentdelete.py +++ b/contrib/negdentdelete.py @@ -37,7 +37,7 @@ def for_each_child_dentry(dentry: Object) -> Iterator[Object]: dentry.d_children.address_of_(), "d_sib", ) - except LookupError: + except AttributeError: return list_for_each_entry( "struct dentry", dentry.d_subdirs.address_of_(), "d_child" ) diff --git a/contrib/pp_leak/README.rst b/contrib/pp_leak/README.rst new file mode 100644 index 000000000..e02fbc5ce --- /dev/null +++ b/contrib/pp_leak/README.rst @@ -0,0 +1,154 @@ +======================= +Chasing Page Pool Leaks +======================= + +The following scripts were demonstrated in the *Chasing Page Pool Leaks* +talk at netdevconf 0x19. They can be used to inspect inflight pages that have +not yet been returned to the page_pool even after the page_pool was destroyed. + +The two ways described in the talk for getting more information on leaked pages +are presented below. Note that some scripts have changed from the talk so +please read below. + +Solution 1 +========== + +One way to find leaked page_pool pages is by scanning all the sockets for SKBs +that are using a leaked page. 
The leaked page can be in either the linear part +or a fragment in shared_info. + +This is the most straightforward way, but it also requires access to the SKBs +which means implementing iteration over many types of sockets. + +Example 1.1 +------------ + +The ``scan_tcp_socks.py`` does just that for TCP sockets. Here's an example output:: + + #> ./scan_tcp_socks.py -i eth1 + Found leaked page 0xffffea0000231b80 in linear part of skb: 0xffff888007289328. sk: 0xffff888007289280 + +From here the SKB can be investigated in the drgn CLI:: + + skb = Object(prog, "struct sk_buff *", address=0xffff888007289328) + *(struct sk_buff *)0xffff88800731c900 = { + .next = (struct sk_buff *)0xffff888007289328, + .prev = (struct sk_buff *)0xffff888007289328, + .dev = (struct net_device *)0x0, + .dev_scratch = (unsigned long)0, + ... + .tail = (sk_buff_data_t)160, + .end = (sk_buff_data_t)192, + .head = (unsigned char *)0xffff888008c6e000 = "", ---> leaking page + .data = (unsigned char *)0xffff888008c6e082 = "packet data", + ... + +Note that the script can take a long time to run. It is recommended to filter +by interface name via the ``-i`` switch. + +Solution 2 +========== + +The other way to investigate leaked page_pool pages is through tracking back +from the page to the SKB. This involves more guesswork. In broad strokes, +the algorithm can be summarized as below: + +1. Scan pages for page_pool pages that are leaked (are linked to a destroyed + page_pool). +2. Search the kernel memory for references to the page's virtual address range. +3. Peek memory around the found reference to check if it looks like an SKB. + This assumes that the leaked page is in the linear area of the SKB. + If something was found, stop here. Otherwise go on to next steps. +4. Search the kernel memory for the actual page *pointer*. + This is looking for the page as a fragment in ``skb_shared_info``. There, + the actual page pointer is used. What was found *could* be part of ``skb_shared_info``. +5. 
``skb_shared_info`` lives in the linear part of the SKB. Search for references to this + page to find the actual SKB. This is similar to 3. + +``ls_pp_leaks.py`` does step 1. ``guess_leaky_skbs.py`` will do steps 2-5. + +Here are some examples: + +Example 2.1 +----------- + +The leaked page could be in the linear part of the SKB. First, let's scan the +leaked pages:: + + #> ./ls_pp_leaks.py + Page content: + ADDRESS VALUE + ffff888008c6e000: 0000000000000000 + ffff888008c6e008: 0000000000000000 + ffff888008c6e010: 0000000000000000 + ffff888008c6e018: 0000000000000000 + ffff888008c6e020: 0000000000000000 + ffff888008c6e028: 0000000000000000 + ffff888008c6e030: 0000000000000000 + ffff888008c6e038: 0000000000000000 + ffff888008c6e040: 7dfe573412005452 + ffff888008c6e048: 0045000809f1739e + ffff888008c6e050: 06400040a3ae5200 + ffff888008c6e058: 01010a010101f687 + ffff888008c6e060: 0000000064980101 + +Now, let's look for the SKB from the found leak:: + + #> ./guess_leaky_skbs.py 0xffffea0000231b80 + Possible skb match at address 0xffff88800731c900 + +From here the SKB can be printed via the drgn CLI or via the ``--show-skb`` +option. + +How do you know if the leaked page is in the linear part? You either expect +that SKBs don't have fragments (based on the current configuration) or you +just guess. Either way, it is a good starting point. + +If nothing relevant is found, proceed to steps in example 2.2. + +Example 2.2 +----------- + +The leaked page could be in a ``skb_shared_info`` fragment (see +``skb_frag_t``). 
Once again, let's scan the leaked pages:: + + #> ./ls_pp_leaks.py + Leaked page: 0xffffea00001cea00 + Page content: + ADDRESS VALUE + ffff8880073a8000: 87feffffffffffff + ffff8880073a8008: 01000608087a2ce1 + ffff8880073a8010: 87fe010004060008 + ffff8880073a8018: 0a020101087a2ce1 + ffff8880073a8020: 0101000000000000 + ffff8880073a8028: 0000000000000102 + ffff8880073a8030: 0000000000000000 + ffff8880073a8038: 0000000000000000 + ffff8880073a8040: 0000000000000000 + ffff8880073a8048: 0000000000000000 + ffff8880073a8050: 0000000000000000 + ffff8880073a8058: 0000000000000000 + ffff8880073a8060: 0000000000000000 + Leaked page: 0xffffea00001cf5c0 + ... + +Now we can let the script do the guesswork:: + + #> ./guess_leaky_skbs.py 0xffffea00001cea00 --as-frag + Possible skb match at address 0xffff8880047d8a00 + +You can look at the SKB to see if it makes sense. From there the socket can be +tracked back. + +Final Notes +=========== + +The examples above make the work seem easy. In fact, a lot of guesswork might +be required. Check the options provided by the scripts to dig deeper into +the possibilities. + +References +========== + +.. _Chasing Page Pool Pages talk: https://lore.kernel.org/netdev/20240814075603.05f8b0f5@kernel.org/ +.. _Original solution from netdev: https://netdevconf.info/0x19/sessions/tutorial/diagnosing-page-pool-leaks.html diff --git a/contrib/pp_leak/guess_leaky_skbs.py b/contrib/pp_leak/guess_leaky_skbs.py new file mode 100755 index 000000000..72458d904 --- /dev/null +++ b/contrib/pp_leak/guess_leaky_skbs.py @@ -0,0 +1,239 @@ +#!/usr/bin/env drgn + +# Copyright (c) 2025 NVIDIA Corporation & Affiliates +# SPDX-License-Identifier: LGPL-2.1-or-later + +help=""" +Takes in a virtual address from a page or a `struct page *` and tries to find +a SKB that references this page either in the linear part or in skb_frag_info. + +Endianess is handled by the script. + +This only works if the kernel was built with CONFIG_PROC_KCORE=y. 
The +script is based on search_kernel_memory.py +""" + +import argparse +import math +import sys +from drgn import ( + Object, + PlatformFlags, + FaultError, + Object, + offsetof, + sizeof, +) +from drgn.helpers.common.memory import ( + identify_address, +) +from drgn.helpers.linux.list import list_for_each_entry +from drgn.helpers.linux.mm import ( + for_each_vmap_area, + virt_to_page, + page_to_virt, +) +from drgn.helpers.linux.net import ( + skb_shinfo, +) + + +byteorder = "little" if prog.platform.flags & PlatformFlags.IS_LITTLE_ENDIAN else "big" +PAGE_SIZE = prog["PAGE_SIZE"].value_() +PAGE_SHIFT = prog["PAGE_SHIFT"].value_() + + +def get_opts(): + parser = argparse.ArgumentParser(description=help) + parser.add_argument( + "bytes", + nargs="?", + help="hexadecimal bytes to read. By default they represent a " + "virtual address.", + ) + parser.add_argument( + "--as-frag", default=False, action="store_true", + help="Interpret address as being skb_shinfo(skb).frag.netmem.") + parser.add_argument( + "--virt", default=False, action="store_true", + help="Given address is a virtual addresss in a page.") + parser.add_argument( + "--show-skb", default=False, action="store_true", + help="Show matched SKB.") + parser.add_argument( + "--verbose", default=False, action="store_true", + help="Print partial matches.") + + return parser.parse_args() + + +def virt_to_vmap_address(prog, addr): + page = virt_to_page(addr) + for va in for_each_vmap_area(prog): + vm = va.vm.read_() + if vm: + for i, va_page in enumerate( + Object( + prog, prog.array_type(page.type_, vm.nr_pages), address=vm.pages + ).read_() + ): + if va_page == page: + return ( + va.va_start.value_() + + (i << prog["PAGE_SHIFT"]) + + (addr & (prog["PAGE_SIZE"].value_() - 1)) + ) + return None + + +def search_memory(prog, needle): + KCORE_RAM = prog["KCORE_RAM"] + CHUNK_SIZE = 1024 * 1024 + + for kc in list_for_each_entry( + "struct kcore_list", prog["kclist_head"].address_of_(), "list" + ): + if kc.type != 
KCORE_RAM: + continue + start = kc.addr.value_() + end = start + kc.size.value_() + for addr in range(start, end, CHUNK_SIZE): + buf = prog.read(addr, min(CHUNK_SIZE, end - addr)) + i = 0 + while i < len(buf): + i = buf.find(needle, i) + if i < 0: + break + + yield addr + i + i += 8 + + +def search_page_reference(page): + """ + Search kernel memory for references to the given page contents + (virtual addresses within the PAGE_SIZE range). + + Does page conversion. + """ + + val = page_to_virt(page).value_() + + skip_bytes = math.ceil(PAGE_SHIFT / 8) + ptr_size = 8 + + val_endian = val.to_bytes(ptr_size, byteorder) + if byteorder == "little": + big_needle = val_endian[skip_bytes:ptr_size - skip_bytes] + else: + big_needle = val_endian[0:ptr_size - skip_bytes] + + small_needle = val >> PAGE_SHIFT + + # Search for the high-order pointer bytes only; the skip_bytes low-order bytes vary within the page. NOTE(review): the little-endian slice yields ptr_size - 2 * skip_bytes bytes, not 6 -- confirm intent. + for addr in search_memory(prog, big_needle): + + if byteorder == "little": + # Adjust address to skipped bytes: + addr = addr - skip_bytes + + mem_bytes = prog.read(addr, ptr_size) + mem_val = int.from_bytes(mem_bytes, byteorder) + + if mem_val >> PAGE_SHIFT == small_needle: + yield (addr, mem_val) + + +def search_raw(value): + """ + Search kernel memory for the exact pointer-sized value. + """ + + ptr_size = 8 + needle = value.to_bytes(ptr_size, byteorder) + + for addr in search_memory(prog, needle): + mem_bytes = prog.read(addr, ptr_size) + mem_val = int.from_bytes(mem_bytes, byteorder) + yield (addr, mem_val) + + +def guess_skb_is_legit(skb) -> bool: + """ + Guess if there is a legit SKB at the given address. + """ + + # 2 consecutive pointers that point to the same page indicate that + # this could be skb.head and skb.data. + if virt_to_page(skb.head).value_() != virt_to_page(skb.data).value_(): + return False + + if skb.end.value_() > PAGE_SIZE or skb.tail.value_() > PAGE_SIZE: + return False + + # Many checks could be added here ... 
+ return True + + +def search_skb_with_page_as_linear(page, verbose=False): + """ + Search SKB for given page. + """ + for addr, val in search_page_reference(page): + + if verbose: + print(f"Found reference at {hex(addr)}: value {hex(val)}. {identify_address(prog, addr)}") + + skb_addr = addr - offsetof(prog.type("struct sk_buff"), "head") + skb = Object(prog, "struct sk_buff", address=skb_addr) + if guess_skb_is_legit(skb): + yield skb + + +def search_skb_with_page_as_shinfo_frag(page_ptr, verbose): + + for addr, _ in search_raw(page_ptr): + + if verbose: + print(f"Found raw value at addr {hex(addr)}. {identify_address(prog, addr)}") + + page = virt_to_page(addr) + for skb in search_skb_with_page_as_linear(page): + + # For shinfo, a match happens for + shinfo = skb_shinfo(skb) + shinfo_start = shinfo.value_() + shinfo_end = shinfo.value_() + sizeof(prog.type("struct skb_shared_info")) + if shinfo_start <= addr and addr < shinfo_end: + yield skb + + +opts = get_opts() + +# Drop hex prefix. 
+if opts.bytes.startswith("0x"): + opts.bytes = opts.bytes[2:] + +value = int.from_bytes(bytes.fromhex(opts.bytes)) + +if opts.as_frag: + for skb in search_skb_with_page_as_shinfo_frag(value, opts.verbose): + print(f"Possible skb match at address {hex(skb.address_of_())}") + if opts.show_skb: + print(skb) + +else: + if opts.virt: + try: + page = virt_to_page(value) + except FaultError: + print("Given address doesn't seem to be a virtual address or it can't be converted to a page.") + sys.exit(1) + else: + page = Object(prog, "struct page", address=value).address_of_() + + for skb in search_skb_with_page_as_linear(page, opts.verbose): + print(f"Possible skb match at address {hex(skb.address_of_())}") + if opts.show_skb: + print(skb) + diff --git a/contrib/pp_leak/ls_pp_leaks.py b/contrib/pp_leak/ls_pp_leaks.py new file mode 100755 index 000000000..3dec540b7 --- /dev/null +++ b/contrib/pp_leak/ls_pp_leaks.py @@ -0,0 +1,50 @@ +#!/usr/bin/env drgn + +# Copyright (c) 2025 NVIDIA Corporation & Affiliates +# SPDX-License-Identifier: LGPL-2.1-or-later + +help=""" +Detect leaked page_pool pages by scanning through all the pages. + +Has options for peeking into the page memory and showing the struct page. 
+""" + + +import argparse +from drgn import FaultError +from drgn.helpers.common.memory import ( + print_annotated_memory +) +from drgn.helpers.linux.mm import ( + for_each_page, + page_to_virt +) +from drgn.helpers.linux.net import is_pp_page + + +def get_opts(): + parser = argparse.ArgumentParser(description=help) + parser.add_argument( + "-l", "--peek", default=100, type=int, help="Peek into page given amount of bytes.") + parser.add_argument( + "-s", "--show", default=False, action="store_true", help="Show page struct.") + + args = parser.parse_args() + return args + + +opt = get_opts() + +for page in for_each_page(): + try: + if is_pp_page(page) and page.pp.user.detach_time > 0: + if opt.show: + print(page) + else: + print(f"Leaked page: {hex(page)}") + if opt.peek > 0: + print("Page content: ") + print_annotated_memory(page_to_virt(page), opt.peek) + except FaultError: + continue + diff --git a/contrib/pp_leak/scan_tcp_socks.py b/contrib/pp_leak/scan_tcp_socks.py new file mode 100755 index 000000000..711c28447 --- /dev/null +++ b/contrib/pp_leak/scan_tcp_socks.py @@ -0,0 +1,90 @@ +#!/usr/bin/env drgn + +# Copyright (c) 2025 NVIDIA Corporation & Affiliates +# SPDX-License-Identifier: LGPL-2.1-or-later + +help=""" +Detect leaked page_pool pages by scanning TCP sockets for SKBs from the +receive queue that are using such leaked pages. + +It is a good idea to filter out by interface to reduce the run time +of the script. 
+""" + + +import sys +import argparse + +from drgn import ( + Object, + FaultError, +) +from drgn.helpers.linux import ( + hlist_nulls_empty, + sk_nulls_for_each, +) +from drgn.helpers.linux.mm import virt_to_page +from drgn.helpers.linux.net import ( + netdev_get_by_name, + skb_shinfo, + is_pp_page +) + + +def get_opts(): + parser = argparse.ArgumentParser(description=help) + parser.add_argument( + "-i", "--interface", default=None, type=str, help="Filter by interface name.") + + args = parser.parse_args() + return args + + +opts = get_opts() +ifindex = -1 +if opts.interface: + netdev = netdev_get_by_name(opts.interface) + if netdev.value_() == 0: + print(f"Netdev interface '{opts.interface}' not found.") + sys.exit(1) + + ifindex = netdev.ifindex + +tcp_hashinfo = prog.object("tcp_hashinfo") + +for i in range(tcp_hashinfo.ehash_mask + 1): + head = tcp_hashinfo.ehash[i].chain + if hlist_nulls_empty(head): + continue + + for sk in sk_nulls_for_each(head): + + # Filter by interface: + if ifindex > 0 and sk.sk_rx_dst_ifindex.value_() != ifindex: + continue + + first_skb = sk.sk_receive_queue.next + skb = first_skb + while skb != None: + + try: + # Check linear part of skb: + page = virt_to_page(skb.data) + if is_pp_page(page) and page.pp.user.detach_time: + print(f"Found leaked page {hex(page)} in linear part of skb: {hex(skb.address_of_())}. 
sk: {hex(sk)}") + + # Check fragments: + shinfo = skb_shinfo(skb) + for i in range(0, shinfo.nr_frags): + frag = shinfo.frags[i] + page = Object(prog, "struct page", address=frag.netmem) + if is_pp_page(page) and page.pp.user.detach_time: + print(f"Found leaked page {hex(page.address_of_())} in skb frag {i} of skb: {hex(skb.address_of_())}") + + except FaultError: + continue + + # Move to next skb: + skb = skb.next + if skb == first_skb: + break diff --git a/contrib/stack_trace_call_fault.py b/contrib/stack_trace_call_fault.py index c82190a89..530422302 100644 --- a/contrib/stack_trace_call_fault.py +++ b/contrib/stack_trace_call_fault.py @@ -3,44 +3,73 @@ # SPDX-License-Identifier: LGPL-2.1-or-later """ -Get a stack trace from a call to an invalid address on x86-64. drgn should do -this automatically eventually. +Get a stack trace from a call to an invalid address on x86-64 or AArch64. drgn +should do this automatically eventually. """ +import drgn from drgn import Object +from drgn.helpers.common.prog import takes_program_or_default -def pt_regs_members_from_stack_frame(frame): +def pt_regs_members_from_stack_frame(prog, frame): regs = frame.registers() - return { - "r15": regs.get("r15", 0), - "r14": regs.get("r14", 0), - "r13": regs.get("r13", 0), - "r12": regs.get("r12", 0), - "bp": regs.get("rbp", 0), - "bx": regs.get("rbx", 0), - "r11": regs.get("r11", 0), - "r10": regs.get("r10", 0), - "r9": regs.get("r9", 0), - "r8": regs.get("r8", 0), - "ax": regs.get("rax", 0), - "cx": regs.get("rcx", 0), - "dx": regs.get("rdx", 0), - "si": regs.get("rsi", 0), - "di": regs.get("rdi", 0), - "orig_ax": -1, - "ip": regs.get("rip", 0), - "cs": regs.get("cs", 0), - "flags": regs.get("rflags", 0), - "sp": regs.get("rsp", 0), - "ss": regs.get("ss", 0), - } - - -pt_regs_members = pt_regs_members_from_stack_frame( - prog.crashed_thread().stack_trace()[0] -) -pt_regs_members["ip"] = prog.read_word(pt_regs_members["sp"]) - 1 -pt_regs_members["sp"] += 8 -trace = 
prog.stack_trace(Object(prog, "struct pt_regs", pt_regs_members)) -print(trace) + if prog.platform.arch == drgn.Architecture.X86_64: + return { + "r15": regs.get("r15", 0), + "r14": regs.get("r14", 0), + "r13": regs.get("r13", 0), + "r12": regs.get("r12", 0), + "bp": regs.get("rbp", 0), + "bx": regs.get("rbx", 0), + "r11": regs.get("r11", 0), + "r10": regs.get("r10", 0), + "r9": regs.get("r9", 0), + "r8": regs.get("r8", 0), + "ax": regs.get("rax", 0), + "cx": regs.get("rcx", 0), + "dx": regs.get("rdx", 0), + "si": regs.get("rsi", 0), + "di": regs.get("rdi", 0), + "orig_ax": -1, + "ip": regs.get("rip", 0), + "cs": regs.get("cs", 0), + "flags": regs.get("rflags", 0), + "sp": regs.get("rsp", 0), + "ss": regs.get("ss", 0), + } + elif prog.platform.arch == drgn.Architecture.AARCH64: + try: + pc = frame.pc + except LookupError: + pc = 0 + return { + "regs": [regs.get(f"x{i}", 0) for i in range(31)], + "sp": regs.get("sp", 0), + "pc": pc, + "pstate": regs.get("pstate", 0), + } + else: + raise NotImplementedError() + + +@takes_program_or_default +def call_fault_stack_trace(prog): + pt_regs_members = pt_regs_members_from_stack_frame( + prog, prog.crashed_thread().stack_trace()[0] + ) + + if prog.platform.arch == drgn.Architecture.X86_64: + pt_regs_members["ip"] = prog.read_word(pt_regs_members["sp"]) - 1 + pt_regs_members["sp"] += 8 + elif prog.platform.arch == drgn.Architecture.AARCH64: + pt_regs_members["pc"] = pt_regs_members["regs"][30] - 4 + else: + raise NotImplementedError() + + return prog.stack_trace(Object(prog, "struct pt_regs", pt_regs_members)) + + +if __name__ == "__main__": + trace = call_fault_stack_trace(prog) + print(trace) diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 037d460de..3949b818c 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,3 +1,13 @@ +div.sphinxsidebar p.caption { + font-weight: 300; + font-size: 1.4rem; +} + +details { + margin-block-start: 1em; + margin-block-end: 1em; +} + div.admonition { 
padding-bottom: 0; } @@ -7,6 +17,20 @@ div.admonition p.admonition-title { font-weight: bold; } +div.tip { + background-color: #DFD; + border-color: #ACA; +} + +div.scroll-y pre { + max-height: 20em; + overflow-y: auto; +} + +div.tutorial pre { + border-left: 5px solid #5A5; +} + @media screen and (min-width: 875px) { div.document { width: 100%; diff --git a/docs/advanced_usage.rst b/docs/advanced_usage.rst index 84c44eb84..5baa4a473 100644 --- a/docs/advanced_usage.rst +++ b/docs/advanced_usage.rst @@ -6,24 +6,206 @@ Advanced Usage The :doc:`user_guide` covers basic usage of drgn, but drgn also supports more advanced use cases which are covered here. -Loading Debugging Symbols -------------------------- +.. _advanced-modules: -drgn will automatically load debugging information based on the debugged -program (e.g., from loaded kernel modules or loaded shared libraries). -:meth:`drgn.Program.load_debug_info()` can be used to load additional debugging -information:: +Modules and Debugging Symbols +----------------------------- - >>> prog.load_debug_info(['./libfoo.so', '/usr/lib/libbar.so']) +drgn tries to determine what executable, libraries, etc. a program uses and +load debugging symbols automatically. As long as :doc:`debugging symbols are +installed `, this should work out of the box on +standard setups. + +For non-standard scenarios, drgn allows overriding the defaults with different +levels of control and complexity. + +Loading Debugging Symbols From Non-Standard Locations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. program:: drgn + +drgn searches standard locations for debugging symbols. If you have debugging +symbols available in a non-standard location, you can provide it to the CLI +with the :option:`-s`/:option:`--symbols` option: + +.. 
code-block:: console + + $ drgn -s ./libfoo.so -s /usr/lib/libbar.so.debug + +Or with the :meth:`drgn.Program.load_debug_info()` method:: + + >>> prog.load_debug_info(["./libfoo.so", "/usr/lib/libbar.so.debug"]) + +Loading Debugging Symbols For Specific Modules +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:option:`-s` and :meth:`~drgn.Program.load_debug_info()` try the given files +against all of the modules loaded in the program based on build IDs. You can +also :ref:`look up <api-module-constructors>` a specific module and try a given +file for just that module with :meth:`drgn.Module.try_file()`:: + + >>> prog.main_module().try_file("build/vmlinux") + +Loading Additional Debugging Symbols +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:option:`-s` and :meth:`~drgn.Program.load_debug_info()` ignore files that +don't correspond to a loaded module. To load debugging symbols from an +arbitrary file, pass :option:`--extra-symbols` to the CLI: + +.. code-block:: console + + $ drgn --extra-symbols ./my_extra_symbols.debug + +Or create a :class:`drgn.ExtraModule`:: + + >>> module = prog.extra_module("my_extra_symbols", create=True) + >>> module.try_file("./my_extra_symbols.debug") + +Listing Modules +^^^^^^^^^^^^^^^ + +By default, drgn creates a module for everything loaded in the program. You can +disable this in the CLI with :option:`--no-default-symbols`. + +You can find or create the loaded modules programmatically with +:meth:`drgn.Program.loaded_modules()`:: + + >>> for module, new in prog.loaded_modules(): + ... print("Created" if new else "Found", module) + +You can see all of the created modules with :meth:`drgn.Program.modules()`. + +Overriding Modules +^^^^^^^^^^^^^^^^^^ + +You can create modules with the :ref:`module factory functions +<api-module-constructors>`. You can also modify various attributes of the +:class:`drgn.Module` class. + +.. 
_debugging-information-finders-example: + +Debugging Information Finders +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A callback for automatically finding debugging symbols for a set of modules can +be registered with :meth:`drgn.Program.register_debug_info_finder()`. Here is +an example for getting debugging symbols on Fedora Linux using DNF: + +.. code-block:: python3 + + import subprocess + + import drgn + + # Install debugging symbols using the DNF debuginfo-install plugin. Note that + # this is mainly for demonstration purposes; debuginfod, which drgn supports + # out of the box, is more reliable. + def dnf_debug_info_finder(modules: list[drgn.Module]) -> None: + # Determine all of the packages for the given modules. + packages = set() + for module in modules: + if not module.wants_debug_file(): + continue + + if not module.name.startswith("/"): + continue + + proc = subprocess.run( + ["rpm", "--query", "--file", module.name], + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + text=True, + ) + if proc.returncode == 0: + packages.add(proc.stdout.rstrip("\n")) + + # Try installing their debug info. + subprocess.call( + ["sudo", "dnf", "debuginfo-install", "--skip-broken", "--"] + + sorted(packages) + ) + + # Now that it's installed, try the standard locations. Other finders may + # need to try specific files for specific modules with module.try_file() + # instead. + modules[0].prog.find_standard_debug_info(modules) + + + prog.register_debug_info_finder("dnf", dnf_debug_info_finder, enable_index=-1) + +Custom debugging information finders can even be configured automatically +through the :ref:`plugin system <writing-plugins>`. + +.. _writing-plugins: + +Writing Plugins +--------------- + +In order for drgn to load a plugin automatically, it must be registered as an +`entry point `_ for +the ``drgn.plugins`` group. Here is a minimal example. First: + +.. 
code-block:: console + + $ mkdir drgn_plugin_example + $ cd drgn_plugin_example + +Then, create ``pyproject.toml`` with the following contents: + +.. code-block:: toml + :caption: pyproject.toml + :emphasize-lines: 5-6 + + [project] + name = 'drgn_plugin_example' + version = '0.0.1' + + [project.entry-points.'drgn.plugins'] + example = 'drgn_plugin_example' + +See the `Python Packaging User Guide +`_ for a complete +description of ``pyproject.toml``. We are most interested in the last two +lines, which define the entry point. In ``example = 'drgn_plugin_example'``, +``example`` is the plugin name, and ``drgn_plugin_example`` is the plugin +module. + +Create ``drgn_plugin_example.py`` with the following contents: + +.. code-block:: python3 + :caption: drgn_plugin_example.py + + import drgn + + def example_debug_info_finder(modules: list[drgn.Module]) -> None: + for module in modules: + if isinstance(module, drgn.MainModule): + module.try_file("/my/vmlinux") + + def drgn_prog_set(prog: drgn.Program) -> None: + if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: + prog.register_debug_info_finder( + "example", example_debug_info_finder, enable_index=-1 + ) + # Optional; the default is 50; + drgn_prog_set.drgn_priority = 100 + +This is a typical usage of the :func:`drgn_prog_set()` hook to register +finders. See :ref:`plugins` for more details. + +After creating the above files, the plugin can be installed with +``pip install .``. Library ------- In addition to the CLI, drgn is also available as a library. 
:func:`drgn.program_from_core_dump()`, :func:`drgn.program_from_kernel()`, and -:func:`drgn.program_from_pid()` correspond to the ``-c``, ``-k``, and ``-p`` -command line options, respectively; they return a :class:`drgn.Program` that -can be used just like the one initialized by the CLI:: +:func:`drgn.program_from_pid()` correspond to the :option:`-c`, :option:`-k`, +and :option:`-p` command line options, respectively; they return a +:class:`drgn.Program` that can be used just like the one initialized by the +CLI:: >>> import drgn >>> prog = drgn.program_from_kernel() @@ -44,7 +226,7 @@ Custom Programs --------------- The main components of a :class:`drgn.Program` are the program memory, types, -and symbols. The CLI and equivalent library interfaces automatically determine +and objects. The CLI and equivalent library interfaces automatically determine these. However, it is also possible to create a "blank" ``Program`` and plug in the main components. The :func:`drgn.cli.run_interactive()` function allows you to run the same drgn CLI once you've created a :class:`drgn.Program`, so it's @@ -56,30 +238,33 @@ program "memory": .. 
code-block:: python3 - import drgn import os import sys + + import drgn from drgn.cli import run_interactive def btrfs_debugger(dev): - file = open(dev, 'rb') + file = open(dev, "rb") size = file.seek(0, 2) def read_file(address, count, offset, physical): file.seek(offset) return file.read(count) - platform = drgn.Platform(drgn.Architecture.UNKNOWN, - drgn.PlatformFlags.IS_LITTLE_ENDIAN) + platform = drgn.Platform( + drgn.Architecture.UNKNOWN, drgn.PlatformFlags.IS_LITTLE_ENDIAN + ) prog = drgn.Program(platform) prog.add_memory_segment(0, size, read_file) - prog.load_debug_info([f'/lib/modules/{os.uname().release}/kernel/fs/btrfs/btrfs.ko']) + module = prog.extra_module("btrfs", create=True) + module.try_file(f"/lib/modules/{os.uname().release}/kernel/fs/btrfs/btrfs.ko") return prog - prog = btrfs_debugger(sys.argv[1] if len(sys.argv) >= 2 else '/dev/sda') - print(drgn.Object(prog, 'struct btrfs_super_block', address=65536)) + prog = btrfs_debugger(sys.argv[1] if len(sys.argv) >= 2 else "/dev/sda") + print(drgn.Object(prog, "struct btrfs_super_block", address=65536)) run_interactive(prog, banner_func=lambda _: "BTRFS debugger") :meth:`drgn.Program.register_type_finder()` and @@ -91,12 +276,37 @@ Environment Variables Some of drgn's behavior can be modified through environment variables: -``DRGN_MAX_DEBUG_INFO_ERRORS`` - The maximum number of individual errors to report in a - :exc:`drgn.MissingDebugInfoError`. Any additional errors are truncated. The - default is 5; -1 is unlimited. +.. envvar:: DRGN_DISABLE_PLUGINS + + Comma-separated list of plugins to disable. Each item is a glob pattern + matching plugin entry point names. + +.. envvar:: DRGN_PLUGINS + + Comma-separated list of plugins to enable. Each item is either a plugin + entry point name, a file path, or a module name. Empty items are ignored. + + An item not containing ``=`` is interpreted as a plugin entry point name. + This takes precedence over :envvar:`DRGN_DISABLE_PLUGINS`. 
+ + An item containing ``=`` is interpreted as an extra plugin to load manually + instead of via an entry point. The string before ``=`` is the plugin name. + The string after ``=`` is the value. If the value contains a ``/``, it is + the file path of a Python module. Otherwise, it is a module name. + + So, ``DRGN_DISABLE_PLUGINS=* DRGN_PLUGINS=foo,bar=/hello/world.py,baz=my.module`` + results in three plugins being loaded: the entry point ``foo``, the file + ``/hello/world.py`` as ``bar``, and the module ``my.module`` as ``baz``. + All other plugins are disabled. + +.. envvar:: DRGN_MAX_DEBUG_INFO_ERRORS + + The maximum number of warnings about missing debugging information to log + on CLI startup or from :meth:`drgn.Program.load_debug_info()`. Any + additional warnings are truncated. The default is 5; -1 is unlimited. + +.. envvar:: DRGN_PREFER_ORC_UNWINDER -``DRGN_PREFER_ORC_UNWINDER`` Whether to prefer using `ORC `_ over DWARF for stack unwinding (0 or 1). The default is 0. Note that drgn will always @@ -104,23 +314,20 @@ Some of drgn's behavior can be modified through environment variables: vice versa. This environment variable is mainly intended for testing and may be ignored in the future. -``DRGN_USE_LIBDWFL_REPORT`` - Whether drgn should use libdwfl to find debugging information for core - dumps instead of its own implementation (0 or 1). The default is 0. This - environment variable is mainly intended as an escape hatch in case of bugs - in drgn's implementation and will be ignored in the future. +.. envvar:: DRGN_USE_LIBKDUMPFILE_FOR_ELF -``DRGN_USE_LIBKDUMPFILE_FOR_ELF`` Whether drgn should use libkdumpfile for ELF vmcores (0 or 1). The default is 0. This functionality will be removed in the future. -``DRGN_USE_SYS_MODULE`` +.. envvar:: DRGN_USE_SYS_MODULE + Whether drgn should use ``/sys/module`` to find information about loaded kernel modules for the running kernel instead of getting them from the core dump (0 or 1). The default is 1. 
This environment variable is mainly intended for testing and may be ignored in the future. -``PYTHON_BASIC_REPL`` +.. envvar:: PYTHON_BASIC_REPL + If non-empty, don't try to use the `new interactive REPL `_ added in Python 3.13. drgn makes use of the new REPL through internal diff --git a/docs/api_reference.rst b/docs/api_reference.rst index b3c4d7b22..ac9feee60 100644 --- a/docs/api_reference.rst +++ b/docs/api_reference.rst @@ -7,10 +7,13 @@ Programs -------- .. drgndoc:: Program - :exclude: (void|int|bool|float|struct|union|class|enum|typedef|pointer|array|function)_type + :exclude: (void|int|bool|float|struct|union|class|enum|typedef|pointer|array|function)_type|(main|shared_library|vdso|relocatable|linux_kernel_loadable|extra)_module .. drgndoc:: ProgramFlags .. drgndoc:: FindObjectFlags +.. drgndoc:: DebugInfoOptions +.. drgndoc:: KmodSearchMethod + .. drgndoc:: Thread .. _api-filenames: @@ -97,6 +100,7 @@ Objects ------- .. drgndoc:: Object +.. drgndoc:: AbsenceReason .. drgndoc:: NULL .. drgndoc:: cast .. drgndoc:: implicit_convert @@ -159,6 +163,43 @@ can be used just like types obtained from :meth:`Program.type()`. .. drgndoc:: Program.array_type .. drgndoc:: Program.function_type +Modules +------- + +.. drgndoc:: Module +.. drgndoc:: MainModule +.. drgndoc:: SharedLibraryModule +.. drgndoc:: VdsoModule +.. drgndoc:: RelocatableModule +.. drgndoc:: ExtraModule +.. drgndoc:: ModuleFileStatus +.. drgndoc:: WantedSupplementaryFile +.. drgndoc:: SupplementaryFileKind + +.. _api-module-constructors: + +Module Lookups/Constructors +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For each module type, there is a corresponding method to create a module of +that type or find one that was previously created:: + + >>> prog.extra_module("foo", 1234) + Traceback (most recent call last): + ... 
+ LookupError: module not found + >>> prog.extra_module("foo", 1234, create=True) + prog.extra_module(name='foo', id=0x4d2) + >>> prog.extra_module("foo", 1234) + prog.extra_module(name='foo', id=0x4d2) + +.. drgndoc:: Program.main_module +.. drgndoc:: Program.shared_library_module +.. drgndoc:: Program.vdso_module +.. drgndoc:: Program.relocatable_module +.. drgndoc:: Program.linux_kernel_loadable_module +.. drgndoc:: Program.extra_module + Miscellaneous ------------- @@ -180,12 +221,47 @@ CLI .. drgndoc:: cli +.. _plugins: + +Plugins +------- + +drgn can be extended with plugins. A drgn plugin is a Python module defining +one or more hook functions that are called at specific times. + +By default, drgn loads installed modules registered as :ref:`entry points +<writing-plugins>` for the ``drgn.plugins`` group. The :envvar:`DRGN_PLUGINS` +and :envvar:`DRGN_DISABLE_PLUGINS` environment variables can be used to +configure this. + +The following hooks are currently defined: + +.. py:currentmodule:: None + +.. function:: drgn_prog_set(prog: drgn.Program) -> None + + Called after the program target has been set (e.g., one of + :meth:`drgn.Program.set_core_dump()`, :meth:`drgn.Program.set_kernel()`, or + :meth:`drgn.Program.set_pid()` has been called). + +A ``drgn_priority`` integer attribute can be assigned to a hook function to +define when it is called relative to other plugins. Hook functions with lower +``drgn_priority`` values are called earlier. Functions with equal +``drgn_priority`` values are called in an unspecified order. The default if not +defined is 50. + +See :ref:`writing-plugins` for an example. + Logging ------- drgn logs using the standard :mod:`logging` module to a logger named ``"drgn"``. +drgn will also display progress bars on standard error if standard error is a +terminal, the ``"drgn"`` logger has a :class:`~logging.StreamHandler` for +``stderr``, and its log level is less than or equal to ``WARNING``. 
+ Thread Safety ------------- diff --git a/docs/conf.py b/docs/conf.py index 1a270c4a0..703a20477 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,16 +1,24 @@ import os.path import sys -sys.path.append(os.path.abspath("..")) -sys.path.append(os.path.abspath("exts")) +sys.path.insert(0, os.path.abspath("..")) +sys.path.insert(0, os.path.abspath("exts")) master_doc = "index" +man_pages = [ + ("man/drgn", "drgn", "programmable debugger", "", "1"), +] + +option_emphasise_placeholders = True + extensions = [ + "details", "drgndoc.ext", "linuxsrc", "setuptools_config", "sphinx.ext.extlinks", + "sphinx.ext.graphviz", "sphinx.ext.intersphinx", ] @@ -18,6 +26,10 @@ drgndoc_substitutions = [ (r"^_drgn\b", "drgn"), ] +drgndoc_submodule_sort = [ + # Sort experimental helpers after everything else. + (r"drgn\.helpers", [(r"experimental", 1)]), +] extlinks = { "contrib": ( diff --git a/docs/exts/details.py b/docs/exts/details.py new file mode 100644 index 000000000..4d179ed52 --- /dev/null +++ b/docs/exts/details.py @@ -0,0 +1,85 @@ +# Copyright 2017-2019 by Takeshi KOMIYA +# SPDX-License-Identifier: Apache-2.0 +# From https://pypi.org/project/sphinxcontrib-details-directive/, patched to +# use the proper name for the :class: option. 
+ +from docutils import nodes +from docutils.parsers.rst import Directive, directives +from sphinx.transforms.post_transforms import SphinxPostTransform +from sphinx.util.nodes import NodeMatcher + + +class details(nodes.Element, nodes.General): + pass + + +class summary(nodes.TextElement, nodes.General): + pass + + +def visit_details(self, node): + if node.get('opened'): + self.body.append(self.starttag(node, 'details', open="open")) + else: + self.body.append(self.starttag(node, 'details')) + + +def depart_details(self, node): + self.body.append('') + + +def visit_summary(self, node): + self.body.append(self.starttag(node, 'summary')) + + +def depart_summary(self, node): + self.body.append('') + + +class DetailsDirective(Directive): + required_arguments = 1 + final_argument_whitespace = True + has_content = True + option_spec = { + 'class': directives.class_option, + 'name': directives.unchanged, + 'open': directives.flag, + } + + def run(self): + admonition = nodes.container('', + classes=self.options.get('class', []), + opened='open' in self.options, + type='details') + textnodes, messages = self.state.inline_text(self.arguments[0], + self.lineno) + admonition += nodes.paragraph(self.arguments[0], '', *textnodes) + admonition += messages + self.state.nested_parse(self.content, self.content_offset, admonition) + self.add_name(admonition) + return [admonition] + + +class DetailsTransform(SphinxPostTransform): + default_priority = 200 + builders = ('html',) + + def run(self): + matcher = NodeMatcher(nodes.container, type='details') + for node in self.document.traverse(matcher): + newnode = details(**node.attributes) + newnode += summary('', '', *node[0]) + newnode.extend(node[1:]) + node.replace_self(newnode) + + +def setup(app): + app.add_node(details, html=(visit_details, depart_details)) + app.add_node(summary, html=(visit_summary, depart_summary)) + app.add_directive('details', DetailsDirective) + app.add_post_transform(DetailsTransform) + + return { + 
'parallel_read_safe': True, + 'parallel_write_safe': True, + } diff --git a/docs/exts/drgndoc/docstrings.py b/docs/exts/drgndoc/docstrings.py index 0bc7ac622..aedf003d7 100644 --- a/docs/exts/drgndoc/docstrings.py +++ b/docs/exts/drgndoc/docstrings.py @@ -73,18 +73,7 @@ def escape_string(s: str) -> str: output_file = sys.stdout if args.header: - output_file.write( - """\ -/* - * Generated by drgndoc.docstrings -H. - * - * Before Python 3.7, various docstring fields were defined as char * (see - * https://bugs.python.org/issue28761). We still want the strings to be - * read-only, so just cast away the const. - */ - -""" - ) + output_file.write("/* Generated by drgndoc.docstrings -H. */\n\n") else: output_file.write("/* Generated by drgndoc.docstrings. */\n\n") @@ -111,8 +100,6 @@ def aux(resolved: ResolvedNode[Node], name: str) -> None: else: output_file.write(' ""') output_file.write(";\n") - if args.header: - output_file.write(f"#define {var_name} (char *){var_name}\n") for attr in resolved.attrs(): if isinstance(node, Class) and attr.name == "__init__": continue diff --git a/docs/exts/drgndoc/ext.py b/docs/exts/drgndoc/ext.py index eab7d9a6e..b72df66ff 100644 --- a/docs/exts/drgndoc/ext.py +++ b/docs/exts/drgndoc/ext.py @@ -107,6 +107,17 @@ def missing_reference( reftarget = node.get("reftarget") if reftarget and node.get("reftype") == "class": resolved = env.drgndoc_namespace.resolve_global_name(reftarget) + if not isinstance(resolved, ResolvedNode): + py_module = node.get("py:module", "") + if py_module: + resolved = env.drgndoc_namespace.resolve_global_name( + dot_join(py_module, reftarget) + ) + classes = node.get("classes") + if not isinstance(resolved, ResolvedNode) and classes: + resolved = env.drgndoc_namespace.resolve_global_name( + dot_join(py_module, *classes, reftarget) + ) if ( isinstance(resolved, ResolvedNode) and isinstance(resolved.node, Variable) @@ -255,12 +266,14 @@ def _run_module( except KeyError: have_old_py_module = False + module_name = 
dot_join(top_name, attr_name) + sourcename = node.path or "" if sourcename: self.env.note_dependency(sourcename) contents = docutils.statemachine.StringList( [ - ".. py:module:: " + dot_join(top_name, attr_name), + ".. py:module:: " + module_name, "", *node.docstring.splitlines(), ], @@ -277,7 +290,32 @@ def _run_module( section = child break + attrs = [] + submodules = [] for attr in resolved.attrs(): + if isinstance(attr.node, Module): + submodules.append(attr) + else: + attrs.append(attr) + + # Submodules are initially sorted by name (guaranteed by + # parse_package()). Apply any sorting configuration. + for module_pattern, sort_key_patterns in self.config.drgndoc_submodule_sort: + if re.fullmatch(module_pattern, module_name): + # list.sort() is stable, so this preserves the previous order + # for submodules with the same key. + def sort_key(attr: ResolvedNode[Node]) -> Any: + for pattern, key in sort_key_patterns: + if re.fullmatch(pattern, attr.name): + return key + return 0 + + submodules.sort(key=sort_key) + + # Normal attributes go before submodules. + attrs.extend(submodules) + + for attr in attrs: self._run( top_name, dot_join(attr_name, attr.name), attr.name, attr, section ) @@ -294,5 +332,14 @@ def setup(app: sphinx.application.Sphinx) -> Dict[str, Any]: app.add_config_value("drgndoc_paths", [], "env") # List of (regex pattern, substitution) to apply to resolved names. app.add_config_value("drgndoc_substitutions", [], "env") + # List of (parent regex pattern, list of (submodule regex pattern, key)) + # controlling sort order of submodules. + # + # Submodules are initially sorted by name. For each parent regex pattern + # matching the fully qualified name of the parent module, the list of + # submodules is sorted. The sort key is given by the first submodule regex + # pattern matching the relative name of the submodule, or 0 if no patterns + # match. 
+ app.add_config_value("drgndoc_submodule_sort", [], "env") app.add_directive("drgndoc", DrgnDocDirective) return {"env_version": 1, "parallel_read_safe": True, "parallel_write_safe": True} diff --git a/docs/exts/drgndoc/format.py b/docs/exts/drgndoc/format.py index 9d77e14ea..f66c9cc98 100644 --- a/docs/exts/drgndoc/format.py +++ b/docs/exts/drgndoc/format.py @@ -277,12 +277,11 @@ def _format_function_signature( signature = ["("] need_comma = False - need_blank_line = bool(lines) def visit_arg( arg: ast.arg, default: Optional[ast.expr] = None, name: Optional[str] = None ) -> None: - nonlocal need_comma, need_blank_line + nonlocal need_comma if need_comma: signature.append(", ") signature.append(arg.arg if name is None else name) diff --git a/docs/exts/drgndoc/namespace.py b/docs/exts/drgndoc/namespace.py index 6bdff6771..464c470a9 100644 --- a/docs/exts/drgndoc/namespace.py +++ b/docs/exts/drgndoc/namespace.py @@ -1,6 +1,7 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later +import dataclasses import itertools from typing import Generic, Iterator, List, Mapping, Sequence, TypeVar, Union @@ -18,24 +19,18 @@ NodeT_co = TypeVar("NodeT_co", bound=Node, covariant=True) +@dataclasses.dataclass class BoundNode(Generic[NodeT_co]): - def __init__(self, name: str, node: NodeT_co) -> None: - self.name = name - self.node = node + name: str + node: NodeT_co +@dataclasses.dataclass class ResolvedNode(Generic[NodeT_co]): - def __init__( - self, - modules: Sequence[BoundNode[Module]], - classes: Sequence[BoundNode[Class]], - name: str, - node: NodeT_co, - ) -> None: - self.modules = modules - self.classes = classes - self.name = name - self.node = node + modules: Sequence[BoundNode[Module]] + classes: Sequence[BoundNode[Class]] + name: str + node: NodeT_co def qualified_name(self) -> str: return ".".join( diff --git a/docs/exts/drgndoc/parse.py b/docs/exts/drgndoc/parse.py index 6b541536a..b87041272 100644 --- 
a/docs/exts/drgndoc/parse.py +++ b/docs/exts/drgndoc/parse.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: LGPL-2.1-or-later import ast +import dataclasses import inspect import operator import os.path @@ -59,54 +60,26 @@ def visit_AnnAssign(self, node: ast.AnnAssign) -> ast.AnnAssign: node.annotation = self._visit_annotation(node.annotation) return node - # Replace the old constant nodes produced by ast.parse() before Python 3.8 - # with Constant. - def visit_Num(self, node: Any) -> ast.Constant: - return ast.copy_location(ast.Constant(node.n), node) - - def visit_Str(self, node: Any) -> ast.Constant: - return ast.copy_location(ast.Constant(node.s), node) - - def visit_Bytes(self, node: Any) -> ast.Constant: - return ast.copy_location(ast.Constant(node.s), node) - - def visit_Ellipsis(self, node: Any) -> ast.Constant: - return ast.copy_location(ast.Constant(...), node) - - def visit_NameConstant(self, node: Any) -> ast.Constant: - return ast.copy_location(ast.Constant(node.value), node) - # Get rid of Index nodes, which are deprecated as of Python 3.9. def visit_Index(self, node: Any) -> Any: return self.visit(node.value) -# Once we don't care about Python 3.6, we can replace all of this boilerplate -# with dataclasses. 
- - +@dataclasses.dataclass class Module: - def __init__( - self, path: Optional[str], docstring: Optional[str], attrs: Mapping[str, "Node"] - ) -> None: - self.path = path - self.docstring = docstring - self.attrs = attrs + path: Optional[str] + docstring: Optional[str] + attrs: Mapping[str, "Node"] def has_docstring(self) -> bool: return self.docstring is not None +@dataclasses.dataclass class Class: - def __init__( - self, - bases: Sequence[ast.expr], - docstring: Optional[str], - attrs: Mapping[str, "NonModuleNode"], - ) -> None: - self.bases = bases - self.docstring = docstring - self.attrs = attrs + bases: Sequence[ast.expr] + docstring: Optional[str] + attrs: Mapping[str, "NonModuleNode"] def has_docstring(self) -> bool: if self.docstring is not None: @@ -115,18 +88,12 @@ def has_docstring(self) -> bool: return isinstance(init, Function) and init.has_docstring() +@dataclasses.dataclass class FunctionSignature: - def __init__( - self, - args: ast.arguments, - returns: Optional[ast.expr], - decorator_list: Sequence[ast.expr], - docstring: Optional[str], - ) -> None: - self.args = args - self.returns = returns - self.decorator_list = decorator_list - self.docstring = docstring + args: ast.arguments + returns: Optional[ast.expr] + decorator_list: Sequence[ast.expr] + docstring: Optional[str] def has_decorator(self, name: str) -> bool: return any( @@ -135,43 +102,39 @@ def has_decorator(self, name: str) -> bool: ) +@dataclasses.dataclass class Function: - def __init__(self, async_: bool, signatures: Sequence[FunctionSignature]) -> None: - self.async_ = async_ - self.signatures = signatures + async_: bool + signatures: Sequence[FunctionSignature] def has_docstring(self) -> bool: return any(signature.docstring is not None for signature in self.signatures) +@dataclasses.dataclass class Variable: - def __init__( - self, annotation: Optional[ast.expr], docstring: Optional[str] - ) -> None: - self.annotation = annotation - self.docstring = docstring + annotation: 
Optional[ast.expr] + docstring: Optional[str] def has_docstring(self) -> bool: return self.docstring is not None +@dataclasses.dataclass class Import: - def __init__(self, module: str, aliased: bool) -> None: - self.module = module - self.aliased = aliased + module: str + aliased: bool def has_docstring(self) -> bool: return False +@dataclasses.dataclass class ImportFrom: - def __init__( - self, name: str, module: Optional[str], level: int, aliased: bool - ) -> None: - self.name = name - self.module = module - self.level = level - self.aliased = aliased + name: str + module: Optional[str] + level: int + aliased: bool def has_docstring(self) -> bool: return False diff --git a/docs/getting_debugging_symbols.rst b/docs/getting_debugging_symbols.rst index fc0f61f9c..841c67565 100644 --- a/docs/getting_debugging_symbols.rst +++ b/docs/getting_debugging_symbols.rst @@ -3,45 +3,324 @@ Getting Debugging Symbols .. highlight:: console -Most Linux distributions don't install debugging symbols for installed packages -by default. This page documents how to install debugging symbols on common -distributions. If drgn prints an error like:: +drgn needs debugging symbols in order to interpret the target program. If drgn +prints a warning like:: - $ sudo drgn - could not get debugging information for: - kernel (could not find vmlinux for 5.14.14-200.fc34.x86_64) + $ drgn + warning: missing debugging symbols for kernel 6.13.8-200.fc41.x86_64 + critical: missing some debugging symbols; see https://drgn.readthedocs.io/en/latest/getting_debugging_symbols.html ... -Then you need to install debugging symbols. +then you need to get debugging symbols. The method depends on whether the +binary that is missing debugging symbols was built manually or is provided by +your Linux distribution. + +Note that you only need debugging symbols for the binaries you're actually +debugging. If the warnings are for modules, shared libraries, etc. that you +don't care about, feel free to ignore them. 
+ +Since drgn 0.0.31, you can run drgn with ``--log-level debug`` to get logs of +where drgn looked for debugging symbols. + +Building With Debugging Symbols +------------------------------- + +If the binary that drgn warns about is one that you built yourself, then you +need to rebuild it with debugging symbols. Here is a quick overview of how to +do that in different build systems: + +.. list-table:: + :header-rows: 1 + + * - Build System + - Instructions + * - Linux Kernel + - Since Linux 5.18: In ``menuconfig``, set ``Kernel hacking -> + Compile-time checks and compiler options -> Debug information`` to + ``Rely on the toolchain's implicit default DWARF version``. Or, add + ``CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y`` to :file:`.config`. + + Before Linux 5.18: In ``menuconfig``, enable ``Kernel hacking -> + Compile-time checks and compiler options -> Compile the kernel with + debug info``. Or, add ``CONFIG_DEBUG_INFO=y`` to :file:`.config`. + * - `Meson `_ + - Run ``meson setup --buildtype=debugoptimized $builddir`` or + ``meson setup --buildtype=debug $builddir``. + * - `CMake `_ + - Run ``cmake -B $builddir -DCMAKE_BUILD_TYPE=RelWithDebInfo`` or + ``cmake -B $builddir -DCMAKE_BUILD_TYPE=Debug``. + + Or, add ``set(CMAKE_BUILD_TYPE RelWithDebInfo)`` or + ``set(CMAKE_BUILD_TYPE Debug)`` to :file:`CMakeLists.txt`. + * - Autotools + - Depends on the project, but usually ``CFLAGS="-Og -g" ./configure``. + * - Make + - Depends on the project, but usually ``CFLAGS="-Og -g" make``. + * - None (GCC or Clang directly) + - Pass ``-Og -g`` options. + +Consult your build system's documentation for details. + +Debugging Symbols for Linux Distribution Packages +------------------------------------------------- + +Most Linux distributions don't install debugging symbols for installed packages +by default. 
If the binary that drgn warns about is part of your Linux +distribution, then you have two options: manual installation through the +package manager or automatic downloads using debuginfod. This section documents +how to do both on common Linux distributions, including flow charts for +recommended practices. + +.. contents:: Contents + :depth: 1 + :local: + :backlinks: none + +Debuginfod +^^^^^^^^^^ + +`debuginfod `_ is a service +providing debugging symbols via an HTTP API. Many Linux distributions run a +debuginfod server for their packages, and some automatically enable it. + +Debugging symbols can be downloaded via debuginfod automatically, so it +typically provides the best user experience. However, there are a few caveats, +especially when debugging the Linux kernel: + +1. Before drgn 0.0.31, drgn did not support using debuginfod for the Linux kernel. +2. Except on Fedora's debuginfod server, downloading debugging symbols for the + Linux kernel is extremely slow due to `technical limitations that have been + fixed upstream + `_ + but not yet deployed on other distributions. As a result, since drgn 0.0.31, + when debugging the Linux kernel, drgn only uses debuginfod on Fedora. +3. Before drgn 0.0.31, while drgn is downloading from debuginfod, it can't be + interrupted with :kbd:`Ctrl-C`, and it doesn't print a progress bar. + +.. _debuginfod-support: + +Since drgn 0.0.31, drgn includes whether it was built with debuginfod support +in its version string (look for "with debuginfod"):: + + $ drgn --version + drgn 0.0.31 (using Python 3.13.2, elfutils 0.192, with debuginfod (dlopen), with libkdumpfile) + +If you built drgn from source and the version string includes "without +debuginfod", make sure you installed the :ref:`necessary dependencies +` and rebuild drgn. Before drgn 0.0.31, drgn doesn't +need to be built specifically with debuginfod support. Fedora ------- +^^^^^^ + +.. 
graphviz:: + + digraph { + start [ + label = "Need debugging\nsymbols on Fedora" + style = filled + fillcolor = lightpink + ] + drgn_version [ + label = "What version\nof drgn?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + use_debuginfod [ + label = "Use debuginfod\n(automatic)" + style = filled + fillcolor = palegreen + ] + use_dnf [ + label = "Manually install with\ndnf debuginfo-install" + style = filled + fillcolor = palegreen + ] + + start -> drgn_version + drgn_version -> use_debuginfod [ label = ">= 0.0.31" ] + drgn_version -> use_dnf [ label = "< 0.0.31" ] + } + +Debuginfod +"""""""""" + +Fedora automatically enables debuginfod by default. Since drgn 0.0.31, drgn can +even use debuginfod for Linux kernel debugging symbols. + +If debuginfod is not working, :ref:`make sure ` your build +of drgn supports it and try running:: + + $ sudo dnf install elfutils-debuginfod-client + $ source /etc/profile.d/debuginfod.sh + +Also see the `Fedora debuginfod documentation +`_. + +Manual Installation +""""""""""""""""""" + +Debugging symbols can also be installed manually on Fedora with ``sudo dnf +debuginfo-install $package``. -Fedora makes it very easy to install debugging symbols with the `DNF -debuginfo-install plugin -`_, -which is installed by default. Simply run ``sudo dnf debuginfo-install -$package``:: +To install symbols for the running kernel:: - $ sudo dnf debuginfo-install python3 + $ sudo dnf debuginfo-install kernel-$(uname -r) To find out what package owns a binary, use ``rpm -qf``:: - $ rpm -qf $(which python3) - python3-3.9.7-1.fc34.x86_64 + $ rpm -qf "$(command -v python3)" + python3-3.13.2-1.fc41.x86_64 + $ sudo dnf debuginfo-install python3 + +Also see the `Fedora documentation +`_. + +CentOS Stream +^^^^^^^^^^^^^ + +.. graphviz:: + + digraph { + start [ + label = "Need debugging symbols\non CentOS Stream" + style = filled + fillcolor = lightpink + ] + drgn_version [ + label = "What version\nof drgn?" 
+ shape = diamond + style = filled + fillcolor = khaki1 + ] + kernel [ + label = "Are you\ndebugging the\nLinux kernel?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + use_debuginfod [ + label = "Use debuginfod\n(automatic)" + style = filled + fillcolor = palegreen + ] + use_dnf [ + label = "Manually install with\ndnf debuginfo-install" + style = filled + fillcolor = palegreen + ] + + start -> drgn_version + drgn_version -> kernel [ label = ">= 0.0.31" ] + drgn_version -> use_dnf [ label = "< 0.0.31" ] + kernel -> use_dnf [ label = "Yes" ] + kernel -> use_debuginfod [ label = "No" ] + } + +Debuginfod +"""""""""" + +CentOS Stream automatically enables debuginfod by default since CentOS Stream +9. drgn will not use it for Linux kernel debugging symbols by default. + +If debuginfod is not working, :ref:`make sure ` your build +of drgn supports it and try running:: + + $ sudo dnf install elfutils-debuginfod-client + $ source /etc/profile.d/debuginfod.sh + +Manual Installation +""""""""""""""""""" + +Debugging symbols can be installed manually on CentOS Stream with ``sudo dnf +debuginfo-install $package``. To install symbols for the running kernel:: $ sudo dnf debuginfo-install kernel-$(uname -r) -Also see the `Fedora documentation -`_. - -Debian ------- +To find out what package owns a binary, use ``rpm -qf``:: -Debian requires you to manually add the debugging symbol repositories:: + $ rpm -qf "$(command -v python3)" + python3-3.12.9-1.el10.x86_64 + $ sudo dnf debuginfo-install python3 +Debian +^^^^^^ + +.. graphviz:: + + digraph { + start [ + label = "Need debugging\nsymbols on Debian" + style = filled + fillcolor = lightpink + ] + drgn_version [ + label = "What version\nof drgn?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + kernel [ + label = "Are you\ndebugging the\nLinux kernel?" 
+ shape = diamond + style = filled + fillcolor = khaki1 + ] + enable_debug_repos [ + label = "Enable debug\nrepositories" + shape = rectangle + style = filled + fillcolor = bisque + ] + use_apt [ + label = "Manually install\nwith apt" + style = filled + fillcolor = palegreen + ] + enable_debuginfod [ + label = "Enable debuginfod" + shape = rectangle + style = filled + fillcolor = bisque + ] + use_debuginfod [ + label = "Use debuginfod" + style = filled + fillcolor = palegreen + ] + + start -> drgn_version + drgn_version -> kernel [ label = ">= 0.0.31" ] + drgn_version -> enable_debug_repos [ label = "< 0.0.31" ] + kernel -> enable_debug_repos [ label = "Yes" ] + enable_debug_repos -> use_apt + kernel -> enable_debuginfod [ label = "No" ] + enable_debuginfod -> use_debuginfod + } + +Debuginfod +"""""""""" + +On Debian, debuginfod must be enabled manually:: + + $ sudo apt install libdebuginfod-common + $ sudo ln -s /usr/share/libdebuginfod-common/debuginfod.sh /usr/share/libdebuginfod-common/debuginfod.csh /etc/profile.d + $ source /etc/profile.d/debuginfod.sh + +drgn will not use it for Linux kernel debugging symbols by default. + +Also see the `Debian debuginfod documentation +`_. + +Manual Installation +""""""""""""""""""" + +On Debian, the debugging symbol repositories must be added manually:: + + $ sudo apt install lsb-release $ sudo tee /etc/apt/sources.list.d/debug.list << EOF deb http://deb.debian.org/debian-debug/ $(lsb_release -cs)-debug main deb http://deb.debian.org/debian-debug/ $(lsb_release -cs)-proposed-updates-debug main @@ -49,38 +328,99 @@ Debian requires you to manually add the debugging symbol repositories:: $ sudo apt update Then, debugging symbol packages can be installed with ``sudo apt install``. 
-Some debugging symbol packages are named with a ``-dbg`` suffix:: - $ sudo apt install python3-dbg +To install symbols for the running kernel:: + + $ sudo apt install linux-image-$(uname -r)-dbg -And some are named with a ``-dbgsym`` suffix:: +Some debugging symbol packages are named with a ``-dbg`` suffix and some are +named with a ``-dbgsym`` suffix:: + $ sudo apt install python3-dbg $ sudo apt install coreutils-dbgsym You can use the ``find-dbgsym-packages`` command from the ``debian-goodies`` package to find the correct name:: $ sudo apt install debian-goodies - $ find-dbgsym-packages $(which python3) - libc6-dbg libexpat1-dbgsym python3.9-dbg zlib1g-dbgsym - $ find-dbgsym-packages $(which cat) + $ find-dbgsym-packages $(command -v python3) + libc6-dbg libexpat1-dbgsym python3.11-dbg zlib1g-dbgsym + $ find-dbgsym-packages $(command -v cat) coreutils-dbgsym libc6-dbg -To install symbols for the running kernel:: - - $ sudo apt install linux-image-$(uname -r)-dbg - Also see the `Debian documentation `_. Ubuntu ------- - -On Ubuntu, you must install the debugging symbol archive signing key and -manually add the debugging symbol repositories:: - - $ sudo apt update - $ sudo apt install ubuntu-dbgsym-keyring +^^^^^^ + +.. graphviz:: + + digraph { + start [ + label = "Need debugging\nsymbols on Ubuntu" + style = filled + fillcolor = lightpink + ] + drgn_version [ + label = "What version\nof drgn?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + kernel [ + label = "Are you\ndebugging the\nLinux kernel?" 
+ shape = diamond + style = filled + fillcolor = khaki1 + ] + enable_debug_repos [ + label = "Enable debug\nrepositories" + shape = rectangle + style = filled + fillcolor = bisque + ] + use_apt [ + label = "Manually install\nwith apt" + style = filled + fillcolor = palegreen + ] + use_debuginfod [ + label = "Use debuginfod\n(automatic)" + style = filled + fillcolor = palegreen + ] + + start -> drgn_version + drgn_version -> kernel [ label = ">= 0.0.31" ] + drgn_version -> enable_debug_repos [ label = "< 0.0.31" ] + kernel -> enable_debug_repos [ label = "Yes" ] + enable_debug_repos -> use_apt + kernel -> use_debuginfod [ label = "No" ] + } + +Debuginfod +"""""""""" + +Ubuntu automatically enables debuginfod by default since Ubuntu 22.04 (Jammy +Jellyfish). drgn will not use it for Linux kernel debugging symbols by default. + +If debuginfod is not working, :ref:`make sure ` your build +of drgn supports it and try running:: + + $ sudo apt install libdebuginfod-common + $ source /etc/profile.d/debuginfod.sh + +Also see the `Ubuntu debuginfod documentation +`_. + +Manual Installation +""""""""""""""""""" + +On Ubuntu, the debugging symbol archive signing key must be installed and the +debugging symbol repositories must be added manually:: + + $ sudo apt install lsb-release ubuntu-dbgsym-keyring $ sudo tee /etc/apt/sources.list.d/debug.list << EOF deb http://ddebs.ubuntu.com $(lsb_release -cs) main restricted universe multiverse deb http://ddebs.ubuntu.com $(lsb_release -cs)-updates main restricted universe multiverse @@ -88,8 +428,14 @@ manually add the debugging symbol repositories:: EOF $ sudo apt update -Like Debian, some debugging symbol packages are named with a ``-dbg`` suffix -and some are named with a ``-dbgsym`` suffix:: +Then, debugging symbol packages can be installed with ``sudo apt install``. 
+ +To install symbols for the running kernel:: + + $ sudo apt install linux-image-$(uname -r)-dbgsym + +Some debugging symbol packages are named with a ``-dbg`` suffix and some are +named with a ``-dbgsym`` suffix:: $ sudo apt install python3-dbg $ sudo apt install coreutils-dbgsym @@ -98,31 +444,152 @@ You can use the ``find-dbgsym-packages`` command from the ``debian-goodies`` package to find the correct name:: $ sudo apt install debian-goodies - $ find-dbgsym-packages $(which python3) - libc6-dbg libexpat1-dbgsym python3.9-dbg zlib1g-dbgsym - $ find-dbgsym-packages $(which cat) + $ find-dbgsym-packages $(command -v python3) + libc6-dbg libexpat1-dbgsym python3.12-dbg zlib1g-dbgsym + $ find-dbgsym-packages $(command -v cat) coreutils-dbgsym libc6-dbg -To install symbols for the running kernel:: - - $ sudo apt install linux-image-$(uname -r)-dbgsym - Also see the `Ubuntu documentation -`_. +`_. Arch Linux ----------- +^^^^^^^^^^ + +.. graphviz:: + + digraph { + start [ + label = "Need debugging symbols\non Arch Linux" + style = filled + fillcolor = lightpink + ] + kernel [ + label = "Are you\ndebugging the\nLinux kernel?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + not_available [ + label = "Debugging symbols\nare not available" + style = filled + fillcolor = lightpink + ] + use_debuginfod [ + label = "Use debuginfod\n(automatic)" + style = filled + fillcolor = palegreen + ] + + start -> kernel + kernel -> not_available [ label = "Yes" ] + kernel -> use_debuginfod [ label = "No" ] + } + +Debuginfod +"""""""""" + +Arch Linux automatically enables debuginfod by default. However, debugging +symbols are not available for the Linux kernel. + +If debuginfod is not working, :ref:`make sure ` your build +of drgn supports it and try running:: + + $ sudo pacman -S --needed libelf + $ source /etc/profile.d/debuginfod.sh + +Also see the `Arch Linux debuginfod documentation +`_. 
+ +Manual Installation +""""""""""""""""""" + +Arch Linux does not provide debugging symbol packages. + +openSUSE +^^^^^^^^ + +.. graphviz:: + + digraph { + start [ + label = "Need debugging symbols\non openSUSE" + style = filled + fillcolor = lightpink + ] + distribution [ + label = "Which\ndistribution?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + drgn_version [ + label = "What version\nof drgn?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + kernel [ + label = "Are you\ndebugging the\nLinux kernel?" + shape = diamond + style = filled + fillcolor = khaki1 + ] + use_debuginfod [ + label = "Use debuginfod\n(automatic)" + style = filled + fillcolor = palegreen + ] + use_zypper [ + label = "Manually install\nwith zypper" + style = filled + fillcolor = palegreen + ] + + start -> distribution + distribution -> drgn_version [ label = "Tumbleweed" ] + distribution -> use_zypper [ label = "Leap" ] + drgn_version -> kernel [ label = ">= 0.0.31" ] + drgn_version -> use_zypper [ label = "< 0.0.31" ] + kernel -> use_zypper [ label = "Yes" ] + kernel -> use_debuginfod [ label = "No" ] + } + +Debuginfod +"""""""""" + +openSUSE Tumbleweed automatically enables debuginfod by default. drgn will not +use it for Linux kernel debugging symbols by default. + +If debuginfod is not working, :ref:`make sure ` your build +of drgn supports it and try running:: + + $ sudo zypper install debuginfod-client + $ source /etc/profile.d/debuginfod.sh + +openSUSE Leap does not support debuginfod. 
+ +Manual Installation +""""""""""""""""""" + +Debugging symbols can be installed manually on openSUSE with:: + + $ sudo zypper --plus-content debug install "${package}-debuginfo" + +To install symbols for the running kernel:: + + $ sudo zypper --plus-content debug install "$(rpm --qf '%{NAME}-debuginfo-%{VERSION}-%{RELEASE}.%{ARCH}' -qf /boot/vmlinuz-"$(uname -r)")" + +To find out what package owns a binary, use ``rpm -qf``:: -Arch Linux unfortunately does not make debugging symbols available. Packages -must be manually rebuilt with debugging symbols enabled. See the `ArchWiki -`_ and the `feature -request `_. + $ rpm -qf "$(command -v python3)" + python313-base-3.13.2-3.1.x86_64 + $ sudo zypper --plus-content debug install python313-base-debuginfo Oracle Linux ------------- +^^^^^^^^^^^^ -Oracle Linux provides documentation on using installing the necessary debugging -symbols. See the documentation for `Oracle Linux 9 -`_ +Oracle Linux provides documentation on installing debugging symbols for the +Linux kernel. See the documentation for `Oracle Linux 9 +`_ and `Oracle Linux 8 -`_. +`_. diff --git a/docs/index.rst b/docs/index.rst index 6649fc36c..e67e849f0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,14 +26,27 @@ Table of Contents ----------------- .. toctree:: + :caption: Using drgn :maxdepth: 3 installation user_guide advanced_usage + getting_debugging_symbols + +.. toctree:: + :caption: Tutorials + :maxdepth: 3 + + tutorials + case_studies + +.. 
toctree:: + :caption: Reference + :maxdepth: 3 + api_reference helpers + Man Page support_matrix - case_studies - getting_debugging_symbols release_highlights diff --git a/docs/installation.rst b/docs/installation.rst index aaae8be44..467d94e35 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -8,7 +8,7 @@ Dependencies drgn depends on: -- `Python `_ 3.6 or newer +- `Python `_ 3.8 or newer - `elfutils `_ 0.165 or newer It optionally depends on: @@ -16,6 +16,9 @@ It optionally depends on: - `libkdumpfile `_ for `makedumpfile `_ compressed kernel core dump format support +- `liblzma `_ for `MiniDebuginfo + `_ + support The build requires: diff --git a/docs/man/drgn.rst b/docs/man/drgn.rst new file mode 100644 index 000000000..793ec745e --- /dev/null +++ b/docs/man/drgn.rst @@ -0,0 +1,207 @@ +drgn +==== + +Synopsis +-------- + +| **drgn** [*OPTION*...] +| **drgn** [*OPTION*...] *SCRIPT* [*ARGUMENT*...] +| **drgn** [*OPTION*...] **-e** *CODE* [*ARGUMENT*...] + +Description +----------- + +:command:`drgn` (pronounced "dragon") is a debugger with an emphasis on +programmability. It provides APIs for using the types, variables, and stack +traces in a program or core dump from Python, allowing for easy, expressive +scripting and more complex debugging. + +Full documentation is available online at https://drgn.readthedocs.io/. + +For in-program documentation, try ``help(drgn)``. + +Options +------- + +.. program:: drgn + +If no positional arguments are given (and :option:`-e` is not given), then drgn +runs in *interactive mode*: commands are read from the terminal. Relevant +helpers are automatically imported. An empty string is prepended to +:py:data:`sys.path`. + +If positional arguments are given (and :option:`-e` is not given), then drgn +runs in *script mode*: *SCRIPT* is executed with the given *ARGUMENT*\ s. +Nothing is imported automatically. :py:data:`sys.argv[0] ` is set to +*SCRIPT* and the remaining arguments are added to :py:data:`sys.argv`. 
The +parent directory of *SCRIPT* is prepended to :py:data:`sys.path`. + +.. option:: -e {CODE} + + Evaluate the given code and exit. Relevant helpers are automatically + imported. :py:data:`sys.argv[0] ` is set to *-e* and the + remaining arguments are added to :py:data:`sys.argv`. An empty string is + prepended to :py:data:`sys.path`. + +Program Selection +^^^^^^^^^^^^^^^^^ + +One of these options may be given to specify what program to debug. + +.. option:: -k, --kernel + + Debug the running kernel. This is the default. + +.. option:: -c, --core {PATH} + + Debug the given core dump. + +.. option:: -p, --pid {PID} + + Debug the running process with the given process ID. + +Debugging Symbols +^^^^^^^^^^^^^^^^^ + +.. option:: -s, --symbols {PATH} + + Load debugging symbols from the given file. If the file does not correspond + to a loaded executable, library, or module, then a warning is printed and + it is ignored; see :option:`--extra-symbols` for an alternative. + + This option may be given more than once. + +.. option:: --main-symbols + + Only load debugging symbols for the main executable and those added with + :option:`-s` or :option:`--extra-symbols`. + +.. option:: --no-default-symbols + + Don't load any debugging symbols that were not explicitly added with + :option:`-s` or :option:`--extra-symbols`. + +.. option:: --extra-symbols {PATH} + + Load additional debugging symbols from the given file, which is assumed not + to correspond to a loaded executable, library, or module. + + This option may be given more than once. + +The following options correspond to :py:attr:`drgn.Program.debug_info_options` +in the Python API. + +.. option:: --try-symbols-by {METHOD[,METHOD...]} + + Enable loading debugging symbols using the given methods. *METHOD* may be: + + * The name of a debugging information finder (``standard``, ``debuginfod``, + or any added by plugins). + * ``module-name``: if the name of a module looks like a filesystem path, try the + file at that path. 
+ * ``build-id``: search by build ID. + * ``debug-link``: search by debug link (e.g., ``.gnu_debuglink``). + * ``procfs``: try :file:`/proc/{pid}/exe` or :file:`/proc/{pid}/map_files`. + * ``embedded-vdso``: try vDSO data saved in a core dump. + * ``reuse``: try reusing a previously used file. + * ``supplementary``: try finding supplementary files (e.g., + ``.gnu_debugaltlink``). + * ``kmod=depmod``: search using *depmod* metadata. + * ``kmod=walk``: search by walking kernel directories. + * ``kmod=depmod-or-walk``: search using *depmod* metadata if it is + available or by walking kernel directories if *depmod* metadata does not + exist. + * ``kmod=depmod-and-walk``: search using *depmod* metadata if it is + available, then by walking kernel directories if *depmod* metadata does + not exist or does not contain the desired module. + + Multiple methods may be enabled by passing a comma-separated list. This + option may be given more than once, in which case the lists will be + combined. + +.. option:: --no-symbols-by {METHOD[,METHOD...]} + + Disable loading debugging symbols using the given methods. *METHOD* may be + the name of a debugging information finder, ``module-name``, ``build-id``, + ``debug-link``, ``procfs``, ``embedded-vdso``, ``reuse``, + ``supplementary``, or ``kmod``. + + Multiple methods may be disabled by passing a comma-separated list. This + option may be given more than once, in which case the lists will be + combined. + +Debugging Symbol Directories +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +These options correspond to :attr:`drgn.DebugInfoOptions.directories` in the +Python API. + +.. option:: --debug-directory {PATH} + + Search for debugging symbols in the given directory. + + By default, these directories are used for searches by build ID, debug + link, for supplementary files, and for kernel files. + + This option may be given more than once to search in multiple directories. + +.. 
option:: --no-default-debug-directories + + Don't search for debugging symbols in the standard directories or those + added by plugins. + +These options correspond to +:attr:`drgn.DebugInfoOptions.debug_link_directories` in the Python API. + +.. option:: --debug-link-directory {PATH} + + Search for debugging symbols by debug link in the given directory. + + ``$ORIGIN`` (or ``${ORIGIN}``) is replaced with the absolute path of the + directory containing the loaded file. + + This option may be given more than once to search in multiple directories. + +.. option:: --no-default-debug-link-directories + + Don't search for debugging symbols by debug link in the standard + directories or those added by plugins. + +These options correspond to :attr:`drgn.DebugInfoOptions.kernel_directories` in +the Python API. + +.. option:: --kernel-directory {PATH} + + Search for the kernel image and loadable kernel modules in the given + directory. + + This option may be given more than once to search in multiple directories. + +.. option:: --no-default-kernel-directories + + Don't search for the kernel image and loadable kernel modules in the + standard directories or those added by plugins. + +Logging +^^^^^^^ + +.. option:: --log-level {\{debug,info,warning,error,critical,none\}} + + Log messages of at least the given level to standard error. The default is + *warning*. + +.. option:: -q, --quiet + + Don't print any logs or download progress. This is equivalent to + :option:`--log-level none <--log-level>`. + +Generic Information +^^^^^^^^^^^^^^^^^^^ + +.. option:: -h, --help + + Show a help message and exit. + +.. option:: --version + + Show :command:`drgn`'s version information and exit. diff --git a/docs/release_highlights.rst b/docs/release_highlights.rst index d5142f08d..7f57e5d5a 100644 --- a/docs/release_highlights.rst +++ b/docs/release_highlights.rst @@ -6,6 +6,8 @@ from the full `release notes `_. .. 
toctree:: + release_highlights/0.0.32.rst + release_highlights/0.0.31.rst release_highlights/0.0.30.rst release_highlights/0.0.28.rst release_highlights/0.0.27.rst diff --git a/docs/release_highlights/0.0.31.rst b/docs/release_highlights/0.0.31.rst new file mode 100644 index 000000000..efd098548 --- /dev/null +++ b/docs/release_highlights/0.0.31.rst @@ -0,0 +1,184 @@ +0.0.31 (Released April 16th, 2025) +================================== + +These are some of the highlights of drgn 0.0.31. See the `GitHub release +`_ for the full release +notes, including more improvements and bug fixes. + +Fun fact: this is the largest release of drgn since the first ever release, +both in terms of number of commits and changed lines of code. + +.. highlight:: pycon +.. program:: drgn + +Module API +---------- + +One of the first things drgn does when it starts up is figure out what binaries +are loaded in your program: executables, shared libraries, Linux kernel image, +Linux kernel modules, etc. Until this release, this all happened internally to +drgn with no way to inspect or override it. This release adds new APIs to +address this. + +First, the :class:`drgn.Module` class and its subclasses were added to +represent binaries used by a program. + +:class:`drgn.Program` gained a couple of methods for querying what modules were +created for a program, :meth:`drgn.Program.modules()` and +:meth:`drgn.Program.module()`:: + + >>> for module in prog.modules(): + ... print(module) + ... + prog.main_module(name='kernel') + prog.relocatable_module(name='scsi_dh_rdac', address=0xffffffffc02fb000) + prog.relocatable_module(name='nvme', address=0xffffffffc051f000) + prog.relocatable_module(name='spi_intel', address=0xffffffffc0fa3000) + ... + >>> prog.module("kernel") + prog.main_module(name='kernel') + >>> prog.module(0xffffffff92811100) + prog.main_module(name='kernel') + +Modules are normally created automatically for all loaded binaries when +debugging symbols are loaded. 
This can also be done manually with +:meth:`drgn.Program.loaded_modules()` or +:meth:`drgn.Program.create_loaded_modules()`. Arbitrary modules can also be +:ref:`created manually `. This enables more +:ref:`advanced use cases `. + +Options for Finding Debugging Symbols +------------------------------------- + +drgn now provides much more control over how debugging symbols are found. + +The :option:`--try-symbols-by` and :option:`--no-symbols-by` command line +options allow enabling or disabling methods of searching for debugging symbols. +The :option:`--debug-directory` and :option:`--no-default-debug-directories` +options allow controlling the directories that are searched for debugging +symbols. The :option:`--kernel-directory` and +:option:`--no-default-kernel-directories` options allow controlling the +directories that are searched for Linux kernel files. + +For example, if you have a kernel core dump and a directory containing kernel +debugging symbols: + +.. code-block:: console + + $ ls + kernel-6.15.0-rc1-debuginfo vmcore + $ drgn -c vmcore --kernel-directory kernel-6.15.0-rc1-debuginfo + +These options are also available programmatically as +:attr:`drgn.Program.debug_info_options`. + +Stricter Debugging Symbol File Matching +--------------------------------------- + +A common pitfall for users is passing the wrong debugging symbol file to +:option:`-s` (for example, the vmlinux from a different kernel build, or a +kernel module or library that wasn't loaded at the time). Before this release, +drgn was quite permissive and would use the file anyways, usually with +confusing results. + +Starting in this release, drgn now always checks that files passed to +:option:`-s` or :meth:`drgn.Program.load_debug_info()` correspond to a loaded +module (based on build IDs). If not, it logs a warning and ignores them. + +However, there are valid use cases for adding unloaded files, like corrupted +core dumps or reading debugging symbols from arbitrary files. 
If you really +want to use a file for a specific module, then you can find the module with +:meth:`drgn.Program.modules()` or :meth:`drgn.Program.module()` and add the +file with :meth:`drgn.Module.try_file(path, force=True) +`. If you really want to load debugging symbols from a +file without associating it with a loaded module, you can use +:option:`--extra-symbols` or +:meth:`drgn.Program.extra_module(...).try_file(path) +`. + +Debuginfod Integration +---------------------- + +`debuginfod `_ is a service +for automatically downloading debugging symbols. drgn has had partial +debuginfod support for a long time (via the libdwfl library), with a few +important limitations: + +1. It couldn't use debuginfod for the Linux kernel. +2. Downloads couldn't be interrupted with Ctrl-C. +3. The download progress bar wasn't very pretty. + +This release improves drgn's integration with debuginfod and fixes these +issues. + +There's still one caveat for the Linux kernel: drgn only enables debuginfod for +the Linux kernel on Fedora, because other distributions haven't yet deployed +the `fix for extremely slow downloads of kernel debugging symbols +`_ +on their debuginfod servers. Contact your distribution to request that they +update their debuginfod server to at least elfutils 0.192 and compress their +kernel debug info packages with parallel xz. + +Custom Debugging Information Finders +------------------------------------ + +If the above options for finding debugging symbols don't provide enough +flexibility, you can define totally custom ways of finding debugging symbols by +registering a debugging information finder. See :ref:`here +` for an example. + +Plugins +------- + +drgn now has a basic plugin system. Currently, the main use case is +automatically setting system- or user-specific configuration when drgn starts +up. For example, system administrators may install a plugin that registers a +debugging information finder for their specific system. 
See :ref:`here +` for an overview and :ref:`here ` for an example. + +Running Code Snippets on the Command Line +----------------------------------------- + +Sometimes, you don't want an interactive drgn session or a full drgn script; +you just want to run a short snippet of code. In this release, Stephen Brennan +added the :option:`-e` option, which takes a string of code to evaluate: + +.. code-block:: console + + $ python3 -m drgn -e 'print(kaslr_offset())' + 251658240 + +(We would have used ``-c`` like the Python CLI, but that is already used to +specify a core dump.) + +Kernel Stack Unwinding Without Debugging Symbols +------------------------------------------------ + +drgn has had support for the Linux kernel's `ORC unwinder +`_ for a long time. +However, although ORC data is typically saved in kernel core dumps, drgn +previously only supported reading ORC data from the kernel debugging symbol +files. + +In this release, Stephen Brennan expanded drgn's ORC support to be able to read +ORC data directly from the core dump. This enables reliable stack unwinding +even through unknown or out-of-tree kernel modules. This is the latest step +towards support for `debugging the Linux kernel without full DWARF debugging +information `_. + +Linux 6.14 and 6.15 Support +--------------------------- + +A change in Linux 6.14 broke how drgn determines module section addresses. This +error on startup is fixed in this release:: + + /lib/modules/6.14.2/kernel/fs/binfmt_misc.ko (could not get section addresses: 'struct module_sect_attrs' has no member 'nsections') + +A change in Linux 6.15 broke the :mod:`~drgn.helpers.linux.kernfs` helpers. +This error is fixed in this release:: + + AttributeError: 'struct kernfs_node' has no member 'parent' + +Another change in Linux 6.15 broke the +:func:`~drgn.helpers.linux.fs.path_lookup()` helper's handling of mount points. +This is fixed in this release. 
diff --git a/docs/release_highlights/0.0.32.rst b/docs/release_highlights/0.0.32.rst new file mode 100644 index 000000000..25c4e0729 --- /dev/null +++ b/docs/release_highlights/0.0.32.rst @@ -0,0 +1,115 @@ +0.0.32 (Released June 18th, 2025) +================================== + +These are some of the highlights of drgn 0.0.32. See the `GitHub release +`_ for the full release +notes, including more improvements and bug fixes. + +.. highlight:: pycon +.. program:: drgn + +MiniDebugInfo Symbol Support +---------------------------- + +Stephen Brennan added support for `MiniDebugInfo +`_, +a format providing additional, compressed symbols for stack traces used in +Fedora Linux and its derivatives. Support for this feature requires liblzma. + +Page Pool Helper and Leak Scripts +--------------------------------- + +Dragos Tatulea contributed the :func:`~drgn.helpers.linux.net.is_pp_page()` +helper for identifying page pool pages, as well as ``contrib/pp_leak`` +containing scripts from his `talk +`_ +at Netdev 0x19. + +Timekeeping Helpers +------------------- + +This release adds :mod:`drgn.helpers.linux.timekeeping` containing helpers for +getting various timestamps in seconds:: + + >>> ktime_get_seconds() + (time64_t)1586340 + >>> ktime_get_real_seconds() + (time64_t)1750283629 + >>> ktime_get_boottime_seconds() + (time64_t)3625799 + >>> ktime_get_clocktai_seconds() + (time64_t)1750283674 + +and nanoseconds:: + + >>> ktime_get_coarse_ns() + (u64)1586359078118365 + >>> ktime_get_coarse_real_ns() + (u64)1750283652394676256 + >>> ktime_get_coarse_boottime_ns() + (u64)3625825587469558 + >>> ktime_get_coarse_clocktai_ns() + (u64)1750283701569703118 + +There are also shortcuts for getting the system uptime:: + + >>> uptime() + 3625837.988496921 + >>> uptime_pretty() + '5 weeks, 6 days, 23 hours, 10 minutes, 42 seconds' + +Note that these helpers have a maximum granularity of one tick (~1-10 ms) and +can race with timekeeping updates for an error of up to 1 second. 
+ +CPU Mask Weight Helpers +----------------------- + +This release adds helpers for getting the number of CPUs to +:mod:`drgn.helpers.linux.cpumask`:: + + >>> num_online_cpus() + 8 + >>> num_possible_cpus() + 8 + >>> num_present_cpus() + 8 + +Relative Paths for ``--debug-directory`` +---------------------------------------- + +The :option:`--debug-directory` command line option (and the underlying +:attr:`drgn.DebugInfoOptions.directories` setting) now supports relative paths. +Previously, a relative path was used for searches by debug link. That has been +split into a separate option, :option:`--debug-link-directory` +(:attr:`drgn.DebugInfoOptions.debug_link_directories`). This is a breaking +change that was deemed necessary to fix the previous surprising behavior. + +Bug Fixes +--------- + +The following bugs were fixed in this release: + +* Symbols from data sections in Linux kernel loadable modules could previously + not be found since Linux 6.4. +* If a partial ELF symbol table (i.e., ``.dynsym``) was found, then later a + full ELF symbol table (i.e., ``.symtab``) was found, then the latter was + supposed to override the former, but previously it wouldn't. +* Stack unwinding through Linux kernel modules using ORC would previously fail + when the :option:`--main-symbols` command line option was used. Fixed by + Stephen Brennan. + +Linux 6.16 Support +------------------ + +No drgn changes were required to support Linux 6.16 as of rc2. + +Last Release With Python 3.6 & 3.7 Support +------------------------------------------ + +This will be the last release of drgn with support for Python 3.6 and 3.7. Both +versions have been EOL for awhile, and the maintenance burden has become +unsustainable. See `here `_ for the +announcement. Python 3.8 support will probably follow suit soon. + +(drgn 0.0.31 was intended to be the last release to support Python 3.6 and 3.7, +but it was postponed by one release.) 
diff --git a/docs/requirements.txt b/docs/requirements.txt index cdd5f14b9..4732cd8a0 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1 +1 @@ -sphinx==6.2.1 +sphinx==8.1.3 diff --git a/docs/support_matrix.rst b/docs/support_matrix.rst index 255cf6389..fabc42f02 100644 --- a/docs/support_matrix.rst +++ b/docs/support_matrix.rst @@ -72,7 +72,7 @@ currently fully supported are: .. Keep this in sync with vmtest/config.py. -- 6.0-6.13 +- 6.0-6.16 - 5.10-5.19 - 5.4 - 4.19 diff --git a/docs/tutorials.rst b/docs/tutorials.rst new file mode 100644 index 000000000..a40b927e5 --- /dev/null +++ b/docs/tutorials.rst @@ -0,0 +1,9 @@ +Tutorials +========= + +Hands-on tutorials for learning how to use drgn. + +.. toctree:: + :maxdepth: 1 + + tutorials/blk_rq_qos_crash.rst diff --git a/docs/tutorials/blk_rq_qos_crash.rst b/docs/tutorials/blk_rq_qos_crash.rst new file mode 100644 index 000000000..ed32f27ae --- /dev/null +++ b/docs/tutorials/blk_rq_qos_crash.rst @@ -0,0 +1,859 @@ +Stack Traces and Mystery Addresses (blk-rq-qos Crash) +===================================================== + +| Author: Omar Sandoval +| Date: February 12, 2025 + +.. linuxversion:: v6.11 + +This is a hands-on tutorial walking through a real Linux kernel bug that caused +kernel crashes in production. We'll read kernel code and use a few important +drgn techniques for reading stack traces and interpreting memory in order to +identify the root cause of the bug. + +We saw this crash on storage workloads on multiple kernel versions, up to and +including the latest at the time, Linux 6.11. The kernel logs all implicated +something in the block layer. + +A core dump and debugging symbols are provided for you to follow along with. + +This tutorial is also available as a video: + +.. raw:: html + + + +Setup +----- + +.. highlight:: console + +Follow the :doc:`../installation` instructions to get drgn. + +Download and extract the tutorial files: + +.. 
code-block:: + :class: tutorial + + $ curl -L https://github.com/osandov/drgn/releases/download/tutorial-assets/blk_rq_qos_crash_tutorial.tar.zst \ + | zstd -d | tar -x + +This will create a directory named ``blk_rq_qos_crash_tutorial``. Enter it: + +.. code-block:: + :class: tutorial + + $ cd blk_rq_qos_crash_tutorial + +Then, run drgn as follows. It will print a version banner and automatically +import the relevant :doc:`../helpers`: + +.. code-block:: + :class: tutorial + + $ drgn -c vmcore -s vmlinux --main-symbols + drgn 0.0.30 (using Python 3.13.1, elfutils 0.192, with libkdumpfile) + For help, type help(drgn). + >>> import drgn + >>> from drgn import FaultError, NULL, Object, alignof, cast, container_of, execscript, implicit_convert, offsetof, reinterpret, sizeof, stack_trace + >>> from drgn.helpers.common import * + >>> from drgn.helpers.linux import * + +In another window, check out the source code for Linux 6.11. For example, run +``git checkout v6.11`` in an existing Linux repo, or run: + +.. code-block:: + :class: tutorial + + $ git clone -b v6.11 --depth 1 https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git + ... + $ cd linux + +Now we can dive into the core dump. + +Starting With Dmesg +------------------- + +.. highlight:: pycon + +The kernel log buffer is usually the first place to look when debugging a +crash. In drgn, call :func:`~drgn.helpers.linux.printk.print_dmesg()` and +scroll up until you find the line starting with ``BUG:``. You should see the +following trace: + +.. code-block:: + :class: scroll-y tutorial + :emphasize-lines: 3,11 + + >>> print_dmesg() + ... 
+ [ 18.051123] BUG: kernel NULL pointer dereference, address: 00000000000006fc + [ 18.051597] #PF: supervisor write access in kernel mode + [ 18.051936] #PF: error_code(0x0002) - not-present page + [ 18.052241] PGD 0 P4D 0 + [ 18.052336] Oops: Oops: 0002 [#1] PREEMPT SMP NOPTI + [ 18.052629] CPU: 0 UID: 0 PID: 906 Comm: fio Kdump: loaded Not tainted 6.11.0 #1 + [ 18.053123] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-3.fc41 04/01/2014 + [ 18.053739] RIP: 0010:_raw_spin_lock_irqsave+0x36/0x70 + [ 18.054059] Code: 04 25 28 00 00 00 48 89 44 24 08 48 c7 04 24 00 00 00 00 9c 8f 04 24 48 8b 1c 24 fa 65 ff 05 89 2a b7 7e b9 01 00 00 00 31 c0 0f b1 0f 75 1e 65 48 8b 04 25 28 00 00 00 48 3b 44 24 08 75 17 + [ 18.055467] RSP: 0000:ffffc900011abcd0 EFLAGS: 00010046 + [ 18.055788] RAX: 0000000000000000 RBX: 0000000000000082 RCX: 0000000000000001 + [ 18.056260] RDX: 0000000000000000 RSI: 0000000000000003 RDI: 00000000000006fc + [ 18.056725] RBP: 0000000000000000 R08: 0000000000000000 R09: 000000000015000e + [ 18.057202] R10: ffff888002fa5900 R11: ffffffff81312090 R12: 0000000000000003 + [ 18.057669] R13: ffff888002d4b678 R14: 00000000000006fc R15: 0000000000000003 + [ 18.058138] FS: 00007f1ee66c06c0(0000) GS:ffff888005a00000(0000) knlGS:0000000000000000 + [ 18.058677] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + [ 18.059039] CR2: 00000000000006fc CR3: 0000000002f4a005 CR4: 0000000000770ef0 + [ 18.059508] PKRU: 55555554 + [ 18.059614] Call Trace: + [ 18.059700] + [ 18.059782] ? __die_body+0x16/0x60 + [ 18.059982] ? page_fault_oops+0x31e/0x3a0 + [ 18.060205] ? exc_page_fault+0x55/0xa0 + [ 18.060409] ? asm_exc_page_fault+0x26/0x30 + [ 18.060640] ? __pfx_wbt_inflight_cb+0x10/0x10 + [ 18.060892] ? 
_raw_spin_lock_irqsave+0x36/0x70 + [ 18.061150] try_to_wake_up+0x3e/0x400 + [ 18.061342] rq_qos_wake_function+0x4d/0x60 + [ 18.061572] __wake_up_common+0x42/0x80 + [ 18.061770] __wake_up_common_lock+0x33/0x60 + [ 18.062007] wbt_done+0x60/0x80 + [ 18.062152] __rq_qos_done+0x22/0x40 + [ 18.062330] blk_mq_free_request+0x62/0xb0 + [ 18.062551] virtblk_done+0x99/0x120 + [ 18.062731] vring_interrupt+0x71/0x80 + [ 18.062928] vp_interrupt+0xa8/0xe0 + [ 18.063100] __handle_irq_event_percpu+0x89/0x1b0 + [ 18.063373] handle_irq_event_percpu+0xf/0x40 + [ 18.063614] handle_irq_event+0x30/0x50 + [ 18.063831] handle_fasteoi_irq+0xaa/0x1b0 + [ 18.064051] __common_interrupt+0x3a/0xb0 + [ 18.064266] common_interrupt+0x3d/0x90 + [ 18.064462] asm_common_interrupt+0x26/0x40 + [ 18.064691] RIP: 0033:0x7f1ef33679b9 + [ 18.064886] Code: ff 48 85 c0 0f 84 32 35 00 00 48 8b bd b8 f9 ff ff 4c 89 b5 80 f9 ff ff 48 89 07 4c 01 f8 48 89 85 78 f9 ff ff e9 8d ca ff ff <48> 8b 85 60 fa ff ff 48 8d 50 08 48 89 95 60 fa ff ff e9 c7 d5 ff + [ 18.066333] RSP: 002b:00007f1ee66baad0 EFLAGS: 00000212 + [ 18.066624] RAX: 00007f1ee66bad56 RBX: 00007f1ee66bb1d0 RCX: 00007f1ee66bad56 + [ 18.066999] RDX: 0000000000000030 RSI: 00000000000f12b3 RDI: 000000000000000a + [ 18.067476] RBP: 00007f1ee66bb1a0 R08: 000000000000002c R09: 0000000000000000 + [ 18.068003] R10: 00007f1ef348dfe0 R11: 0000000000000020 R12: 0000000000000020 + [ 18.068482] R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000001 + [ 18.069005] + [ 18.069097] CR2: 00000000000006fc + +``BUG: kernel NULL pointer dereference, address: 00000000000006fc`` tells us +that the kernel crashed because it dereferenced a null pointer and tried to +access the address 0x6fc. + +``RIP: 0010:_raw_spin_lock_irqsave+0x36/0x70`` tells us that the bad access +happened in the function :linux:`_raw_spin_lock_irqsave() +`. Below that, the stack trace tells us how we +got there. + +.. tip:: + + Ignore call trace lines starting with ``?``. 
These are stale function + addresses on the stack that are not part of the actual call trace. They are + printed as a :linuxt:`hint/fail-safe `, + but they are misleading as often as not. + +We'll look at the trace in more detail with drgn soon, but we can see that we +got an interrupt for a disk I/O completion, which then tried to wake up a task +and acquire a spinlock. + +Stack Trace in drgn +------------------- + +Now let's look at drgn's view of the stack trace. Save the stack trace of the +crashed thread: + +.. code-block:: + :class: tutorial + + >>> trace = prog.crashed_thread().stack_trace() + +And print it: + +.. code-block:: + :class: scroll-y tutorial + + >>> trace + #0 arch_atomic_try_cmpxchg (./arch/x86/include/asm/atomic.h:107:9) + #1 raw_atomic_try_cmpxchg_acquire (./include/linux/atomic/atomic-arch-fallback.h:2170:9) + #2 atomic_try_cmpxchg_acquire (./include/linux/atomic/atomic-instrumented.h:1302:9) + #3 queued_spin_lock (./include/asm-generic/qspinlock.h:111:6) + #4 do_raw_spin_lock (./include/linux/spinlock.h:187:2) + #5 __raw_spin_lock_irqsave (./include/linux/spinlock_api_smp.h:111:2) + #6 _raw_spin_lock_irqsave (kernel/locking/spinlock.c:162:9) + #7 class_raw_spinlock_irqsave_constructor (./include/linux/spinlock.h:551:1) + #8 try_to_wake_up (kernel/sched/core.c:4051:2) + #9 rq_qos_wake_function (block/blk-rq-qos.c:223:2) + #10 __wake_up_common (kernel/sched/wait.c:89:9) + #11 __wake_up_common_lock (kernel/sched/wait.c:106:14) + #12 wbt_done (block/blk-wbt.c:259:3) + #13 __rq_qos_done (block/blk-rq-qos.c:39:4) + #14 rq_qos_done (block/blk-rq-qos.h:122:3) + #15 blk_mq_free_request (block/blk-mq.c:737:2) + #16 virtblk_done (drivers/block/virtio_blk.c:367:5) + #17 vring_interrupt (drivers/virtio/virtio_ring.c:2595:3) + #18 vp_vring_interrupt (drivers/virtio/virtio_pci_common.c:82:7) + #19 vp_interrupt (drivers/virtio/virtio_pci_common.c:113:9) + #20 __handle_irq_event_percpu (kernel/irq/handle.c:158:9) + #21 handle_irq_event_percpu 
(kernel/irq/handle.c:193:11) + #22 handle_irq_event (kernel/irq/handle.c:210:8) + #23 handle_fasteoi_irq (kernel/irq/chip.c:720:2) + #24 generic_handle_irq_desc (./include/linux/irqdesc.h:173:2) + #25 handle_irq (arch/x86/kernel/irq.c:247:3) + #26 call_irq_handler (arch/x86/kernel/irq.c:259:3) + #27 __common_interrupt (arch/x86/kernel/irq.c:285:6) + #28 common_interrupt (arch/x86/kernel/irq.c:278:1) + #29 asm_common_interrupt+0x26/0x2b (./arch/x86/include/asm/idtentry.h:693) + #30 0x7f1ef33679b9 + +Notice that drgn's stack trace includes information not in the kernel trace, +namely: + +1. File names and line and column numbers. These are very useful for navigating + the code that you're debugging. +2. Inlined function calls. For example, frames 0-5 are all inlined calls, and + frame 6 was the last actual call. You can verify this by printing each frame + individually: + + .. code-block:: + :class: tutorial + + + >>> trace[0] + #0 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in arch_atomic_try_cmpxchg at ./arch/x86/include/asm/atomic.h:107:9 (inlined) + >>> trace[1] + #1 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in raw_atomic_try_cmpxchg_acquire at ./include/linux/atomic/atomic-arch-fallback.h:2170:9 (inlined) + >>> trace[2] + #2 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in atomic_try_cmpxchg_acquire at ./include/linux/atomic/atomic-instrumented.h:1302:9 (inlined) + >>> trace[3] + #3 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in queued_spin_lock at ./include/asm-generic/qspinlock.h:111:6 (inlined) + >>> trace[4] + #4 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in do_raw_spin_lock at ./include/linux/spinlock.h:187:2 (inlined) + >>> trace[5] + #5 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in __raw_spin_lock_irqsave at ./include/linux/spinlock_api_smp.h:111:2 (inlined) + >>> trace[6] + #6 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in _raw_spin_lock_irqsave at 
kernel/locking/spinlock.c:162:9 + + Notice that frames 0-5 end with ``(inlined)``, and all of the frames have + the same instruction pointer, ``0xffffffff814b6446``. + +Tracing Local Variables +----------------------- + +Next, let's walk through the stack trace to figure out where the null pointer +came from. + +Frames 0-2 are low-level atomic operations:: + + #0 arch_atomic_try_cmpxchg (./arch/x86/include/asm/atomic.h:107:9) + #1 raw_atomic_try_cmpxchg_acquire (./include/linux/atomic/atomic-arch-fallback.h:2170:9) + #2 atomic_try_cmpxchg_acquire (./include/linux/atomic/atomic-instrumented.h:1302:9) + +That's essentially a fancy memory access, so let's skip those frames. Frame 3 +is in :linux:`queued_spin_lock() `, the +kernel's spinlock implementation:: + + #3 queued_spin_lock (./include/asm-generic/qspinlock.h:111:6) + +In your window with the Linux source code, open +:file:`include/asm-generic/qspinlock.h` and jump to line 111: + +.. code-block:: c + :caption: include/asm-generic/qspinlock.h + :lineno-start: 107 + :emphasize-lines: 5 + + static __always_inline void queued_spin_lock(struct qspinlock *lock) + { + int val = 0; + + if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) + return; + + queued_spin_lock_slowpath(lock, val); + } + +Notice that it accesses the ``lock`` parameter. Print it in drgn: + +.. code-block:: + :class: tutorial + + >>> trace[3]["lock"] + (struct qspinlock *)0x6fc + +This matches the address from the ``BUG`` message in dmesg! + +Now let's find out where ``lock`` came from. Frames 4-7 wrap the low-level +spinlock implementation:: + + #4 do_raw_spin_lock (./include/linux/spinlock.h:187:2) + #5 __raw_spin_lock_irqsave (./include/linux/spinlock_api_smp.h:111:2) + #6 _raw_spin_lock_irqsave (kernel/locking/spinlock.c:162:9) + #7 class_raw_spinlock_irqsave_constructor (./include/linux/spinlock.h:551:1) + +Feel free to open the source code for these, but we can quickly check that the +lock simply gets passed through: + +.. 
code-block:: + :class: tutorial + + >>> trace[4]["lock"] + (raw_spinlock_t *)0x6fc + >>> trace[5]["lock"] + (raw_spinlock_t *)0x6fc + >>> trace[6]["lock"] + (raw_spinlock_t *)0x6fc + +:linux:`class_raw_spinlock_irqsave_constructor() +` is slightly different. It is generated by a +macro and doesn't use the name ``lock``: + +.. code-block:: + :class: tutorial + + >>> trace[7]["lock"] + Traceback (most recent call last): + ... + KeyError: 'lock' + +Let's list all of its local variables and make a guess: + +.. code-block:: + :class: tutorial + + >>> trace[7].locals() + ['l', '_t'] + >>> trace[7]["l"] + (raw_spinlock_t *)0x6fc + +.. tip:: + + Use :meth:`drgn.StackFrame.locals()` to get the list of parameters and + local variables in a stack frame when finding the implementation of the + function is inconvenient. + +The caller must have passed 0x6fc. Let's look at it. The next frame is in +:linux:`try_to_wake_up() `:: + + #8 try_to_wake_up (kernel/sched/core.c:4051:2) + +Open :file:`kernel/sched/core.c` at line 4051: + +.. code-block:: c + :caption: kernel/sched/core.c + :emphasize-lines: 4 + + int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) + { + ... + scoped_guard (raw_spinlock_irqsave, &p->pi_lock) { + +It is acquiring :linux:`pi_lock ` in a +:linux:`task_struct ` (using a `scoped guard +`_). Print the ``task_struct``: + +.. code-block:: + :class: tutorial + + >>> trace[8]["p"] + (struct task_struct *)0x0 + +There's our null pointer! But where did 0x6fc come from? Look at the offset of +``pi_lock`` in ``struct task_struct``: + +.. code-block:: + :class: tutorial + + >>> hex(offsetof(prog.type("struct task_struct"), "pi_lock")) + '0x6fc' + +Or do the inverse and see what's at offset 0x6fc in ``struct task_struct``: + +.. code-block:: + :class: tutorial + + >>> member_at_offset(prog.type("struct task_struct"), 0x6fc) + 'pi_lock.raw_lock.val.counter or pi_lock.raw_lock.locked or pi_lock.raw_lock.locked_pending' + +.. 
tip:: + + Use :func:`~drgn.offsetof()` and + :func:`~drgn.helpers.common.type.member_at_offset()` to decipher pointers + to struct members. + +So where did ``p`` come from? Let's look at the caller, +:linux:`rq_qos_wake_function() `, in frame 9:: + + #9 rq_qos_wake_function (block/blk-rq-qos.c:223:2) + +Open :file:`block/blk-rq-qos.c` at line 223: + +.. code-block:: c + :caption: block/blk-rq-qos.c + :lineno-start: 206 + :emphasize-lines: 18 + + static int rq_qos_wake_function(struct wait_queue_entry *curr, + unsigned int mode, int wake_flags, void *key) + { + struct rq_qos_wait_data *data = container_of(curr, + struct rq_qos_wait_data, + wq); + + /* + * If we fail to get a budget, return -1 to interrupt the wake up loop + * in __wake_up_common. + */ + if (!data->cb(data->rqw, data->private_data)) + return -1; + + data->got_token = true; + smp_wmb(); + list_del_init(&curr->entry); + wake_up_process(data->task); + return 1; + } + +(Note: :linux:`wake_up_process() ` doesn't show up in +the stack trace because of `tail call elimination +`_. This `may be fixed +`_ in a future release of drgn.) + +``p`` came from ``data->task``. Print ``data``: + +.. code-block:: + :class: tutorial + + >>> trace[9]["data"] + *(struct rq_qos_wait_data *)0xffffc900011b3558 = { + .wq = (struct wait_queue_entry){ + .flags = (unsigned int)2168637095, + .private = (void *)0xffff888002d6c000, + .func = (wait_queue_func_t)0x0, + .entry = (struct list_head){ + .next = (struct list_head *)0xffff888002d6c000, + .prev = (struct list_head *)0xffff888002da2100, + }, + }, + .task = (struct task_struct *)0xffff888000fd6001, + .rqw = (struct rq_wait *)0xffffc900011b3a30, + .cb = (acquire_inflight_cb_t *)0xffff888002763030, + .private_data = (void *)0x1, + .got_token = (bool)201, + } + +Notice that ``data->task`` is NOT null. Print the ``comm`` member, which should +be the thread name: + +.. code-block:: + :class: tutorial + + >>> trace[9]["data"].task.comm + (char [16])"" + +Instead, it's empty. 
This doesn't appear to be a valid ``task_struct``. + +Identifying Mystery Addresses +----------------------------- + +If ``data->task`` isn't a valid ``task_struct``, then what is it? Pass it to +:func:`~drgn.helpers.common.memory.identify_address()` to answer that: + +.. code-block:: + :class: tutorial + + >>> identify_address(trace[9]["data"].task) + 'slab object: buffer_head+0x1' + +It's a pointer to a completely unrelated type. + +Since our problem seems to stem from ``data``, pass it to +``identify_address()`` to see where it comes from: + +.. code-block:: + :class: tutorial + + >>> identify_address(trace[9]["data"]) + 'vmap stack: 909 (fio) +0x3558' + +This means that ``data`` is on the stack of the task with PID 909. + +.. tip:: + + Use :func:`~drgn.helpers.common.memory.identify_address()` to figure out + what an unknown address refers to. + +Other Stacks +------------ + +Notice that we've seen three possibilities for ``data->task``: + +1. When it was passed to ``wake_up_process()``, it was ``NULL``. +2. By the time of the crash, it was an unrelated pointer. +3. It's supposed to point to a ``task_struct``. + +This suggests that there's a data race on ``data->task``. + +We know that ``data`` is on the stack of another task. Let's find where it's +created. In :file:`block/blk-rq-qos.c`, search for ``struct rq_qos_wait_data``. +You should find it being used in :linux:`rq_qos_wait() +`: + +.. 
code-block:: c + :caption: block/blk-rq-qos.c + :lineno-start: 243 + :emphasize-lines: 5 + + void rq_qos_wait(struct rq_wait *rqw, void *private_data, + acquire_inflight_cb_t *acquire_inflight_cb, + cleanup_cb_t *cleanup_cb) + { + struct rq_qos_wait_data data = { + .wq = { + .func = rq_qos_wake_function, + .entry = LIST_HEAD_INIT(data.wq.entry), + }, + .task = current, + .rqw = rqw, + .cb = acquire_inflight_cb, + .private_data = private_data, + }; + bool has_sleeper; + + has_sleeper = wq_has_sleeper(&rqw->wait); + if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) + return; + + has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq, + TASK_UNINTERRUPTIBLE); + do { + /* The memory barrier in set_task_state saves us here. */ + if (data.got_token) + break; + if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { + finish_wait(&rqw->wait, &data.wq); + + /* + * We raced with rq_qos_wake_function() getting a token, + * which means we now have two. Put our local token + * and wake anyone else potentially waiting for one. + */ + smp_rmb(); + if (data.got_token) + cleanup_cb(rqw, private_data); + break; + } + io_schedule(); + has_sleeper = true; + set_current_state(TASK_UNINTERRUPTIBLE); + } while (1); + finish_wait(&rqw->wait, &data.wq); + } + +This function creates ``data`` on the stack, with ``data->task`` set to the +current task, and then tries to acquire an "inflight counter". If one is not +available, it puts itself on a wait queue and blocks until it can get one. + +So, ``rq_qos_wait()`` waits for an inflight counter, and +``rq_qos_wake_function()`` wakes it up when one becomes available. We would +expect that the PID we found earlier, 909, is currently blocked in +``rq_qos_wait()``. Pass the PID to :func:`~drgn.stack_trace()` to check: + +.. 
code-block:: + :class: scroll-y tutorial + + >>> stack_trace(909) + #0 rep_nop (./arch/x86/include/asm/vdso/processor.h:0:2) + #1 cpu_relax (./arch/x86/include/asm/vdso/processor.h:18:2) + #2 queued_spin_lock_slowpath (kernel/locking/qspinlock.c:380:3) + #3 queued_spin_lock (./include/asm-generic/qspinlock.h:114:2) + #4 do_raw_spin_lock (./include/linux/spinlock.h:187:2) + #5 __raw_spin_lock_irqsave (./include/linux/spinlock_api_smp.h:111:2) + #6 _raw_spin_lock_irqsave (kernel/locking/spinlock.c:162:9) + #7 virtblk_add_req_batch (drivers/block/virtio_blk.c:481:2) + #8 virtio_queue_rqs (drivers/block/virtio_blk.c:519:11) + #9 __blk_mq_flush_plug_list (block/blk-mq.c:2704:2) + #10 blk_mq_flush_plug_list (block/blk-mq.c:2781:4) + #11 blk_add_rq_to_plug (block/blk-mq.c:1292:3) + #12 blk_mq_submit_bio (block/blk-mq.c:3028:3) + #13 __submit_bio (block/blk-core.c:615:3) + #14 __submit_bio_noacct_mq (block/blk-core.c:696:3) + #15 submit_bio_noacct_nocheck (block/blk-core.c:725:3) + #16 ext4_io_submit (fs/ext4/page-io.c:377:3) + #17 io_submit_add_bh (fs/ext4/page-io.c:418:3) + #18 ext4_bio_write_folio (fs/ext4/page-io.c:560:3) + #19 mpage_submit_folio (fs/ext4/inode.c:1943:8) + #20 mpage_process_page_bufs (fs/ext4/inode.c:2056:9) + #21 mpage_prepare_extent_to_map (fs/ext4/inode.c:2564:11) + #22 ext4_do_writepages (fs/ext4/inode.c:2706:8) + #23 ext4_writepages (fs/ext4/inode.c:2842:8) + #24 do_writepages (mm/page-writeback.c:2683:10) + #25 __filemap_fdatawrite_range (mm/filemap.c:430:9) + #26 generic_fadvise (mm/fadvise.c:114:3) + #27 vfs_fadvise (mm/fadvise.c:185:9) + #28 ksys_fadvise64_64 (mm/fadvise.c:199:8) + #29 __do_sys_fadvise64 (mm/fadvise.c:214:9) + #30 __se_sys_fadvise64 (mm/fadvise.c:212:1) + #31 __x64_sys_fadvise64 (mm/fadvise.c:212:1) + #32 do_syscall_x64 (arch/x86/entry/common.c:52:14) + #33 do_syscall_64 (arch/x86/entry/common.c:83:7) + #34 entry_SYSCALL_64+0xaf/0x14c (arch/x86/entry/entry_64.S:121) + #35 0x7f1ef340203a + +It's not in ``rq_qos_wait()``! 
It seems to have moved on to something else. + +Analysis +-------- + +At this point, we've gotten everything that we need from drgn. Now we need to +interpret what we've gathered and analyze the kernel code. + +Based on the stack trace for PID 909, we can conclude that the *waiter* got a +counter, returned, and moved on to something else. It reused the stack for +unrelated data, which explains the mystery pointer that we saw in +``data->task``. The series of events is something like this: + +1. ``acquire_inflight_cb()`` on line 260 fails. +2. ``prepare_to_wait_exclusive()`` puts ``data`` on the waitqueue. +3. ``acquire_inflight_cb()`` on line 269 succeeds. +4. ``finish_wait()`` removes ``data`` from the waitqueue. +5. ``rq_qos_wait()`` returns and the task moves on to something else, reusing + the stack memory. + +This means that the *waker* found the waiter's ``data`` in between steps 2 and +4, but by the time the waker called ``wake_up_process(data->task)``, the waiter +was past step 5. + +Wakers and waiters are supposed to be synchronized. Going back to the crashing +stack trace, we see that ``rq_qos_wake_function()`` is called via +:linux:`__wake_up_common_lock() `:: + + #10 __wake_up_common (kernel/sched/wait.c:89:9) + #11 __wake_up_common_lock (kernel/sched/wait.c:106:14) + +Open :file:`kernel/sched/wait.c` at line 106 and see that it's holding +``wq_head->lock``: + +.. code-block:: c + :caption: kernel/sched/wait.c + :lineno-start: 99 + :emphasize-lines: 8 + + static int __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode, + int nr_exclusive, int wake_flags, void *key) + { + unsigned long flags; + int remaining; + + spin_lock_irqsave(&wq_head->lock, flags); + remaining = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, + key); + spin_unlock_irqrestore(&wq_head->lock, flags); + + return nr_exclusive - remaining; + } + +On the waiter side, :linux:`finish_wait() ` also grabs +``wq_head->lock``: + +.. 
code-block:: c + :caption: kernel/sched/wait.c + :lineno-start: 446 + + void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) + { + unsigned long flags; + + __set_current_state(TASK_RUNNING); + /* + * We can check for list emptiness outside the lock + * IFF: + * - we use the "careful" check that verifies both + * the next and prev pointers, so that there cannot + * be any half-pending updates in progress on other + * CPU's that we haven't seen yet (and that might + * still change the stack area. + * and + * - all other users take the lock (ie we can only + * have _one_ other CPU that looks at or modifies + * the list). + */ + if (!list_empty_careful(&wq_entry->entry)) { + spin_lock_irqsave(&wq_head->lock, flags); + list_del_init(&wq_entry->entry); + spin_unlock_irqrestore(&wq_head->lock, flags); + } + } + +But there's an important detail here: ``finish_wait()`` doesn't take the lock +if the wait queue list entry is empty, i.e., if it has already been removed +from the wait queue. + +Go back to ``rq_qos_wake_function()``: + +.. code-block:: c + :caption: block/blk-rq-qos.c + :lineno-start: 206 + + static int rq_qos_wake_function(struct wait_queue_entry *curr, + unsigned int mode, int wake_flags, void *key) + { + struct rq_qos_wait_data *data = container_of(curr, + struct rq_qos_wait_data, + wq); + + /* + * If we fail to get a budget, return -1 to interrupt the wake up loop + * in __wake_up_common. + */ + if (!data->cb(data->rqw, data->private_data)) + return -1; + + data->got_token = true; + smp_wmb(); + list_del_init(&curr->entry); + wake_up_process(data->task); + return 1; + } + +It removes the entry from the wait queue on line 222, then accesses the entry +on line 223. + +That's the race condition: as soon as the entry has been removed from the wait +queue, ``finish_wait()`` in the waiter can return instantly, and the waiter is +free to move on. Therefore, after the entry has been removed, the waker must +not access it. 
+ +The Fix +------- + +The fix is trivial: don't delete the wait queue entry until *after* using it. + +.. code-block:: diff + + diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c + index 2cfb297d9a62..058f92c4f9d5 100644 + --- a/block/blk-rq-qos.c + +++ b/block/blk-rq-qos.c + @@ -219,8 +219,8 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, + + data->got_token = true; + smp_wmb(); + - list_del_init(&curr->entry); + wake_up_process(data->task); + + list_del_init_careful(&curr->entry); + return 1; + } + +The deletion also needs careful memory ordering to pair with the +:linux:`list_empty_careful() ` in ``finish_wait()``, +hence the replacement of :linux:`list_del_init() ` +with :linux:`list_del_init_careful() `. + +This fix was merged in Linux 6.12 in `commit e972b08b91ef ("blk-rq-qos: fix +crash on rq_qos_wait vs. rq_qos_wake_function race") +`_. + +Conclusion +---------- + +Debugging a core dump involves a lot of cross-referencing code and core dump +state. drgn gives you some powerful capabilities for understanding kernel +state, which you can use to discern subtle bugs like this one. In particular, +:func:`~drgn.helpers.common.memory.identify_address()`, +:func:`~drgn.helpers.common.type.member_at_offset()`, and +:meth:`drgn.StackFrame.locals()` are often crucial to an investigation. + +Feel free to reference the :doc:`../helpers` and explore this core dump further. + +Bonus Challenge: Reading File Pages +----------------------------------- + +As a bonus, try dumping the contents of the file ``/init`` in the core dump +(this is the script that I used to reproduce the bug). + +First, find the inode for ``/init`` and its file size. + +.. details:: Hint + + See :func:`~drgn.helpers.linux.fs.path_lookup()`. + +.. details:: Answer + + .. code-block:: + :class: tutorial + + >>> inode = path_lookup("/init").dentry.d_inode + >>> inode + *(struct inode *)0xffff88800289c568 = { + ... 
+ } + >>> inode.i_size + (loff_t)578 + +The page cache for an inode is in an XArray, ``inode->i_mapping->i_pages``. Get +the cached page at offset 0. + +.. details:: Hint + + See :func:`~drgn.helpers.linux.xarray.xa_load()` and :func:`~drgn.cast()`. + +.. details:: Answer + + .. code-block:: + :class: tutorial + + >>> entry = xa_load(inode.i_mapping.i_pages.address_of_(), 0) + >>> page = cast("struct page *", entry) + >>> page + *(struct page *)0xffffea000015f840 = { + ... + } + +Get the page's virtual address. + +.. details:: Hint + + See :func:`~drgn.helpers.linux.mm.page_to_virt()`. + +.. details:: Answer + + .. code-block:: + :class: tutorial + + >>> addr = page_to_virt(page) + >>> addr + (void *)0xffff8880057e1000 + +Finally, read from the virtual address. + +.. details:: Hint + + See :meth:`drgn.Program.read()`. + +.. details:: Answer + + .. code-block:: + :class: tutorial + + >>> print(prog.read(addr, inode.i_size).decode()) + #!/bin/sh -e + + mount -t proc -o nosuid,nodev,noexec proc /proc + mount -t devtmpfs -o nosuid dev /dev + mkdir /dev/shm + mount -t tmpfs -o nosuid,nodev tmpfs /dev/shm + mount -t sysfs -o nosuid,nodev,noexec sys /sys + mount -t tmpfs -o nosuid,nodev tmpfs /tmp + kexec --load-panic --kexec-syscall-auto --command-line="root=/dev/vda rw console=ttyS0,115200 init=/kdump-init" vmlinuz + echo 1 > /sys/block/vda/queue/wbt_lat_usec + while true; do + cat /init > /dev/null + done & + fio --name=writer --rw=randwrite --ioengine=sync --buffered=1 --bs=4K --time_based --runtime=3600 --size=16M + poweroff -f diff --git a/docs/user_guide.rst b/docs/user_guide.rst index 0ebf60b4d..9cdd395e5 100644 --- a/docs/user_guide.rst +++ b/docs/user_guide.rst @@ -26,10 +26,14 @@ the only ``Program`` you will need. 
A ``Program`` is used to look up type definitions, access variables, and read arbitrary memory:: - >>> prog.type('unsigned long') - prog.int_type(name='unsigned long', size=8, is_signed=False) - >>> prog['jiffies'] - Object(prog, 'volatile unsigned long', address=0xffffffffbe405000) + + >>> prog.type("struct list_head") + struct list_head { + struct list_head *next; + struct list_head *prev; + } + >>> prog["jiffies"] + (volatile unsigned long)4416739513 >>> prog.read(0xffffffffbe411e10, 16) b'swapper/0\x00\x00\x00\x00\x00\x00\x00' @@ -40,7 +44,7 @@ memory from the program's address space. The :meth:`[] <drgn.Program.__getitem__>` operator looks up a variable, constant, or function:: - >>> prog['jiffies'] == prog.variable('jiffies') + >>> prog["jiffies"] == prog.variable("jiffies") True It is usually more convenient to use the ``[]`` operator rather than the @@ -67,11 +71,11 @@ members can be accessed with the dot (``.``) operator, arrays can be subscripted with ``[]``, arithmetic can be performed, and objects can be compared:: - >>> print(prog['init_task'].comm[0]) + >>> print(prog["init_task"].comm[0]) (char)115 - >>> print(repr(prog['init_task'].nsproxy.mnt_ns.mounts + 1)) + >>> print(repr(prog["init_task"].nsproxy.mnt_ns.mounts + 1)) Object(prog, 'unsigned int', value=34) - >>> prog['init_task'].nsproxy.mnt_ns.pending_mounts > 0 + >>> prog["init_task"].nsproxy.mnt_ns.pending_mounts > 0 False Python doesn't have all of the operators that C or C++ do, so some @@ -111,16 +115,19 @@ References vs. Values The main difference between reference objects and value objects is how they are evaluated.
References are read from the program's memory every time they are -evaluated; values simply return the stored value (:meth:`drgn.Object.read_()` -reads a reference object and returns it as a value object):: +evaluated:: >>> import time - >>> jiffies = prog['jiffies'] + >>> jiffies = prog["jiffies"] >>> jiffies.value_() 4391639989 >>> time.sleep(1) >>> jiffies.value_() 4391640290 + +Values simply return the stored value (:meth:`drgn.Object.read_()` reads a +reference object and returns it as a value object):: + >>> jiffies2 = jiffies.read_() >>> jiffies2.value_() 4391640291 @@ -131,24 +138,28 @@ reads a reference object and returns it as a value object):: 4391640593 References have a :attr:`drgn.Object.address_` attribute, which is the object's -address as a Python ``int``. This is slightly different from the -:meth:`drgn.Object.address_of_()` method, which returns the address as a -``drgn.Object``. Of course, both references and values can have a pointer type; -``address_`` refers to the address of the pointer object itself, and -:meth:`drgn.Object.value_()` refers to the value of the pointer (i.e., the -address it points to):: +address as a Python ``int``:: - >>> address = prog['jiffies'].address_ + >>> address = prog["jiffies"].address_ >>> type(address) <class 'int'> >>> print(hex(address)) 0xffffffffbe405000 - >>> jiffiesp = prog['jiffies'].address_of_() - >>> jiffiesp + +This is slightly different from the :meth:`drgn.Object.address_of_()` method, +which returns the address as a ``drgn.Object``:: + + >>> jiffiesp = prog["jiffies"].address_of_() + >>> print(repr(jiffiesp)) Object(prog, 'volatile unsigned long *', value=0xffffffffbe405000) >>> print(hex(jiffiesp.value_())) 0xffffffffbe405000 +Of course, both references and values can have a pointer type; +``address_`` refers to the address of the pointer object itself, and +:meth:`drgn.Object.value_()` refers to the value of the pointer (i.e., the +address it points to). + ..
_absent-objects: Absent Objects @@ -203,41 +214,6 @@ Of course, it would be a waste of time and effort for everyone to have to define these helpers for themselves, so drgn includes a collection of helpers for many use cases. See :doc:`helpers`. -.. _validators: - -Validators -"""""""""" - -Validators are a special category of helpers that check the consistency of a -data structure. In general, helpers assume that the data structures that they -examine are valid. Validators do not make this assumption and do additional -(potentially expensive) checks to detect broken invariants, corruption, etc. - -Validators raise :class:`drgn.helpers.ValidationError` if the data structure is -not valid or :class:`drgn.FaultError` if the data structure is invalid in a way -that causes a bad memory access. They have names prefixed with ``validate_``. - -For example, :func:`drgn.helpers.linux.list.validate_list()` checks the -consistency of a linked list in the Linux kernel (in particular, the -consistency of the ``next`` and ``prev`` pointers):: - - >>> validate_list(prog["my_list"].address_of_()) - drgn.helpers.ValidationError: (struct list_head *)0xffffffffc029e460 next 0xffffffffc029e000 has prev 0xffffffffc029e450 - -:func:`drgn.helpers.linux.list.validate_list_for_each_entry()` does the same -checks while also returning the entries in the list for further validation: - -.. code-block:: python3 - - def validate_my_list(prog): - for entry in validate_list_for_each_entry( - "struct my_entry", - prog["my_list"].address_of_(), - "list", - ): - if entry.value < 0: - raise ValidationError("list contains negative entry") - Other Concepts -------------- @@ -268,9 +244,7 @@ Stack Traces drgn represents stack traces with the :class:`drgn.StackTrace` and :class:`drgn.StackFrame` classes. :func:`drgn.stack_trace()`, :meth:`drgn.Program.stack_trace()`, and :meth:`drgn.Thread.stack_trace()` -return the call stack for a thread. 
The :meth:`[] -` operator looks up an object in the scope of a -``StackFrame``:: +return the call stack for a thread:: >>> trace = stack_trace(115) >>> trace @@ -288,11 +262,22 @@ return the call stack for a thread. The :meth:`[] #11 do_syscall_64 (./arch/x86/entry/common.c:80:7) #12 entry_SYSCALL_64+0x7c/0x15b (./arch/x86/entry/entry_64.S:113) #13 0x7f3344072af7 + +The :meth:`[] ` operator on a ``StackTrace`` gets +the ``StackFrame`` at the given index:: + >>> trace[5] #5 at 0xffffffff8a5a32d0 (do_sys_poll+0x400/0x578) in do_poll at ./fs/select.c:961:8 (inlined) - >>> prog['do_poll'] - (int (struct poll_list *list, struct poll_wqueues *wait, struct timespec64 *end_time)) - >>> trace[5]['list'] + +The :meth:`[] ` operator on a ``StackFrame`` looks +up an object in the scope of that frame. :meth:`drgn.StackFrame.locals()` +returns a list of the available names:: + + >>> prog["do_poll"] + (int (struct poll_list *list, struct poll_wqueues *wait, struct timespec64 *end_time))0xffffffff905c6e10 + >>> trace[5].locals() + ['list', 'wait', 'end_time', 'pt', 'expire', 'to', 'timed_out', 'count', 'slack', 'busy_flag', 'busy_start', 'walk', 'can_busy_loop'] + >>> trace[5]["list"] *(struct poll_list *)0xffffacca402e3b50 = { .next = (struct poll_list *)0x0, .len = (int)1, @@ -313,12 +298,53 @@ drgn automatically obtains type definitions from the program. Types are represented by the :class:`drgn.Type` class and created by various factory functions like :meth:`drgn.Program.int_type()`:: - >>> prog.type('int') + >>> prog.type("int") prog.int_type(name='int', size=4, is_signed=True) You won't usually need to work with types directly, but see :ref:`api-reference-types` if you do. +Modules +^^^^^^^ + +drgn tracks executables, shared libraries, loadable kernel modules, and other +binary files used by a program with the :class:`drgn.Module` class. Modules +store their name, identifying information, load address, and debugging symbols. + +.. 
code-block:: pycon + :caption: Linux kernel example + + >>> for module in prog.modules(): + ... print(module) + ... + prog.main_module(name='kernel') + prog.relocatable_module(name='rng_core', address=0xffffffffc0400000) + prog.relocatable_module(name='virtio_rng', address=0xffffffffc0402000) + prog.relocatable_module(name='binfmt_misc', address=0xffffffffc0401000) + >>> prog.main_module().debug_file_path + '/usr/lib/modules/6.13.0-rc1-vmtest34.1default/build/vmlinux' + +.. code-block:: pycon + :caption: Userspace example + + >>> for module in prog.modules(): + ... print(module) + ... + prog.main_module(name='/usr/bin/grep') + prog.shared_library_module(name='/lib64/ld-linux-x86-64.so.2', dynamic_address=0x7f51772b6e68) + prog.shared_library_module(name='/lib64/libc.so.6', dynamic_address=0x7f51771af960) + prog.shared_library_module(name='/lib64/libpcre2-8.so.0', dynamic_address=0x7f5177258c68) + prog.vdso_module(name='linux-vdso.so.1', dynamic_address=0x7f51772803e0) + >>> prog.main_module().loaded_file_path + '/usr/bin/grep' + >>> prog.main_module().debug_file_path + '/usr/lib/debug/usr/bin/grep-3.11-7.fc40.x86_64.debug' + +drgn normally initializes the appropriate modules and loads their debugging +symbols automatically. Advanced use cases can create or modify modules and load +debugging symbols manually; see the :ref:`advanced usage guide +`. 
+ Platforms ^^^^^^^^^ @@ -347,7 +373,7 @@ along with any arguments: pid = int(sys.argv[1]) uid = find_task(pid).cred.uid.val.value_() - print(f'PID {pid} is being run by UID {uid}') + print(f"PID {pid} is being run by UID {uid}") $ sudo drgn script.py 601 PID 601 is being run by UID 1000 @@ -357,8 +383,8 @@ It's even possible to run drgn scripts directly with the proper `shebang $ cat script2.py #!/usr/bin/env drgn - mounts = prog['init_task'].nsproxy.mnt_ns.mounts.value_() - print(f'You have {mounts} filesystems mounted') + mounts = prog["init_task"].nsproxy.mnt_ns.mounts.value_() + print(f"You have {mounts} filesystems mounted") $ sudo ./script2.py You have 36 filesystems mounted @@ -381,18 +407,18 @@ The default behavior of the Python `REPL print the output of :func:`repr()`. For :class:`drgn.Object` and :class:`drgn.Type`, this is a raw representation:: - >>> print(repr(prog['jiffies'])) + >>> print(repr(prog["jiffies"])) Object(prog, 'volatile unsigned long', address=0xffffffffbe405000) - >>> print(repr(prog.type('atomic_t'))) + >>> print(repr(prog.type("atomic_t"))) prog.typedef_type(name='atomic_t', type=prog.struct_type(tag=None, size=4, members=(TypeMember(prog.type('int'), name='counter', bit_offset=0),))) The standard :func:`print()` function uses the output of :func:`str()`. 
For drgn objects and types, this is a representation in programming language syntax:: - >>> print(prog['jiffies']) + >>> print(prog["jiffies"]) (volatile unsigned long)4395387628 - >>> print(prog.type('atomic_t')) + >>> print(prog.type("atomic_t")) typedef struct { int counter; } atomic_t @@ -401,10 +427,10 @@ In interactive mode, the drgn CLI automatically uses ``str()`` instead of ``repr()`` for objects and types, so you don't need to call ``print()`` explicitly:: - $ sudo drgn - >>> prog['jiffies'] + $ drgn + >>> prog["jiffies"] (volatile unsigned long)4395387628 - >>> prog.type('atomic_t') + >>> prog.type("atomic_t") typedef struct { int counter; } atomic_t @@ -412,7 +438,8 @@ explicitly:: Next Steps ---------- -Refer to the :doc:`api_reference`. Look through the :doc:`helpers`. Read some -:doc:`case_studies`. Browse through the `tools +Follow along with a :doc:`tutorial ` or :doc:`case study +`. Refer to the :doc:`api_reference` and look through the +:doc:`helpers`. Browse through the `tools `_. Check out the `community contributions `_. 
diff --git a/drgn/__init__.py b/drgn/__init__.py index 5a03f5a30..7ac341ae8 100644 --- a/drgn/__init__.py +++ b/drgn/__init__.py @@ -44,16 +44,23 @@ import pkgutil import sys import types -from typing import Union +from typing import Any, Dict, Optional, Union from _drgn import ( NULL, + AbsenceReason, Architecture, + DebugInfoOptions, + ExtraModule, FaultError, FindObjectFlags, IntegerLike, + KmodSearchMethod, Language, + MainModule, MissingDebugInfoError, + Module, + ModuleFileStatus, NoDefaultProgramError, Object, ObjectAbsentError, @@ -66,8 +73,11 @@ ProgramFlags, Qualifiers, Register, + RelocatableModule, + SharedLibraryModule, StackFrame, StackTrace, + SupplementaryFileKind, Symbol, SymbolBinding, SymbolIndex, @@ -80,6 +90,8 @@ TypeMember, TypeParameter, TypeTemplateParameter, + VdsoModule, + WantedSupplementaryFile, alignof, cast, container_of, @@ -100,17 +112,27 @@ # isort: split from _drgn import ( # noqa: F401 _elfutils_version as _elfutils_version, + _enable_dlopen_debuginfod as _enable_dlopen_debuginfod, + _have_debuginfod as _have_debuginfod, _with_libkdumpfile as _with_libkdumpfile, + _with_lzma as _with_lzma, ) from drgn.internal.version import __version__ as __version__ # noqa: F401 __all__ = ( + "AbsenceReason", "Architecture", + "DebugInfoOptions", + "ExtraModule", "FaultError", "FindObjectFlags", "IntegerLike", + "KmodSearchMethod", "Language", + "MainModule", "MissingDebugInfoError", + "Module", + "ModuleFileStatus", "NULL", "NoDefaultProgramError", "Object", @@ -124,8 +146,11 @@ "ProgramFlags", "Qualifiers", "Register", + "RelocatableModule", + "SharedLibraryModule", "StackFrame", "StackTrace", + "SupplementaryFileKind", "Symbol", "SymbolBinding", "SymbolIndex", @@ -138,6 +163,8 @@ "TypeMember", "TypeParameter", "TypeTemplateParameter", + "VdsoModule", + "WantedSupplementaryFile", "alignof", "cast", "container_of", @@ -157,15 +184,6 @@ ) -if sys.version_info >= (3, 8): - _open_code = io.open_code # novermin -else: - from typing import BinaryIO 
- - def _open_code(path: str) -> BinaryIO: - return open(path, "rb") - - # From https://docs.python.org/3/reference/import.html#import-related-module-attributes. _special_globals = frozenset( [ @@ -180,7 +198,7 @@ def _open_code(path: str) -> BinaryIO: ) -def execscript(path: str, *args: str) -> None: +def execscript(path: str, *args: str, globals: Optional[Dict[str, Any]] = None) -> None: """ Execute a script. @@ -222,6 +240,7 @@ def task_exe_path(task): :param path: File path of the script. :param args: Zero or more additional arguments to pass to the script. This is a :ref:`variable argument list `. + :param globals: If provided, globals to use instead of the caller's. """ # This is based on runpy.run_path(), which we can't use because we want to # update globals even if the script throws an exception. @@ -237,16 +256,19 @@ def task_exe_path(task): sys.argv = [path] sys.argv.extend(args) - with _open_code(path) as f: + with io.open_code(path) as f: code = pkgutil.read_code(f) if code is None: - with _open_code(path) as f: + with io.open_code(path) as f: code = compile(f.read(), path, "exec") module.__spec__ = None module.__file__ = path module.__cached__ = None # type: ignore[attr-defined] - caller_globals = sys._getframe(1).f_globals + if globals is not None: + caller_globals = globals + else: + caller_globals = sys._getframe(1).f_globals caller_special_globals = { name: caller_globals[name] for name in _special_globals diff --git a/drgn/cli.py b/drgn/cli.py index 8d3497588..0fff4c10b 100644 --- a/drgn/cli.py +++ b/drgn/cli.py @@ -14,50 +14,75 @@ import runpy import shutil import sys -from typing import Any, Callable, Dict, Optional +from typing import IO, Any, Callable, Dict, Optional, Tuple import drgn from drgn.internal.repl import interact, readline from drgn.internal.rlcompleter import Completer from drgn.internal.sudohelper import open_via_sudo -__all__ = ("run_interactive", "version_header") +__all__ = ("default_globals", "run_interactive", 
"version_header") logger = logging.getLogger("drgn") +# The list of attributes from the drgn module which are imported and inserted +# into the global namespace for interactive debugging. +_DRGN_GLOBALS = [ + "FaultError", + "NULL", + "Object", + "alignof", + "cast", + "container_of", + "execscript", + "implicit_convert", + "offsetof", + "reinterpret", + "sizeof", + "stack_trace", +] + + +def _is_tty(file: IO[Any]) -> bool: + try: + return os.isatty(file.fileno()) + except (AttributeError, OSError): + return False + class _LogFormatter(logging.Formatter): _LEVELS = ( - (logging.DEBUG, "debug", "36"), - (logging.INFO, "info", "32"), - (logging.WARNING, "warning", "33"), - (logging.ERROR, "error", "31"), - (logging.CRITICAL, "critical", "31;1"), + (logging.DEBUG, "debug", "\033[36m", "\033[m", ""), + (logging.INFO, "info", "\033[32m", "\033[m", ""), + (logging.WARNING, "warning", "\033[33m", "\033[m", ""), + (logging.ERROR, "error", "\033[31m", "\033[m", ""), + (logging.CRITICAL, "critical", "\033[31;1m", "\033[0;1m", "\033[m"), ) def __init__(self, color: bool) -> None: if color: - level_prefixes = { - level: f"\033[{level_color}m{level_name}:\033[0m" - for level, level_name, level_color in self._LEVELS + levels = { + level: (f"{level_prefix}{level_name}:{message_prefix}", message_suffix) + for level, level_name, level_prefix, message_prefix, message_suffix in self._LEVELS } else: - level_prefixes = { - level: f"{level_name}:" for level, level_name, _ in self._LEVELS + levels = { + level: (f"{level_name}:", "") + for level, level_name, _, _, _ in self._LEVELS } default_prefix = "%(levelname)s:" self._drgn_formatters = { - level: logging.Formatter(f"{prefix} %(message)s") - for level, prefix in level_prefixes.items() + level: logging.Formatter(f"{prefix} %(message)s{suffix}") + for level, (prefix, suffix) in levels.items() } self._default_drgn_formatter = logging.Formatter( f"{default_prefix} %(message)s" ) self._other_formatters = { - level: 
logging.Formatter(f"{prefix}%(name)s: %(message)s") - for level, prefix in level_prefixes.items() + level: logging.Formatter(f"{prefix}%(name)s: %(message)s{suffix}") + for level, (prefix, suffix) in levels.items() } self._default_other_formatter = logging.Formatter( f"{default_prefix}%(name)s: %(message)s" @@ -85,21 +110,39 @@ def version_header() -> str: calling :func:`run_interactive()`. """ python_version = ".".join(str(v) for v in sys.version_info[:3]) + debuginfod = f'with{"" if drgn._have_debuginfod else "out"} debuginfod' + if drgn._enable_dlopen_debuginfod: + debuginfod += " (dlopen)" libkdumpfile = f'with{"" if drgn._with_libkdumpfile else "out"} libkdumpfile' - return f"drgn {drgn.__version__} (using Python {python_version}, elfutils {drgn._elfutils_version}, {libkdumpfile})" + lzma = f'with{"" if drgn._with_lzma else "out"} lzma' + return f"drgn {drgn.__version__} (using Python {python_version}, elfutils {drgn._elfutils_version}, {debuginfod}, {libkdumpfile}, {lzma})" -class _QuietAction(argparse.Action): - def __init__( - self, option_strings: Any, dest: Any, nargs: Any = 0, **kwds: Any - ) -> None: - super().__init__(option_strings, dest, nargs=nargs, **kwds) +def default_globals(prog: drgn.Program) -> Dict[str, Any]: + """ + Return the default globals for an interactive drgn session - def __call__( - self, parser: Any, namespace: Any, values: Any, option_string: Any = None - ) -> None: - setattr(namespace, self.dest, True) - namespace.log_level = "none" + :param prog: the program which will be debugged + :return: a dict of globals + """ + # Don't forget to update the default banner in run_interactive() + # with any new additions. 
+ init_globals: Dict[str, Any] = { + "prog": prog, + "drgn": drgn, + "__name__": "__main__", + "__doc__": None, + } + for attr in _DRGN_GLOBALS: + init_globals[attr] = getattr(drgn, attr) + module = importlib.import_module("drgn.helpers.common") + for name in module.__dict__["__all__"]: + init_globals[name] = getattr(module, name) + if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: + module = importlib.import_module("drgn.helpers.linux") + for name in module.__dict__["__all__"]: + init_globals[name] = getattr(module, name) + return init_globals def _identify_script(path: str) -> str: @@ -159,11 +202,175 @@ def _displayhook(value: Any) -> None: setattr(builtins, "_", value) +def _bool_options(value: bool) -> Dict[str, Tuple[str, bool]]: + return { + option: ("try_" + option.replace("-", "_"), value) + for option in ( + "module-name", + "build-id", + "debug-link", + "procfs", + "embedded-vdso", + "reuse", + "supplementary", + ) + } + + +class _TrySymbolsByBaseAction(argparse.Action): + _enable: bool + _finder = ("disable_debug_info_finders", "enable_debug_info_finders") + + _options = ( + { + **_bool_options(False), + "kmod": ("try_kmod", drgn.KmodSearchMethod.NONE), + }, + { + **_bool_options(True), + "kmod=depmod": ("try_kmod", drgn.KmodSearchMethod.DEPMOD), + "kmod=walk": ("try_kmod", drgn.KmodSearchMethod.WALK), + "kmod=depmod-or-walk": ("try_kmod", drgn.KmodSearchMethod.DEPMOD_OR_WALK), + "kmod=depmod-and-walk": ("try_kmod", drgn.KmodSearchMethod.DEPMOD_AND_WALK), + }, + ) + + def __init__(self, *args: Any, **kwargs: Any) -> None: + kwargs["dest"] = argparse.SUPPRESS + super().__init__(*args, **kwargs) + + def __call__( + self, + parser: argparse.ArgumentParser, + namespace: argparse.Namespace, + values: Any, + option_string: Optional[str] = None, + ) -> None: + for value in values.split(","): + try: + option_name, option_value = self._options[self._enable][value] + except KeyError: + # Raise an error if passed an option meant for the opposite + # argument. 
+ if value in self._options[not self._enable]: + raise argparse.ArgumentError(self, f"invalid option: {value!r}") + + if not hasattr(namespace, self._finder[self._enable]): + setattr(namespace, self._finder[self._enable], {}) + getattr(namespace, self._finder[self._enable])[value] = None + + if hasattr(namespace, self._finder[not self._enable]): + getattr(namespace, self._finder[not self._enable]).pop(value, None) + else: + if not hasattr(namespace, "debug_info_options"): + namespace.debug_info_options = {} + namespace.debug_info_options[option_name] = option_value + + +class _TrySymbolsByAction(_TrySymbolsByBaseAction): + _enable = True + + +class _NoSymbolsByAction(_TrySymbolsByBaseAction): + _enable = False + + +def _load_debugging_symbols(prog: drgn.Program, args: argparse.Namespace) -> None: + enable_debug_info_finders = getattr(args, "enable_debug_info_finders", ()) + disable_debug_info_finders = getattr(args, "disable_debug_info_finders", ()) + if enable_debug_info_finders or disable_debug_info_finders: + debug_info_finders = prog.enabled_debug_info_finders() + registered_debug_info_finders = prog.registered_debug_info_finders() + + unknown_finders = [] + + for finder in enable_debug_info_finders: + if finder not in debug_info_finders: + if finder in registered_debug_info_finders: + debug_info_finders.append(finder) + else: + unknown_finders.append(finder) + + for finder in disable_debug_info_finders: + try: + debug_info_finders.remove(finder) + except ValueError: + if finder not in registered_debug_info_finders: + unknown_finders.append(finder) + + if unknown_finders: + if len(unknown_finders) == 1: + unknown_finders_repr = repr(unknown_finders[0]) + elif len(unknown_finders) == 2: + unknown_finders_repr = ( + f"{unknown_finders[0]!r} or {unknown_finders[1]!r}" + ) + elif len(unknown_finders) > 2: + unknown_finders = [repr(finder) for finder in unknown_finders] + unknown_finders[-1] = "or " + unknown_finders[-1] + unknown_finders_repr = ", 
".join(unknown_finders) + logger.warning( + "no matching debugging information finders or options for %s", + unknown_finders_repr, + ) + + prog.set_enabled_debug_info_finders(debug_info_finders) + + debug_info_options = getattr(args, "debug_info_options", None) + if debug_info_options: + for option, value in debug_info_options.items(): + setattr(prog.debug_info_options, option, value) + + def directories_option(arg_name: str, option_name: Optional[str] = None) -> None: + if option_name is None: + option_name = arg_name + arg = getattr(args, arg_name) + no_default = getattr(args, "no_default_" + arg_name) + if arg is not None: + if no_default: + setattr(prog.debug_info_options, option_name, arg) + else: + setattr( + prog.debug_info_options, + option_name, + tuple(arg) + getattr(prog.debug_info_options, option_name), + ) + elif no_default: + setattr(prog.debug_info_options, option_name, ()) + + directories_option("debug_directories", "directories") + directories_option("debug_link_directories") + directories_option("kernel_directories") + + if args.default_symbols is None: + args.default_symbols = {"default": True, "main": True} + try: + prog.load_debug_info(args.symbols, **args.default_symbols) + except drgn.MissingDebugInfoError as e: + if args.default_symbols.get("main"): + try: + main_module = prog.main_module() + critical = ( + main_module.wants_debug_file() or main_module.wants_loaded_file() + ) + except LookupError: + critical = True + else: + critical = False + logger.log(logging.CRITICAL if critical else logging.WARNING, "%s", e) + + if args.extra_symbols: + for extra_symbol_path in args.extra_symbols: + extra_symbol_path = os.path.abspath(extra_symbol_path) + prog.extra_module(extra_symbol_path, create=True).try_file( + extra_symbol_path + ) + + def _main() -> None: handler = logging.StreamHandler() - handler.setFormatter( - _LogFormatter(hasattr(sys.stderr, "fileno") and os.isatty(sys.stderr.fileno())) - ) + color = _is_tty(sys.stderr) + 
handler.setFormatter(_LogFormatter(color)) logging.getLogger().addHandler(handler) version = version_header() @@ -193,7 +400,9 @@ def _main() -> None: metavar="PATH", type=str, action="append", - help="load additional debugging symbols from the given file; this option may be given more than once", + help="load debugging symbols from the given file. " + "If the file does not correspond to a loaded executable, library, or module, " + "then it is ignored. This option may be given more than once", ) default_symbols_group = symbol_group.add_mutually_exclusive_group() default_symbols_group.add_argument( @@ -201,15 +410,97 @@ def _main() -> None: dest="default_symbols", action="store_const", const={"main": True}, - help="only load debugging symbols for the main executable and those added with -s; " - "for userspace programs, this is currently equivalent to --no-default-symbols", + help="only load debugging symbols for the main executable " + "and those added with -s or --extra-symbols", ) default_symbols_group.add_argument( "--no-default-symbols", dest="default_symbols", action="store_const", const={}, - help="don't load any debugging symbols that were not explicitly added with -s", + help="don't load any debugging symbols that were not explicitly added " + "with -s or --extra-symbols", + ) + symbol_group.add_argument( + "--extra-symbols", + metavar="PATH", + type=str, + action="append", + help="load additional debugging symbols from the given file, " + "which is assumed not to correspond to a loaded executable, library, or module. " + "This option may be given more than once", + ) + symbol_group.add_argument( + "--try-symbols-by", + metavar="METHOD[,METHOD...]", + action=_TrySymbolsByAction, + help="enable loading debugging symbols using the given methods. " + "Choices are debugging information finder names " + "(standard, debuginfod, or any added by plugins) " + "or debugging information options (" + + ", ".join(_TrySymbolsByBaseAction._options[True]) + + "). 
" + "This option may be given more than once", + ) + symbol_group.add_argument( + "--no-symbols-by", + metavar="METHOD[,METHOD...]", + action=_NoSymbolsByAction, + help="disable loading debugging symbols using the given methods. " + "Choices are debugging information finder names " + "(standard, debuginfod, or any added by plugins) " + "or debugging information options (" + + ", ".join(_TrySymbolsByBaseAction._options[False]) + + "). " + "This option may be given more than once", + ) + + directories_group = parser.add_argument_group("debugging symbol directories") + directories_group.add_argument( + "--debug-directory", + dest="debug_directories", + metavar="PATH", + type=str, + action="append", + help="search for debugging symbols in the given directory. " + "This option may be given more than once", + ) + directories_group.add_argument( + "--no-default-debug-directories", + action="store_true", + help="don't search for debugging symbols " + "in the standard directories or those added by plugins", + ) + directories_group.add_argument( + "--debug-link-directory", + dest="debug_link_directories", + metavar="PATH", + type=str, + action="append", + help="search for debugging symbols by debug link in the given directory. " + "$ORIGIN is replaced with the directory containing the loaded file. " + "This option may be given more than once", + ) + directories_group.add_argument( + "--no-default-debug-link-directories", + action="store_true", + help="don't search for debugging symbols by debug link " + "in the standard directories or those added by plugins", + ) + directories_group.add_argument( + "--kernel-directory", + dest="kernel_directories", + metavar="PATH", + type=str, + action="append", + help="search for the kernel image and loadable kernel modules in the given directory. 
" + "This option may be given more than once", + ) + directories_group.add_argument( + "--no-default-kernel-directories", + action="store_true", + help="don't search for the kernel image and loadable kernel modules " + "in the standard directories or those added by plugins", ) advanced_group = parser.add_argument_group("advanced") @@ -235,41 +526,50 @@ def _main() -> None: parser.add_argument( "-q", "--quiet", - action=_QuietAction, + dest="log_level", + action="store_const", + const="none", help="don't print any logs or download progress", ) parser.add_argument( - "script", + "-e", + dest="exec", + metavar="CODE", + help="an expression or statement to evaluate, instead of running in interactive mode", + ) + parser.add_argument( + "args", metavar="ARG", type=str, nargs=argparse.REMAINDER, - help="script to execute instead of running in interactive mode", + help="script to execute instead of running in interactive mode " + "(unless -e is given) and arguments to pass", ) parser.add_argument("--version", action="version", version=version) args = parser.parse_args() - if args.script: + script = bool(args.exec is None and args.args) + interactive = bool(args.exec is None and not args.args and _is_tty(sys.stdin)) + if script: # A common mistake users make is running drgn $core_dump, which tries # to run $core_dump as a Python script. Rather than failing later with # some inscrutable syntax or encoding error, try to catch this early # and provide a helpful message. try: - script_type = _identify_script(args.script[0]) + script_type = _identify_script(args.args[0]) except OSError as e: sys.exit(str(e)) if script_type == "core": sys.exit( - f"error: {args.script[0]} is a core dump\n" - f'Did you mean "-c {args.script[0]}"?' + f"error: {args.args[0]} is a core dump\n" + f'Did you mean "-c {args.args[0]}"?' 
) elif script_type == "elf": - sys.exit(f"error: {args.script[0]} is a binary, not a drgn script") - else: + sys.exit(f"error: {args.args[0]} is a binary, not a drgn script") + elif interactive: print(version, file=sys.stderr, flush=True) - if not args.quiet: - os.environ["DEBUGINFOD_PROGRESS"] = "1" if args.log_level == "none": logger.setLevel(logging.CRITICAL + 1) else: @@ -311,22 +611,30 @@ def _main() -> None: # E.g., "not an ELF core file" sys.exit(f"error: {e}") - if args.default_symbols is None: - args.default_symbols = {"default": True, "main": True} - try: - prog.load_debug_info(args.symbols, **args.default_symbols) - except drgn.MissingDebugInfoError as e: - logger.warning("%s", e) + _load_debugging_symbols(prog, args) - if args.script: - sys.argv = args.script - script = args.script[0] - if pkgutil.get_importer(script) is None: - sys.path.insert(0, os.path.dirname(os.path.abspath(script))) - drgn.set_default_prog(prog) - runpy.run_path(script, init_globals={"prog": prog}, run_name="__main__") - else: + if interactive: run_interactive(prog) + else: + drgn.set_default_prog(prog) + if script: + sys.argv = args.args + script_path = args.args[0] + if pkgutil.get_importer(script_path) is None: + sys.path.insert(0, os.path.dirname(os.path.abspath(script_path))) + runpy.run_path( + script_path, init_globals={"prog": prog}, run_name="__main__" + ) + else: + sys.path.insert(0, "") + exec_globals = default_globals(prog) + if args.exec is None: + sys.argv = [""] + exec_globals["__file__"] = "" + exec(compile(sys.stdin.read(), "", "exec"), exec_globals) + else: + sys.argv = ["-e"] + args.args + exec(args.exec, exec_globals) def run_interactive( @@ -364,44 +672,14 @@ def run_interactive( function, applications should restore their history and settings before using ``readline``. 
""" - init_globals: Dict[str, Any] = { - "prog": prog, - "drgn": drgn, - "__name__": "__main__", - "__doc__": None, - } - drgn_globals = [ - "FaultError", - "NULL", - "Object", - "alignof", - "cast", - "container_of", - "execscript", - "implicit_convert", - "offsetof", - "reinterpret", - "sizeof", - "stack_trace", - ] - for attr in drgn_globals: - init_globals[attr] = getattr(drgn, attr) - + init_globals = default_globals(prog) banner = f"""\ For help, type help(drgn). >>> import drgn ->>> from drgn import {", ".join(drgn_globals)} +>>> from drgn import {", ".join(_DRGN_GLOBALS)} >>> from drgn.helpers.common import *""" - - module = importlib.import_module("drgn.helpers.common") - for name in module.__dict__["__all__"]: - init_globals[name] = getattr(module, name) if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: banner += "\n>>> from drgn.helpers.linux import *" - module = importlib.import_module("drgn.helpers.linux") - for name in module.__dict__["__all__"]: - init_globals[name] = getattr(module, name) - if banner_func: banner = banner_func(banner) if globals_func: diff --git a/drgn/helpers/__init__.py b/drgn/helpers/__init__.py index 6c25b75fc..32173d0b5 100644 --- a/drgn/helpers/__init__.py +++ b/drgn/helpers/__init__.py @@ -14,6 +14,17 @@ class ValidationError(Exception): """ - Error raised by a :ref:`validator ` when an inconsistent or - invalid state is detected. + Error raised by a validator when an inconsistent or invalid state is + detected. + + Validators are a special category of helpers that check the consistency of + a data structure. In general, helpers assume that the data structures that + they examine are valid. Validators do not make this assumption and do + additional (potentially expensive) checks to detect broken invariants, + corruption, etc. + + Validators raise :class:`drgn.helpers.ValidationError` if the data + structure is not valid or :class:`drgn.FaultError` if the data structure is + invalid in a way that causes a bad memory access. 
They have names prefixed + with ``validate_``. """ diff --git a/drgn/helpers/common/format.py b/drgn/helpers/common/format.py index 489ff08bf..2da428dac 100644 --- a/drgn/helpers/common/format.py +++ b/drgn/helpers/common/format.py @@ -9,16 +9,31 @@ formatting different things as text. """ -from typing import Iterable, SupportsFloat, Tuple +import re +from typing import ( + TYPE_CHECKING, + Any, + Iterable, + List, + Optional, + Sequence, + SupportsFloat, + Tuple, +) from drgn import IntegerLike, Type +if TYPE_CHECKING: + from _typeshed import SupportsWrite + __all__ = ( + "CellFormat", "decode_enum_type_flags", "decode_flags", "escape_ascii_character", "escape_ascii_string", "number_in_binary_units", + "print_table", ) @@ -238,3 +253,105 @@ def number_in_binary_units(n: SupportsFloat, precision: int = 1) -> str: if n.is_integer(): precision = 0 return f"{n:.{precision}f}{prefix}" + + +def print_table( + rows: Sequence[Sequence[Any]], + *, + sep: str = " ", + file: "Optional[SupportsWrite[str]]" = None, +) -> None: + """ + Print data as a table. + + The input is given as a sequence (e.g., :class:`list` or :class:`tuple`) of + rows, where each row is a sequence of values. Rows can have different + lengths. + + >>> print_table([[2, 2000, 4], ["", 3, 13, 19]]) + 2 2000 4 + 3 13 19 + + By default, numbers are right-aligned and most other objects are + left-aligned. This (and other format options) can be changed by wrapping + the value in a :class:`CellFormat`. + + >>> print_table( + ... [ + ... ["DECIMAL", "HEXADECIMAL"], + ... [CellFormat(10, "<"), CellFormat(10, " + (?: + .? # fill + [<>=^] # align + )? + [-+ ]? # sign + z? + [#]? + 0? + ) + (?P[0-9]+)? + (?P + [,_]? # grouping + (?:\.[0-9])? # precision + [bcdeEfFgGnosxX%]? # type + ) + """, + flags=re.VERBOSE, + ) + + def __init__(self, value: Any, format_spec: str) -> None: + """ + Wrap a value with additional format options to apply when it is + formatted by :func:`print_table()`. + + :param value: Value to wrap. 
+ :param format_spec: :ref:`Format specification `. It may + not specify a width. + """ + self._value = value + match = self._FORMAT_SPEC_RE.fullmatch(format_spec) + if not match: + raise ValueError(f"invalid format_spec {format_spec!r}") + if match.group("width"): + raise ValueError("format_spec must not have width") + self._options = match.group("options") + self._rest = match.group("rest") + + def __str__(self) -> str: + return f"{self._value:{self._options}{self._rest}}" + + def __format__(self, format_spec: str) -> str: + return f"{self._value:{self._options}{format_spec}{self._rest}}" diff --git a/drgn/helpers/common/memory.py b/drgn/helpers/common/memory.py index d73d5aa26..1204d4bc9 100644 --- a/drgn/helpers/common/memory.py +++ b/drgn/helpers/common/memory.py @@ -10,8 +10,7 @@ import operator import os -import typing -from typing import Any, Dict, Optional +from typing import Any, Dict, Literal, Optional import drgn from drgn import FaultError, IntegerLike, Object, PlatformFlags, Program, SymbolKind @@ -216,7 +215,7 @@ def print_annotated_memory( # The platform must be known if we were able to read memory. assert prog.platform is not None - byteorder: 'typing.Literal["little", "big"]' + byteorder: Literal["little", "big"] if prog.platform.flags & PlatformFlags.IS_LITTLE_ENDIAN: byteorder = "little" else: diff --git a/drgn/helpers/common/prog.py b/drgn/helpers/common/prog.py index 54349534d..e739cc5ad 100644 --- a/drgn/helpers/common/prog.py +++ b/drgn/helpers/common/prog.py @@ -114,11 +114,11 @@ def wrapper(*args: "Any", **kwds: "Any") -> "R": elif isinstance(args[0], Object): return f(args[0].prog_, *args, **kwds) elif "prog" in kwds: - return f(**kwds) + return f(**kwds) # type: ignore elif param1 in kwds: arg1 = kwds[param1] if isinstance(arg1, Object): - return f(arg1.prog_, **kwds) + return f(arg1.prog_, **kwds) # type: ignore return f(get_default_prog(), *args, **kwds) # Update the docstring for pydoc. 
diff --git a/drgn/helpers/experimental/kmodify.py b/drgn/helpers/experimental/kmodify.py index 8840d9b68..2c76c9a00 100644 --- a/drgn/helpers/experimental/kmodify.py +++ b/drgn/helpers/experimental/kmodify.py @@ -400,7 +400,7 @@ def relocation_data(relocations: Sequence[_ElfRelocation]) -> bytes: class _Integer: def __init__(self, size: int, value: IntegerLike) -> None: self.size = size - self.value = operator.index(value) & ((1 << (size * 8)) - 1) + self.value = operator.index(value) class _Symbol(NamedTuple): @@ -503,7 +503,8 @@ def leave_frame(self) -> None: b"\xC3" ) - def _mov_imm(self, value: int, reg: int) -> None: + def _mov_imm(self, i: _Integer, reg: int, sign_extend_bits: int = 0) -> None: + value = i.value & ((1 << max(i.size * 8, sign_extend_bits)) - 1) assert value >= 0 and value <= 0xFFFFFFFFFFFFFFFF assert reg < 16 if value <= 0xFFFFFFFF: @@ -556,7 +557,8 @@ def _store_rax_on_stack(self, offset: int) -> None: self.code.extend(b"\x48\x89\x84\x24") self.code.extend(offset.to_bytes(4, "little", signed=True)) - def _store_imm_on_stack(self, value: int, offset: int) -> None: + def _store_imm_on_stack(self, i: _Integer, offset: int) -> None: + value = i.value & ((1 << max(i.size * 8, 64)) - 1) if (0 <= value <= 0x7FFFFFFF) or ( 0xFFFFFFFF80000000 <= value <= 0xFFFFFFFFFFFFFFFF ): @@ -571,7 +573,7 @@ def _store_imm_on_stack(self, value: int, offset: int) -> None: self.code.extend(offset.to_bytes(4, "little", signed=True)) self.code.extend((value & 0xFFFFFFFF).to_bytes(4, "little")) else: - self._mov_imm(value, self._rax) + self._mov_imm(i, self._rax, 64) self._store_rax_on_stack(offset) def _store_symbol_on_stack(self, sym: _Symbol, offset: int) -> None: @@ -583,13 +585,21 @@ def call(self, func: _Symbol, args: Sequence[Union[_Integer, _Symbol]]) -> None: if i < len(self._argument_registers): reg = self._argument_registers[i] if isinstance(arg, _Integer): - self._mov_imm(arg.value, reg) + # Clang/LLVM as of version 19 relies on <32-bit arguments + # being 
sign-extended to 32 bits despite this not being + # guaranteed by the psABI. It's unclear whether this will + # be resolved by changing LLVM or the psABI, so work around + # it for now. See: + # https://groups.google.com/g/x86-64-abi/c/h7FFh30oS3s/m/Gksanh3WAAAJ + # https://github.com/llvm/llvm-project/issues/12579 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46942 + self._mov_imm(arg, reg, 32) else: self._mov_symbol(arg, reg) else: stack_offset = 8 * (i - len(self._argument_registers)) if isinstance(arg, _Integer): - self._store_imm_on_stack(arg.value, stack_offset) + self._store_imm_on_stack(arg, stack_offset) else: self._store_symbol_on_stack(arg, stack_offset) @@ -638,7 +648,7 @@ def return_(self, value: _Integer, last: bool) -> None: raise NotImplementedError( "return values larger than 8 bytes not implemented" ) - self._mov_imm(value.value, self._rax) + self._mov_imm(value, self._rax) # Jump to the function epilogue. If this return is the last operation, # we can fall through instead of jumping. if not last: @@ -654,7 +664,7 @@ def return_if_last_return_value_nonzero(self, value: _Integer) -> None: ) # mov %rax, %rdx self.code.extend(b"\x48\x89\xC2") - self._mov_imm(value.value, self._rax) + self._mov_imm(value, self._rax) # Jump to the function epilogue if the last return value was non-zero. self.code.extend( # test %rdx, %rdx @@ -1147,6 +1157,12 @@ def call_function(prog: Program, func: Union[str, Object], *args: Any) -> Object """ Call a function in the kernel. + >>> task = find_task(99) + >>> if task: + ... call_function("wake_up_process", task) + ... + (int)1 + Arguments can be either :class:`~drgn.Object`\\ s or Python values. 
The function return value is returned as an :class:`~drgn.Object`: @@ -1336,13 +1352,12 @@ def align_data(alignment: int) -> None: call_args.append(_Symbol(".data", section=True, offset=len(data))) data.extend(value) else: - if isinstance(arg, Object): - if i < len(func_type.parameters): - arg = implicit_convert(func_type.parameters[i].type, arg) - else: - arg = _default_argument_promotions(arg) - else: + if not isinstance(arg, Object): arg = Object(prog, value=arg) + if i < len(func_type.parameters): + arg = implicit_convert(func_type.parameters[i].type, arg) + else: + arg = _default_argument_promotions(arg) type = _underlying_type(arg.type_) if type.kind not in { diff --git a/drgn/helpers/linux/bitmap.py b/drgn/helpers/linux/bitmap.py new file mode 100644 index 000000000..08aaf2287 --- /dev/null +++ b/drgn/helpers/linux/bitmap.py @@ -0,0 +1,56 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +""" +Bitmaps +------- + +The ``drgn.helpers.linux.bitmap`` module provides helpers for working with +bitmaps from :linux:`include/linux/bitmap.h`. + +The following helpers from :mod:`drgn.helpers.linux.bitops` also apply to +bitmaps: + +* :func:`~drgn.helpers.linux.bitops.for_each_set_bit()` +* :func:`~drgn.helpers.linux.bitops.for_each_clear_bit()` +* :func:`~drgn.helpers.linux.bitops.test_bit()` +""" + +import operator +import sys + +from drgn import IntegerLike, Object, sizeof + +__all__ = ("bitmap_weight",) + +if sys.version_info >= (3, 10): + _bit_count = int.bit_count # novermin +else: + + # Fallback for old Python versions. Surprisingly, this is faster than any + # bit manipulation tricks. + def _bit_count(n: int) -> int: + return bin(n).count("1") + + +def bitmap_weight(bitmap: Object, size: IntegerLike) -> int: + """ + Return the number of set (one) bits in a bitmap + + :param bitmap: ``unsigned long *`` + :param size: Size of *bitmap* in bits. 
+ """ + size = operator.index(size) + word_bits = 8 * sizeof(bitmap.type_.type) + + weight = 0 + for i in range(size // word_bits): + weight += _bit_count(bitmap[i].value_()) + + last_word_bits = size % word_bits + if last_word_bits: + weight += _bit_count( + bitmap[size // word_bits].value_() & ((1 << last_word_bits) - 1) + ) + + return weight diff --git a/drgn/helpers/linux/bitops.py b/drgn/helpers/linux/bitops.py index 1bf5dbbd4..3f19e3c70 100644 --- a/drgn/helpers/linux/bitops.py +++ b/drgn/helpers/linux/bitops.py @@ -9,6 +9,7 @@ operations in the Linux kernel. """ +import operator from typing import Iterator from drgn import IntegerLike, Object, sizeof @@ -27,7 +28,7 @@ def for_each_set_bit(bitmap: Object, size: IntegerLike) -> Iterator[int]: :param bitmap: ``unsigned long *`` :param size: Size of *bitmap* in bits. """ - size = int(size) + size = operator.index(size) word_bits = 8 * sizeof(bitmap.type_.type) for i in range((size + word_bits - 1) // word_bits): word = bitmap[i].value_() @@ -43,7 +44,7 @@ def for_each_clear_bit(bitmap: Object, size: IntegerLike) -> Iterator[int]: :param bitmap: ``unsigned long *`` :param size: Size of *bitmap* in bits. """ - size = int(size) + size = operator.index(size) word_bits = 8 * sizeof(bitmap.type_.type) for i in range((size + word_bits - 1) // word_bits): word = bitmap[i].value_() @@ -59,6 +60,6 @@ def test_bit(nr: IntegerLike, bitmap: Object) -> bool: :param nr: Bit number. 
:param bitmap: ``unsigned long *`` """ - nr = int(nr) + nr = operator.index(nr) word_bits = 8 * sizeof(bitmap.type_.type) return ((bitmap[nr // word_bits].value_() >> (nr & (word_bits - 1))) & 1) != 0 diff --git a/drgn/helpers/linux/cpumask.py b/drgn/helpers/linux/cpumask.py index 414e3166e..5a38e3bfc 100644 --- a/drgn/helpers/linux/cpumask.py +++ b/drgn/helpers/linux/cpumask.py @@ -13,6 +13,7 @@ from drgn import Object, Program from drgn.helpers.common.prog import takes_program_or_default +from drgn.helpers.linux.bitmap import bitmap_weight from drgn.helpers.linux.bitops import for_each_set_bit __all__ = ( @@ -20,10 +21,14 @@ "cpu_possible_mask", "cpu_present_mask", "cpumask_to_cpulist", + "cpumask_weight", "for_each_cpu", "for_each_online_cpu", "for_each_possible_cpu", "for_each_present_cpu", + "num_online_cpus", + "num_possible_cpus", + "num_present_cpus", ) @@ -100,6 +105,40 @@ def for_each_present_cpu(prog: Program) -> Iterator[int]: return for_each_cpu(cpu_present_mask(prog)) +def cpumask_weight(mask: Object) -> int: + """ + Return the number of CPUs in the given mask + + :param mask: ``struct cpumask *`` + """ + try: + nr_cpu_ids = mask.prog_["nr_cpu_ids"].value_() + except KeyError: + nr_cpu_ids = 1 + return bitmap_weight(mask.bits, nr_cpu_ids) + + +@takes_program_or_default +def num_online_cpus(prog: Program) -> int: + """Return the number of online CPUs.""" + # Since Linux kernel commit 0c09ab96fc82 ("cpu/hotplug: Cache number of + # online CPUs") (in v5.4), there's a cached atomic_t __num_online_cpus, but + # it's not worth it to do the version detection. 
+ return cpumask_weight(cpu_online_mask(prog)) + + +@takes_program_or_default +def num_possible_cpus(prog: Program) -> int: + """Return the number of possible CPUs.""" + return cpumask_weight(cpu_possible_mask(prog)) + + +@takes_program_or_default +def num_present_cpus(prog: Program) -> int: + """Return the number of present CPUs.""" + return cpumask_weight(cpu_present_mask(prog)) + + def cpumask_to_cpulist(mask: Object) -> str: """ Return a CPU mask as a CPU list string. diff --git a/drgn/helpers/linux/fs.py b/drgn/helpers/linux/fs.py index 7424db96e..808a3972b 100644 --- a/drgn/helpers/linux/fs.py +++ b/drgn/helpers/linux/fs.py @@ -40,9 +40,22 @@ def _follow_mount(mnt: Object, dentry: Object) -> Tuple[Object, Object]: - # DCACHE_MOUNTED is a macro, so we can't easily get the value. But, it - # hasn't changed since v2.6.38, so let's hardcode it for now. - DCACHE_MOUNTED = 0x10000 + prog = dentry.prog_ + try: + DCACHE_MOUNTED = prog.cache["DCACHE_MOUNTED"] + except KeyError: + tokens = prog["UTS_RELEASE"].string_().split(b".", 2) + major, minor = int(tokens[0]), int(tokens[1]) + # Linux kernel commit 9748cb2dc393 ("VFS: repack DENTRY_ flags.") (in + # v6.15) changed the value of DCACHE_MOUNTED. Unfortunately, it's a + # macro, so we have to hardcode it based on a version check until it's + # converted to an enum. + if (major, minor) >= (6, 15): + DCACHE_MOUNTED = 1 << 15 + else: + DCACHE_MOUNTED = 1 << 16 + prog.cache["DCACHE_MOUNTED"] = DCACHE_MOUNTED + while dentry.d_flags & DCACHE_MOUNTED: for mounted in list_for_each_entry( "struct mount", mnt.mnt_mounts.address_of_(), "mnt_child" @@ -361,7 +374,8 @@ def for_each_mount( ) for mnt in mounts: if ( - (src is None or mount_src(mnt) == src) + mnt.mnt.mnt_sb # skip cursors (v5.8 - v6.8) + and (src is None or mount_src(mnt) == src) and (dst is None or mount_dst(mnt) == dst) and (fstype is None or mount_fstype(mnt) == fstype) ): @@ -383,7 +397,7 @@ def print_mounts( but prints the value of each ``struct mount *``. 
""" for mnt in for_each_mount( - prog if ns is None else ns, # type: ignore # python/mypy#12056 + prog if ns is None else ns, src=src, dst=dst, fstype=fstype, diff --git a/drgn/helpers/linux/kallsyms.py b/drgn/helpers/linux/kallsyms.py index 66df44f2b..8d4eecf09 100644 --- a/drgn/helpers/linux/kallsyms.py +++ b/drgn/helpers/linux/kallsyms.py @@ -149,7 +149,7 @@ def _elf_sym_to_symbol(name: str, obj: Object, has_typetab: bool) -> Symbol: binding, kind = _st_info_to_binding_kind(obj.st_info.value_()) else: binding, kind = _nm_type_to_binding_kind(chr(obj.st_info.value_())) - return Symbol( # type: ignore + return Symbol( name, obj.st_value.value_(), obj.st_size.value_(), diff --git a/drgn/helpers/linux/kernfs.py b/drgn/helpers/linux/kernfs.py index b60c958f3..5a1604b85 100644 --- a/drgn/helpers/linux/kernfs.py +++ b/drgn/helpers/linux/kernfs.py @@ -16,11 +16,41 @@ __all__ = ( "kernfs_name", + "kernfs_parent", "kernfs_path", + "kernfs_root", "kernfs_walk", ) +def kernfs_root(kn: Object) -> Object: + """ + Get the kernfs root that the given kernfs node belongs to. + + :param kn: ``struct kernfs_node *`` + :return: ``struct kernfs_root *`` + """ + knp = kernfs_parent(kn) + if knp: + kn = knp + return kn.dir.root.read_() + + +def kernfs_parent(kn: Object) -> Object: + """ + Get the parent of the given kernfs node. + + :param kn: ``struct kernfs_node *`` + :return: ``struct kernfs_node *`` + """ + # Linux kernel commit 633488947ef6 ("kernfs: Use RCU to access + # kernfs_node::parent.") (in v6.15) renamed the parent member. + try: + return kn.__parent.read_() + except AttributeError: + return kn.parent.read_() + + def kernfs_name(kn: Object) -> bytes: """ Get the name of the given kernfs node. 
@@ -29,13 +59,7 @@ def kernfs_name(kn: Object) -> bytes: """ if not kn: return b"(null)" - return kn.name.string_() if kn.parent else b"/" - - -def _kernfs_root(kn: Object) -> Object: - if kn.parent: - kn = kn.parent - return kn.dir.root + return kn.name.string_() if kernfs_parent(kn) else b"/" def kernfs_path(kn: Object) -> bytes: @@ -47,14 +71,14 @@ def kernfs_path(kn: Object) -> bytes: if not kn: return b"(null)" - root_kn = _kernfs_root(kn).kn + root_kn = kernfs_root(kn).kn if kn == root_kn: return b"/" names = [] while kn != root_kn: names.append(kn.name.string_()) - kn = kn.parent + kn = kernfs_parent(kn) names.append(root_kn.name.string_()) names.reverse() diff --git a/drgn/helpers/linux/kthread.py b/drgn/helpers/linux/kthread.py new file mode 100644 index 000000000..cfa6d8340 --- /dev/null +++ b/drgn/helpers/linux/kthread.py @@ -0,0 +1,61 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +""" +Kernel Threads +-------------- + +The ``drgn.helpers.linux.kthread`` module provides helpers for working with +Linux kernel threads, a.k.a. kthreads. +""" + +from drgn import Object, cast, container_of + +__all__ = ( + "kthread_data", + "to_kthread", +) + + +def to_kthread(task: Object) -> Object: + """ + Get the kthread information for a task. + + >>> to_kthread(find_task(3)) + *(struct kthread *)0xffff8ef600191580 = { + ... + .threadfn = (int (*)(void *))kthread_worker_fn+0x0 = 0xffffffffba1e61b0, + .full_name = (char *)0xffff8ef6003d4ac0 = "pool_workqueue_release", + } + + :param task: ``struct task_struct *`` + :return: ``struct kthread *`` + """ + try: + # Since Linux kernel commit e32cf5dfbe22 ("kthread: Generalize + # pf_io_worker so it can point to struct kthread") (in v5.17), the + # struct kthread * is in task->worker_private.
+ return cast("struct kthread *", task.worker_private) + except AttributeError: + if "free_kthread_struct" in task.prog_: + # Between that and Linux kernel commit 1da5c46fa965 ("kthread: Make + # struct kthread kmalloc'ed") (in v4.10), it is in + # task->set_child_tid. Unfortunately we can only distinguish this + # by looking for another function added in that commit. + return cast("struct kthread *", task.set_child_tid) + else: + # Before that, task->vfork_done points to kthread->exited. + return container_of(task.vfork_done, "struct kthread", "exited") + + +def kthread_data(task: Object) -> Object: + """ + Get the data that was specified when a kthread was created. + + >>> kthread_data(find_task(3)) + (void *)0xffff8ef6001812c0 + + :param task: ``struct task_struct *`` + :return: ``void *`` + """ + return to_kthread(task).data.read_() diff --git a/drgn/helpers/linux/list.py b/drgn/helpers/linux/list.py index 807afd3c4..8511476b3 100644 --- a/drgn/helpers/linux/list.py +++ b/drgn/helpers/linux/list.py @@ -204,6 +204,9 @@ def validate_list(head: Object) -> None: """ Validate that the ``next`` and ``prev`` pointers in a list are consistent. + >>> validate_list(prog["my_list"].address_of_()) + drgn.helpers.ValidationError: (struct list_head *)0xffffffffc029e460 next 0xffffffffc029e000 has prev 0xffffffffc029e450 + :param head: ``struct list_head *`` :raises ValidationError: if the list is invalid """ @@ -241,6 +244,17 @@ def validate_list_for_each_entry( Like :func:`list_for_each_entry()`, but validates the list like :func:`validate_list()` while iterating. + .. code-block:: python3 + + def validate_my_list(prog): + for entry in validate_list_for_each_entry( + "struct my_entry", + prog["my_list"].address_of_(), + "list", + ): + if entry.value < 0: + raise ValidationError("list contains negative entry") + :param type: Entry type. :param head: ``struct list_head *`` :param member: Name of list node member in entry type.
diff --git a/drgn/helpers/linux/mm.py b/drgn/helpers/linux/mm.py index a7a02ed5d..eae96669d 100644 --- a/drgn/helpers/linux/mm.py +++ b/drgn/helpers/linux/mm.py @@ -1218,8 +1218,11 @@ def for_each_vmap_area(prog: Program) -> Iterator[Object]: >>> for va in for_each_vmap_area(): ... caller = "" ... if va.vm: - ... sym = prog.symbol(va.vm.caller) - ... if sym: + ... try: + ... sym = prog.symbol(va.vm.caller) + ... except LookupError: + ... pass + ... else: ... caller = f" {sym.name}" ... print(f"{hex(va.va_start)}-{hex(va.va_end)}{caller}") ... @@ -1424,7 +1427,8 @@ def totalram_pages(prog: Program) -> int: @takes_program_or_default def in_direct_map(prog: Program, addr: IntegerLike) -> bool: """ - Return True if an address is within the kernel's direct memory mapping + Return whether an address is within the kernel's direct memory mapping. + :param addr: address to check """ addr = operator.index(addr) diff --git a/drgn/helpers/linux/net.py b/drgn/helpers/linux/net.py index d97f22167..c1bd7105c 100644 --- a/drgn/helpers/linux/net.py +++ b/drgn/helpers/linux/net.py @@ -34,6 +34,7 @@ "sk_fullsock", "sk_nulls_for_each", "skb_shinfo", + "is_pp_page", ) @@ -260,3 +261,49 @@ def skb_shinfo(skb: Object) -> Object: return cast("struct skb_shared_info *", skb.head + skb.end) else: return cast("struct skb_shared_info *", skb.end) + + +def _poison_pointer_delta(prog: Program) -> int: + # The value of POISON_POINTER_DELTA depends on + # CONFIG_ILLEGAL_POINTER_VALUE, which varies by architecture and kernel + # version. To avoid hard-coding values, derive the value from + # TIMER_ENTRY_STATIC, which we can find in any statically-defined timer. + # This still requires hard-coding an offset, but that offset is the same on + # all architectures and kernel versions since Linux kernel commit + # b8a0255db958 ("include/linux/poison.h: use POISON_POINTER_DELTA for + # poison pointers") (in v4.5). 
+ try: + return prog.cache["POISON_POINTER_DELTA"] + except KeyError: + pass + TIMER_ENTRY_STATIC = prog["poll_spurious_irq_timer"].entry.next.value_() + POISON_POINTER_DELTA = TIMER_ENTRY_STATIC - 0x300 + prog.cache["POISON_POINTER_DELTA"] = POISON_POINTER_DELTA + return POISON_POINTER_DELTA + + +def is_pp_page(page: Object) -> bool: + """ + Check if given page is a page_pool page. + + :param page: ``struct page *`` + :raises NotImplementedError: If page_pool pages cannot be identified on + this kernel. This is the case from Linux 4.18 (when page_pool was + introduced) up to and including Linux 5.13. + """ + PP_SIGNATURE = _poison_pointer_delta(page.prog_) + 0x40 + PP_MAGIC_MASK = ~0x3 + + try: + return (page.pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE + except AttributeError: + pass + # Before Linux kernel commit ff7d6b27f894 ("page_pool: refurbish version of + # page_pool code") (in v4.18), page_pool didn't exist. + try: + page.prog_.type("struct page_pool") + except LookupError: + return False + # Between that and Linux kernel commit c07aea3ef4d4 ("mm: add a signature + # in struct page") (in v5.14), there is no way to identify page_pool pages. + raise NotImplementedError("page_pool pages cannot be identified before Linux 5.14") diff --git a/drgn/helpers/linux/pid.py b/drgn/helpers/linux/pid.py index e8edd9e8b..9d3c4e905 100644 --- a/drgn/helpers/linux/pid.py +++ b/drgn/helpers/linux/pid.py @@ -96,9 +96,7 @@ def for_each_task(prog: Program, ns: Optional[Object]) -> Iterator[Object]: :return: Iterator of ``struct task_struct *`` objects. 
""" PIDTYPE_PID = prog["PIDTYPE_PID"].value_() - for pid in for_each_pid( - prog if ns is None else ns # type: ignore # python/mypy#12056 - ): + for pid in for_each_pid(prog if ns is None else ns): task = pid_task(pid, PIDTYPE_PID) if task: yield task diff --git a/drgn/helpers/linux/printk.py b/drgn/helpers/linux/printk.py index 22508b976..71a5e527b 100644 --- a/drgn/helpers/linux/printk.py +++ b/drgn/helpers/linux/printk.py @@ -15,8 +15,11 @@ if TYPE_CHECKING: from _typeshed import SupportsWrite +from datetime import datetime + from drgn import Object, Program, cast, sizeof from drgn.helpers.common.prog import takes_program_or_default +from drgn.helpers.linux.timekeeping import ktime_get_coarse_ns, ktime_get_coarse_real_ns __all__ = ( "get_dmesg", @@ -243,7 +246,7 @@ def get_printk_records(prog: Program) -> List[PrintkRecord]: @takes_program_or_default -def get_dmesg(prog: Program) -> bytes: +def get_dmesg(prog: Program, *, human_readable_time: bool = False) -> bytes: """ Get the contents of the kernel log buffer formatted like :manpage:`dmesg(1)`. @@ -258,22 +261,46 @@ def get_dmesg(prog: Program) -> bytes: If you need to format the log buffer differently, use :func:`get_printk_records()` and format it yourself. + + :param human_readable_time: Print human-readable timestamps. Note that this + is only correct for messages printed since the last suspend/resume. """ - lines = [ - b"[% 5d.%06d] %s" - % ( - record.timestamp // 1000000000, - record.timestamp % 1000000000 // 1000, - record.text, + if human_readable_time: + boot_time_s = int(ktime_get_coarse_real_ns(prog)) - int( + ktime_get_coarse_ns(prog) ) - for record in get_printk_records(prog) - ] - lines.append(b"") # So we get a trailing newline. 
+ lines = [ + b"[%s] %s" + % ( + datetime.fromtimestamp((boot_time_s + record.timestamp) // 1000000000) + .astimezone() + .strftime("%a %b %e %T %Z %Y") + .encode("utf-8"), + record.text, + ) + for record in get_printk_records(prog) + ] + else: + lines = [ + b"[% 5d.%06d] %s" + % ( + record.timestamp // 1000000000, + record.timestamp % 1000000000 // 1000, + record.text, + ) + for record in get_printk_records(prog) + ] + lines.append(b"") return b"\n".join(lines) @takes_program_or_default -def print_dmesg(prog: Program, *, file: "Optional[SupportsWrite[str]]" = None) -> None: +def print_dmesg( + prog: Program, + *, + human_readable_time: bool = False, + file: "Optional[SupportsWrite[str]]" = None +) -> None: """ Print the contents of the kernel log buffer. @@ -283,8 +310,13 @@ def print_dmesg(prog: Program, *, file: "Optional[SupportsWrite[str]]" = None) - [ 0.000000] BIOS-provided physical RAM map: ... + :param human_readable_time: Print human-readable timestamps. Note that this + is only correct for messages printed since the last suspend/resume. + :param file: File to print to. Defaults to :data:`sys.stdout`. """ (sys.stdout if file is None else file).write( - get_dmesg(prog).decode(errors="replace") + get_dmesg(prog, human_readable_time=human_readable_time).decode( + errors="replace" + ) ) diff --git a/drgn/helpers/linux/timekeeping.py b/drgn/helpers/linux/timekeeping.py new file mode 100644 index 000000000..bfd6d8b95 --- /dev/null +++ b/drgn/helpers/linux/timekeeping.py @@ -0,0 +1,261 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +""" +Timekeeping +----------- + +The ``drgn.helpers.linux.timekeeping`` module provides helpers for timestamps. + +.. note:: + + In core dumps, it is only possible to recover coarse timestamps, which are + only updated once per tick (~1-10 ms). Therefore, only the + ``ktime_get_seconds()`` and ``ktime_get_coarse_ns()`` families of functions + are provided. 
These functions can also race with timekeeping updates and + return a value with an error of up to 1 second. +""" + +import functools +import logging +from typing import Optional + +from drgn import Object, Program, ProgramFlags, cast, sizeof +from drgn.helpers.common.prog import takes_program_or_default + +logger = logging.getLogger("drgn") + +__all__ = ( + "ktime_get_boottime_seconds", + "ktime_get_clocktai_seconds", + "ktime_get_coarse_boottime_ns", + "ktime_get_coarse_clocktai_ns", + "ktime_get_coarse_ns", + "ktime_get_coarse_real_ns", + "ktime_get_real_seconds", + "ktime_get_seconds", + "uptime", + "uptime_pretty", +) + + +@takes_program_or_default +def ktime_get_seconds(prog: Program) -> Object: + """ + Get the seconds component of the monotonic time + (``CLOCK_MONOTONIC_COARSE``). + + :return: ``time64_t`` + """ + return cast("time64_t", prog["tk_core"].timekeeper.ktime_sec) + + +def _ktime_get_coarse_ns(prog: Program, offs_name: Optional[str]) -> Object: + # The timekeeper is updated every tick, so we're very likely to catch it in + # the middle of an update. To make that less likely, we take a "snapshot" + # of the relevant timekeeper memory, check whether the seqcount was + # write-locked during our snapshot, and retry (if live) or warn if so. The + # snapshot isn't atomic, so it's still possible to race, but the window is + # narrow. + # + # An alternative that fully avoids the race would be simulating a + # read_seqlock operation, but that requires multiple reads and would likely + # never succeed on, for example, slow remote connections. + # + # We cache the address and size to read and functions for extracting + # members. 
+ try: + address, size, from_snapshot = prog.cache["timekeeper_snapshot"] + except KeyError: + tk_core = prog["tk_core"] + tk = tk_core.timekeeper + members = { + "base": tk.tkr_mono.base, + "offs_real": tk.offs_real, + "offs_boot": tk.offs_boot, + "offs_tai": tk.offs_tai, + } + + # Since Linux kernel commit 025e82bcbc34 ("timekeeping: Use sequence + # counter with associated raw spinlock") (in v5.9), struct tk_data::seq + # is a seqcount_raw_spinlock_t. Before that, it is a seqcount_t. + try: + members["seq"] = tk_core.seq.seqcount.sequence + except AttributeError: + members["seq"] = tk_core.seq.sequence + + # Since Linux kernel commit b71f9804f66c ("timekeeping: Prevent coarse + # clocks going backwards") (in v6.15), the nanoseconds part is directly + # in struct timekeeper::coarse_nsec. Before that, it is computed from + # struct timekeeper::tkr_mono. + try: + members["coarse_nsec"] = tk.coarse_nsec + except AttributeError: + members["xtime_nsec"] = tk.tkr_mono.xtime_nsec + members["shift"] = tk.tkr_mono.shift + + address = min( # type: ignore[type-var] # member.address_ can't be None + member.address_ for member in members.values() + ) + size = ( + max( + member.address_ + sizeof(member) # type: ignore[operator] # member.address_ can't be None. + for member in members.values() + ) + - address + ) + assert size <= 1024 + + from_snapshot = {} + for name, member in members.items(): + member_type = member.type_ + # Since Linux kernel commit 2456e8553544 ("ktime: Get rid of the + # union") (in v4.10), ktime_t is a typedef of s64. Before that, it + # was a dummy union wrapping an s64. In both cases, we actually + # want a u64. 
+ if member_type.type_name() == "ktime_t": + member_type = prog.type("u64") + from_snapshot[name] = functools.partial( + Object.from_bytes_, + prog, + member_type, + bit_offset=(member.address_ - address) * 8, + ) + + prog.cache["timekeeper_snapshot"] = address, size, from_snapshot + + # On live kernels, we retry a limited number of times, then warn and move + # on. + for _ in range(1000): + snapshot = prog.read(address, size) + seq = from_snapshot["seq"](snapshot) + if seq & 1: + if prog.flags & ProgramFlags.IS_LIVE: + continue + else: + # For core dumps, the best we can do is warn. + logger.warning("timekeeper was write-locked; ktime may be inconsistent") + break + break + else: + logger.warning( + "couldn't get unlocked snapshot of timekeeper; ktime may be inconsistent" + ) + + ns = from_snapshot["base"](snapshot) + try: + ns += from_snapshot["coarse_nsec"](snapshot) + except KeyError: + ns += from_snapshot["xtime_nsec"](snapshot) >> from_snapshot["shift"](snapshot) + if offs_name is not None: + ns += from_snapshot[offs_name](snapshot) + return ns + + +@takes_program_or_default +def ktime_get_coarse_ns(prog: Program) -> Object: + """ + Get the coarse monotonic time in nanoseconds (``CLOCK_MONOTONIC_COARSE``). + + :return: ``u64`` + """ + return _ktime_get_coarse_ns(prog, None) + + +@takes_program_or_default +def ktime_get_real_seconds(prog: Program) -> Object: + """ + Get the seconds component of the real (wall) time + (``CLOCK_REALTIME_COARSE``). + + :return: ``time64_t`` + """ + return cast("time64_t", prog["tk_core"].timekeeper.xtime_sec) + + +@takes_program_or_default +def ktime_get_coarse_real_ns(prog: Program) -> Object: + """ + Get the coarse real (wall) time in nanoseconds (``CLOCK_REALTIME_COARSE``). 
+ + :return: ``u64`` + """ + return _ktime_get_coarse_ns(prog, "offs_real") + + +@takes_program_or_default +def ktime_get_boottime_seconds(prog: Program) -> Object: + """ + Get the seconds component of the monotonic time since boot (coarse version + of ``CLOCK_BOOTTIME``). + + :return: ``time64_t`` + """ + return cast("time64_t", ktime_get_coarse_boottime_ns(prog) / 1000000000) + + +@takes_program_or_default +def ktime_get_coarse_boottime_ns(prog: Program) -> Object: + """ + Get the coarse monotonic time since boot in nanoseconds (coarse version + of ``CLOCK_BOOTTIME``). + + :return: ``u64`` + """ + return _ktime_get_coarse_ns(prog, "offs_boot") + + +@takes_program_or_default +def ktime_get_clocktai_seconds(prog: Program) -> Object: + """ + Get the seconds component of the International Atomic Time (coarse version + of ``CLOCK_TAI``). + + :return: ``time64_t`` + """ + return cast("time64_t", ktime_get_coarse_clocktai_ns(prog) / 1000000000) + + +@takes_program_or_default +def ktime_get_coarse_clocktai_ns(prog: Program) -> Object: + """ + Get the coarse International Atomic Time in nanoseconds (coarse version of ``CLOCK_TAI``). + + :return: ``u64`` + """ + return _ktime_get_coarse_ns(prog, "offs_tai") + + +@takes_program_or_default +def uptime(prog: Program) -> float: + """Get the system uptime (as of the last tick) in fractional seconds.""" + return ktime_get_coarse_boottime_ns(prog).value_() / 1e9 + + +@takes_program_or_default +def uptime_pretty(prog: Program) -> str: + """ + Get the system uptime as a human-readable string.
+ + >>> uptime_pretty() + '1 day, 6 hours, 56 minutes, 40 seconds' + """ + seconds = ktime_get_boottime_seconds(prog).value_() + + parts = [] + for unit, seconds_in_unit in ( + ("year", 365 * 24 * 60 * 60), + ("week", 7 * 24 * 60 * 60), + ("day", 24 * 60 * 60), + ("hour", 60 * 60), + ("minute", 60), + ): + if seconds >= seconds_in_unit: + units = seconds // seconds_in_unit + seconds -= units * seconds_in_unit + s = "" if units == 1 else "s" + parts.append(f"{units} {unit}{s}") + if seconds or not parts: + s = "" if seconds == 1 else "s" + parts.append(f"{seconds} second{s}") + return ", ".join(parts) diff --git a/libdrgn/Makefile.am b/libdrgn/Makefile.am index 5cffa80da..2b24867b1 100644 --- a/libdrgn/Makefile.am +++ b/libdrgn/Makefile.am @@ -5,7 +5,8 @@ ACLOCAL_AMFLAGS = -I m4 .DELETE_ON_ERROR: -AM_CPPFLAGS = -I $(top_srcdir)/include -D_GNU_SOURCE +AM_CPPFLAGS = -I $(top_srcdir)/include -iquote $(dir $@) \ + -iquote $(srcdir)/$(dir $@) -D_GNU_SOURCE AM_CFLAGS = $(WARN_CFLAGS) $(SANITIZER_CFLAGS) AM_LDFLAGS= $(SANITIZER_LDFLAGS) @@ -32,109 +33,168 @@ BUILT_SOURCES = $(ARCH_DEFS_INCS) \ drgn_section_name_to_index.inc \ elf_sections.h -noinst_LTLIBRARIES = libdrgnimpl.la - -libdrgnimpl_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ - $(ARCH_DEFS_INCS) \ - $(STRSWITCH_INCS) \ - accessors.c \ - arch_i386.c \ - arch_riscv.c \ - array.h \ - binary_buffer.c \ - binary_buffer.h \ - binary_search.h \ - binary_search_tree.h \ - bitops.h \ - c_keywords.inc \ - c_lexer.h \ - cfi.c \ - cfi.h \ - cityhash.h \ - cleanup.h \ - debug_info.c \ - debug_info.h \ - drgn_internal.h \ - drgn_section_name_to_index.inc \ - dwarf_constants.c \ - dwarf_constants.h \ - dwarf_info.c \ - dwarf_info.h \ - elf_file.c \ - elf_file.h \ - elf_notes.c \ - elf_notes.h \ - elf_sections.h \ - error.c \ - error.h \ - generics.h \ - handler.c \ - handler.h \ - hash_table.c \ - hash_table.h \ - helpers.h \ - io.c \ - io.h \ - kallsyms.c \ - kallsyms.h \ - language.c \ - language.h \ - language_c.c \ - 
lazy_object.c \ - lazy_object.h \ - lexer.c \ - lexer.h \ - linux_kernel.c \ - linux_kernel.h \ - linux_kernel_helpers.c \ - log.c \ - log.h \ - memory_reader.c \ - memory_reader.h \ - minmax.h \ - nstring.h \ - object.c \ - object.h \ - openmp.c \ - openmp.h \ - orc.h \ - orc_info.c \ - orc_info.h \ - path.c \ - path.h \ - platform.c \ - platform.h \ - pp.h \ - program.c \ - program.h \ - register_state.c \ - register_state.h \ - serialize.c \ - serialize.h \ - splay_tree.c \ - stack_trace.c \ - stack_trace.h \ - string_builder.c \ - string_builder.h \ - symbol.c \ - symbol.h \ - type.c \ - type.h \ - util.c \ - util.h \ - vector.h - -libdrgnimpl_la_CFLAGS = $(AM_CFLAGS) -fvisibility=hidden $(OPENMP_CFLAGS) \ - $(elfutils_CFLAGS) -libdrgnimpl_la_CPPFLAGS = $(AM_CPPFLAGS) -iquote . -libdrgnimpl_la_LIBADD = $(OPENMP_LIBS) $(elfutils_LIBS) -lm +if ENABLE_PYTHON +BUILT_SOURCES += python/docstrings.h +endif + +noinst_LTLIBRARIES = libdrgn_common.la + +libdrgn_common_la_SOURCES = $(ARCH_DEFS_PYS:_defs.py=.c) \ + $(ARCH_DEFS_INCS) \ + $(STRSWITCH_INCS) \ + accessors.c \ + arch_i386.c \ + arch_riscv.c \ + array.h \ + binary_buffer.c \ + binary_buffer.h \ + binary_search.h \ + binary_search_tree.h \ + bitops.h \ + c_keywords.inc \ + c_lexer.h \ + cfi.c \ + cfi.h \ + cityhash.h \ + cleanup.h \ + crc32.c \ + crc32.h \ + debug_info.c \ + debug_info.h \ + debug_info_options.c \ + debug_info_options.h \ + drgn_internal.h \ + drgn_section_name_to_index.inc \ + dwarf_constants.c \ + dwarf_constants.h \ + dwarf_info.c \ + dwarf_info.h \ + elf_file.c \ + elf_file.h \ + elf_notes.c \ + elf_notes.h \ + elf_symtab.c \ + elf_symtab.h \ + elf_sections.h \ + error.c \ + error.h \ + generics.h \ + handler.c \ + handler.h \ + hash_table.c \ + hash_table.h \ + helpers.h \ + hexlify.c \ + hexlify.h \ + io.c \ + io.h \ + kallsyms.c \ + kallsyms.h \ + language.c \ + language.h \ + language_c.c \ + lazy_object.c \ + lazy_object.h \ + lexer.c \ + lexer.h \ + linux_kernel.c \ + linux_kernel.h \ + 
linux_kernel_helpers.c \ + log.c \ + log.h \ + memory_reader.c \ + memory_reader.h \ + minmax.h \ + nstring.h \ + object.c \ + object.h \ + openmp.c \ + openmp.h \ + orc.h \ + orc_info.c \ + orc_info.h \ + path.c \ + path.h \ + platform.c \ + platform.h \ + plugins.h \ + pp.h \ + program.c \ + program.h \ + register_state.c \ + register_state.h \ + serialize.c \ + serialize.h \ + splay_tree.c \ + stack_trace.c \ + stack_trace.h \ + string_builder.c \ + string_builder.h \ + symbol.c \ + symbol.h \ + type.c \ + type.h \ + util.c \ + util.h \ + vector.h + +libdrgn_common_la_CFLAGS = $(AM_CFLAGS) -fvisibility=hidden $(OPENMP_CFLAGS) \ + $(elfutils_CFLAGS) +libdrgn_common_la_LIBADD = $(OPENMP_LIBS) $(elfutils_LIBS) $(lzma_LIBS) -lm + +if WITH_DEBUGINFOD +if ENABLE_DLOPEN_DEBUGINFOD +libdrgn_common_la_LIBADD += -ldl +else +libdrgn_common_la_CFLAGS += $(libdebuginfod_CFLAGS) +libdrgn_common_la_LIBADD += $(libdebuginfod_LIBS) +endif +endif if WITH_LIBKDUMPFILE -libdrgnimpl_la_SOURCES += kdump.c -libdrgnimpl_la_CFLAGS += $(libkdumpfile_CFLAGS) -libdrgnimpl_la_LIBADD += $(libkdumpfile_LIBS) +libdrgn_common_la_SOURCES += kdump.c +libdrgn_common_la_CFLAGS += $(libkdumpfile_CFLAGS) +libdrgn_common_la_LIBADD += $(libkdumpfile_LIBS) endif +if ENABLE_PYTHON +noinst_LTLIBRARIES += libdrgn_common_python.la + +libdrgn_common_python_la_SOURCES = python/constants.c \ + python/debug_info_options.c \ + python/docstrings.c \ + python/docstrings.h \ + python/drgnpy.h \ + python/error.c \ + python/helpers.c \ + python/language.c \ + python/main.c \ + python/module.c \ + python/module_section_addresses.c \ + python/object.c \ + python/platform.c \ + python/plugins.c \ + python/program.c \ + python/stack_trace.c \ + python/symbol.c \ + python/symbol_index.c \ + python/test.c \ + python/thread.c \ + python/type.c \ + python/type_kind_set.c \ + python/util.c + +libdrgn_common_python_la_CFLAGS = $(libdrgn_common_la_CFLAGS) +libdrgn_common_python_la_CPPFLAGS = $(AM_CPPFLAGS) $(PYTHON_CPPFLAGS) 
+libdrgn_common_python_la_LIBADD = libdrgn_common.la +endif + +EXTRA_LTLIBRARIES = libdrgn_common_no_python.la + +libdrgn_common_no_python_la_SOURCES = no_python.c +libdrgn_common_no_python_la_CFLAGS = $(libdrgn_common_la_CFLAGS) +libdrgn_common_no_python_la_LIBADD = libdrgn_common.la + %: %.strswitch build-aux/gen_strswitch.py build-aux/codegen_utils.py $(AM_V_GEN)$(PYTHON) $(word 2, $^) -o $@ $< @@ -150,49 +210,6 @@ drgn_section_name_to_index.inc: build-aux/gen_elf_sections.py build-aux/gen_strs elf_sections.h: build-aux/gen_elf_sections.py build-aux/codegen_utils.py $(AM_V_GEN)$(PYTHON) $< -H > $@ -lib_LTLIBRARIES = libdrgn.la - -libdrgn_la_SOURCES = -libdrgn_la_LDFLAGS = $(AM_LDFLAGS) -version-info 0:0:0 -libdrgn_la_LIBADD = libdrgnimpl.la - -if ENABLE_PYTHON -BUILT_SOURCES += python/docstrings.h - -noinst_LTLIBRARIES += _drgn.la -endif - -_drgn_la_SOURCES = python/constants.c \ - python/docstrings.c \ - python/docstrings.h \ - python/drgnpy.h \ - python/error.c \ - python/helpers.c \ - python/language.c \ - python/main.c \ - python/object.c \ - python/platform.c \ - python/program.c \ - python/stack_trace.c \ - python/symbol.c \ - python/symbol_index.c \ - python/test.c \ - python/thread.c \ - python/type.c \ - python/type_kind_set.c \ - python/util.c - -_drgn_la_CFLAGS = $(AM_CFLAGS) -fvisibility=hidden -_drgn_la_CPPFLAGS = $(AM_CPPFLAGS) $(PYTHON_CPPFLAGS) -iquote $(srcdir)/python \ - -iquote python -_drgn_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--exclude-libs,ALL -avoid-version -module \ - -shared -rpath $(pkgpyexecdir) -_drgn_la_LIBADD = libdrgnimpl.la - -if WITH_LIBKDUMPFILE -_drgn_la_CFLAGS += $(libkdumpfile_CFLAGS) -endif - python/constants.c: drgn.h build-aux/gen_constants.py $(AM_V_GEN)$(PYTHON) $(word 2, $^) < $< > $@ @@ -205,6 +222,32 @@ python/docstrings.c: ../_drgn.pyi $(drgndoc_docstrings_deps) python/docstrings.h: ../_drgn.pyi $(drgndoc_docstrings_deps) $(AM_V_GEN)$(drgndoc_docstrings) -H -m _drgn:drgn $< > $@ +if ENABLE_LIBDRGN +if ENABLE_PYTHON 
+libdrgn_libs = $(PYTHON_LIBS) libdrgn_common_python.la +else +libdrgn_libs = libdrgn_common_no_python.la +endif +else +libdrgn_libs = libdrgn_common_no_python.la +endif + +if ENABLE_LIBDRGN +lib_LTLIBRARIES = libdrgn.la + +libdrgn_la_SOURCES = +libdrgn_la_LDFLAGS = $(AM_LDFLAGS) -version-info 0:0:0 +libdrgn_la_LIBADD = $(libdrgn_libs) +endif + +if ENABLE_PYTHON_EXTENSION +noinst_LTLIBRARIES += _drgn.la + +_drgn_la_SOURCES = +_drgn_la_LDFLAGS = $(AM_LDFLAGS) -avoid-version -module -shared -rpath $(pkgpyexecdir) +_drgn_la_LIBADD = libdrgn_common_python.la +endif + EXTRA_DIST = $(ARCH_DEFS_PYS) \ $(STRSWITCH_INCS:.inc=.inc.strswitch) \ Doxyfile \ @@ -219,7 +262,7 @@ EXTRA_DIST = $(ARCH_DEFS_PYS) \ EXTRA_PROGRAMS = examples/load_debug_info examples_load_debug_info_SOURCES = examples/load_debug_info.c -examples_load_debug_info_LDADD = libdrgnimpl.la +examples_load_debug_info_LDADD = $(libdrgn_libs) # Only test internals here. Anything exposed via Python should be tested in # Python unit tests instead. 
@@ -227,10 +270,13 @@ TESTS = $(check_PROGRAMS) check_PROGRAMS = tests/binary_search \ tests/cityhash \ + tests/crc32 \ + tests/hexlify \ tests/language_c \ tests/lexer \ tests/path \ - tests/recursion_guard + tests/recursion_guard \ + tests/serialize EXTRA_DIST += $(addsuffix .c.in,$(check_PROGRAMS)) @@ -244,7 +290,7 @@ tests/%.c: build-aux/checkmk tests/%.c.in test_cflags = $(AM_CFLAGS) $(check_CFLAGS) test_cppflags = $(AM_CPPFLAGS) -iquote $(srcdir)/tests -test_ldadd = $(check_LIBS) libdrgnimpl.la +test_ldadd = $(check_LIBS) $(libdrgn_libs) tests_binary_search_CFLAGS = $(test_cflags) tests_binary_search_CPPFLAGS = $(test_cppflags) @@ -252,6 +298,12 @@ tests_binary_search_LDADD = $(test_ldadd) tests_cityhash_CFLAGS = $(test_cflags) tests_cityhash_CPPFLAGS = $(test_cppflags) tests_cityhash_LDADD = $(test_ldadd) +tests_crc32_CFLAGS = $(test_cflags) +tests_crc32_CPPFLAGS = $(test_cppflags) +tests_crc32_LDADD = $(test_ldadd) +tests_hexlify_CFLAGS = $(test_cflags) +tests_hexlify_CPPFLAGS = $(test_cppflags) +tests_hexlify_LDADD = $(test_ldadd) tests_language_c_CFLAGS = $(test_cflags) tests_language_c_CPPFLAGS = $(test_cppflags) tests_language_c_LDADD = $(test_ldadd) @@ -264,6 +316,9 @@ tests_path_LDADD = $(test_ldadd) tests_recursion_guard_CFLAGS = $(test_cflags) tests_recursion_guard_CPPFLAGS = $(test_cppflags) tests_recursion_guard_LDADD = $(test_ldadd) +tests_serialize_CFLAGS = $(test_cflags) +tests_serialize_CPPFLAGS = $(test_cppflags) +tests_serialize_LDADD = $(test_ldadd) # Don't delete test-suite.log on failure. .PRECIOUS: $(TEST_SUITE_LOG) diff --git a/libdrgn/arch_aarch64.c b/libdrgn/arch_aarch64.c index 1f050f08c..135d1eead 100644 --- a/libdrgn/arch_aarch64.c +++ b/libdrgn/arch_aarch64.c @@ -129,6 +129,31 @@ fallback_unwind_aarch64(struct drgn_program *prog, return NULL; } +// Unwind a single bl or blr instruction. 
+static struct drgn_error * +bad_call_unwind_aarch64(struct drgn_program *prog, + struct drgn_register_state *regs, + struct drgn_register_state **ret) +{ + struct optional_uint64 lr = + drgn_register_state_get_u64(prog, regs, x30); + if (!lr.has_value) + return &drgn_stop; + + struct drgn_register_state *tmp = drgn_register_state_dup(regs); + if (!tmp) + return &drgn_enomem; + + // lr contains the old pc + 4. + drgn_register_state_set_pc(prog, tmp, lr.value - 4); + // We don't know the old lr. + drgn_register_state_unset_has_register(tmp, DRGN_REGISTER_NUMBER(x30)); + // The interrupted pc is no longer applicable. + drgn_register_state_unset_has_register(tmp, DRGN_REGISTER_NUMBER(pc)); + *ret = tmp; + return NULL; +} + // elf_gregset_t (in PRSTATUS) and struct user_pt_regs have the same layout. // This layout is a prefix of the in-kernel struct pt_regs (but we don't care // about any of the extra fields). @@ -491,6 +516,7 @@ const struct drgn_architecture_info arch_info_aarch64 = { .default_dwarf_cfi_row = &default_dwarf_cfi_row_aarch64, .demangle_cfi_registers = demangle_cfi_registers_aarch64, .fallback_unwind = fallback_unwind_aarch64, + .bad_call_unwind = bad_call_unwind_aarch64, .pt_regs_get_initial_registers = pt_regs_get_initial_registers_aarch64, .prstatus_get_initial_registers = prstatus_get_initial_registers_aarch64, .linux_kernel_get_initial_registers = diff --git a/libdrgn/arch_x86_64.c b/libdrgn/arch_x86_64.c index fce948cd5..cb17abc3e 100644 --- a/libdrgn/arch_x86_64.c +++ b/libdrgn/arch_x86_64.c @@ -175,29 +175,33 @@ drgn_orc_to_cfi_x86_64(const struct drgn_orc_entry *orc, drgn_orc_type(orc)); } - switch (drgn_orc_bp_reg(orc)) { - case DRGN_ORC_REG_UNDEFINED: - rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; - rule.regno = DRGN_REGISTER_NUMBER(rbp); - rule.offset = 0; - break; - case DRGN_ORC_REG_PREV_SP: - rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; - rule.offset = orc->bp_offset; - break; - case DRGN_ORC_REG_BP: - rule.kind =
DRGN_CFI_RULE_AT_REGISTER_PLUS_OFFSET; - rule.regno = DRGN_REGISTER_NUMBER(rbp); - rule.offset = orc->bp_offset; - break; - default: - return drgn_error_format(DRGN_ERROR_OTHER, - "unknown ORC BP base register %d", - drgn_orc_bp_reg(orc)); + // For ORC_TYPE_REGS, rbp is already set. + if (drgn_orc_type(orc) != DRGN_ORC_TYPE_REGS) { + switch (drgn_orc_bp_reg(orc)) { + case DRGN_ORC_REG_UNDEFINED: + rule.kind = DRGN_CFI_RULE_REGISTER_PLUS_OFFSET; + rule.regno = DRGN_REGISTER_NUMBER(rbp); + rule.offset = 0; + break; + case DRGN_ORC_REG_PREV_SP: + rule.kind = DRGN_CFI_RULE_AT_CFA_PLUS_OFFSET; + rule.offset = orc->bp_offset; + break; + case DRGN_ORC_REG_BP: + rule.kind = DRGN_CFI_RULE_AT_REGISTER_PLUS_OFFSET; + rule.regno = DRGN_REGISTER_NUMBER(rbp); + rule.offset = orc->bp_offset; + break; + default: + return drgn_error_format(DRGN_ERROR_OTHER, + "unknown ORC BP base register %d", + drgn_orc_bp_reg(orc)); + } + if (!drgn_cfi_row_set_register(row_ret, DRGN_REGISTER_NUMBER(rbp), + &rule)) + return &drgn_enomem; } - if (!drgn_cfi_row_set_register(row_ret, DRGN_REGISTER_NUMBER(rbp), - &rule)) - return &drgn_enomem; + *interrupted_ret = drgn_orc_signal(orc); *ret_addr_regno_ret = DRGN_REGISTER_NUMBER(rip); return NULL; @@ -228,22 +232,20 @@ get_registers_from_frame_pointer(struct drgn_program *prog, return NULL; } -// Unwind from a call instruction, assuming that nothing else has been changed -// since. 
-static struct drgn_error *unwind_call(struct drgn_program *prog, - struct drgn_register_state *regs, - struct drgn_register_state **ret) + +static struct drgn_error * +fallback_unwind_x86_64(struct drgn_program *prog, + struct drgn_register_state *regs, + struct drgn_register_state **ret) { struct drgn_error *err; - struct optional_uint64 rsp = - drgn_register_state_get_u64(prog, regs, rsp); - if (!rsp.has_value) + struct optional_uint64 rbp = + drgn_register_state_get_u64(prog, regs, rbp); + if (!rbp.has_value) return &drgn_stop; - // Read the return address from the top of the stack. - uint64_t ret_addr; - err = drgn_program_read_u64(prog, rsp.value, false, &ret_addr); + err = get_registers_from_frame_pointer(prog, rbp.value, ret); if (err) { if (err->code == DRGN_ERROR_FAULT) { drgn_error_destroy(err); @@ -251,41 +253,26 @@ static struct drgn_error *unwind_call(struct drgn_program *prog, } return err; } - - // Most of the registers are unchanged. - struct drgn_register_state *tmp = drgn_register_state_dup(regs); - if (!tmp) - return &drgn_enomem; - - // The PC and rip are the return address we just read. - drgn_register_state_set_pc(prog, tmp, ret_addr); - drgn_register_state_set_from_u64(prog, tmp, rip, ret_addr); - // rsp is after the saved return address. - drgn_register_state_set_from_u64(prog, tmp, rsp, rsp.value + 8); - *ret = tmp; + drgn_register_state_set_cfa(prog, regs, rbp.value + 16); return NULL; } +// Unwind a single call instruction. static struct drgn_error * -fallback_unwind_x86_64(struct drgn_program *prog, +bad_call_unwind_x86_64(struct drgn_program *prog, struct drgn_register_state *regs, struct drgn_register_state **ret) { struct drgn_error *err; - // If the program counter is 0, it's likely that a NULL function pointer - // was called. Assume that the only thing we need to unwind is a single - // call instruction. 
- struct optional_uint64 pc = drgn_register_state_get_pc(regs); - if (pc.has_value && pc.value == 0) - return unwind_call(prog, regs, ret); - - struct optional_uint64 rbp = - drgn_register_state_get_u64(prog, regs, rbp); - if (!rbp.has_value) + struct optional_uint64 rsp = + drgn_register_state_get_u64(prog, regs, rsp); + if (!rsp.has_value) return &drgn_stop; - err = get_registers_from_frame_pointer(prog, rbp.value, ret); + // Read the return address from the top of the stack. + uint64_t ret_addr; + err = drgn_program_read_u64(prog, rsp.value, false, &ret_addr); if (err) { if (err->code == DRGN_ERROR_FAULT) { drgn_error_destroy(err); @@ -293,7 +280,18 @@ fallback_unwind_x86_64(struct drgn_program *prog, } return err; } - drgn_register_state_set_cfa(prog, regs, rbp.value + 16); + + // Most of the registers are unchanged. + struct drgn_register_state *tmp = drgn_register_state_dup(regs); + if (!tmp) + return &drgn_enomem; + + // The PC and rip are the return address we just read. + drgn_register_state_set_pc(prog, tmp, ret_addr); + drgn_register_state_set_from_u64(prog, tmp, rip, ret_addr); + // rsp is after the saved return address. 
+ drgn_register_state_set_from_u64(prog, tmp, rsp, rsp.value + 8); + *ret = tmp; return NULL; } @@ -663,6 +661,7 @@ const struct drgn_architecture_info arch_info_x86_64 = { DRGN_ARCHITECTURE_REGISTERS, .default_dwarf_cfi_row = &default_dwarf_cfi_row_x86_64, .fallback_unwind = fallback_unwind_x86_64, + .bad_call_unwind = bad_call_unwind_x86_64, .pt_regs_get_initial_registers = pt_regs_get_initial_registers_x86_64, .prstatus_get_initial_registers = prstatus_get_initial_registers_x86_64, .linux_kernel_get_initial_registers = diff --git a/libdrgn/binary_search_tree.h b/libdrgn/binary_search_tree.h index 6c1411d70..cbaa7b2e4 100644 --- a/libdrgn/binary_search_tree.h +++ b/libdrgn/binary_search_tree.h @@ -150,6 +150,16 @@ struct binary_search_tree_iterator binary_search_tree_delete_iterator(struct binary_search_tree *tree, struct binary_search_tree_iterator it); +/** + * Delete an entry in a @ref binary_search_tree. + * + * @return An iterator pointing to the next entry in the tree. See @ref + * binary_search_tree_next(). + */ +struct binary_search_tree_iterator +binary_search_tree_delete_entry(struct binary_search_tree *tree, + entry_type *entry); + /** * Get an iterator pointing to the first (in-order) entry in a @ref * binary_search_tree. 
@@ -492,6 +502,13 @@ tree##_delete_iterator(struct tree *tree, struct tree##_iterator it) \ return it; \ } \ \ +__attribute__((__always_inline__, __unused__)) \ +static inline struct tree##_iterator \ +tree##_delete_entry(struct tree *tree, tree##_entry_type *entry) \ +{ \ + return tree##_delete_iterator(tree, (struct tree##_iterator){ entry }); \ +} \ + \ __attribute__((__unused__)) \ static struct tree##_iterator tree##_first(struct tree *tree) \ { \ diff --git a/libdrgn/build-aux/gen_constants.py b/libdrgn/build-aux/gen_constants.py index 8e2180b1d..008ece802 100644 --- a/libdrgn/build-aux/gen_constants.py +++ b/libdrgn/build-aux/gen_constants.py @@ -14,8 +14,11 @@ class ConstantClass(NamedTuple): CONSTANTS = ( + ConstantClass("AbsenceReason", "Enum", r"DRGN_ABSENCE_REASON_([a-zA-Z0-9_]+)"), ConstantClass("Architecture", "Enum", r"DRGN_ARCH_([a-zA-Z0-9_]+)"), ConstantClass("FindObjectFlags", "Flag", r"DRGN_FIND_OBJECT_([a-zA-Z0-9_]+)"), + ConstantClass("KmodSearchMethod", "Enum", r"DRGN_KMOD_SEARCH_([a-zA-Z0-9_]+)"), + ConstantClass("ModuleFileStatus", "Enum", r"DRGN_MODULE_FILE_([a-zA-Z0-9_]+)"), ConstantClass( "PlatformFlags", "Flag", @@ -28,6 +31,11 @@ class ConstantClass(NamedTuple): ConstantClass( "Qualifiers", "Flag", r"DRGN_QUALIFIER_([a-zA-Z0-9_]+)", [("NONE", "0")] ), + ConstantClass( + "SupplementaryFileKind", + "Enum", + r"DRGN_SUPPLEMENTARY_FILE_([a-z-A-Z0-9_]+)(? None: out_file.write(f"\t{section_enumerator_name(section_name)},\n") out_file.write( """\ - /** Indices less than this are cached when the module is loaded. */ - DRGN_SECTION_INDEX_NUM_PRECACHE, + /** Indices less than this are used by the DWARF index. 
*/ + DRGN_SECTION_INDEX_NUM_DWARF_INDEX, """ ) for i, section_name in enumerate(CACHED_SECTIONS): if i == 0: out_file.write( - f"\t{section_enumerator_name(section_name)} = DRGN_SECTION_INDEX_NUM_PRECACHE,\n" + f"\t{section_enumerator_name(section_name)} = DRGN_SECTION_INDEX_NUM_DWARF_INDEX,\n" ) else: out_file.write(f"\t{section_enumerator_name(section_name)},\n") diff --git a/libdrgn/cleanup.h b/libdrgn/cleanup.h index 9ca90b4ab..9b71fb3d1 100644 --- a/libdrgn/cleanup.h +++ b/libdrgn/cleanup.h @@ -10,8 +10,10 @@ #ifndef DRGN_CLEANUP_H #define DRGN_CLEANUP_H +#include #include #include +#include #include #define _cleanup_(x) __attribute__((__cleanup__(x))) @@ -39,6 +41,14 @@ static inline void closep(int *fd) close(*fd); } +/** Call @c closedir() when the variable goes out of scope. */ +#define _cleanup_closedir_ _cleanup_(closedirp) +static inline void closedirp(DIR **dirp) +{ + if (*dirp) + closedir(*dirp); +} + /** * Get the value of a pointer variable and reset it to @c NULL. * diff --git a/libdrgn/configure.ac b/libdrgn/configure.ac index c532429d4..bfdc8f7a2 100644 --- a/libdrgn/configure.ac +++ b/libdrgn/configure.ac @@ -1,7 +1,7 @@ dnl Copyright (c) Meta Platforms, Inc. and affiliates. dnl SPDX-License-Identifier: LGPL-2.1-or-later -AC_INIT([libdrgn], [0.0.30], +AC_INIT([libdrgn], [0.0.32], [https://github.com/osandov/drgn/issues],, [https://github.com/osandov/drgn]) @@ -44,43 +44,65 @@ AC_SUBST(OPENMP_CFLAGS) AC_SUBST(OPENMP_LIBS) dnl We need Python for code generation even if we're not building the bindings. -AM_PATH_PYTHON([3.6]) +AM_PATH_PYTHON([3.8]) + +AC_ARG_ENABLE([libdrgn], + [AS_HELP_STRING([--disable-libdrgn], + [don't build the libdrgn C library. 
Note that + libdrgn's API and ABI are unstable])], + [], [enable_libdrgn=yes]) AC_ARG_ENABLE([python], [AS_HELP_STRING([--enable-python], - [build Python bindings @<:@default=no@:>@])], + [enable Python support in libdrgn, which allows + it to use Python plugins and double as the + Python extension module])], [], [enable_python=no]) -AM_CONDITIONAL([ENABLE_PYTHON], [test "x$enable_python" != xno]) +AC_ARG_ENABLE([python-extension], + [AS_HELP_STRING([--enable-python-extension], + [build the drgn Python extension module separate + from libdrgn])], + [], [enable_python_extension=no]) + +AM_CONDITIONAL([ENABLE_LIBDRGN], [test "x$enable_libdrgn" != xno]) +AM_CONDITIONAL([ENABLE_PYTHON], + [test "x$enable_python" != xno || test "x$enable_python_extension" != xno]) +AM_CONDITIONAL([ENABLE_PYTHON_EXTENSION], + [test "x$enable_python_extension" != xno]) + AM_COND_IF([ENABLE_PYTHON], - [AS_IF([test -z "$PYTHON_CPPFLAGS"], - [prog="import sysconfig -include = sysconfig.get_path('include') -platinclude = sysconfig.get_path('platinclude') -include_paths = [[include]] -if platinclude != include: - include_paths.append(plat_include) -print(' '.join('-I' + path for path in include_paths))" - PYTHON_CPPFLAGS=`"$PYTHON" -c "$prog"`]) - AC_SUBST(PYTHON_CPPFLAGS) - AC_MSG_CHECKING([for $PYTHON development headers]) - save_CPPFLAGS="$CPPFLAGS" - CPPFLAGS="$CPPFLAGS $PYTHON_CPPFLAGS" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([[#include ]])], - [AC_MSG_RESULT([yes])], - [AC_MSG_RESULT([no]) - AC_MSG_ERROR( -[Could not compile test program with Python headers. 
- -You may need to install your distribution's Python development package (e.g., -python3-devel or python3-dev) or specify the location of the Python development -headers by setting the PYTHON_CPPFLAGS environment variable.])]) - CPPFLAGS="$save_CPPFLAGS"]) + [AS_IF([test "x$enable_libdrgn" != xno], + [find_libpython=yes], [find_libpython=no]) + MY_PYTHON_DEVEL([$find_libpython])]) PKG_PROG_PKG_CONFIG PKG_CHECK_MODULES(elfutils, [libelf >= 0.165 libdw >= 0.165]) +AC_ARG_WITH([debuginfod], + [AS_HELP_STRING([--with-debuginfod], + [build with support for debuginfod + @<:@default=auto@:>@])], + [], [with_debuginfod=auto]) +AS_CASE(["x$with_debuginfod"], + [xyes], [PKG_CHECK_MODULES(libdebuginfod, [libdebuginfod])], + [xauto], [PKG_CHECK_MODULES(libdebuginfod, [libdebuginfod], + [with_debuginfod=yes], + [with_debuginfod=no])]) +AM_CONDITIONAL([WITH_DEBUGINFOD], [test "x$with_debuginfod" != xno]) +AM_COND_IF([WITH_DEBUGINFOD], [AC_DEFINE(WITH_DEBUGINFOD)]) + +AC_ARG_ENABLE([dlopen-debuginfod], + [AS_HELP_STRING([--disable-dlopen-debuginfod], + [if building with support for debuginfod, link + against libdebuginfod instead of loading it with + dlopen(3)])], + [], [enable_dlopen_debuginfod=yes]) +AM_CONDITIONAL([ENABLE_DLOPEN_DEBUGINFOD], + [test "x$enable_dlopen_debuginfod" != xno]) +AM_COND_IF([ENABLE_DLOPEN_DEBUGINFOD], [AC_DEFINE(ENABLE_DLOPEN_DEBUGINFOD)]) + AC_ARG_WITH([libkdumpfile], [AS_HELP_STRING([--with-libkdumpfile], [build with support for the makedumpfile kernel @@ -95,6 +117,19 @@ AS_CASE(["x$with_libkdumpfile"], AM_CONDITIONAL([WITH_LIBKDUMPFILE], [test "x$with_libkdumpfile" = xyes]) AM_COND_IF([WITH_LIBKDUMPFILE], [AC_DEFINE(WITH_LIBKDUMPFILE)]) +AC_ARG_WITH([lzma], + [AS_HELP_STRING([--with-lzma], + [build with support for lzma decompression of ELF sections + @<:@default=auto@:>@])], + [], [with_lzma=auto]) +AS_CASE(["x$with_lzma"], + [xyes], [PKG_CHECK_MODULES(lzma, [liblzma])], + [xauto], [PKG_CHECK_MODULES(lzma, [liblzma], + [with_lzma=yes], + 
[with_lzma=no])]) +AM_CONDITIONAL([WITH_LZMA], [test "x$with_lzma" != xno]) +AM_COND_IF([WITH_LZMA], [AC_DEFINE(WITH_LZMA)]) + dnl We need check for running tests, but we don't want to fail the build over dnl it. Instead, if it's not found, set variables so that only `make check` dnl fails. @@ -120,6 +155,7 @@ AS_IF([test "x$enable_compiler_warnings" != xno], [AX_APPEND_COMPILE_FLAGS([ dnl -Wall dnl -Wformat-overflow=2 dnl + -Wformat-security dnl -Wformat-truncation=2 dnl -Wimplicit-fallthrough dnl -Wmissing-prototypes dnl diff --git a/libdrgn/crc32.c b/libdrgn/crc32.c new file mode 100644 index 000000000..a4c627330 --- /dev/null +++ b/libdrgn/crc32.c @@ -0,0 +1,65 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "crc32.h" + +uint32_t crc32_update(uint32_t crc, const void *buf, size_t len) +{ + static const uint32_t table[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 
0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 
0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d, + }; + for (const uint8_t *p = buf, *end = p + len; p < end; p++) + crc = (crc >> 8) ^ table[(crc ^ *p) & 0xff]; + return crc; +} diff --git a/libdrgn/crc32.h b/libdrgn/crc32.h new file mode 100644 index 000000000..e202ff202 --- /dev/null +++ b/libdrgn/crc32.h @@ -0,0 +1,49 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +/** + * @file + * + * CRC-32 checksums. + * + * See @ref CRC32. + */ + +#ifndef DRGN_CRC32_H +#define DRGN_CRC32_H + +#include +#include + +/** + * @ingroup Internals + * + * @defgroup CRC32 CRC-32 + * + * CRC-32 checksums. + * + * @{ + */ + +/** + * Update a CRC-32 checksum with additional data. + * + * This uses the IEEE CRC-32 polynomial (x^32 + + * x^26 + x^23 + x^22 + + * x^16 + x^12 + x^11 + + * x^10 + x^8 + x^7 + + * x^5 + x^4 + x^2 + + * x + 1). + * + * @param[in] crc Checksum to update. For the first call, this is the initial + * checksum value (often `0xffffffff`). + * @param[in] buf Data to checksum. + * @param[in] len Size of @p buf in bytes. + * @return Updated checksum. This is not bitwise negated as is often required + * for the final result.
+ */ +uint32_t crc32_update(uint32_t crc, const void *buf, size_t len); + +/** @} */ + +#endif /* DRGN_CRC32_H */ diff --git a/libdrgn/debug_info.c b/libdrgn/debug_info.c index 85040eddd..d770fc205 100644 --- a/libdrgn/debug_info.c +++ b/libdrgn/debug_info.c @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include #include @@ -10,2127 +12,5572 @@ #include #include #include +#include #include #include #include #include +#include +#include +#include +#include #include +#include +#include "array.h" #include "binary_buffer.h" +#include "binary_search.h" #include "cleanup.h" +#include "crc32.h" #include "debug_info.h" #include "elf_file.h" #include "elf_notes.h" #include "error.h" +#include "hexlify.h" +#include "io.h" #include "linux_kernel.h" +#include "log.h" #include "openmp.h" #include "platform.h" +#include "pp.h" #include "program.h" +#include "serialize.h" #include "util.h" -static inline Dwarf *drgn_elf_file_dwarf_key(struct drgn_elf_file * const *entry) +#define _cleanup_elf_end_ _cleanup_(elf_endp) +static inline void elf_endp(Elf **elfp) { - return (*entry)->dwarf; + elf_end(*elfp); } -DEFINE_HASH_TABLE_FUNCTIONS(drgn_elf_file_dwarf_table, drgn_elf_file_dwarf_key, - ptr_key_hash_pair, scalar_key_eq); -DEFINE_VECTOR_FUNCTIONS(drgn_module_vector); -struct drgn_module_key { - const void *build_id; - size_t build_id_len; - uint64_t start, end; +#if !_ELFUTILS_PREREQ(0, 175) +// If we don't have dwelf_elf_begin(), this is equivalent except that it doesn't +// handle compressed files. +static inline Elf *dwelf_elf_begin(int fd) +{ + return elf_begin(fd, ELF_C_READ_MMAP_PRIVATE, NULL); +} +#endif + +DEFINE_HASH_MAP_FUNCTIONS(drgn_module_section_address_map, + c_string_key_hash_pair, c_string_key_eq); + +// This is currently always DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK. +struct drgn_module_wanted_supplementary_file { + struct drgn_elf_file *file; + // supplementary_path and checksum are owned by file. 
+ const char *supplementary_path; + const void *checksum; + size_t checksum_len; + // checksum_str is a separate allocation. + char *checksum_str; + // Used to detect when the wanted supplementary file has changed in + // order to avoid redundant attempts. + uint64_t generation; }; -static inline struct drgn_module_key -drgn_module_key(struct drgn_module * const *entry) +#if WITH_DEBUGINFOD +#if _ELFUTILS_PREREQ(0, 179) +#define DRGN_DEBUGINFOD_0_179_FUNCTIONS \ + X(debuginfod_set_user_data) \ + X(debuginfod_get_user_data) \ + X(debuginfod_get_url) +#else +#define DRGN_DEBUGINFOD_0_179_FUNCTIONS +#endif + +#define DRGN_DEBUGINFOD_FUNCTIONS \ + X(debuginfod_begin) \ + X(debuginfod_end) \ + X(debuginfod_find_debuginfo) \ + X(debuginfod_find_executable) \ + X(debuginfod_set_progressfn) \ + DRGN_DEBUGINFOD_0_179_FUNCTIONS + +#if ENABLE_DLOPEN_DEBUGINFOD +#include + +#define X(name) static typeof(&name) drgn_##name; +DRGN_DEBUGINFOD_FUNCTIONS +#undef X + +__attribute__((__constructor__)) +static void drgn_dlopen_debuginfod(void) { - return (struct drgn_module_key){ - .build_id = (*entry)->build_id, - .build_id_len = (*entry)->build_id_len, - .start = (*entry)->start, - .end = (*entry)->end, - }; + void *handle = dlopen(DEBUGINFOD_SONAME, RTLD_LAZY); + if (handle) { + #define X(name) drgn_##name = dlsym(handle, #name); + DRGN_DEBUGINFOD_FUNCTIONS + #undef X + + #define X(name) || !drgn_##name + if (0 DRGN_DEBUGINFOD_FUNCTIONS) { + #undef X + #define X(name) drgn_##name = NULL; + DRGN_DEBUGINFOD_FUNCTIONS + #undef X + dlclose(handle); + } + } } -static inline struct hash_pair -drgn_module_key_hash_pair(const struct drgn_module_key *key) +bool drgn_have_debuginfod(void) { - size_t hash = hash_bytes(key->build_id, key->build_id_len); - hash = hash_combine(hash, key->start); - hash = hash_combine(hash, key->end); - return hash_pair_from_avalanching_hash(hash); + return drgn_debuginfod_begin != NULL; } -static inline bool drgn_module_key_eq(const struct drgn_module_key *a, - 
const struct drgn_module_key *b) -{ - return (a->build_id_len == b->build_id_len && - memcmp(a->build_id, b->build_id, a->build_id_len) == 0 && - a->start == b->start && a->end == b->end); -} -DEFINE_HASH_TABLE_FUNCTIONS(drgn_module_table, drgn_module_key, - drgn_module_key_hash_pair, drgn_module_key_eq); - -DEFINE_HASH_SET_FUNCTIONS(c_string_set, c_string_key_hash_pair, - c_string_key_eq); - -/** - * @c Dwfl_Callbacks::find_elf() implementation. - * - * If the ELF file was reported directly, this returns it. Otherwise, it falls - * back to an appropriate callback. - * - * Ideally we'd use @c dwfl_report_elf() instead, but that doesn't take an @c - * Elf handle, which we need for a couple of reasons: - * - * - We usually already have the @c Elf handle open in order to identify the - * file. - * - For kernel modules, we set the section addresses in the @c Elf handle - * ourselves instead of using @c Dwfl_Callbacks::section_address(). - * - * Additionally, there's a special case for vmlinux. It is usually an @c ET_EXEC - * ELF file, but when KASLR is enabled, it needs to be handled like an @c ET_DYN - * file. libdwfl has a hack for this when @c dwfl_report_module() is used, but - * @ref dwfl_report_elf() bypasses this hack. - * - * So, we're stuck using @c dwfl_report_module() and this dummy callback. - */ -static int drgn_dwfl_find_elf(Dwfl_Module *dwfl_module, void **userdatap, - const char *name, Dwarf_Addr base, - char **file_name, Elf **elfp) -{ - struct drgn_module *module = *userdatap; - if (module->elf) { - *file_name = module->path; - int fd = module->fd; - *elfp = module->elf; - // libdwfl consumes the returned path, file descriptor, and ELF - // handle, so clear the fields. 
- module->path = NULL; - module->fd = -1; - module->elf = NULL; - return fd; - } - if (module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { - *elfp = NULL; - return -1; - } else if (module->prog->flags & DRGN_PROGRAM_IS_LIVE) { - return dwfl_linux_proc_find_elf(dwfl_module, userdatap, name, - base, file_name, elfp); - } else { - return dwfl_build_id_find_elf(dwfl_module, userdatap, name, - base, file_name, elfp); - } +#else +// GCC and Clang optimize out the function pointer. +#define X(name) static const typeof(&name) drgn_##name = name; +DRGN_DEBUGINFOD_FUNCTIONS +#undef X +#endif + +#undef DRGN_DEBUGINFOD_FUNCTIONS +#undef DRGN_DEBUGINFOD_0_179_FUNCTIONS +#endif + +static inline Dwarf *drgn_elf_file_dwarf_key(struct drgn_elf_file * const *entry) +{ + return (*entry)->_dwarf; } +DEFINE_HASH_TABLE_FUNCTIONS(drgn_elf_file_dwarf_table, drgn_elf_file_dwarf_key, + ptr_key_hash_pair, scalar_key_eq); +DEFINE_VECTOR(drgn_module_vector, struct drgn_module *); -/** - * @c Dwfl_Callbacks::section_address() implementation. - * - * We set the section header @c sh_addr in memory instead of using this, but - * libdwfl requires the callback pointer to be non-@c NULL. It will be called - * for any sections that still have a zero @c sh_addr, meaning they are not - * present in memory. 
- */ -static int drgn_dwfl_section_address(Dwfl_Module *module, void **userdatap, - const char *name, Dwarf_Addr base, - const char *secname, Elf32_Word shndx, - const GElf_Shdr *shdr, Dwarf_Addr *addr) +static inline const char *drgn_module_entry_name(struct drgn_module * const *entry) { - *addr = -1; - return DWARF_CB_OK; + return (*entry)->name; } -static const Dwfl_Callbacks drgn_dwfl_callbacks = { - .find_elf = drgn_dwfl_find_elf, - .find_debuginfo = dwfl_standard_find_debuginfo, - .section_address = drgn_dwfl_section_address, -}; +DEFINE_HASH_TABLE_FUNCTIONS(drgn_module_table, drgn_module_entry_name, + c_string_key_hash_pair, c_string_key_eq); -static void drgn_module_destroy(struct drgn_module *module) +static inline uint64_t +drgn_module_address_range_key(const struct drgn_module_address_range *entry) { - if (module) { - drgn_error_destroy(module->err); - drgn_module_orc_info_deinit(module); - drgn_module_dwarf_info_deinit(module); - elf_end(module->elf); - if (module->fd != -1) - close(module->fd); - free(module->path); - for (struct drgn_elf_file_dwarf_table_iterator it = - drgn_elf_file_dwarf_table_first(&module->split_dwarf_files); - it.entry; - it = drgn_elf_file_dwarf_table_next(it)) - drgn_elf_file_destroy(*it.entry); - drgn_elf_file_dwarf_table_deinit(&module->split_dwarf_files); - if (module->debug_file != module->loaded_file) - drgn_elf_file_destroy(module->debug_file); - drgn_elf_file_destroy(module->loaded_file); - free(module->name); - free(module); - } -} - -static void drgn_module_finish_indexing(struct drgn_debug_info *dbinfo, - struct drgn_module *module) -{ - module->state = DRGN_DEBUG_INFO_MODULE_INDEXED; - if (module->name) { - int ret = c_string_set_insert(&dbinfo->module_names, - (const char **)&module->name, - NULL); - /* drgn_debug_info_update_index() should've reserved enough. 
*/ - assert(ret != -1); - } -} - -/* - * Wrapper around dwfl_report_end() that works around a libdwfl bug which causes - * it to close stdin when it frees some modules that were reported by - * dwfl_core_file_report(). This was fixed in elfutils 0.177 by commit - * d37f6ea7e3e5 ("libdwfl: Fix fd leak/closing wrong fd after - * dwfl_core_file_report()"), but we support older versions. - */ -static int my_dwfl_report_end(struct drgn_debug_info *dbinfo, - int (*removed)(Dwfl_Module *, void *, - const char *, Dwarf_Addr, void *), - void *arg) -{ - int fd = -1; - if ((dbinfo->prog->flags - & (DRGN_PROGRAM_IS_LINUX_KERNEL | DRGN_PROGRAM_IS_LIVE)) == 0) - fd = dup(0); - int ret = dwfl_report_end(dbinfo->dwfl, removed, arg); - if (fd != -1) { - dup2(fd, 0); - close(fd); - } - return ret; -} - -struct drgn_dwfl_module_removed_arg { - struct drgn_debug_info *dbinfo; - bool finish_indexing; - bool free_all; -}; + return entry->start; +} -static int drgn_dwfl_module_removed(Dwfl_Module *dwfl_module, void *userdatap, - const char *name, Dwarf_Addr base, - void *_arg) -{ - struct drgn_dwfl_module_removed_arg *arg = _arg; - /* - * userdatap is actually a void ** like for the other libdwfl callbacks, - * but dwfl_report_end() has the wrong signature for the removed - * callback. - */ - struct drgn_module *module = *(void **)userdatap; - if (arg->finish_indexing && module && - module->state == DRGN_DEBUG_INFO_MODULE_INDEXING) - drgn_module_finish_indexing(arg->dbinfo, module); - if (arg->free_all || !module || - module->state != DRGN_DEBUG_INFO_MODULE_INDEXED) { - drgn_module_destroy(module); - } else { - /* - * The module was already indexed. Report it again so libdwfl - * doesn't remove it. 
- */ - Dwarf_Addr end; - dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, - NULL, NULL); - dwfl_report_module(arg->dbinfo->dwfl, name, base, end); - } - return DWARF_CB_OK; +DEFINE_BINARY_SEARCH_TREE_FUNCTIONS(drgn_module_address_tree, node, + drgn_module_address_range_key, + binary_search_tree_scalar_cmp, splay); + +static void drgn_module_free_section_addresses(struct drgn_module *module) +{ + hash_table_for_each(drgn_module_section_address_map, it, + &module->section_addresses) + free(it.entry->key); } -static void drgn_debug_info_free_modules(struct drgn_debug_info *dbinfo, - bool finish_indexing, bool free_all) +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_by_name(struct drgn_program *prog, + const char *name) { - for (struct drgn_module_table_iterator it = - drgn_module_table_first(&dbinfo->modules); it.entry; ) { - struct drgn_module *module = *it.entry; - struct drgn_module **nextp = it.entry; - do { - struct drgn_module *next = module->next; - if (finish_indexing && - module->state == DRGN_DEBUG_INFO_MODULE_INDEXING) - drgn_module_finish_indexing(dbinfo, module); - if (free_all || - module->state != DRGN_DEBUG_INFO_MODULE_INDEXED) { - if (module == *nextp) { - if (nextp == it.entry && !next) { - it = drgn_module_table_delete_iterator(&dbinfo->modules, - it); - } else { - if (!next) - it = drgn_module_table_next(it); - *nextp = next; - } - } - void **userdatap; - dwfl_module_info(module->dwfl_module, - &userdatap, NULL, NULL, NULL, - NULL, NULL, NULL); - *userdatap = NULL; - drgn_module_destroy(module); - } else { - if (!next) - it = drgn_module_table_next(it); - nextp = &module->next; - } - module = next; - } while (module); - } + struct drgn_module_table_iterator it = + drgn_module_table_search(&prog->dbinfo.modules, &name); + return it.entry ? 
*it.entry : NULL; +} - dwfl_report_begin(dbinfo->dwfl); - struct drgn_dwfl_module_removed_arg arg = { - .dbinfo = dbinfo, - .finish_indexing = finish_indexing, - .free_all = free_all, - }; - my_dwfl_report_end(dbinfo, drgn_dwfl_module_removed, &arg); +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_by_address(struct drgn_program *prog, + uint64_t address) +{ + struct drgn_module_address_tree_iterator it = + drgn_module_address_tree_search_le(&prog->dbinfo.modules_by_address, + &address); + if (!it.entry || address >= it.entry->end) + return NULL; + return it.entry->module; } -struct drgn_error * -drgn_debug_info_report_error(struct drgn_debug_info_load_state *load, - const char *name, const char *message, - struct drgn_error *err) -{ - if (err && err->code == DRGN_ERROR_NO_MEMORY) { - /* Always fail hard if we're out of memory. */ - goto err; - } - if (load->num_errors == 0 && - !string_builder_append(&load->errors, - "missing some debugging symbols (see https://drgn.readthedocs.io/en/latest/getting_debugging_symbols.html):")) - goto err; - if (load->num_errors < load->max_errors) { - if (!string_builder_line_break(&load->errors)) - goto err; - if (!string_builder_append(&load->errors, " ")) - goto err; - if (name && !string_builder_append(&load->errors, name)) - goto err; - if (name && (message || err) && - !string_builder_append(&load->errors, " (")) - goto err; - if (message && !string_builder_append(&load->errors, message)) - goto err; - if (message && err && - !string_builder_append(&load->errors, ": ")) - goto err; - if (err && !string_builder_append_error(&load->errors, err)) - goto err; - if (name && (message || err) && - !string_builder_appendc(&load->errors, ')')) - goto err; +static struct drgn_module *drgn_module_find(struct drgn_program *prog, + enum drgn_module_kind kind, + const char *name, uint64_t info) +{ + struct drgn_module_table_iterator it = + drgn_module_table_search(&prog->dbinfo.modules, &name); + if (!it.entry) + return NULL; + struct 
drgn_module *module = *it.entry; + while (module->kind != kind || module->info != info) { + module = module->next_same_name; + if (!module) + break; } - load->num_errors++; - drgn_error_destroy(err); - return NULL; - -err: - drgn_error_destroy(err); - return &drgn_enomem; + return module; } static struct drgn_error * -drgn_debug_info_report_module(struct drgn_debug_info_load_state *load, - const void *build_id, size_t build_id_len, - uint64_t start, uint64_t end, const char *name, - Dwfl_Module *dwfl_module, const char *path, - int fd, Elf *elf, bool *new_ret) +drgn_module_find_or_create(struct drgn_program *prog, + enum drgn_module_kind kind, const char *name, + uint64_t info, struct drgn_module **ret, + bool *new_ret) { - struct drgn_debug_info *dbinfo = load->dbinfo; struct drgn_error *err; - char *path_key = NULL; - - if (new_ret) - *new_ret = false; struct hash_pair hp; - // Silence -Wmaybe-uninitialized false positive last seen with GCC 12 on - // i386 and Arm. - struct drgn_module_table_iterator it = {}; - if (build_id_len) { - struct drgn_module_key key = { - .build_id = build_id, - .build_id_len = build_id_len, - .start = start, - .end = end, - }; - hp = drgn_module_table_hash(&key); - it = drgn_module_table_search_hashed(&dbinfo->modules, &key, - hp); - if (it.entry && - (*it.entry)->state == DRGN_DEBUG_INFO_MODULE_INDEXED) { - /* We've already indexed this module. 
*/ - err = NULL; - goto free; - } - } - - if (!dwfl_module) { - path_key = realpath(path, NULL); - if (!path_key) { - path_key = strdup(path); - if (!path_key) { - err = &drgn_enomem; - goto free; + struct drgn_module_table_iterator it; + if (kind == DRGN_MODULE_MAIN) { + if (prog->dbinfo.main_module) { + if (strcmp(prog->dbinfo.main_module->name, name) != 0) { + return drgn_error_create(DRGN_ERROR_LOOKUP, + "main module already exists with different name"); } + *ret = prog->dbinfo.main_module; + if (new_ret) + *new_ret = false; + return NULL; } - - dwfl_module = dwfl_report_module(dbinfo->dwfl, path_key, start, - end); - if (!dwfl_module) { - err = drgn_error_libdwfl(); - goto free; + hp = drgn_module_table_hash(&name); + it.entry = NULL; + } else { + hp = drgn_module_table_hash(&name); + it = drgn_module_table_search_hashed(&prog->dbinfo.modules, + &name, hp); + if (it.entry) { + struct drgn_module *module = *it.entry; + do { + if (module->kind == kind && module->info == info) { + *ret = module; + if (new_ret) + *new_ret = false; + return NULL; + } + module = module->next_same_name; + } while (module); } } - void **userdatap; - dwfl_module_info(dwfl_module, &userdatap, NULL, NULL, NULL, NULL, NULL, - NULL); - if (*userdatap) { - /* We've already reported this file at this offset. */ - err = NULL; - goto free; - } - if (new_ret) - *new_ret = true; - struct drgn_module *module = calloc(1, sizeof(*module)); - if (!module) { + if (!module) + return &drgn_enomem; + + module->prog = prog; + module->kind = kind; + module->info = info; + drgn_object_init(&module->object, prog); + // Linux userspace core dumps usually filter out file-backed mappings + // (see coredump_filter in core(5)), so we need the loaded file to read + // the text. Additionally, .eh_frame is in the loaded file and not the + // debug file. 
+ // + // Linux kernel core dumps preserve the main kernel and kernel module + // text, and the kernel doesn't use .eh_frame, so we don't need the + // loaded file for the kernel. + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL + && (kind == DRGN_MODULE_MAIN || kind == DRGN_MODULE_RELOCATABLE)) + module->loaded_file_status = DRGN_MODULE_FILE_DONT_NEED; + else + module->loaded_file_status = DRGN_MODULE_FILE_WANT; + module->debug_file_status = DRGN_MODULE_FILE_WANT; + + module->name = strdup(name); + if (!module->name) { err = &drgn_enomem; - goto free; + goto err_module; } - module->prog = load->dbinfo->prog; - module->state = DRGN_DEBUG_INFO_MODULE_NEW; - module->build_id = build_id; - module->build_id_len = build_id_len; - module->start = start; - module->end = end; - if (name) { - module->name = strdup(name); - if (!module->name) { - err = &drgn_enomem; - free(module); - goto free; - } - } - module->dwfl_module = dwfl_module; - module->path = path_key; - module->fd = fd; - module->elf = elf; - drgn_elf_file_dwarf_table_init(&module->split_dwarf_files); - /* path_key, fd and elf are owned by the module now. */ - - if (!drgn_module_vector_append(&load->new_modules, &module)) { - drgn_module_destroy(module); - return &drgn_enomem; + if (it.entry) { + module->next_same_name = *it.entry; + *it.entry = module; + } else if (drgn_module_table_insert_searched(&prog->dbinfo.modules, + &module, hp, NULL) < 0) { + err = &drgn_enomem; + goto err_name; } - if (build_id_len) { - if (it.entry) { - /* - * The first module with this build ID is in - * new_modules, so insert it after in the list, not - * before. 
- */ - module->next = (*it.entry)->next; - (*it.entry)->next = module; - } else if (drgn_module_table_insert_searched(&dbinfo->modules, - &module, hp, - NULL) < 0) { - drgn_module_vector_pop(&load->new_modules); - drgn_module_destroy(module); - return &drgn_enomem; - } + if (kind == DRGN_MODULE_MAIN) + prog->dbinfo.main_module = module; + prog->dbinfo.modules_generation++; + + drgn_elf_file_dwarf_table_init(&module->split_dwarf_files); + drgn_module_section_address_map_init(&module->section_addresses); + + SWITCH_ENUM(module->kind) { + case DRGN_MODULE_MAIN: + drgn_log_debug(prog, "created main module %s", module->name); + break; + case DRGN_MODULE_SHARED_LIBRARY: + drgn_log_debug(prog, + "created shared library module %s@0x%" PRIx64, + module->name, module->info); + break; + case DRGN_MODULE_VDSO: + drgn_log_debug(prog, "created vDSO module %s@0x%" PRIx64, + module->name, module->info); + break; + case DRGN_MODULE_RELOCATABLE: + drgn_log_debug(prog, "created relocatable module %s@0x%" PRIx64, + module->name, module->info); + break; + case DRGN_MODULE_EXTRA: + drgn_log_debug(prog, "created extra module %s 0x%" PRIx64, + module->name, module->info); + break; + default: + UNREACHABLE(); } - *userdatap = module; + + *ret = module; + if (new_ret) + *new_ret = true; return NULL; -free: - elf_end(elf); - if (fd != -1) - close(fd); - free(path_key); +err_name: + free(module->name); +err_module: + drgn_object_deinit(&module->object); + free(module); return err; } -struct drgn_error * -drgn_debug_info_report_elf(struct drgn_debug_info_load_state *load, - const char *path, int fd, Elf *elf, uint64_t start, - uint64_t end, const char *name, bool *new_ret) +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_main(struct drgn_program *prog, + const char *name) { - - struct drgn_error *err; - const void *build_id; - ssize_t build_id_len = drgn_elf_gnu_build_id(elf, &build_id); - if (build_id_len < 0) { - err = drgn_debug_info_report_error(load, path, NULL, - 
drgn_error_libelf()); - elf_end(elf); - close(fd); - return err; - } else if (build_id_len == 0) { - build_id = NULL; - } - return drgn_debug_info_report_module(load, build_id, build_id_len, - start, end, name, NULL, path, fd, - elf, new_ret); + if (name && prog->dbinfo.main_module + && strcmp(prog->dbinfo.main_module->name, name) != 0) + return NULL; + return prog->dbinfo.main_module; } -static int drgn_debug_info_report_dwfl_module(Dwfl_Module *dwfl_module, - void **userdatap, - const char *name, Dwarf_Addr base, - void *arg) +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_find_or_create_main(struct drgn_program *prog, + const char *name, + struct drgn_module **ret, + bool *new_ret) { - struct drgn_debug_info_load_state *load = arg; - struct drgn_error *err; - - if (*userdatap) { - /* - * This was either reported from drgn_debug_info_report_elf() or - * already indexed. - */ - return DWARF_CB_OK; - } - - const unsigned char *build_id; - GElf_Addr build_id_vaddr; - int build_id_len = dwfl_module_build_id(dwfl_module, &build_id, - &build_id_vaddr); - if (build_id_len < 0) { - err = drgn_debug_info_report_error(load, name, NULL, - drgn_error_libdwfl()); - if (err) - goto err; - } else if (build_id_len == 0) { - build_id = NULL; - } - Dwarf_Addr end; - dwfl_module_info(dwfl_module, NULL, NULL, &end, NULL, NULL, NULL, NULL); - err = drgn_debug_info_report_module(load, build_id, build_id_len, base, - end, NULL, dwfl_module, name, -1, - NULL, NULL); - if (err) - goto err; - return DWARF_CB_OK; + return drgn_module_find_or_create(prog, DRGN_MODULE_MAIN, name, 0, ret, + new_ret); +} -err: - drgn_error_destroy(err); - return DWARF_CB_ABORT; +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_shared_library(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address) +{ + return drgn_module_find(prog, DRGN_MODULE_SHARED_LIBRARY, name, + dynamic_address); } -static struct drgn_error *drgn_get_nt_file(Elf *elf, const char **ret, - size_t *len_ret) 
+LIBDRGN_PUBLIC struct drgn_error * +drgn_module_find_or_create_shared_library(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address, + struct drgn_module **ret, + bool *new_ret) { - size_t phnum; - if (elf_getphdrnum(elf, &phnum) != 0) - return drgn_error_libelf(); - for (size_t i = 0; i < phnum; i++) { - GElf_Phdr phdr_mem, *phdr = gelf_getphdr(elf, i, &phdr_mem); - if (!phdr) - return drgn_error_libelf(); - if (phdr->p_type == PT_NOTE) { - Elf_Data *data = elf_getdata_rawchunk(elf, - phdr->p_offset, - phdr->p_filesz, - note_header_type(phdr->p_align)); - if (!data) - return drgn_error_libelf(); - GElf_Nhdr nhdr; - size_t offset = 0, name_offset, desc_offset; - while (offset < data->d_size && - (offset = gelf_getnote(data, offset, &nhdr, - &name_offset, - &desc_offset))) { - const char *name = - (char *)data->d_buf + name_offset; - if (nhdr.n_namesz == sizeof("CORE") && - memcmp(name, "CORE", sizeof("CORE")) == 0 && - nhdr.n_type == NT_FILE) { - *ret = (char *)data->d_buf + desc_offset; - *len_ret = nhdr.n_descsz; - return NULL; - } - } - } - } - *ret = NULL; - *len_ret = 0; - return NULL; + return drgn_module_find_or_create(prog, DRGN_MODULE_SHARED_LIBRARY, + name, dynamic_address, ret, new_ret); } -struct drgn_mapped_file_segment { - uint64_t start; - uint64_t end; - uint64_t file_offset; -}; +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_vdso(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address) +{ + return drgn_module_find(prog, DRGN_MODULE_VDSO, name, dynamic_address); +} -DEFINE_VECTOR(drgn_mapped_file_segment_vector, struct drgn_mapped_file_segment); +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_find_or_create_vdso(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address, + struct drgn_module **ret, + bool *new_ret) +{ + return drgn_module_find_or_create(prog, DRGN_MODULE_VDSO, name, + dynamic_address, ret, new_ret); +} -DEFINE_HASH_MAP(drgn_mapped_files, const char *, - struct 
drgn_mapped_file_segment_vector, c_string_key_hash_pair, - c_string_key_eq); +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_relocatable(struct drgn_program *prog, + const char *name, + uint64_t address) +{ + return drgn_module_find(prog, DRGN_MODULE_RELOCATABLE, name, address); +} -struct userspace_core_report_state { - struct drgn_mapped_files files; - void *phdr_buf; - size_t phdr_buf_capacity; - void *segment_buf; - size_t segment_buf_capacity; -}; +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_find_or_create_relocatable(struct drgn_program *prog, + const char *name, uint64_t address, + struct drgn_module **ret, bool *new_ret) +{ + return drgn_module_find_or_create(prog, DRGN_MODULE_RELOCATABLE, name, + address, ret, new_ret); +} -static struct drgn_error *parse_nt_file_error(struct binary_buffer *bb, - const char *pos, - const char *message) +LIBDRGN_PUBLIC +struct drgn_module *drgn_module_find_extra(struct drgn_program *prog, + const char *name, uint64_t id) { - return drgn_error_create(DRGN_ERROR_OTHER, "couldn't parse NT_FILE"); + return drgn_module_find(prog, DRGN_MODULE_EXTRA, name, id); } -static bool -drgn_mapped_file_segments_contiguous(const struct drgn_mapped_file_segment *segment1, - const struct drgn_mapped_file_segment *segment2) +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_find_or_create_extra(struct drgn_program *prog, + const char *name, + uint64_t id, + struct drgn_module **ret, + bool *new_ret) { - if (segment1->end != segment2->start) - return false; - uint64_t size = segment1->end - segment1->start; - return segment1->file_offset + size == segment2->file_offset; + return drgn_module_find_or_create(prog, DRGN_MODULE_EXTRA, name, id, + ret, new_ret); } -static struct drgn_error * -userspace_core_get_mapped_files(struct drgn_debug_info_load_state *load, - struct userspace_core_report_state *core, - const char *nt_file, size_t nt_file_len) +static void +drgn_module_clear_wanted_supplementary_debug_file(struct drgn_module *module) { - 
struct drgn_error *err; + struct drgn_module_wanted_supplementary_file *wanted = + module->wanted_supplementary_debug_file; + if (wanted) { + free(wanted->checksum_str); + if (wanted->file != module->loaded_file + && wanted->file != module->debug_file) + drgn_elf_file_destroy(wanted->file); + free(wanted); + module->wanted_supplementary_debug_file = NULL; + } +} - GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(load->dbinfo->prog->core, - &ehdr_mem); - if (!ehdr) - return drgn_error_libelf(); - bool is_64_bit = ehdr->e_ident[EI_CLASS] == ELFCLASS64; - bool little_endian = ehdr->e_ident[EI_DATA] == ELFDATA2LSB; +// Note: this doesn't remove the module from the module tables. +static void drgn_module_destroy(struct drgn_module *module) +{ + drgn_module_free_section_addresses(module); + drgn_module_section_address_map_deinit(&module->section_addresses); + drgn_module_orc_info_deinit(module); + drgn_module_dwarf_info_deinit(module); + drgn_module_clear_wanted_supplementary_debug_file(module); + drgn_elf_file_destroy(module->gnu_debugdata_file); + drgn_elf_file_destroy(module->supplementary_debug_file); + if (module->debug_file != module->loaded_file) + drgn_elf_file_destroy(module->debug_file); + drgn_elf_file_destroy(module->loaded_file); + if (module->address_ranges != &module->single_address_range) + free(module->address_ranges); + free(module->build_id); + free(module->name); + drgn_object_deinit(&module->object); + free(module); +} - struct binary_buffer bb; - binary_buffer_init(&bb, nt_file, nt_file_len, little_endian, - parse_nt_file_error); +static void drgn_module_delete_address_ranges(struct drgn_module *module) +{ + for (size_t i = 0; i < module->num_address_ranges; i++) { + drgn_module_address_tree_delete_entry(&module->prog->dbinfo.modules_by_address, + &module->address_ranges[i]); + } + if (module->address_ranges != &module->single_address_range) + free(module->address_ranges); +} - /* - * fs/binfmt_elf.c in the Linux kernel source code documents the format - * 
of NT_FILE as: - * - * long count -- how many files are mapped - * long page_size -- units for file_ofs - * array of [COUNT] elements of - * long start - * long end - * long file_ofs - * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... - */ - uint64_t count, page_size; - if (is_64_bit) { - if ((err = binary_buffer_next_u64(&bb, &count))) - return err; - if (count > UINT64_MAX / 24) - return binary_buffer_error(&bb, "count is too large"); - if ((err = binary_buffer_next_u64(&bb, &page_size)) || - (err = binary_buffer_skip(&bb, count * 24))) - return err; +void drgn_module_delete(struct drgn_module *module) +{ + assert(!module->loaded_file); + assert(!module->debug_file); + drgn_module_delete_address_ranges(module); + // So drgn_module_destroy() doesn't free it again. + module->address_ranges = NULL; + + const char *name = module->name; + struct drgn_module_table_iterator it = + drgn_module_table_search(&module->prog->dbinfo.modules, &name); + if (*it.entry == module && !module->next_same_name) { + drgn_module_table_delete_iterator(&module->prog->dbinfo.modules, + it); } else { - if ((err = binary_buffer_next_u32_into_u64(&bb, &count))) - return err; - if (count > UINT64_MAX / 12) - return binary_buffer_error(&bb, "count is too large"); - if ((err = binary_buffer_next_u32_into_u64(&bb, &page_size)) || - (err = binary_buffer_skip(&bb, count * 12))) - return err; + struct drgn_module **modulep = it.entry; + while (*modulep != module) + modulep = &(*modulep)->next_same_name; + *modulep = module->next_same_name; } + if (module->kind == DRGN_MODULE_MAIN) + module->prog->dbinfo.main_module = NULL; + module->prog->dbinfo.modules_generation++; - for (uint64_t i = 0; i < count; i++) { - struct drgn_mapped_file_segment segment; - if (is_64_bit) { - memcpy(&segment, nt_file + 16 + i * 24, 24); - if (bb.bswap) { - segment.start = bswap_64(segment.start); - segment.end = bswap_64(segment.end); - segment.file_offset = bswap_64(segment.file_offset); - } - } else { - 
struct { - uint32_t start; - uint32_t end; - uint32_t file_offset; - } segment32; - memcpy(&segment32, nt_file + 8 + i * 12, 12); - if (bb.bswap) { - segment.start = bswap_32(segment32.start); - segment.end = bswap_32(segment32.end); - segment.file_offset = bswap_32(segment32.file_offset); - } else { - segment.start = segment32.start; - segment.end = segment32.end; - segment.file_offset = segment32.file_offset; - } - } - segment.file_offset *= page_size; + drgn_module_destroy(module); +} - struct drgn_mapped_files_entry entry = { - .key = bb.pos, - }; - if ((err = binary_buffer_skip_string(&bb))) - return err; - struct drgn_mapped_files_iterator it; - int r = drgn_mapped_files_insert(&core->files, &entry, &it); - if (r < 0) - return &drgn_enomem; - if (r == 1) - drgn_mapped_file_segment_vector_init(&it.entry->value); - - /* - * The Linux kernel creates separate entries for contiguous - * mappings with different memory protections even though the - * protection is not included in NT_FILE. Merge them if we can. 
- */ - if (!drgn_mapped_file_segment_vector_empty(&it.entry->value) - && drgn_mapped_file_segments_contiguous(drgn_mapped_file_segment_vector_last(&it.entry->value), - &segment)) - drgn_mapped_file_segment_vector_last(&it.entry->value)->end = segment.end; - else if (!drgn_mapped_file_segment_vector_append(&it.entry->value, - &segment)) - return &drgn_enomem; - } - return NULL; +LIBDRGN_PUBLIC +struct drgn_program *drgn_module_program(const struct drgn_module *module) +{ + return module->prog; } -static bool build_id_matches(Elf *elf, const void *build_id, - size_t build_id_len) +LIBDRGN_PUBLIC +enum drgn_module_kind drgn_module_kind(const struct drgn_module *module) { - const void *elf_build_id; - ssize_t elf_build_id_len = drgn_elf_gnu_build_id(elf, &elf_build_id); - if (elf_build_id_len < 0) + return module->kind; +} + +LIBDRGN_PUBLIC const char *drgn_module_name(const struct drgn_module *module) +{ + return module->name; +} + +LIBDRGN_PUBLIC uint64_t drgn_module_info(const struct drgn_module *module) +{ + return module->info; +} + +LIBDRGN_PUBLIC +bool drgn_module_num_address_ranges(const struct drgn_module *module, + size_t *ret) +{ + *ret = module->num_address_ranges; + return module->address_ranges != NULL; +} + +LIBDRGN_PUBLIC bool drgn_module_address_range(const struct drgn_module *module, + size_t i, uint64_t *start_ret, + uint64_t *end_ret) +{ + if (i >= module->num_address_ranges) return false; - return (elf_build_id_len == build_id_len && - memcmp(elf_build_id, build_id, build_id_len) == 0); + *start_ret = module->address_ranges[i].start; + *end_ret = module->address_ranges[i].end; + return true; } -static struct drgn_error * -userspace_core_elf_address_range(uint16_t e_type, size_t phnum, - struct drgn_error *(*get_phdr)(void *, size_t, GElf_Phdr *), - void *arg, - const struct drgn_mapped_file_segment *segments, - size_t num_segments, - const struct drgn_mapped_file_segment *ehdr_segment, - uint64_t *bias_ret, uint64_t *start_ret, - uint64_t *end_ret) 
+LIBDRGN_PUBLIC +struct drgn_error *drgn_module_set_address_range(struct drgn_module *module, + uint64_t start, uint64_t end) { - struct drgn_error *err; + // This is a special case instead of a wrapper around + // drgn_module_set_address_ranges() so we can avoid allocating memory. + // Since the old address range might be module->single_address_range, + // this has to do things in a different order. + + if (start >= end) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "invalid module address range"); + } - /* - * First, find the virtual address of the ELF header so that we can - * calculate the bias. - */ - uint64_t ehdr_vaddr; - size_t i; - for (i = 0; i < phnum; i++) { - GElf_Phdr phdr; - err = get_phdr(arg, i, &phdr); - if (err) - return err; - if (phdr.p_type == PT_LOAD) { - uint64_t align = phdr.p_align ? phdr.p_align : 1; - if ((phdr.p_offset & -align) == 0) { - ehdr_vaddr = phdr.p_vaddr & -align; - break; + drgn_module_delete_address_ranges(module); + + module->single_address_range.start = start; + module->single_address_range.end = end; + module->single_address_range.module = module; + + // We don't bother checking for overlapping address ranges, which + // shouldn't happen with well-formed programs and at worst causes + // spurious failed lookups. We may need to revisit this if it's a + // problem in practice. 
+ drgn_module_address_tree_insert(&module->prog->dbinfo.modules_by_address, + &module->single_address_range, NULL); + + module->address_ranges = &module->single_address_range; + module->num_address_ranges = 1; + return NULL; +} + +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_set_address_ranges(struct drgn_module *module, + uint64_t ranges[][2], + size_t num_ranges) +{ + if (num_ranges == 1) { + return drgn_module_set_address_range(module, ranges[0][0], + ranges[0][1]); + } + + _cleanup_free_ struct drgn_module_address_range *address_ranges = NULL; + if (num_ranges) { + address_ranges = + malloc_array(num_ranges, sizeof(*address_ranges)); + if (!address_ranges) + return &drgn_enomem; + for (size_t i = 0; i < num_ranges; i++) { + if (ranges[i][0] >= ranges[i][1]) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "invalid module address range"); } + address_ranges[i].start = ranges[i][0]; + address_ranges[i].end = ranges[i][1]; + address_ranges[i].module = module; } } - if (i >= phnum) { - /* - * No loadable segments contain the ELF header. This can't be - * our file. - */ - *bias_ret = 0; -not_loaded: - *start_ret = *end_ret = 0; - return NULL; + + drgn_module_delete_address_ranges(module); + + for (size_t i = 0; i < num_ranges; i++) { + // We don't bother checking for overlapping address ranges; see + // drgn_module_set_address_range(). + drgn_module_address_tree_insert(&module->prog->dbinfo.modules_by_address, + &address_ranges[i], NULL); } - *bias_ret = ehdr_segment->start - ehdr_vaddr; - if (*bias_ret != 0 && e_type == ET_EXEC) { - /* The executable is not loaded at the correct address. */ - goto not_loaded; - } - - /* - * Now check all of the program headers to (1) get the module address - * range and (2) make sure that they are mapped as expected. If we're - * lucky, this can detect a file that was mmap'd and not actually loaded - * by the kernel or dynamic loader. This could also be the wrong file. 
- */ - const struct drgn_mapped_file_segment *segment = segments; - const struct drgn_mapped_file_segment *end_segment = - segments + num_segments; - uint64_t start = 0, end = 0; - bool first = true; - for (i = 0; i < phnum; i++) { - GElf_Phdr phdr; - err = get_phdr(arg, i, &phdr); - if (err) - return err; - if (phdr.p_type != PT_LOAD) - continue; - uint64_t vaddr = phdr.p_vaddr + *bias_ret; - if (phdr.p_filesz != 0) { - /* - * Advance to the mapped segment containing the start - * address. - */ - while (vaddr >= segment->end) { - if (++segment == end_segment) - goto not_loaded; - if (vaddr < segment->start) - goto not_loaded; - } - if (segment->file_offset + (vaddr - segment->start) != - phdr.p_offset) { - /* - * The address in the core dump does not map to - * the segment's file offset. - */ - goto not_loaded; - } - if (phdr.p_filesz > segment->end - vaddr) { - /* Part of the segment is not mapped. */ - goto not_loaded; - } - } - if (first) { - uint64_t align = phdr.p_align ? phdr.p_align : 1; - start = vaddr & -align; - first = false; - } - end = vaddr + phdr.p_memsz; + + if (num_ranges) { + module->address_ranges = no_cleanup_ptr(address_ranges); + } else { + // We need a non-NULL pointer to distinguish this from the unset + // case. + module->address_ranges = &module->single_address_range; } - if (start >= end) - goto not_loaded; - *start_ret = start; - *end_ret = end; + module->num_address_ranges = num_ranges; return NULL; } -/* ehdr_buf must be aligned as Elf64_Ehdr. 
*/ -static void read_ehdr(const void *ehdr_buf, GElf_Ehdr *ret, bool *is_64_bit_ret, - bool *bswap_ret) -{ - *is_64_bit_ret = ((unsigned char *)ehdr_buf)[EI_CLASS] == ELFCLASS64; - bool little_endian = - ((unsigned char *)ehdr_buf)[EI_DATA] == ELFDATA2LSB; - *bswap_ret = little_endian != HOST_LITTLE_ENDIAN; - if (*is_64_bit_ret) { - const Elf64_Ehdr *ehdr64 = ehdr_buf; - if (*bswap_ret) { - memcpy(ret->e_ident, ehdr64->e_ident, EI_NIDENT); - ret->e_type = bswap_16(ehdr64->e_type); - ret->e_machine = bswap_16(ehdr64->e_machine); - ret->e_version = bswap_32(ehdr64->e_version); - ret->e_entry = bswap_64(ehdr64->e_entry); - ret->e_phoff = bswap_64(ehdr64->e_phoff); - ret->e_shoff = bswap_64(ehdr64->e_shoff); - ret->e_flags = bswap_32(ehdr64->e_flags); - ret->e_ehsize = bswap_16(ehdr64->e_ehsize); - ret->e_phentsize = bswap_16(ehdr64->e_phentsize); - ret->e_phnum = bswap_16(ehdr64->e_phnum); - ret->e_shentsize = bswap_16(ehdr64->e_shentsize); - ret->e_shnum = bswap_16(ehdr64->e_shnum); - ret->e_shstrndx = bswap_16(ehdr64->e_shstrndx); - } else { - *ret = *ehdr64; - } - } else { - const Elf32_Ehdr *ehdr32 = ehdr_buf; - memcpy(ret->e_ident, ehdr32->e_ident, EI_NIDENT); - if (*bswap_ret) { - ret->e_type = bswap_16(ehdr32->e_type); - ret->e_machine = bswap_16(ehdr32->e_machine); - ret->e_version = bswap_32(ehdr32->e_version); - ret->e_entry = bswap_32(ehdr32->e_entry); - ret->e_phoff = bswap_32(ehdr32->e_phoff); - ret->e_shoff = bswap_32(ehdr32->e_shoff); - ret->e_flags = bswap_32(ehdr32->e_flags); - ret->e_ehsize = bswap_16(ehdr32->e_ehsize); - ret->e_phentsize = bswap_16(ehdr32->e_phentsize); - ret->e_phnum = bswap_16(ehdr32->e_phnum); - ret->e_shentsize = bswap_16(ehdr32->e_shentsize); - ret->e_shnum = bswap_16(ehdr32->e_shnum); - ret->e_shstrndx = bswap_16(ehdr32->e_shstrndx); - } else { - ret->e_type = ehdr32->e_type; - ret->e_machine = ehdr32->e_machine; - ret->e_version = ehdr32->e_version; - ret->e_entry = ehdr32->e_entry; - ret->e_phoff = ehdr32->e_phoff; - 
ret->e_shoff = ehdr32->e_shoff; - ret->e_flags = ehdr32->e_flags; - ret->e_ehsize = ehdr32->e_ehsize; - ret->e_phentsize = ehdr32->e_phentsize; - ret->e_phnum = ehdr32->e_phnum; - ret->e_shentsize = ehdr32->e_shentsize; - ret->e_shnum = ehdr32->e_shnum; - ret->e_shstrndx = ehdr32->e_shstrndx; - } - } +LIBDRGN_PUBLIC void drgn_module_unset_address_ranges(struct drgn_module *module) +{ + drgn_module_delete_address_ranges(module); + module->address_ranges = NULL; + module->num_address_ranges = 0; } -/* phdr_buf must be aligned as Elf64_Phdr. */ -static void read_phdr(const void *phdr_buf, size_t i, bool is_64_bit, - bool bswap, GElf_Phdr *ret) +LIBDRGN_PUBLIC +bool drgn_module_contains_address(const struct drgn_module *module, + uint64_t address) { - if (is_64_bit) { - const Elf64_Phdr *phdr64 = (Elf64_Phdr *)phdr_buf + i; - if (bswap) { - ret->p_type = bswap_32(phdr64->p_type); - ret->p_flags = bswap_32(phdr64->p_flags); - ret->p_offset = bswap_64(phdr64->p_offset); - ret->p_vaddr = bswap_64(phdr64->p_vaddr); - ret->p_paddr = bswap_64(phdr64->p_paddr); - ret->p_filesz = bswap_64(phdr64->p_filesz); - ret->p_memsz = bswap_64(phdr64->p_memsz); - ret->p_align = bswap_64(phdr64->p_align); - } else { - *ret = *phdr64; - } - } else { - const Elf32_Phdr *phdr32 = (Elf32_Phdr *)phdr_buf + i; - if (bswap) { - ret->p_type = bswap_32(phdr32->p_type); - ret->p_offset = bswap_32(phdr32->p_offset); - ret->p_vaddr = bswap_32(phdr32->p_vaddr); - ret->p_paddr = bswap_32(phdr32->p_paddr); - ret->p_filesz = bswap_32(phdr32->p_filesz); - ret->p_memsz = bswap_32(phdr32->p_memsz); - ret->p_flags = bswap_32(phdr32->p_flags); - ret->p_align = bswap_32(phdr32->p_align); - } else { - ret->p_type = phdr32->p_type; - ret->p_offset = phdr32->p_offset; - ret->p_vaddr = phdr32->p_vaddr; - ret->p_paddr = phdr32->p_paddr; - ret->p_filesz = phdr32->p_filesz; - ret->p_memsz = phdr32->p_memsz; - ret->p_flags = phdr32->p_flags; - ret->p_align = phdr32->p_align; - } + for (size_t i = 0; i < 
module->num_address_ranges; i++) { + if (module->address_ranges[i].start <= address + && address < module->address_ranges[i].end) + return true; } + return false; } -struct core_get_phdr_arg { - const void *phdr_buf; - bool is_64_bit; - bool bswap; -}; - -static struct drgn_error * -core_get_phdr(void *arg_, size_t i, GElf_Phdr *ret) +LIBDRGN_PUBLIC +const char *drgn_module_build_id(const struct drgn_module *module, + const void **raw_ret, size_t *raw_len_ret) { - struct core_get_phdr_arg *arg = arg_; - read_phdr(arg->phdr_buf, i, arg->is_64_bit, arg->bswap, ret); - return NULL; + if (raw_ret) + *raw_ret = module->build_id; + if (raw_len_ret) + *raw_len_ret = module->build_id_len; + return module->build_id_str; } -struct userspace_core_identified_file { - const void *build_id; - size_t build_id_len; - uint64_t start, end; - bool ignore; - bool have_address_range; -}; +static void *drgn_module_alloc_build_id(size_t build_id_len) +{ + size_t alloc_size; + if (__builtin_mul_overflow(build_id_len, 3U, &alloc_size) || + __builtin_add_overflow(alloc_size, 1U, &alloc_size)) + return NULL; + return malloc(alloc_size); +} -static struct drgn_error * -userspace_core_identify_file(struct drgn_program *prog, - struct userspace_core_report_state *core, - const struct drgn_mapped_file_segment *segments, - size_t num_segments, - const struct drgn_mapped_file_segment *ehdr_segment, - struct userspace_core_identified_file *ret) +static void drgn_module_set_build_id_impl(struct drgn_module *module, + const void *build_id, + size_t build_id_len, + void *build_id_buf) { - struct drgn_error *err; + module->build_id = build_id_buf; + memcpy(module->build_id, build_id, build_id_len); - Elf64_Ehdr ehdr_buf; - err = drgn_program_read_memory(prog, &ehdr_buf, ehdr_segment->start, - sizeof(ehdr_buf), false); - if (err) { - if (err->code == DRGN_ERROR_FAULT) { - drgn_error_destroy(err); - err = NULL; - } - return err; - } - if (memcmp(&ehdr_buf, ELFMAG, SELFMAG) != 0) { - ret->ignore = true; - 
return NULL; - } + module->build_id_len = build_id_len; - GElf_Ehdr ehdr; - struct core_get_phdr_arg arg; - read_ehdr(&ehdr_buf, &ehdr, &arg.is_64_bit, &arg.bswap); - if (ehdr.e_type == ET_CORE || - ehdr.e_phnum == 0 || - ehdr.e_phentsize != - (arg.is_64_bit ? sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr))) { - ret->ignore = true; + module->build_id_str = (char *)build_id_buf + build_id_len; + hexlify(build_id, build_id_len, module->build_id_str); + module->build_id_str[2 * build_id_len] = '\0'; +} + +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_set_build_id(struct drgn_module *module, + const void *build_id, + size_t build_id_len) +{ + if (build_id_len == 0) { + free(module->build_id); + module->build_id = NULL; + module->build_id_len = 0; + module->build_id_str = NULL; return NULL; } - if (ehdr.e_phnum > SIZE_MAX / ehdr.e_phentsize || - !alloc_or_reuse(&core->phdr_buf, &core->phdr_buf_capacity, - ehdr.e_phnum * ehdr.e_phentsize)) + char *build_id_buf = drgn_module_alloc_build_id(build_id_len); + if (!build_id_buf) return &drgn_enomem; + free(module->build_id); + drgn_module_set_build_id_impl(module, build_id, build_id_len, + build_id_buf); + return NULL; +} - /* - * Check whether the mapped segment containing the file header also - * contains the program headers. This seems to be the case in practice. 
- */ - uint64_t ehdr_segment_file_end = - (ehdr_segment->file_offset + - (ehdr_segment->end - ehdr_segment->start)); - if (ehdr_segment_file_end < ehdr.e_phoff || - ehdr_segment_file_end - ehdr.e_phoff < - ehdr.e_phnum * ehdr.e_phentsize) - return NULL; +static struct drgn_error * +drgn_module_section_addresses_allowed(struct drgn_module *module, bool modify) +{ + if (module->kind != DRGN_MODULE_RELOCATABLE) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "section addresses are only supported for relocatable modules"); + } + if (modify && (module->loaded_file || module->debug_file)) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "section addresses cannot be modified after file is set"); + } + return NULL; +} - err = drgn_program_read_memory(prog, core->phdr_buf, - ehdr_segment->start + ehdr.e_phoff, - ehdr.e_phnum * ehdr.e_phentsize, false); - if (err) { - if (err->code == DRGN_ERROR_FAULT) { - drgn_error_destroy(err); - err = NULL; - } +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_get_section_address(struct drgn_module *module, + const char *name, + uint64_t *ret) +{ + struct drgn_error *err = + drgn_module_section_addresses_allowed(module, false); + if (err) return err; - } - arg.phdr_buf = core->phdr_buf; - - /* - * In theory, if the program has a huge number of program headers, they - * may not all be dumped. However, the largest binary I was able to find - * still had all program headers within 1k. - * - * It'd be more reliable to determine the bias based on the headers that - * were saved, use that to read the build ID, use that to find the ELF - * file, and then determine the address range directly from the ELF - * file. However, we need the address range to report the build ID to - * libdwfl, so we do it this way. 
- */ - uint64_t bias; - err = userspace_core_elf_address_range(ehdr.e_type, ehdr.e_phnum, - core_get_phdr, &arg, segments, - num_segments, ehdr_segment, - &bias, &ret->start, &ret->end); + struct drgn_module_section_address_map_iterator it = + drgn_module_section_address_map_search(&module->section_addresses, + (char **)&name); + if (!it.entry) + return &drgn_not_found; + *ret = it.entry->value; + return NULL; +} + +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_set_section_address(struct drgn_module *module, + const char *name, + uint64_t address) +{ + struct drgn_error *err = + drgn_module_section_addresses_allowed(module, true); if (err) return err; - if (ret->start >= ret->end) { - ret->ignore = true; + + struct hash_pair hp = + drgn_module_section_address_map_hash((char **)&name); + struct drgn_module_section_address_map_iterator it = + drgn_module_section_address_map_search_hashed(&module->section_addresses, + (char **)&name, + hp); + if (it.entry) { + it.entry->value = address; return NULL; } - ret->have_address_range = true; - - for (uint16_t i = 0; i < ehdr.e_phnum; i++) { - GElf_Phdr phdr; - core_get_phdr(&arg, i, &phdr); - if (phdr.p_type == PT_NOTE) { - if (phdr.p_filesz > SIZE_MAX || - !alloc_or_reuse(&core->segment_buf, - &core->segment_buf_capacity, - phdr.p_filesz)) - return &drgn_enomem; - err = drgn_program_read_memory(prog, core->segment_buf, - phdr.p_vaddr + bias, - phdr.p_filesz, false); - if (err) { - if (err->code == DRGN_ERROR_FAULT) { - drgn_error_destroy(err); - continue; - } else { - return err; - } - } - ret->build_id_len = - parse_gnu_build_id_from_notes(core->segment_buf, - phdr.p_filesz, - phdr.p_align == 8 - ? 
8 : 4, - arg.bswap, - &ret->build_id); - if (ret->build_id_len) - break; - } + struct drgn_module_section_address_map_entry entry = { + .key = strdup(name), + .value = address, + }; + if (!entry.key) + return &drgn_enomem; + if (drgn_module_section_address_map_insert_searched(&module->section_addresses, + &entry, hp, + NULL) < 0) { + free(entry.key); + return &drgn_enomem; } + module->section_addresses_generation++; return NULL; } -static struct drgn_error *elf_file_get_phdr(void *arg, size_t i, - GElf_Phdr *phdr) +struct drgn_error *drgn_module_delete_section_address(struct drgn_module *module, + const char *name) { - if (!gelf_getphdr(arg, i, phdr)) - return drgn_error_libelf(); + struct drgn_error *err = + drgn_module_section_addresses_allowed(module, true); + if (err) + return err; + + struct hash_pair hp = + drgn_module_section_address_map_hash((char **)&name); + struct drgn_module_section_address_map_iterator it = + drgn_module_section_address_map_search_hashed(&module->section_addresses, + (char **)&name, + hp); + if (!it.entry) + return &drgn_not_found; + + _cleanup_free_ _unused_ char *key_to_free = it.entry->key; + drgn_module_section_address_map_delete_iterator_hashed(&module->section_addresses, + it, hp); + module->section_addresses_generation++; return NULL; } -static struct drgn_error * -userspace_core_maybe_report_file(struct drgn_debug_info_load_state *load, - struct userspace_core_report_state *core, - const char *path, - const struct drgn_mapped_file_segment *segments, - size_t num_segments) +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_num_section_addresses(struct drgn_module *module, + size_t *ret) { - struct drgn_error *err; - struct drgn_program *prog = load->dbinfo->prog; - for (size_t ehdr_idx = 0; ehdr_idx < num_segments; ehdr_idx++) { - const struct drgn_mapped_file_segment *ehdr_segment = - &segments[ehdr_idx]; - /* - * There should always be a full page mapped, so even if it's a - * 32-bit file, we can read the 64-bit size. 
- */ - if (ehdr_segment->file_offset != 0 || - ehdr_segment->end - ehdr_segment->start < sizeof(Elf64_Ehdr)) - continue; + struct drgn_error *err = + drgn_module_section_addresses_allowed(module, false); + if (err) + return err; + *ret = drgn_module_section_address_map_size(&module->section_addresses); + return NULL; +} - /* - * This logic is complicated because we're dealing with two data - * sources that we can't completely trust: the memory in the - * core dump and the file at the path found in the core dump. - * - * First, we try to identify the mapped file contents in the - * core dump. Ideally, this will find a build ID. However, this - * can fail for a few reasons: - * - * 1. The file is not an ELF file. - * 2. The ELF file is not an executable or library. - * 3. The ELF file does not have a build ID. - * 4. The file header was not dumped to the core dump, in which - * case we can't tell whether this is an ELF file. Dumping - * the first page of an executable file has been the default - * behavior since Linux kernel commit 895021552d6f - * ("coredump: default - * CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y") (in v2.6.37), but - * it can be disabled at kernel build time or toggled at - * runtime. - * 5. The build ID or the necessary ELF metadata were not dumped - * in the core dump. This can happen if the necessary program - * headers or note segment were not in the first page of the - * file. - * 6. The file is mapped but not actually loaded into the - * program (e.g., if the program is a tool like a profiler or - * a debugger that mmaps binaries [like drgn itself!]). - * - * In cases 1 and 2, we can simply ignore the file. In cases - * 3-5, we blindly trust the path in the core dump. We can - * sometimes detect case 6 in - * userspace_core_elf_address_range(). 
- * - * There is also the possibility that the program modified or - * corrupted the ELF metadata in memory (more likely if the file - * was explicitly mmap'd, since the metadata will usually be - * read-only if it was loaded properly). We don't deal with that - * yet. - */ - struct userspace_core_identified_file identity = {}; - err = userspace_core_identify_file(prog, core, segments, - num_segments, ehdr_segment, - &identity); - if (err) - return err; - if (identity.ignore) - continue; +struct drgn_module_section_address_iterator { + struct drgn_module *module; + struct drgn_module_section_address_map_iterator map_it; + uint64_t generation; +}; -#define CLEAR_ELF() do { \ - elf = NULL; \ - fd = -1; \ -} while (0) -#define CLOSE_ELF() do { \ - elf_end(elf); \ - close(fd); \ - CLEAR_ELF(); \ -} while (0) - int fd; - Elf *elf; - /* - * There are a few things that can go wrong here: - * - * 1. The path no longer exists. - * 2. The path refers to a different ELF file than was in the - * core dump. - * 3. The path refers to something which isn't a valid ELF file. 
- */ - err = open_elf_file(path, &fd, &elf); - if (err) { - drgn_error_destroy(err); - CLEAR_ELF(); - } else if (identity.build_id_len > 0) { - if (!build_id_matches(elf, identity.build_id, - identity.build_id_len)) - CLOSE_ELF(); - } - - if (elf && !identity.have_address_range) { - GElf_Ehdr ehdr_mem, *ehdr; - size_t phnum; - if ((ehdr = gelf_getehdr(elf, &ehdr_mem)) && - (elf_getphdrnum(elf, &phnum) == 0)) { - uint64_t bias; - err = userspace_core_elf_address_range(ehdr->e_type, - phnum, - elf_file_get_phdr, - elf, - segments, - num_segments, - ehdr_segment, - &bias, - &identity.start, - &identity.end); - if (err || identity.start >= identity.end) { - drgn_error_destroy(err); - CLOSE_ELF(); - } else { - identity.have_address_range = true; - } - } else { - CLOSE_ELF(); - } - } +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_section_address_iterator_create(struct drgn_module *module, + struct drgn_module_section_address_iterator **ret) +{ + struct drgn_error *err = + drgn_module_section_addresses_allowed(module, false); + if (err) + return err; - if (elf) { - err = drgn_debug_info_report_elf(load, path, fd, elf, - identity.start, - identity.end, NULL, - NULL); - if (err) - return err; - } else { - if (!identity.have_address_range) - identity.start = identity.end = 0; - Dwfl_Module *dwfl_module = - dwfl_report_module(load->dbinfo->dwfl, path, - identity.start, - identity.end); - if (!dwfl_module) - return drgn_error_libdwfl(); - if (identity.build_id_len > 0 && - dwfl_module_report_build_id(dwfl_module, - identity.build_id, - identity.build_id_len, - 0)) - return drgn_error_libdwfl(); - } -#undef CLOSE_ELF -#undef CLEAR_ELF - } + struct drgn_module_section_address_iterator *it = malloc(sizeof(*it)); + if (!it) + return &drgn_enomem; + it->module = module; + it->map_it = drgn_module_section_address_map_first(&module->section_addresses); + it->generation = module->section_addresses_generation; + *ret = it; return NULL; } -static struct drgn_error * 
-userspace_core_report_mapped_files(struct drgn_debug_info_load_state *load, - struct userspace_core_report_state *core) +LIBDRGN_PUBLIC void +drgn_module_section_address_iterator_destroy(struct drgn_module_section_address_iterator *it) { + free(it); +} - struct drgn_error *err; - for (struct drgn_mapped_files_iterator it = - drgn_mapped_files_first(&core->files); - it.entry; it = drgn_mapped_files_next(it)) { - err = userspace_core_maybe_report_file(load, core, - it.entry->key, - drgn_mapped_file_segment_vector_begin(&it.entry->value), - drgn_mapped_file_segment_vector_size(&it.entry->value)); - if (err) - return err; +LIBDRGN_PUBLIC struct drgn_module * +drgn_module_section_address_iterator_module(struct drgn_module_section_address_iterator *it) +{ + return it->module; +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_section_address_iterator_next(struct drgn_module_section_address_iterator *it, + const char **name_ret, + uint64_t *address_ret) +{ + if (it->map_it.entry) { + if (it->generation != it->module->section_addresses_generation) { + return drgn_error_create(DRGN_ERROR_OTHER, + "section addresses changed during iteration"); + } + *name_ret = it->map_it.entry->key; + if (address_ret) + *address_ret = it->map_it.entry->value; + it->map_it = drgn_module_section_address_map_next(it->map_it); + } else { + *name_ret = NULL; } return NULL; } -static struct drgn_error * -userspace_core_report_debug_info(struct drgn_debug_info_load_state *load, - const char *nt_file, size_t nt_file_len) +LIBDRGN_PUBLIC enum drgn_module_file_status +drgn_module_loaded_file_status(const struct drgn_module *module) { - struct drgn_error *err; + return module->loaded_file_status; +} - struct userspace_core_report_state core = { - .files = HASH_TABLE_INIT, - }; - err = userspace_core_get_mapped_files(load, &core, nt_file, - nt_file_len); - if (err) - goto out; - err = userspace_core_report_mapped_files(load, &core); -out: - free(core.segment_buf); - free(core.phdr_buf); - for 
(struct drgn_mapped_files_iterator it = - drgn_mapped_files_first(&core.files); - it.entry; it = drgn_mapped_files_next(it)) - drgn_mapped_file_segment_vector_deinit(&it.entry->value); - drgn_mapped_files_deinit(&core.files); - return err; +static bool +drgn_can_change_module_file_status(enum drgn_module_file_status old_status, + enum drgn_module_file_status new_status) +{ + SWITCH_ENUM(old_status) { + case DRGN_MODULE_FILE_WANT: + case DRGN_MODULE_FILE_DONT_WANT: + case DRGN_MODULE_FILE_DONT_NEED: + SWITCH_ENUM(new_status) { + case DRGN_MODULE_FILE_WANT: + case DRGN_MODULE_FILE_DONT_WANT: + case DRGN_MODULE_FILE_DONT_NEED: + return true; + case DRGN_MODULE_FILE_HAVE: + case DRGN_MODULE_FILE_WANT_SUPPLEMENTARY: + default: + return false; + } + case DRGN_MODULE_FILE_HAVE: + return new_status == DRGN_MODULE_FILE_HAVE; + case DRGN_MODULE_FILE_WANT_SUPPLEMENTARY: + SWITCH_ENUM(new_status) { + case DRGN_MODULE_FILE_WANT: + case DRGN_MODULE_FILE_DONT_WANT: + case DRGN_MODULE_FILE_DONT_NEED: + case DRGN_MODULE_FILE_WANT_SUPPLEMENTARY: + return true; + case DRGN_MODULE_FILE_HAVE: + default: + return false; + } + default: + UNREACHABLE(); + } } -static struct drgn_error * -userspace_report_elf_file(struct drgn_debug_info_load_state *load, - const char *path) +LIBDRGN_PUBLIC +bool drgn_module_set_loaded_file_status(struct drgn_module *module, + enum drgn_module_file_status status) { - struct drgn_error *err; + if (!drgn_can_change_module_file_status(module->loaded_file_status, + status)) + return false; + module->loaded_file_status = status; + return true; +} - int fd; - Elf *elf; - err = open_elf_file(path, &fd, &elf); - if (err) - goto err; +LIBDRGN_PUBLIC +bool drgn_module_wants_loaded_file(const struct drgn_module *module) +{ + SWITCH_ENUM(module->loaded_file_status) { + case DRGN_MODULE_FILE_WANT: + return true; + case DRGN_MODULE_FILE_HAVE: + case DRGN_MODULE_FILE_DONT_WANT: + case DRGN_MODULE_FILE_DONT_NEED: + return false; + case DRGN_MODULE_FILE_WANT_SUPPLEMENTARY: 
+ default: + UNREACHABLE(); + } +} - GElf_Ehdr ehdr_mem, *ehdr; - ehdr = gelf_getehdr(elf, &ehdr_mem); - if (!ehdr) { - err = drgn_error_libelf(); - goto err_close; - } - /* - * We haven't implemented a way to get the load address for dynamically - * loaded or relocatable files, so for now we report those as unloaded. - */ - uint64_t start = 0, end = 0; - if (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_CORE) { - err = elf_address_range(elf, 0, &start, &end); - if (err) - goto err_close; +LIBDRGN_PUBLIC enum drgn_module_file_status +drgn_module_debug_file_status(const struct drgn_module *module) +{ + return module->debug_file_status; +} + +LIBDRGN_PUBLIC +bool drgn_module_set_debug_file_status(struct drgn_module *module, + enum drgn_module_file_status status) +{ + if (!drgn_can_change_module_file_status(module->debug_file_status, + status)) + return false; + if (module->debug_file_status == DRGN_MODULE_FILE_WANT_SUPPLEMENTARY + && status != DRGN_MODULE_FILE_WANT_SUPPLEMENTARY) + drgn_module_clear_wanted_supplementary_debug_file(module); + module->debug_file_status = status; + return true; +} + +LIBDRGN_PUBLIC +bool drgn_module_wants_debug_file(const struct drgn_module *module) +{ + SWITCH_ENUM(module->debug_file_status) { + case DRGN_MODULE_FILE_WANT: + case DRGN_MODULE_FILE_WANT_SUPPLEMENTARY: + return true; + case DRGN_MODULE_FILE_HAVE: + case DRGN_MODULE_FILE_DONT_WANT: + case DRGN_MODULE_FILE_DONT_NEED: + return false; + default: + UNREACHABLE(); } +} - return drgn_debug_info_report_elf(load, path, fd, elf, start, end, NULL, - NULL); +LIBDRGN_PUBLIC +const char *drgn_module_loaded_file_path(const struct drgn_module *module) +{ + return module->loaded_file ? 
module->loaded_file->path : NULL; +} -err_close: - elf_end(elf); - close(fd); -err: - return drgn_debug_info_report_error(load, path, NULL, err); +LIBDRGN_PUBLIC +uint64_t drgn_module_loaded_file_bias(const struct drgn_module *module) +{ + return module->loaded_file_bias; } -static struct drgn_error * -userspace_report_debug_info(struct drgn_debug_info_load_state *load) +LIBDRGN_PUBLIC +const char *drgn_module_debug_file_path(const struct drgn_module *module) { - struct drgn_error *err; + return module->debug_file ? module->debug_file->path : NULL; +} - for (size_t i = 0; i < load->num_paths; i++) { - err = userspace_report_elf_file(load, load->paths[i]); - if (err) - return err; - } +LIBDRGN_PUBLIC +uint64_t drgn_module_debug_file_bias(const struct drgn_module *module) +{ + return module->debug_file_bias; +} - if (load->load_default) { - Dwfl *dwfl = load->dbinfo->dwfl; - struct drgn_program *prog = load->dbinfo->prog; - if (prog->flags & DRGN_PROGRAM_IS_LIVE) { - int ret = dwfl_linux_proc_report(dwfl, prog->pid); - if (ret == -1) { - return drgn_error_libdwfl(); - } else if (ret) { - return drgn_error_create_os("dwfl_linux_proc_report", - ret, NULL); - } - } else { - const char *nt_file; - size_t nt_file_len; - char *env = getenv("DRGN_USE_LIBDWFL_REPORT"); - if (env && atoi(env)) { - nt_file = NULL; - nt_file_len = 0; - } else { - err = drgn_get_nt_file(prog->core, &nt_file, - &nt_file_len); - if (err) - return err; - } - if (nt_file) { - err = userspace_core_report_debug_info(load, - nt_file, - nt_file_len); - if (err) - return err; - } else if (dwfl_core_file_report(dwfl, prog->core, - NULL) == -1) { - return drgn_error_libdwfl(); - } - } - } - return NULL; +LIBDRGN_PUBLIC enum drgn_supplementary_file_kind +drgn_module_supplementary_debug_file_kind(const struct drgn_module *module) +{ + return module->supplementary_debug_file + ? 
DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK + : DRGN_SUPPLEMENTARY_FILE_NONE; } -static int should_apply_relocation_section(Elf *elf, size_t shstrndx, - const GElf_Shdr *shdr) +LIBDRGN_PUBLIC const char * +drgn_module_supplementary_debug_file_path(const struct drgn_module *module) { - if (shdr->sh_type != SHT_RELA && shdr->sh_type != SHT_REL) - return 0; + return module->supplementary_debug_file + ? module->supplementary_debug_file->path : NULL; +} - const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) - return -1; - if (shdr->sh_type == SHT_RELA) { - if (!strstartswith(scnname, ".rela.")) - return 0; - scnname += sizeof(".rela.") - 1; - } else { - if (!strstartswith(scnname, ".rel.")) - return 0; - scnname += sizeof(".rel.") - 1; - } - return (strstartswith(scnname, "debug_") || - strstartswith(scnname, "orc_")); +LIBDRGN_PUBLIC enum drgn_supplementary_file_kind +drgn_module_wanted_supplementary_debug_file(struct drgn_module *module, + const char **debug_file_path_ret, + const char **supplementary_path_ret, + const void **checksum_ret, + size_t *checksum_len_ret) +{ + struct drgn_module_wanted_supplementary_file *wanted = + module->wanted_supplementary_debug_file; + if (debug_file_path_ret) + *debug_file_path_ret = wanted ? wanted->file->path : NULL; + if (supplementary_path_ret) + *supplementary_path_ret = wanted ? wanted->supplementary_path : NULL; + if (checksum_ret) + *checksum_ret = wanted ? wanted->checksum : NULL; + if (checksum_len_ret) + *checksum_len_ret = wanted ? wanted->checksum_len : 0; + return wanted + ? 
DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK + : DRGN_SUPPLEMENTARY_FILE_NONE; } -static inline struct drgn_error *get_reloc_sym_value(const void *syms, - size_t num_syms, - const uint64_t *sh_addrs, - size_t shdrnum, - bool is_64_bit, - bool bswap, - uint32_t r_sym, - uint64_t *ret) +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_object(const struct drgn_module *module, struct drgn_object *ret) { - if (r_sym >= num_syms) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid ELF relocation symbol"); - } - uint16_t st_shndx; - uint64_t st_value; - if (is_64_bit) { - const Elf64_Sym *sym = (Elf64_Sym *)syms + r_sym; - memcpy(&st_shndx, &sym->st_shndx, sizeof(st_shndx)); - memcpy(&st_value, &sym->st_value, sizeof(st_value)); - if (bswap) { - st_shndx = bswap_16(st_shndx); - st_value = bswap_64(st_value); - } + return drgn_object_copy(ret, &module->object); +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_set_object(struct drgn_module *module, const struct drgn_object *obj) +{ + return drgn_object_copy(&module->object, obj); +} + +static struct drgn_error * +drgn_program_register_debug_info_finder_impl(struct drgn_program *prog, + struct drgn_debug_info_finder *finder, + const char *name, + const struct drgn_debug_info_finder_ops *ops, + void *arg, size_t enable_index) +{ + struct drgn_error *err; + bool should_free = !finder; + if (finder) { + finder->handler.name = name; } else { - const Elf32_Sym *sym = (Elf32_Sym *)syms + r_sym; - memcpy(&st_shndx, &sym->st_shndx, sizeof(st_shndx)); - uint32_t st_value32; - memcpy(&st_value32, &sym->st_value, sizeof(st_value32)); - if (bswap) { - st_shndx = bswap_16(st_shndx); - st_value32 = bswap_32(st_value32); + finder = malloc(sizeof(*finder)); + if (!finder) + return &drgn_enomem; + finder->handler.name = strdup(name); + if (!finder->handler.name) { + free(finder); + return &drgn_enomem; } - st_value = st_value32; } - if (st_shndx >= shdrnum) { - return drgn_error_create(DRGN_ERROR_OTHER, - "invalid ELF symbol section 
index"); + finder->handler.free = should_free; + finder->ops = *ops; + finder->arg = arg; + err = drgn_handler_list_register(&prog->dbinfo.debug_info_finders, + &finder->handler, enable_index, + "module debug info finder"); + if (err && should_free) { + free((char *)finder->handler.name); + free(finder); } - *ret = sh_addrs[st_shndx] + st_value; - return NULL; + return err; } -static struct drgn_error * -apply_elf_relas(const struct drgn_relocating_section *relocating, - Elf_Data *reloc_data, Elf_Data *symtab_data, - const uint64_t *sh_addrs, size_t shdrnum, - const struct drgn_platform *platform) +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_register_debug_info_finder(struct drgn_program *prog, + const char *name, + const struct drgn_debug_info_finder_ops *ops, + void *arg, size_t enable_index) { - struct drgn_error *err; - - bool is_64_bit = drgn_platform_is_64_bit(platform); - bool bswap = drgn_platform_bswap(platform); - apply_elf_reloc_fn *apply_elf_reloc = platform->arch->apply_elf_reloc; - - const void *relocs = reloc_data->d_buf; - size_t reloc_size = is_64_bit ? sizeof(Elf64_Rela) : sizeof(Elf32_Rela); - size_t num_relocs = reloc_data->d_size / reloc_size; - - const void *syms = symtab_data->d_buf; - size_t sym_size = is_64_bit ? 
sizeof(Elf64_Sym) : sizeof(Elf32_Sym); - size_t num_syms = symtab_data->d_size / sym_size; - - for (size_t i = 0; i < num_relocs; i++) { - uint64_t r_offset; - uint32_t r_sym; - uint32_t r_type; - int64_t r_addend; - if (is_64_bit) { - const Elf64_Rela *rela = (Elf64_Rela *)relocs + i; - uint64_t r_info; - memcpy(&r_offset, &rela->r_offset, sizeof(r_offset)); - memcpy(&r_info, &rela->r_info, sizeof(r_info)); - memcpy(&r_addend, &rela->r_addend, sizeof(r_addend)); - if (bswap) { - r_offset = bswap_64(r_offset); - r_info = bswap_64(r_info); - r_addend = bswap_64(r_addend); - } - r_sym = ELF64_R_SYM(r_info); - r_type = ELF64_R_TYPE(r_info); - } else { - const Elf32_Rela *rela32 = (Elf32_Rela *)relocs + i; - uint32_t r_offset32; - uint32_t r_info32; - int32_t r_addend32; - memcpy(&r_offset32, &rela32->r_offset, sizeof(r_offset32)); - memcpy(&r_info32, &rela32->r_info, sizeof(r_info32)); - memcpy(&r_addend32, &rela32->r_addend, sizeof(r_addend32)); - if (bswap) { - r_offset32 = bswap_32(r_offset32); - r_info32 = bswap_32(r_info32); - r_addend32 = bswap_32(r_addend32); - } - r_offset = r_offset32; - r_sym = ELF32_R_SYM(r_info32); - r_type = ELF32_R_TYPE(r_info32); - r_addend = r_addend32; - } - uint64_t sym_value; - err = get_reloc_sym_value(syms, num_syms, sh_addrs, shdrnum, - is_64_bit, bswap, r_sym, &sym_value); - if (err) - return err; + return drgn_program_register_debug_info_finder_impl(prog, NULL, name, + ops, arg, + enable_index); +} - err = apply_elf_reloc(relocating, r_offset, r_type, &r_addend, - sym_value); - if (err) - return err; - } - return NULL; +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_registered_debug_info_finders(struct drgn_program *prog, + const char ***names_ret, + size_t *count_ret) +{ + return drgn_handler_list_registered(&prog->dbinfo.debug_info_finders, + names_ret, count_ret); } -static struct drgn_error * -apply_elf_rels(const struct drgn_relocating_section *relocating, - Elf_Data *reloc_data, Elf_Data *symtab_data, - const uint64_t 
*sh_addrs, size_t shdrnum, - const struct drgn_platform *platform) +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_set_enabled_debug_info_finders(struct drgn_program *prog, + const char * const *names, + size_t count) { - struct drgn_error *err; + return drgn_handler_list_set_enabled(&prog->dbinfo.debug_info_finders, + names, count, + "module debug info finder"); +} - bool is_64_bit = drgn_platform_is_64_bit(platform); - bool bswap = drgn_platform_bswap(platform); - apply_elf_reloc_fn *apply_elf_reloc = platform->arch->apply_elf_reloc; - - const void *relocs = reloc_data->d_buf; - size_t reloc_size = is_64_bit ? sizeof(Elf64_Rel) : sizeof(Elf32_Rel); - size_t num_relocs = reloc_data->d_size / reloc_size; - - const void *syms = symtab_data->d_buf; - size_t sym_size = is_64_bit ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym); - size_t num_syms = symtab_data->d_size / sym_size; - - for (size_t i = 0; i < num_relocs; i++) { - uint64_t r_offset; - uint32_t r_sym; - uint32_t r_type; - if (is_64_bit) { - const Elf64_Rel *rel = (Elf64_Rel *)relocs + i; - uint64_t r_info; - memcpy(&r_offset, &rel->r_offset, sizeof(r_offset)); - memcpy(&r_info, &rel->r_info, sizeof(r_info)); - if (bswap) { - r_offset = bswap_64(r_offset); - r_info = bswap_64(r_info); - } - r_sym = ELF64_R_SYM(r_info); - r_type = ELF64_R_TYPE(r_info); - } else { - const Elf32_Rel *rel32 = (Elf32_Rel *)relocs + i; - uint32_t r_offset32; - uint32_t r_info32; - memcpy(&r_offset32, &rel32->r_offset, sizeof(r_offset32)); - memcpy(&r_info32, &rel32->r_info, sizeof(r_info32)); - if (bswap) { - r_offset32 = bswap_32(r_offset32); - r_info32 = bswap_32(r_info32); - } - r_offset = r_offset32; - r_sym = ELF32_R_SYM(r_info32); - r_type = ELF32_R_TYPE(r_info32); - } - uint64_t sym_value; - err = get_reloc_sym_value(syms, num_syms, sh_addrs, shdrnum, - is_64_bit, bswap, r_sym, &sym_value); - if (err) - return err; +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_enabled_debug_info_finders(struct drgn_program *prog, + const char 
***names_ret, + size_t *count_ret) +{ + return drgn_handler_list_enabled(&prog->dbinfo.debug_info_finders, + names_ret, count_ret); +} - err = apply_elf_reloc(relocating, r_offset, r_type, NULL, - sym_value); - if (err) - return err; - } - return NULL; +LIBDRGN_PUBLIC struct drgn_debug_info_options * +drgn_program_debug_info_options(struct drgn_program *prog) +{ + return &prog->dbinfo.options; } -/* - * Before the debugging information in a relocatable ELF file (e.g., Linux - * kernel module) can be used, it must have ELF relocations applied. This is - * usually done by libdwfl. However, libdwfl is relatively slow at it. This is a - * much faster implementation. - */ -static struct drgn_error *relocate_elf_file(Elf *elf) +static struct drgn_error * +drgn_module_set_wanted_gnu_debugaltlink(struct drgn_module *module, + struct drgn_elf_file *file) { struct drgn_error *err; + struct drgn_program *prog = module->prog; - GElf_Ehdr ehdr_mem, *ehdr; - ehdr = gelf_getehdr(elf, &ehdr_mem); - if (!ehdr) - return drgn_error_libelf(); - - if (ehdr->e_type != ET_REL) { - /* Not a relocatable file. */ - return NULL; + // We don't cache .gnu_debugaltlink, and it doesn't need relocation, so + // don't use drgn_elf_file_read_section(). + Elf_Data *data; + err = read_elf_section(file->scns[DRGN_SCN_GNU_DEBUGALTLINK], &data); + if (err) { + if (!drgn_error_is_fatal(err)) { + drgn_error_log_debug(prog, err, + "%s: couldn't read .gnu_debugaltlink; ignoring debug info: ", + file->path); + drgn_error_destroy(err); + err = NULL; + } + return err; } - struct drgn_platform platform; - drgn_platform_from_elf(ehdr, &platform); - if (!platform.arch->apply_elf_reloc) { - /* Unsupported; fall back to libdwfl. 
*/ + const char *debugaltlink = data->d_buf; + const char *nul = memchr(debugaltlink, 0, data->d_size); + if (!nul || nul + 1 == debugaltlink + data->d_size) { + drgn_log_debug(prog, + "%s: couldn't parse .gnu_debugaltlink; ignoring debug info", + file->path); return NULL; } + const void *build_id = nul + 1; + size_t build_id_len = debugaltlink + data->d_size - (nul + 1); + _cleanup_free_ char *build_id_str = ahexlify(build_id, build_id_len); + if (!build_id_str) + return &drgn_enomem; + drgn_log_debug(prog, "%s has gnu_debugaltlink %s build ID %s", + file->path, debugaltlink, build_id_str); - size_t shdrnum; - if (elf_getshdrnum(elf, &shdrnum)) - return drgn_error_libelf(); - _cleanup_free_ uint64_t *sh_addrs = - calloc(shdrnum, sizeof(sh_addrs[0])); - if (!sh_addrs && shdrnum > 0) + struct drgn_module_wanted_supplementary_file *wanted = + malloc(sizeof(*wanted)); + if (!wanted) return &drgn_enomem; + *wanted = (struct drgn_module_wanted_supplementary_file){ + .file = file, + .supplementary_path = debugaltlink, + .checksum = build_id, + .checksum_len = build_id_len, + .checksum_str = no_cleanup_ptr(build_id_str), + .generation = ++prog->dbinfo.supplementary_file_generation, + }; + drgn_module_clear_wanted_supplementary_debug_file(module); + module->wanted_supplementary_debug_file = wanted; + module->debug_file_status = DRGN_MODULE_FILE_WANT_SUPPLEMENTARY; + return NULL; +} + +static bool +drgn_module_copy_section_addresses(struct drgn_module *module, Elf *elf) +{ + if (drgn_module_section_address_map_empty(&module->section_addresses)) + return true; + + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx)) + return false; Elf_Scn *scn = NULL; while ((scn = elf_nextscn(elf, scn))) { GElf_Shdr *shdr, shdr_mem; shdr = gelf_getshdr(scn, &shdr_mem); if (!shdr) - return drgn_error_libelf(); - sh_addrs[elf_ndxscn(scn)] = shdr->sh_addr; + return false; + + char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + return false; + + struct 
drgn_module_section_address_map_iterator it = + drgn_module_section_address_map_search(&module->section_addresses, + &scnname); + if (!it.entry) + continue; + + shdr->sh_addr = it.entry->value; + if (!gelf_update_shdr(scn, shdr)) + return false; + } + return true; +} + +static bool elf_main_bias(struct drgn_program *prog, Elf *elf, uint64_t *ret) +{ + GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(elf, &ehdr_mem); + if (!ehdr) { + drgn_log_debug(prog, "gelf_getehdr: %s", elf_errmsg(-1)); + return false; + } + + size_t phnum; + if (elf_getphdrnum(elf, &phnum) != 0) { + drgn_log_debug(prog, "elf_getphdrnum: %s", elf_errmsg(-1)); + return false; + } + + uint64_t phdr_vaddr; + bool have_phdr_vaddr = false; + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) { + drgn_log_debug(prog, "gelf_getphdr: %s", + elf_errmsg(-1)); + return false; + } + if (phdr->p_type == PT_LOAD && + phdr->p_offset <= ehdr->e_phoff && + ehdr->e_phoff < phdr->p_offset + phdr->p_filesz) { + phdr_vaddr = ehdr->e_phoff - phdr->p_offset + phdr->p_vaddr; + have_phdr_vaddr = true; + } + } + if (!have_phdr_vaddr) { + drgn_log_debug(prog, + "file does not have loadable segment containing e_phoff"); + return false; + } + *ret = prog->auxv.at_phdr - phdr_vaddr; + return true; +} + +static bool elf_dso_bias(struct drgn_program *prog, Elf *elf, + uint64_t dynamic_address, uint64_t *ret) +{ + size_t phnum; + if (elf_getphdrnum(elf, &phnum) != 0) { + drgn_log_debug(prog, "elf_getphdrnum: %s", elf_errmsg(-1)); + return false; + } + + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) { + drgn_log_debug(prog, "gelf_getphdr: %s", + elf_errmsg(-1)); + return false; + } + if (phdr->p_type == PT_DYNAMIC) { + *ret = dynamic_address - phdr->p_vaddr; + drgn_log_debug(prog, + "got bias 0x%" PRIx64 " from PT_DYNAMIC program header", + *ret); + return true; + } + } + drgn_log_debug(prog, "file does not 
have PT_DYNAMIC program header"); + return false; +} + +static bool drgn_module_elf_file_bias(struct drgn_module *module, + struct drgn_elf_file *file, uint64_t *ret) +{ + struct drgn_program *prog = module->prog; + SWITCH_ENUM(module->kind) { + case DRGN_MODULE_MAIN: + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { + *ret = prog->vmcoreinfo.kaslr_offset; + drgn_log_debug(prog, + "got bias 0x%" PRIx64 " from VMCOREINFO", + *ret); + return true; + } else { + return elf_main_bias(prog, file->elf, ret); + } + case DRGN_MODULE_SHARED_LIBRARY: + case DRGN_MODULE_VDSO: + return elf_dso_bias(prog, file->elf, module->info, ret); + case DRGN_MODULE_EXTRA: { + size_t num_address_ranges; + if (drgn_module_num_address_ranges(module, &num_address_ranges) + && num_address_ranges == 1) { + uint64_t start, end; + drgn_module_address_range(module, 0, &start, &end); + uint64_t elf_start, elf_end; + if (!drgn_elf_file_address_range(file, &elf_start, + &elf_end)) + return false; + if (elf_start < elf_end) { + *ret = start - elf_start; + drgn_log_debug(prog, + "got bias 0x%" PRIx64 " from ELF start address", + *ret); + return true; + } + } + fallthrough; + } + case DRGN_MODULE_RELOCATABLE: + default: + *ret = 0; + return true; + } +} + +static bool +drgn_module_should_set_address_range_from_elf_file(struct drgn_module *module) +{ + if (module->address_ranges) + return false; + + SWITCH_ENUM(module->kind) { + case DRGN_MODULE_MAIN: + case DRGN_MODULE_SHARED_LIBRARY: + case DRGN_MODULE_VDSO: + return true; + case DRGN_MODULE_RELOCATABLE: + case DRGN_MODULE_EXTRA: + default: + return false; + } +} + +// Takes ownership of file unless it is already owned by module. 
+// Decide whether an already-created ELF file should be used by a module and,
+// if so, install it.
+//
+// If is_gnu_debugaltlink_file is true, file is the supplementary
+// (gnu_debugaltlink) file for module->wanted_supplementary_debug_file->file,
+// and that pair is installed as the debug file. Otherwise, file is considered
+// as the loaded file (if one is wanted and file->is_loadable) and/or as the
+// debug file (if one is wanted and the file has DWARF).
+//
+// On every path, file ends up installed in the module, retained as
+// module->wanted_supplementary_debug_file->file, or destroyed (unless it is
+// already the module's loaded or debug file).
+//
+// Returns NULL both when the file was used and when it was ignored for a
+// non-fatal reason (the reason is logged at debug level). Returns a non-NULL
+// error only for allocation failure or an error from
+// drgn_module_set_wanted_gnu_debugaltlink().
+static struct drgn_error *
+drgn_module_maybe_use_elf_file(struct drgn_module *module,
+			       struct drgn_elf_file *file,
+			       bool is_gnu_debugaltlink_file)
+{
+	struct drgn_error *err;
+	struct drgn_program *prog = module->prog;
+	struct drgn_elf_file *gnu_debugaltlink_file = NULL;
+
+	bool use_loaded, has_dwarf, use_debug;
+	if (is_gnu_debugaltlink_file) {
+		assert(module->debug_file_status
+		       == DRGN_MODULE_FILE_WANT_SUPPLEMENTARY);
+		// From here on, "file" is the debug file that wanted the
+		// supplementary file; the supplementary file itself is kept in
+		// gnu_debugaltlink_file.
+		gnu_debugaltlink_file = file;
+		file = module->wanted_supplementary_debug_file->file;
+		use_loaded = false;
+		has_dwarf = use_debug = true;
+	} else {
+		// We should only be here if we want a file.
+		assert(drgn_module_wants_file(module));
+		use_loaded = module->loaded_file_status == DRGN_MODULE_FILE_WANT
+			     && file->is_loadable;
+		has_dwarf = drgn_elf_file_has_dwarf(file);
+		use_debug = drgn_module_wants_debug_file(module) && has_dwarf;
+	}
+
+	// Declared before any "goto unused" so that it is freed automatically
+	// on every exit path unless ownership is handed off below.
+	_cleanup_free_ void *build_id_buf = NULL;
+
+	if (!is_gnu_debugaltlink_file
+	    && use_debug && file->scns[DRGN_SCN_GNU_DEBUGALTLINK]) {
+		// If we're trying to reuse a debug file that wants a
+		// supplementary file, then don't reset it, otherwise we'll free
+		// the file that we're trying to reuse.
+		if (!module->wanted_supplementary_debug_file
+		    || module->wanted_supplementary_debug_file->file != file) {
+			err = drgn_module_set_wanted_gnu_debugaltlink(module, file);
+			if (err)
+				goto unused;
+		}
+		// The file can't be used as the debug file until its
+		// supplementary file is found. If it also isn't going to be
+		// the loaded file, there is nothing more to do now.
+		if (!use_loaded && module->wanted_supplementary_debug_file
+		    && module->wanted_supplementary_debug_file->file == file)
+			return NULL;
+		use_debug = false;
+	}
+
+	if (!use_loaded && !use_debug) {
+		if (file->is_loadable) {
+			drgn_log_debug(prog,
+				       "%s is loadable, but don't want loaded file; ignoring",
+				       file->path);
+		} else if (has_dwarf) {
+			drgn_log_debug(prog,
+				       "%s has debug info, but don't want debug info; ignoring",
+				       file->path);
+		} else {
+			drgn_log_debug(prog,
+				       "%s is not loadable and no debug info; ignoring",
+				       file->path);
+		}
+		err = NULL;
+		goto unused;
+	}
+
+	// Get everything that might fail before we commit to using the file.
+	const void *elf_build_id;
+	ssize_t elf_build_id_len = 0;
+	// Only take the build ID from the file if the module doesn't already
+	// have one.
+	if (module->build_id_len == 0) {
+		elf_build_id_len = drgn_elf_gnu_build_id(file->elf,
+							 &elf_build_id);
+		if (elf_build_id_len < 0) {
+			drgn_log_debug(prog, "%s: %s", file->path,
+				       elf_errmsg(-1));
+			err = NULL;
+			goto unused;
+		}
+		if (elf_build_id_len > 0) {
+			build_id_buf =
+				drgn_module_alloc_build_id(elf_build_id_len);
+			if (!build_id_buf) {
+				err = &drgn_enomem;
+				goto unused;
+			}
+		}
+	}
+
+	// Skip copying section addresses when the file is already one of the
+	// module's files (i.e., it is being reused).
+	if (file != module->loaded_file && file != module->debug_file
+	    && !drgn_module_copy_section_addresses(module, file->elf)) {
+		drgn_log_debug(prog, "%s: %s", file->path, elf_errmsg(-1));
+		err = NULL;
+		goto unused;
+	}
+
+	uint64_t bias;
+	if (!drgn_module_elf_file_bias(module, file, &bias)) {
+		err = NULL;
+		goto unused;
+	}
+	uint64_t elf_start = 0, elf_end = 0;
+	if (drgn_module_should_set_address_range_from_elf_file(module)) {
+		if (!drgn_elf_file_address_range(file, &elf_start, &elf_end)) {
+			drgn_log_debug(prog, "%s: %s", file->path,
+				       elf_errmsg(-1));
+			err = NULL;
+			goto unused;
+		}
+		elf_start += bias;
+		elf_end += bias;
+		if (elf_start >= elf_end) {
+			// Not a failure: the file is still used, but the range
+			// is not installed because of the elf_start < elf_end
+			// check below.
+			drgn_log_debug(prog, "%s: address range is invalid",
+				       file->path);
+		}
+	}
+
+	// At this point, we've committed to using the file. Nothing after this
+	// is allowed to fail.
+
+	if (use_loaded && use_debug) {
+		drgn_log_info(prog,
+			      "%s: using loadable file with debug info %s",
+			      module->name, file->path);
+	} else if (use_loaded) {
+		drgn_log_info(prog, "%s: using loadable file %s", module->name,
+			      file->path);
+	} else if (is_gnu_debugaltlink_file) {
+		drgn_log_info(prog,
+			      "%s: using debug info file %s with supplementary file %s",
+			      module->name, file->path, gnu_debugaltlink_file->path);
+	} else {
+		drgn_log_info(prog, "%s: using debug info file %s",
+			      module->name, file->path);
+	}
+
+	// If we got a build ID or address range earlier, install them.
+	if (elf_build_id_len > 0) {
+		// no_cleanup_ptr() hands build_id_buf off so that the cleanup
+		// attribute doesn't free it.
+		drgn_module_set_build_id_impl(module, elf_build_id,
+					      elf_build_id_len,
+					      no_cleanup_ptr(build_id_buf));
+		drgn_log_debug(prog, "%s: set build ID %s from file",
+			       module->name, module->build_id_str);
+	}
+	if (elf_start < elf_end) {
+		drgn_log_debug(prog,
+			       "%s: set address range 0x%" PRIx64
+			       "-0x%" PRIx64 " from file", module->name,
+			       elf_start, elf_end);
+		err = drgn_module_set_address_range(module, elf_start, elf_end);
+		// This can only fail if the address range is invalid, which we
+		// just checked for.
+		assert(!err);
+	}
+
+	if (use_loaded) {
+		module->loaded_file = file;
+		module->loaded_file_bias = bias;
+		module->loaded_file_status = DRGN_MODULE_FILE_HAVE;
+		module->elf_symtab_pending_files |=
+			DRGN_MODULE_FILE_MASK_LOADED;
+	}
+	if (use_debug) {
+		module->debug_file = file;
+		module->debug_file_bias = bias;
+		// NULL unless this call supplied the gnu_debugaltlink file.
+		module->supplementary_debug_file = gnu_debugaltlink_file;
+		drgn_module_clear_wanted_supplementary_debug_file(module);
+		module->debug_file_status = DRGN_MODULE_FILE_HAVE;
+		// Push the module onto the program's list of modules pending
+		// indexing.
+		module->pending_indexing_next =
+			prog->dbinfo.modules_pending_indexing;
+		prog->dbinfo.modules_pending_indexing = module;
+		prog->tried_main_language = false;
+		module->elf_symtab_pending_files |=
+			DRGN_MODULE_FILE_MASK_DEBUG;
+	}
+	if (!prog->has_platform) {
+		drgn_log_debug(prog, "setting program platform from %s",
+			       file->path);
+		drgn_program_set_platform(prog, &file->platform);
+	}
+	return NULL;
+
+unused:
+	// Reached with err set to either NULL (file ignored) or a real error.
+	drgn_elf_file_destroy(gnu_debugaltlink_file);
+	// If the file being discarded is the one waiting for a supplementary
+	// file, detach it first so that clearing the wanted supplementary file
+	// state doesn't touch it, then go back to wanting a plain debug file.
+	if (module->wanted_supplementary_debug_file
+	    && file == module->wanted_supplementary_debug_file->file) {
+		module->wanted_supplementary_debug_file->file = NULL;
+		drgn_module_clear_wanted_supplementary_debug_file(module);
+		module->debug_file_status = DRGN_MODULE_FILE_WANT;
+	}
+	// Don't destroy a file that the module is already using (the reuse
+	// case).
+	if (file != module->loaded_file && file != module->debug_file)
+		drgn_elf_file_destroy(file);
+	return err;
+}
+
+// Always takes ownership of fd_. Attempts to resolve the real path of path.
+//
+// Open (if fd_ is negative) and validate a candidate file for a module, then
+// hand it to drgn_module_maybe_use_elf_file().
+//
+// If check_build_id is true, a file whose build ID is missing, unreadable, or
+// different from the module's is rejected (unless it matches the wanted
+// gnu_debugaltlink build ID). If expected_crc is non-NULL, the CRC of the raw
+// file contents must match it.
+//
+// Returns NULL when the file was used or was rejected for a non-fatal reason
+// (logged at debug level); returns an error only for fatal failures.
+static struct drgn_error *
+drgn_module_try_file_internal(struct drgn_module *module, const char *path,
+			      int fd_, bool check_build_id,
+			      const uint32_t *expected_crc)
+{
+	struct drgn_error *err;
+	struct drgn_program *prog = module->prog;
+
+	_cleanup_close_ int fd = fd_;
+	if (fd >= 0) {
+		if (path) {
+			drgn_log_debug(prog, "%s: trying %s with fd %d",
+				       module->name, path, fd);
+		} else {
+			drgn_log_debug(prog, "%s: trying fd %d", module->name,
+				       fd);
+		}
+	} else {
+		fd = open(path, O_RDONLY);
+		if (fd < 0) {
+			drgn_log_debug(prog, "%s: %m", path);
+			return NULL;
+		}
+		drgn_log_debug(prog, "%s: trying %s", module->name, path);
+	}
+
+	// Try to canonicalize the path, first via
+	// readlink("/proc/self/fd/$fd"), then via realpath().
+#define FORMAT "/proc/self/fd/%d"
+	// Sized for the format with "%d" replaced by the longest possible int
+	// (sizeof(FORMAT) already accounts for the terminating NUL).
+	char fd_path[sizeof(FORMAT)
+		     - (sizeof("%d") - 1)
+		     + max_decimal_length(int)];
+	snprintf(fd_path, sizeof(fd_path), FORMAT, fd);
+#undef FORMAT
+
+	size_t link_buf_size = PATH_MAX;
+	_cleanup_free_ char *link_buf = malloc(link_buf_size);
+	if (!link_buf)
+		return &drgn_enomem;
+
+	for (;;) {
+		ssize_t r = readlink(fd_path, link_buf, link_buf_size);
+		if (r < 0) {
+			drgn_log_debug(prog, "readlink: %s: %m", fd_path);
+			if (path) {
+				// Fall back to realpath(). link_buf takes
+				// ownership of the result (or becomes NULL) so
+				// that the cleanup attribute frees it.
+				free(link_buf);
+				link_buf = realpath(path, NULL);
+				if (link_buf) {
+					drgn_log_debug(prog,
+						       "canonical path is %s",
+						       link_buf);
+					path = link_buf;
+				} else {
+					drgn_log_debug(prog, "realpath: %s: %m",
+						       path);
+				}
+			} else {
+				path = fd_path;
+			}
+			break;
+		}
+
+		// readlink() doesn't null-terminate and silently truncates, so
+		// a result that fills the whole buffer may be incomplete.
+		if (r < link_buf_size) {
+			link_buf[r] = '\0';
+			if (drgn_log_is_enabled(prog, DRGN_LOG_DEBUG)
+			    && (!path || strcmp(path, link_buf) != 0)) {
+				drgn_log_debug(prog, "canonical path is %s",
+					       link_buf);
+			}
+			path = link_buf;
+			break;
+		}
+
+		// Double the buffer and retry.
+		if (__builtin_mul_overflow(link_buf_size, 2U, &link_buf_size))
+			return &drgn_enomem;
+		free(link_buf);
+		link_buf = malloc(link_buf_size);
+		if (!link_buf)
+			return &drgn_enomem;
+	}
+
+	_cleanup_elf_end_ Elf *elf = dwelf_elf_begin(fd);
+	if (!elf) {
+		drgn_log_debug(prog, "%s: %s", path, elf_errmsg(-1));
+		return NULL;
+	}
+	if (elf_kind(elf) != ELF_K_ELF) {
+		drgn_log_debug(prog, "%s: not an ELF file", path);
+		return NULL;
+	}
+
+	// This code assumes that DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK is
+	// the only kind of supplementary file, which is currently true.
+	bool log_build_id = check_build_id
+			    || drgn_log_is_enabled(prog, DRGN_LOG_DEBUG);
+	const void *elf_build_id;
+	// Only assigned when the condition below holds; every later read is
+	// guarded by one of the same two conditions.
+	ssize_t elf_build_id_len;
+	if (module->debug_file_status == DRGN_MODULE_FILE_WANT_SUPPLEMENTARY
+	    || (log_build_id && module->build_id_len > 0)) {
+		elf_build_id_len = drgn_elf_gnu_build_id(elf, &elf_build_id);
+		if (elf_build_id_len < 0) {
+			drgn_log_debug(prog, "%s: %s%s", path, elf_errmsg(-1),
+				       check_build_id ? "" : "; ignoring build ID");
+		}
+	}
+
+	bool is_gnu_debugaltlink_file = false;
+	if (module->debug_file_status == DRGN_MODULE_FILE_WANT_SUPPLEMENTARY
+	    && elf_build_id_len >= 0
+	    && elf_build_id_len
+	       == module->wanted_supplementary_debug_file->checksum_len
+	    && memcmp(elf_build_id,
+		      module->wanted_supplementary_debug_file->checksum,
+		      elf_build_id_len) == 0) {
+		drgn_log_debug(prog, "%s: %s build ID matches gnu_debugaltlink",
+			       module->name, path);
+		is_gnu_debugaltlink_file = true;
+	} else if (log_build_id && module->build_id_len > 0) {
+		if (elf_build_id_len < 0) {
+			// Couldn't read the build ID (already logged above).
+			if (check_build_id)
+				return NULL;
+		} else if (elf_build_id_len == module->build_id_len
+			   && memcmp(elf_build_id, module->build_id,
+				     elf_build_id_len) == 0) {
+			drgn_log_debug(prog, "%s: %s build ID matches",
+				       module->name, path);
+		} else {
+			if (elf_build_id_len == 0) {
+				drgn_log_debug(prog,
+					       "%s: %s is missing build ID%s",
+					       module->name, path,
+					       check_build_id ? "" : "; forcing");
+			} else {
+				drgn_log_debug(prog,
+					       "%s: %s build ID does not match%s",
+					       module->name, path,
+					       check_build_id ? "" : "; forcing");
+			}
+			if (check_build_id)
+				return NULL;
+		}
+	}
+	if (expected_crc) {
+		size_t size;
+		const void *rawfile = elf_rawfile(elf, &size);
+		if (!rawfile) {
+			drgn_log_debug(prog, "%s: %s", path, elf_errmsg(-1));
+			return NULL;
+		}
+		uint32_t crc = ~crc32_update(-1, rawfile, size);
+		if (crc != *expected_crc) {
+			drgn_log_debug(prog,
+				       "%s: %s CRC 0x%08" PRIx32 " does not match",
+				       module->name, path, crc);
+			return NULL;
+		}
+		drgn_log_debug(prog, "%s: %s CRC matches", module->name, path);
+	}
+
+	struct drgn_elf_file *file;
+	err = drgn_elf_file_create(module, path, fd, NULL, elf, &file);
+	if (err) {
+		// Non-fatal errors are logged and swallowed; fatal ones are
+		// returned to the caller.
+		if (!drgn_error_is_fatal(err)) {
+			drgn_error_log_debug(prog, err, "");
+			drgn_error_destroy(err);
+			err = NULL;
+		}
+		return err;
+	}
+	// fd and elf are owned by the drgn_elf_file now.
+	fd = -1;
+	elf = NULL;
+	return drgn_module_maybe_use_elf_file(module, file,
+					      is_gnu_debugaltlink_file);
+}
+
+// Arbitrary limit on the number of bytes we'll allocate and read from the
+// program's memory at once when finding modules/debug info.
+static const uint64_t MAX_MEMORY_READ_FOR_DEBUG_INFO = UINT64_C(1048576);
+
+// Log, at debug level, a message of the form
+// "<name> (build ID <id>|no build ID): <how> <kinds> file[s]", where <kinds>
+// lists which of the loaded, debug, or supplementary debug files the module
+// currently wants. how_format is a printf-style format string literal; its
+// arguments, if any, are passed as the variadic arguments.
+#define drgn_module_try_files_log(module, how_format, ...) \
+({ \
+	struct drgn_module *_module = (module); \
+	bool _want_loaded = _module->loaded_file_status == DRGN_MODULE_FILE_WANT;\
+	bool _want_debug = _module->debug_file_status == DRGN_MODULE_FILE_WANT; \
+	bool _want_supplementary_debug = _module->debug_file_status \
+		== DRGN_MODULE_FILE_WANT_SUPPLEMENTARY;\
+	drgn_log_debug(_module->prog, \
+		       "%s (%s%s): " how_format " %s%s%s file%s", _module->name,\
+		       _module->build_id_str ? "build ID " : "no build ID", \
+		       _module->build_id_str ?: "", \
+		       ## __VA_ARGS__, \
+		       _want_loaded ? "loaded" : "", \
+		       _want_loaded && (_want_debug || _want_supplementary_debug)\
+		       ? " and " : "", \
+		       _want_debug ? "debug" \
+		       : _want_supplementary_debug ? "supplementary debug" : "",\
+		       _want_loaded && (_want_debug || _want_supplementary_debug)\
+		       ? "s" : ""); \
+})
+
+// Try to use the vDSO image read from the program's memory as the module's
+// loaded file.
+//
+// Does nothing unless options->try_embedded_vdso is set and the module wants a
+// loaded file. Requires the module to have exactly one address range that is
+// no larger than MAX_MEMORY_READ_FOR_DEBUG_INFO. Non-fatal failures are logged
+// at debug level and reported as NULL.
+static struct drgn_error *
+drgn_module_try_vdso_in_core(struct drgn_module *module,
+			     const struct drgn_debug_info_options *options)
+{
+	struct drgn_error *err;
+	struct drgn_program *prog = module->prog;
+
+	if (!options->try_embedded_vdso)
+		return NULL;
+
+	// The Linux kernel has included the entire vDSO in core dumps since
+	// Linux kernel commit f47aef55d9a1 ("[PATCH] i386 vDSO: use
+	// VM_ALWAYSDUMP") (in v2.6.20). Try to read it from program memory.
+
+	// The vDSO in memory is always stripped.
+	if (module->loaded_file_status != DRGN_MODULE_FILE_WANT)
+		return NULL;
+
+	size_t num_address_ranges;
+	if (!drgn_module_num_address_ranges(module, &num_address_ranges)) {
+		drgn_log_debug(prog,
+			       "vDSO address range is not known; "
+			       "can't read from program");
+		return NULL;
+	}
+	if (num_address_ranges != 1) {
+		drgn_log_debug(prog, "vDSO has %s; can't read from program",
+			       num_address_ranges
+			       ? "multiple address ranges"
+			       : "empty address range");
+		return NULL;
+	}
+	uint64_t start, end;
+	drgn_module_address_range(module, 0, &start, &end);
+	uint64_t size = end - start;
+	if (size > MAX_MEMORY_READ_FOR_DEBUG_INFO) {
+		drgn_log_debug(prog,
+			       "vDSO is unreasonably large (%" PRIu64 " bytes); "
+			       "not reading from program",
+			       size);
+		return NULL;
+	}
+
+	_cleanup_free_ char *image = malloc(size);
+	if (!image)
+		return &drgn_enomem;
+	err = drgn_program_read_memory(prog, image, start, size, false);
+	if (err) {
+		if (!drgn_error_is_fatal(err)) {
+			drgn_error_log_debug(prog, err, "couldn't read vDSO: ");
+			drgn_error_destroy(err);
+			err = NULL;
+		}
+		return err;
+	}
+
+	_cleanup_elf_end_ Elf *elf = elf_memory(image, size);
+	if (!elf) {
+		drgn_log_debug(prog, "couldn't read vDSO: %s", elf_errmsg(-1));
+		return NULL;
+	}
+	struct drgn_elf_file *file;
+	err = drgn_elf_file_create(module, "[vdso]", -1, image, elf, &file);
+	if (err) {
+		if (!drgn_error_is_fatal(err)) {
+			drgn_error_log_debug(prog, err, "");
+			drgn_error_destroy(err);
+			err = NULL;
+		}
+		return err;
+	}
+	// image and elf are owned by the drgn_elf_file now.
+	image = NULL;
+	elf = NULL;
+
+	drgn_log_debug(prog, "trying vDSO in %s",
+		       (module->prog->flags & DRGN_PROGRAM_IS_LIVE)
+		       ? "memory" : "core");
+	return drgn_module_maybe_use_elf_file(module, file, false);
+}
+
+// Log, at debug level, the gnu_debugaltlink path and build ID that the module
+// currently wants, prefixed by how (a description of the lookup being
+// attempted). Does nothing if the module doesn't want a gnu_debugaltlink file.
+static void
+drgn_module_try_supplementary_debug_file_log(struct drgn_module *module,
+					     const char *how)
+{
+	const char *debug_file_path;
+	const char *debugaltlink_path;
+	if (drgn_module_wanted_supplementary_debug_file(module,
+							&debug_file_path,
+							&debugaltlink_path,
+							NULL, NULL)
+	    != DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK)
+		return;
+	const char *debugaltlink_build_id_str =
+		module->wanted_supplementary_debug_file->checksum_str;
+	drgn_log_debug(module->prog,
+		       "%s: %s gnu_debugaltlink %s build ID %s in file %s",
+		       module->name, how, debugaltlink_path,
+		       debugaltlink_build_id_str, debug_file_path);
+}
+
+// Look for the module's wanted gnu_debugaltlink file in the standard places:
+// the debugaltlink path itself (resolved relative to the debug file's
+// directory when it is relative), then the "/.dwz/..." suffix of the path
+// under each of options->directories.
+//
+// Does nothing unless options->try_supplementary is set and the module wants a
+// gnu_debugaltlink file. Stops as soon as an attempt fails with an error or
+// the module no longer wants a supplementary file.
+static struct drgn_error *
+drgn_module_try_standard_supplementary_files(struct drgn_module *module,
+					     const struct drgn_debug_info_options *options)
+{
+	struct drgn_error *err;
+
+	if (!options->try_supplementary)
+		return NULL;
+
+	const char *debug_file_path;
+	const char *debugaltlink_path;
+	if (drgn_module_wanted_supplementary_debug_file(module,
+							&debug_file_path,
+							&debugaltlink_path,
+							NULL, NULL)
+	    != DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK)
+		return NULL;
+
+	drgn_module_try_supplementary_debug_file_log(module,
+						     "trying standard paths for");
+
+	STRING_BUILDER(sb);
+	const char *slash;
+	if (debugaltlink_path[0] == '/'
+	    || !(slash = strrchr(debug_file_path, '/'))) {
+		// debugaltlink is absolute, or the debug file doesn't have a
+		// directory component and is therefore in the current working
+		// directory. Try debugaltlink directly.
+		err = drgn_module_try_file_internal(module, debugaltlink_path,
+						    -1, true, NULL);
+	} else {
+		// Try $(dirname $path)/$debugaltlink.
+		if (!string_builder_appendn(&sb, debug_file_path,
+					    slash + 1 - debug_file_path)
+		    || !string_builder_append(&sb, debugaltlink_path)
+		    || !string_builder_null_terminate(&sb))
+			return &drgn_enomem;
+		err = drgn_module_try_file_internal(module, sb.str, -1, true,
+						    NULL);
+	}
+	if (err
+	    || module->debug_file_status != DRGN_MODULE_FILE_WANT_SUPPLEMENTARY)
+		return err;
+
+	// All of the Linux distributions that use gnu_debugaltlink that I'm
+	// aware of (Debian, Fedora, SUSE, and their derivatives) put
+	// gnu_debugaltlink files in a ".dwz" subdirectory under the debug
+	// directory (e.g., "/usr/lib/debug/.dwz"). Try the path starting with
+	// the ".dwz" directory under all of the configured debug directories.
+	// This can help in a couple of cases:
+	//
+	// 1. When the gnu_debugaltlink path is absolute (which is the case on
+	//    Debian and its derivatives as of Debian 12/Ubuntu 23.10) and the
+	//    debug directory has been copied to a different path. See
+	//    https://bugs.launchpad.net/ubuntu/+source/gdb/+bug/1818918.
+	// 2. When the gnu_debugaltlink path is relative (which is the case on
+	//    Fedora, SUSE, and their derivatives) and the debug file was found
+	//    outside of the debug directory.
+	const char *dwz = strstr(debugaltlink_path, "/.dwz/");
+	if (dwz) {
+		for (size_t i = 0; options->directories[i]; i++) {
+			const char *debug_dir = options->directories[i];
+
+			// Reuse the builder for each candidate path.
+			sb.len = 0;
+			if (!string_builder_append(&sb, debug_dir)
+			    || !string_builder_append(&sb, dwz)
+			    || !string_builder_null_terminate(&sb))
+				return &drgn_enomem;
+
+			// Don't bother trying debugaltlink directly again.
+			if (strcmp(sb.str, debugaltlink_path) == 0)
+				continue;
+
+			err = drgn_module_try_file_internal(module, sb.str, -1,
+							    true, NULL);
+			if (err
+			    || module->debug_file_status
+			       != DRGN_MODULE_FILE_WANT_SUPPLEMENTARY)
+				return err;
+		}
+	}
+	return NULL;
+}
+
+// Return whether the module wants a supplementary debug file whose generation
+// is newer than orig_supplementary_file_generation, i.e., whether the want was
+// set after that generation number was sampled.
+static bool
+drgn_module_wanted_supplementary_debug_file_is_new(struct drgn_module *module,
+						   uint64_t orig_supplementary_file_generation)
+{
+	return module->wanted_supplementary_debug_file
+	       && module->wanted_supplementary_debug_file->generation
+		  > orig_supplementary_file_generation;
+}
+
+// Try a file for a module with drgn_module_try_file_internal() (same path, fd,
+// check_build_id, and expected_crc semantics). If that attempt made the module
+// want a new supplementary debug file, immediately search the standard
+// locations for it as well.
+struct drgn_error *
+drgn_module_try_standard_file(struct drgn_module *module,
+			      const struct drgn_debug_info_options *options,
+			      const char *path, int fd, bool check_build_id,
+			      const uint32_t *expected_crc)
+{
+	struct drgn_error *err;
+	// Sample the generation before trying the file so that a newly wanted
+	// supplementary file can be detected afterwards.
+	uint64_t orig_supplementary_file_generation =
+		module->prog->dbinfo.supplementary_file_generation;
+	err = drgn_module_try_file_internal(module, path, fd, check_build_id,
+					    expected_crc);
+	if (err)
+		return err;
+	// If the wanted supplementary debug file changed, try finding it again.
+	if (drgn_module_wanted_supplementary_debug_file_is_new(module,
+							       orig_supplementary_file_generation)) {
+		err = drgn_module_try_standard_supplementary_files(module,
+								   options);
+		if (err)
+			return err;
+	}
+	return NULL;
+}
+
+// An entry in /proc/$pid/map_files.
+struct drgn_map_files_segment {
+	uint64_t start;
+	uint64_t end;
+};
+
+DEFINE_VECTOR(drgn_map_files_segment_vector, struct drgn_map_files_segment);
+
+// qsort() comparator ordering segments by ascending start address.
+static inline int drgn_map_files_segment_compare(const void *_a, const void *_b)
+{
+	const struct drgn_map_files_segment *a = _a;
+	const struct drgn_map_files_segment *b = _b;
+	return (a->start > b->start) - (a->start < b->start);
+}
+
+// Replace the map_files cache in dbinfo with the contents of segments (which
+// are stolen from the vector). If sorted is false, the new cache is sorted by
+// start address.
+static void
+drgn_debug_info_set_map_files_segments(struct drgn_debug_info *dbinfo,
+				       struct drgn_map_files_segment_vector *segments,
+				       bool sorted)
+{
+	free(dbinfo->map_files_segments);
+	drgn_map_files_segment_vector_shrink_to_fit(segments);
+	drgn_map_files_segment_vector_steal(segments,
+					    &dbinfo->map_files_segments,
+					    &dbinfo->num_map_files_segments);
+	// The Linux kernel always returns these entries in order, but sort it
+	// just in case.
+	if (!sorted) {
+		qsort(dbinfo->map_files_segments,
+		      dbinfo->num_map_files_segments,
+		      sizeof(dbinfo->map_files_segments[0]),
+		      drgn_map_files_segment_compare);
+	}
+}
+
+// Try to open a shared library through the /proc/$pid/map_files entry whose
+// range contains module->info (logged below as the address of the dynamic
+// section).
+//
+// The cached list of map_files segments is consulted first; on a cache miss,
+// or if the cached entry no longer exists, the directory is walked and the
+// cache rebuilt. *tried is set to true if an entry was successfully opened and
+// tried; it is never set to false here.
+static struct drgn_error *
+drgn_module_try_proc_files_for_shared_library(struct drgn_module *module,
+					      const struct drgn_debug_info_options *options,
+					      bool *tried)
+{
+	struct drgn_error *err;
+	struct drgn_program *prog = module->prog;
+	const uint64_t address = module->info;
+
+#define DIR_FORMAT "/proc/%ld/map_files"
+#define ENTRY_FORMAT "/%" PRIx64 "-%" PRIx64
+	// Sized for the formatted directory plus one entry: each conversion
+	// specification is replaced by its maximum length (16 hex digits for a
+	// 64-bit value).
+	char path[sizeof(DIR_FORMAT ENTRY_FORMAT)
+		  - (sizeof("%ld") - 1)
+		  + max_decimal_length(long)
+		  - 2 * (sizeof("%" PRIx64) - 1)
+		  + 2 * 16];
+	int dir_len = sprintf(path, DIR_FORMAT, (long)prog->pid);
+
+	// Check the cache first.
+	#define less_than_start(a, b) (*(a) < (b)->start)
+	size_t cache_index = binary_search_gt(prog->dbinfo.map_files_segments,
+					      prog->dbinfo.num_map_files_segments,
+					      &address, less_than_start);
+	#undef less_than_start
+	// cache_index is the first segment starting after address, so the
+	// candidate containing address (if any) is the one before it.
+	if (cache_index > 0
+	    && address < prog->dbinfo.map_files_segments[cache_index - 1].end) {
+		struct drgn_map_files_segment *cache =
+			&prog->dbinfo.map_files_segments[cache_index - 1];
+		sprintf(path + dir_len, ENTRY_FORMAT, cache->start, cache->end);
+		drgn_log_debug(prog,
+			       "found %s containing dynamic section 0x%" PRIx64 " in map_files cache",
+			       path, address);
+		int fd = open(path, O_RDONLY);
+		if (fd >= 0) {
+			*tried = true;
+			return drgn_module_try_standard_file(module, options,
+							     path, fd, false,
+							     NULL);
+		} else {
+			// We found a match in the cache, but we couldn't open
+			// it. If it doesn't exist anymore, then we need to
+			// rebuild the cache. If it failed for any other reason,
+			// ignore it like we do in the cache miss case.
+			bool rebuild_cache = errno == ENOENT;
+			drgn_log_debug(prog, "%s: %m", path);
+			if (!rebuild_cache)
+				return NULL;
+		}
+		drgn_log_debug(prog, "rebuilding map_files cache");
+		// Truncate path back to the directory.
+		path[dir_len] = '\0';
+	}
+#undef ENTRY_FORMAT
+#undef DIR_FORMAT
+
+	// Walk /proc/$pid/map_files, caching it while looking for a match.
+	_cleanup_closedir_ DIR *dir = opendir(path);
+	if (!dir) {
+		if (errno != ENOENT)
+			return drgn_error_create_os("opendir", errno, path);
+		drgn_log_debug(prog, "%s: %m", path);
+		return NULL;
+	}
+	VECTOR(drgn_map_files_segment_vector, segments);
+	bool sorted = true;
+	bool found = false;
+	struct dirent *ent;
+	// errno is cleared before each readdir() so that end-of-directory can
+	// be told apart from an error after the loop.
+	while ((errno = 0, ent = readdir(dir))) {
+		struct drgn_map_files_segment segment;
+		// Skip anything that isn't of the form "<hex>-<hex>".
+		if (sscanf(ent->d_name, "%" SCNx64 "-%" SCNx64, &segment.start,
+			   &segment.end) != 2)
+			continue;
+
+		if (!drgn_map_files_segment_vector_empty(&segments)
+		    && segment.start
+		       < drgn_map_files_segment_vector_last(&segments)->start)
+			sorted = false;
+		if (!drgn_map_files_segment_vector_append(&segments, &segment))
+			return &drgn_enomem;
+
+		// Only the first matching entry is tried, and only if its name
+		// fits in path. The walk continues afterwards so that the cache
+		// is complete.
+		if (segment.start <= address && address < segment.end
+		    && !found
+		    && strlen(ent->d_name) + 1 < sizeof(path) - dir_len) {
+			found = true;
+			path[dir_len] = '/';
+			memcpy(path + dir_len + 1, ent->d_name,
+			       strlen(ent->d_name) + 1);
+			drgn_log_debug(prog,
+				       "found %s containing dynamic section 0x%" PRIx64,
+				       path, address);
+			int fd = openat(dirfd(dir), ent->d_name, O_RDONLY);
+			if (fd >= 0) {
+				*tried = true;
+				err = drgn_module_try_standard_file(module,
+								    options,
+								    path, fd,
+								    false,
+								    NULL);
+				if (err)
+					return err;
+			} else {
+				drgn_log_debug(prog, "%s: %m", path);
+			}
+			path[dir_len] = '\0';
+		}
+	}
+	if (errno)
+		return drgn_error_create_os("readdir", errno, path);
+
+	drgn_debug_info_set_map_files_segments(&prog->dbinfo, &segments,
+					       sorted);
+
+	if (!found) {
+		drgn_log_debug(prog,
+			       "didn't find entry in %s containing dynamic section 0x%" PRIx64,
+			       path, address);
+	}
+	return NULL;
+}
+
+// Try to find a module's file through procfs: /proc/$pid/exe for the main
+// module and /proc/$pid/map_files for shared libraries. Other module kinds are
+// ignored.
+//
+// Does nothing (and leaves *tried untouched) unless options->try_procfs is
+// set. Otherwise, *tried reports whether a procfs file was opened and tried.
+static struct drgn_error *drgn_module_try_proc_files(struct drgn_module *module,
+						     const struct drgn_debug_info_options *options,
+						     bool *tried)
+{
+	struct drgn_program *prog = module->prog;
+
+	if (!options->try_procfs)
+		return NULL;
+
+	*tried = false;
+	if (module->kind == DRGN_MODULE_MAIN) {
+#define FORMAT "/proc/%ld/exe"
+		char path[sizeof(FORMAT)
+			  - (sizeof("%ld") - 1)
+			  + max_decimal_length(long)];
+		snprintf(path, sizeof(path), FORMAT, (long)prog->pid);
+#undef FORMAT
+		int fd = open(path, O_RDONLY);
+		if (fd < 0) {
+			drgn_log_debug(prog, "%s: %m", path);
+			return NULL;
+		}
+		*tried = true;
+		return drgn_module_try_standard_file(module, options, path, fd,
+						     false, NULL);
+	} else if (module->kind == DRGN_MODULE_SHARED_LIBRARY) {
+		return drgn_module_try_proc_files_for_shared_library(module,
+								     options,
+								     tried);
+	} else {
+		return NULL;
+	}
+}
+
+// Try to find a module's files by build ID under each of options->directories:
+// "<dir>/.build-id/xx/yyyy.debug" for the debug file and the same path without
+// the ".debug" extension for the loaded file.
+//
+// Does nothing unless options->try_build_id is set and the module has a build
+// ID of at least 2 bytes. Stops at the first error or once the module no
+// longer wants any file.
+static struct drgn_error *
+drgn_module_try_files_by_build_id(struct drgn_module *module,
+				  const struct drgn_debug_info_options *options)
+{
+	struct drgn_error *err;
+
+	if (!options->try_build_id)
+		return NULL;
+
+	size_t build_id_len;
+	const char *build_id_str =
+		drgn_module_build_id(module, NULL, &build_id_len);
+	// We need at least 2 bytes (4 hex characters) to build the paths.
+	if (build_id_len < 2)
+		return NULL;
+
+	STRING_BUILDER(sb);
+	for (size_t i = 0; options->directories[i]; i++) {
+		const char *debug_dir = options->directories[i];
+		if (!string_builder_append(&sb, debug_dir)
+		    || !string_builder_appendf(&sb, "/.build-id/%c%c/%s.debug",
+					       build_id_str[0], build_id_str[1],
+					       &build_id_str[2])
+		    || !string_builder_null_terminate(&sb))
+			return &drgn_enomem;
+		// We trust the build ID encoded in the path and don't check it
+		// again.
+		if (module->debug_file_status == DRGN_MODULE_FILE_WANT) {
+			err = drgn_module_try_standard_file(module, options,
+							    sb.str, -1, false,
+							    NULL);
+			if (err || !drgn_module_wants_file(module))
+				return err;
+		}
+		if (module->loaded_file_status == DRGN_MODULE_FILE_WANT) {
+			// Remove the ".debug" extension.
+			sb.str[sb.len - sizeof(".debug") + 1] = '\0';
+			err = drgn_module_try_standard_file(module, options,
+							    sb.str, -1, false,
+							    NULL);
+			if (err || !drgn_module_wants_file(module))
+				return err;
+		}
+		sb.len = 0;
+	}
+	return NULL;
+}
+
+// Return the first occurrence of either $ORIGIN followed by a word boundary or
+// ${ORIGIN}, and set *end_ret to the character after that occurrence. Return
+// NULL if not found (and *end_ret is not modified).
+static const char *find_dollar_origin(const char *s, const char **end_ret)
+{
+	const char *dollar;
+	while ((dollar = strchr(s, '$'))) {
+		if (strstartswith(dollar + 1, "ORIGIN")) {
+			s = dollar + (sizeof("$ORIGIN") - 1);
+			// Skip it if it doesn't end at a word boundary.
+			//
+			// The cast is required: passing a negative value other
+			// than EOF to isalnum() is undefined behavior, and a
+			// plain char holding a byte >= 0x80 (e.g., part of a
+			// UTF-8 sequence in a directory name) is negative where
+			// char is signed.
+			if (*s == '_' || isalnum((unsigned char)*s))
+				continue;
+			*end_ret = s;
+			break;
+		} else if (strstartswith(dollar + 1, "{ORIGIN}")) {
+			*end_ret = dollar + (sizeof("${ORIGIN}") - 1);
+			break;
+		} else {
+			s = dollar + 1;
+		}
+	}
+	return dollar;
+}
+
+// Try to find a module's debug file via the .gnu_debuglink section of its
+// loaded file.
+//
+// An absolute debuglink is tried directly. A relative debuglink is tried in
+// each of options->debug_link_directories, with $ORIGIN/${ORIGIN} replaced by
+// the directory of the loaded file; an empty entry means "the loaded file's
+// directory under each of options->directories". Every candidate must match
+// the CRC stored in the section.
+//
+// Does nothing unless options->try_debug_link is set and the module has a
+// loaded file with a .gnu_debuglink section.
+static struct drgn_error *
+drgn_module_try_files_by_gnu_debuglink(struct drgn_module *module,
+				       const struct drgn_debug_info_options *options)
+{
+	struct drgn_error *err;
+	struct drgn_program *prog = module->prog;
+
+	if (!options->try_debug_link)
+		return NULL;
+
+	struct drgn_elf_file *file = module->loaded_file;
+	if (!file || !file->scns[DRGN_SCN_GNU_DEBUGLINK])
+		return NULL;
+	// We don't cache .gnu_debuglink, and it doesn't need relocation, so
+	// don't use drgn_elf_file_read_section().
+	Elf_Data *data;
+	err = read_elf_section(file->scns[DRGN_SCN_GNU_DEBUGLINK], &data);
+	if (err) {
+		if (!drgn_error_is_fatal(err)) {
+			drgn_error_log_debug(prog, err,
+					     "%s: couldn't read .gnu_debuglink: ",
+					     file->path);
+			drgn_error_destroy(err);
+			err = NULL;
+		}
+		return err;
+	}
+
+	struct drgn_elf_file_section_buffer buffer;
+	drgn_elf_file_section_buffer_init(&buffer, file,
+					  file->scns[DRGN_SCN_GNU_DEBUGLINK],
+					  data);
+	// The section is a null-terminated file name, padding to a 4-byte
+	// boundary, and a 32-bit CRC.
+	const char *debuglink;
+	size_t debuglink_len;
+	uint32_t crc;
+	if ((err = binary_buffer_next_string(&buffer.bb, &debuglink,
+					     &debuglink_len))
+	    // Align up to 4-byte boundary.
+	    || (err = binary_buffer_skip(&buffer.bb, -(debuglink_len + 1) & 3))
+	    || (err = binary_buffer_next_u32(&buffer.bb, &crc))) {
+		if (!drgn_error_is_fatal(err)) {
+			drgn_error_log_debug(prog, err, "");
+			drgn_error_destroy(err);
+			err = NULL;
+		}
+		return err;
+	}
+	drgn_log_debug(prog, "%s has debuglink %s CRC 0x%08" PRIx32, file->path,
+		       debuglink, crc);
+
+	STRING_BUILDER(sb);
+	if (debuglink[0] == '/') {
+		// debuglink is absolute. Try it directly.
+		return drgn_module_try_standard_file(module, options, debuglink,
+						     -1, false, &crc);
+	}
+
+	if (!debuglink[0] || file->path[0] != '/') {
+		// debuglink is empty or file path is not absolute. Ignore it.
+		return NULL;
+	}
+
+	// debuglink is relative. Try it in the debug link directories.
+	const char *slash = strrchr(file->path, '/');
+	// We just checked that the file path is absolute, so there must be a
+	// slash. Also trim extra slashes just in case.
+	while (slash != file->path && slash[-1] == '/')
+		slash--;
+	size_t dir_len = slash - file->path;
+	const char * const *next_debug_link_dir =
+		options->debug_link_directories;
+	// Non-NULL only while expanding an empty debug link directory into the
+	// list of debug directories.
+	const char * const *next_debug_dir = NULL;
+	for (;;) {
+		if (next_debug_dir) {
+			const char *debug_dir = *next_debug_dir++;
+			if (!debug_dir) {
+				// Done with the debug directories; go back to
+				// the debug link directories.
+				next_debug_dir = NULL;
+				continue;
+			}
+			if (!string_builder_append(&sb, debug_dir)
+			    || !string_builder_appendn(&sb, file->path, dir_len))
+				return &drgn_enomem;
+		} else {
+			const char *debug_link_dir = *next_debug_link_dir++;
+			if (!debug_link_dir)
+				return NULL;
+			if (!debug_link_dir[0]) {
+				// Empty path. Try under the debug directories.
+				next_debug_dir = options->directories;
+				continue;
+			}
+			// Copy the directory, substituting the loaded file's
+			// directory for each $ORIGIN/${ORIGIN}.
+			const char *s = debug_link_dir;
+			const char *dollar, *end;
+			while ((dollar = find_dollar_origin(s, &end))) {
+				if (!string_builder_appendn(&sb, s, dollar - s)
+				    || !string_builder_appendn(&sb, file->path,
+							       dir_len))
+					return &drgn_enomem;
+				s = end;
+			}
+			if (!string_builder_append(&sb, s))
+				return &drgn_enomem;
+		}
+		if (!string_builder_appendc(&sb, '/')
+		    || !string_builder_appendn(&sb, debuglink, debuglink_len)
+		    || !string_builder_null_terminate(&sb))
+			return &drgn_enomem;
+		err = drgn_module_try_standard_file(module, options, sb.str, -1,
+						    false, &crc);
+		if (err || !drgn_module_wants_file(module))
+			return err;
+		sb.len = 0;
+	}
+}
+
+// Run every standard lookup method for one module, in order, stopping at the
+// first error or as soon as the module no longer wants any file:
+// supplementary files, reuse of already-found files, vDSO-in-memory or procfs,
+// build ID, well-known/expected paths, build ID again (if it was only learned
+// along the way), and finally .gnu_debuglink.
+static struct drgn_error *
+drgn_module_try_standard_files(struct drgn_module *module,
+			       const struct drgn_debug_info_options *options,
+			       struct drgn_standard_debug_info_find_state *state)
+{
+	struct drgn_error *err;
+	struct drgn_program *prog = module->prog;
+
+	// This can't happen when called from the standard debug info finder,
+	// but it can from drgn_find_standard_debug_info().
+	if (!drgn_module_wants_file(module))
+		return NULL;
+
+	drgn_module_try_files_log(module, "trying standard paths for");
+
+	// If we need a supplementary file, try that first.
+	err = drgn_module_try_standard_supplementary_files(module, options);
+	if (err || !drgn_module_wants_file(module))
+		return err;
+
+	// If a previous attempt used a loadable file with debug info but didn't
+	// want both, we might be able to reuse it.
+	if (options->try_reuse
+	    && module->loaded_file_status == DRGN_MODULE_FILE_WANT) {
+		struct drgn_elf_file *reuse_file = NULL;
+		if (module->debug_file && module->debug_file->is_loadable)
+			reuse_file = module->debug_file;
+		else if (module->wanted_supplementary_debug_file
+			 && module->wanted_supplementary_debug_file->file->is_loadable)
+			reuse_file = module->wanted_supplementary_debug_file->file;
+		if (reuse_file) {
+			drgn_log_debug(prog,
+				       "reusing loadable debug file %s as loaded file",
+				       reuse_file->path);
+			err = drgn_module_maybe_use_elf_file(module, reuse_file,
+							     false);
+			if (err || !drgn_module_wants_file(module))
+				return err;
+		}
+	}
+	if (options->try_reuse
+	    && module->debug_file_status == DRGN_MODULE_FILE_WANT
+	    && module->loaded_file
+	    && drgn_elf_file_has_dwarf(module->loaded_file)) {
+		drgn_log_debug(prog,
+			       "reusing loaded file with debug info %s as debug file",
+			       module->loaded_file->path);
+		err = drgn_module_maybe_use_elf_file(module,
+						     module->loaded_file,
+						     false);
+		if (err || !drgn_module_wants_file(module))
+			return err;
+	}
+
+	// First, try methods that are guaranteed to find the right file:
+	// reading a vDSO from the core dump and opening a file via a magic
+	// symlink in /proc.
+	bool tried_proc_symlink = false;
+	if (module->kind == DRGN_MODULE_VDSO) {
+		err = drgn_module_try_vdso_in_core(module, options);
+		if (err || !drgn_module_wants_file(module))
+			return err;
+	} else if (drgn_program_is_userspace_process(prog)) {
+		err = drgn_module_try_proc_files(module, options,
+						 &tried_proc_symlink);
+		if (err || !drgn_module_wants_file(module))
+			return err;
+	}
+
+	// If we already have the build ID, try it now before wasting time with
+	// the expected paths. If this is a Linux kernel loadable module, this
+	// can save us from needing the depmod index. If not, it can still save
+	// us from trying a file with the wrong build ID.
+	const bool had_build_id = module->build_id_len > 0;
+	if (had_build_id) {
+		err = drgn_module_try_files_by_build_id(module, options);
+		if (err || !drgn_module_wants_file(module))
+			return err;
+	}
+
+	// Next, try opening things at their expected paths. If this is the
+	// Linux kernel or a Linux kernel loadable module, try some well-known
+	// paths.
+	if (module->kind == DRGN_MODULE_MAIN
+	    && (module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) {
+		err = drgn_module_try_vmlinux_files(module, options);
+		if (err || !drgn_module_wants_file(module))
+			return err;
+	} else if (module->kind == DRGN_MODULE_RELOCATABLE
+		   && (module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) {
+		err = drgn_module_try_linux_kmod_files(module, options, state);
+		if (err || !drgn_module_wants_file(module))
+			return err;
+	// Otherwise, if the module name looks like a path (i.e., it contains a
+	// slash), try it. The vDSO is embedded in the kernel and isn't on disk,
+	// so there's no point in trying it. Additionally, if we already tried a
+	// /proc symlink, then we already tried the file that the path is
+	// supposed to refer to, so don't try again.
+	} else if (module->kind != DRGN_MODULE_VDSO
+		   && options->try_module_name
+		   && !tried_proc_symlink
+		   && strchr(module->name, '/')) {
+		err = drgn_module_try_standard_file(module, options,
+						    module->name, -1, true,
+						    NULL);
+		if (err || !drgn_module_wants_file(module))
+			return err;
+	}
+
+	// If we didn't have the build ID before, we might have found the loaded
+	// file and gotten a build ID from it. Try to find the debug file by
+	// build ID now.
+	if (!had_build_id) {
+		err = drgn_module_try_files_by_build_id(module, options);
+		if (err || !drgn_module_wants_file(module))
+			return err;
+	}
+
+	// We might have a loaded file with a .gnu_debuglink. Try to find the
+	// corresponding debug file.
+	return drgn_module_try_files_by_gnu_debuglink(module, options);
+}
+
+// Run the standard lookup for each of the given modules. arg is the
+// struct drgn_debug_info_options to use. The options are logged first when
+// debug logging is enabled. Reads modules[0], so num_modules must be at least
+// 1.
+static struct drgn_error *
+drgn_standard_debug_info_find(struct drgn_module * const *modules,
+			      size_t num_modules, void *arg)
+{
+	struct drgn_error *err;
+	struct drgn_debug_info_options *options = arg;
+
+	if (drgn_log_is_enabled(modules[0]->prog, DRGN_LOG_DEBUG)) {
+		_cleanup_free_ char *options_str =
+			drgn_format_debug_info_options(options);
+		if (!options_str)
+			return &drgn_enomem;
+		drgn_log_debug(modules[0]->prog,
+			       "trying standard debug info finder with %s%s",
+			       options == &modules[0]->prog->dbinfo.options
+			       ? "" : "given ",
+			       options_str);
+	}
+
+	// State shared across all of the modules in this call; released by the
+	// cleanup attribute on every return path.
+	_cleanup_(drgn_standard_debug_info_find_state_deinit)
+	struct drgn_standard_debug_info_find_state state = {
+		.modules = modules,
+		.num_modules = num_modules,
+		.kmod_walk = {
+			.modules = HASH_TABLE_INIT,
+			.stack = VECTOR_INIT,
+			.path = STRING_BUILDER_INIT,
+			.visited_dirs = HASH_TABLE_INIT,
+			.next_kernel_dir = options->kernel_directories,
+		},
+	};
+	for (size_t i = 0; i < num_modules; i++) {
+		err = drgn_module_try_standard_files(modules[i], options,
+						     &state);
+		if (err)
+			return err;
+	}
+	return NULL;
+}
+
+// Public entry point: run the standard debug info lookup for a set of modules.
+//
+// All modules must belong to the same program; otherwise a
+// DRGN_ERROR_INVALID_ARGUMENT error is returned. If options is NULL, the
+// program's own debug info options are used. An empty module list is a no-op.
+LIBDRGN_PUBLIC struct drgn_error *
+drgn_find_standard_debug_info(struct drgn_module * const *modules,
+			      size_t num_modules,
+			      struct drgn_debug_info_options *options)
+{
+	if (num_modules == 0)
+		return NULL;
+
+	struct drgn_program *prog = modules[0]->prog;
+	for (size_t i = 0; i < num_modules; i++) {
+		if (modules[i]->prog != prog) {
+			return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT,
+						 "modules are from different programs");
+		}
+	}
+
+	if (!options)
+		options = &modules[0]->prog->dbinfo.options;
+	return drgn_standard_debug_info_find(modules, num_modules, options);
+}
+
+#if WITH_DEBUGINFOD
+// Return the number of columns (per wcwidth()) occupied by the first n bytes
+// of the multibyte string s in the current locale. Returns -1 for an invalid
+// multibyte sequence, -2 for an incomplete multibyte character, and -3 for a
+// nonprintable character.
+static int count_columns(const char *s, size_t n)
+{
+	int columns = 0;
+	while (n > 0) {
+		mbstate_t ps;
+		memset(&ps, 0, sizeof(ps));
+		do {
+			wchar_t wc;
+			size_t r = mbrtowc(&wc, s, n, &ps);
+			if (r == (size_t)-1) // Invalid multibyte sequence.
+				return -1;
+			if (r == (size_t)-2) // Incomplete multibyte character.
+				return -2;
+			if (r == 0) // Null wide character.
+				r = 1;
+
+			int w = wcwidth(wc);
+			if (w < 0) // Nonprintable wide character.
+				return -3;
+			s += r;
+			n -= r;
+			columns += w;
+		} while (!mbsinit(&ps));
+	}
+	return columns;
+}
+
+// Make sure that sb->str[start, end) occupies at most max_columns columns. If
+// it doesn't fit, it is cut at the last character boundary that leaves room
+// for the ellipsis, and the removed part is replaced by up to three '.'
+// characters (preceded by a shift-state reset sequence if needed). Anything in
+// the builder after end is preserved.
+//
+// Returns the resulting number of columns, the same negative codes as
+// count_columns() for malformed input, or INT_MIN if memory allocation fails.
+static int truncate_columns(struct string_builder *sb, size_t start, size_t end,
+			    int max_columns)
+{
+	int columns = 0;
+
+	// Where to cut if truncation turns out to be necessary: the byte
+	// offset, column count, and conversion state after the last character
+	// that still leaves room for three dots.
+	size_t truncate_len = start;
+	int truncate_column = 0;
+	mbstate_t truncate_ps;
+	memset(&truncate_ps, 0, sizeof(truncate_ps));
+
+	while (start < end) {
+		mbstate_t ps;
+		memset(&ps, 0, sizeof(ps));
+		do {
+			wchar_t wc;
+			size_t r = mbrtowc(&wc, &sb->str[start], end - start,
+					   &ps);
+			if (r == (size_t)-1) // Invalid multibyte sequence.
+				return -1;
+			if (r == (size_t)-2) // Incomplete multibyte character.
+				return -2;
+			if (r == 0) // Null wide character.
+				r = 1;
+
+			int w = wcwidth(wc);
+			if (w < 0) // Nonprintable wide character.
+				return -3;
+
+			if (w > max_columns - columns) {
+				// This character doesn't fit. Rewrite the
+				// string as: kept prefix, shift-state reset,
+				// dots, then whatever followed end.
+				int dots = min(max_columns, 3);
+				char reset[MB_LEN_MAX];
+				size_t reset_len = 0;
+				if (!mbsinit(&truncate_ps)) {
+					// - 1 to exclude the null byte that
+					// wcrtomb() stores for L'\0'.
+					reset_len = wcrtomb(reset, L'\0',
+							    &truncate_ps) - 1;
+				}
+				size_t new_len = (truncate_len
+						  + reset_len
+						  + dots
+						  + (sb->len - end));
+				if (!string_builder_reserve(sb, new_len))
+					return INT_MIN;
+				memmove(&sb->str[truncate_len + reset_len + dots],
+					&sb->str[end], sb->len - end);
+				memset(&sb->str[truncate_len + reset_len], '.',
+				       dots);
+				memcpy(&sb->str[truncate_len], reset,
+				       reset_len);
+				sb->len = new_len;
+				return truncate_column + dots;
+			}
+
+			start += r;
+			columns += w;
+			if (columns <= max_columns - 3) {
+				truncate_len = start;
+				truncate_column = columns;
+				memcpy(&truncate_ps, &ps, sizeof(ps));
+			}
+		} while (!mbsinit(&ps));
+	}
+	return columns;
+}
+
+// If ps is not in the initial conversion state, append the byte sequence that
+// returns to the initial shift state to sb (excluding the null byte that
+// wcrtomb() stores for L'\0').
+// NOTE(review): this writes directly at sb->str[sb->len] without reserving
+// space; presumably the caller has already reserved enough -- confirm.
+static void reset_shift_state(struct string_builder *sb, mbstate_t *ps)
+{
+	if (!mbsinit(ps))
+		sb->len += wcrtomb(&sb->str[sb->len], L'\0', ps) - 1;
+}
+
+static bool write_unicode_progress_bar(struct string_builder *sb, int columns,
+				       double ratio)
+{
+	size_t orig_len = sb->len;
+
+	mbstate_t ps;
+	memset(&ps, 0, sizeof(ps));
+
+	// "Right one eighth block" character.
+	size_t r = wcrtomb(&sb->str[sb->len], L'\u2595', &ps);
+	if (r == (size_t)-1)
+		return false;
+	sb->len += r;
+
+	// + 0.25 so that we round up if the piece would be at least 75% full.
+	int eighths = columns * ratio * 8.0 + 0.25;
+	int blocks = eighths / 8;
+	int i;
+	for (i = 0; i < blocks; i++) {
+		// "Full block" character.
+		r = wcrtomb(&sb->str[sb->len], L'\u2588', &ps);
+		if (r == (size_t)-1)
+			goto undo;
+		sb->len += r;
+	}
+	// "Left one eighth block" through "left seven eighths block"
+	// characters.
+ static const wchar_t eighths_blocks[7] = + L"\u258f\u258e\u258d\u258c\u258b\u258a\u2589"; + if (eighths % 8 != 0) { + r = wcrtomb(&sb->str[sb->len], eighths_blocks[eighths % 8 - 1], + &ps); + if (r == (size_t)-1) + goto undo; + sb->len += r; + i++; + } + + for (; i < columns; i++) { + r = wcrtomb(&sb->str[sb->len], L' ', &ps); + if (r == (size_t)-1) + goto undo; + sb->len += r; + } + + // "Left one eighth block" character. + r = wcrtomb(&sb->str[sb->len], L'\u258f', &ps); + if (r == (size_t)-1) + goto undo; + sb->len += r; + + reset_shift_state(sb, &ps); + return true; + +undo: + sb->len = orig_len; + return false; +} + +static void write_ascii_progress_bar(struct string_builder *sb, int columns, + double ratio) +{ + sb->str[sb->len++] = '['; + // + 0.25 so that we round up if the block would be at least 75% full. + int blocks = columns * ratio + 0.25; + memset(&sb->str[sb->len], '#', blocks); + sb->len += blocks; + memset(&sb->str[sb->len], ' ', columns - blocks); + sb->len += columns - blocks; + sb->str[sb->len++] = ']'; +} + +static bool write_unicode_spinner(struct string_builder *sb, int pos) +{ + static const wchar_t spinner[] = { + L'\u2596', // Quadrant lower left + L'\u2598', // Quadrant upper left + L'\u259d', // Quadrant upper right + L'\u2597', // Quadrant lower right + }; + mbstate_t ps; + memset(&ps, 0, sizeof(ps)); + size_t r = wcrtomb(&sb->str[sb->len], + spinner[pos % array_size(spinner)], &ps); + if (r == (size_t)-1) + return false; + sb->len += r; + reset_shift_state(sb, &ps); + return true; +} + +static void write_ascii_spinner(struct string_builder *sb, int pos) +{ + static const char spinner[] = { '|', '/', '-', '\\' }; + sb->str[sb->len++] = spinner[pos % array_size(spinner)]; +} + +// debuginfod_set_user_data() and debuginfod_get_user_data() were added in +// elfutils 0.179. Before that, we emulate them with a thread-local variable. 
+#if !_ELFUTILS_PREREQ(0, 179) +static _Thread_local void *drgn_debuginfod_user_data; +#endif + +// This is called with: +// - a >= 0 && b == 0 while cleaning the debuginfod cache, where a is the number +// of files in the cache that have been checked. +// - a >= 0 && b == 0 while waiting to read the first chunk of data from a +// debuginfod server, where a is an increasing counter. Note that this cannot +// be distinguished from the previous case. +// - a >= 0 && b > 0 while downloading, where a is the number of bytes +// downloaded and b is the total size to download in bytes. +// - a >= 0 && b <= 0 while downloading, where a is the number of bytes +// downloaded and the total size is not known. This can be distinguished from +// the first two cases because debuginfod_get_url() will return non-NULL. +// - a < 0 && b >= 0 when the download has finished successfully. b is the +// downloaded file descriptor. +// - a < 0 && b < 0 when the download failed. b is a negative errno. +static void drgn_log_debuginfod_progress(debuginfod_client *client, long a, + long b) +{ +#if _ELFUTILS_PREREQ(0, 179) + struct drgn_program *prog = drgn_debuginfod_get_user_data(client); +#else + struct drgn_program *prog = drgn_debuginfod_user_data; +#endif + + const bool done = a < 0; + + // If we already started logging progress for this download when it + // failed, we log the error like progress below. Otherwise, the download + // failed very early, so we only log a debug message. 
+ if (done && b < 0 && !prog->dbinfo.logged_debuginfod_progress) { + if (b != -ENOSYS) { + errno = -b; + drgn_log_debug(prog, + "%s: couldn't download%s from debuginfod: %m", + prog->dbinfo.debuginfod_current_name, + prog->dbinfo.debuginfod_current_type); + } else if (!prog->dbinfo.logged_no_debuginfod) { + drgn_log_debug(prog, + "no debuginfod servers configured; " + "try setting the DEBUGINFOD_URLS environment variable"); + prog->dbinfo.logged_no_debuginfod = true; + } + return; + } + prog->dbinfo.logged_debuginfod_progress = true; + + int columns; + FILE *file = drgn_program_get_progress_file(prog, &columns); + + // ANSI escape sequence to clear the current line and return the cursor + // to the beginning of the line. + static const char ansi_erase_line[] = "\33[2K\r"; + + // Once we know what URL we are downloading from, log it. + if (!prog->dbinfo.debuginfod_have_url) { + // debuginfod_get_url() was added in elfutils 0.179. Before + // that, we have to assume that we have a URL. +#if _ELFUTILS_PREREQ(0, 179) + const char *url = drgn_debuginfod_get_url(client); + if (url) { + prog->dbinfo.debuginfod_have_url = true; + // Erase the current line since we may have logged + // progress. + if (columns >= 0) { + fwrite(ansi_erase_line, 1, + sizeof(ansi_erase_line) - 1, file); + fflush(file); + } + drgn_log_debug(prog, "downloading from debuginfod at %s", url); + } +#else + prog->dbinfo.debuginfod_have_url = true; +#endif + } + + // If we succeeded without ever getting a URL, it must have been cached. + if (done && b >= 0 && !prog->dbinfo.debuginfod_have_url) { + // We may have logged download progress when we were actually + // cleaning the cache. Clear it to avoid confusion. 
+ if (columns >= 0) { + fwrite(ansi_erase_line, 1, sizeof(ansi_erase_line) - 1, + file); + fflush(file); + } + drgn_log_debug(prog, "%s: found%s in debuginfod cache", + prog->dbinfo.debuginfod_current_name, + prog->dbinfo.debuginfod_current_type); + return; + } + + if (!file) + return; + + // We only do the progress animation if we would have at least one + // column for a progress bar. Using the calculation for bar_columns + // below: + // + // columns - (floor(columns / 2) - 10) - 2 - 4 >= 1 + // => columns - floor(columns / 2) >= 17 + // => ceil(columns / 2) >= 17 + // => columns >= 33 + bool animate = columns >= 33; + const bool orig_animate = animate; + + STRING_BUILDER(sb); + + if (animate && !string_builder_appendc(&sb, '\r')) + return; + + int fill_columns = 0; + int bar_columns = 0; + if (animate) { + if (done) { + // We need to erase anything left in the line with + // spaces. + fill_columns = columns; + } else if (b > 0) { + // Use half of the line plus a bit for the name and + // download size so that it doesn't get too short in + // small terminals. + fill_columns = columns / 2 + 10; + // Use the rest for the progress bar. + bar_columns = (columns - fill_columns + - 2 // Ends of progress bar + - 4 // " XX%" + ); + } else { + // Use the whole line, minus the spinner, for the name + // and download size + fill_columns = columns - 1; + } + } + + if (!string_builder_append(&sb, + done && b >= 0 + ? 
"Downloaded " : "Downloading ") + || !string_builder_append(&sb, + prog->dbinfo.debuginfod_current_name) + || !string_builder_append(&sb, + prog->dbinfo.debuginfod_current_type)) + return; + + size_t download_size_start = sb.len; + if (done && b < 0) { + errno = -b; + if (!string_builder_appendf(&sb, " failed: %m")) + return; + } else if (prog->dbinfo.debuginfod_have_url) { + intmax_t download_size; + if (done) { + struct stat st; + if (fstat(b, &st) < 0) { + drgn_log_warning(prog, "fstat: %m"); + return; + } + download_size = st.st_size; + } else { + download_size = a; + } + if (download_size < 2048) { + if (!string_builder_appendf(&sb, " (%" PRIdMAX " B)", + download_size)) + return; + } else { + static const char prefixes[] = "KMGTPEZY"; + int i = 1; + while (i < sizeof(prefixes) - 1 + && (download_size >> (10 * i)) >= 2048) + i++; + double unit = INTMAX_C(1) << (10 * i); + if (!string_builder_appendf(&sb, " (%.1f %ciB)", + download_size / unit, + prefixes[i - 1])) + return; + } + } + + if (animate) { + int current_column; + if (done) { + // Start at byte 1 to skip the "\r". + current_column = count_columns(&sb.str[1], sb.len - 1); + } else { + int download_size_len = sb.len - download_size_start; + // Leave room for the download size and an extra space. + int max_columns = + max(fill_columns - download_size_len - 1, 0); + // Start at byte 1 to skip the "\r". + current_column = truncate_columns(&sb, 1, + download_size_start, + max_columns); + if (current_column == INT_MIN) + return; // Memory allocation failed. + if (current_column >= 0) + current_column += download_size_len; + } + if (current_column < 0) { + // We either couldn't decode the string or the string + // contained a nonprintable character. Give up on the + // animation. 
+ animate = false; + } else if (current_column < fill_columns) { + if (!string_builder_reserve_for_append(&sb, + fill_columns + - current_column)) + return; + memset(&sb.str[sb.len], ' ', + fill_columns - current_column); + sb.len += fill_columns - current_column; + } + } + + // If we can't encode any of the following Unicode characters in the + // current locale, we fall back to ASCII. + if (!done && b > 0) { + // Clamp the ratio in case we get bogus sizes. + double ratio = a < b ? (double)a / (double)b : 1.0; + if (animate) { + // One multibyte character for each bar column, one for + // each end, and one to reset the shift state. + if (!string_builder_reserve_for_append(&sb, + (bar_columns + 3) + * MB_CUR_MAX)) + return; + if (!write_unicode_progress_bar(&sb, bar_columns, + ratio)) { + write_ascii_progress_bar(&sb, bar_columns, + ratio); + } + } + unsigned int percent = 100.0 * ratio; + // We're not 100% done until we're called with done = true. + if (percent > 99) + percent = 99; + if (!string_builder_appendf(&sb, " %*u%%", animate ? 2 : 0, + percent)) + return; + } else if (!done && animate) { + // One multibyte character for the spinner, one to reset the + // shift state. + if (!string_builder_reserve_for_append(&sb, 2 * MB_CUR_MAX)) + return; + unsigned int pos = prog->dbinfo.debuginfod_spinner_position++; + if (!write_unicode_spinner(&sb, pos)) + write_ascii_spinner(&sb, pos); + } + + if ((done || !animate) && !string_builder_appendc(&sb, '\n')) + return; + + // If we were originally animating but gave up, we need to skip the + // "\r". + fwrite(sb.str + (orig_animate && !animate ? 1 : 0), 1, + sb.len - (orig_animate && !animate ? 
1 : 0), file); +} + +static struct sigaction drgn_cancel_debuginfod_oldact; +static volatile sig_atomic_t drgn_cancel_debuginfod; +static void drgn_cancel_debuginfod_handler(int sig) +{ + drgn_cancel_debuginfod = 1; + drgn_cancel_debuginfod_oldact.sa_handler(sig); +} +static void drgn_cancel_debuginfod_sigaction(int sig, siginfo_t *info, + void *ucontext) +{ + drgn_cancel_debuginfod = 1; + drgn_cancel_debuginfod_oldact.sa_sigaction(sig, info, ucontext); +} +static bool drgn_prepare_debuginfod_find(struct drgn_program *prog) +{ +#if !_ELFUTILS_PREREQ(0, 179) + drgn_debuginfod_user_data = prog; +#endif + // If the application has a signal handler for SIGINT, temporarily wrap + // it with our own signal handler that sets a flag for the debuginfod + // progressfn. This allows Ctrl+C to interrupt a download in + // applications that handle SIGINT (like the Python interpreter). + drgn_cancel_debuginfod = 0; + if (sigaction(SIGINT, NULL, &drgn_cancel_debuginfod_oldact) != 0) + return false; + struct sigaction act = drgn_cancel_debuginfod_oldact; + if ((act.sa_flags & SA_SIGINFO) + // SIG_DFL and SIG_IGN are meant to be assigned to sa_handler, but + // the Linux kernel treats them the same for sa_sigaction. 
+ && act.sa_sigaction != (void *)SIG_DFL + && act.sa_sigaction != (void *)SIG_IGN) + act.sa_sigaction = drgn_cancel_debuginfod_sigaction; + else if (!(act.sa_flags & SA_SIGINFO) + && act.sa_handler != SIG_DFL && act.sa_handler != SIG_IGN) + act.sa_handler = drgn_cancel_debuginfod_handler; + else + return false; + return sigaction(SIGINT, &act, NULL) == 0; +} +static void drgn_finish_debuginfod_find(bool restore_sigaction) +{ + if (restore_sigaction) + sigaction(SIGINT, &drgn_cancel_debuginfod_oldact, NULL); +} + +static int drgn_debuginfod_progressfn(debuginfod_client *client, long a, long b) +{ + if (drgn_cancel_debuginfod) + return 1; + if (a >= 0) + drgn_log_debuginfod_progress(client, a, b); + return 0; +} + +static struct drgn_error * +drgn_module_try_file_from_debuginfod(struct drgn_module *module, + const char *build_id_str, + bool debug, bool supplementary, + struct string_builder *cache_sb) +{ + struct drgn_program *prog = module->prog; + + if (!string_builder_appendf(cache_sb, "/%s/%s", build_id_str, + debug ? "debuginfo" : "executable") + || !string_builder_null_terminate(cache_sb)) + return &drgn_enomem; + + prog->dbinfo.debuginfod_current_name = module->name; + if (supplementary) + prog->dbinfo.debuginfod_current_type = " supplementary debug info"; + else if (debug) + prog->dbinfo.debuginfod_current_type = " debug info"; + else + prog->dbinfo.debuginfod_current_type = ""; + prog->dbinfo.debuginfod_have_url = false; + prog->dbinfo.logged_debuginfod_progress = false; + bool restore_sigaction = drgn_prepare_debuginfod_find(prog); + char *path; + auto find = debug + ? 
drgn_debuginfod_find_debuginfo + : drgn_debuginfod_find_executable; + int fd = find(prog->dbinfo.debuginfod_client, + (const unsigned char *)build_id_str, 0, &path); + drgn_finish_debuginfod_find(restore_sigaction); + if (fd == -ENOENT && drgn_cancel_debuginfod) { + // Before elfutils commit 5527216460c6 ("debuginfod-client.c: + // Skip empty file creation for cancelled queries") (in elfutils + // 0.190), libdebuginfod has a nasty bug that causes it to cache + // a cancelled download as a negative hit. Work around it by + // deleting the cache file. + unlink(cache_sb->str); + return drgn_error_create_os("download cancelled", EINTR, NULL); + } + drgn_log_debuginfod_progress(prog->dbinfo.debuginfod_client, -1, fd); + if (fd >= 0) { + struct drgn_error *err = + drgn_module_try_file(module, path, fd, true); + free(path); + if (err) + return err; + } + return NULL; +} + +static struct drgn_error * +drgn_module_try_supplementary_file_from_debuginfod(struct drgn_module *module, + struct string_builder *cache_sb) +{ + if (drgn_module_wanted_supplementary_debug_file(module, NULL, NULL, + NULL, NULL) + != DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK) + return NULL; + const char *gnu_debugaltlink_build_id_str = + module->wanted_supplementary_debug_file->checksum_str; + return drgn_module_try_file_from_debuginfod(module, + gnu_debugaltlink_build_id_str, + true, true, cache_sb); +} + +static struct drgn_error * +drgn_debuginfod_find(struct drgn_module * const *modules, size_t num_modules, + void *arg) +{ + struct drgn_error *err; + struct drgn_program *prog = arg; + + if (!prog->dbinfo.debuginfod_client) { + prog->dbinfo.debuginfod_client = drgn_debuginfod_begin(); + if (!prog->dbinfo.debuginfod_client) { + return drgn_error_create(DRGN_ERROR_OTHER, + "couldn't create debuginfod client session"); + } + drgn_debuginfod_set_progressfn(prog->dbinfo.debuginfod_client, + drgn_debuginfod_progressfn); +#if _ELFUTILS_PREREQ(0, 179) + 
drgn_debuginfod_set_user_data(prog->dbinfo.debuginfod_client, + prog); +#endif + } + + STRING_BUILDER(sb); + const char *env; + if ((env = getenv("DEBUGINFOD_CACHE_PATH"))) { + if (!string_builder_append(&sb, env)) + return &drgn_enomem; + } else { + env = getenv("HOME") ?: "/"; + if (!string_builder_append(&sb, env) + || !string_builder_append(&sb, "/.debuginfod_client_cache") + || !string_builder_null_terminate(&sb)) + return &drgn_enomem; + struct stat st; + if (stat(sb.str, &st) < 0) { + sb.len = 0; + if ((env = getenv("XDG_CACHE_HOME"))) { + if (!string_builder_append(&sb, env) + || !string_builder_append(&sb, + "/debuginfod_client")) + return &drgn_enomem; + } else if (!string_builder_append(&sb, + getenv("HOME") ?: "/") + || !string_builder_append(&sb, + "/.cache/debuginfod_client")) { + return &drgn_enomem; + } + } + } + + size_t cache_dir_len = sb.len; + for (size_t i = 0; i < num_modules; i++) { + struct drgn_module *module = modules[i]; + const char *build_id_str = + drgn_module_build_id(module, NULL, NULL); + if (!build_id_str) { + drgn_module_try_files_log(module, "can't query debuginfod for"); + continue; + } + + drgn_module_try_files_log(module, "querying debuginfod for"); + + // If we need a supplementary file, try that first. + err = drgn_module_try_supplementary_file_from_debuginfod(module, + &sb); + if (err) + return err; + sb.len = cache_dir_len; + + // If we need the debug file (including if we needed a + // gnu_debugaltlink file and didn't find it), try that next. + if (drgn_module_wants_debug_file(module)) { + uint64_t orig_supplementary_file_generation = + prog->dbinfo.supplementary_file_generation; + err = drgn_module_try_file_from_debuginfod(module, + build_id_str, + true, false, + &sb); + if (err) + return err; + sb.len = cache_dir_len; + // If the wanted supplementary debug file changed, try + // finding it again. 
+ if (drgn_module_wanted_supplementary_debug_file_is_new(module, + orig_supplementary_file_generation)) { + err = drgn_module_try_supplementary_file_from_debuginfod(module, + &sb); + if (err) + return err; + sb.len = cache_dir_len; + } + } + + if (drgn_module_wants_loaded_file(module)) { + err = drgn_module_try_file_from_debuginfod(module, + build_id_str, + false, false, + &sb); + if (err) + return err; + sb.len = cache_dir_len; + } + } + return NULL; +} +#endif // WITH_DEBUGINFOD + +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_try_file(struct drgn_module *module, + const char *path, int fd, bool force) +{ + if (!drgn_module_wants_file(module)) { + drgn_log_debug(module->prog, "%s: ignoring unwanted file %s", + module->name, path); + if (fd >= 0) + close(fd); + return NULL; + } + drgn_module_try_files_log(module, "trying provided file as"); + return drgn_module_try_file_internal(module, path, fd, !force, NULL); +} + +LIBDRGN_PUBLIC +void drgn_module_iterator_destroy(struct drgn_module_iterator *it) +{ + if (it) { + if (it->destroy) + it->destroy(it); + else + free(it); + } +} + +LIBDRGN_PUBLIC struct drgn_program * +drgn_module_iterator_program(const struct drgn_module_iterator *it) +{ + return it->prog; +} + +LIBDRGN_PUBLIC +struct drgn_error *drgn_module_iterator_next(struct drgn_module_iterator *it, + struct drgn_module **ret, + bool *new_ret) +{ + if (!it->next) { + *ret = NULL; + return NULL; + } + struct drgn_error *err = it->next(it, ret, new_ret); + if (err || !*ret) + it->next = NULL; + return err; +} + +struct drgn_created_module_iterator { + struct drgn_module_iterator it; + struct drgn_module_table_iterator table_it; + struct drgn_module *next_module; + uint64_t generation; + bool yielded_main; +}; + +static struct drgn_error * +drgn_created_module_iterator_next(struct drgn_module_iterator *_it, + struct drgn_module **ret, + bool *new_ret) +{ + struct drgn_created_module_iterator *it = + container_of(_it, struct drgn_created_module_iterator, it); + 
struct drgn_debug_info *dbinfo = &it->it.prog->dbinfo; + + if (!it->yielded_main) { + it->yielded_main = true; + it->table_it = drgn_module_table_first(&dbinfo->modules); + it->generation = dbinfo->modules_generation; + if (dbinfo->main_module) { + *ret = dbinfo->main_module; + if (new_ret) + *new_ret = false; + return NULL; + } + } + + if (it->generation != dbinfo->modules_generation) { + return drgn_error_create(DRGN_ERROR_OTHER, + "modules changed during iteration"); + } + + for (;;) { + if (!it->next_module) { + if (it->table_it.entry) { + it->next_module = *it->table_it.entry; + it->table_it = drgn_module_table_next(it->table_it); + } else { + *ret = NULL; + return NULL; + } + } + if (it->next_module == dbinfo->main_module) { + it->next_module = it->next_module->next_same_name; + } else { + *ret = it->next_module; + if (new_ret) + *new_ret = false; + it->next_module = it->next_module->next_same_name; + return NULL; + } + } +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_created_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret) +{ + struct drgn_created_module_iterator *it = calloc(1, sizeof(*it)); + if (!it) + return &drgn_enomem; + drgn_module_iterator_init(&it->it, prog, NULL, + drgn_created_module_iterator_next); + *ret = &it->it; + return NULL; +} + +struct drgn_mapped_file { + const char *path; + // Mapped address range containing file offset 0. This is used to find + // the file header. 
+ uint64_t offset0_vaddr, offset0_size; +}; + +static struct drgn_mapped_file *drgn_mapped_file_create(const char *path) +{ + struct drgn_mapped_file *file = calloc(1, sizeof(*file)); + if (file) + file->path = path; + return file; +} + +static void drgn_mapped_file_destroy(struct drgn_mapped_file *file) +{ + free(file); +} + +struct drgn_mapped_file_segment { + uint64_t start; + uint64_t end; + uint64_t file_offset; + struct drgn_mapped_file *file; +}; + +DEFINE_VECTOR(drgn_mapped_file_segment_vector, struct drgn_mapped_file_segment); + +struct drgn_mapped_file_segments { + struct drgn_mapped_file_segment_vector vector; + // Whether the segments are already sorted by start address. This should + // always be true for both /proc/$pid/maps and NT_FILE, but we check and + // sort afterwards if not just in case. + bool sorted; +}; + +#define DRGN_MAPPED_FILE_SEGMENTS_INIT { VECTOR_INIT, true } + +static void drgn_mapped_file_segments_abort(struct drgn_mapped_file_segments *segments) +{ + drgn_mapped_file_segment_vector_deinit(&segments->vector); +} + +static struct drgn_error * +drgn_add_mapped_file_segment(struct drgn_mapped_file_segments *segments, + uint64_t start, uint64_t end, uint64_t file_offset, + struct drgn_mapped_file *file) +{ + assert(start < end); + if (file_offset == 0 && file->offset0_size == 0) { + file->offset0_vaddr = start; + file->offset0_size = end - start; + } + if (!drgn_mapped_file_segment_vector_empty(&segments->vector)) { + struct drgn_mapped_file_segment *last = + drgn_mapped_file_segment_vector_last(&segments->vector); + // If the last segment is from the same file and contiguous with + // this one, merge into that one. 
+ if (file == last->file && start == last->end + && file_offset == last->file_offset + (last->end - last->start)) { + last->end = end; + return NULL; + } + if (start < last->start) + segments->sorted = false; + } + struct drgn_mapped_file_segment *entry = + drgn_mapped_file_segment_vector_append_entry(&segments->vector); + if (!entry) + return &drgn_enomem; + entry->start = start; + entry->end = end; + entry->file_offset = file_offset; + entry->file = file; + return NULL; +} + +enum { + // Yield main module next. + USERSPACE_LOADED_MODULE_ITERATOR_STATE_MAIN, + // Yield vDSO module next. + USERSPACE_LOADED_MODULE_ITERATOR_STATE_VDSO, + // Get first link_map from r_debug next. + USERSPACE_LOADED_MODULE_ITERATOR_STATE_R_DEBUG, + // Yield module from link_map list next. + USERSPACE_LOADED_MODULE_ITERATOR_STATE_LINK_MAP, + // States after this are the same as + // USERSPACE_LOADED_MODULE_ITERATOR_STATE_LINK_MAP but also count how + // many link_map entries we've iterated. +}; + +// Arbitrary limit on the number iterations to make through the link_map list in +// order to avoid getting stuck in a cycle. +static const int MAX_LINK_MAP_LIST_ITERATIONS = 10000; + +struct userspace_loaded_module_iterator { + struct drgn_module_iterator it; + int state; + bool read_main_phdrs; + bool have_main_dyn; + bool have_vdso_dyn; + + struct drgn_mapped_file_segment *file_segments; + size_t num_file_segments; + + uint64_t main_phoff; + uint64_t main_bias; + uint64_t main_dyn_vaddr; + uint64_t main_dyn_memsz; + uint64_t vdso_dyn_vaddr; + uint64_t link_map; + + // Temporary buffer for reading program headers. + void *phdrs_buf; + size_t phdrs_buf_capacity; + + // Temporary buffer for reading segment contents. 
+ void *segment_buf; + size_t segment_buf_capacity; +}; + +static void +userspace_loaded_module_iterator_deinit(struct userspace_loaded_module_iterator *it) +{ + free(it->segment_buf); + free(it->phdrs_buf); + free(it->file_segments); +} + +static inline int drgn_mapped_file_segment_compare(const void *_a, + const void *_b) +{ + const struct drgn_mapped_file_segment *a = _a; + const struct drgn_mapped_file_segment *b = _b; + return (a->start > b->start) - (a->start < b->start); +} + +static void +userspace_loaded_module_iterator_set_file_segments(struct userspace_loaded_module_iterator *it, + struct drgn_mapped_file_segments *segments) +{ + // Don't bother shrinking to fit since this is short-lived. + drgn_mapped_file_segment_vector_steal(&segments->vector, + &it->file_segments, + &it->num_file_segments); + if (!segments->sorted) { + qsort(it->file_segments, it->num_file_segments, + sizeof(it->file_segments[0]), + drgn_mapped_file_segment_compare); + } +} + +static struct drgn_mapped_file_segment * +find_mapped_file_segment(struct userspace_loaded_module_iterator *it, + uint64_t address) +{ + #define less_than_start(a, b) (*(a) < (b)->start) + size_t i = binary_search_gt(it->file_segments, it->num_file_segments, + &address, less_than_start); + #undef less_than_start + if (i == 0 || address >= it->file_segments[i - 1].end) + return NULL; + return &it->file_segments[i - 1]; +} + +static struct drgn_error * +userspace_loaded_module_iterator_read_ehdr(struct userspace_loaded_module_iterator *it, + uint64_t address, GElf_Ehdr *ret) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + err = drgn_program_read_memory(prog, ret, address, sizeof(*ret), false); + if (err && err->code == DRGN_ERROR_FAULT) { + drgn_log_debug(prog, + "couldn't read ELF header at 0x%" PRIx64 ": %s", + err->address, err->message); + drgn_error_destroy(err); + return &drgn_not_found; + } else if (err) { + return err; + } + if (memcmp(ret->e_ident, ELFMAG, SELFMAG) != 0) { + 
drgn_log_debug(prog, "invalid ELF header magic"); + return &drgn_not_found; + } + if (ret->e_ident[EI_CLASS] != + (drgn_platform_is_64_bit(&prog->platform) + ? ELFCLASS64 : ELFCLASS32)) { + drgn_log_debug(prog, + "ELF header class (%u) does not match program", + ret->e_ident[EI_CLASS]); + return &drgn_not_found; + } + if (ret->e_ident[EI_DATA] != + (drgn_platform_is_little_endian(&prog->platform) + ? ELFDATA2LSB : ELFDATA2MSB)) { + drgn_log_debug(prog, + "ELF header data encoding (%u) does not match program", + ret->e_ident[EI_DATA]); + return &drgn_not_found; + } +#define visit_elf_ehdr_members(visit_scalar_member, visit_raw_member) do { \ + visit_raw_member(e_ident); \ + visit_scalar_member(e_type); \ + visit_scalar_member(e_machine); \ + visit_scalar_member(e_version); \ + visit_scalar_member(e_entry); \ + visit_scalar_member(e_phoff); \ + visit_scalar_member(e_shoff); \ + visit_scalar_member(e_flags); \ + visit_scalar_member(e_ehsize); \ + visit_scalar_member(e_phentsize); \ + visit_scalar_member(e_phnum); \ + visit_scalar_member(e_shentsize); \ + visit_scalar_member(e_shnum); \ + visit_scalar_member(e_shstrndx); \ +} while (0) + deserialize_struct64_inplace(ret, Elf32_Ehdr, visit_elf_ehdr_members, + drgn_platform_is_64_bit(&prog->platform), + drgn_platform_bswap(&prog->platform)); +#undef visit_elf_ehdr_members + if (ret->e_phentsize != + (drgn_platform_is_64_bit(&prog->platform) + ? sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr))) { + drgn_log_debug(prog, + "ELF program header entry size (%u) does not match class", + ret->e_phentsize); + return &drgn_not_found; + } + return NULL; +} + +static struct drgn_error * +userspace_loaded_module_iterator_read_phdrs(struct userspace_loaded_module_iterator *it, + uint64_t address, uint16_t phnum) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + uint32_t phentsize = + (drgn_platform_is_64_bit(&prog->platform) + ? 
sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr)); + uint32_t phdrs_size = (uint32_t)phnum * phentsize; + if (phdrs_size > MAX_MEMORY_READ_FOR_DEBUG_INFO) { + drgn_log_debug(prog, + "program header table is unreasonably large (%" PRIu32 " bytes); ignoring", + phdrs_size); + return &drgn_not_found; + } + if (!alloc_or_reuse(&it->phdrs_buf, &it->phdrs_buf_capacity, + phdrs_size)) + return &drgn_enomem; + err = drgn_program_read_memory(prog, it->phdrs_buf, address, phdrs_size, + false); + if (err && err->code == DRGN_ERROR_FAULT) { + drgn_log_debug(prog, + "couldn't read program header table at 0x%" PRIx64 ": %s", + err->address, err->message); + drgn_error_destroy(err); + return &drgn_not_found; + } + return err; +} + +static void +userspace_loaded_module_iterator_phdr(struct userspace_loaded_module_iterator *it, + size_t i, GElf_Phdr *ret) +{ + struct drgn_program *prog = it->it.prog; + size_t phentsize = + (drgn_platform_is_64_bit(&prog->platform) + ? sizeof(Elf64_Phdr) : sizeof(Elf32_Phdr)); +#define visit_phdr_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(p_type); \ + visit_scalar_member(p_flags); \ + visit_scalar_member(p_offset); \ + visit_scalar_member(p_vaddr); \ + visit_scalar_member(p_paddr); \ + visit_scalar_member(p_filesz); \ + visit_scalar_member(p_memsz); \ + visit_scalar_member(p_align); \ +} while (0) + deserialize_struct64(ret, Elf32_Phdr, visit_phdr_members, + (char *)it->phdrs_buf + i * phentsize, + drgn_platform_is_64_bit(&prog->platform), + drgn_platform_bswap(&prog->platform)); +#undef visit_phdr_members +} + +static struct drgn_error * +userspace_loaded_module_iterator_read_dynamic(struct userspace_loaded_module_iterator *it, + uint64_t address, uint64_t size, + size_t *num_dyn_ret) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + if (size > MAX_MEMORY_READ_FOR_DEBUG_INFO) { + drgn_log_debug(prog, + "dynamic section is unreasonably large (%" PRIu64 " bytes); ignoring", + size); + return 
&drgn_not_found; + } + size_t dyn_size = + (drgn_platform_is_64_bit(&prog->platform) + ? sizeof(Elf64_Dyn) : sizeof(Elf32_Dyn)); + uint64_t num_dyn = size / dyn_size; + *num_dyn_ret = num_dyn; + if (num_dyn == 0) + return NULL; + + if (!alloc_or_reuse(&it->segment_buf, &it->segment_buf_capacity, + num_dyn * dyn_size)) + return &drgn_enomem; + err = drgn_program_read_memory(prog, it->segment_buf, address, + num_dyn * dyn_size, false); + if (err && err->code == DRGN_ERROR_FAULT) { + drgn_log_debug(prog, + "couldn't read dynamic section at 0x%" PRIx64 ": %s", + err->address, err->message); + drgn_error_destroy(err); + return &drgn_not_found; + } + return err; +} + +static void +userspace_loaded_module_iterator_dyn(struct userspace_loaded_module_iterator *it, + size_t i, GElf_Dyn *ret) +{ + struct drgn_program *prog = it->it.prog; + size_t dyn_size = + (drgn_platform_is_64_bit(&prog->platform) + ? sizeof(Elf64_Dyn) : sizeof(Elf32_Dyn)); +#define visit_elf_dyn_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(d_tag); \ + visit_scalar_member(d_un.d_val); \ +} while (0) + deserialize_struct64(ret, Elf32_Dyn, visit_elf_dyn_members, + (char *)it->segment_buf + i * dyn_size, + drgn_platform_is_64_bit(&prog->platform), + drgn_platform_bswap(&prog->platform)); +#undef visit_elf_dyn_members +} + +static struct drgn_error * +userspace_loaded_module_iterator_read_main_phdrs(struct userspace_loaded_module_iterator *it) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + // The main bias is the difference between AT_PHDR and the virtual + // address of the program headers in the ELF file. We determine the + // latter by finding the PT_LOAD segment containing e_phoff. We would + // use PT_PHDR instead, but static binaries usually don't have it, and + // we can't assume a bias of 0 for static PIE binaries. + // + // If we couldn't find the file offset of the program headers, we can't + // find anything else. 
+ if (it->main_phoff == 0) + return NULL; + + drgn_log_debug(prog, "reading program header table from AT_PHDR"); + + err = userspace_loaded_module_iterator_read_phdrs(it, + prog->auxv.at_phdr, + prog->auxv.at_phnum); + if (err == &drgn_not_found) + return NULL; + else if (err) + return err; + + // Silence -Wmaybe-uninitialized false positives on dyn_vaddr and + // dyn_memsz last seen with GCC 9. + uint64_t phdr_vaddr, dyn_vaddr = 0, dyn_memsz = 0; + bool have_phdr_vaddr = false, have_dyn = false; + for (uint16_t i = 0; i < prog->auxv.at_phnum; i++) { + GElf_Phdr phdr; + userspace_loaded_module_iterator_phdr(it, i, &phdr); + if (phdr.p_type == PT_LOAD && phdr.p_offset <= it->main_phoff + && it->main_phoff < phdr.p_offset + phdr.p_filesz) { + drgn_log_debug(prog, + "found PT_LOAD containing program headers with p_vaddr 0x%" PRIx64 + " and p_offset 0x%" PRIx64, + phdr.p_vaddr, phdr.p_offset); + phdr_vaddr = it->main_phoff - phdr.p_offset + phdr.p_vaddr; + have_phdr_vaddr = true; + } else if (phdr.p_type == PT_DYNAMIC) { + drgn_log_debug(prog, + "found PT_DYNAMIC with p_vaddr 0x%" PRIx64 + " and p_memsz 0x%" PRIx64, + phdr.p_vaddr, phdr.p_memsz); + have_dyn = true; + dyn_vaddr = phdr.p_vaddr; + dyn_memsz = phdr.p_memsz; + } + } + if (have_phdr_vaddr) { + it->main_bias = prog->auxv.at_phdr - phdr_vaddr; + drgn_log_debug(prog, "main bias is 0x%" PRIx64, it->main_bias); + } else { + drgn_log_debug(prog, + "didn't find PT_LOAD containing program headers"); + return NULL; + } + if (have_dyn) { + it->have_main_dyn = true; + it->main_dyn_vaddr = dyn_vaddr + it->main_bias; + it->main_dyn_memsz = dyn_memsz; + drgn_log_debug(prog, "main dynamic section is at 0x%" PRIx64, + it->main_dyn_vaddr); + } else { + drgn_log_debug(prog, + "didn't find PT_DYNAMIC program header; probably statically linked"); + } + it->read_main_phdrs = true; + return NULL; +} + +static struct drgn_error * +identify_module_from_phdrs(struct userspace_loaded_module_iterator *it, + struct drgn_module *module, 
size_t phnum, + uint64_t bias) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + uint64_t start = UINT64_MAX, end = 0; + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr; + userspace_loaded_module_iterator_phdr(it, i, &phdr); + if (phdr.p_type == PT_LOAD) { + // Like elf_address_range_from_min_and_max_phdr(). + start = min(start, phdr.p_vaddr + bias); + end = max(end, phdr.p_vaddr + phdr.p_memsz + bias); + } else if (phdr.p_type == PT_NOTE + && module->build_id_len == 0) { + uint64_t note_size = min(phdr.p_filesz, phdr.p_memsz); + if (!note_size) + continue; + if (note_size > MAX_MEMORY_READ_FOR_DEBUG_INFO) { + drgn_log_debug(prog, + "note is unreasonably large (%" PRIu64 " bytes); ignoring", + note_size); + continue; + } + if (!alloc_or_reuse(&it->segment_buf, + &it->segment_buf_capacity, + note_size)) + return &drgn_enomem; + err = drgn_program_read_memory(prog, it->segment_buf, + phdr.p_vaddr + bias, + note_size, false); + if (err && err->code == DRGN_ERROR_FAULT) { + drgn_log_debug(prog, + "couldn't read note at 0x%" PRIx64 ": %s" + "; ignoring", + err->address, err->message); + drgn_error_destroy(err); + continue; + } else if (err) { + return err; + } + const void *build_id; + size_t build_id_len = + parse_gnu_build_id_from_notes(it->segment_buf, + note_size, + phdr.p_align == 8 ? 
+ 8 : 4, + drgn_platform_bswap(&prog->platform), + &build_id); + if (build_id_len > 0) { + err = drgn_module_set_build_id(module, build_id, + build_id_len); + if (err) + return err; + drgn_log_debug(prog, + "found build ID %s in note at 0x%" PRIx64, + module->build_id_str, + phdr.p_vaddr + bias + + ((char *)build_id + - (char *)it->segment_buf)); + } + } + } + if (module->build_id_len == 0) { + drgn_log_debug(prog, + "couldn't find build ID from mapped program headers"); + } + if (start < end) { + err = drgn_module_set_address_range(module, start, end); + if (err) + return err; + drgn_log_debug(prog, + "got address range 0x%" PRIx64 "-0x%" PRIx64 " from mapped program headers", + start, end); + } else { + drgn_log_debug(prog, + "couldn't find address range from mapped program headers"); + } + return NULL; +} + +static struct drgn_error * +userspace_loaded_module_iterator_yield_main(struct userspace_loaded_module_iterator *it, + struct drgn_module **ret, + bool *new_ret) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + struct drgn_mapped_file_segment *segment = + find_mapped_file_segment(it, prog->auxv.at_phdr); + if (segment) { + // We don't need to read the file header to get e_phoff. Instead, + // determine it from the file mapping. + it->main_phoff = + segment->file_offset + (prog->auxv.at_phdr - segment->start); + drgn_log_debug(prog, + "AT_PHDR is mapped from file %s at offset 0x%" PRIx64, + segment->file->path, it->main_phoff); + } else { + drgn_log_debug(prog, + "couldn't find mapped file segment containing AT_PHDR"); + } + + _cleanup_(drgn_module_deletep) struct drgn_module *module = NULL; + bool new; + err = drgn_module_find_or_create_main(prog, + segment ? 
segment->file->path : "", + &module, &new); + if (err) + return err; + if (!new) { + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; + } + err = userspace_loaded_module_iterator_read_main_phdrs(it); + if (err) + return err; + if (it->read_main_phdrs) { + err = identify_module_from_phdrs(it, module, + prog->auxv.at_phnum, + it->main_bias); + if (err) + return err; + } + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; +} + +static struct drgn_error * +userspace_loaded_module_iterator_yield_vdso(struct userspace_loaded_module_iterator *it, + struct drgn_module **ret, + bool *new_ret) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + if (!prog->auxv.at_sysinfo_ehdr) { + drgn_log_debug(prog, "no vDSO"); +no_vdso: + *ret = NULL; + return NULL; + } + + drgn_log_debug(prog, "reading vDSO ELF header from AT_SYSINFO_EHDR"); + GElf_Ehdr ehdr; + err = userspace_loaded_module_iterator_read_ehdr(it, + prog->auxv.at_sysinfo_ehdr, + &ehdr); + if (err == &drgn_not_found) + goto no_vdso; + else if (err) + return err; + + drgn_log_debug(prog, + "reading %" PRIu16 " program headers at 0x%" PRIx64, + ehdr.e_phnum, prog->auxv.at_sysinfo_ehdr + ehdr.e_phoff); + + // It is effectively part of the ABI that the vDSO program headers are + // mapped at AT_SYSINFO_EHDR + e_phoff (see the Linux kernel's reference + // vDSO parser: vdso_init_from_sysinfo_ehdr() in + // tools/testing/selftests/vDSO/parse_vdso.c, glibc: setup_vdso() in + // elf/setup-vdso.h, and musl: __vdsosym() in src/internal/vdso.c). + err = userspace_loaded_module_iterator_read_phdrs(it, + prog->auxv.at_sysinfo_ehdr + ehdr.e_phoff, + ehdr.e_phnum); + if (err == &drgn_not_found) + goto no_vdso; + else if (err) + return err; + + // This is based on the Linux kernel's reference vDSO parser. + uint64_t bias = prog->auxv.at_sysinfo_ehdr; + // Silence -Wmaybe-uninitialized false positives on dyn_vaddr and + // dyn_memsz last seen with GCC 12. 
+ uint64_t dyn_vaddr = 0, dyn_memsz = 0; + bool have_load = false, have_dyn = false; + for (size_t i = 0; i < ehdr.e_phnum; i++) { + GElf_Phdr phdr; + userspace_loaded_module_iterator_phdr(it, i, &phdr); + if (phdr.p_type == PT_LOAD && !have_load) { + drgn_log_debug(prog, + "found PT_LOAD with p_offset 0x%" PRIx64 + " and p_vaddr 0x%" PRIx64, + phdr.p_offset, phdr.p_vaddr); + have_load = true; + bias = prog->auxv.at_sysinfo_ehdr + phdr.p_offset - phdr.p_vaddr; + } else if (phdr.p_type == PT_DYNAMIC) { + drgn_log_debug(prog, + "found PT_DYNAMIC with p_offset 0x%" PRIx64 + " and p_memsz 0x%" PRIx64, + phdr.p_offset, phdr.p_memsz); + dyn_vaddr = prog->auxv.at_sysinfo_ehdr + phdr.p_offset; + dyn_memsz = phdr.p_memsz; + have_dyn = true; + } + } + if (!have_load) { + drgn_log_warning(prog, + "can't find vDSO: " + "no PT_LOAD header in vDSO program headers"); + goto no_vdso; + } + drgn_log_debug(prog, "vDSO bias is 0x%" PRIx64, bias); + if (!have_dyn) { + drgn_log_warning(prog, + "can't find vDSO: " + "no PT_DYNAMIC header in vDSO program headers"); + goto no_vdso; + } + it->vdso_dyn_vaddr = dyn_vaddr; + it->have_vdso_dyn = true; + + drgn_log_debug(prog, "reading vDSO dynamic section at 0x%" PRIx64, + dyn_vaddr); + size_t num_dyn; + err = userspace_loaded_module_iterator_read_dynamic(it, dyn_vaddr, + dyn_memsz, + &num_dyn); + if (err == &drgn_not_found) + goto no_vdso; + else if (err) + return err; + + // Silence -Wmaybe-uninitialized false positives on dt_strtab and + // dt_soname last seen with GCC 12. 
+ uint64_t dt_strtab = 0, dt_soname = 0; + bool have_dt_strtab = false, have_dt_soname = false; + for (size_t i = 0; i < num_dyn; i++) { + GElf_Dyn dyn; + userspace_loaded_module_iterator_dyn(it, i, &dyn); + if (dyn.d_tag == DT_STRTAB) { + dt_strtab = dyn.d_un.d_ptr; + have_dt_strtab = true; + drgn_log_debug(prog, "found DT_STRTAB 0x%" PRIx64, + dt_strtab); + } else if (dyn.d_tag == DT_SONAME) { + dt_soname = dyn.d_un.d_val; + have_dt_soname = true; + drgn_log_debug(prog, "found DT_SONAME 0x%" PRIx64, + dt_soname); + } else if (dyn.d_tag == DT_NULL) { + break; + } + } + if (!have_dt_strtab || !have_dt_soname) { + drgn_log_warning(prog, + "can't find vDSO: " + "no %s%s%s entr%s in vDSO dynamic section", + have_dt_strtab ? "" : "DT_STRTAB", + have_dt_strtab || have_dt_soname ? "" : " or ", + have_dt_soname ? "" : "DT_SONAME", + have_dt_strtab || have_dt_soname ? "y" : "ies"); + goto no_vdso; + } + + _cleanup_free_ char *name = NULL; + err = drgn_program_read_c_string(prog, dt_strtab + bias + dt_soname, + false, SIZE_MAX, &name); + if (err && err->code == DRGN_ERROR_FAULT) { + drgn_log_warning(prog, + "can't find vDSO: " + "couldn't read soname at 0x%" PRIx64 ": %s", + err->address, err->message); + drgn_error_destroy(err); + goto no_vdso; + } else if (err) { + return err; + } + drgn_log_debug(prog, "read vDSO soname \"%s\"", name); + + _cleanup_(drgn_module_deletep) struct drgn_module *module = NULL; + bool new; + err = drgn_module_find_or_create_vdso(prog, name, dyn_vaddr, &module, + &new); + if (err) + return err; + if (!new) { + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; + } + + err = identify_module_from_phdrs(it, module, ehdr.e_phnum, bias); + if (err) + return err; + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; +} + +#define read_struct64(prog, struct64p, address, type32, visit_members) \ + read_struct64_impl(prog, struct64p, address, type32, visit_members, \ + PP_UNIQUE(prog), 
PP_UNIQUE(struct64p), \ + PP_UNIQUE(is_64_bit), PP_UNIQUE(err)) +#define read_struct64_impl(prog, struct64p, address, type32, visit_members, \ + unique_prog, unique_struct64, unique_is_64_bit, \ + unique_err) ({ \ + struct drgn_program *unique_prog = (prog); \ + __auto_type unique_struct64p = (struct64p); \ + static_assert(sizeof(*unique_struct64p) >= sizeof(type32), \ + "64-bit type is smaller than 32-bit type"); \ + const bool unique_is_64_bit = \ + drgn_platform_is_64_bit(&unique_prog->platform); \ + struct drgn_error *unique_err = \ + drgn_program_read_memory(unique_prog, unique_struct64p, \ + (address), \ + unique_is_64_bit \ + ? sizeof(*unique_struct64p) \ + : sizeof(type32), false); \ + if (!unique_err) { \ + deserialize_struct64_inplace(unique_struct64p, type32, \ + visit_members, unique_is_64_bit, \ + drgn_platform_bswap(&unique_prog->platform));\ + } \ + unique_err; \ +}) + +static struct drgn_error * +userspace_get_link_map(struct userspace_loaded_module_iterator *it) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + if (!it->read_main_phdrs) { + err = userspace_loaded_module_iterator_read_main_phdrs(it); + if (err) + return err; + } + if (!it->have_main_dyn) + return NULL; + + drgn_log_debug(prog, "reading main dynamic section"); + size_t num_dyn; + err = userspace_loaded_module_iterator_read_dynamic(it, + it->main_dyn_vaddr, + it->main_dyn_memsz, + &num_dyn); + if (err == &drgn_not_found) { + drgn_log_warning(prog, + "can't find shared libraries: " + "couldn't read main dynamic section"); + return NULL; + } else if (err) { + return err; + } + + GElf_Dyn dyn; + size_t i; + for (i = 0; i < num_dyn; i++) { + userspace_loaded_module_iterator_dyn(it, i, &dyn); + if (dyn.d_tag == DT_NULL) { + i = num_dyn; + break; + } + if (dyn.d_tag == DT_DEBUG) { + drgn_log_debug(prog, "found DT_DEBUG 0x%" PRIx64, + dyn.d_un.d_ptr); + break; + } + } + if (i >= num_dyn) { + drgn_log_warning(prog, + "can't find shared libraries: " + "no DT_DEBUG 
entry in main dynamic section"); + return NULL; + } + + struct drgn_r_debug { + int32_t r_version; + alignas(8) uint64_t r_map; + } r_debug; + struct drgn_r_debug32 { + int32_t r_version; + uint32_t r_map; + }; +#define visit_r_debug_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(r_version); \ + visit_scalar_member(r_map); \ +} while (0) + err = read_struct64(prog, &r_debug, dyn.d_un.d_ptr, + struct drgn_r_debug32, visit_r_debug_members); +#undef visit_r_debug_members + if (err && err->code == DRGN_ERROR_FAULT) { + // Note: musl doesn't update DT_DEBUG for static PIE binaries + // compiled with GCC (as of musl v1.2.3 and GCC 13), so that + // case is known to fail here. + drgn_log_warning(prog, + "can't find shared libraries: " + "couldn't read r_debug at 0x%" PRIx64 ": %s", + err->address, err->message); + drgn_error_destroy(err); + return NULL; + } else if (err) { + return err; + } + drgn_log_debug(prog, + "read r_debug = { .r_version = %" PRId32 ", .r_map = 0x%" PRIx64 " }", + r_debug.r_version, r_debug.r_map); + + if (r_debug.r_version < 1) { + drgn_log_warning(prog, + "can't find shared libraries: " + "invalid r_debug.r_version %" PRId32, + r_debug.r_version); + return NULL; + } + it->link_map = r_debug.r_map; + return NULL; +} + +static struct drgn_error * +identify_module_from_link_map(struct userspace_loaded_module_iterator *it, + struct drgn_module *module, + struct drgn_mapped_file *file, uint64_t l_addr) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + // Even if it is a 32-bit file, segments should be at least a page, so + // we should be able to read the 64-bit size. 
+ if (file->offset0_size < sizeof(Elf64_Ehdr)) { + drgn_log_debug(prog, "didn't find mapped ELF header"); + return NULL; + } + + drgn_log_debug(prog, "reading ELF header at 0x%" PRIx64, + file->offset0_vaddr); + GElf_Ehdr ehdr; + err = userspace_loaded_module_iterator_read_ehdr(it, + file->offset0_vaddr, + &ehdr); + if (err == &drgn_not_found) + return NULL; + else if (err) + return err; + + drgn_log_debug(prog, + "reading %" PRIu16 " program headers from 0x%" PRIx64, + ehdr.e_phnum, file->offset0_vaddr + ehdr.e_phoff); + // e_phnum and e_phentsize are uint16_t, so this can't overflow. + uint32_t phdrs_size = + (uint32_t)ehdr.e_phnum * (uint32_t)ehdr.e_phentsize; + if (ehdr.e_phoff > file->offset0_size || + phdrs_size > file->offset0_size - ehdr.e_phoff) { + drgn_log_debug(prog, + "program header table is not mapped with ELF header"); + return NULL; + } + err = userspace_loaded_module_iterator_read_phdrs(it, + file->offset0_vaddr + ehdr.e_phoff, + ehdr.e_phnum); + if (err == &drgn_not_found) + return NULL; + else if (err) + return err; + + return identify_module_from_phdrs(it, module, ehdr.e_phnum, l_addr); +} + +// This is the public definition of struct link_map from glibc's link.h: +// +// struct link_map +// { +// /* These first few members are part of the protocol with the debugger. +// This is the same format used in SVR4. */ +// +// ElfW(Addr) l_addr; /* Difference between the address in the ELF +// file and the addresses in memory. */ +// char *l_name; /* Absolute file name object was found in. */ +// ElfW(Dyn) *l_ld; /* Dynamic section of the shared object. */ +// struct link_map *l_next, *l_prev; /* Chain of loaded objects. */ +// }; +// +// We don't need l_prev, so we exclude it from our definition. 
+struct drgn_link_map { + uint64_t l_addr; + uint64_t l_name; + uint64_t l_ld; + uint64_t l_next; +}; +struct drgn_link_map32 { + uint32_t l_addr; + uint32_t l_name; + uint32_t l_ld; + uint32_t l_next; +}; + +static struct drgn_error * +userspace_next_link_map(struct userspace_loaded_module_iterator *it, + struct drgn_link_map *ret, char **name_ret) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + if (!it->link_map) { + drgn_log_debug(prog, "found end of link_map list"); + return &drgn_stop; + } + + if (it->state + >= USERSPACE_LOADED_MODULE_ITERATOR_STATE_LINK_MAP + + MAX_LINK_MAP_LIST_ITERATIONS) { + drgn_log_warning(prog, + "can't find remaining shared libraries: " + "too many entries or cycle in link_map list"); + return &drgn_stop; + } + it->state++; + +#define visit_link_map_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(l_addr); \ + visit_scalar_member(l_name); \ + visit_scalar_member(l_ld); \ + visit_scalar_member(l_next); \ +} while (0) + err = read_struct64(prog, ret, it->link_map, struct drgn_link_map32, + visit_link_map_members); +#undef visit_link_map_members + if (err && err->code == DRGN_ERROR_FAULT) { + drgn_log_warning(prog, + "can't find remaining shared libraries: " + "couldn't read next link_map at 0x%" PRIx64 ": %s", + err->address, err->message); + drgn_error_destroy(err); + return &drgn_stop; + } else if (err) { + return err; + } + + it->link_map = ret->l_next; + + err = drgn_program_read_c_string(prog, ret->l_name, false, SIZE_MAX, + name_ret); + if (err && err->code == DRGN_ERROR_FAULT) + *name_ret = NULL; + else if (err) + return err; + drgn_log_debug(prog, + "read link_map = { .l_addr = 0x%" PRIx64 ", .l_name = 0x%" PRIx64 "%s%s%s, .l_ld = 0x%" PRIx64 ", .l_next = 0x%" PRIx64 " }", + ret->l_addr, ret->l_name, *name_ret ? " = \"" : "", + *name_ret ? *name_ret : "", *name_ret ? 
"\"" : "", + ret->l_ld, ret->l_next); + if (err) { + drgn_log_debug(prog, + "couldn't read l_name at 0x%" PRIx64 ": %s" + "; skipping", + err->address, err->message); + drgn_error_destroy(err); + } + return NULL; +} + +static struct drgn_error * +yield_from_link_map(struct userspace_loaded_module_iterator *it, + struct drgn_module **ret, bool *new_ret) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + for (;;) { + struct drgn_link_map link_map; + _cleanup_free_ char *name = NULL; + err = userspace_next_link_map(it, &link_map, &name); + if (err == &drgn_stop) { + *ret = NULL; + return NULL; + } else if (err) { + return err; + } + + if (link_map.l_ld == it->main_dyn_vaddr) { + drgn_log_debug(prog, + "l_ld matches main dynamic section; skipping"); + continue; + } + if (it->have_vdso_dyn && link_map.l_ld == it->vdso_dyn_vaddr) { + drgn_log_debug(prog, + "l_ld matches vDSO dynamic section; skipping"); + continue; + } + if (!name) + continue; + + _cleanup_(drgn_module_deletep) struct drgn_module *module = NULL; + bool new; + err = drgn_module_find_or_create_shared_library(prog, name, + link_map.l_ld, + &module, &new); + if (err) + return err; + if (!new) { + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; + } + + struct drgn_mapped_file_segment *segment = + find_mapped_file_segment(it, link_map.l_ld); + if (segment) { + err = identify_module_from_link_map(it, module, + segment->file, + link_map.l_addr); + if (err) + return err; + } else { + drgn_log_debug(prog, + "couldn't find mapped file segment containing l_ld"); + } + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; + } +} + +static struct drgn_error * +userspace_loaded_module_iterator_next(struct drgn_module_iterator *_it, + struct drgn_module **ret, + bool *new_ret) +{ + struct drgn_error *err; + struct userspace_loaded_module_iterator *it = + container_of(_it, struct userspace_loaded_module_iterator, it); + switch (it->state) { 
+ case USERSPACE_LOADED_MODULE_ITERATOR_STATE_MAIN: + err = drgn_program_cache_auxv(it->it.prog); + if (err) + return err; + it->state = USERSPACE_LOADED_MODULE_ITERATOR_STATE_VDSO; + return userspace_loaded_module_iterator_yield_main(it, ret, + new_ret); + case USERSPACE_LOADED_MODULE_ITERATOR_STATE_VDSO: + it->state = USERSPACE_LOADED_MODULE_ITERATOR_STATE_R_DEBUG; + err = userspace_loaded_module_iterator_yield_vdso(it, ret, + new_ret); + if (err || *ret) + return err; + fallthrough; + case USERSPACE_LOADED_MODULE_ITERATOR_STATE_R_DEBUG: + it->state = USERSPACE_LOADED_MODULE_ITERATOR_STATE_LINK_MAP; + err = userspace_get_link_map(it); + if (err) + return err; + fallthrough; + default: + return yield_from_link_map(it, ret, new_ret); + } +} + +struct process_mapped_file_entry { + dev_t dev; + ino_t ino; + struct drgn_mapped_file *file; +}; + +struct process_mapped_file_key { + dev_t dev; + ino_t ino; + const char *path; +}; + +static struct process_mapped_file_key +process_mapped_file_entry_to_key(const struct process_mapped_file_entry *entry) +{ + return (struct process_mapped_file_key){ + .dev = entry->dev, + .ino = entry->ino, + .path = entry->file->path, + }; +} + +static struct hash_pair +process_mapped_file_key_hash_pair(const struct process_mapped_file_key *key) +{ + size_t hash = hash_combine(key->dev, key->ino); + hash = hash_combine(hash, hash_c_string(key->path)); + return hash_pair_from_avalanching_hash(hash); +} + +static bool process_mapped_file_key_eq(const struct process_mapped_file_key *a, + const struct process_mapped_file_key *b) +{ + return (a->dev == b->dev + && a->ino == b->ino + && strcmp(a->path, b->path) == 0); +} + +DEFINE_HASH_TABLE(process_mapped_files, struct process_mapped_file_entry, + process_mapped_file_entry_to_key, + process_mapped_file_key_hash_pair, + process_mapped_file_key_eq); + +struct process_loaded_module_iterator { + struct userspace_loaded_module_iterator u; + struct process_mapped_files files; +}; + +static struct 
drgn_error * +process_add_mapping(struct process_loaded_module_iterator *it, + const char *maps_path, const char *map_files_path, + int map_files_fd, bool *logged_readlink_eperm, + bool *logged_stat_eperm, + struct drgn_map_files_segment_vector *map_files_segments, + struct drgn_mapped_file_segments *segments, + char *line, size_t line_len) +{ + struct drgn_program *prog = it->u.it.prog; + + struct drgn_map_files_segment segment; + uint64_t segment_file_offset; + unsigned int dev_major, dev_minor; + uint64_t ino; + int map_name_len, path_index; + if (sscanf(line, + "%" SCNx64 "-%" SCNx64 "%n %*s %" SCNx64 " %x:%x %" SCNu64 " %n", + &segment.start, &segment.end, &map_name_len, + &segment_file_offset, &dev_major, &dev_minor, &ino, + &path_index) != 6) { + return drgn_error_format(DRGN_ERROR_OTHER, "couldn't parse %s", + maps_path); + } + // Skip anonymous mappings. + if (ino == 0) + return NULL; + + if (!drgn_map_files_segment_vector_append(map_files_segments, &segment)) + return &drgn_enomem; + + struct process_mapped_file_key key = { + .dev = makedev(dev_major, dev_minor), + .ino = ino, + .path = line + path_index, + }; + _cleanup_free_ char *real_path = NULL; + + // /proc/$pid/maps has a couple of ambiguities that + // /proc/$pid/map_files/
<start>-<end> can help with: + // + // 1. Newlines in the file path from /proc/$pid/maps are escaped as + // \012. However, \ is not escaped, so it is ambiguous whether \012 + // is a newline or appeared literally in the path. We can read the + // map_files link to get the unescaped path. + // 2. The device number in /proc/$pid/maps is incorrect for some + // filesystems. Specifically, for Btrfs as of Linux 6.5, it refers to + // a filesystem-wide device number rather than the subvolume-specific + // device numbers returned by stat. We can stat the map_files link to + // get the correct device number. + if (map_files_fd >= 0) { + char map_files_name[34]; + snprintf(map_files_name, sizeof(map_files_name), + "%" PRIx64 "-%" PRIx64, segment.start, segment.end); + + // The escaped path must be at least as long as the original + // path, so use that as the readlink buffer size. + size_t bufsiz = line_len - path_index + 1; + real_path = malloc(bufsiz); + if (!real_path) + return &drgn_enomem; + // Before Linux kernel commit bdb4d100afe9 ("procfs: always + // expose /proc/<pid>/map_files/ and make it readable") (in + // v4.3), reading these links required CAP_SYS_ADMIN. Since that + // commit, it only requires PTRACE_MODE_READ, which we must have + // since we opened /proc/$pid/maps. + // + // If we can't read this link, we have to fall back to the + // escaped path. Newlines and the literal sequence \012 are + // unlikely to appear in a path, so it's not a big deal. + ssize_t r = readlinkat(map_files_fd, map_files_name, real_path, + bufsiz); + if (r < 0) { + if (errno == EPERM) { + free(real_path); + real_path = NULL; + if (!*logged_readlink_eperm) { + drgn_log_debug(prog, + "don't have permission to read symlinks in %s", + map_files_path); + } + *logged_readlink_eperm = true; + } else if (errno == ENOENT) { + // We raced with a change to the mapping.
+ drgn_log_debug(prog, "mapping %s disappeared", + map_files_name); + return NULL; + } else { + return drgn_error_format_os("readlink", errno, + "%s/%s", + map_files_path, + map_files_name); + } + } else if (r >= bufsiz) { + // We didn't allocate enough for the link contents. The + // only way this is possible is if we raced with the + // mapping being replaced by a different path. + drgn_log_debug(prog, + "mapping %s path changed; skipping", + map_files_name); + return NULL; + } else { + real_path[r] = '\0'; + key.path = real_path; + } + + // Following these links requires CAP_SYS_ADMIN. If we can't, we + // have to fall back to using the device number from + // /proc/$pid/maps. Mapping files with the same path and inode + // number in different Btrfs subvolumes is unlikely, so this is + // also not a big deal. + struct stat st; + if (fstatat(map_files_fd, map_files_name, &st, 0) < 0) { + if (errno == EPERM) { + if (!*logged_stat_eperm) { + drgn_log_debug(prog, + "don't have permission to follow symlinks in %s", + map_files_path); + } + *logged_stat_eperm = true; + } else if (errno == ENOENT) { + // We raced with a change to the mapping. 
drgn_log_debug(prog, "mapping %s disappeared", + map_files_name); + return NULL; + } else { + return drgn_error_format_os("stat", errno, + "%s/%s", + map_files_path, + map_files_name); + } + } else { + key.dev = st.st_dev; + } + } + + struct hash_pair hp = process_mapped_files_hash(&key); + struct process_mapped_files_iterator files_it = + process_mapped_files_search_hashed(&it->files, &key, hp); + if (!files_it.entry) { + if (!real_path) { + real_path = strdup(key.path); + if (!real_path) + return &drgn_enomem; + } + struct drgn_mapped_file *file = + drgn_mapped_file_create(real_path); + if (!file) + return &drgn_enomem; + struct process_mapped_file_entry entry = { + .dev = key.dev, + .ino = key.ino, + .file = file, + }; + if (process_mapped_files_insert_searched(&it->files, &entry, hp, + &files_it) < 0) { + drgn_mapped_file_destroy(file); + return &drgn_enomem; + } + // real_path is owned by the iterator now. + real_path = NULL; + } + return drgn_add_mapped_file_segment(segments, segment.start, segment.end, + segment_file_offset, + files_it.entry->file); +} + +static struct drgn_error * +process_get_mapped_files(struct process_loaded_module_iterator *it) +{ + struct drgn_error *err; + struct drgn_program *prog = it->u.it.prog; + +#define FORMAT "/proc/%ld/maps" + char maps_path[sizeof(FORMAT) + - sizeof("%ld") + + max_decimal_length(long) + + 1]; + snprintf(maps_path, sizeof(maps_path), FORMAT, (long)prog->pid); +#undef FORMAT + _cleanup_fclose_ FILE *maps_file = fopen(maps_path, "r"); + if (!maps_file) + return drgn_error_create_os("fopen", errno, maps_path); + drgn_log_debug(prog, "parsing %s", maps_path); + +#define FORMAT "/proc/%ld/map_files" + char map_files_path[sizeof(FORMAT) + - sizeof("%ld") + + max_decimal_length(long) + + 1]; + snprintf(map_files_path, sizeof(map_files_path), FORMAT, + (long)prog->pid); +#undef FORMAT + // Since Linux kernel commit bdb4d100afe9 ("procfs: always expose + // /proc/<pid>/map_files/ and make it readable") (in v4.3), + //
/proc/$pid/map_files always exists. Before that, it only exists if + // CONFIG_CHECKPOINT_RESTORE is enabled. + // + // If it exists, we should always have permission to open it since we + // were able to open /proc/$pid/maps. + _cleanup_close_ int map_files_fd = + open(map_files_path, O_RDONLY | O_DIRECTORY); + if (map_files_fd < 0) { + if (errno != ENOENT) { + return drgn_error_create_os("open", errno, + map_files_path); + } + drgn_log_debug(prog, "%s: %m", map_files_path); + } + + _cleanup_free_ char *line = NULL; + size_t n = 0; + bool logged_readlink_eperm = false, logged_stat_eperm = false; + // While we're reading /proc/$pid/maps, we might as well cache the + // segments for drgn_module_try_proc_files_for_shared_library(). + VECTOR(drgn_map_files_segment_vector, map_files_segments); + struct drgn_mapped_file_segments segments = DRGN_MAPPED_FILE_SEGMENTS_INIT; + for (;;) { + errno = 0; + ssize_t len; + if ((len = getline(&line, &n, maps_file)) == -1) { + if (errno) { + err = drgn_error_create_os("getline", errno, + maps_path); + } else { + err = NULL; + } + break; + } + // Remove the newline. 
+ if (len > 0 && line[len - 1] == '\n') + line[--len] = '\0'; + + drgn_log_debug(prog, "read %s", line); + err = process_add_mapping(it, maps_path, map_files_path, + map_files_fd, &logged_readlink_eperm, + &logged_stat_eperm, + &map_files_segments, &segments, line, + len); + if (err) + break; + } + if (err) { + drgn_mapped_file_segments_abort(&segments); + } else { + drgn_debug_info_set_map_files_segments(&prog->dbinfo, + &map_files_segments, + segments.sorted); + userspace_loaded_module_iterator_set_file_segments(&it->u, + &segments); + } + return err; +} + +static void +process_loaded_module_iterator_destroy(struct drgn_module_iterator *_it) +{ + struct process_loaded_module_iterator *it = + container_of(_it, struct process_loaded_module_iterator, u.it); + hash_table_for_each(process_mapped_files, files_it, &it->files) { + free((char *)files_it.entry->file->path); + drgn_mapped_file_destroy(files_it.entry->file); + } + process_mapped_files_deinit(&it->files); + userspace_loaded_module_iterator_deinit(&it->u); + free(it); +} + +static struct drgn_error * +process_loaded_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret) +{ + struct drgn_error *err; + struct process_loaded_module_iterator *it = calloc(1, sizeof(*it)); + if (!it) + return &drgn_enomem; + drgn_module_iterator_init(&it->u.it, prog, + process_loaded_module_iterator_destroy, + userspace_loaded_module_iterator_next); + process_mapped_files_init(&it->files); + err = process_get_mapped_files(it); + if (err) { + process_loaded_module_iterator_destroy(&it->u.it); + return err; + } + *ret = &it->u.it; + return NULL; +} + +static const char * +core_mapped_file_entry_to_key(struct drgn_mapped_file * const *entry) +{ + return (*entry)->path; +} + +DEFINE_HASH_TABLE(core_mapped_files, struct drgn_mapped_file *, + core_mapped_file_entry_to_key, c_string_key_hash_pair, + c_string_key_eq); + +struct core_loaded_module_iterator { + struct userspace_loaded_module_iterator u; + 
struct core_mapped_files files; +}; + +static struct drgn_error *parse_nt_file_error(struct binary_buffer *bb, + const char *pos, + const char *message) +{ + return drgn_error_create(DRGN_ERROR_OTHER, "couldn't parse NT_FILE"); +} + +static struct drgn_error * +core_get_mapped_files(struct core_loaded_module_iterator *it) +{ + struct drgn_error *err; + struct drgn_program *prog = it->u.it.prog; + + const void *note; + size_t note_size; + if (find_elf_note(prog->core, "CORE", NT_FILE, ¬e, ¬e_size)) + return drgn_error_libelf(); + if (!note) { + drgn_log_debug(prog, "core doesn't have NT_FILE note"); + return NULL; + } + + drgn_log_debug(prog, "parsing NT_FILE"); + + bool is_64_bit = drgn_platform_is_64_bit(&prog->platform); + bool little_endian = drgn_platform_is_little_endian(&prog->platform); + + struct binary_buffer bb; + binary_buffer_init(&bb, note, note_size, little_endian, + parse_nt_file_error); + + // fs/binfmt_elf.c in the Linux kernel source code documents the format + // of NT_FILE as: + // + // long count -- how many files are mapped + // long page_size -- units for file_ofs + // array of [COUNT] elements of + // long start + // long end + // long file_ofs + // followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... 
+ struct nt_file_segment64 { + uint64_t start; + uint64_t end; + uint64_t file_offset; + }; + struct nt_file_segment32 { + uint32_t start; + uint32_t end; + uint32_t file_offset; + }; + uint64_t count, page_size; + if (is_64_bit) { + if ((err = binary_buffer_next_u64(&bb, &count))) + return err; + if (count > UINT64_MAX / sizeof(struct nt_file_segment64)) + return binary_buffer_error(&bb, "count is too large"); + if ((err = binary_buffer_next_u64(&bb, &page_size)) || + (err = binary_buffer_skip(&bb, + count * sizeof(struct nt_file_segment64)))) + return err; + } else { + if ((err = binary_buffer_next_u32_into_u64(&bb, &count))) + return err; + if (count > UINT64_MAX / sizeof(struct nt_file_segment32)) + return binary_buffer_error(&bb, "count is too large"); + if ((err = binary_buffer_next_u32_into_u64(&bb, &page_size)) || + (err = binary_buffer_skip(&bb, + count * sizeof(struct nt_file_segment32)))) + return err; + } + + struct drgn_mapped_file_segments segments = + DRGN_MAPPED_FILE_SEGMENTS_INIT; + for (uint64_t i = 0; i < count; i++) { + struct nt_file_segment64 segment; +#define visit_nt_file_segment_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(start); \ + visit_scalar_member(end); \ + visit_scalar_member(file_offset); \ +} while (0) + deserialize_struct64(&segment, struct nt_file_segment32, + visit_nt_file_segment_members, + (char *)note + + (is_64_bit + ? 
16 + i * sizeof(struct nt_file_segment64) + : 8 + i * sizeof(struct nt_file_segment32)), + is_64_bit, bb.bswap); +#undef visit_nt_file_segment_members + segment.file_offset *= page_size; + const char *path = bb.pos; + if ((err = binary_buffer_skip_string(&bb))) + goto err; + drgn_log_debug(prog, + "found 0x%" PRIx64 "-0x%" PRIx64 " 0x%" PRIx64 " %s", + segment.start, segment.end, segment.file_offset, + path); + if (segment.start >= segment.end) + continue; + + struct hash_pair hp = core_mapped_files_hash(&path); + struct core_mapped_files_iterator files_it = + core_mapped_files_search_hashed(&it->files, &path, hp); + struct drgn_mapped_file *file; + if (files_it.entry) { + file = *files_it.entry; + } else { + file = drgn_mapped_file_create(path); + if (!file) { + err = &drgn_enomem; + goto err; + } + if (core_mapped_files_insert_searched(&it->files, &file, + hp, NULL) < 0) { + drgn_mapped_file_destroy(file); + err = &drgn_enomem; + goto err; + } + } + err = drgn_add_mapped_file_segment(&segments, segment.start, + segment.end, + segment.file_offset, file); + if (err) + goto err; + } + userspace_loaded_module_iterator_set_file_segments(&it->u, &segments); + return NULL; + +err: + drgn_mapped_file_segments_abort(&segments); + return err; +} + +static void +core_loaded_module_iterator_destroy(struct drgn_module_iterator *_it) +{ + struct core_loaded_module_iterator *it = + container_of(_it, struct core_loaded_module_iterator, u.it); + hash_table_for_each(core_mapped_files, files_it, &it->files) + drgn_mapped_file_destroy(*files_it.entry); + core_mapped_files_deinit(&it->files); + userspace_loaded_module_iterator_deinit(&it->u); + free(it); +} + +static struct drgn_error * +core_loaded_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret) +{ + struct drgn_error *err; + struct core_loaded_module_iterator *it = calloc(1, sizeof(*it)); + if (!it) + return &drgn_enomem; + drgn_module_iterator_init(&it->u.it, prog, + 
core_loaded_module_iterator_destroy, + userspace_loaded_module_iterator_next); + core_mapped_files_init(&it->files); + err = core_get_mapped_files(it); + if (err) { + core_loaded_module_iterator_destroy(&it->u.it); + return err; + } + *ret = &it->u.it; + return NULL; +} + +static struct drgn_error * +null_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret) +{ + struct drgn_module_iterator *it = calloc(1, sizeof(*it)); + if (!it) + return &drgn_enomem; + drgn_module_iterator_init(it, prog, NULL, NULL); + *ret = it; + return NULL; +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_loaded_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret) +{ + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) + return linux_kernel_loaded_module_iterator_create(prog, ret); + else if (drgn_program_is_userspace_process(prog)) + return process_loaded_module_iterator_create(prog, ret); + else if (drgn_program_is_userspace_core(prog)) + return core_loaded_module_iterator_create(prog, ret); + else + return null_module_iterator_create(prog, ret); +} + +static inline void drgn_module_iterator_destroyp(struct drgn_module_iterator **itp) +{ + drgn_module_iterator_destroy(*itp); +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_create_loaded_modules(struct drgn_program *prog) +{ + _cleanup_(drgn_module_iterator_destroyp) + struct drgn_module_iterator *it = NULL; + struct drgn_error *err = drgn_loaded_module_iterator_create(prog, &it); + if (err) + return err; + struct drgn_module *module; + while (!(err = drgn_module_iterator_next(it, &module, NULL)) && module); + return err; +} + +struct load_debug_info_file { + const char *path; + // We only keep this to keep load_debug_info_provided::build_id alive + // without needing to copy it. If we add a drgn_module_try_file API that + // allows providing an Elf handle, we could pass it down. + Elf *elf; + // This may be consumed and set to -1. 
+ int fd; +}; + +DEFINE_VECTOR(load_debug_info_file_vector, struct load_debug_info_file); + +struct load_debug_info_provided { + const void *build_id; + size_t build_id_len; + struct load_debug_info_file_vector files; + bool matched; +}; + +static struct nstring +load_debug_info_provided_key(const struct load_debug_info_provided *provided) +{ + return (struct nstring){ provided->build_id, provided->build_id_len }; +} + +DEFINE_HASH_TABLE(load_debug_info_provided_table, + struct load_debug_info_provided, + load_debug_info_provided_key, nstring_hash_pair, nstring_eq); + +struct load_debug_info_state { + // Provided files grouped by build ID. + struct load_debug_info_provided_table provided; + // Number of entries in the provided table that haven't matched any + // modules. + size_t unmatched_provided; +}; + +static struct drgn_error * +load_debug_info_add_provided_file(struct drgn_program *prog, + struct load_debug_info_state *state, + const char *path) +{ + _cleanup_close_ int fd = open(path, O_RDONLY); + if (fd < 0) { + drgn_log_warning(prog, "%s: %m; ignoring", path); + return NULL; + } + _cleanup_elf_end_ Elf *elf = dwelf_elf_begin(fd); + if (!elf) { + drgn_log_warning(prog, "%s: %s; ignoring", path, + elf_errmsg(-1)); + return NULL; + } + if (elf_kind(elf) != ELF_K_ELF) { + drgn_log_warning(prog, "%s: not an ELF file; ignoring", path); + return NULL; + } + const void *build_id; + ssize_t build_id_len = drgn_elf_gnu_build_id(elf, &build_id); + if (build_id_len <= 0) { + if (build_id_len < 0) { + drgn_log_warning(prog, "%s: %s; ignoring", path, + elf_errmsg(-1)); + } else { + drgn_log_warning(prog, "%s: no build ID; ignoring", + path); + } + return NULL; + } + + if (drgn_log_is_enabled(prog, DRGN_LOG_DEBUG)) { + _cleanup_free_ char *build_id_str = + ahexlify(build_id, build_id_len); + if (!build_id_str) + return &drgn_enomem; + drgn_log_debug(prog, "provided file %s build ID %s", + path, build_id_str); + } + + struct load_debug_info_provided provided = { + 
.build_id = build_id, + .build_id_len = build_id_len, + }; + struct load_debug_info_provided_table_iterator it; + int r = load_debug_info_provided_table_insert(&state->provided, + &provided, &it); + if (r < 0) + return &drgn_enomem; + if (r > 0) { + load_debug_info_file_vector_init(&it.entry->files); + state->unmatched_provided++; + } + + struct load_debug_info_file file = { + .path = path, + .fd = fd, + .elf = elf, + }; + if (!load_debug_info_file_vector_append(&it.entry->files, &file)) { + if (load_debug_info_file_vector_empty(&it.entry->files)) { + // The key will no longer be valid once we free the Elf + // handle, so we need to delete the entry. + load_debug_info_provided_table_delete_iterator(&state->provided, + it); + } + return &drgn_enomem; + } + // fd and elf are owned by state now. + fd = -1; + elf = NULL; + return NULL; +} + +static void load_debug_info_state_deinit(struct load_debug_info_state *state) +{ + hash_table_for_each(load_debug_info_provided_table, it, + &state->provided) { + vector_for_each(load_debug_info_file_vector, file, + &it.entry->files) { + elf_end(file->elf); + if (file->fd >= 0) + close(file->fd); + } + load_debug_info_file_vector_deinit(&it.entry->files); + } + load_debug_info_provided_table_deinit(&state->provided); +} + +static struct load_debug_info_provided * +load_debug_info_find_provided(struct load_debug_info_state *state, + const void *build_id, size_t build_id_len) +{ + struct nstring key = { build_id, build_id_len }; + struct load_debug_info_provided *provided = + load_debug_info_provided_table_search(&state->provided, + &key).entry; + if (provided && !provided->matched) { + state->unmatched_provided--; + provided->matched = true; + } + return provided; +} + +static struct drgn_error * +load_debug_info_try_provided(struct drgn_module *module, + struct load_debug_info_provided *provided, + enum drgn_module_file_status not_status) +{ + struct drgn_error *err; + vector_for_each(load_debug_info_file_vector, file, 
&provided->files) { + // No need to check build ID again. + err = drgn_module_try_file_internal(module, file->path, + file->fd, false, NULL); + // drgn_module_try_file_internal took ownership of file->fd. In + // the unlikely scenario that another module has the same build + // ID, we'll just have to reopen it by path. + file->fd = -1; + if (err) + return err; + + if (module->loaded_file_status != not_status + && module->debug_file_status != not_status) + break; } + return NULL; +} - size_t shstrndx; - if (elf_getshdrstrndx(elf, &shstrndx)) - return drgn_error_libelf(); +static struct drgn_error * +load_debug_info_try_provided_supplementary_files(struct drgn_module *module, + struct load_debug_info_state *state) +{ + const void *checksum; + size_t checksum_len; + if (drgn_module_wanted_supplementary_debug_file(module, NULL, NULL, + &checksum, + &checksum_len) + != DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK) + return NULL; + struct load_debug_info_provided *provided = + load_debug_info_find_provided(state, checksum, checksum_len); + if (!provided) + return NULL; + drgn_module_try_supplementary_debug_file_log(module, + "trying provided files for"); + return load_debug_info_try_provided(module, provided, + DRGN_MODULE_FILE_WANT_SUPPLEMENTARY); +} - Elf_Scn *reloc_scn = NULL; - while ((reloc_scn = elf_nextscn(elf, reloc_scn))) { - GElf_Shdr *reloc_shdr, reloc_shdr_mem; - reloc_shdr = gelf_getshdr(reloc_scn, &reloc_shdr_mem); - if (!reloc_shdr) - return drgn_error_libelf(); - - int r = should_apply_relocation_section(elf, shstrndx, - reloc_shdr); - if (r < 0) - return drgn_error_libelf(); - if (r) { - scn = elf_getscn(elf, reloc_shdr->sh_info); - if (!scn) - return drgn_error_libelf(); - GElf_Shdr *shdr, shdr_mem; - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - if (shdr->sh_type == SHT_NOBITS) +static struct drgn_error * +load_debug_info_try_provided_vmlinux(struct drgn_module *module, + struct load_debug_info_state *state) +{ + struct 
drgn_error *err; + struct drgn_program *prog = module->prog; + bool logged_trying = false; + hash_table_for_each(load_debug_info_provided_table, it, + &state->provided) { + vector_for_each(load_debug_info_file_vector, file, + &it.entry->files) { + int r = elf_is_vmlinux(file->elf); + if (r < 0) { + drgn_log_debug(prog, "%s: %s", file->path, + elf_errmsg(-1)); + } + if (r <= 0) continue; - Elf_Scn *symtab_scn = elf_getscn(elf, - reloc_shdr->sh_link); - if (!symtab_scn) - return drgn_error_libelf(); - - Elf_Data *data, *reloc_data, *symtab_data; - if ((err = read_elf_section(scn, &data)) || - (err = read_elf_section(reloc_scn, &reloc_data)) || - (err = read_elf_section(symtab_scn, &symtab_data))) - return err; + if (!logged_trying) { + drgn_module_try_files_log(module, + "(Linux version %s): trying provided files for", + prog->vmcoreinfo.osrelease); + logged_trying = true; + } - struct drgn_relocating_section relocating = { - .buf = data->d_buf, - .buf_size = data->d_size, - .addr = sh_addrs[elf_ndxscn(scn)], - .bswap = drgn_platform_bswap(&platform), - }; + const char *release; + ssize_t release_len = + elf_vmlinux_release(file->elf, &release); + if (release_len < 0) { + drgn_log_debug(prog, "%s: %s", file->path, + elf_errmsg(-1)); + continue; + } else if (release_len == 0) { + drgn_log_debug(prog, "%s: %s Linux version not found", + module->name, file->path); + continue; + } - if (reloc_shdr->sh_type == SHT_RELA) { - err = apply_elf_relas(&relocating, reloc_data, - symtab_data, sh_addrs, - shdrnum, &platform); + if (strlen(prog->vmcoreinfo.osrelease) == release_len + && memcmp(release, prog->vmcoreinfo.osrelease, + release_len) == 0) { + drgn_log_debug(prog, "%s: %s Linux version matches", + module->name, file->path); } else { - err = apply_elf_rels(&relocating, reloc_data, - symtab_data, sh_addrs, - shdrnum, &platform); + drgn_log_debug(prog, + "%s: %s Linux version (%.*s) does not match", + module->name, file->path, + release_len > INT_MAX + ? 
INT_MAX : (int)release_len, + release); + continue; } + + if (!it.entry->matched) { + state->unmatched_provided--; + it.entry->matched = true; + } + + err = drgn_module_try_file_internal(module, file->path, + file->fd, true, + NULL); + file->fd = -1; if (err) return err; - - /* - * Mark the relocation section as empty so that libdwfl - * doesn't try to apply it again. - */ - reloc_shdr->sh_size = 0; - if (!gelf_update_shdr(reloc_scn, reloc_shdr)) - return drgn_error_libelf(); - reloc_data->d_size = 0; + if (module->loaded_file_status != DRGN_MODULE_FILE_WANT + && module->debug_file_status != DRGN_MODULE_FILE_WANT) + break; } } return NULL; } static struct drgn_error * -drgn_module_find_files(struct drgn_debug_info_load_state *load, - struct drgn_module *module) +load_debug_info_try_provided_files(struct drgn_module *module, + struct load_debug_info_state *state) { struct drgn_error *err; - if (module->elf) { - err = relocate_elf_file(module->elf); - if (err) - return err; - } - - GElf_Addr loaded_file_bias; - Elf *loaded_elf = NULL; - Dwarf_Addr debug_file_bias; - Dwarf *dwarf; - err = NULL; - #pragma omp critical(drgn_module_find_files) - { - // We don't need the loaded file for the Linux kernel, and we - // always report the debug file as the main file to libdwfl. - if (!(load->dbinfo->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) { - loaded_elf = dwfl_module_getelf(module->dwfl_module, - &loaded_file_bias); - if (!loaded_elf) - err = drgn_error_libdwfl(); - } - if (!err) { - dwarf = dwfl_module_getdwarf(module->dwfl_module, - &debug_file_bias); - if (!dwarf) - err = drgn_error_libdwfl(); - } - } + err = load_debug_info_try_provided_supplementary_files(module, state); if (err) return err; - const char *loaded_file_path; - const char *debug_file_path; - dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, NULL, NULL, - &loaded_file_path, &debug_file_path); - // If the loaded file also has debugging information, debug_file_path is - // NULL. 
(debug_file_path is also NULL if libdwfl got the debug file - // from debuginfod, so this isn't 100% correct, but it'll at least - // identify the module.) - if (!debug_file_path) - debug_file_path = loaded_file_path; - - module->debug_file_bias = debug_file_bias; - err = drgn_elf_file_create(module, debug_file_path, dwarf_getelf(dwarf), - &module->debug_file); - if (err) { - module->debug_file = NULL; - return err; - } - module->debug_file->dwarf = dwarf; - if (!module->debug_file->scns[DRGN_SCN_DEBUG_INFO] || - !module->debug_file->scns[DRGN_SCN_DEBUG_ABBREV]) { - return drgn_error_create(DRGN_ERROR_OTHER, - "missing debugging information sections"); - } - - Dwarf *altdwarf = dwarf_getalt(dwarf); - if (altdwarf) { - Elf *altelf = dwarf_getelf(altdwarf); - if (!altelf) - return drgn_error_libdw(); - size_t shstrndx; - if (elf_getshdrstrndx(altelf, &shstrndx)) - return drgn_error_libelf(); - - Elf_Scn *scn = NULL; - while ((scn = elf_nextscn(altelf, scn))) { - GElf_Shdr shdr_mem; - GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) - return drgn_error_libelf(); - - if (shdr->sh_type != SHT_PROGBITS) - continue; - const char *scnname = elf_strptr(altelf, shstrndx, - shdr->sh_name); - if (!scnname) - return drgn_error_libelf(); - - /* - * TODO: save more sections and support imported units. - */ - if (strcmp(scnname, ".debug_info") == 0 && - !module->debug_file->alt_debug_info_data) { - err = read_elf_section(scn, - &module->debug_file->alt_debug_info_data); - if (err) - return err; - } else if (strcmp(scnname, ".debug_str") == 0 && - !module->debug_file->alt_debug_str_data) { - err = read_elf_section(scn, - &module->debug_file->alt_debug_str_data); + const void *build_id; + size_t build_id_len; + drgn_module_build_id(module, &build_id, &build_id_len); + if (build_id_len > 0) { + // Look up the provided file even if we don't need it so that it + // counts as matched. 
+ struct load_debug_info_provided *provided = + load_debug_info_find_provided(state, build_id, + build_id_len); + if (provided && drgn_module_wants_file(module)) { + uint64_t orig_supplementary_file_generation = + module->prog->dbinfo.supplementary_file_generation; + drgn_module_try_files_log(module, + "trying provided files for"); + err = load_debug_info_try_provided(module, provided, + DRGN_MODULE_FILE_WANT); + if (err) + return err; + // If the wanted supplementary debug file changed, try + // finding it again. + if (drgn_module_wanted_supplementary_debug_file_is_new(module, + orig_supplementary_file_generation)) { + err = load_debug_info_try_provided_supplementary_files(module, + state); if (err) return err; } } - } - err = drgn_elf_file_precache_sections(module->debug_file); - if (err) - return err; - - if (loaded_elf) { - module->loaded_file_bias = loaded_file_bias; - if (loaded_elf == module->debug_file->elf) { - module->loaded_file = module->debug_file; - } else { - err = drgn_elf_file_create(module, loaded_file_path, - loaded_elf, - &module->loaded_file); - if (err) { - module->loaded_file = NULL; - return err; - } - } + } else if (module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL + && drgn_module_kind(module) == DRGN_MODULE_MAIN) { + // Before Linux kernel commit 0935288c6e00 ("kdump: append + // kernel build-id string to VMCOREINFO") (in v5.9) and in a few + // broken stable versions (see + // ignore_broken_vmcoreinfo_build_id()), we can't get the + // vmlinux build ID from a kernel core dump. Fall back to + // checking every provided file for a vmlinux file with a + // matching version. 
+ err = load_debug_info_try_provided_vmlinux(module, state); + if (err) + return err; } return NULL; } -static struct drgn_error * -drgn_debug_info_read_module(struct drgn_debug_info_load_state *load, - struct drgn_dwarf_index_state *index, - struct drgn_module *head) +static void load_debug_info_log_missing(struct drgn_module *module, + unsigned int max_warnings, + unsigned int *num_missing) { - struct drgn_error *err; - struct drgn_module *module; - for (module = head; module; module = module->next) { - err = drgn_module_find_files(load, module); - if (err) { - module->err = err; - continue; - } - module->state = DRGN_DEBUG_INFO_MODULE_INDEXING; - return drgn_dwarf_index_read_file(index, module->debug_file); - } - /* - * We checked all of the files and didn't find debugging information. - * Report why for each one. - * - * (If we did find debugging information, we discard errors on the - * unused files.) - */ - err = NULL; - #pragma omp critical(drgn_debug_info_read_module_error) - for (module = head; module; module = module->next) { - const char *name = - dwfl_module_info(module->dwfl_module, NULL, NULL, NULL, - NULL, NULL, NULL, NULL); - if (module->err) { - err = drgn_debug_info_report_error(load, name, NULL, - module->err); - module->err = NULL; - } else { - err = drgn_debug_info_report_error(load, name, - "no debugging information", - NULL); - } - if (err) + if (++(*num_missing) > max_warnings) + return; + const char *missing_loaded = ""; + if (drgn_module_loaded_file_status(module) == DRGN_MODULE_FILE_WANT) { + switch (drgn_module_kind(module)) { + case DRGN_MODULE_MAIN: + missing_loaded = "executable file"; + break; + case DRGN_MODULE_SHARED_LIBRARY: + case DRGN_MODULE_VDSO: + missing_loaded = "shared object file"; break; + default: + missing_loaded = "loaded file"; + break; + } } - return err; + const char *missing_debug; + switch (drgn_module_debug_file_status(module)) { + case DRGN_MODULE_FILE_WANT: + missing_debug = "debugging symbols"; + break; + case 
DRGN_MODULE_FILE_WANT_SUPPLEMENTARY: + missing_debug = "supplementary debugging symbols"; + break; + default: + missing_debug = ""; + break; + } + const char *name_extra = ""; + if (module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL + && drgn_module_kind(module) == DRGN_MODULE_MAIN) + name_extra = module->prog->vmcoreinfo.osrelease; + drgn_log_warning(module->prog, "missing %s%s%s for %s%s%s", missing_loaded, + missing_loaded[0] && missing_debug[0] ? " and ": "", + missing_debug, module->name, name_extra[0] ? " " : "", + name_extra); } -static struct drgn_error * -drgn_debug_info_update_index(struct drgn_debug_info_load_state *load) +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, + size_t n, bool load_default, bool load_main) { - if (drgn_module_vector_empty(&load->new_modules)) + struct drgn_error *err; + + if (n == 0 && !load_default && !load_main) { + // We don't have any files to try. Don't create any modules. return NULL; - struct drgn_debug_info *dbinfo = load->dbinfo; - if (!c_string_set_reserve(&dbinfo->module_names, - c_string_set_size(&dbinfo->module_names) - + drgn_module_vector_size(&load->new_modules))) - return &drgn_enomem; + } - struct drgn_dwarf_index_state index; - if (!drgn_dwarf_index_state_init(&index, dbinfo)) - return &drgn_enomem; - struct drgn_error *err = NULL; - #pragma omp parallel for schedule(dynamic) num_threads(drgn_num_threads) - for (size_t i = 0; i < drgn_module_vector_size(&load->new_modules); i++) { + drgn_blocking_guard(); + + const char *env = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); + unsigned int max_warnings = env ? atoi(env) : 5; + unsigned int num_missing = 0; + + drgn_log_debug(prog, "loading %sdebugging symbols", + load_default ? "default " : load_main ? 
"main " : ""); + + _cleanup_(load_debug_info_state_deinit) + struct load_debug_info_state state = { + .provided = HASH_TABLE_INIT, + }; + for (size_t i = 0; i < n; i++) { + err = load_debug_info_add_provided_file(prog, &state, paths[i]); if (err) - continue; - struct drgn_module *module = - *drgn_module_vector_at(&load->new_modules, i); - struct drgn_error *module_err = - drgn_debug_info_read_module(load, &index, module); - if (module_err) { - #pragma omp critical(drgn_debug_info_update_index_error) - if (err) - drgn_error_destroy(module_err); - else - err = module_err; - } + return err; } - if (!err) { - drgn_debug_info_free_modules(dbinfo, true, false); - err = drgn_dwarf_info_update_index(&index); + + if (load_debug_info_provided_table_empty(&state.provided) + && !load_default && !load_main) { + drgn_log_debug(prog, "no usable provided files"); + return NULL; } - drgn_dwarf_index_state_deinit(&index); - return err; -} -struct drgn_error * -drgn_debug_info_report_flush(struct drgn_debug_info_load_state *load) -{ - struct drgn_debug_info *dbinfo = load->dbinfo; - my_dwfl_report_end(dbinfo, NULL, NULL); - struct drgn_error *err = drgn_debug_info_update_index(load); - dwfl_report_begin_add(dbinfo->dwfl); + uint64_t old_generation = prog->dbinfo.load_debug_info_generation; + + _cleanup_(drgn_module_iterator_destroyp) + struct drgn_module_iterator *it = NULL; + err = drgn_loaded_module_iterator_create(prog, &it); if (err) return err; - drgn_module_vector_clear(&load->new_modules); - return NULL; -} + it->for_load_debug_info = true; + VECTOR(drgn_module_vector, modules); + struct drgn_module *module; + while (!(err = drgn_module_iterator_next(it, &module, NULL)) && module) { + // Reset DONT_WANT to WANT. 
+ if (module->loaded_file_status == DRGN_MODULE_FILE_DONT_WANT) + module->loaded_file_status = DRGN_MODULE_FILE_WANT; + if (module->debug_file_status == DRGN_MODULE_FILE_DONT_WANT) + module->debug_file_status = DRGN_MODULE_FILE_WANT; + + err = load_debug_info_try_provided_files(module, &state); + if (err) + return err; -static struct drgn_error * -drgn_debug_info_report_finalize_errors(struct drgn_debug_info_load_state *load) -{ - if (load->num_errors > load->max_errors && - (!string_builder_line_break(&load->errors) || - !string_builder_appendf(&load->errors, "... %u more", - load->num_errors - load->max_errors))) { - string_builder_deinit(&load->errors); - return &drgn_enomem; + if (drgn_module_wants_file(module) + && (load_default + || (load_main + && drgn_module_kind(module) == DRGN_MODULE_MAIN)) + && !drgn_module_vector_append(&modules, &module)) + return &drgn_enomem; } - if (load->num_errors) { - return drgn_error_from_string_builder(DRGN_ERROR_MISSING_DEBUG_INFO, - &load->errors); - } else { - return NULL; + if (err) + return err; + + struct drgn_module **wanted_modules = + drgn_module_vector_begin(&modules); + size_t num_wanted_modules = drgn_module_vector_size(&modules); + + // The module iterator may have tried to load debug info, so we need to + // check each module again. + if (num_wanted_modules > 0) { + uint64_t new_generation = + ++prog->dbinfo.load_debug_info_generation; + size_t new_num_wanted_modules = 0; + for (size_t i = 0; i < num_wanted_modules; i++) { + module = wanted_modules[i]; + if (module->load_debug_info_generation <= old_generation) { + // Reset DONT_WANT to WANT. 
+ if (module->loaded_file_status == DRGN_MODULE_FILE_DONT_WANT) + module->loaded_file_status = DRGN_MODULE_FILE_WANT; + if (module->debug_file_status == DRGN_MODULE_FILE_DONT_WANT) + module->debug_file_status = DRGN_MODULE_FILE_WANT; + if (drgn_module_wants_file(module)) { + wanted_modules[new_num_wanted_modules++] = module; + module->load_debug_info_generation = new_generation; + } + } else if (drgn_module_wants_file(module)) { + load_debug_info_log_missing(module, + max_warnings, + &num_missing); + } + } + num_wanted_modules = new_num_wanted_modules; } -} -struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, - const char **paths, size_t n, - bool load_default, bool load_main) -{ - struct drgn_program *prog = dbinfo->prog; - struct drgn_error *err; + if (num_wanted_modules > 0) { + uint64_t orig_supplementary_file_generation = + prog->dbinfo.supplementary_file_generation; + drgn_handler_list_for_each_enabled(struct drgn_debug_info_finder, + finder, + &prog->dbinfo.debug_info_finders) { + err = finder->ops.find(wanted_modules, + num_wanted_modules, finder->arg); + if (err) + return err; + size_t new_num_wanted_modules = 0; + for (size_t i = 0; i < num_wanted_modules; i++) { + module = wanted_modules[i]; + // If there are no more finders to try after + // this and a finder changed the wanted + // supplementary debug file, try to find a + // provided file for it one last time. 
+ if (drgn_handler_is_last_enabled(&finder->handler) + && drgn_module_wanted_supplementary_debug_file_is_new(module, + orig_supplementary_file_generation)) { + err = load_debug_info_try_provided_supplementary_files(module, + &state); + if (err) + return err; + } + if (drgn_module_wants_file(module)) { + wanted_modules[new_num_wanted_modules++] = + module; + } + } + num_wanted_modules = new_num_wanted_modules; + if (num_wanted_modules == 0) + break; + } + } - if (load_default) - load_main = true; - - const char *max_errors = getenv("DRGN_MAX_DEBUG_INFO_ERRORS"); - struct drgn_debug_info_load_state load = { - .dbinfo = dbinfo, - .paths = paths, - .num_paths = n, - .load_default = load_default, - .load_main = load_main, - .new_modules = VECTOR_INIT, - .errors = STRING_BUILDER_INIT, - .max_errors = max_errors ? atoi(max_errors) : 5, - }; - dwfl_report_begin_add(dbinfo->dwfl); - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - err = linux_kernel_report_debug_info(&load); - else - err = userspace_report_debug_info(&load); - my_dwfl_report_end(dbinfo, NULL, NULL); - if (err) - goto err; - - /* - * userspace_report_debug_info() reports the main debugging information - * directly with libdwfl, so we need to report it to dbinfo. 
- */ - if (!(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) && load_main && - dwfl_getmodules(dbinfo->dwfl, drgn_debug_info_report_dwfl_module, - &load, 0)) { - err = &drgn_enomem; - goto err; + if (state.unmatched_provided != 0) { + hash_table_for_each(load_debug_info_provided_table, pit, + &state.provided) { + if (!pit.entry->matched) { + vector_for_each(load_debug_info_file_vector, + file, &pit.entry->files) { + drgn_log_warning(prog, + "provided file %s did not match any loaded modules; ignoring", + file->path); + } + } + } } - err = drgn_debug_info_update_index(&load); - if (err) - goto err; + for (size_t i = 0; i < num_wanted_modules; i++) { + load_debug_info_log_missing(wanted_modules[i], max_warnings, + &num_missing); + } + if (num_missing > max_warnings) { + drgn_log_warning(prog, "... missing %u more", + num_missing - max_warnings); + } - /* - * TODO: for core dumps, we need to add memory reader segments for - * read-only segments of the loaded binaries since those aren't saved in - * the core dump. - */ + // Update the DWARF index eagerly, mostly because that's what we did + // back when we used libdwfl. We may want to remove this in the future. 
+ err = drgn_dwarf_info_update_index(&prog->dbinfo); + if (err) + return err; - err = drgn_debug_info_report_finalize_errors(&load); -out: - drgn_module_vector_deinit(&load.new_modules); - return err; + if (num_missing > 0) { + return drgn_error_create(DRGN_ERROR_MISSING_DEBUG_INFO, + "missing some debugging symbols; see https://drgn.readthedocs.io/en/latest/getting_debugging_symbols.html"); + } -err: - drgn_debug_info_free_modules(dbinfo, false, false); - string_builder_deinit(&load.errors); - goto out; + return NULL; } -struct elf_symbols_search_arg { - const char *name; - uint64_t address; - enum drgn_find_symbol_flags flags; +LIBDRGN_PUBLIC struct drgn_error * +drgn_load_module_debug_info(struct drgn_module **modules, size_t *num_modulesp) +{ struct drgn_error *err; - struct drgn_symbol_result_builder *builder; -}; -static bool elf_symbol_match(struct elf_symbols_search_arg *arg, GElf_Addr addr, - const GElf_Sym *sym, const char *name) -{ - if ((arg->flags & DRGN_FIND_SYMBOL_NAME) && strcmp(name, arg->name) != 0) - return false; - if ((arg->flags & DRGN_FIND_SYMBOL_ADDR) && - (arg->address < addr || arg->address >= addr + sym->st_size)) - return false; - return true; -} + const size_t orig_num_modules = *num_modulesp; + if (orig_num_modules == 0) + return NULL; -static bool elf_symbol_store_match(struct elf_symbols_search_arg *arg, - GElf_Sym *elf_sym, GElf_Addr addr, - const char *name) -{ - struct drgn_symbol *sym; - if (arg->flags == (DRGN_FIND_SYMBOL_ONE | DRGN_FIND_SYMBOL_NAME)) { - int binding = GELF_ST_BIND(elf_sym->st_info); - /* - * The order of precedence is - * GLOBAL = UNIQUE > WEAK > LOCAL = everything else - * - * If we found a global or unique symbol, return it - * immediately. If we found a weak symbol, then save it, - * which may overwrite a previously found weak or local - * symbol. Otherwise, save the symbol only if we haven't - * found another symbol. 
- */ - if (binding != STB_GLOBAL - && binding != STB_GNU_UNIQUE - && binding != STB_WEAK - && drgn_symbol_result_builder_count(arg->builder) > 0) - return false; - sym = malloc(sizeof(*sym)); - if (!sym) { - arg->err = &drgn_enomem; - return true; - } - drgn_symbol_from_elf(name, addr, elf_sym, sym); - if (!drgn_symbol_result_builder_add(arg->builder, sym)) { - arg->err = &drgn_enomem; - drgn_symbol_destroy(sym); - } + struct drgn_program *prog = modules[0]->prog; + drgn_log_debug(prog, "loading debugging symbols for %zu modules", + orig_num_modules); - /* Abort on error, or short-circuit if we found a global or - * unique symbol */ - return (arg->err || sym->binding == DRGN_SYMBOL_BINDING_GLOBAL - || sym->binding == DRGN_SYMBOL_BINDING_UNIQUE); - } else { - sym = malloc(sizeof(*sym)); - if (!sym) { - arg->err = &drgn_enomem; - return true; + size_t num_wanted_modules = 0; + for (size_t i = 0; i < orig_num_modules; i++) { + if (modules[i]->prog != prog) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "modules are from different programs"); } - drgn_symbol_from_elf(name, addr, elf_sym, sym); - if (!drgn_symbol_result_builder_add(arg->builder, sym)) { - arg->err = &drgn_enomem; - drgn_symbol_destroy(sym); + if (drgn_module_wants_file(modules[i])) { + modules[num_wanted_modules++] = modules[i]; + } else if (modules[i]->loaded_file_status == DRGN_MODULE_FILE_DONT_WANT + || modules[i]->loaded_file_status == DRGN_MODULE_FILE_DONT_WANT) { + drgn_log_debug(prog, + "debugging symbols not wanted for %s", + modules[i]->name); + } else { + drgn_log_debug(prog, + "debugging symbols already loaded for %s", + modules[i]->name); } - /* Abort on error, or short-circuit for single lookup */ - return (arg->err || (arg->flags & DRGN_FIND_SYMBOL_ONE)); } -} + if (num_wanted_modules == 0) { + *num_modulesp = 0; + return NULL; + } -static int elf_symbols_search_cb(Dwfl_Module *dwfl_module, void **userdatap, - const char *module_name, Dwarf_Addr base, - void *cb_arg) -{ - 
struct elf_symbols_search_arg *arg = cb_arg; + uint64_t generation = ++prog->dbinfo.load_debug_info_generation; + for (size_t i = 0; i < num_wanted_modules; i++) + modules[i]->load_debug_info_generation = generation; - int symtab_len = dwfl_module_getsymtab(dwfl_module); - if (symtab_len == -1) - return DWARF_CB_OK; + drgn_blocking_guard(); - /* Ignore the zeroth null symbol */ - for (int i = 1; i < symtab_len; i++) { - GElf_Sym elf_sym; - GElf_Addr elf_addr; - const char *name = dwfl_module_getsym_info(dwfl_module, i, - &elf_sym, &elf_addr, - NULL, NULL, NULL); - if (!name || !elf_symbol_match(arg, elf_addr, &elf_sym, name)) - continue; - if (elf_symbol_store_match(arg, &elf_sym, elf_addr, name)) - return DWARF_CB_ABORT; + const size_t orig_num_wanted_modules = num_wanted_modules; + drgn_handler_list_for_each_enabled(struct drgn_debug_info_finder, + finder, + &prog->dbinfo.debug_info_finders) { + err = finder->ops.find(modules, num_wanted_modules, + finder->arg); + if (err) + return err; + size_t new_num_wanted_modules = 0; + for (size_t i = 0; i < num_wanted_modules; i++) { + if (drgn_module_wants_file(modules[i])) + modules[new_num_wanted_modules++] = modules[i]; + } + num_wanted_modules = new_num_wanted_modules; + if (num_wanted_modules == 0) + break; } - return DWARF_CB_OK; + drgn_log_debug(prog, "debugging symbols loaded for %zu/%zu modules", + orig_num_wanted_modules - num_wanted_modules, + orig_num_wanted_modules); + *num_modulesp = num_wanted_modules; + return NULL; } static struct drgn_error * -elf_symbols_search(const char *name, uint64_t addr, enum drgn_find_symbol_flags flags, - void *data, struct drgn_symbol_result_builder *builder) +elf_symbols_search(const char *name, uint64_t addr, + enum drgn_find_symbol_flags flags, void *data, + struct drgn_symbol_result_builder *builder) { - Dwfl_Module *dwfl_module = NULL; + struct drgn_error *err; struct drgn_program *prog = data; - struct elf_symbols_search_arg arg = { - .name = name, - .address = addr, - 
.flags = flags, - .err = NULL, - .builder = builder, - }; - if (arg.flags & DRGN_FIND_SYMBOL_ADDR) { - dwfl_module = dwfl_addrmodule(prog->dbinfo.dwfl, arg.address); - if (!dwfl_module) - return NULL; - } - - if ((arg.flags & (DRGN_FIND_SYMBOL_ADDR | DRGN_FIND_SYMBOL_ONE)) - == (DRGN_FIND_SYMBOL_ADDR | DRGN_FIND_SYMBOL_ONE)) { - GElf_Off offset; - GElf_Sym elf_sym; - const char *sym_name = dwfl_module_addrinfo(dwfl_module, addr, - &offset, &elf_sym, - NULL, NULL, NULL); - if (!sym_name) + if (flags & DRGN_FIND_SYMBOL_ADDR) { + struct drgn_module *module = + drgn_module_find_by_address(prog, addr); + if (!module) return NULL; - struct drgn_symbol *sym = malloc(sizeof(*sym)); - if (!sym) - return &drgn_enomem; - drgn_symbol_from_elf(sym_name, addr - offset, &elf_sym, sym); - if (!drgn_symbol_result_builder_add(builder, sym)) { - arg.err = &drgn_enomem; - drgn_symbol_destroy(sym); - } - } else if (dwfl_module) { - elf_symbols_search_cb(dwfl_module, NULL, NULL, 0, &arg); + return drgn_module_elf_symbols_search(module, name, addr, flags, + builder); } else { - dwfl_getmodules(prog->dbinfo.dwfl, elf_symbols_search_cb, &arg, 0); + hash_table_for_each(drgn_module_table, it, + &prog->dbinfo.modules) { + for (struct drgn_module *module = *it.entry; module; + module = module->next_same_name) { + err = drgn_module_elf_symbols_search(module, + name, addr, + flags, + builder); + if (err == &drgn_stop) + break; + if (err) + return err; + } + } + return NULL; } - return arg.err; -} - -bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, - const char *name) -{ - return c_string_set_search(&dbinfo->module_names, &name).entry != NULL; } void drgn_debug_info_init(struct drgn_debug_info *dbinfo, struct drgn_program *prog) { + elf_version(EV_CURRENT); dbinfo->prog = prog; - dbinfo->dwfl = dwfl_begin(&drgn_dwfl_callbacks); - // This is temporary until we stop using libdwfl, and is extremely - // unlikely to fail anwyays, so don't bother propagating an error up. 
- if (!dbinfo->dwfl) - abort(); + drgn_module_table_init(&dbinfo->modules); + drgn_module_address_tree_init(&dbinfo->modules_by_address); const struct drgn_type_finder_ops type_finder_ops = { .find = drgn_debug_info_find_type, }; @@ -2149,19 +5596,57 @@ void drgn_debug_info_init(struct drgn_debug_info *dbinfo, drgn_program_register_symbol_finder_impl(prog, &dbinfo->symbol_finder, "elf", &symbol_finder_ops, prog, 0); - drgn_module_table_init(&dbinfo->modules); - c_string_set_init(&dbinfo->module_names); + const struct drgn_debug_info_finder_ops + standard_debug_info_finder_ops = { + .find = drgn_standard_debug_info_find, + }; + drgn_program_register_debug_info_finder_impl(prog, + &dbinfo->standard_debug_info_finder, + "standard", + &standard_debug_info_finder_ops, + &dbinfo->options, 0); + drgn_debug_info_options_init(&dbinfo->options); +#if WITH_DEBUGINFOD + dbinfo->debuginfod_client = NULL; + if (drgn_have_debuginfod()) { + const struct drgn_debug_info_finder_ops + debuginfod_debug_info_finder_ops = { + .find = drgn_debuginfod_find, + }; + drgn_program_register_debug_info_finder_impl(prog, + &dbinfo->debuginfod_debug_info_finder, + "debuginfod", + &debuginfod_debug_info_finder_ops, + prog, + DRGN_HANDLER_REGISTER_ENABLE_LAST); + } +#endif drgn_dwarf_info_init(dbinfo); } void drgn_debug_info_deinit(struct drgn_debug_info *dbinfo) { + free(dbinfo->map_files_segments); + drgn_debug_info_options_deinit(&dbinfo->options); +#if WITH_DEBUGINFOD + if (dbinfo->debuginfod_client) + drgn_debuginfod_end(dbinfo->debuginfod_client); +#endif + drgn_handler_list_deinit(struct drgn_debug_info_finder, finder, + &dbinfo->debug_info_finders, + if (finder->ops.destroy) + finder->ops.destroy(finder->arg); + ); drgn_dwarf_info_deinit(dbinfo); - c_string_set_deinit(&dbinfo->module_names); - drgn_debug_info_free_modules(dbinfo, false, true); - assert(drgn_module_table_empty(&dbinfo->modules)); + hash_table_for_each(drgn_module_table, it, &dbinfo->modules) { + struct drgn_module *module = 
*it.entry; + do { + struct drgn_module *next = module->next_same_name; + drgn_module_destroy(module); + module = next; + } while (module); + } drgn_module_table_deinit(&dbinfo->modules); - dwfl_end(dbinfo->dwfl); } struct drgn_elf_file *drgn_module_find_dwarf_file(struct drgn_module *module, @@ -2169,8 +5654,11 @@ struct drgn_elf_file *drgn_module_find_dwarf_file(struct drgn_module *module, { if (!module->debug_file) return NULL; - if (dwarf == module->debug_file->dwarf) + if (dwarf == module->debug_file->_dwarf) return module->debug_file; + if (module->supplementary_debug_file + && dwarf == module->supplementary_debug_file->_dwarf) + return module->supplementary_debug_file; struct drgn_elf_file_dwarf_table_iterator it = drgn_elf_file_dwarf_table_search(&module->split_dwarf_files, &dwarf); @@ -2183,15 +5671,11 @@ drgn_module_create_split_dwarf_file(struct drgn_module *module, struct drgn_elf_file **ret) { struct drgn_error *err; - err = drgn_elf_file_create(module, name, dwarf_getelf(dwarf), ret); + err = drgn_elf_file_create(module, name, -1, NULL, dwarf_getelf(dwarf), + ret); if (err) return err; - err = drgn_elf_file_precache_sections(*ret); - if (err) { - drgn_elf_file_destroy(*ret); - return err; - } - (*ret)->dwarf = dwarf; + (*ret)->_dwarf = dwarf; int r = drgn_elf_file_dwarf_table_insert(&module->split_dwarf_files, ret, NULL); if (r < 0) { @@ -2230,9 +5714,19 @@ drgn_module_find_cfi(struct drgn_program *prog, struct drgn_module *module, module->parsed_debug_frame = true; } if (!module->parsed_orc) { - err = drgn_module_parse_orc(module); + err = drgn_module_parse_orc(module, false); + if (err) + return err; + + // For some distributions, such as Fedora & derivatives, + // ORC sections are stripped from the debug file. Try + // using built-in ORC if nothing was loaded from the + // debug_file. 
+ if (!module->orc.num_entries) + err = drgn_module_parse_orc(module, true); if (err) return err; + module->parsed_orc = true; } @@ -2275,137 +5769,21 @@ drgn_module_find_cfi(struct drgn_program *prog, struct drgn_module *module, if (err != &drgn_not_found) return err; } - return &drgn_not_found; -} - -#if !_ELFUTILS_PREREQ(0, 175) -static Elf *dwelf_elf_begin(int fd) -{ - return elf_begin(fd, ELF_C_READ_MMAP_PRIVATE, NULL); -} -#endif - -struct drgn_error *open_elf_file(const char *path, int *fd_ret, Elf **elf_ret) -{ - struct drgn_error *err; - - *fd_ret = open(path, O_RDONLY); - if (*fd_ret == -1) - return drgn_error_create_os("open", errno, path); - *elf_ret = dwelf_elf_begin(*fd_ret); - if (!*elf_ret) { - err = drgn_error_libelf(); - goto err_fd; - } - if (elf_kind(*elf_ret) != ELF_K_ELF) { - err = drgn_error_create(DRGN_ERROR_OTHER, "not an ELF file"); - goto err_elf; - } - return NULL; - -err_elf: - elf_end(*elf_ret); -err_fd: - close(*fd_ret); - return err; -} - -struct drgn_error *find_elf_file(char **path_ret, int *fd_ret, Elf **elf_ret, - const char * const *path_formats, ...) 
-{ - struct drgn_error *err; - size_t i; - for (i = 0; path_formats[i]; i++) { - va_list ap; - int ret; - char *path; - int fd; - Elf *elf; - - va_start(ap, path_formats); - ret = vasprintf(&path, path_formats[i], ap); - va_end(ap); - if (ret == -1) - return &drgn_enomem; - fd = open(path, O_RDONLY); - if (fd == -1) { - free(path); - continue; - } - elf = dwelf_elf_begin(fd); - if (!elf) { - close(fd); - free(path); - continue; + if (!can_use_debug_file) { + if (!module->parsed_orc) { + err = drgn_module_parse_orc(module, true); + if (err) + return err; + module->parsed_orc = true; } - if (elf_kind(elf) != ELF_K_ELF) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s: not an ELF file", path); - elf_end(elf); - close(fd); - free(path); + *file_ret = NULL; + err = drgn_module_find_orc_cfi(module, pc, row_ret, + interrupted_ret, + ret_addr_regno_ret); + if (err != &drgn_not_found) return err; - } - *path_ret = path; - *fd_ret = fd; - *elf_ret = elf; - return NULL; - } - *path_ret = NULL; - *fd_ret = -1; - *elf_ret = NULL; - return NULL; -} - -/* - * Get the start address from the first loadable segment and the end address - * from the last loadable segment. - * - * The ELF specification states that loadable segments are sorted on p_vaddr. - * However, vmlinux on x86-64 has an out of order segment for .data..percpu, and - * Arm has a couple for .vector and .stubs. Thankfully, those are placed in the - * middle by the vmlinux linker script, so we can still rely on the first and - * last loadable segments. - */ -struct drgn_error *elf_address_range(Elf *elf, uint64_t bias, - uint64_t *start_ret, uint64_t *end_ret) -{ - size_t phnum; - if (elf_getphdrnum(elf, &phnum) != 0) - return drgn_error_libelf(); - - GElf_Phdr phdr_mem, *phdr; - size_t i; - for (i = 0; i < phnum; i++) { - phdr = gelf_getphdr(elf, i, &phdr_mem); - if (!phdr) - return drgn_error_libelf(); - if (phdr->p_type == PT_LOAD) { - uint64_t align = phdr->p_align ? 
phdr->p_align : 1; - *start_ret = (phdr->p_vaddr & -align) + bias; - break; - } - } - if (i >= phnum) { - /* There were no loadable segments. */ - *start_ret = *end_ret = 0; - return NULL; } - for (i = phnum; i-- > 0;) { - phdr = gelf_getphdr(elf, i, &phdr_mem); - if (!phdr) - return drgn_error_libelf(); - if (phdr->p_type == PT_LOAD) { - *end_ret = (phdr->p_vaddr + phdr->p_memsz) + bias; - if (*start_ret >= *end_ret) - *start_ret = *end_ret = 0; - return NULL; - } - } - /* We found a loadable segment earlier, so this shouldn't happen. */ - assert(!"PT_LOAD segment disappeared"); - *end_ret = 0; - return NULL; + return &drgn_not_found; } diff --git a/libdrgn/debug_info.h b/libdrgn/debug_info.h index 6d69a13dd..ad1e88846 100644 --- a/libdrgn/debug_info.h +++ b/libdrgn/debug_info.h @@ -12,13 +12,18 @@ #ifndef DRGN_DEBUG_INFO_H #define DRGN_DEBUG_INFO_H +#if WITH_DEBUGINFOD +#include +#endif #include -#include #include +#include "binary_search_tree.h" #include "cfi.h" +#include "debug_info_options.h" #include "drgn_internal.h" #include "dwarf_info.h" +#include "elf_symtab.h" #include "hash_table.h" #include "object.h" #include "orc_info.h" @@ -42,216 +47,333 @@ struct drgn_elf_file; * @{ */ -/** State of a @ref drgn_module. */ -enum drgn_module_state { - /** Reported but not indexed. */ - DRGN_DEBUG_INFO_MODULE_NEW, - /** Reported and will be indexed on success. */ - DRGN_DEBUG_INFO_MODULE_INDEXING, - /** Indexed. Must not be freed until @ref drgn_debug_info_destroy(). 
*/ - DRGN_DEBUG_INFO_MODULE_INDEXED, -} __attribute__((__packed__)); +#if WITH_DEBUGINFOD +#if ENABLE_DLOPEN_DEBUGINFOD +bool drgn_have_debuginfod(void); +#else +static inline bool drgn_have_debuginfod(void) +{ + return true; +} +#endif +#else +static inline bool drgn_have_debuginfod(void) +{ + return false; +} +#endif DEFINE_HASH_TABLE_TYPE(drgn_elf_file_dwarf_table, struct drgn_elf_file *); +DEFINE_HASH_TABLE_TYPE(drgn_module_table, struct drgn_module *); +DEFINE_BINARY_SEARCH_TREE_TYPE(drgn_module_address_tree, + struct drgn_module_address_range); -/** - * A module reported to a @ref drgn_debug_info. - * - * Conceptually, a module is an ELF file loaded at a specific address range (or - * not loaded). - * - * Files are identified by canonical path and, if present, build ID. Each (path, - * address range) is uniquely represented by a @ref drgn_module. - */ -struct drgn_module { +struct drgn_debug_info_finder { + struct drgn_handler handler; + struct drgn_debug_info_finder_ops ops; + void *arg; +}; + +/** Cache of debugging information. */ +struct drgn_debug_info { + /** Program owning this cache. */ struct drgn_program *prog; - /** @c NULL if the module does not have a build ID. */ - const void *build_id; - /** Zero if the module does not have a build ID. */ - size_t build_id_len; - /** Load address range, or both 0 if not loaded. */ + struct drgn_type_finder type_finder; + struct drgn_object_finder object_finder; + struct drgn_symbol_finder symbol_finder; + + /** Main module. @c NULL if not created yet. */ + struct drgn_module *main_module; + /** + * Table of all modules indexed by name. + * + * Modules with the same name (which should be rare) are on a + * singly-linked list (@ref drgn_module::next_same_name). + */ + struct drgn_module_table modules; + /** + * Counter used to detect when @ref modules is modified during iteration + * of a @ref drgn_created_module_iterator. + */ + uint64_t modules_generation; + /** Tree of modules sorted by start address. 
*/ + struct drgn_module_address_tree modules_by_address; + /** + * Singly-linked list of modules that need to have their DWARF + * information indexed. + */ + struct drgn_module *modules_pending_indexing; + /** DWARF debugging information. */ + struct drgn_dwarf_info dwarf; + + struct drgn_handler_list debug_info_finders; + struct drgn_debug_info_finder standard_debug_info_finder; + struct drgn_debug_info_options options; + /** + * Counter used to detect when loading debugging information is + * attempted. + * + * @sa drgn_module::load_debug_info_generation + */ + uint64_t load_debug_info_generation; + /** + * Counter used to detect when the wanted supplementary file for a + * module has changed. + * + * @sa drgn_module_wanted_supplementary_file::generation + */ + uint64_t supplementary_file_generation; + +#if WITH_DEBUGINFOD + struct drgn_debug_info_finder debuginfod_debug_info_finder; + /** debuginfod-client session. */ + debuginfod_client *debuginfod_client; + const char *debuginfod_current_name; + const char *debuginfod_current_type; + unsigned int debuginfod_spinner_position; + bool debuginfod_have_url; + bool logged_debuginfod_progress; +#endif + bool logged_no_debuginfod; + + /** + * Cache of entries in /proc/$pid/map_files used for finding loaded + * files. Populated the first time we need it or opportunistically when + * we parse /proc/$pid/maps. Rebuilt whenever we try to open an entry + * that no longer exists. + */ + struct drgn_map_files_segment *map_files_segments; + /** Number of segments in @ref map_files_segments. */ + size_t num_map_files_segments; +}; + +/** Initialize a @ref drgn_debug_info. */ +void drgn_debug_info_init(struct drgn_debug_info *dbinfo, + struct drgn_program *prog); + +/** Deinitialize a @ref drgn_debug_info. 
*/ +void drgn_debug_info_deinit(struct drgn_debug_info *dbinfo); + +typedef void drgn_module_iterator_destroy_fn(struct drgn_module_iterator *); +typedef struct drgn_error * +drgn_module_iterator_next_fn(struct drgn_module_iterator *, + struct drgn_module **, bool *); + +struct drgn_module_iterator { + struct drgn_program *prog; + drgn_module_iterator_destroy_fn *destroy; + drgn_module_iterator_next_fn *next; + bool for_load_debug_info; +}; + +static inline void +drgn_module_iterator_init(struct drgn_module_iterator *it, + struct drgn_program *prog, + drgn_module_iterator_destroy_fn *destroy, + drgn_module_iterator_next_fn *next) +{ + it->prog = prog; + it->destroy = destroy; + it->next = next; + it->for_load_debug_info = false; +} + +/** Bitmask of files in a @ref drgn_module. */ +enum drgn_module_file_mask { + DRGN_MODULE_FILE_MASK_LOADED = 1 << 0, + DRGN_MODULE_FILE_MASK_DEBUG = 1 << 1, +} __attribute__((__packed__)); + +DEFINE_HASH_MAP_TYPE(drgn_module_section_address_map, char *, uint64_t); + +struct drgn_module_address_range { + /** Node in @ref drgn_debug_info::modules_by_address. */ + struct binary_tree_node node; + /** Address range. */ uint64_t start, end; - /** Optional module name allocated with @c malloc(). */ + /** Module owning this range. */ + struct drgn_module *module; +}; + +struct drgn_module { + struct drgn_program *prog; + enum drgn_module_kind kind; + + /** Module name. */ char *name; + /** Kind-specific info. */ + uint64_t info; + + /** Next module with the same name in @ref drgn_debug_info::modules. */ + struct drgn_module *next_same_name; - Dwfl_Module *dwfl_module; - /** File that is loaded into the program. */ - struct drgn_elf_file *loaded_file; - /** File containing debugging information. */ - struct drgn_elf_file *debug_file; /** - * Difference between addresses in program and addresses in @ref - * drgn_module::loaded_file. + * Raw binary build ID. @c NULL if the module does not have a build ID. 
*/ - uint64_t loaded_file_bias; + void *build_id; /** - * Difference between addresses in program and addresses in @ref - * drgn_module::debug_file. + * Length of @ref drgn_module::build_id in bytes. Zero if the module + * does not have a build ID. */ - uint64_t debug_file_bias; + size_t build_id_len; + /** + * Build ID as a null-terminated hexadecimal string. @c NULL if the + * module does not have a build ID. + * + * Used for logging and finding debugging information. + * + * This is allocated together with @ref drgn_module::build_id. + */ + char *build_id_str; + /** Load address ranges. @c NULL if not known yet. */ + struct drgn_module_address_range *address_ranges; + /** Number of ranges in @ref address_ranges. */ + size_t num_address_ranges; + /** + * Placeholder assigned to @ref address_ranges in two cases: + * + * 1. If @ref num_address_ranges is 1. This lets us avoid allocating the + * address ranges separately. This is a minor optimization for the + * common case, but more importantly, + * `drgn_module_maybe_use_elf_file()` can't handle @ref + * drgn_module_set_address_range() failing. + * 2. If the address range is known to be empty. This allows us to + * distinguish between that and the unknown case. + */ + struct drgn_module_address_range single_address_range; + struct drgn_elf_file *loaded_file; + struct drgn_elf_file *debug_file; + struct drgn_elf_file *supplementary_debug_file; + struct drgn_elf_file *gnu_debugdata_file; + /** Table mapping libdw handle to corresponding @ref drgn_elf_file. */ struct drgn_elf_file_dwarf_table split_dwarf_files; + uint64_t loaded_file_bias; + uint64_t debug_file_bias; + enum drgn_module_file_status loaded_file_status; + enum drgn_module_file_status debug_file_status; + enum drgn_supplementary_file_kind supplementary_debug_file_kind; /** DWARF debugging information. */ struct drgn_module_dwarf_info dwarf; /** ORC unwinder information. */ struct drgn_module_orc_info orc; + /** ELF symbol table. 
*/ + struct drgn_elf_symbol_table elf_symtab; + /** Symbol table from the gnu_debugdata_file */ + struct drgn_elf_symbol_table gnu_debugdata_symtab; - /** Whether DWARF CFI from .debug_frame has been parsed. */ + /** Whether .debug_frame has been parsed. */ bool parsed_debug_frame; - /** Whether EH CFI from .eh_frame has been parsed. */ + /** Whether .eh_frame has been parsed. */ bool parsed_eh_frame; /** Whether ORC unwinder data has been parsed. */ bool parsed_orc; + /** Which files need to be checked for an ELF symbol table. */ + enum drgn_module_file_mask elf_symtab_pending_files; + /** + * Whether a full symbol table has been found (as opposed to a dynamic + * symbol table, which only contains a subset of symbols). + */ + bool have_full_symtab; - /* - * path, elf, and fd are used when an ELF file was reported with - * drgn_debug_info_report_elf() so we can report the file to libdwfl - * later. They are not valid after loading. + /** Mapping from section name to address. */ + struct drgn_module_section_address_map section_addresses; + /** + * Counter used to detect when @ref section_addresses is modified during + * iteration of a @ref drgn_module_section_address_iterator. */ - char *path; - Elf *elf; - int fd; - enum drgn_module_state state; - /** Error while loading. */ - struct drgn_error *err; + uint64_t section_addresses_generation; + /** - * Next module with same build ID and address range. + * Counter used to detect when loading debugging information is + * attempted. * - * There may be multiple files with the same build ID (e.g., a stripped - * binary and its corresponding separate debug info file). While - * loading, all files with the same build ID and address range are - * linked in a list. Only one is indexed; the rest are destroyed. 
+ * @sa drgn_debug_info::load_debug_info_generation */ - struct drgn_module *next; + uint64_t load_debug_info_generation; + struct drgn_module_wanted_supplementary_file *wanted_supplementary_debug_file; + /** Node in @ref drgn_debug_info::modules_pending_indexing. */ + struct drgn_module *pending_indexing_next; + /** Object the module was created from */ + struct drgn_object object; }; -DEFINE_HASH_TABLE_TYPE(drgn_module_table, struct drgn_module *); +/** + * Delete a partially-initialized module. This can only be called before the + * module is returned from public API. + */ +void drgn_module_delete(struct drgn_module *module); -DEFINE_HASH_SET_TYPE(c_string_set, const char *); +static inline void drgn_module_deletep(struct drgn_module **modulep) +{ + if (*modulep) + drgn_module_delete(*modulep); +} -/** Cache of debugging information. */ -struct drgn_debug_info { - /** Program owning this cache. */ - struct drgn_program *prog; +// Binary index file generated by depmod(8). +struct depmod_index { + char *path; + void *addr; + size_t len; +}; - struct drgn_type_finder type_finder; - struct drgn_object_finder object_finder; - struct drgn_symbol_finder symbol_finder; +DEFINE_VECTOR_TYPE(char_p_vector, char *); - /** DWARF frontend library handle. */ - Dwfl *dwfl; - /** Modules keyed by build ID and address range. */ - struct drgn_module_table modules; - /** - * Names of indexed modules. - * - * The entries in this set are @ref drgn_module::name, so they should - * not be freed. - */ - struct c_string_set module_names; - /** DWARF debugging information. */ - struct drgn_dwarf_info dwarf; -}; +DEFINE_HASH_MAP_TYPE(drgn_kmod_walk_module_map, const char *, + struct char_p_vector); -/** Initialize a @ref drgn_debug_info. */ -void drgn_debug_info_init(struct drgn_debug_info *dbinfo, - struct drgn_program *prog); +DEFINE_VECTOR_TYPE(drgn_kmod_walk_stack, + struct drgn_kmod_walk_stack_entry); -/** Deinitialize a @ref drgn_debug_info. 
*/ -void drgn_debug_info_deinit(struct drgn_debug_info *dbinfo); +struct drgn_kmod_walk_inode { + dev_t dev; + ino_t ino; +}; -DEFINE_VECTOR_TYPE(drgn_module_vector, struct drgn_module *); - -/** State tracked while loading debugging information. */ -struct drgn_debug_info_load_state { - struct drgn_debug_info * const dbinfo; - const char ** const paths; - const size_t num_paths; - const bool load_default; - const bool load_main; - /** Newly added modules to be indexed. */ - struct drgn_module_vector new_modules; - /** Formatted errors reported by @ref drgn_debug_info_report_error(). */ - struct string_builder errors; - /** Number of errors reported by @ref drgn_debug_info_report_error(). */ - unsigned int num_errors; - /** Maximum number of errors to report before truncating. */ - unsigned int max_errors; +DEFINE_HASH_SET_TYPE(drgn_kmod_walk_inode_set, struct drgn_kmod_walk_inode); + +struct drgn_kmod_walk_state { + struct drgn_kmod_walk_module_map modules; + struct drgn_kmod_walk_stack stack; + struct string_builder path; + struct drgn_kmod_walk_inode_set visited_dirs; + const char * const *next_kernel_dir; + const char * const *next_debug_dir; }; -/** - * Report a non-fatal error while loading debugging information. - * - * The error will be included in a @ref DRGN_ERROR_MISSING_DEBUG_INFO error - * returned by @ref drgn_debug_info_load(). - * - * @param[name] name An optional module name to prefix to the error message. - * @param[message] message An optional message with additional context to prefix - * to the error message. - * @param[err] err The error to report. This may be @c NULL if @p name and @p - * message provide sufficient information. This is destroyed on either success - * or failure. - * @return @c NULL on success, @ref drgn_enomem if the error could not be - * reported. 
- */ -struct drgn_error * -drgn_debug_info_report_error(struct drgn_debug_info_load_state *load, - const char *name, const char *message, - struct drgn_error *err); +// State kept by standard debug info finder for all modules it's working on. +// Currently it's only used to cache locations of Linux kernel loadable modules. +struct drgn_standard_debug_info_find_state { + struct drgn_module * const *modules; + size_t num_modules; + struct depmod_index modules_dep; + struct drgn_kmod_walk_state kmod_walk; +}; -/** - * Report a module to a @ref drgn_debug_info from an ELF file. - * - * This takes ownership of @p fd and @p elf on either success or failure. They - * should not be used (including closed or freed) after this returns. - * - * @param[in] path The path to the file. - * @param[in] fd A file descriptor referring to the file. - * @param[in] elf The Elf handle of the file. - * @param[in] start The (inclusive) start address of the loaded file, or 0 if - * the file is not loaded. - * @param[in] end The (exclusive) end address of the loaded file, or 0 if the - * file is not loaded. - * @param[in] name An optional name for the module. This is only used for @ref - * drgn_debug_info_is_indexed(). - * @param[out] new_ret Whether the module was newly created and reported. This - * is @c false if a module with the same build ID and address range was already - * loaded or a file with the same path and address range was already reported. - */ -struct drgn_error * -drgn_debug_info_report_elf(struct drgn_debug_info_load_state *load, - const char *path, int fd, Elf *elf, uint64_t start, - uint64_t end, const char *name, bool *new_ret); +void +drgn_standard_debug_info_find_state_deinit(struct drgn_standard_debug_info_find_state *state); -/** Index new debugging information and continue reporting. */ +// Always takes ownership of fd. Attempts to resolve the real path of path. 
struct drgn_error * -drgn_debug_info_report_flush(struct drgn_debug_info_load_state *load); +drgn_module_try_standard_file(struct drgn_module *module, + const struct drgn_debug_info_options *options, + const char *path, int fd, bool check_build_id, + const uint32_t *expected_crc); -/** - * Load debugging information. - * - * @sa drgn_program_load_debug_info - */ -struct drgn_error *drgn_debug_info_load(struct drgn_debug_info *dbinfo, - const char **paths, size_t n, - bool load_default, bool load_main); - -/** - * Return whether a @ref drgn_debug_info has indexed a module with the given - * name. - */ -bool drgn_debug_info_is_indexed(struct drgn_debug_info *dbinfo, - const char *name); +static inline bool drgn_module_wants_file(struct drgn_module *module) +{ + return drgn_module_wants_loaded_file(module) + || drgn_module_wants_debug_file(module); +} /** * Get the language of the program's `main` function or `NULL` if it could not * be found. */ -struct drgn_error * -drgn_debug_info_main_language(struct drgn_debug_info *dbinfo, - const struct drgn_language **ret); +const struct drgn_language * +drgn_debug_info_main_language(struct drgn_debug_info *dbinfo); /** @ref drgn_type_finder_ops::find() that uses debugging information. */ struct drgn_error *drgn_debug_info_find_type(uint64_t kinds, const char *name, diff --git a/libdrgn/debug_info_options.c b/libdrgn/debug_info_options.c new file mode 100644 index 000000000..1ed1d83ad --- /dev/null +++ b/libdrgn/debug_info_options.c @@ -0,0 +1,326 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include + +#include "cleanup.h" +#include "debug_info_options.h" +#include "string_builder.h" +#include "util.h" + +static const char * const drgn_debug_info_options_default_directories[] = { + "/usr/lib/debug", NULL +}; +static const bool drgn_debug_info_options_directories_allow_empty = false; + +static const char * const drgn_debug_info_options_default_debug_link_directories[] = { + "$ORIGIN", "$ORIGIN/.debug", "", NULL +}; +static const bool drgn_debug_info_options_debug_link_directories_allow_empty = true; + +static const char * const drgn_debug_info_options_default_kernel_directories[] = { + "", NULL +}; +static const bool drgn_debug_info_options_kernel_directories_allow_empty = true; + +void drgn_debug_info_options_init(struct drgn_debug_info_options *options) +{ +#define LIST_OPTION(name) \ + options->name = drgn_debug_info_options_default_##name; +#define BOOL_OPTION(name, default_value) options->name = default_value; +#define ENUM_OPTION(name, type, default_value) options->name = default_value; + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION +} + +static void drgn_debug_info_options_list_destroy(const char * const *list, + const char * const *default_list) +{ + if (list && list != default_list) { + for (size_t i = 0; list[i]; i++) + free((void *)list[i]); + free((void *)list); + } +} + +static void drgn_debug_info_options_listp_destroy(const char * const **listp) +{ + drgn_debug_info_options_list_destroy((const char * const *)*listp, + NULL); +} + +void drgn_debug_info_options_deinit(struct drgn_debug_info_options *options) +{ +#define LIST_OPTION(name) \ + drgn_debug_info_options_list_destroy(options->name, \ + drgn_debug_info_options_default_##name); +#define BOOL_OPTION(name, default_value) +#define ENUM_OPTION(name, type, default_value) + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION +} + +LIBDRGN_PUBLIC struct drgn_error * 
+drgn_debug_info_options_create(struct drgn_debug_info_options **ret) +{ + struct drgn_debug_info_options *options = malloc(sizeof(*options)); + if (!options) + return &drgn_enomem; + drgn_debug_info_options_init(options); + *ret = options; + return NULL; +} + +LIBDRGN_PUBLIC void +drgn_debug_info_options_destroy(struct drgn_debug_info_options *options) +{ + if (options) { + drgn_debug_info_options_deinit(options); + free(options); + } +} + +static struct drgn_error * +drgn_debug_info_options_list_dup(const char * const *list, bool allow_empty, + const char * const **ret) +{ + size_t n = 0; + while (list[n]) { + if (!allow_empty && !list[n][0]) { + return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, + "string cannot be empty"); + } + n++; + } + char **copy = malloc_array(n + 1, sizeof(copy[0])); + if (!copy) + return &drgn_enomem; + for (size_t i = 0; i < n; i++) { + copy[i] = strdup(list[i]); + if (!copy[i]) { + for (size_t j = 0; j < i; j++) + free(copy[j]); + free(copy); + return &drgn_enomem; + } + } + copy[n] = NULL; + *ret = (const char * const *)copy; + return NULL; +} + +LIBDRGN_PUBLIC struct drgn_error * +drgn_debug_info_options_copy(struct drgn_debug_info_options *dst, + const struct drgn_debug_info_options *src) +{ + struct drgn_error *err; + if (dst == src) + return NULL; + + // Since copying any list could fail, make all of the copies first. + // Replace the default lists with NULL for now to avoid unnecessary + // copies and simplify cleanup. +#define LIST_OPTION(name) \ + _cleanup_(drgn_debug_info_options_listp_destroy) \ + const char * const *name##_copy = NULL; \ + if (src->name != drgn_debug_info_options_default_##name) { \ + err = drgn_debug_info_options_list_dup(src->name, true, \ + &name##_copy); \ + if (err) \ + return err; \ + } +#define BOOL_OPTION(name, default_value) +#define ENUM_OPTION(name, type, default_value) + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + + // Now we can set everything. 
+#define LIST_OPTION(name) \ + drgn_debug_info_options_list_destroy(dst->name, \ + drgn_debug_info_options_default_##name);\ + if (name##_copy) \ + dst->name = no_cleanup_ptr(name##_copy); \ + else \ + dst->name = drgn_debug_info_options_default_##name; +#define BOOL_OPTION(name, default_value) dst->name = src->name; +#define ENUM_OPTION(name, type, default_value) dst->name = src->name; + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + return NULL; +} + +#define DRGN_DEBUG_INFO_OPTIONS_GET(type, name) \ +LIBDRGN_PUBLIC type \ +drgn_debug_info_options_get_##name(const struct drgn_debug_info_options *options)\ +{ \ + return options->name; \ +} + +#define DRGN_DEBUG_INFO_OPTIONS_GETSET(type, name) \ +DRGN_DEBUG_INFO_OPTIONS_GET(type, name) \ + \ +LIBDRGN_PUBLIC void \ +drgn_debug_info_options_set_##name(struct drgn_debug_info_options *options, \ + type value) \ +{ \ + options->name = value; \ +} + +#define LIST_OPTION(name) \ +DRGN_DEBUG_INFO_OPTIONS_GET(const char * const *, name) \ + \ +LIBDRGN_PUBLIC struct drgn_error * \ +drgn_debug_info_options_set_##name(struct drgn_debug_info_options *options, \ + const char * const *value) \ +{ \ + struct drgn_error *err; \ + const char * const *copy; \ + if (value == drgn_debug_info_options_default_##name) { \ + copy = value; \ + } else { \ + err = drgn_debug_info_options_list_dup(value, \ + drgn_debug_info_options_##name##_allow_empty,\ + &copy); \ + if (err) \ + return err; \ + } \ + drgn_debug_info_options_list_destroy(options->name, \ + drgn_debug_info_options_default_##name);\ + options->name = copy; \ + return NULL; \ +} + +#define BOOL_OPTION(name, default_value) \ + DRGN_DEBUG_INFO_OPTIONS_GETSET(bool, name) +#define ENUM_OPTION(name, type, default_value) \ + DRGN_DEBUG_INFO_OPTIONS_GETSET(enum type, name) + +DRGN_DEBUG_INFO_OPTIONS + +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + +static bool drgn_format_debug_info_options_common(struct string_builder *sb, + const char 
*name, + bool *first) +{ + if (*first) + *first = false; + else if (!string_builder_append(sb, ", ")) + return false; + return string_builder_append(sb, name) && string_builder_appendc(sb, '='); +} + +static bool drgn_debug_info_options_lists_equal(const char * const *a, + const char * const *b) +{ + if (a == b) + return true; + size_t i; + for (i = 0; a[i]; i++) { + if (!b[i] || strcmp(a[i], b[i]) != 0) + return false; + } + return !b[i]; +} + +static bool drgn_format_debug_info_options_list(struct string_builder *sb, + const char *name, bool *first, + const char * const *list, + const char * const *default_list) +{ + // Always include directories, skip other options set to the default. + if (default_list != drgn_debug_info_options_default_directories + && drgn_debug_info_options_lists_equal(list, default_list)) + return true; + + if (!drgn_format_debug_info_options_common(sb, name, first) + || !string_builder_appendc(sb, '(')) + return false; + size_t i; + for (i = 0; list[i]; i++) { + if (!string_builder_append(sb, i == 0 ? "'" : ", '") + || !string_builder_append(sb, list[i]) + || !string_builder_appendc(sb, '\'')) + return false; + } + return string_builder_append(sb, i == 1 ? ",)" : ")"); +} + +static bool drgn_format_debug_info_options_bool(struct string_builder *sb, + const char *name, bool *first, + bool value, bool default_value) +{ + // Skip options set to the default. + if (value == default_value) + return true; + return drgn_format_debug_info_options_common(sb, name, first) + && string_builder_append(sb, value ? "True" : "False"); +} + +static bool +drgn_kmod_search_method_format(struct string_builder *sb, const char *name, + bool *first, enum drgn_kmod_search_method value, + enum drgn_kmod_search_method default_value) +{ + // Skip options set to the default. 
+ if (value == default_value) + return true; + const char *s; + SWITCH_ENUM(value) { + case DRGN_KMOD_SEARCH_NONE: + s = "NONE"; + break; + case DRGN_KMOD_SEARCH_DEPMOD: + s = "DEPMOD"; + break; + case DRGN_KMOD_SEARCH_WALK: + s = "WALK"; + break; + case DRGN_KMOD_SEARCH_DEPMOD_OR_WALK: + s = "DEPMOD_OR_WALK"; + break; + case DRGN_KMOD_SEARCH_DEPMOD_AND_WALK: + s = "DEPMOD_AND_WALK"; + break; + default: + UNREACHABLE(); + } + return drgn_format_debug_info_options_common(sb, name, first) + && string_builder_append(sb, s); +} + +char *drgn_format_debug_info_options(struct drgn_debug_info_options *options) +{ + STRING_BUILDER(sb); + + bool first = true; +#define LIST_OPTION(name) \ + if (!drgn_format_debug_info_options_list(&sb, #name, &first, \ + options->name, \ + drgn_debug_info_options_default_##name))\ + return NULL; +#define BOOL_OPTION(name, default_value) \ + if (!drgn_format_debug_info_options_bool(&sb, #name, &first, \ + options->name, default_value)) \ + return NULL; +#define ENUM_OPTION(name, type, default_value) \ + if (!type##_format(&sb, #name, &first, options->name, default_value)) \ + return NULL; + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + if (!string_builder_null_terminate(&sb)) + return NULL; + return string_builder_steal(&sb); +} diff --git a/libdrgn/debug_info_options.h b/libdrgn/debug_info_options.h new file mode 100644 index 000000000..4c0c6ffc3 --- /dev/null +++ b/libdrgn/debug_info_options.h @@ -0,0 +1,39 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#ifndef DRGN_DEBUG_INFO_OPTIONS_H +#define DRGN_DEBUG_INFO_OPTIONS_H + +#include "drgn_internal.h" + +// X macro expanding to all debug info options. 
+#define DRGN_DEBUG_INFO_OPTIONS \ + LIST_OPTION(directories) \ + BOOL_OPTION(try_module_name, true) \ + BOOL_OPTION(try_build_id, true) \ + LIST_OPTION(debug_link_directories) \ + BOOL_OPTION(try_debug_link, true) \ + BOOL_OPTION(try_procfs, true) \ + BOOL_OPTION(try_embedded_vdso, true) \ + BOOL_OPTION(try_reuse, true) \ + BOOL_OPTION(try_supplementary, true) \ + LIST_OPTION(kernel_directories) \ + ENUM_OPTION(try_kmod, drgn_kmod_search_method, \ + DRGN_KMOD_SEARCH_DEPMOD_OR_WALK) + +struct drgn_debug_info_options { +#define LIST_OPTION(name) const char * const *name; +#define BOOL_OPTION(name, default_value) bool name; +#define ENUM_OPTION(name, type, default_value) enum type name; + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION +}; + +void drgn_debug_info_options_init(struct drgn_debug_info_options *options); +void drgn_debug_info_options_deinit(struct drgn_debug_info_options *options); + +char *drgn_format_debug_info_options(struct drgn_debug_info_options *options); + +#endif /* DRGN_DEBUG_INFO_OPTIONS_H */ diff --git a/libdrgn/drgn.h b/libdrgn/drgn.h index f6438e913..f1010a930 100644 --- a/libdrgn/drgn.h +++ b/libdrgn/drgn.h @@ -44,7 +44,7 @@ /** Minor version of drgn. */ #define DRGN_VERSION_MINOR 0 /** Patch level of drgn. */ -#define DRGN_VERSION_PATCH 30 +#define DRGN_VERSION_PATCH 32 /** * @defgroup ErrorHandling Error handling @@ -819,19 +819,6 @@ struct drgn_error *drgn_program_set_kernel(struct drgn_program *prog); */ struct drgn_error *drgn_program_set_pid(struct drgn_program *prog, pid_t pid); -/** - * Load debugging information for a list of executable or library files. - * - * @param[in] load_default Whether to also load debugging information which can - * automatically be determined from the program. This implies @p load_main. - * @param[in] load_main Whether to also load information for the main - * executable. 
- */ -struct drgn_error *drgn_program_load_debug_info(struct drgn_program *prog, - const char **paths, size_t n, - bool load_default, - bool load_main); - /** * Create a @ref drgn_program from a core dump file. * @@ -1199,6 +1186,792 @@ struct drgn_error *drgn_program_element_info(struct drgn_program *prog, /** @} */ +/** + * @defgroup Modules Modules + * + * Modules in a program and debugging information. + * + * @{ + */ + +/** + * An executable, library, or other binary file used by a program. + * + * Modules are uniquely identified by the combination of their kind (@ref + * drgn_module_kind()), name (@ref drgn_module_name()), and info (@ref + * drgn_module_info()). + */ +struct drgn_module; + +/** Kinds of modules. */ +enum drgn_module_kind { + /** + * Main module. For userspace programs, this is the executable. For the + * Linux kernel, this is `vmlinux`. + */ + DRGN_MODULE_MAIN, + /** Shared library (a.k.a. dynamic library or dynamic shared object). */ + DRGN_MODULE_SHARED_LIBRARY, + /** Virtual dynamic shared object (vDSO). */ + DRGN_MODULE_VDSO, + /** Relocatable object (e.g., Linux kernel loadable module). */ + DRGN_MODULE_RELOCATABLE, + /** Extra debugging information. */ + DRGN_MODULE_EXTRA, +} __attribute__((__packed__)); + +/** + * Find the created @ref drgn_module with the given @p name. + * + * If there are multiple modules with the given name, one is returned + * arbitrarily. + * + * @return Module, or @c NULL if not found. + */ +struct drgn_module *drgn_module_find_by_name(struct drgn_program *prog, + const char *name); + +/** + * Find the created @ref drgn_module containing the given @p address. + * + * @return Module, or @c NULL if not found. + */ +struct drgn_module *drgn_module_find_by_address(struct drgn_program *prog, + uint64_t address); + +/** + * Find the main module. + * + * @param[in] name Module name, or @c NULL to match any name. 
+ */ +struct drgn_module *drgn_module_find_main(struct drgn_program *prog, + const char *name); + +/** + * Find the main module, creating it if it doesn't already exist. + * + * @param[out] new_ret @c true if the module was newly created, @c false if it + * was found. + */ +struct drgn_error *drgn_module_find_or_create_main(struct drgn_program *prog, + const char *name, + struct drgn_module **ret, + bool *new_ret); + +/** Find a shared library module. */ +struct drgn_module *drgn_module_find_shared_library(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address); + +/** + * Find a shared library module, creating it if it doesn't already exist. + * + * @param[out] new_ret @c true if the module was newly created, @c false if it + * was found. + */ +struct drgn_error * +drgn_module_find_or_create_shared_library(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address, + struct drgn_module **ret, + bool *new_ret); + +/** Find a vDSO module. */ +struct drgn_module *drgn_module_find_vdso(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address); + +/** + * Find a vDSO module, creating it if it doesn't already exist. + * + * @param[out] new_ret @c true if the module was newly created, @c false if it + * was found. + */ +struct drgn_error *drgn_module_find_or_create_vdso(struct drgn_program *prog, + const char *name, + uint64_t dynamic_address, + struct drgn_module **ret, + bool *new_ret); + +/** Find a relocatable module. */ +struct drgn_module *drgn_module_find_relocatable(struct drgn_program *prog, + const char *name, + uint64_t address); + +/** + * Find a relocatable module, creating it if it doesn't already exist. + * + * @param[out] new_ret @c true if the module was newly created, @c false if it + * was found. 
+ */ +struct drgn_error * +drgn_module_find_or_create_relocatable(struct drgn_program *prog, + const char *name, uint64_t address, + struct drgn_module **ret, bool *new_ret); + +/** + * Find a created Linux kernel loadable module from a ``struct module *`` object. + */ +struct drgn_error * +drgn_module_find_linux_kernel_loadable(const struct drgn_object *module_obj, + struct drgn_module **ret); + +/** + * Find a Linux kernel loadable module from a ``struct module *`` object, creating + * it if it doesn't already exist. + * + * @param[out] new_ret @c true if the module was newly created, @c false if it + * was found. + */ +struct drgn_error * +drgn_module_find_or_create_linux_kernel_loadable(const struct drgn_object *module_obj, + struct drgn_module **ret, + bool *new_ret); + +/** Find an extra module. */ +struct drgn_module *drgn_module_find_extra(struct drgn_program *prog, + const char *name, uint64_t id); + +/** + * Find an extra module, creating it if it doesn't already exist. + * + * @param[out] new_ret @c true if the module was newly created, @c false if it + * was found. + */ +struct drgn_error *drgn_module_find_or_create_extra(struct drgn_program *prog, + const char *name, + uint64_t id, + struct drgn_module **ret, + bool *new_ret); + +/** Get the program that a module is from. */ +struct drgn_program *drgn_module_program(const struct drgn_module *module); + +/** Get the kind of a module. */ +enum drgn_module_kind drgn_module_kind(const struct drgn_module *module); + +/** Get the name of a module. */ +const char *drgn_module_name(const struct drgn_module *module); + +/** + * Get the kind-specific info of a module. + * + * - For the main module, it is always 0. + * - For shared library and vDSO modules, it is the address of the dynamic + * section. + * - For relocatable modules, it is an address identifying the module (e.g., for + * Linux kernel loadable modules, it is the base address). + * - For extra modules, it is an arbitrary identification number. 
+ */ +uint64_t drgn_module_info(const struct drgn_module *module); + +/** + * Get the number of address ranges where a module is loaded. + * + * @param[out] ret Returned number of address ranges (zero if address ranges are + * empty or not set). + * @return @c true on success (including if address ranges are empty), @c false + * if address ranges are not set. + */ +bool drgn_module_num_address_ranges(const struct drgn_module *module, + size_t *ret); + +/** + * Get the @p i-th address range where a module is loaded. + * + * @param[out] start_ret Minimum address (inclusive). + * @param[out] end_ret Maximum address (exclusive). + * @return @c true on success, @c false if @p i is out of bounds (i.e., if it is + * greater than @ref drgn_module_num_address_ranges()). + */ +bool drgn_module_address_range(const struct drgn_module *module, size_t i, + uint64_t *start_ret, uint64_t *end_ret); + +/** + * Set the address range of a module. + * + * This is equivalent to: + * + * ``` + * uint64_t range[2] = {start, end}; + * drgn_module_set_address_ranges(module, &range, 1); + * ``` + */ +struct drgn_error *drgn_module_set_address_range(struct drgn_module *module, + uint64_t start, uint64_t end); + +/** + * Set the address ranges of a module. + * + * @param[in] ranges Ranges to set. The first element of each range is the + * start. The second is the end. The start must be less than the end. This is + * copied, so it need not remain valid after this function returns. + * @param[in] num_ranges Number of ranges in @p ranges. + */ +struct drgn_error *drgn_module_set_address_ranges(struct drgn_module *module, + uint64_t ranges[][2], + size_t num_ranges); + +/** Unset the address ranges for a module. */ +void drgn_module_unset_address_ranges(struct drgn_module *module); + +/** Return whether a module's address ranges contain @p address. 
*/ +bool drgn_module_contains_address(const struct drgn_module *module, + uint64_t address); + +/** + * Get the unique byte string (e.g., GNU build ID) identifying files used by + * a module. + * + * @param[out] raw_ret Returned raw build ID. @c NULL if not known. Valid until + * the build ID is changed. + * @param[out] raw_len_ret Size of returned build ID, in bytes. 0 if not known. + * @return Lowercase hexadecimal representation of build ID. @c NULL if not + * known. Valid until the build ID is changed. + */ +const char *drgn_module_build_id(const struct drgn_module *module, + const void **raw_ret, size_t *raw_len_ret); + +/** + * Set the unique byte string (e.g., GNU build ID) identifying files used by a + * module. + * + * @param[in] build_id New build ID. + * @param[in] build_id_len New size of build ID, in bytes. May be 0 to unset the + * build ID. + */ +struct drgn_error *drgn_module_set_build_id(struct drgn_module *module, + const void *build_id, + size_t build_id_len); + +/** Get the address of a section with the given name in a relocatable module. */ +struct drgn_error *drgn_module_get_section_address(struct drgn_module *module, + const char *name, + uint64_t *ret); + +/** + * Set the address of a section with the given name in a relocatable module. + * + * This is not allowed after a file has been assigned to the module. + */ +struct drgn_error *drgn_module_set_section_address(struct drgn_module *module, + const char *name, + uint64_t address); + +/** + * Unset the address of a section with the given name in a relocatable module. + * + * This is not allowed after a file has been assigned to the module. + */ +struct drgn_error *drgn_module_delete_section_address(struct drgn_module *module, + const char *name); + +/** + * Get the number of section addresses currently set in a relocatable module. 
+ */ +struct drgn_error *drgn_module_num_section_addresses(struct drgn_module *module, + size_t *ret); + +/** Iterator over set section addresses in a relocatable module. */ +struct drgn_module_section_address_iterator; + +/** Create a @ref drgn_module_section_address_iterator. */ +struct drgn_error * +drgn_module_section_address_iterator_create(struct drgn_module *module, + struct drgn_module_section_address_iterator **ret); + +/** Destroy a @ref drgn_module_section_address_iterator. */ +void +drgn_module_section_address_iterator_destroy(struct drgn_module_section_address_iterator *it); + +/** Get the module that a @ref drgn_module_section_address_iterator is for. */ +struct drgn_module * +drgn_module_section_address_iterator_module(struct drgn_module_section_address_iterator *it); + +/** + * Get the next section name and address from a @ref + * drgn_module_section_address_iterator. + * + * @param[out] name_ret Returned name. Valid until the next call to @ref + * drgn_module_section_address_iterator_next() or @ref + * drgn_module_section_address_iterator_destroy() on @p it. + * @param[out] address_ret Returned address. + */ +struct drgn_error * +drgn_module_section_address_iterator_next(struct drgn_module_section_address_iterator *it, + const char **name_ret, + uint64_t *address_ret); + +/** Status of a file in a @ref drgn_module. */ +enum drgn_module_file_status { + /** File has not been found and should be searched for. */ + DRGN_MODULE_FILE_WANT, + /** File has already been found and assigned. */ + DRGN_MODULE_FILE_HAVE, + /** File has not been found, but it should not be searched for. */ + DRGN_MODULE_FILE_DONT_WANT, + /** File has not been found and is not needed. */ + DRGN_MODULE_FILE_DONT_NEED, + /** + * File has been found, but it requires a supplementary file before it + * can be used. + */ + DRGN_MODULE_FILE_WANT_SUPPLEMENTARY, +}; + +/** Kind of supplementary file. */ +enum drgn_supplementary_file_kind { + /** Not known or not needed. 
*/ + DRGN_SUPPLEMENTARY_FILE_NONE, + /** + * GNU-style supplementary debug file referred to by a + * ``.gnu_debugaltlink`` section. + */ + DRGN_SUPPLEMENTARY_FILE_GNU_DEBUGALTLINK, +}; + +/** Get the status of a module's loaded file. */ +enum drgn_module_file_status +drgn_module_loaded_file_status(const struct drgn_module *module); + +/** Set the status of a module's loaded file. */ +bool drgn_module_set_loaded_file_status(struct drgn_module *module, + enum drgn_module_file_status status); + +/** + * Get whether a module wants a loaded file. + * + * For future-proofness, debug info finders should prefer this over comparing + * @ref drgn_module_loaded_file_status() directly. + */ +bool drgn_module_wants_loaded_file(const struct drgn_module *module); + +/** Get the absolute path of a module's loaded file, or @c NULL if not known. */ +const char *drgn_module_loaded_file_path(const struct drgn_module *module); + +/** + * Get the difference between the load address in the program and addresses in a + * module's loaded file. + */ +uint64_t drgn_module_loaded_file_bias(const struct drgn_module *module); + +enum drgn_module_file_status +drgn_module_debug_file_status(const struct drgn_module *module); + +bool drgn_module_set_debug_file_status(struct drgn_module *module, + enum drgn_module_file_status status); + +/** + * Get whether a module wants a debug file. + * + * For future-proofness, debug info finders should prefer this over comparing + * @ref drgn_module_debug_file_status() directly. + */ +bool drgn_module_wants_debug_file(const struct drgn_module *module); + +/** Get the absolute path of a module's debug file, or @c NULL if not known. */ +const char *drgn_module_debug_file_path(const struct drgn_module *module); + +/** + * Get the difference between the load address in the program and addresses in a + * module's debug file. + */ +uint64_t drgn_module_debug_file_bias(const struct drgn_module *module); + +/** Get the kind of a module's supplementary debug file. 
*/ +enum drgn_supplementary_file_kind +drgn_module_supplementary_debug_file_kind(const struct drgn_module *module); + +/** + * Get the absolute path of a module's supplementary debug file, or @c NULL if + * not known or not needed. + */ +const char * +drgn_module_supplementary_debug_file_path(const struct drgn_module *module); + +/** + * Get information about the supplementary debug file that a module currently + * wants. + * + * @param[out] debug_file_path_ret Path of main file that wants the + * supplementary file. + * @param[out] supplementary_path_ret Path to supplementary file. This may be + * absolute or relative to @p debug_file_path_ret. + * @param[out] checksum_ret Unique identifier of the supplementary file. + * @param[out] checksum_len_ret Size of unique identifier, in bytes. + * @return Kind of supplementary file. + */ +enum drgn_supplementary_file_kind +drgn_module_wanted_supplementary_debug_file(struct drgn_module *module, + const char **debug_file_path_ret, + const char **supplementary_path_ret, + const void **checksum_ret, + size_t *checksum_len_ret); + +/** + * Return the object associated with this module. + * + * For some modules, there may be an object related to it. For example, drgn + * automatically identifies the Linux kernel `struct module *` associated with + * loadable modules, and associates it with them. Users may set or replace an + * associated object with @ref drgn_module_set_object(). + * + * @param[out] ret Initialized object where the module object is placed + */ +struct drgn_error * +drgn_module_object(const struct drgn_module *module, struct drgn_object *ret); + +/** + * Set the object associated with this module. + * @param[in] obj A new (or replacement) object for the module + */ +struct drgn_error * +drgn_module_set_object(struct drgn_module *module, const struct drgn_object *obj); + +/** Debugging information finder callback table. */ +struct drgn_debug_info_finder_ops { + /** + * Callback to destroy the debug info finder. 
+ * + * This may be @c NULL. + * + * @param[in] arg Argument passed to @ref + * drgn_program_register_debug_info_finder(). + */ + void (*destroy)(void *arg); + /** + * Callback for finding debug info. + * + * @param[in] modules Array of modules that want debugging information. + * @param[in] num_modules Number of modules in @p modules. + * @param[in] arg Argument passed to @ref + * drgn_program_register_debug_info_finder(). + * @return @c NULL on success, non-@c NULL on error. It is not an error + * for some debugging information to not be found. + */ + struct drgn_error *(*find)(struct drgn_module * const *modules, + size_t num_modules, void *arg); +}; + +/** + * Register a debugging information finding callback. + * + * @param[in] name Finder name. This is copied. + * @param[in] ops Callback table. This is copied. + * @param[in] arg Argument to pass to callbacks. + * @param[in] enable_index Insert the finder into the list of enabled finders at + * the given index. If @ref DRGN_HANDLER_REGISTER_ENABLE_LAST or greater than + * the number of enabled finders, insert it at the end. If @ref + * DRGN_HANDLER_REGISTER_DONT_ENABLE, don’t enable the finder. + */ +struct drgn_error * +drgn_program_register_debug_info_finder(struct drgn_program *prog, + const char *name, + const struct drgn_debug_info_finder_ops *ops, + void *arg, size_t enable_index); + +/** + * Get the names of all registered debugging information finders. + * + * The order of the names is arbitrary. + * + * @param[out] names_ret Returned array of names. + * @param[out] count_ret Returned number of names in @p names_ret. + */ +struct drgn_error * +drgn_program_registered_debug_info_finders(struct drgn_program *prog, + const char ***names_ret, + size_t *count_ret); + +/** + * Set the list of enabled debugging information finders. + * + * Finders are called in the same order as the list until all wanted files have + * been found. + * + * @param[in] names Names of finders to enable, in order. 
+ * @param[in] count Number of names in @p names. + */ +struct drgn_error * +drgn_program_set_enabled_debug_info_finders(struct drgn_program *prog, + const char * const *names, + size_t count); + +/** + * Get the names of enabled debugging information finders, in order. + * + * @param[out] names_ret Returned array of names. + * @param[out] count_ret Returned number of names in @p names_ret. + */ +struct drgn_error * +drgn_program_enabled_debug_info_finders(struct drgn_program *prog, + const char ***names_ret, + size_t *count_ret); + +/** Options for debugging information searches. */ +struct drgn_debug_info_options; + +/** Create a @ref drgn_debug_info_options with the default settings. */ +struct drgn_error * +drgn_debug_info_options_create(struct drgn_debug_info_options **ret); + +/** Destroy a @ref drgn_debug_info_options. */ +void +drgn_debug_info_options_destroy(struct drgn_debug_info_options *options); + +/** Set all options in @p dst to the same as @p src. */ +struct drgn_error * +drgn_debug_info_options_copy(struct drgn_debug_info_options *dst, + const struct drgn_debug_info_options *src); + +/** + * Get the list of directories to search for debugging information files. + * + * @return Null-terminated list of directories. Valid until @ref + * drgn_debug_info_options_set_directories() or @ref + * drgn_debug_info_options_destroy() is called on @p options. + */ +const char * const * +drgn_debug_info_options_get_directories(const struct drgn_debug_info_options *options); + +/** + * Set the list of directories to search for debugging information files. + * + * @param[in] value Null-terminated list of directories. It is copied, so it + * need not remain valid after this function returns. + */ +struct drgn_error * +drgn_debug_info_options_set_directories(struct drgn_debug_info_options *options, + const char * const *value) + __attribute__((__nonnull__(1, 2))); + +/** Get whether to try module names that look like filesystem paths. 
*/ +bool +drgn_debug_info_options_get_try_module_name(const struct drgn_debug_info_options *options); + +/** Set whether to try module names that look like filesystem paths. */ +void +drgn_debug_info_options_set_try_module_name(struct drgn_debug_info_options *options, + bool value); + +/** Get whether to try files by build ID. */ +bool +drgn_debug_info_options_get_try_build_id(const struct drgn_debug_info_options *options); + +/** Set whether to try files by build ID. */ +void +drgn_debug_info_options_set_try_build_id(struct drgn_debug_info_options *options, + bool value); + +/** + * Get the list of directories to search for by debug link. + * + * @return Null-terminated list of directories. Valid until @ref + * drgn_debug_info_options_set_debug_link_directories() or @ref + * drgn_debug_info_options_destroy() is called on @p options. + */ +const char * const * +drgn_debug_info_options_get_debug_link_directories(const struct drgn_debug_info_options *options); + +/** + * Set the list of directories to search for by debug link. + * + * @param[in] value Null-terminated list of directories. It is copied, so it + * need not remain valid after this function returns. + */ +struct drgn_error * +drgn_debug_info_options_set_debug_link_directories(struct drgn_debug_info_options *options, + const char * const *value) + __attribute__((__nonnull__(1, 2))); + +/** Get whether to try files by debug link. */ +bool +drgn_debug_info_options_get_try_debug_link(const struct drgn_debug_info_options *options); + +/** Set whether to try files by debug link. */ +void +drgn_debug_info_options_set_try_debug_link(struct drgn_debug_info_options *options, + bool value); + +/** Get whether to try files via procfs for local processes. */ +bool +drgn_debug_info_options_get_try_procfs(const struct drgn_debug_info_options *options); + +/** Set whether to try files via procfs for local processes. 
*/ +void +drgn_debug_info_options_set_try_procfs(struct drgn_debug_info_options *options, + bool value); + +/** Get whether to try the vDSO embedded in a process's memory/core dump. */ +bool +drgn_debug_info_options_get_try_embedded_vdso(const struct drgn_debug_info_options *options); + +/** Set whether to try the vDSO embedded in a process's memory/core dump. */ +void +drgn_debug_info_options_set_try_embedded_vdso(struct drgn_debug_info_options *options, + bool value); + +/** + * Get whether to reuse a module's loaded file as its debug file or vice versa. + */ +bool +drgn_debug_info_options_get_try_reuse(const struct drgn_debug_info_options *options); + +/** + * Set whether to reuse a module's loaded file as its debug file or vice versa. + */ +void +drgn_debug_info_options_set_try_reuse(struct drgn_debug_info_options *options, + bool value); + +/** Get whether to try finding supplementary files. */ +bool +drgn_debug_info_options_get_try_supplementary(const struct drgn_debug_info_options *options); + +/** Set whether to try finding supplementary files. */ +void +drgn_debug_info_options_set_try_supplementary(struct drgn_debug_info_options *options, + bool value); + +/** + * Get the list of directories to search for kernel debugging information files. + * + * @return Null-terminated list of directories. Valid until @ref + * drgn_debug_info_options_set_kernel_directories() or @ref + * drgn_debug_info_options_destroy() is called on @p options. + */ +const char * const * +drgn_debug_info_options_get_kernel_directories(const struct drgn_debug_info_options *options); + +/** + * Set the list of directories to search for kernel debugging information files. + * + * @param[in] value Null-terminated list of directories. It is copied, so it + * need not remain valid after this function returns. 
+ */ +struct drgn_error * +drgn_debug_info_options_set_kernel_directories(struct drgn_debug_info_options *options, + const char * const *value) + __attribute__((__nonnull__(1, 2))); + +/** Methods of searching for loadable kernel module debugging information. */ +enum drgn_kmod_search_method { + DRGN_KMOD_SEARCH_NONE, + DRGN_KMOD_SEARCH_DEPMOD, + DRGN_KMOD_SEARCH_WALK, + DRGN_KMOD_SEARCH_DEPMOD_OR_WALK, + DRGN_KMOD_SEARCH_DEPMOD_AND_WALK, +} __attribute__((__packed__)); + +/** Get how to search for loadable kernel module debugging information. */ +enum drgn_kmod_search_method +drgn_debug_info_options_get_try_kmod(const struct drgn_debug_info_options *options); + +/** Set how to search for loadable kernel module debugging information. */ +void +drgn_debug_info_options_set_try_kmod(struct drgn_debug_info_options *options, + enum drgn_kmod_search_method value); + +/** + * Get the default debugging information options for @p prog. + * + * @return Program options. May be modified as needed. Must not be passed to + * @ref drgn_debug_info_options_destroy(). + */ +struct drgn_debug_info_options * +drgn_program_debug_info_options(struct drgn_program *prog); + +/** + * Load debugging information for the given modules from the standard locations. + * + * @param[in] options Options to use, or @c NULL to use the program's default + * options. + */ +struct drgn_error * +drgn_find_standard_debug_info(struct drgn_module * const *modules, + size_t num_modules, + struct drgn_debug_info_options *options); + +/** + * Try to use the given file for a module. + * + * @param[in] path Path to file. + * @param[in] fd If nonnegative, an open file descriptor referring to the file. + * This always takes ownership of the file descriptor even if the file is not + * used or on error. + * @param[in] force If @c true, don't check whether the file matches the module. 
+ */ +struct drgn_error * +drgn_module_try_file(struct drgn_module *module, const char *path, int fd, + bool force); + +/** Iterator over a set of modules. */ +struct drgn_module_iterator; + +/** Destroy a @ref drgn_module_iterator. */ +void +drgn_module_iterator_destroy(struct drgn_module_iterator *it); + +/** Get the program that a module iterator is from. */ +struct drgn_program * +drgn_module_iterator_program(const struct drgn_module_iterator *it); + +/** + * Get the next module in a module iterator. + * + * @param[out] ret Returned module, or @c NULL if there are no more modules. + * @param[out] new_ret Whether the module was newly created. May be @c NULL. + */ +struct drgn_error *drgn_module_iterator_next(struct drgn_module_iterator *it, + struct drgn_module **ret, + bool *new_ret); + +/** Create an iterator over created modules. */ +struct drgn_error * +drgn_created_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret); + +/** + * Create an iterator that determines what executables, libraries, etc. are + * loaded in the program and creates modules to represent them. + */ +struct drgn_error * +drgn_loaded_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret); + +/** + * Determine what executables, libraries, etc. are loaded in the program and + * create modules to represent them. + * + * This is a shortcut for creating an iterator with @ref + * drgn_loaded_module_iterator_create() and calling @ref + * drgn_module_iterator_next() until it is exhausted. + */ +struct drgn_error * +drgn_create_loaded_modules(struct drgn_program *prog); + +/** + * Load debugging information for the given set of files and/or modules. + * + * @param[in] load_default Whether to load all debugging information for all + * loaded modules. This implies @p load_main. + * @param[in] load_main Whether to load all debugging information for the main + * module. 
+ */ +struct drgn_error *drgn_program_load_debug_info(struct drgn_program *prog, + const char **paths, size_t n, + bool load_default, + bool load_main); + +/** + * Load debugging information for the given modules using the enabled debugging + * information finders. + */ +struct drgn_error *drgn_load_module_debug_info(struct drgn_module **modules, + size_t *num_modules); + +/** @} */ + /** * @defgroup Logging Logging * @@ -1211,6 +1984,12 @@ struct drgn_error *drgn_program_element_info(struct drgn_program *prog, * By default, the log file is set to `stderr` and the log level is @ref * DRGN_LOG_NONE, so logging is disabled. * + * Additionally, drgn can display a progress bar for some operations, like + * downloading debugging information. By default, progress bars are displayed on + * standard error if standard error is a terminal, the log file is set to + * `stderr`, and the log level is less than or equal to @ref DRGN_LOG_WARNING, + * but this can be changed (@ref drgn_program_set_progress_file()). + * * @{ */ @@ -1282,71 +2061,12 @@ void drgn_program_get_log_callback(struct drgn_program *prog, drgn_log_fn **callback_ret, void **callback_arg_ret); -/** @} */ - -/** - * @defgroup Embedding Embedding - * - * Embedding drgn in another runtime. - * - * @{ - */ - /** - * Callback before a blocking operation. + * Write progress bars to the given file. * - * @param[in] arg @c callback_arg passed to @ref - * drgn_program_set_blocking_callback(). - * @return Opaque pointer to pass to @ref drgn_program_end_blocking_fn(). + * @param[in] file File, or @c NULL to disable progress bars. */ -typedef void *drgn_program_begin_blocking_fn(struct drgn_program *prog, - void *arg); - -/** - * Callback after a blocking operation. - * - * @param[in] arg @c callback_arg passed to @ref - * drgn_program_set_blocking_callback(). - * @param[in] state Return value of matching call to @ref - * drgn_program_begin_blocking_fn(). 
- */ -typedef void drgn_program_end_blocking_fn(struct drgn_program *prog, - void *arg, void *state); - -/** - * Set callbacks around blocking operations. - * - * These callbacks will be called around blocking I/O operations and - * long-running computations. They are intended for things like releasing the - * [global interpreter - * lock](https://docs.python.org/3/glossary.html#term-global-interpreter-lock). - * Calls to these callbacks may be nested, but they will always be matched. - * - * @param[in] begin_callback Callback called before a blocking operation. Can be - * @c NULL to unset. - * @param[in] end_callback Callback called after a blocking operation. Can be @c - * NULL to unset. - * @param[in] callback_arg Argument passed to @p begin_callback and @p - * end_callback. - */ -void -drgn_program_set_blocking_callback(struct drgn_program *prog, - drgn_program_begin_blocking_fn *begin_callback, - drgn_program_end_blocking_fn *end_callback, - void *callback_arg); - -/** - * Get callbacks set by @ref drgn_program_set_blocking_callback(). - * - * @param[out] begin_callback_ret Returned @c begin_callback. - * @param[out] end_callback_ret Returned @c end_callback. - * @param[out] callback_arg_ret Returned @c callback_arg. - */ -void -drgn_program_get_blocking_callback(struct drgn_program *prog, - drgn_program_begin_blocking_fn **begin_callback_ret, - drgn_program_end_blocking_fn **end_callback_ret, - void **callback_arg_ret); +void drgn_program_set_progress_file(struct drgn_program *prog, FILE *file); /** @} */ @@ -1528,6 +2248,16 @@ static inline bool drgn_value_is_inline(uint64_t bits) return bits <= CHAR_BIT * sizeof(((union drgn_value *)0)->ibuf); } +/** Reason object is absent. */ +enum drgn_absence_reason { + /** Another reason not listed below. */ + DRGN_ABSENCE_REASON_OTHER, + /** Object was optimized out by the compiler. */ + DRGN_ABSENCE_REASON_OPTIMIZED_OUT, + /** Encountered unknown debugging information. 
*/ + DRGN_ABSENCE_REASON_NOT_IMPLEMENTED, +}; + /** * Object in a program. * @@ -1584,6 +2314,8 @@ struct drgn_object { union drgn_value value; /** Address of reference object. */ uint64_t address; + /** Reason object is absent. */ + enum drgn_absence_reason absence_reason; }; }; @@ -1774,6 +2506,7 @@ drgn_object_set_reference(struct drgn_object *res, * * @param[out] res Object to set. * @param[in] qualified_type Type to set to. + * @param[in] reason Reason object is absent. * @param[in] bit_field_size If the object should be a bit field, its size in * bits. Otherwise, 0. * @return @c NULL on success, non-@c NULL on error. @@ -1781,6 +2514,7 @@ drgn_object_set_reference(struct drgn_object *res, struct drgn_error * drgn_object_set_absent(struct drgn_object *res, struct drgn_qualified_type qualified_type, + enum drgn_absence_reason reason, uint64_t bit_field_size); /** @@ -1854,7 +2588,7 @@ struct drgn_error *drgn_object_copy(struct drgn_object *res, const struct drgn_object *obj); /** - * Get a @ref drgn_object from a "slice" of an object. + * Get a @ref drgn_object from a "fragment" of an object. * * This is a low-level interface used to implement @ref drgn_object_subscript(), * @ref drgn_object_member(), and @ref drgn_object_reinterpret(). Those @@ -1879,11 +2613,11 @@ struct drgn_error *drgn_object_copy(struct drgn_object *res, * bits. Otherwise, 0. * @return @c NULL on success, non-@c NULL on error. */ -struct drgn_error *drgn_object_slice(struct drgn_object *res, - const struct drgn_object *obj, - struct drgn_qualified_type qualified_type, - uint64_t bit_offset, - uint64_t bit_field_size); +struct drgn_error *drgn_object_fragment(struct drgn_object *res, + const struct drgn_object *obj, + struct drgn_qualified_type qualified_type, + uint64_t bit_offset, + uint64_t bit_field_size); /** * Get a @ref drgn_object from dereferencing a pointer object with an offset. 
@@ -1893,7 +2627,7 @@ struct drgn_error *drgn_object_slice(struct drgn_object *res, * convenient, but this function can be more efficient if accessing multiple * elements or the same member multiple times. * - * @sa drgn_object_slice + * @sa drgn_object_fragment * * @param[out] res Dereferenced object. * @param[in] obj Pointer object. @@ -3177,13 +3911,24 @@ bool drgn_stack_frame_interrupted(struct drgn_stack_trace *trace, size_t frame); struct drgn_error *drgn_format_stack_frame(struct drgn_stack_trace *trace, size_t frame, char **ret); +/** + * Get the best available name for a stack frame. + * + * @param[out] ret Returned name. On success, it must be freed with @c free(). + * On error, it is not modified. + * @return @c NULL on success, non-@c NULL on error. + */ +struct drgn_error *drgn_stack_frame_name(struct drgn_stack_trace *trace, + size_t frame, char **ret); + /** * Get the name of the function at a stack frame. * * @return Function name. This is valid until the stack trace is destroyed; it * should not be freed. @c NULL if the name could not be determined. */ -const char *drgn_stack_frame_name(struct drgn_stack_trace *trace, size_t frame); +const char *drgn_stack_frame_function_name(struct drgn_stack_trace *trace, + size_t frame); /** Return whether a stack frame is for an inlined call. */ bool drgn_stack_frame_is_inline(struct drgn_stack_trace *trace, size_t frame); diff --git a/libdrgn/drgn_program_parse_vmcoreinfo.inc.strswitch b/libdrgn/drgn_program_parse_vmcoreinfo.inc.strswitch index 545950220..a9d8dff07 100644 --- a/libdrgn/drgn_program_parse_vmcoreinfo.inc.strswitch +++ b/libdrgn/drgn_program_parse_vmcoreinfo.inc.strswitch @@ -18,6 +18,34 @@ static struct drgn_error *parse_vmcoreinfo_u64(const char *value, return NULL; } +// Linux kernel commit 905415ff3ffb ("lib/buildid: harden build ID parsing +// logic") (in v6.12) contains a bug that results in a garbage build ID in +// VMCOREINFO. 
It was fixed in the same patch series in commits de3ec364c3c3 +// ("lib/buildid: add single folio-based file reader abstraction") and +// d4deb8242341 ("lib/buildid: take into account e_phoff when fetching program +// headers"). However, the broken commit was backported to several stable +// kernels. Some branches were fixed by "lib/buildid: Fix build ID parsing +// logic", but a couple reached their end-of-life while broken. See +// https://lore.kernel.org/all/20241104175256.2327164-1-jolsa@kernel.org/. +// +// The very sad workaround is to ignore the build ID based on a version check. +static void ignore_broken_vmcoreinfo_build_id(struct drgn_program *prog) +{ + char *p = (char *)prog->vmcoreinfo.osrelease; + long major = strtol(p, &p, 10), minor = 0, patch = 0; + if (*p == '.') { + minor = strtol(p + 1, &p, 10); + if (*p == '.') + patch = strtol(p + 1, NULL, 10); + } + if ((major == 6 && minor == 11 && patch >= 3 && patch < 10) + || (major == 6 && minor == 10 && patch >= 14) + || (major == 6 && minor == 6 && patch >= 55 && patch < 63) + || (major == 6 && minor == 1 && patch >= 113 && patch < 119) + || (major == 5 && minor == 15 && patch >= 168)) + prog->vmcoreinfo.build_id_len = 0; +} + struct drgn_error *drgn_program_parse_vmcoreinfo(struct drgn_program *prog, const char *desc, size_t descsz) @@ -37,6 +65,21 @@ struct drgn_error *drgn_program_parse_vmcoreinfo(struct drgn_program *prog, const char *value = equals + 1; @memswitch (line, equals - line)@ + @case "BUILD-ID"@ + { + size_t build_id_len = (newline - value) / 2; + if (build_id_len > sizeof(prog->vmcoreinfo.build_id)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "BUILD-ID in VMCOREINFO is too long"); + } + if (!unhexlify(value, newline - value, + &prog->vmcoreinfo.build_id)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "couldn't parse BUILD-ID in VMCOREINFO"); + } + prog->vmcoreinfo.build_id_len = build_id_len; + break; + } @case "CRASHTIME"@ prog->vmcoreinfo.have_crashtime = true; break; @@ 
-119,6 +162,7 @@ struct drgn_error *drgn_program_parse_vmcoreinfo(struct drgn_program *prog, return drgn_error_create(DRGN_ERROR_OTHER, "VMCOREINFO does not contain valid OSRELEASE"); } + ignore_broken_vmcoreinfo_build_id(prog); if (!is_power_of_two(prog->vmcoreinfo.page_size)) { return drgn_error_create(DRGN_ERROR_OTHER, "VMCOREINFO does not contain valid PAGESIZE"); diff --git a/libdrgn/dwarf_info.c b/libdrgn/dwarf_info.c index 7725e781c..c27a12771 100644 --- a/libdrgn/dwarf_info.c +++ b/libdrgn/dwarf_info.c @@ -137,7 +137,16 @@ struct drgn_dwarf_index_cu { Dwarf_CU *libdw_cu; }; +/** Indexed CU lookup table entry. */ +struct drgn_dwarf_index_cu_lookup { + /** Address of CU data (@ref drgn_dwarf_index_cu::buf). */ + uintptr_t buf; + /** Index of CU in @ref drgn_dwarf_info::index_cus. */ + size_t index; +}; + DEFINE_VECTOR_FUNCTIONS(drgn_dwarf_index_cu_vector); +DEFINE_VECTOR(drgn_module_vector, struct drgn_module *); DEFINE_HASH_MAP_FUNCTIONS(drgn_dwarf_type_map, ptr_key_hash_pair, scalar_key_eq); @@ -164,13 +173,11 @@ drgn_namespace_dwarf_index_deinit(struct drgn_namespace_dwarf_index *dindex) { drgn_error_destroy(dindex->saved_err); array_for_each(tag_map, dindex->map) { - for (auto it = drgn_dwarf_index_die_map_first(tag_map); it.entry; - it = drgn_dwarf_index_die_map_next(it)) + hash_table_for_each(drgn_dwarf_index_die_map, it, tag_map) drgn_dwarf_index_die_vector_deinit(&it.entry->value); drgn_dwarf_index_die_map_deinit(tag_map); } - for (auto it = drgn_namespace_table_first(&dindex->children); it.entry; - it = drgn_namespace_table_next(it)) { + hash_table_for_each(drgn_namespace_table, it, &dindex->children) { drgn_namespace_dwarf_index_deinit(*it.entry); free(*it.entry); } @@ -185,6 +192,7 @@ void drgn_dwarf_info_init(struct drgn_debug_info *dbinfo) dbinfo->dwarf.global.parent = NULL; drgn_dwarf_base_type_map_init(&dbinfo->dwarf.base_types); drgn_dwarf_specification_map_init(&dbinfo->dwarf.specifications); + dbinfo->dwarf.index_cu_lookup = NULL; 
drgn_dwarf_index_cu_vector_init(&dbinfo->dwarf.index_cus); drgn_dwarf_type_map_init(&dbinfo->dwarf.types); drgn_dwarf_type_map_init(&dbinfo->dwarf.cant_be_incomplete_array_types); @@ -260,13 +268,11 @@ static inline struct drgn_error *drgn_check_address_size(uint8_t address_size) * friendly), which is important for the tight DIE parsing loop. */ enum drgn_dwarf_index_abbrev_insn { - /* - * Instructions > 0 and <= INSN_MAX_SKIP indicate a number of bytes to - * be skipped over. - */ + // Instructions > 0 and <= INSN_MAX_SKIP indicate a number of bytes to + // be skipped over. INSN_MAX_SKIP = 219, - /* These instructions indicate an attribute that can be skipped over. */ + // These instructions indicate an attribute that can be skipped over. INSN_SKIP_BLOCK, INSN_SKIP_BLOCK1, INSN_SKIP_BLOCK2, @@ -274,7 +280,7 @@ enum drgn_dwarf_index_abbrev_insn { INSN_SKIP_LEB128, INSN_SKIP_STRING, - /* These instructions indicate an attribute that should be parsed. */ + // These instructions indicate an attribute that should be parsed. INSN_SIBLING_REF1, INSN_SIBLING_REF2, INSN_SIBLING_REF4, @@ -291,6 +297,8 @@ enum drgn_dwarf_index_abbrev_insn { INSN_NAME_STRP_ALT4, INSN_NAME_STRP_ALT8, INSN_DECLARATION_FLAG, + // "Specification" is overloaded to mean DW_AT_specification, + // DW_AT_abstract_origin, or DW_AT_import. INSN_SPECIFICATION_REF1, INSN_SPECIFICATION_REF2, INSN_SPECIFICATION_REF4, @@ -308,36 +316,34 @@ enum drgn_dwarf_index_abbrev_insn { NUM_INSNS, - /* - * Every sequence of instructions for a DIE is terminated by a zero - * byte. - */ + // Every sequence of instructions for a DIE is terminated by a zero + // byte. INSN_END = 0, - /* - * The byte after INSN_END contains the DIE flags, which are a bitmask - * of flags combined with the drgn_dwarf_index_tag. - */ + // The byte after INSN_END contains the DIE flags, which are a bitmask + // of flags combined with the tag (either a drgn_dwarf_index_tag or one + // of the special INSN_DIE_TAG_ tags below). 
INSN_DIE_FLAG_TAG_MASK = 0x1f, - /* - * DIE has a DW_AT_inline attribute (which may be DW_INL_not_inlined or - * DW_INL_declared_not_inlined). We use this to decide whether to look - * for a concrete out-of-line instance of an abstract instance root, so - * false positives are okay. - */ - INSN_DIE_FLAG_MAYBE_INLINED = 0x20, - /* DIE is a declaration. */ + + // Tags that need special handling but don't need to be indexed + // themselves. + INSN_DIE_TAG_imported_unit = DRGN_DWARF_INDEX_NUM_TAGS, + INSN_DIE_NUM_TAGS, + + // DIE is DW_TAG_subprogram with no DW_AT_low_pc or DW_AT_ranges. + INSN_DIE_FLAG_SUBPROGRAM_NO_PC = 0x20, + // DIE is a declaration. INSN_DIE_FLAG_DECLARATION = 0x40, - /* DIE has children. */ + // DIE has children. INSN_DIE_FLAG_CHILDREN = 0x80, }; // We use INSN_DIE_FLAG_TAG_MASK as a sentinel when the DIE shouldn't be // indexed, so this is < and not <=. -static_assert((int)DRGN_DWARF_INDEX_NUM_TAGS < (int)INSN_DIE_FLAG_TAG_MASK, +static_assert((int)INSN_DIE_NUM_TAGS < (int)INSN_DIE_FLAG_TAG_MASK, "too many instruction DIE tags"); -/* Instructions are 8 bits. */ +// Instructions are 8 bits. static_assert(NUM_INSNS - 1 == UINT8_MAX, "maximum DWARF index instruction is invalid"); @@ -348,6 +354,8 @@ DEFINE_VECTOR(uint64_vector, uint64_t); struct drgn_dwarf_index_cu_buffer { struct binary_buffer bb; struct drgn_dwarf_index_cu *cu; + // Depth of current DIE relative to starting DIE, which has depth 0. 
+ unsigned int depth; }; static struct drgn_error * @@ -370,26 +378,28 @@ drgn_dwarf_index_cu_buffer_init(struct drgn_dwarf_index_cu_buffer *buffer, drgn_elf_file_is_little_endian(cu->file), drgn_dwarf_index_cu_buffer_error); buffer->cu = cu; + buffer->depth = 0; } -bool drgn_dwarf_index_state_init(struct drgn_dwarf_index_state *state, - struct drgn_debug_info *dbinfo) -{ - state->dbinfo = dbinfo; - drgn_init_num_threads(); - state->cus = malloc_array(drgn_num_threads, sizeof(*state->cus)); - if (!state->cus) - return false; - for (int i = 0; i < drgn_num_threads; i++) - drgn_dwarf_index_cu_vector_init(&state->cus[i]); - return true; -} - -void drgn_dwarf_index_state_deinit(struct drgn_dwarf_index_state *state) +// Returns NULL if die_addr is not from an indexed CU. +static struct drgn_dwarf_index_cu * +drgn_dwarf_index_find_cu(struct drgn_debug_info *dbinfo, uintptr_t die_addr) { - for (int i = 0; i < drgn_num_threads; i++) - drgn_dwarf_index_cu_vector_deinit(&state->cus[i]); - free(state->cus); + struct drgn_dwarf_index_cu_lookup *lookup = + dbinfo->dwarf.index_cu_lookup; + #define less_than_cu_lookup_buf(a, b) (*(a) < (b)->buf) + size_t i = binary_search_gt(lookup, + drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus), + &die_addr, less_than_cu_lookup_buf); + #undef less_than_cu_lookup_buf + if (i == 0) + return NULL; + struct drgn_dwarf_index_cu *cu = + drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, + lookup[i - 1].index); + if (die_addr - lookup[i - 1].buf >= cu->len) + return NULL; + return cu; } static const char *drgn_dwarf_dwo_name(Dwarf_Die *die) @@ -402,14 +412,22 @@ static const char *drgn_dwarf_dwo_name(Dwarf_Die *die) } static struct drgn_error * -drgn_dwarf_index_read_cus(struct drgn_dwarf_index_state *state, - struct drgn_elf_file *file, - enum drgn_section_index scn) +drgn_dwarf_index_read_file(struct drgn_elf_file *file, + struct drgn_dwarf_index_cu_vector *cus, + struct drgn_dwarf_index_cu_vector *partial_units); + +static struct 
drgn_error * +drgn_dwarf_index_read_cus(struct drgn_elf_file *file, + enum drgn_section_index scn, + struct drgn_dwarf_index_cu_vector *cus, + struct drgn_dwarf_index_cu_vector *partial_units) { struct drgn_error *err; - struct drgn_dwarf_index_cu_vector *cus = - &state->cus[omp_get_thread_num()]; + Dwarf *dwarf; + err = drgn_elf_file_get_dwarf(file, &dwarf); + if (err) + return err; Dwarf_Off off, next_off; size_t header_size; Dwarf_Half version; @@ -421,19 +439,18 @@ drgn_dwarf_index_read_cus(struct drgn_dwarf_index_state *state, scn == DRGN_SCN_DEBUG_TYPES ? &v4_type_signature : NULL; int ret; for (off = 0; - (ret = dwarf_next_unit(file->dwarf, off, &next_off, &header_size, + (ret = dwarf_next_unit(dwarf, off, &next_off, &header_size, &version, &abbrev_offset, &address_size, &offset_size, v4_type_signaturep, NULL)) == 0; off = next_off) { Dwarf_Die cudie; if (scn == DRGN_SCN_DEBUG_TYPES) { - if (!dwarf_offdie_types(file->dwarf, off + header_size, + if (!dwarf_offdie_types(dwarf, off + header_size, &cudie)) return drgn_error_libdw(); } else { - if (!dwarf_offdie(file->dwarf, off + header_size, - &cudie)) + if (!dwarf_offdie(dwarf, off + header_size, &cudie)) return drgn_error_libdw(); } uint8_t unit_type; @@ -459,18 +476,19 @@ drgn_dwarf_index_read_cus(struct drgn_dwarf_index_state *state, &split_file); if (err) return err; - err = drgn_dwarf_index_read_file(state, - split_file); + err = drgn_dwarf_index_read_file(split_file, + cus, + partial_units); if (err) return err; } continue; } else if (unit_type == DW_UT_skeleton) { - if (drgn_log_is_enabled(state->dbinfo->prog, + if (drgn_log_is_enabled(file->module->prog, DRGN_LOG_WARNING)) { const char *dwo_name = drgn_dwarf_dwo_name(&cudie); - drgn_log_warning(state->dbinfo->prog, + drgn_log_warning(file->module->prog, "%s: split DWARF file%s%s not found", file->path ?: "", dwo_name ? 
" " : "", @@ -485,8 +503,17 @@ drgn_dwarf_index_read_cus(struct drgn_dwarf_index_state *state, abbrev_offset += dwp_offset; } #else - unit_type = (scn == DRGN_SCN_DEBUG_TYPES - ? DW_UT_type : DW_UT_compile); + switch (dwarf_tag(&cudie)) { + case DW_TAG_type_unit: + unit_type = DW_UT_type; + break; + case DW_TAG_partial_unit: + unit_type = DW_UT_partial; + break; + default: + unit_type = DW_UT_compile; + break; + } #endif if (!elf_data_contains_ptr(file->scn_data[scn], @@ -570,7 +597,8 @@ drgn_dwarf_index_read_cus(struct drgn_dwarf_index_state *state, } struct drgn_dwarf_index_cu *cu = - drgn_dwarf_index_cu_vector_append_entry(cus); + drgn_dwarf_index_cu_vector_append_entry(unit_type == DW_UT_partial + ? partial_units : cus); if (!cu) return &drgn_enomem; *cu = (struct drgn_dwarf_index_cu){ @@ -592,15 +620,41 @@ drgn_dwarf_index_read_cus(struct drgn_dwarf_index_state *state, return NULL; } -struct drgn_error * -drgn_dwarf_index_read_file(struct drgn_dwarf_index_state *state, - struct drgn_elf_file *file) +static struct drgn_error * +drgn_dwarf_index_read_file(struct drgn_elf_file *file, + struct drgn_dwarf_index_cu_vector *cus, + struct drgn_dwarf_index_cu_vector *partial_units) { struct drgn_error *err; - err = drgn_dwarf_index_read_cus(state, file, DRGN_SCN_DEBUG_INFO); - if (!err && file->scn_data[DRGN_SCN_DEBUG_TYPES]) { - err = drgn_dwarf_index_read_cus(state, file, - DRGN_SCN_DEBUG_TYPES); + + for (int scn = 0; scn < DRGN_SECTION_INDEX_NUM_DWARF_INDEX; scn++) { + if (file->scns[scn]) { + Elf_Data *data; + err = drgn_elf_file_read_section(file, scn, &data); + if (err) + return err; + } + } + err = drgn_dwarf_index_read_cus(file, DRGN_SCN_DEBUG_INFO, cus, + partial_units); + if (err) + return err; + if (file->scns[DRGN_SCN_DEBUG_TYPES]) { + err = drgn_dwarf_index_read_cus(file, DRGN_SCN_DEBUG_TYPES, + cus, partial_units); + if (err) + return err; + } + if (file == file->module->debug_file + && file->module->supplementary_debug_file) { + err = 
drgn_dwarf_index_read_file(file->module->supplementary_debug_file, + cus, partial_units); + if (err) + return err; + file->alt_debug_info_data = + file->module->supplementary_debug_file->scn_data[DRGN_SCN_DEBUG_INFO]; + file->alt_debug_str_data = + file->module->supplementary_debug_file->scn_data[DRGN_SCN_DEBUG_STR]; } return err; } @@ -907,7 +961,7 @@ dw_at_specification_to_insn(struct drgn_dwarf_index_cu *cu, return NULL; default: return binary_buffer_error(bb, - "unknown attribute form %#" PRIx64 " for DW_AT_specification or DW_AT_abstract_origin", + "unknown attribute form %#" PRIx64 " for DW_AT_specification, DW_AT_abstract_origin, or DW_AT_import", form); } } @@ -943,11 +997,17 @@ read_abbrev_decl(struct drgn_elf_file_section_buffer *buffer, #define X(name) case DW_TAG_##name: die_flags = DRGN_DWARF_INDEX_##name; break; DRGN_DWARF_INDEX_TAGS #undef X + case DW_TAG_imported_unit: + die_flags = INSN_DIE_TAG_imported_unit; + should_index = false; + break; default: die_flags = INSN_DIE_FLAG_TAG_MASK; should_index = false; break; } + if (tag == DW_TAG_subprogram) + die_flags |= INSN_DIE_FLAG_SUBPROGRAM_NO_PC; uint8_t children; if ((err = binary_buffer_next_u8(&buffer->bb, &children))) @@ -972,15 +1032,17 @@ read_abbrev_decl(struct drgn_elf_file_section_buffer *buffer, } else if (name == DW_AT_declaration && should_index) { err = dw_at_declaration_to_insn(&buffer->bb, form, &insn, &die_flags); - } else if (should_index - && (name == DW_AT_specification - || (tag == DW_TAG_subprogram - && name == DW_AT_abstract_origin))) { + } else if ((should_index + && (name == DW_AT_specification + || (tag == DW_TAG_subprogram + && name == DW_AT_abstract_origin))) + || (tag == DW_TAG_imported_unit + && name == DW_AT_import)) { err = dw_at_specification_to_insn(cu, &buffer->bb, form, &insn); } else { - if (tag == DW_TAG_subprogram && name == DW_AT_inline) - die_flags |= INSN_DIE_FLAG_MAYBE_INLINED; + if (name == DW_AT_low_pc || name == DW_AT_ranges) + die_flags &= 
~INSN_DIE_FLAG_SUBPROGRAM_NO_PC; err = dw_form_to_insn(cu, &buffer->bb, form, &insn); } if (err) @@ -1108,29 +1170,43 @@ static struct drgn_error *read_indirect_insn(struct drgn_dwarf_index_cu *cu, } } +// Stack of CU buffers. The bottom is the initial unit/DIE, and +// DW_TAG_imported_unit DIEs push additional buffers. We use an inline size of 1 +// to avoid an allocation in the common case of no imports. +DEFINE_VECTOR(drgn_dwarf_index_cu_buffer_stack, + struct drgn_dwarf_index_cu_buffer, 1); +static const size_t MAX_IMPORTED_UNIT_DEPTH = 128; + /* * First pass: index DIEs with DW_AT_specification and DW_AT_abstract_origin. * This recurses into namespaces. */ static struct drgn_error * index_cu_first_pass(struct drgn_dwarf_specification_map *specifications, - struct drgn_dwarf_index_cu_buffer *buffer) + struct drgn_dwarf_index_cu_buffer_stack *stack) { struct drgn_error *err; + struct drgn_dwarf_index_cu_buffer *buffer = + drgn_dwarf_index_cu_buffer_stack_last(stack); struct drgn_dwarf_index_cu *cu = buffer->cu; - const char *debug_info_buffer = cu->file->scn_data[cu->scn]->d_buf; - unsigned int depth = 0; for (;;) { - size_t die_addr = (uintptr_t)buffer->bb.pos; + uintptr_t die_addr = (uintptr_t)buffer->bb.pos; uint64_t code; if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) return err; if (code == 0) { - if (depth-- > 1) - continue; - else - break; + if (buffer->depth > 1) { + buffer->depth--; + } else { +pop: + drgn_dwarf_index_cu_buffer_stack_pop(stack); + if (drgn_dwarf_index_cu_buffer_stack_empty(stack)) + break; + buffer = drgn_dwarf_index_cu_buffer_stack_last(stack); + cu = buffer->cu; + } + continue; } else if (code > cu->num_abbrev_decls) { return binary_buffer_error(&buffer->bb, "unknown abbreviation code %" PRIu64, @@ -1139,7 +1215,7 @@ index_cu_first_pass(struct drgn_dwarf_specification_map *specifications, uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; bool declaration = false; - uintptr_t specification = 0; + const char 
*specification = NULL; const char *sibling = NULL; uint8_t insn; uint8_t extra_die_flags = 0; @@ -1265,7 +1341,11 @@ indirect_insn:; &tmp))) return err; specification: - specification = (uintptr_t)cu->buf + tmp; + if (tmp >= cu->len) { + return binary_buffer_error(&buffer->bb, + "reference is out of bounds"); + } + specification = cu->buf + tmp; break; case INSN_SPECIFICATION_REF_ADDR4: if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, @@ -1277,7 +1357,12 @@ indirect_insn:; &tmp))) return err; specification_ref_addr: - specification = (uintptr_t)debug_info_buffer + tmp; + if (tmp >= cu->file->scn_data[cu->scn]->d_size) { + return binary_buffer_error(&buffer->bb, + "reference is out of bounds"); + } + specification = (char *)cu->file->scn_data[cu->scn]->d_buf + + tmp; break; case INSN_SPECIFICATION_REF_ALT4: if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, @@ -1289,8 +1374,12 @@ indirect_insn:; &tmp))) return err; specification_ref_alt: - specification = ((uintptr_t)cu->file->alt_debug_info_data->d_buf - + tmp); + if (tmp >= cu->file->alt_debug_info_data->d_size) { + return binary_buffer_error(&buffer->bb, + "reference is out of bounds"); + } + specification = (char *)cu->file->alt_debug_info_data->d_buf + + tmp; break; case INSN_INDIRECT: case INSN_SIBLING_INDIRECT: @@ -1316,7 +1405,8 @@ indirect_insn:; } insn = *insnp | extra_die_flags; - if (depth > 0 && specification) { + uint8_t tag = insn & INSN_DIE_FLAG_TAG_MASK; + if (specification && tag != INSN_DIE_TAG_imported_unit) { if (insn & INSN_DIE_FLAG_DECLARATION) declaration = true; /* @@ -1327,19 +1417,62 @@ indirect_insn:; */ if (!declaration && !index_specification(specifications, - specification, die_addr)) + (uintptr_t)specification, + die_addr)) return &drgn_enomem; } + unsigned int orig_depth = buffer->depth; if (insn & INSN_DIE_FLAG_CHILDREN) { - if (sibling - && ((insn & INSN_DIE_FLAG_TAG_MASK) - != DRGN_DWARF_INDEX_namespace)) - buffer->bb.pos = sibling; + // We descend into a DIE's 
children in these cases: + // 1. The DIE doesn't have a sibling pointer, in which + // case we have no choice. + // 2. The DIE is the unit that we're indexing. + // 3. The DIE is a namespace. + // In cases 2 and 3, we ignore the DIE's sibling pointer + // if it has one. + // + // Otherwise, we skip over the DIE's children by + // following the sibling pointer. + if (!sibling + || buffer->depth == 0 + || tag == DRGN_DWARF_INDEX_namespace) + buffer->depth++; else - depth++; - } else if (depth == 0) { - break; + buffer->bb.pos = sibling; + } else if (buffer->depth == 0) { + goto pop; + } + + // We only need to follow imported_unit DIEs whose parent is a + // unit or namespace. To do that, we'd need to track extra + // information. In practice, imported_unit DIEs are mainly used + // in that case anyways, so we don't bother checking and take + // the risk of unnecessary imports. + // + // imported_unit DIEs at depth 0 are malformed, so we ignore + // those. + if (tag == INSN_DIE_TAG_imported_unit && orig_depth > 0) { + if (!specification) { + return binary_buffer_error(&buffer->bb, + "DW_TAG_imported_unit is missing DW_AT_import"); + } + cu = drgn_dwarf_index_find_cu(&cu->file->module->prog->dbinfo, + (uintptr_t)specification); + if (!cu) { + return binary_buffer_error(&buffer->bb, + "imported unit not found"); + } + if (drgn_dwarf_index_cu_buffer_stack_size(stack) + >= MAX_IMPORTED_UNIT_DEPTH) { + return binary_buffer_error(&buffer->bb, + "maximum DWARF imported unit depth exceeded"); + } + buffer = drgn_dwarf_index_cu_buffer_stack_append_entry(stack); + if (!buffer) + return &drgn_enomem; + drgn_dwarf_index_cu_buffer_init(buffer, cu); + buffer->bb.pos = specification; } } return NULL; @@ -1409,25 +1542,32 @@ static struct drgn_error * index_cu_second_pass(struct drgn_debug_info *dbinfo, struct drgn_dwarf_index_die_map map[static DRGN_DWARF_INDEX_MAP_SIZE], struct drgn_dwarf_base_type_map *base_types, - struct drgn_dwarf_index_cu_buffer *buffer) + struct 
drgn_dwarf_index_cu_buffer_stack *stack) { struct drgn_error *err; + struct drgn_dwarf_index_cu_buffer *buffer = + drgn_dwarf_index_cu_buffer_stack_last(stack); struct drgn_dwarf_index_cu *cu = buffer->cu; - Elf_Data *debug_str = cu->file->scn_data[DRGN_SCN_DEBUG_STR]; - unsigned int depth = 0; uint8_t depth1_tag = 0; - size_t depth1_addr = 0; + uintptr_t depth1_addr = 0; for (;;) { - size_t die_addr = (uintptr_t)buffer->bb.pos; + uintptr_t die_addr = (uintptr_t)buffer->bb.pos; uint64_t code; if ((err = binary_buffer_next_uleb128(&buffer->bb, &code))) return err; if (code == 0) { - if (depth-- > 1) - continue; - else - break; + if (buffer->depth > 1) { + buffer->depth--; + } else { +pop: + drgn_dwarf_index_cu_buffer_stack_pop(stack); + if (drgn_dwarf_index_cu_buffer_stack_empty(stack)) + break; + buffer = drgn_dwarf_index_cu_buffer_stack_last(stack); + cu = buffer->cu; + } + continue; } else if (code > cu->num_abbrev_decls) { return binary_buffer_error(&buffer->bb, "unknown abbreviation code %" PRIu64, @@ -1437,7 +1577,7 @@ index_cu_second_pass(struct drgn_debug_info *dbinfo, uint8_t *insnp = &cu->abbrev_insns[cu->abbrev_decls[code - 1]]; const char *name = NULL; bool declaration = false; - bool specification = false; + const char *specification = NULL; const char *sibling = NULL; uint8_t insn; uint8_t extra_die_flags = 0; @@ -1465,9 +1605,6 @@ indirect_insn:; &skip))) return err; goto skip; - case INSN_SPECIFICATION_REF_UDATA: - specification = true; - fallthrough; case INSN_SKIP_LEB128: if ((err = binary_buffer_skip_leb128(&buffer->bb))) return err; @@ -1524,11 +1661,12 @@ indirect_insn:; if ((err = binary_buffer_next_u64(&buffer->bb, &tmp))) return err; strp: - if (tmp >= debug_str->d_size) { + if (tmp >= cu->file->scn_data[DRGN_SCN_DEBUG_STR]->d_size) { return binary_buffer_error(&buffer->bb, "DW_AT_name is out of bounds"); } - name = (const char *)debug_str->d_buf + tmp; + name = (const char *)cu->file->scn_data[DRGN_SCN_DEBUG_STR]->d_buf + + tmp; 
__builtin_prefetch(name); break; case INSN_NAME_STRX: @@ -1586,25 +1724,70 @@ indirect_insn:; break; } case INSN_SPECIFICATION_REF1: - specification = true; - skip = 1; - goto skip; + if ((err = binary_buffer_next_u8_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification; case INSN_SPECIFICATION_REF2: - specification = true; - skip = 2; - goto skip; + if ((err = binary_buffer_next_u16_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification; case INSN_SPECIFICATION_REF4: - case INSN_SPECIFICATION_REF_ADDR4: - case INSN_SPECIFICATION_REF_ALT4: - specification = true; - skip = 4; - goto skip; + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification; case INSN_SPECIFICATION_REF8: + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; + goto specification; + case INSN_SPECIFICATION_REF_UDATA: + if ((err = binary_buffer_next_uleb128(&buffer->bb, + &tmp))) + return err; +specification: + if (tmp >= cu->len) { + return binary_buffer_error(&buffer->bb, + "reference is out of bounds"); + } + specification = cu->buf + tmp; + break; + case INSN_SPECIFICATION_REF_ADDR4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification_ref_addr; case INSN_SPECIFICATION_REF_ADDR8: + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; +specification_ref_addr: + if (tmp >= cu->file->scn_data[cu->scn]->d_size) { + return binary_buffer_error(&buffer->bb, + "reference is out of bounds"); + } + specification = (char *)cu->file->scn_data[cu->scn]->d_buf + + tmp; + break; + case INSN_SPECIFICATION_REF_ALT4: + if ((err = binary_buffer_next_u32_into_u64(&buffer->bb, + &tmp))) + return err; + goto specification_ref_alt; case INSN_SPECIFICATION_REF_ALT8: - specification = true; - skip = 8; - goto skip; + if ((err = binary_buffer_next_u64(&buffer->bb, + &tmp))) + return err; +specification_ref_alt: + if (tmp >= cu->file->alt_debug_info_data->d_size) { 
+ return binary_buffer_error(&buffer->bb, + "reference is out of bounds"); + } + specification = (char *)cu->file->alt_debug_info_data->d_buf + + tmp; + break; case INSN_INDIRECT: case INSN_SIBLING_INDIRECT: case INSN_NAME_INDIRECT: @@ -1630,11 +1813,11 @@ indirect_insn:; insn = *insnp | extra_die_flags; uint8_t tag = insn & INSN_DIE_FLAG_TAG_MASK; - if (depth == 1) { + if (buffer->depth == 1) { depth1_tag = tag; depth1_addr = die_addr; } - if (depth == (tag == DRGN_DWARF_INDEX_enumerator ? 2 : 1) + if (buffer->depth == (tag == DRGN_DWARF_INDEX_enumerator ? 2 : 1) && name && !specification) { if (insn & INSN_DIE_FLAG_DECLARATION) declaration = true; @@ -1665,7 +1848,26 @@ indirect_insn:; goto next; } - if (insn & INSN_DIE_FLAG_MAYBE_INLINED) { + // A subprogram DIE without an address may be the + // abstract instance root for an inlined function, or a + // subprogram DIE in a supplementary file. Check for a + // concrete instance or a definition in the main debug + // file, respectively. + // + // Note that if the original DIE was a declaration, then + // this is technically checking whether the declaration + // itself has an address, not the definition. Since + // declarations don't have an address, this always does + // an extra lookup for definitions of declarations. + // + // The extra lookup is redundant for normal definitions, + // but we actually need it in the case that the + // definition is an abstract instance root (so we need + // to go from declaration -> abstract instance root -> + // concrete instance). Avoiding redundant lookups would + // require storing an extra flag in the specification + // map, which empirically isn't worth it. 
+ if (insn & INSN_DIE_FLAG_SUBPROGRAM_NO_PC) { drgn_dwarf_find_definition(dbinfo, die_addr, &die_addr); } @@ -1674,47 +1876,124 @@ indirect_insn:; return &drgn_enomem; } -next: +next:; + unsigned int orig_depth = buffer->depth; if (insn & INSN_DIE_FLAG_CHILDREN) { - /* - * We must descend into the children of enumeration_type - * DIEs to index enumerator DIEs. We don't want to skip - * over the children of the top-level DIE even if it has - * a sibling pointer. - */ - if (sibling && tag != DRGN_DWARF_INDEX_enumeration_type - && depth > 0) - buffer->bb.pos = sibling; + // We descend into a DIE's children in these cases: + // 1. The DIE doesn't have a sibling pointer, in which + // case we have no choice. + // 2. The DIE is the unit or namespace that we're + // indexing. + // 3. The DIE is a top-level enumeration_type DIE, so we + // want to index its children enumerator DIEs. + // In cases 2 and 3, we ignore the DIE's sibling pointer + // if it has one. + // + // Otherwise, we skip over the DIE's children by + // following the sibling pointer. + if (!sibling + || buffer->depth == 0 + || (buffer->depth == 1 && tag == DRGN_DWARF_INDEX_enumeration_type)) + buffer->depth++; else - depth++; - } else if (depth == 0) { - break; + buffer->bb.pos = sibling; + } else if (buffer->depth == 0) { + goto pop; + } + + // Each buffer actually has two depths: the physical depth in + // the file of the current DIE relative to where the buffer was + // initialized (either the partial unit that we imported or the + // unit or namespace DIE where we started indexing), and the + // logical depth, treating the children of a partial unit as if + // they were siblings of the imported_unit DIE. Therefore, the + // logical depth of the children of a partial unit is equal to + // the logical depth of the imported_unit DIE, and the logical + // depth of the partial unit itself is the logical depth of the + // imported_unit DIE minus 1. 
+ // + // Other than enumerator DIEs, we only index DIEs at logical + // depth 1. We assume that partial units will not have top-level + // enumerator DIEs, or alternatively that an enumeration_type + // DIE will not have an imported_unit DIE child. + // + // imported_unit DIEs at logical depth > 1 can only contain DIEs + // at logical depth > 1, which we would ignore anyways. + // imported_unit DIEs at depth 0 are malformed. Therefore, we + // only follow imported_unit DIEs at logical depth 1 and ignore + // others. + // + // This lets us avoid tracking the depth and logical depth + // separately: since we only follow imports at logical depth 1, + // depth == logical depth. + // + // If our assumption about enumerator DIEs is incorrect, then we + // will need to track depth and logical depth separately, update + // everything to use the appropriate one, and also take imports + // into account for depth1_{tag,addr}. + if (tag == INSN_DIE_TAG_imported_unit && orig_depth == 1) { + if (!specification) { + return binary_buffer_error(&buffer->bb, + "DW_TAG_imported_unit is missing DW_AT_import"); + } + cu = drgn_dwarf_index_find_cu(&cu->file->module->prog->dbinfo, + (uintptr_t)specification); + if (!cu) { + return binary_buffer_error(&buffer->bb, + "imported unit not found"); + } + if (drgn_dwarf_index_cu_buffer_stack_size(stack) + >= MAX_IMPORTED_UNIT_DEPTH) { + return binary_buffer_error(&buffer->bb, + "maximum DWARF imported unit depth exceeded"); + } + buffer = drgn_dwarf_index_cu_buffer_stack_append_entry(stack); + if (!buffer) + return &drgn_enomem; + drgn_dwarf_index_cu_buffer_init(buffer, cu); + buffer->bb.pos = specification; } } return NULL; } -static inline int drgn_dwarf_index_cu_cmp(const void *_a, const void *_b) +static inline int drgn_dwarf_index_cu_lookup_cmp(const void *_a, const void *_b) { - uintptr_t a = (uintptr_t)((struct drgn_dwarf_index_cu *)_a)->buf; - uintptr_t b = (uintptr_t)((struct drgn_dwarf_index_cu *)_b)->buf; + uintptr_t a = ((struct 
drgn_dwarf_index_cu_lookup *)_a)->buf; + uintptr_t b = ((struct drgn_dwarf_index_cu_lookup *)_b)->buf; return (a > b) - (a < b); } -// Returns NULL if die_addr is not from an indexed CU. -static struct drgn_dwarf_index_cu * -drgn_dwarf_index_find_cu(struct drgn_debug_info *dbinfo, uintptr_t die_addr) +static void +drgn_dwarf_index_cus_merge_partial(struct drgn_dwarf_index_cu_vector *dst, + struct drgn_dwarf_index_cu_vector *src_partial, + size_t *partial_pos) { - struct drgn_dwarf_index_cu *cus = - drgn_dwarf_index_cu_vector_begin(&dbinfo->dwarf.index_cus); - #define less_than_cu_buf(a, b) (*(a) < (uintptr_t)(b)->buf) - size_t i = binary_search_gt(cus, - drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus), - &die_addr, less_than_cu_buf); - #undef less_than_cu_buf - if (i == 0 || die_addr - (uintptr_t)cus[i - 1].buf >= cus[i - 1].len) - return NULL; - return &cus[i - 1]; + if (!drgn_dwarf_index_cu_vector_empty(src_partial)) { + memcpy(drgn_dwarf_index_cu_vector_at(dst, *partial_pos), + drgn_dwarf_index_cu_vector_begin(src_partial), + drgn_dwarf_index_cu_vector_size(src_partial) + * sizeof(struct drgn_dwarf_index_cu)); + *partial_pos += drgn_dwarf_index_cu_vector_size(src_partial); + } + drgn_dwarf_index_cu_vector_deinit(src_partial); +} + +static void +drgn_dwarf_index_cus_merge(struct drgn_dwarf_index_cu_vector *dst, + struct drgn_dwarf_index_cu_vector *src, + struct drgn_dwarf_index_cu_vector *src_partial, + size_t *pos, size_t *partial_pos) +{ + if (!drgn_dwarf_index_cu_vector_empty(src)) { + memcpy(drgn_dwarf_index_cu_vector_at(dst, *pos), + drgn_dwarf_index_cu_vector_begin(src), + drgn_dwarf_index_cu_vector_size(src) + * sizeof(struct drgn_dwarf_index_cu)); + *pos += drgn_dwarf_index_cu_vector_size(src); + } + drgn_dwarf_index_cu_vector_deinit(src); + drgn_dwarf_index_cus_merge_partial(dst, src_partial, partial_pos); } // If there wasn't already an error, merge src into dst, and return an error if @@ -1726,8 +2005,7 @@ 
drgn_dwarf_specification_map_merge(struct drgn_dwarf_specification_map *dst, struct drgn_error *err) { if (!err) { - for (auto it = drgn_dwarf_specification_map_first(src); - it.entry; it = drgn_dwarf_specification_map_next(it)) { + hash_table_for_each(drgn_dwarf_specification_map, it, src) { if (drgn_dwarf_specification_map_insert(dst, it.entry, NULL) < 0) { err = &drgn_enomem; @@ -1785,8 +2063,7 @@ drgn_dwarf_base_type_map_merge(struct drgn_dwarf_base_type_map *dst, struct drgn_error *err) { if (!err) { - for (auto it = drgn_dwarf_base_type_map_first(src); it.entry; - it = drgn_dwarf_base_type_map_next(it)) { + hash_table_for_each(drgn_dwarf_base_type_map, it, src) { if (drgn_dwarf_base_type_map_insert(dst, it.entry, NULL) < 0) { err = &drgn_enomem; @@ -1798,24 +2075,36 @@ drgn_dwarf_base_type_map_merge(struct drgn_dwarf_base_type_map *dst, return err; } -struct drgn_error * -drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state) +static struct drgn_error * +drgn_dwarf_index_update(struct drgn_debug_info *dbinfo) { - struct drgn_debug_info *dbinfo = state->dbinfo; - struct drgn_dwarf_index_cu_vector *cus = &dbinfo->dwarf.index_cus; + if (!dbinfo->modules_pending_indexing) + return NULL; if (dbinfo->dwarf.global.saved_err) return drgn_error_copy(dbinfo->dwarf.global.saved_err); - size_t new_cus_size = drgn_dwarf_index_cu_vector_size(cus); - for (int i = 0; i < drgn_num_threads; i++) - new_cus_size += drgn_dwarf_index_cu_vector_size(&state->cus[i]); - if (new_cus_size == drgn_dwarf_index_cu_vector_size(cus)) - return NULL; + drgn_init_num_threads(); + + // Gather linked list of modules into a vector that we can parallelize. + VECTOR(drgn_module_vector, modules); + { + struct drgn_module *module = dbinfo->modules_pending_indexing; + do { + if (!drgn_module_vector_append(&modules, &module)) + return &drgn_enomem; + module = module->pending_indexing_next; + } while (module); + } - // Per-thread array of maps to populate. 
Thread 0 uses the maps in the - // dbinfo directly. These are merged into the dbinfo and freed. + // Per-thread structures to populate. Thread 0 uses the structures in + // the dbinfo directly. These are merged into the dbinfo and freed. _cleanup_free_ union { + // For reading modules. + struct { + struct drgn_dwarf_index_cu_vector cus; + struct drgn_dwarf_index_cu_vector partial_units; + }; // For first pass. struct drgn_dwarf_specification_map specifications; // For second pass. @@ -1823,117 +2112,265 @@ drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state) struct drgn_dwarf_index_die_map map[DRGN_DWARF_INDEX_MAP_SIZE]; struct drgn_dwarf_base_type_map base_types; }; - } *maps = NULL; + } *threads = NULL; if (drgn_num_threads > 1) { - maps = malloc_array(drgn_num_threads - 1, sizeof(maps[0])); - if (!maps) + threads = malloc_array(drgn_num_threads - 1, sizeof(threads[0])); + if (!threads) return &drgn_enomem; } - if (!drgn_dwarf_index_cu_vector_reserve(cus, new_cus_size)) - return &drgn_enomem; - for (int i = 0; i < drgn_num_threads; i++) - drgn_dwarf_index_cu_vector_extend(cus, &state->cus[i]); + // Thread 0 needs its own temporary partial_units vector. + struct drgn_dwarf_index_cu_vector partial_units0; struct drgn_error *err = NULL; + size_t new_cus_size; #pragma omp parallel num_threads(drgn_num_threads) { - struct drgn_dwarf_specification_map *specifications; + struct drgn_error *thread_err = NULL; int thread_num = omp_get_thread_num(); + + // Enumerate CUs in new modules. 
+ struct drgn_dwarf_index_cu_vector *cus, *partial_units; if (thread_num == 0) { - specifications = &dbinfo->dwarf.specifications; + cus = &dbinfo->dwarf.index_cus; + partial_units = &partial_units0; } else { - specifications = &maps[thread_num - 1].specifications; - drgn_dwarf_specification_map_init(specifications); + cus = &threads[thread_num - 1].cus; + partial_units = &threads[thread_num - 1].partial_units; + drgn_dwarf_index_cu_vector_init(cus); } + drgn_dwarf_index_cu_vector_init(partial_units); - #pragma omp for schedule(dynamic) - for (size_t i = dbinfo->dwarf.global.cus_indexed; - i < drgn_dwarf_index_cu_vector_size(cus); i++) { - struct drgn_dwarf_index_cu *cu = - drgn_dwarf_index_cu_vector_at(cus, i); - if (err) + #pragma omp for schedule(dynamic) nowait + for (size_t i = 0; i < drgn_module_vector_size(&modules); i++) { + if (thread_err) continue; - struct drgn_error *cu_err = read_cu(cu); - if (!cu_err) { - struct drgn_dwarf_index_cu_buffer buffer; - drgn_dwarf_index_cu_buffer_init(&buffer, cu); - buffer.bb.pos += cu_header_size(cu); - cu_err = index_cu_first_pass(specifications, - &buffer); + struct drgn_module *module = + *drgn_module_vector_at(&modules, i); + thread_err = + drgn_dwarf_index_read_file(module->debug_file, + cus, partial_units); + } + if (thread_err) { + #pragma omp critical(drgn_dwarf_info_update_index_error) + if (err) + drgn_error_destroy(thread_err); + else + err = thread_err; + thread_err = NULL; + } + #pragma omp barrier + + // Merge the per-thread CUs into dbinfo (and free them). Partial + // units are placed at the end and excluded from new_cus_size so + // that they are not indexed. 
+ #pragma omp master + { + if (!err) { + size_t cus_pos = new_cus_size = + drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus); + size_t new_partial_units = + drgn_dwarf_index_cu_vector_size(&partial_units0); + for (int i = 0; i < drgn_num_threads - 1; i++) { + new_cus_size += drgn_dwarf_index_cu_vector_size(&threads[i].cus); + new_partial_units += drgn_dwarf_index_cu_vector_size(&threads[i].partial_units); + } + + if (new_cus_size + new_partial_units + > dbinfo->dwarf.global.cus_indexed) { + if (drgn_dwarf_index_cu_vector_resize(&dbinfo->dwarf.index_cus, + new_cus_size + + new_partial_units)) { + size_t partial_pos = new_cus_size; + drgn_dwarf_index_cus_merge_partial(&dbinfo->dwarf.index_cus, + &partial_units0, + &partial_pos); + for (int i = 0; i < drgn_num_threads - 1; i++) { + drgn_dwarf_index_cus_merge(&dbinfo->dwarf.index_cus, + &threads[i].cus, + &threads[i].partial_units, + &cus_pos, + &partial_pos); + } + } else { + err = &drgn_enomem; + } + } + } + if (err) { + for (int i = 0; i < drgn_num_threads - 1; i++) { + drgn_dwarf_index_cu_vector_deinit(&threads[i].partial_units); + drgn_dwarf_index_cu_vector_deinit(&threads[i].cus); + } + drgn_dwarf_index_cu_vector_deinit(&partial_units0); + // If there was an error, we'd like to avoid + // doing any more work, but we can't break out + // of an OpenMP parallel region. Set the number + // of CUs to the old number so the remaining + // loops are essentially no-ops. + new_cus_size = dbinfo->dwarf.global.cus_indexed; + drgn_dwarf_index_cu_vector_resize(&dbinfo->dwarf.index_cus, + new_cus_size); + } + } + #pragma omp barrier + + // Update the CU lookup table. This can be done by one thread in + // parallel with reading CUs. 
+ #pragma omp master + if (drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus) + > dbinfo->dwarf.global.cus_indexed) { + struct drgn_dwarf_index_cu_lookup *lookup = + realloc_array(dbinfo->dwarf.index_cu_lookup, + drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus), + sizeof(lookup[0])); + if (lookup) { + dbinfo->dwarf.index_cu_lookup = lookup; + for (size_t i = dbinfo->dwarf.global.cus_indexed; + i < drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus); + i++) { + struct drgn_dwarf_index_cu *cu = + drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, i); + lookup[i].buf = (uintptr_t)cu->buf; + lookup[i].index = i; + } + qsort(lookup, + drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus), + sizeof(lookup[0]), + drgn_dwarf_index_cu_lookup_cmp); + } else { + thread_err = &drgn_enomem; } - if (cu_err) { - #pragma omp critical(drgn_dwarf_info_update_index_error) + } + + // Read the abbreviation tables of new CUs. + #pragma omp for schedule(dynamic) nowait + for (size_t i = dbinfo->dwarf.global.cus_indexed; + i < drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus); + i++) { + if (thread_err) + continue; + struct drgn_dwarf_index_cu *cu = + drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, i); + thread_err = read_cu(cu); + } + if (thread_err) { + #pragma omp critical(drgn_dwarf_info_update_index_error) + { if (err) - drgn_error_destroy(cu_err); + drgn_error_destroy(thread_err); else - err = cu_err; + err = thread_err; + // Same error handling trick as above, except + // that we can't resize the vector anymore for a + // couple of reasons: the CUs now need to be + // properly deinitialized by + // drgn_dwarf_index_cu_deinit(), and we can't + // change the iteration count of the above loop + // while it is running on other threads. 
+ new_cus_size = dbinfo->dwarf.global.cus_indexed; } + thread_err = NULL; } - } - for (int i = 0; i < drgn_num_threads - 1; i++) { - err = drgn_dwarf_specification_map_merge(&dbinfo->dwarf.specifications, - &maps[i].specifications, - err); - } - if (err) - goto err; + #pragma omp barrier - #pragma omp parallel num_threads(drgn_num_threads) - { - struct drgn_error *thread_err; + // Do the first indexing pass. + struct drgn_dwarf_specification_map *specifications; + if (thread_num == 0) { + specifications = &dbinfo->dwarf.specifications; + } else { + specifications = &threads[thread_num - 1].specifications; + drgn_dwarf_specification_map_init(specifications); + } + VECTOR(drgn_dwarf_index_cu_buffer_stack, buffer_stack); + #pragma omp for schedule(dynamic) nowait + for (size_t i = dbinfo->dwarf.global.cus_indexed; + i < new_cus_size; i++) { + if (thread_err) + continue; + struct drgn_dwarf_index_cu *cu = + drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, i); + drgn_dwarf_index_cu_buffer_stack_clear(&buffer_stack); + struct drgn_dwarf_index_cu_buffer *buffer = + drgn_dwarf_index_cu_buffer_stack_append_entry(&buffer_stack); + drgn_dwarf_index_cu_buffer_init(buffer, cu); + buffer->bb.pos += cu_header_size(cu); + thread_err = index_cu_first_pass(specifications, + &buffer_stack); + } + if (thread_err) { + #pragma omp critical(drgn_dwarf_info_update_index_error) + if (err) + drgn_error_destroy(thread_err); + else + err = thread_err; + thread_err = NULL; + } + #pragma omp barrier + + // Merge the per-thread specification maps into dbinfo (and free + // them). + #pragma omp master + { + for (int i = 0; i < drgn_num_threads - 1; i++) { + err = drgn_dwarf_specification_map_merge(&dbinfo->dwarf.specifications, + &threads[i].specifications, + err); + } + // Same error handling trick as above. + if (err) + new_cus_size = dbinfo->dwarf.global.cus_indexed; + } + #pragma omp barrier + + // Do the second indexing pass. 
struct drgn_dwarf_index_die_map *map; struct drgn_dwarf_base_type_map *base_types; - int thread_num = omp_get_thread_num(); if (thread_num == 0) { map = dbinfo->dwarf.global.map; base_types = &dbinfo->dwarf.base_types; } else { - array_for_each(tag_map, maps[thread_num - 1].map) + array_for_each(tag_map, threads[thread_num - 1].map) drgn_dwarf_index_die_map_init(tag_map); - map = maps[thread_num - 1].map; - base_types = &maps[thread_num - 1].base_types; + map = threads[thread_num - 1].map; + base_types = &threads[thread_num - 1].base_types; drgn_dwarf_base_type_map_init(base_types); } #pragma omp for schedule(dynamic) for (size_t i = dbinfo->dwarf.global.cus_indexed; - i < drgn_dwarf_index_cu_vector_size(cus); i++) { - if (err) + i < new_cus_size; i++) { + if (thread_err) continue; struct drgn_dwarf_index_cu *cu = - drgn_dwarf_index_cu_vector_at(cus, i); - struct drgn_dwarf_index_cu_buffer buffer; - drgn_dwarf_index_cu_buffer_init(&buffer, cu); - buffer.bb.pos += cu_header_size(cu); + drgn_dwarf_index_cu_vector_at(&dbinfo->dwarf.index_cus, i); + drgn_dwarf_index_cu_buffer_stack_clear(&buffer_stack); + struct drgn_dwarf_index_cu_buffer *buffer = + drgn_dwarf_index_cu_buffer_stack_append_entry(&buffer_stack); + drgn_dwarf_index_cu_buffer_init(buffer, cu); + buffer->bb.pos += cu_header_size(cu); thread_err = index_cu_second_pass(dbinfo, map, - base_types, &buffer); - if (thread_err) { - #pragma omp critical(drgn_dwarf_info_update_index_error) - if (err) - drgn_error_destroy(thread_err); - else - err = thread_err; - } + base_types, + &buffer_stack); } - thread_err = err; - + // Merge the per-thread DIE and base type maps into dbinfo (and + // free them). 
#pragma omp for schedule(dynamic) nowait for (size_t i = 0; i <= array_size(dbinfo->dwarf.global.map); i++) { if (i < array_size(dbinfo->dwarf.global.map)) { for (int j = 0; j < drgn_num_threads - 1; j++) { thread_err = drgn_dwarf_index_die_map_merge(&dbinfo->dwarf.global.map[i], - &maps[j].map[i], + &threads[j].map[i], thread_err); } } else { for (int j = 0; j < drgn_num_threads - 1; j++) { thread_err = drgn_dwarf_base_type_map_merge(&dbinfo->dwarf.base_types, - &maps[j].base_types, + &threads[j].base_types, thread_err); } } @@ -1948,20 +2385,19 @@ drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state) } if (err) { -err: dbinfo->dwarf.global.saved_err = err; return drgn_error_copy(err); } - qsort(drgn_dwarf_index_cu_vector_begin(cus), - drgn_dwarf_index_cu_vector_size(cus), - sizeof(struct drgn_dwarf_index_cu), drgn_dwarf_index_cu_cmp); + dbinfo->modules_pending_indexing = NULL; dbinfo->dwarf.global.cus_indexed = - drgn_dwarf_index_cu_vector_size(cus); + drgn_dwarf_index_cu_vector_size(&dbinfo->dwarf.index_cus); return NULL; } -static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) +static struct drgn_error *index_namespace_impl(struct drgn_namespace_dwarf_index *ns) { + struct drgn_error *err; + size_t num_index_cus = drgn_dwarf_index_cu_vector_size(&ns->dbinfo->dwarf.index_cus); if (ns->cus_indexed >= num_index_cus) @@ -1972,12 +2408,10 @@ static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) // The parent namespace must be indexed first so that the DIEs for this // namespace are populated. 
- struct drgn_error *err = index_namespace(ns->parent); + err = index_namespace_impl(ns->parent); if (err) return err; - drgn_blocking_guard(ns->dbinfo->prog); - struct drgn_dwarf_index_die_vector *die_vectors_to_index[DRGN_DWARF_INDEX_NUM_NAMESPACE_TAGS]; int tags_to_index[DRGN_DWARF_INDEX_NUM_NAMESPACE_TAGS]; @@ -2014,8 +2448,7 @@ static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) err = NULL; #pragma omp parallel num_threads(drgn_num_threads) { - struct drgn_error *thread_err; - + struct drgn_error *thread_err = NULL; struct drgn_dwarf_index_die_map *map; int thread_num = omp_get_thread_num(); if (thread_num == 0) { @@ -2025,6 +2458,7 @@ static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) drgn_dwarf_index_die_map_init(tag_map); map = maps[thread_num - 1]; } + VECTOR(drgn_dwarf_index_cu_buffer_stack, buffer_stack); for (int i = 0; i < num_tags_to_index; i++) { struct drgn_dwarf_index_die_vector *dies = @@ -2032,31 +2466,24 @@ static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) #pragma omp for schedule(dynamic) nowait for (uint32_t j = ns->dies_indexed[tags_to_index[i]]; j < drgn_dwarf_index_die_vector_size(dies); j++) { - if (err) + if (thread_err) continue; uintptr_t die_addr = *drgn_dwarf_index_die_vector_at(dies, j); struct drgn_dwarf_index_cu *cu = drgn_dwarf_index_find_cu(ns->dbinfo, die_addr); - struct drgn_dwarf_index_cu_buffer buffer; - drgn_dwarf_index_cu_buffer_init(&buffer, cu); - buffer.bb.pos = (void *)die_addr; + drgn_dwarf_index_cu_buffer_stack_clear(&buffer_stack); + struct drgn_dwarf_index_cu_buffer *buffer = + drgn_dwarf_index_cu_buffer_stack_append_entry(&buffer_stack); + drgn_dwarf_index_cu_buffer_init(buffer, cu); + buffer->bb.pos = (void *)die_addr; thread_err = index_cu_second_pass(ns->dbinfo, map, NULL, - &buffer); - if (thread_err) { - #pragma omp critical(drgn_index_namespace_error) - if (err) - drgn_error_destroy(thread_err); - else - err = 
thread_err; - } + &buffer_stack); } } #pragma omp barrier - thread_err = err; - #pragma omp for schedule(dynamic) nowait for (size_t i = 0; i < array_size(ns->map); i++) { for (int j = 0; j < drgn_num_threads - 1; j++) { @@ -2086,6 +2513,26 @@ static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) return NULL; } +static struct drgn_error *index_namespace(struct drgn_namespace_dwarf_index *ns) +{ + if (!ns->dbinfo->modules_pending_indexing + && (ns->cus_indexed + >= drgn_dwarf_index_cu_vector_size(&ns->dbinfo->dwarf.index_cus))) + return NULL; + + drgn_blocking_guard(); + + struct drgn_error *err = drgn_dwarf_index_update(ns->dbinfo); + if (err) + return err; + return index_namespace_impl(ns); +} + +struct drgn_error *drgn_dwarf_info_update_index(struct drgn_debug_info *dbinfo) +{ + return index_namespace(&dbinfo->dwarf.global); +} + /** * Iterator over DWARF debugging information. * @@ -2285,28 +2732,29 @@ static struct drgn_error *drgn_language_from_die(Dwarf_Die *die, bool fall_back, return NULL; } -struct drgn_error * -drgn_debug_info_main_language(struct drgn_debug_info *dbinfo, - const struct drgn_language **ret) +const struct drgn_language * +drgn_debug_info_main_language(struct drgn_debug_info *dbinfo) { struct drgn_error *err; struct drgn_dwarf_index_iterator it; const enum drgn_dwarf_index_tag tag = DRGN_DWARF_INDEX_subprogram; err = drgn_dwarf_index_iterator_init(&it, &dbinfo->dwarf.global, "main", strlen("main"), &tag, 1); - if (err) - return err; + if (err) { + drgn_error_destroy(err); + return NULL; + } Dwarf_Die die; while (drgn_dwarf_index_iterator_next(&it, &die, NULL)) { - err = drgn_language_from_die(&die, false, ret); + const struct drgn_language *lang; + err = drgn_language_from_die(&die, false, &lang); if (err) { drgn_error_destroy(err); continue; } - if (*ret) - return NULL; + if (lang) + return lang; } - *ret = NULL; return NULL; } @@ -2531,7 +2979,10 @@ struct drgn_error *drgn_module_find_dwarf_scopes(struct 
drgn_module *module, *length_ret = 0; return NULL; } - Dwarf *dwarf = module->debug_file->dwarf; + Dwarf *dwarf; + err = drgn_elf_file_get_dwarf(module->debug_file, &dwarf); + if (err) + return err; *bias_ret = module->debug_file_bias; pc -= module->debug_file_bias; @@ -2616,8 +3067,7 @@ struct drgn_error *drgn_find_die_ancestors(Dwarf_Die *die, Dwarf_Die **dies_ret, if (!dwarf) return drgn_error_libdw(); - _cleanup_(dwarf_die_vector_deinit) - struct dwarf_die_vector dies = VECTOR_INIT; + VECTOR(dwarf_die_vector, dies); Dwarf_Die *cu_die = dwarf_die_vector_append_entry(&dies); if (!cu_die) return &drgn_enomem; @@ -2772,16 +3222,17 @@ static struct drgn_error *drgn_dwarf_next_addrx(struct binary_buffer *bb, return drgn_error_create(DRGN_ERROR_OTHER, "indirect address without .debug_addr section"); } - err = drgn_elf_file_cache_section(file, DRGN_SCN_DEBUG_ADDR); + Elf_Data *data; + err = drgn_elf_file_read_section(file, DRGN_SCN_DEBUG_ADDR, &data); if (err) return err; - if (base > file->scn_data[DRGN_SCN_DEBUG_ADDR]->d_size) { + if (base > data->d_size) { return drgn_error_create(DRGN_ERROR_OTHER, "DW_AT_addr_base is out of bounds"); } - *addr_base = (char *)file->scn_data[DRGN_SCN_DEBUG_ADDR]->d_buf + base; + *addr_base = (char *)data->d_buf + base; // In DWARF 5, there is a header immediately before addr_base, // which ends with a segment selector size. We don't support a // segment selector yet. In GNU Debug Fission, .debug_addr @@ -2804,6 +3255,7 @@ static struct drgn_error *drgn_dwarf_next_addrx(struct binary_buffer *bb, if ((err = binary_buffer_next_uleb128(bb, &index))) return err; + // The data must was cached when we cached addr_base. 
Elf_Data *data = file->scn_data[DRGN_SCN_DEBUG_ADDR]; if (index >= ((char *)data->d_buf + data->d_size - *addr_base) / address_size) { @@ -2849,10 +3301,10 @@ static struct drgn_error *drgn_dwarf_read_loclistx(struct drgn_elf_file *file, return drgn_error_create(DRGN_ERROR_OTHER, "DW_FORM_loclistx without .debug_loclists section"); } - err = drgn_elf_file_cache_section(file, DRGN_SCN_DEBUG_LOCLISTS); + Elf_Data *data; + err = drgn_elf_file_read_section(file, DRGN_SCN_DEBUG_LOCLISTS, &data); if (err) return err; - Elf_Data *data = file->scn_data[DRGN_SCN_DEBUG_LOCLISTS]; if (base > data->d_size) { return drgn_error_create(DRGN_ERROR_OTHER, @@ -2893,12 +3345,11 @@ static struct drgn_error *drgn_dwarf5_location_list(struct drgn_elf_file *file, return drgn_error_create(DRGN_ERROR_OTHER, "loclist without .debug_loclists section"); } - err = drgn_elf_file_cache_section(file, DRGN_SCN_DEBUG_LOCLISTS); - if (err) - return err; struct drgn_elf_file_section_buffer buffer; - drgn_elf_file_section_buffer_init_index(&buffer, file, + err = drgn_elf_file_section_buffer_read(&buffer, file, DRGN_SCN_DEBUG_LOCLISTS); + if (err) + return err; if (offset > buffer.bb.end - buffer.bb.pos) { return drgn_error_create(DRGN_ERROR_OTHER, "loclist is out of bounds"); @@ -3031,17 +3482,16 @@ drgn_dwarf4_split_location_list(struct drgn_elf_file *file, Dwarf_Word offset, return drgn_error_create(DRGN_ERROR_OTHER, "loclistptr without .debug_loc section"); } - err = drgn_elf_file_cache_section(file, DRGN_SCN_DEBUG_LOC); - if (err) - return err; Dwarf_Off dwp_offset; if (dwarf_cu_dwp_section_info(cu_die->cu, DW_SECT_LOCLISTS, &dwp_offset, NULL)) return drgn_error_libdw(); offset += dwp_offset; struct drgn_elf_file_section_buffer buffer; - drgn_elf_file_section_buffer_init_index(&buffer, file, + err = drgn_elf_file_section_buffer_read(&buffer, file, DRGN_SCN_DEBUG_LOC); + if (err) + return err; if (offset > buffer.bb.end - buffer.bb.pos) { return drgn_error_create(DRGN_ERROR_OTHER, "loclistptr is 
out of bounds"); @@ -3146,12 +3596,11 @@ static struct drgn_error *drgn_dwarf4_location_list(struct drgn_elf_file *file, return drgn_error_create(DRGN_ERROR_OTHER, "loclistptr without .debug_loc section"); } - err = drgn_elf_file_cache_section(file, DRGN_SCN_DEBUG_LOC); - if (err) - return err; struct drgn_elf_file_section_buffer buffer; - drgn_elf_file_section_buffer_init_index(&buffer, file, + err = drgn_elf_file_section_buffer_read(&buffer, file, DRGN_SCN_DEBUG_LOC); + if (err) + return err; if (offset > buffer.bb.end - buffer.bb.pos) { return drgn_error_create(DRGN_ERROR_OTHER, "loclistptr is out of bounds"); @@ -3349,6 +3798,48 @@ drgn_dwarf_frame_base(struct drgn_program *prog, struct drgn_elf_file *file, Dwarf_Die *die, const struct drgn_register_state *regs, int *remaining_ops, uint64_t *ret); +static struct drgn_error drgn_unknown_dwarf_opcode = { + .code = DRGN_ERROR_NOT_IMPLEMENTED, + .message = "unknown DWARF expression opcode", +}; + +static bool drgn_dwarf_opcode_is_known(uint8_t opcode) +{ +#define X(name, _) if (opcode == name) return true; + DW_OP_DEFINITIONS +#undef X + return false; +} + +static struct drgn_error * +drgn_handle_unknown_dwarf_opcode(struct drgn_dwarf_expression_context *ctx, + uint8_t opcode, + bool after_simple_location_description) +{ + // We warn the first time that we see an opcode that appears to be + // valid. + static bool warned; + enum drgn_log_level log_level = DRGN_LOG_DEBUG; + if (drgn_dwarf_opcode_is_known(opcode) + && !__atomic_test_and_set(&warned, __ATOMIC_SEQ_CST)) + log_level = DRGN_LOG_WARNING; + if (drgn_log_is_enabled(ctx->prog, log_level)) { + struct drgn_error *err; + char op_buf[DW_OP_STR_BUF_LEN]; + err = binary_buffer_error(&ctx->bb, + "unknown DWARF expression opcode %s%s; " + "please report this to %s", + dw_op_str(opcode, op_buf), + after_simple_location_description + ? 
" after simple location description" + : "", + PACKAGE_BUGREPORT); + drgn_error_log(log_level, ctx->prog, err, ""); + drgn_error_destroy(err); + } + return &drgn_unknown_dwarf_opcode; +} + /* * Evaluate a DWARF expression up to the next location description operation or * operation that can't be evaluated in the given context. @@ -3411,6 +3902,17 @@ drgn_eval_dwarf_expression(struct drgn_dwarf_expression_context *ctx, address_size, &uvalue))) return err; +addr: + /* + * If the address is not in the module's address range, + * then it's probably something special like a Linux + * per-CPU variable (which isn't actually a variable + * address but an offset). Don't apply the bias in that + * case. + */ + if (drgn_module_contains_address(ctx->file->module, + uvalue + ctx->file->module->debug_file_bias)) + uvalue += ctx->file->module->debug_file_bias; PUSH(uvalue); break; case DW_OP_const1u: @@ -3473,8 +3975,19 @@ drgn_eval_dwarf_expression(struct drgn_dwarf_expression_context *ctx, PUSH_MASK(uvalue); break; case DW_OP_addrx: - case DW_OP_constx: case DW_OP_GNU_addr_index: + if (!ctx->cu_die.addr) { + ctx->bb.pos = ctx->bb.prev; + return NULL; + } + if ((err = drgn_dwarf_next_addrx(&ctx->bb, ctx->file, + &ctx->cu_die, + address_size, + &ctx->cu_addr_base, + &uvalue))) + return err; + goto addr; + case DW_OP_constx: case DW_OP_GNU_const_index: if (!ctx->cu_die.addr) { ctx->bb.pos = ctx->bb.prev; @@ -3775,6 +4288,15 @@ drgn_eval_dwarf_expression(struct drgn_dwarf_expression_context *ctx, // address and using the DW_AT_(GNU_)call_value of a // DW_TAG_(GNU_)call_parameter with a DW_AT_location // matching that register. 
+ if (drgn_log_is_enabled(ctx->prog, DRGN_LOG_DEBUG)) { + char op_buf[DW_OP_STR_BUF_LEN]; + err = binary_buffer_error(&ctx->bb, + "unimplemented DWARF expression opcode %s; " + "please upvote https://github.com/osandov/drgn/issues/337", + dw_op_str(opcode, op_buf)); + drgn_error_log_debug(ctx->prog, err, ""); + drgn_error_destroy(err); + } return &drgn_not_found; /* Location description operations. */ case DW_OP_reg0 ... DW_OP_reg31: @@ -3799,14 +4321,8 @@ drgn_eval_dwarf_expression(struct drgn_dwarf_expression_context *ctx, * DW_OP_xderef_size, DW_OP_xderef_type. */ default: - { - char op_buf[DW_OP_STR_BUF_LEN]; - return binary_buffer_error(&ctx->bb, - "unknown DWARF expression opcode %s; " - "please report this to %s", - dw_op_str(opcode, op_buf), - PACKAGE_BUGREPORT); - } + return drgn_handle_unknown_dwarf_opcode(ctx, opcode, + false); } } @@ -3846,8 +4362,7 @@ drgn_dwarf_frame_base(struct drgn_program *prog, struct drgn_elf_file *file, NULL, regs, expr, expr_size))) return err; - _cleanup_(uint64_vector_deinit) - struct uint64_vector stack = VECTOR_INIT; + VECTOR(uint64_vector, stack); for (;;) { err = drgn_eval_dwarf_expression(&ctx, &stack, remaining_ops); if (err) @@ -4183,8 +4698,11 @@ drgn_object_from_dwarf_subprogram(struct drgn_debug_info *dbinfo, if (err) return err; Dwarf_Addr low_pc; - if (dwarf_lowpc(die, &low_pc) == -1) - return drgn_object_set_absent(ret, qualified_type, 0); + if (dwarf_lowpc(die, &low_pc) == -1) { + return drgn_object_set_absent(ret, qualified_type, + DRGN_ABSENCE_REASON_OPTIMIZED_OUT, + 0); + } return drgn_object_set_reference(ret, qualified_type, low_pc + file->module->debug_file_bias, 0, 0); @@ -4285,6 +4803,9 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, uint64_t address = 0; /* GCC thinks this may be used uninitialized. */ int bit_offset = -1; /* -1 means that we don't have an address. 
*/ + enum drgn_absence_reason absence_reason = + DRGN_ABSENCE_REASON_OPTIMIZED_OUT; + uint64_t bit_pos = 0; int remaining_ops = MAX_DWARF_EXPR_OPS; @@ -4297,6 +4818,13 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, do { uint64_vector_clear(&stack); err = drgn_eval_dwarf_expression(&ctx, &stack, &remaining_ops); + if (err) { + if (err == &drgn_unknown_dwarf_opcode) + absence_reason = DRGN_ABSENCE_REASON_NOT_IMPLEMENTED; + else if (err != &drgn_not_found) + goto out; + goto absent; + } if (err == &drgn_not_found) goto absent; else if (err) @@ -4398,10 +4926,10 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, piece_bit_size = type.bit_size - bit_pos; break; default: - err = binary_buffer_error(&ctx.bb, - "unknown DWARF expression opcode %#" PRIx8 " after simple location description", - opcode); - goto out; + drgn_handle_unknown_dwarf_opcode(&ctx, opcode, + true); + absence_reason = DRGN_ABSENCE_REASON_NOT_IMPLEMENTED; + goto absent; } } else { piece_bit_size = type.bit_size - bit_pos; @@ -4536,20 +5064,9 @@ drgn_object_from_dwarf_location(struct drgn_program *prog, return drgn_error_create(DRGN_ERROR_OTHER, "DW_AT_template_value_parameter is missing value"); } - drgn_object_reinit(ret, &type, DRGN_OBJECT_ABSENT); + drgn_object_set_absent_internal(ret, &type, absence_reason); err = NULL; } else if (bit_offset >= 0) { - Dwarf_Addr start, end, bias; - dwfl_module_info(file->module->dwfl_module, NULL, &start, &end, - &bias, NULL, NULL, NULL); - /* - * If the address is not in the module's address range, then - * it's probably something special like a Linux per-CPU variable - * (which isn't actually a variable address but an offset). - * Don't apply the bias in that case. 
- */ - if (start <= address + bias && address + bias < end) - address += bias; err = drgn_object_set_reference_internal(ret, &type, address, bit_offset); } else if (type.encoding == DRGN_OBJECT_ENCODING_BUFFER) { @@ -4683,8 +5200,7 @@ struct drgn_error *drgn_dwarf_scopes_names(Dwarf_Die *scopes, { struct drgn_error *err; Dwarf_Die die; - _cleanup_(const_char_p_vector_deinit) - struct const_char_p_vector vec = VECTOR_INIT; + VECTOR(const_char_p_vector, vec); for (size_t scope = 0; scope < num_scopes; scope++) { if (dwarf_child(&scopes[scope], &die) != 0) continue; @@ -4996,6 +5512,7 @@ drgn_dwarf_member_thunk_fn(struct drgn_object *res, void *arg_) } err = drgn_object_set_absent(res, qualified_type, + DRGN_ABSENCE_REASON_OTHER, bit_field_size); if (err) return err; @@ -5263,7 +5780,8 @@ drgn_dwarf_template_type_parameter_thunk_fn(struct drgn_object *res, void *arg_) if (err) return err; - err = drgn_object_set_absent(res, qualified_type, 0); + err = drgn_object_set_absent(res, qualified_type, + DRGN_ABSENCE_REASON_OTHER, 0); if (err) return err; } @@ -5777,8 +6295,7 @@ drgn_array_type_from_dwarf(struct drgn_debug_info *dbinfo, struct drgn_type **ret) { struct drgn_error *err; - _cleanup_(array_dimension_vector_deinit) - struct array_dimension_vector dimensions = VECTOR_INIT; + VECTOR(array_dimension_vector, dimensions); struct array_dimension *dimension; Dwarf_Die child; int r = dwarf_child(die, &child); @@ -5853,7 +6370,8 @@ drgn_dwarf_formal_parameter_thunk_fn(struct drgn_object *res, void *arg_) if (err) return err; - err = drgn_object_set_absent(res, qualified_type, 0); + err = drgn_object_set_absent(res, qualified_type, + DRGN_ABSENCE_REASON_OTHER, 0); if (err) return err; } @@ -6623,20 +7141,14 @@ static struct drgn_error *drgn_parse_dwarf_cfi(struct drgn_dwarf_cfi *cfi, &file->module->dwarf.datarel_base); } - err = drgn_elf_file_cache_section(file, scn); - if (err) - return err; - - _cleanup_(drgn_dwarf_cie_vector_deinit) - struct drgn_dwarf_cie_vector cies = 
VECTOR_INIT; - _cleanup_(drgn_dwarf_fde_vector_deinit) - struct drgn_dwarf_fde_vector fdes = VECTOR_INIT; - _cleanup_(drgn_dwarf_cie_map_deinit) - struct drgn_dwarf_cie_map cie_map = HASH_TABLE_INIT; + VECTOR(drgn_dwarf_cie_vector, cies); + VECTOR(drgn_dwarf_fde_vector, fdes); + HASH_TABLE(drgn_dwarf_cie_map, cie_map); - Elf_Data *data = file->scn_data[scn]; struct drgn_elf_file_section_buffer buffer; - drgn_elf_file_section_buffer_init_index(&buffer, file, scn); + err = drgn_elf_file_section_buffer_read(&buffer, file, scn); + if (err) + return err; while (binary_buffer_has_next(&buffer.bb)) { uint32_t tmp; if ((err = binary_buffer_next_u32(&buffer.bb, &tmp))) @@ -6688,13 +7200,13 @@ static struct drgn_error *drgn_parse_dwarf_cfi(struct drgn_dwarf_cfi *cfi, size_t pointer_offset = (buffer.bb.pos - (is_64_bit ? 8 : 4) - - (char *)data->d_buf); + - (char *)buffer.data->d_buf); if (cie_pointer > pointer_offset) { return binary_buffer_error(&buffer.bb, "CIE pointer is out of bounds"); } cie_pointer = pointer_offset - cie_pointer; - } else if (cie_pointer > data->d_size) { + } else if (cie_pointer > buffer.data->d_size) { return binary_buffer_error(&buffer.bb, "CIE pointer is out of bounds"); } @@ -6753,7 +7265,8 @@ static struct drgn_error *drgn_parse_dwarf_cfi(struct drgn_dwarf_cfi *cfi, } buffer.bb.pos = buffer.bb.end; - buffer.bb.end = (const char *)data->d_buf + data->d_size; + buffer.bb.end = (const char *)buffer.data->d_buf + + buffer.data->d_size; } drgn_dwarf_cie_vector_shrink_to_fit(&cies); @@ -7155,6 +7668,11 @@ drgn_eval_dwarf_cfi(struct drgn_elf_file *file, enum drgn_section_index scn, goto set_reg; } fallthrough; + case DW_CFA_GNU_args_size: + // We have no use for this. Skip it. 
+ if ((err = binary_buffer_skip_leb128(&buffer.bb))) + goto out; + break; default: err = binary_buffer_error(&buffer.bb, "unknown DWARF CFI opcode %#" PRIx8, @@ -7261,8 +7779,7 @@ drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, void *buf, size_t size) { struct drgn_error *err; - _cleanup_(uint64_vector_deinit) struct uint64_vector stack = - VECTOR_INIT; + VECTOR(uint64_vector, stack); if (rule->push_cfa) { struct optional_uint64 cfa = drgn_register_state_get_cfa(regs); @@ -7277,8 +7794,11 @@ drgn_eval_cfi_dwarf_expression(struct drgn_program *prog, drgn_dwarf_expression_context_init(&ctx, prog, file, NULL, NULL, regs, rule->expr, rule->expr_size); err = drgn_eval_dwarf_expression(&ctx, &stack, &remaining_ops); - if (err) + if (err) { + if (err == &drgn_unknown_dwarf_opcode) + err = &drgn_not_found; return err; + } if (binary_buffer_has_next(&ctx.bb)) { uint8_t opcode; err = binary_buffer_next_u8(&ctx.bb, &opcode); diff --git a/libdrgn/dwarf_info.h b/libdrgn/dwarf_info.h index 81761efcd..90576aea5 100644 --- a/libdrgn/dwarf_info.h +++ b/libdrgn/dwarf_info.h @@ -196,6 +196,12 @@ struct drgn_dwarf_info { struct drgn_dwarf_specification_map specifications; /** Indexed compilation units. */ struct drgn_dwarf_index_cu_vector index_cus; + /** + * Lookup table for indexed compilation units sorted on buffer address. + * + * Size is equal to that of @ref index_cus. + */ + struct drgn_dwarf_index_cu_lookup *index_cu_lookup; /** * Cache of parsed types. @@ -216,39 +222,7 @@ struct drgn_dwarf_info { void drgn_dwarf_info_init(struct drgn_debug_info *dbinfo); void drgn_dwarf_info_deinit(struct drgn_debug_info *dbinfo); -/** - * State tracked while indexing new DWARF information in a @ref drgn_dwarf_info. - */ -struct drgn_dwarf_index_state { - struct drgn_debug_info *dbinfo; - /** Per-thread arrays of CUs to be indexed. */ - struct drgn_dwarf_index_cu_vector *cus; -}; - -/** - * Initialize state for indexing new DWARF information. 
- * - * @return @c true on success, @c false on failure to allocate memory. - */ -bool drgn_dwarf_index_state_init(struct drgn_dwarf_index_state *state, - struct drgn_debug_info *dbinfo); - -/** Deinitialize state for indexing new DWARF information. */ -void drgn_dwarf_index_state_deinit(struct drgn_dwarf_index_state *state); - -/** Read a @ref drgn_elf_file to index its DWARF information. */ -struct drgn_error * -drgn_dwarf_index_read_file(struct drgn_dwarf_index_state *state, - struct drgn_elf_file *file); - -/** - * Index new DWARF information. - * - * This should be called once all files have been read with @ref - * drgn_dwarf_index_read_file() to finish indexing those files. - */ -struct drgn_error * -drgn_dwarf_info_update_index(struct drgn_dwarf_index_state *state); +struct drgn_error *drgn_dwarf_info_update_index(struct drgn_debug_info *dbinfo); /** * Find the DWARF DIEs in a @ref drgn_module for the scope containing a given diff --git a/libdrgn/elf_file.c b/libdrgn/elf_file.c index bf5952b6b..cbc81a6e5 100644 --- a/libdrgn/elf_file.c +++ b/libdrgn/elf_file.c @@ -3,13 +3,16 @@ #include #include +#include #include #include #include #include #include +#include #include "array.h" +#include "debug_info.h" #include "drgn_internal.h" #include "elf_file.h" #include "error.h" @@ -35,6 +38,16 @@ struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret) return NULL; } +void truncate_elf_string_data(Elf_Data *data) +{ + const char *buf = data->d_buf; + const char *nul = memrchr(buf, '\0', data->d_size); + if (nul) + data->d_size = nul - buf + 1; + else + data->d_size = 0; +} + #include "drgn_section_name_to_index.inc" enum drgn_dwarf_file_type { @@ -45,163 +58,520 @@ enum drgn_dwarf_file_type { }; struct drgn_error *drgn_elf_file_create(struct drgn_module *module, - const char *path, Elf *elf, - struct drgn_elf_file **ret) + const char *path, int fd, char *image, + Elf *elf, struct drgn_elf_file **ret) { - struct drgn_error *err; + if (elf_kind(elf) != 
ELF_K_ELF) + return drgn_error_create(DRGN_ERROR_OTHER, "not an ELF file"); + GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(elf, &ehdr_mem); if (!ehdr) return drgn_error_libelf(); - size_t shstrndx; - if (elf_getshdrstrndx(elf, &shstrndx)) - return drgn_error_libelf(); - struct drgn_elf_file *file = calloc(1, sizeof(*file)); + _cleanup_free_ struct drgn_elf_file *file = calloc(1, sizeof(*file)); if (!file) return &drgn_enomem; - file->module = module; - file->path = path; - file->elf = elf; - drgn_platform_from_elf(ehdr, &file->platform); - // We mimic libdw's logic for choosing debug sections: we either use all - // .debug_* or .zdebug_* sections (DRGN_DWARF_FILE_PLAIN), all - // .debug_*.dwo or .zdebug_*.dwo sections (DRGN_DWARF_FILE_DWO), or all - // .gnu.debuglto_.debug_* sections (DRGN_DWARF_FILE_GNU_LTO), in that - // order of preference. - enum drgn_dwarf_file_type dwarf_file_type = DRGN_DWARF_FILE_NONE; - Elf_Scn *scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - goto err; - } - const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) { - err = drgn_error_libelf(); - goto err; - } + if (ehdr->e_type == ET_EXEC || + ehdr->e_type == ET_DYN || + ehdr->e_type == ET_REL) { + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx)) + return drgn_error_libelf(); + + bool has_sections = false; + bool has_alloc_section = false; + // We mimic libdw's logic for choosing debug sections: we either + // use all .debug_* or .zdebug_* sections + // (DRGN_DWARF_FILE_PLAIN), all .debug_*.dwo or .zdebug_*.dwo + // sections (DRGN_DWARF_FILE_DWO), or all .gnu.debuglto_.debug_* + // sections (DRGN_DWARF_FILE_GNU_LTO), in that order of + // preference. 
+ enum drgn_dwarf_file_type dwarf_file_type = DRGN_DWARF_FILE_NONE; + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + + has_sections = true; + if (shdr->sh_type != SHT_NOBITS && + shdr->sh_type != SHT_NOTE && + (shdr->sh_flags & SHF_ALLOC)) + has_alloc_section = true; - enum drgn_dwarf_file_type dwarf_section_type; - if (strcmp(scnname, ".debug_cu_index") == 0 || - strcmp(scnname, ".debug_tu_index") == 0) { - dwarf_section_type = DRGN_DWARF_FILE_DWO; - } else if (strstartswith(scnname, ".debug_") || - strstartswith(scnname, ".zdebug_")) { - if (strcmp(scnname + strlen(scnname) - 4, ".dwo") == 0) + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + return drgn_error_libelf(); + + enum drgn_dwarf_file_type dwarf_section_type; + if (strcmp(scnname, ".debug_cu_index") == 0 || + strcmp(scnname, ".debug_tu_index") == 0) { dwarf_section_type = DRGN_DWARF_FILE_DWO; - else - dwarf_section_type = DRGN_DWARF_FILE_PLAIN; - } else if (strstartswith(scnname, ".gnu.debuglto_.debug")) { - dwarf_section_type = DRGN_DWARF_FILE_GNU_LTO; - } else { - dwarf_section_type = DRGN_DWARF_FILE_NONE; + } else if (strstartswith(scnname, ".debug_") || + strstartswith(scnname, ".zdebug_")) { + if (strcmp(scnname + strlen(scnname) - 4, ".dwo") == 0) + dwarf_section_type = DRGN_DWARF_FILE_DWO; + else + dwarf_section_type = DRGN_DWARF_FILE_PLAIN; + } else if (strstartswith(scnname, ".gnu.debuglto_.debug")) { + dwarf_section_type = DRGN_DWARF_FILE_GNU_LTO; + } else { + dwarf_section_type = DRGN_DWARF_FILE_NONE; + } + dwarf_file_type = max(dwarf_file_type, dwarf_section_type); } - dwarf_file_type = max(dwarf_file_type, dwarf_section_type); - } - scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - goto err; - } + scn = NULL; + while ((scn = 
elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); - if (shdr->sh_type != SHT_PROGBITS) - continue; + if (shdr->sh_type != SHT_PROGBITS) + continue; - const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) { - err = drgn_error_libelf(); - goto err; - } + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + return drgn_error_libelf(); - enum drgn_section_index index; - if (strstartswith(scnname, ".debug_") || - strstartswith(scnname, ".zdebug_")) { - const char *subname; - if (strstartswith(scnname, ".zdebug_")) - subname = scnname + sizeof(".zdebug_") - 1; - else - subname = scnname + sizeof(".debug_") - 1; - size_t len = strlen(subname); - if (len >= 4 - && strcmp(subname + len - 4, ".dwo") == 0) { - if (dwarf_file_type != DRGN_DWARF_FILE_DWO) + enum drgn_section_index index; + if (strstartswith(scnname, ".debug_") || + strstartswith(scnname, ".zdebug_")) { + const char *subname; + if (strstartswith(scnname, ".zdebug_")) + subname = scnname + sizeof(".zdebug_") - 1; + else + subname = scnname + sizeof(".debug_") - 1; + size_t len = strlen(subname); + if (len >= 4 + && strcmp(subname + len - 4, ".dwo") == 0) { + if (dwarf_file_type != DRGN_DWARF_FILE_DWO) + continue; + len -= 4; + } else if (dwarf_file_type != DRGN_DWARF_FILE_PLAIN) { continue; - len -= 4; - } else if (dwarf_file_type != DRGN_DWARF_FILE_PLAIN) { - continue; + } + index = drgn_debug_section_name_to_index(subname, len); + } else if (strstartswith(scnname, ".gnu.debuglto_.debug_")) { + if (dwarf_file_type != DRGN_DWARF_FILE_GNU_LTO) + continue; + const char *subname = + scnname + sizeof(".gnu.debuglto_.debug_") - 1; + index = drgn_debug_section_name_to_index(subname, + strlen(subname)); + } else if (strcmp(scnname, ".init.text") == 0) { + // We consider a file to be vmlinux if it has an + // .init.text section and is not relocatable + // (which excludes kernel modules). 
+ // Keep this in sync with elf_is_vmlinux(). + file->is_vmlinux = ehdr->e_type != ET_REL; + index = DRGN_SECTION_INDEX_NUM; + } else { + index = drgn_non_debug_section_name_to_index(scnname); } - index = drgn_debug_section_name_to_index(subname, len); - } else if (strstartswith(scnname, ".gnu.debuglto_.debug_")) { - if (dwarf_file_type != DRGN_DWARF_FILE_GNU_LTO) - continue; - const char *subname = - scnname + sizeof(".gnu.debuglto_.debug_") - 1; - index = drgn_debug_section_name_to_index(subname, - strlen(subname)); + if (index < DRGN_SECTION_INDEX_NUM && !file->scns[index]) + file->scns[index] = scn; + } + + if (ehdr->e_type == ET_REL) { + // We consider a relocatable file "loadable" if it has + // any allocated sections. + file->is_loadable = has_alloc_section; + file->is_relocatable = file->needs_relocation = true; } else { - index = drgn_non_debug_section_name_to_index(scnname); + // We consider executable and shared object files + // loadable if they have any loadable segments, and + // either no sections or at least one allocated section. 
+ bool has_loadable_segment = false; + size_t phnum; + if (elf_getphdrnum(elf, &phnum) != 0) + return drgn_error_libelf(); + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = + gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return drgn_error_libelf(); + if (phdr->p_type == PT_LOAD) { + has_loadable_segment = true; + break; + } + } + file->is_loadable = + has_loadable_segment && + (!has_sections || has_alloc_section); } - if (index < DRGN_SECTION_INDEX_NUM && !file->scns[index]) - file->scns[index] = scn; } - *ret = file; - return NULL; -err: - free(file); - return err; + file->module = module; + file->path = strdup(path); + if (!file->path) + return &drgn_enomem; + file->image = image; + file->fd = fd; + file->elf = elf; + drgn_platform_from_elf(ehdr, &file->platform); + *ret = no_cleanup_ptr(file); + return NULL; } void drgn_elf_file_destroy(struct drgn_elf_file *file) { - free(file); + if (file) { + dwarf_end(file->_dwarf); + elf_end(file->elf); + if (file->fd >= 0) + close(file->fd); + free(file->image); + free(file->path); + free(file); + } +} + +static int should_apply_relocation_section(Elf *elf, size_t shstrndx, + const GElf_Shdr *shdr) +{ + if (shdr->sh_type != SHT_RELA && shdr->sh_type != SHT_REL) + return 0; + + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + return -1; + if (shdr->sh_type == SHT_RELA) { + if (!strstartswith(scnname, ".rela.")) + return 0; + scnname += sizeof(".rela.") - 1; + } else { + if (!strstartswith(scnname, ".rel.")) + return 0; + scnname += sizeof(".rel.") - 1; + } + return (strstartswith(scnname, "debug_") + || strstartswith(scnname, "orc_")); } -static void truncate_null_terminated_section(Elf_Data *data) +static inline struct drgn_error *get_reloc_sym_value(const void *syms, + size_t num_syms, + const uint64_t *sh_addrs, + size_t shdrnum, + bool is_64_bit, + bool bswap, + uint32_t r_sym, + uint64_t *ret) { - if (data) { - const char *buf = data->d_buf; - const char *nul = 
memrchr(buf, '\0', data->d_size); - if (nul) - data->d_size = nul - buf + 1; - else - data->d_size = 0; + if (r_sym >= num_syms) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid ELF relocation symbol"); } + uint16_t st_shndx; + uint64_t st_value; + if (is_64_bit) { + const Elf64_Sym *sym = (Elf64_Sym *)syms + r_sym; + memcpy(&st_shndx, &sym->st_shndx, sizeof(st_shndx)); + memcpy(&st_value, &sym->st_value, sizeof(st_value)); + if (bswap) { + st_shndx = bswap_16(st_shndx); + st_value = bswap_64(st_value); + } + } else { + const Elf32_Sym *sym = (Elf32_Sym *)syms + r_sym; + memcpy(&st_shndx, &sym->st_shndx, sizeof(st_shndx)); + uint32_t st_value32; + memcpy(&st_value32, &sym->st_value, sizeof(st_value32)); + if (bswap) { + st_shndx = bswap_16(st_shndx); + st_value32 = bswap_32(st_value32); + } + st_value = st_value32; + } + if (st_shndx >= shdrnum) { + return drgn_error_create(DRGN_ERROR_OTHER, + "invalid ELF symbol section index"); + } + *ret = sh_addrs[st_shndx] + st_value; + return NULL; } -struct drgn_error *drgn_elf_file_precache_sections(struct drgn_elf_file *file) +static struct drgn_error * +apply_elf_relas(const struct drgn_relocating_section *relocating, + Elf_Data *reloc_data, Elf_Data *symtab_data, + const uint64_t *sh_addrs, size_t shdrnum, + const struct drgn_platform *platform) { struct drgn_error *err; - for (size_t i = 0; i < DRGN_SECTION_INDEX_NUM_PRECACHE; i++) { - if (file->scns[i]) { - err = read_elf_section(file->scns[i], - &file->scn_data[i]); - if (err) - return err; + bool is_64_bit = drgn_platform_is_64_bit(platform); + bool bswap = drgn_platform_bswap(platform); + apply_elf_reloc_fn *apply_elf_reloc = platform->arch->apply_elf_reloc; + + const void *relocs = reloc_data->d_buf; + size_t reloc_size = is_64_bit ? sizeof(Elf64_Rela) : sizeof(Elf32_Rela); + size_t num_relocs = reloc_data->d_size / reloc_size; + + const void *syms = symtab_data->d_buf; + size_t sym_size = is_64_bit ? 
sizeof(Elf64_Sym) : sizeof(Elf32_Sym); + size_t num_syms = symtab_data->d_size / sym_size; + + for (size_t i = 0; i < num_relocs; i++) { + uint64_t r_offset; + uint32_t r_sym; + uint32_t r_type; + int64_t r_addend; + if (is_64_bit) { + const Elf64_Rela *rela = (Elf64_Rela *)relocs + i; + uint64_t r_info; + memcpy(&r_offset, &rela->r_offset, sizeof(r_offset)); + memcpy(&r_info, &rela->r_info, sizeof(r_info)); + memcpy(&r_addend, &rela->r_addend, sizeof(r_addend)); + if (bswap) { + r_offset = bswap_64(r_offset); + r_info = bswap_64(r_info); + r_addend = bswap_64(r_addend); + } + r_sym = ELF64_R_SYM(r_info); + r_type = ELF64_R_TYPE(r_info); + } else { + const Elf32_Rela *rela32 = (Elf32_Rela *)relocs + i; + uint32_t r_offset32; + uint32_t r_info32; + int32_t r_addend32; + memcpy(&r_offset32, &rela32->r_offset, sizeof(r_offset32)); + memcpy(&r_info32, &rela32->r_info, sizeof(r_info32)); + memcpy(&r_addend32, &rela32->r_addend, sizeof(r_addend32)); + if (bswap) { + r_offset32 = bswap_32(r_offset32); + r_info32 = bswap_32(r_info32); + r_addend32 = bswap_32(r_addend32); + } + r_offset = r_offset32; + r_sym = ELF32_R_SYM(r_info32); + r_type = ELF32_R_TYPE(r_info32); + r_addend = r_addend32; } + uint64_t sym_value; + err = get_reloc_sym_value(syms, num_syms, sh_addrs, shdrnum, + is_64_bit, bswap, r_sym, &sym_value); + if (err) + return err; + + err = apply_elf_reloc(relocating, r_offset, r_type, &r_addend, + sym_value); + if (err) + return err; } + return NULL; +} - /* - * Truncate any extraneous bytes so that we can assume that a pointer - * within .debug_{,line_}str is always null-terminated. 
- */ - truncate_null_terminated_section(file->scn_data[DRGN_SCN_DEBUG_STR]); - truncate_null_terminated_section(file->alt_debug_str_data); +static struct drgn_error * +apply_elf_rels(const struct drgn_relocating_section *relocating, + Elf_Data *reloc_data, Elf_Data *symtab_data, + const uint64_t *sh_addrs, size_t shdrnum, + const struct drgn_platform *platform) +{ + struct drgn_error *err; + + bool is_64_bit = drgn_platform_is_64_bit(platform); + bool bswap = drgn_platform_bswap(platform); + apply_elf_reloc_fn *apply_elf_reloc = platform->arch->apply_elf_reloc; + + const void *relocs = reloc_data->d_buf; + size_t reloc_size = is_64_bit ? sizeof(Elf64_Rel) : sizeof(Elf32_Rel); + size_t num_relocs = reloc_data->d_size / reloc_size; + + const void *syms = symtab_data->d_buf; + size_t sym_size = is_64_bit ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym); + size_t num_syms = symtab_data->d_size / sym_size; + + for (size_t i = 0; i < num_relocs; i++) { + uint64_t r_offset; + uint32_t r_sym; + uint32_t r_type; + if (is_64_bit) { + const Elf64_Rel *rel = (Elf64_Rel *)relocs + i; + uint64_t r_info; + memcpy(&r_offset, &rel->r_offset, sizeof(r_offset)); + memcpy(&r_info, &rel->r_info, sizeof(r_info)); + if (bswap) { + r_offset = bswap_64(r_offset); + r_info = bswap_64(r_info); + } + r_sym = ELF64_R_SYM(r_info); + r_type = ELF64_R_TYPE(r_info); + } else { + const Elf32_Rel *rel32 = (Elf32_Rel *)relocs + i; + uint32_t r_offset32; + uint32_t r_info32; + memcpy(&r_offset32, &rel32->r_offset, sizeof(r_offset32)); + memcpy(&r_info32, &rel32->r_info, sizeof(r_info32)); + if (bswap) { + r_offset32 = bswap_32(r_offset32); + r_info32 = bswap_32(r_info32); + } + r_offset = r_offset32; + r_sym = ELF32_R_SYM(r_info32); + r_type = ELF32_R_TYPE(r_info32); + } + uint64_t sym_value; + err = get_reloc_sym_value(syms, num_syms, sh_addrs, shdrnum, + is_64_bit, bswap, r_sym, &sym_value); + if (err) + return err; + + err = apply_elf_reloc(relocating, r_offset, r_type, NULL, + sym_value); + if (err) + 
return err; + } return NULL; } struct drgn_error * -drgn_elf_file_cache_section(struct drgn_elf_file *file, enum drgn_section_index scn) +drgn_elf_file_apply_relocations(struct drgn_elf_file *file) { - if (file->scn_data[scn]) + struct drgn_error *err; + + if (!file->needs_relocation) return NULL; - return read_elf_section(file->scns[scn], &file->scn_data[scn]); + + if (!file->platform.arch->apply_elf_reloc) { + return drgn_error_format(DRGN_ERROR_NOT_IMPLEMENTED, + "relocation support is not implemented for %s architecture", + file->platform.arch->name); + } + + Elf *elf = file->elf; + size_t shdrnum; + if (elf_getshdrnum(elf, &shdrnum)) + return drgn_error_libelf(); + _cleanup_free_ uint64_t *sh_addrs = + calloc(shdrnum, sizeof(sh_addrs[0])); + if (!sh_addrs && shdrnum > 0) + return &drgn_enomem; + + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr *shdr, shdr_mem; + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + sh_addrs[elf_ndxscn(scn)] = shdr->sh_addr; + } + + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx)) + return drgn_error_libelf(); + + Elf_Scn *reloc_scn = NULL; + while ((reloc_scn = elf_nextscn(elf, reloc_scn))) { + GElf_Shdr *reloc_shdr, reloc_shdr_mem; + reloc_shdr = gelf_getshdr(reloc_scn, &reloc_shdr_mem); + if (!reloc_shdr) + return drgn_error_libelf(); + + int r = should_apply_relocation_section(elf, shstrndx, + reloc_shdr); + if (r < 0) + return drgn_error_libelf(); + if (r) { + scn = elf_getscn(elf, reloc_shdr->sh_info); + if (!scn) + return drgn_error_libelf(); + GElf_Shdr *shdr, shdr_mem; + shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + if (shdr->sh_type == SHT_NOBITS) + continue; + + Elf_Scn *symtab_scn = elf_getscn(elf, + reloc_shdr->sh_link); + if (!symtab_scn) + return drgn_error_libelf(); + shdr = gelf_getshdr(symtab_scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + if (shdr->sh_type == SHT_NOBITS) { + return 
drgn_error_create(DRGN_ERROR_OTHER, + "relocation symbol table has no data"); + } + + Elf_Data *data, *reloc_data, *symtab_data; + if ((err = read_elf_section(scn, &data)) + || (err = read_elf_section(reloc_scn, &reloc_data)) + || (err = read_elf_section(symtab_scn, &symtab_data))) + return err; + + struct drgn_relocating_section relocating = { + .buf = data->d_buf, + .buf_size = data->d_size, + .addr = sh_addrs[elf_ndxscn(scn)], + .bswap = drgn_platform_bswap(&file->platform), + }; + + if (reloc_shdr->sh_type == SHT_RELA) { + err = apply_elf_relas(&relocating, reloc_data, + symtab_data, sh_addrs, + shdrnum, &file->platform); + } else { + err = apply_elf_rels(&relocating, reloc_data, + symtab_data, sh_addrs, + shdrnum, &file->platform); + } + if (err) + return err; + } + } + file->needs_relocation = false; + return NULL; +} + +struct drgn_error *drgn_elf_file_read_section(struct drgn_elf_file *file, + enum drgn_section_index scn, + Elf_Data **ret) +{ + struct drgn_error *err; + if (!file->scn_data[scn]) { + err = drgn_elf_file_apply_relocations(file); + if (err) + return err; + err = read_elf_section(file->scns[scn], &file->scn_data[scn]); + if (err) + return err; + if (scn == DRGN_SCN_DEBUG_STR) + truncate_elf_string_data(file->scn_data[scn]); + } + *ret = file->scn_data[scn]; + return NULL; +} + +struct drgn_error *drgn_elf_file_get_dwarf(struct drgn_elf_file *file, + Dwarf **ret) +{ + struct drgn_error *err; + if (!file->_dwarf) { + struct drgn_elf_file *supplementary_file = + file->module->supplementary_debug_file; + if (supplementary_file) { + supplementary_file->_dwarf = + dwarf_begin_elf(supplementary_file->elf, + DWARF_C_READ, NULL); + if (!supplementary_file->_dwarf) + return drgn_error_libdw(); + } + + err = drgn_elf_file_apply_relocations(file); + if (err) + return err; + + file->_dwarf = dwarf_begin_elf(file->elf, DWARF_C_READ, NULL); + if (!file->_dwarf) + return drgn_error_libdw(); + + if (supplementary_file) + dwarf_setalt(file->_dwarf, 
supplementary_file->_dwarf); + } + *ret = file->_dwarf; + return NULL; } struct drgn_error * @@ -281,3 +651,213 @@ struct drgn_error *drgn_elf_file_section_buffer_error(struct binary_buffer *bb, return drgn_elf_file_section_error(buffer->file, buffer->scn, buffer->data, ptr, message); } + +static bool elf_address_range_from_first_and_last_segment(Elf *elf, + uint64_t *start_ret, + uint64_t *end_ret) +{ + size_t phnum; + if (elf_getphdrnum(elf, &phnum)) + return false; + + uint64_t start; + GElf_Phdr phdr_mem, *phdr; + size_t i; + for (i = 0; i < phnum; i++) { + phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return false; + if (phdr->p_type == PT_LOAD) { + start = phdr->p_vaddr; + break; + } + } + if (i >= phnum) { + *start_ret = *end_ret = 0; + return true; + } + + for (i = phnum; i-- > 0;) { + phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return false; + + if (phdr->p_type == PT_LOAD) { + uint64_t end = phdr->p_vaddr + phdr->p_memsz; + if (start < end) { + *start_ret = start; + *end_ret = end; + return true; + } + break; + } + } + *start_ret = *end_ret = 0; + return true; +} + +static bool elf_address_range_from_min_and_max_segment(Elf *elf, + uint64_t *start_ret, + uint64_t *end_ret) +{ + size_t phnum; + if (elf_getphdrnum(elf, &phnum)) + return false; + + uint64_t start = UINT64_MAX, end = 0; + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return false; + if (phdr->p_type == PT_LOAD) { + start = min(start, phdr->p_vaddr); + end = max(end, phdr->p_vaddr + phdr->p_memsz); + } + } + if (start < end) { + *start_ret = start; + *end_ret = end; + } else { + *start_ret = *end_ret = 0; + } + return true; +} + +bool drgn_elf_file_address_range(struct drgn_elf_file *file, + uint64_t *start_ret, uint64_t *end_ret) +{ + // The ELF specification says that "loadable segment entries in the + // program header table appear in ascending order, sorted on the p_vaddr + // member." 
However, this is not the case in practice. + // + // vmlinux on some architectures contains special segments whose + // addresses are not meaningful and break the sorted order (e.g., + // segments corresponding to the .data..percpu section on x86-64 and the + // .vectors and .stubs sections on Arm). It appears that segments in + // vmlinux are sorted other than those special segments, and the special + // segments are never the first or last segment. + // + // Userspace ELF loaders disagree about whether to assume sorted order: + // + // - As of Linux kernel commit 10b19249192a ("ELF: fix overflow in total + // mapping size calculation") (in v5.18), the Linux kernel DOES NOT + // assume sorting. Before that, it DOES. + // - glibc as of v2.40 DOES assume sorting; see _dl_map_object_from_fd() + // in elf/dl-load.c and _dl_map_segments() in elf/dl-map-segments.h. + // - musl as of v1.2.5 DOES NOT assume sorting; see map_library() in + // ldso/dynlink.c. + // + // So, we use a heuristic: if the file has an .init.text section, then + // it is probably a vmlinux file, so we assume the sorted order, which + // allows us to ignore the special segments in the middle. + // + // Otherwise, we don't assume the sorted order. + if (file->is_vmlinux) { + return elf_address_range_from_first_and_last_segment(file->elf, + start_ret, + end_ret); + } else { + return elf_address_range_from_min_and_max_segment(file->elf, + start_ret, + end_ret); + } +} + +// Keep this in sync with drgn_elf_file_create(). 
+int elf_is_vmlinux(Elf *elf) +{ + GElf_Ehdr ehdr_mem, *ehdr = gelf_getehdr(elf, &ehdr_mem); + if (!ehdr) + return -1; + + if (ehdr->e_type == ET_REL) + return 0; + + size_t shstrndx; + if (elf_getshdrstrndx(elf, &shstrndx)) + return -1; + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return -1; + + if (shdr->sh_type != SHT_PROGBITS) + continue; + + const char *scnname = elf_strptr(elf, shstrndx, shdr->sh_name); + if (!scnname) + return -1; + + if (strcmp(scnname, ".init.text") == 0) + return 1; + } + return 0; +} + +ssize_t elf_vmlinux_release(Elf *elf, const char **ret) +{ + Elf_Scn *scn = NULL; + while ((scn = elf_nextscn(elf, scn))) { + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return -1; + + if (shdr->sh_type != SHT_SYMTAB || shdr->sh_entsize == 0) + continue; + + Elf_Data *data = elf_getdata(scn, NULL); + if (!data) + return -1; + + size_t num_syms = shdr->sh_size / shdr->sh_entsize; + for (size_t i = 0; i < num_syms; i++) { + GElf_Sym sym_mem, *sym = gelf_getsym(data, i, &sym_mem); + if (!sym) + return -1; + + static const char prefix[] = "Linux version "; + + if (GELF_ST_TYPE(sym->st_info) != STT_OBJECT + || GELF_ST_BIND(sym->st_info) != STB_GLOBAL + || sym->st_size < sizeof(prefix) - 1) + continue; + + const char *name = elf_strptr(elf, shdr->sh_link, + sym->st_name); + if (!name) + return -1; + if (strcmp(name, "linux_banner") != 0) + continue; + + GElf_Shdr sym_shdr_mem, *sym_shdr = + gelf_getshdr(elf_getscn(elf, sym->st_shndx), + &sym_shdr_mem); + if (!sym_shdr) + return -1; + + int64_t offset = sym_shdr->sh_offset + + sym->st_value - sym_shdr->sh_addr; + Elf_Data *banner_data = + elf_getdata_rawchunk(elf, offset, sym->st_size, + ELF_T_BYTE); + if (!banner_data) + return -1; + + if (memcmp(banner_data->d_buf, prefix, + sizeof(prefix) - 1) != 0) + return 0; + + const char *release = (const char *)banner_data->d_buf + + 
(sizeof(prefix) - 1); + const char *space = + memchr(release, ' ', + banner_data->d_size - (sizeof(prefix) - 1)); + if (!space) + return 0; + *ret = release; + return space - release; + } + } + return 0; +} diff --git a/libdrgn/elf_file.h b/libdrgn/elf_file.h index ca2b6f3eb..0eea9fa7f 100644 --- a/libdrgn/elf_file.h +++ b/libdrgn/elf_file.h @@ -43,6 +43,14 @@ struct drgn_module; */ struct drgn_error *read_elf_section(Elf_Scn *scn, Elf_Data **ret); +/** + * Truncate any bytes beyond the last null character in an ELF string table. + * + * This sets `data->d_size` so that any string table index less than + * `data->d_size` is guaranteed to be valid. + */ +void truncate_elf_string_data(Elf_Data *data); + static inline bool elf_data_contains_ptr(Elf_Data *data, const void *ptr) { uintptr_t bufi = (uintptr_t)data->d_buf; @@ -55,11 +63,38 @@ struct drgn_elf_file { /** Module using this file. */ struct drgn_module *module; /** Filesystem path to this file. */ - const char *path; + char *path; + /** + * Memory image backing @ref elf. + * + * @c NULL if not backed by a memory image. + */ + char *image; + /** + * File descriptor backing @ref elf. + * + * -1 if not backed by a file. + */ + int fd; + /** Whether the file is loadable. */ + bool is_loadable; + /** Whether the file is relocatable. */ + bool is_relocatable; + /** Whether the file still need to have relocations applied. */ + bool needs_relocation; + /** Whether the file is a Linux kernel image (`vmlinux`). */ + bool is_vmlinux; /** libelf handle. */ Elf *elf; - /** libdw handle if we're using DWARF information from this file. */ - Dwarf *dwarf; + /** + * libdw handle. + * + * @c NULL if not yet created. + * + * Don't access this directly. Get it with @ref + * drgn_elf_file_get_dwarf() instead. + */ + Dwarf *_dwarf; /** * Platform of this file. * @@ -86,16 +121,33 @@ struct drgn_elf_file { Elf_Data *alt_debug_str_data; }; +/** + * Create a @ref drgn_elf_file. 
+ * + * On success, this takes ownership of @p fd, @p image, and @p elf. @p path is + * copied. + */ struct drgn_error *drgn_elf_file_create(struct drgn_module *module, - const char *path, Elf *elf, - struct drgn_elf_file **ret); + const char *path, int fd, char *image, + Elf *elf, struct drgn_elf_file **ret); void drgn_elf_file_destroy(struct drgn_elf_file *file); -struct drgn_error *drgn_elf_file_precache_sections(struct drgn_elf_file *file); - +/** Apply ELF relocations to the file if needed. */ struct drgn_error * -drgn_elf_file_cache_section(struct drgn_elf_file *file, enum drgn_section_index scn); +drgn_elf_file_apply_relocations(struct drgn_elf_file *file); + +/** + * Read an indexed ELF section. + * + * This applies ELF relocations to the file first if needed. + */ +struct drgn_error *drgn_elf_file_read_section(struct drgn_elf_file *file, + enum drgn_section_index scn, + Elf_Data **ret); + +struct drgn_error *drgn_elf_file_get_dwarf(struct drgn_elf_file *file, + Dwarf **ret); static inline bool drgn_elf_file_is_little_endian(const struct drgn_elf_file *file) @@ -108,6 +160,12 @@ static inline bool drgn_elf_file_bswap(const struct drgn_elf_file *file) return drgn_platform_bswap(&file->platform); } +static inline bool +drgn_elf_file_is_64_bit(const struct drgn_elf_file *file) +{ + return drgn_platform_is_64_bit(&file->platform); +} + static inline uint8_t drgn_elf_file_address_size(const struct drgn_elf_file *file) { @@ -120,6 +178,12 @@ drgn_elf_file_address_mask(const struct drgn_elf_file *file) return drgn_platform_address_mask(&file->platform); } +static inline bool drgn_elf_file_has_dwarf(const struct drgn_elf_file *file) +{ + return (file->scns[DRGN_SCN_DEBUG_INFO] + && file->scns[DRGN_SCN_DEBUG_ABBREV]); +} + struct drgn_error * drgn_elf_file_section_error(struct drgn_elf_file *file, Elf_Scn *scn, Elf_Data *data, const char *ptr, @@ -156,6 +220,10 @@ drgn_elf_file_section_buffer_init(struct drgn_elf_file_section_buffer *buffer, buffer->data = data; } 
+/** + * Initialize a @ref binary_buffer for an indexed ELF section that has already + * been read. + */ static inline void drgn_elf_file_section_buffer_init_index(struct drgn_elf_file_section_buffer *buffer, struct drgn_elf_file *file, @@ -165,6 +233,47 @@ drgn_elf_file_section_buffer_init_index(struct drgn_elf_file_section_buffer *buf file->scn_data[scn]); } +/** + * Read an indexed ELF section (applying ELF relocations if needed) and + * initialize a @ref binary_buffer for it. + */ +static inline struct drgn_error * +drgn_elf_file_section_buffer_read(struct drgn_elf_file_section_buffer *buffer, + struct drgn_elf_file *file, + enum drgn_section_index scn) +{ + Elf_Data *data; + struct drgn_error *err = drgn_elf_file_read_section(file, scn, &data); + if (err) + return err; + drgn_elf_file_section_buffer_init(buffer, file, file->scns[scn], data); + return NULL; +} + +/** + * Return the virtual address range of an ELF file. + * + * @param[out] start_ret Minimum virtual address (inclusive). + * @param[out] end_ret Maximum virtual address (exclusive). + */ +bool drgn_elf_file_address_range(struct drgn_elf_file *file, + uint64_t *start_ret, uint64_t *end_ret); + +/** + * Return whether an ELF file is a vmlinux file. + * + * @return > 0 if the file is vmlinux, 0 if it is not, < 0 on libelf error. + */ +int elf_is_vmlinux(Elf *elf); + +/** + * Get the Linux release from a vmlinux file. + * + * @param[out] ret Returned release. + * @return Length of @p ret on success, 0 if not found, < 0 on libelf error. 
+ */ +ssize_t elf_vmlinux_release(Elf *elf, const char **ret); + /** @} */ #endif /* DRGN_ELF_FILE_H */ diff --git a/libdrgn/elf_notes.c b/libdrgn/elf_notes.c index bcd36449d..d9679f3cc 100644 --- a/libdrgn/elf_notes.c +++ b/libdrgn/elf_notes.c @@ -55,6 +55,45 @@ bool next_elf_note(const void **p, size_t *size, unsigned int align, bool bswap, return true; } +int find_elf_note(Elf *elf, const char *name, uint32_t type, const void **ret, + size_t *size_ret) +{ + size_t phnum; + if (elf_getphdrnum(elf, &phnum)) + return -1; + size_t name_size = strlen(name) + 1; + for (size_t i = 0; i < phnum; i++) { + GElf_Phdr phdr_mem, *phdr = gelf_getphdr(elf, i, &phdr_mem); + if (!phdr) + return -1; + if (phdr->p_type != PT_NOTE) + continue; + Elf_Data *data = elf_getdata_rawchunk(elf, phdr->p_offset, + phdr->p_filesz, + note_header_type(phdr->p_align)); + if (!data) + return -1; + GElf_Nhdr nhdr; + size_t offset = 0, name_offset, desc_offset; + while (offset < data->d_size && + (offset = gelf_getnote(data, offset, &nhdr, + &name_offset, + &desc_offset))) { + const char *note_name = (char *)data->d_buf + name_offset; + if (nhdr.n_namesz == name_size + && memcmp(note_name, name, name_size) == 0 + && nhdr.n_type == type) { + *ret = (char *)data->d_buf + desc_offset; + *size_ret = nhdr.n_descsz; + return 0; + } + } + } + *ret = NULL; + *size_ret = 0; + return 0; +} + size_t parse_gnu_build_id_from_notes(const void *buf, size_t size, unsigned int align, bool bswap, const void **ret) diff --git a/libdrgn/elf_notes.h b/libdrgn/elf_notes.h index 3f198f6ee..dc8cb9936 100644 --- a/libdrgn/elf_notes.h +++ b/libdrgn/elf_notes.h @@ -70,6 +70,16 @@ bool next_elf_note(const void **p, size_t *size, unsigned int align, bool bswap, GElf_Nhdr *nhdr_ret, const char **name_ret, const void **desc_ret); +/** + * Find an ELF note matching the given name and type. + * + * Note that this currently only checks segments, not sections. + * + * @return 0 on success, -1 on libelf error. 
+ */ +int find_elf_note(Elf *elf, const char *name, uint32_t type, const void **ret, + size_t *size_ret); + /** * Parse a GNU build ID from a buffer containing note data. diff --git a/libdrgn/elf_symtab.c b/libdrgn/elf_symtab.c new file mode 100644 index 000000000..e04feafcd --- /dev/null +++ b/libdrgn/elf_symtab.c @@ -0,0 +1,683 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include +#include +#ifdef WITH_LZMA +#include +#endif +#include +#include + +#include "cleanup.h" +#include "debug_info.h" +#include "elf_file.h" +#include "elf_symtab.h" +#include "error.h" +#include "log.h" +#include "minmax.h" +#include "serialize.h" +#include "string_builder.h" +#include "util.h" + +static struct drgn_error *find_elf_file_symtab(struct drgn_elf_file *file, + uint64_t bias, + struct drgn_elf_file **file_ret, + uint64_t *bias_ret, + Elf_Scn **scn_ret, + GElf_Word *strtab_idx_ret, + GElf_Word *num_local_symbols_ret, + bool *full_symtab_ret, + Elf_Scn **gnu_debugdata_ret) +{ + Elf_Scn *scn = NULL; + size_t shstrndx; + if (elf_getshdrstrndx(file->elf, &shstrndx)) + return drgn_error_libelf(); + while ((scn = elf_nextscn(file->elf, scn))) { + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return drgn_error_libelf(); + + const char *scnname = elf_strptr(file->elf, shstrndx, shdr->sh_name); + if (scnname && gnu_debugdata_ret && shdr->sh_type == SHT_PROGBITS + && strcmp(".gnu_debugdata", scnname) == 0) + *gnu_debugdata_ret = scn; + + if (shdr->sh_type == SHT_SYMTAB + || shdr->sh_type == SHT_DYNSYM) { + *file_ret = file; + *bias_ret = bias; + *scn_ret = scn; + *strtab_idx_ret = shdr->sh_link; + *num_local_symbols_ret = shdr->sh_info; + if (shdr->sh_type == SHT_SYMTAB) { + *full_symtab_ret = true; + return NULL; + } + } + } + return NULL; +} + +#ifdef WITH_LZMA +static struct drgn_error * +drgn_error_lzma(lzma_ret code) +{ + switch (code) { + case LZMA_MEM_ERROR: + return &drgn_enomem; + case 
LZMA_OPTIONS_ERROR: + return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, + "lzma: invalid options"); + case LZMA_FORMAT_ERROR: + case LZMA_DATA_ERROR: + case LZMA_BUF_ERROR: + return drgn_error_format(DRGN_ERROR_INVALID_ARGUMENT, + "lzma: format error (%d)", code); + default: + return drgn_error_format(DRGN_ERROR_OTHER, + "lzma: unknown error (%d)", code); + } +} + +static struct drgn_error * +load_gnu_debugdata_file(struct drgn_module *module, Elf_Scn *gnu_debugdata_scn, + struct drgn_elf_file **file_ret) +{ + Elf_Data *gnu_debugdata_data; + struct drgn_error *err; + err = read_elf_section(gnu_debugdata_scn, &gnu_debugdata_data); + if (err) + return err; + + _cleanup_(lzma_end) lzma_stream stream = LZMA_STREAM_INIT; + lzma_ret ret = lzma_stream_decoder(&stream, UINT64_MAX, 0); + if (ret != LZMA_OK) + return drgn_error_lzma(ret); + + stream.next_in = gnu_debugdata_data->d_buf; + stream.avail_in = gnu_debugdata_data->d_size; + + // Use the input buffer size as the initial capacity. We'll expand it as + // needed. 
+ size_t capacity = gnu_debugdata_data->d_size; + _cleanup_free_ void *data = malloc(capacity); + if (!data) + return &drgn_enomem; + + stream.next_out = data; + stream.avail_out = capacity; + + size_t bytes_decoded; + while (1) { + ret = lzma_code(&stream, LZMA_RUN); + if (ret != LZMA_OK && ret != LZMA_STREAM_END) + return drgn_error_lzma(ret); + + bytes_decoded = (char *)stream.next_out - (char *)data; + if (ret == LZMA_STREAM_END) { + void *tmp = realloc(data, bytes_decoded); + if (tmp) + data = tmp; + break; + } else if (__builtin_mul_overflow(capacity, 2U, &capacity)) { + return &drgn_enomem; + } else { + void *tmp = realloc(data, capacity); + if (!tmp) + return &drgn_enomem; + data = tmp; + stream.next_out = (uint8_t *)data + bytes_decoded; + stream.avail_out = capacity - bytes_decoded; + } + } + + STRING_BUILDER(path); + if (!string_builder_appendf(&path, "%s[.gnu_debugdata]", module->loaded_file->path) + || !string_builder_null_terminate(&path)) + return &drgn_enomem; + + Elf *elf = elf_memory(data, bytes_decoded); + if (!elf) + return drgn_error_libelf(); + + err = drgn_elf_file_create(module, path.str, -1, data, elf, file_ret); + if (err) + elf_end(elf); + else + data = NULL; + return err; +} +#else +static struct drgn_error * +load_gnu_debugdata_file(struct drgn_module *module, Elf_Scn *gnu_debugdata_scn, + struct drgn_elf_file **ret) +{ + drgn_log_info(module->prog, + "module \"%s\": .gnu_debugdata is available, but drgn was built without liblzma support", + module->name); + return NULL; +} +#endif + +static struct drgn_error * +set_elf_symtab(struct drgn_elf_symbol_table *symtab, struct drgn_elf_file *file, + uint64_t bias, Elf_Scn *symtab_scn, GElf_Word strtab_idx, + GElf_Word num_local_symbols) +{ + Elf_Scn *strtab_scn = elf_getscn(file->elf, strtab_idx); + if (!strtab_scn) + return drgn_error_libelf(); + + struct drgn_error *err; + Elf_Data *data, *strtab_data; + if ((err = read_elf_section(symtab_scn, &data)) + || (err = 
read_elf_section(strtab_scn, &strtab_data))) + return err; + + truncate_elf_string_data(strtab_data); + + Elf_Data *shndx_data = NULL; + int shndx_idx = elf_scnshndx(symtab_scn); + if (shndx_idx > 0) { + Elf_Scn *shndx_scn = elf_getscn(file->elf, shndx_idx); + if (!shndx_scn) + return drgn_error_libelf(); + err = read_elf_section(shndx_scn, &shndx_data); + if (err) + return err; + } + + symtab->file = file; + symtab->bias = bias; + symtab->data = data->d_buf; + symtab->num_symbols = + data->d_size + / (drgn_elf_file_is_64_bit(file) + ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym)); + if (num_local_symbols < 1) + num_local_symbols = 1; + if (num_local_symbols > symtab->num_symbols) + num_local_symbols = symtab->num_symbols; + symtab->num_local_symbols = num_local_symbols; + symtab->strtab = strtab_data; + symtab->shndx = shndx_data; + return NULL; +} + +static void +cleanup_elf_file(struct drgn_elf_file **pfile) +{ + if (*pfile) { + drgn_elf_file_destroy(*pfile); + } +} + +static struct drgn_error * +find_module_elf_symtab(struct drgn_module *module) +{ + struct drgn_error *err; + + if (!module->elf_symtab_pending_files) + return NULL; + + if (module->have_full_symtab) { + module->elf_symtab_pending_files = 0; + return NULL; + } + + // The goal is to have the most complete symbol information, which we + // can get from the following, in order of preference: + // 1. A .symtab from the loaded or debug file (i.e. module->full_symtab + // is true) + // 2. A .dynsym from the loaded file, as well as the .symtab from an + // embedded .gnu_debugdata file. (The .gnu_debugdata usually only + // contains complete function symbols, so we prefer #1 where + // possible) + // 3. 
A .dynsym and no .gnu_debugdata + + struct drgn_elf_file *file = NULL; + uint64_t bias; + Elf_Scn *symtab_scn; + GElf_Word strtab_idx, num_local_symbols; + bool full_symtab = false; + + if (module->elf_symtab_pending_files & DRGN_MODULE_FILE_MASK_DEBUG) { + err = find_elf_file_symtab(module->debug_file, + module->debug_file_bias, &file, + &bias, &symtab_scn, &strtab_idx, + &num_local_symbols, &full_symtab, + NULL); + if (err) + return err; + } + + Elf_Scn *gnu_debugdata_scn = NULL; + if (!full_symtab && + (module->elf_symtab_pending_files & DRGN_MODULE_FILE_MASK_LOADED)) { + err = find_elf_file_symtab(module->loaded_file, + module->loaded_file_bias, &file, + &bias, &symtab_scn, &strtab_idx, + &num_local_symbols, &full_symtab, + &gnu_debugdata_scn); + if (err) + return err; + } + + if (!file && !gnu_debugdata_scn) { + drgn_log_debug(module->prog, "%s: no ELF symbol table", + module->name); + module->elf_symtab_pending_files = 0; + return NULL; + } + + // If we've found a dynamic symbol table, but we already saw a dynamic + // table, don't bother replacing it, unless the new file contains + // .gnu_debugdata (and thus the old one didn't). + if (module->elf_symtab.num_symbols && !full_symtab && !gnu_debugdata_scn) { + module->elf_symtab_pending_files = 0; + return NULL; + } + + if (file) { + err = set_elf_symtab(&module->elf_symtab, file, bias, symtab_scn, + strtab_idx, num_local_symbols); + if (err) + return err; + + module->have_full_symtab = full_symtab; + drgn_log_debug(module->prog, + "%s: found ELF %ssymbol table with %zu symbols", + module->name, full_symtab ? "" : "dynamic ", + module->elf_symtab.num_symbols); + } + + if (full_symtab && module->gnu_debugdata_symtab.num_symbols) { + // With a full symbol table (likely from the debug file), there + // is no need to keep around the gnu_debugdata symbol table. + // We cannot free the memory associated with it, because we may + // have returned symbols that refer to the strings in this file. 
+ // Stop using the symbol table, but delay freeing until the + // program is freed. + memset(&module->gnu_debugdata_symtab, 0, + sizeof(module->gnu_debugdata_symtab)); + } else if (!full_symtab && gnu_debugdata_scn) { + // We only search for .gnu_debugdata in the loaded file, not the + // debug file. Once attached to a module, files can't be + // detached, so there should be no case where we load + // .gnu_debugdata twice. Let's assert that precondition here. + assert(module->gnu_debugdata_file == NULL); + + _cleanup_(cleanup_elf_file) struct drgn_elf_file *gnu_debugdata_file = NULL; + + err = load_gnu_debugdata_file(module, gnu_debugdata_scn, + &gnu_debugdata_file); + if (err) + return err; + + if (gnu_debugdata_file) { + file = NULL; + err = find_elf_file_symtab(gnu_debugdata_file, + module->loaded_file_bias, &file, + &bias, &symtab_scn, &strtab_idx, + &num_local_symbols, &full_symtab, + NULL); + if (err) + return err; + + if (file) { + err = set_elf_symtab(&module->gnu_debugdata_symtab, + file, bias, symtab_scn, + strtab_idx, num_local_symbols); + if (err) + return err; + + module->gnu_debugdata_file = no_cleanup_ptr(gnu_debugdata_file); + drgn_log_debug(module->prog, + "%s: found ELF .gnu_debugdata symbol table with %zu symbols", + module->name, module->gnu_debugdata_symtab.num_symbols); + } + } + } + + module->elf_symtab_pending_files = 0; + return NULL; +} + +static size_t elf_symbol_shndx(struct drgn_elf_symbol_table *symtab, + size_t sym_idx, const GElf_Sym *sym) +{ + if (sym->st_shndx < SHN_LORESERVE) + return sym->st_shndx; + if (sym->st_shndx == SHN_XINDEX + && symtab->shndx + && sym_idx < symtab->shndx->d_size / sizeof(uint32_t)) { + uint32_t tmp; + memcpy(&tmp, + (const char *)symtab->shndx->d_buf + + sym_idx * sizeof(uint32_t), + sizeof(uint32_t)); + if (drgn_elf_file_bswap(symtab->file)) + tmp = bswap_32(tmp); + return tmp; + } + return SHN_UNDEF; +} + +static bool elf_symbol_address(struct drgn_elf_symbol_table *symtab, + size_t sym_idx, const 
GElf_Sym *sym, uint64_t *ret) +{ + uint64_t addr = sym->st_value; + + // On 32-bit Arm, the least significant bit of st_value in an STT_FUNC + // symbol indicates whether it addresses a Thumb instruction. Clear it. + // + // P.S. If we need any more architecture-specific hacks, then we should + // add a callback to drgn_architecture_info. Note that we don't + // currently support V1 of the 64-bit PowerPC ELF ABI where st_value is + // the address of a "function descriptor" instead of the function entry + // point. + if (symtab->file->platform.arch->arch == DRGN_ARCH_ARM + && GELF_ST_TYPE(sym->st_info) == STT_FUNC) + addr &= ~1; + + addr += symtab->bias; + if (symtab->file->is_relocatable) { + size_t shndx = elf_symbol_shndx(symtab, sym_idx, sym); + if (shndx == SHN_UNDEF) + return false; + Elf_Scn *scn = elf_getscn(symtab->file->elf, shndx); + if (!scn) + return false; + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return false; + addr += shdr->sh_addr; + } + *ret = addr; + return true; +} + +// When searching for one symbol, if there are multiple matches, we break ties +// based on the symbol binding. The order of precedence is: +// GLOBAL = UNIQUE > WEAK > LOCAL = everything else +static int drgn_symbol_binding_precedence(const struct drgn_symbol *sym) +{ + SWITCH_ENUM(sym->binding) { + case DRGN_SYMBOL_BINDING_GLOBAL: + case DRGN_SYMBOL_BINDING_UNIQUE: + return 3; + case DRGN_SYMBOL_BINDING_WEAK: + return 2; + case DRGN_SYMBOL_BINDING_LOCAL: + case DRGN_SYMBOL_BINDING_UNKNOWN: + return 1; + default: + UNREACHABLE(); + } +} + +static int elf_symbol_binding_precedence(const GElf_Sym *sym) +{ + switch (GELF_ST_BIND(sym->st_info)) { + case STB_GLOBAL: + case STB_GNU_UNIQUE: + return 3; + case STB_WEAK: + return 2; + default: + return 1; + } +} + +// This assumes that both symbols contain the search address. 
+static bool better_addr_match(const GElf_Sym *a, uint64_t a_addr, + const struct drgn_symbol *b) +{ + // Prefer the symbol that starts closer to the search address. + if (a_addr > b->address) + return true; + if (a_addr < b->address) + return false; + + // If the symbols have the same start address, prefer the one that ends + // closer to the search address. + if (a->st_size < b->size) + return true; + if (a->st_size > b->size) + return false; + + // If the symbols have the same start and end addresses, prefer the one + // with the higher binding precedence. + return elf_symbol_binding_precedence(a) + > drgn_symbol_binding_precedence(b); +} + +// This assumes that both symbols start before the search address and have size +// 0. +static bool better_sizeless_addr_match(const GElf_Sym *a, uint64_t a_addr, + const GElf_Sym *b, uint64_t b_addr) +{ + // Prefer the symbol that starts closer to the search address. + if (a_addr > b_addr) + return true; + if (a_addr < b_addr) + return false; + + // If the symbols have the same start address, prefer the one with the + // higher binding precedence. + return elf_symbol_binding_precedence(a) + > elf_symbol_binding_precedence(b); +} + +static bool addr_in_sym_section(struct drgn_elf_symbol_table *symtab, + size_t sym_idx, const GElf_Sym *sym, + uint64_t unbiased_addr) +{ + size_t shndx = elf_symbol_shndx(symtab, sym_idx, sym); + if (shndx == SHN_UNDEF) + return false; + Elf_Scn *scn = elf_getscn(symtab->file->elf, shndx); + if (!scn) + return false; + GElf_Shdr shdr_mem, *shdr = gelf_getshdr(scn, &shdr_mem); + if (!shdr) + return false; + return unbiased_addr >= shdr->sh_addr + && (unbiased_addr - shdr->sh_addr < shdr->sh_size); +} + +struct elf_symtab_search_state { + // Handwritten assembly functions may have a symbol size of 0 even + // though logically they have a size. The best we can do is assume that + // such a symbol extends until the next symbol. 
If we're searching by + // address and we don't find any symbols containing the address, then we + // will return a symbol with size 0 that could contain it based on this + // assumption. + const char *sizeless_name; + uint64_t sizeless_addr; + size_t sizeless_sym_idx; + struct drgn_elf_symbol_table *sizeless_symtab; + Elf64_Sym sizeless_sym; + + // If we're searching for one symbol, then we may already have a match, + // but we still need to search for a better match. + struct drgn_symbol *best_sym; + + // The maximum end address of any symbol starting before the given + // address. Any symbol with size 0 starting before this is either + // contained within another symbol or is assumed to end before this, so + // it should be ignored. + uint64_t max_end_addr; +}; + +static struct drgn_error * +drgn_elf_symbol_table_search(struct drgn_elf_symbol_table *symtab, const char *name, + uint64_t addr, enum drgn_find_symbol_flags flags, + struct elf_symtab_search_state *state, + struct drgn_symbol_result_builder *builder) +{ + const bool is_64_bit = drgn_elf_file_is_64_bit(symtab->file); + const bool bswap = drgn_elf_file_bswap(symtab->file); + const size_t sym_size = + is_64_bit ? sizeof(Elf64_Sym) : sizeof(Elf32_Sym); + + // If we already have a match, and we're not searching by address, then + // we will never prefer a local symbol over that match, so we can skip + // local symbols. For address searches, we can't skip local addresses, + // because we prioritize the closest match to the address. + // + // Otherwise, skip the undefined symbol at index 0. + for (size_t i = !(flags & DRGN_FIND_SYMBOL_ADDR) && state->best_sym + ? 
symtab->num_local_symbols : 1; + i < symtab->num_symbols; i++) { + Elf64_Sym elf_sym; +#define visit_elf_sym_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(st_name); \ + visit_scalar_member(st_info); \ + visit_scalar_member(st_other); \ + visit_scalar_member(st_shndx); \ + visit_scalar_member(st_value); \ + visit_scalar_member(st_size); \ +} while (0) + deserialize_struct64(&elf_sym, Elf32_Sym, visit_elf_sym_members, + symtab->data + i * sym_size, + is_64_bit, bswap); +#undef visit_elf_sym_members + + // Ignore undefined symbols. + if (elf_sym.st_shndx == SHN_UNDEF) + continue; + + // Ignore symbols with an out-of-bounds name. + if (elf_sym.st_name >= symtab->strtab->d_size) + continue; + const char *elf_sym_name = + (const char *)symtab->strtab->d_buf + + elf_sym.st_name; + + if ((flags & DRGN_FIND_SYMBOL_NAME) + && strcmp(elf_sym_name, name) != 0) + continue; + + if (flags & DRGN_FIND_SYMBOL_ADDR) { + // Ignore these special symbol types for address + // searches (before we bother computing the address). + switch (GELF_ST_TYPE(elf_sym.st_info)) { + case STT_SECTION: + case STT_FILE: + case STT_TLS: + continue; + default: + break; + } + } else if (state->best_sym + // This is a non-address search for one symbol. + // Prefer the symbol with the higher binding + // precedence. 
+ && elf_symbol_binding_precedence(&elf_sym) + <= drgn_symbol_binding_precedence(state->best_sym)) { + continue; + } + + uint64_t elf_sym_addr; + if (!elf_symbol_address(symtab, i, &elf_sym, &elf_sym_addr)) + continue; + + if (flags & DRGN_FIND_SYMBOL_ADDR) { + if (elf_sym_addr > addr) + continue; + + state->max_end_addr = max(state->max_end_addr, + elf_sym_addr + elf_sym.st_size); + + if (elf_sym.st_size == 0) { + if (!state->sizeless_name + || better_sizeless_addr_match(&elf_sym, + elf_sym_addr, + &state->sizeless_sym, + state->sizeless_addr)) { + state->sizeless_name = elf_sym_name; + state->sizeless_addr = elf_sym_addr; + state->sizeless_sym_idx = i; + state->sizeless_sym = elf_sym; + state->sizeless_symtab = symtab; + } + continue; + } else if (addr - elf_sym_addr >= elf_sym.st_size + || (state->best_sym + && !better_addr_match(&elf_sym, + elf_sym_addr, + state->best_sym))) { + continue; + } + } + + if (!drgn_symbol_result_builder_add_from_elf(builder, + elf_sym_name, + elf_sym_addr, + &elf_sym)) + return &drgn_enomem; + + if (flags & DRGN_FIND_SYMBOL_ONE) { + state->best_sym = drgn_symbol_result_builder_single(builder); + if (!(flags & DRGN_FIND_SYMBOL_ADDR)) { + // If we're not searching by address and we find + // a matching global symbol, then we don't need + // to search anymore. + if (state->best_sym->binding == DRGN_SYMBOL_BINDING_GLOBAL + || state->best_sym->binding == DRGN_SYMBOL_BINDING_UNIQUE) + return &drgn_stop; + // Otherwise, if we're not searching by address + // and we find a matching local symbol, then we + // can skip past the remaining local symbols. 
+ if (i < symtab->num_local_symbols) + i = symtab->num_local_symbols - 1; + } + } + } + return NULL; +} + +struct drgn_error * +drgn_module_elf_symbols_search(struct drgn_module *module, const char *name, + uint64_t addr, enum drgn_find_symbol_flags flags, + struct drgn_symbol_result_builder *builder) +{ + struct drgn_error *err; + + err = find_module_elf_symtab(module); + if (err) + return err; + + struct elf_symtab_search_state state = {0}; + if (flags & DRGN_FIND_SYMBOL_ONE) + state.best_sym = drgn_symbol_result_builder_single(builder); + + if (module->elf_symtab.num_symbols) { + err = drgn_elf_symbol_table_search(&module->elf_symtab, name, addr, + flags, &state, builder); + if (err) + return err; + } + + if (module->gnu_debugdata_symtab.num_symbols) { + err = drgn_elf_symbol_table_search(&module->gnu_debugdata_symtab, name, + addr, flags, &state, builder); + if (err) + return err; + } + + if (state.sizeless_name + && drgn_symbol_result_builder_count(builder) == 0 + && state.sizeless_addr >= state.max_end_addr + && addr_in_sym_section(state.sizeless_symtab, state.sizeless_sym_idx, + &state.sizeless_sym, addr - state.sizeless_symtab->bias) + && !drgn_symbol_result_builder_add_from_elf(builder, state.sizeless_name, + state.sizeless_addr, + &state.sizeless_sym)) + return &drgn_enomem; + + return NULL; +} diff --git a/libdrgn/elf_symtab.h b/libdrgn/elf_symtab.h new file mode 100644 index 000000000..298f93a84 --- /dev/null +++ b/libdrgn/elf_symtab.h @@ -0,0 +1,55 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +/** + * @file + * + * ELF symbol tables. + * + * See @ref ElfSymtab. + */ + +#ifndef DRGN_ELF_SYMBOL_H +#define DRGN_ELF_SYMBOL_H + +#include "drgn_internal.h" + +struct drgn_elf_file; + +/** + * @ingroup Internals + * + * @defgroup ElfSymtab ELF symbol tables + * + * ELF symbol table lookups. + * + * @{ + */ + +/** Symbol table from an ELF file. 
*/ +struct drgn_elf_symbol_table { + /** File containing symbol table. @c NULL if not found yet. */ + struct drgn_elf_file *file; + /** Bias to apply to addresses from the file. */ + uint64_t bias; + /** Symbol table section data. */ + const char *data; + /** Number of symbols in table. */ + size_t num_symbols; + /** Number of local symbols in table. */ + size_t num_local_symbols; + /** String table section used by symbol table. */ + Elf_Data *strtab; + /** Optional `SHT_SYMTAB_SHNDX` section used by symbol table. */ + Elf_Data *shndx; +}; + +/** Find matching ELF symbols in a specific module. */ +struct drgn_error * +drgn_module_elf_symbols_search(struct drgn_module *module, const char *name, + uint64_t addr, enum drgn_find_symbol_flags flags, + struct drgn_symbol_result_builder *builder); + +/** @} */ + +#endif /* DRGN_ELF_SYMBOL_H */ diff --git a/libdrgn/error.c b/libdrgn/error.c index 95174abbe..27d9a2dc5 100644 --- a/libdrgn/error.c +++ b/libdrgn/error.c @@ -2,7 +2,6 @@ // SPDX-License-Identifier: LGPL-2.1-or-later #include -#include #include #include #include @@ -158,16 +157,6 @@ drgn_error_format_fault(uint64_t address, const char *format, ...) 
return err; } -struct drgn_error *drgn_error_from_string_builder(enum drgn_error_code code, - struct string_builder *sb) -{ - if (!string_builder_null_terminate(sb)) { - string_builder_deinit(sb); - return &drgn_enomem; - } - return drgn_error_create_nodup(code, sb->str); -} - LIBDRGN_PUBLIC struct drgn_error *drgn_error_copy(struct drgn_error *src) { if (!src->needs_destroy) @@ -274,9 +263,3 @@ struct drgn_error *drgn_error_libdw(void) return drgn_error_format(DRGN_ERROR_OTHER, "libdw error: %s", dwarf_errmsg(-1)); } - -struct drgn_error *drgn_error_libdwfl(void) -{ - return drgn_error_format(DRGN_ERROR_OTHER, "libdwfl error: %s", - dwfl_errmsg(-1)); -} diff --git a/libdrgn/error.h b/libdrgn/error.h index d9ee35368..40f57ca96 100644 --- a/libdrgn/error.h +++ b/libdrgn/error.h @@ -33,15 +33,16 @@ extern struct drgn_error drgn_stop; /** Global @ref DRGN_ERROR_OBJECT_ABSENT error. */ extern struct drgn_error drgn_error_object_absent; -struct string_builder; - /** - * Create a @ref drgn_error with a message from a @ref string_builder. - * - * This deinitializes the string builder. + * Return whether an error is fatal, meaning that it should usually be returned + * to the caller instead of being handled or logged. */ -struct drgn_error *drgn_error_from_string_builder(enum drgn_error_code code, - struct string_builder *sb); +static inline bool drgn_error_is_fatal(struct drgn_error *err) +{ + return err == &drgn_enomem; +} + +struct string_builder; /** * Append a formatted @ref drgn_error to a @ref string_builder. @@ -60,10 +61,6 @@ struct drgn_error *drgn_error_libelf(void) struct drgn_error *drgn_error_libdw(void) __attribute__((__returns_nonnull__)); -/** Create a @ref drgn_error from the libdwfl error indicator. */ -struct drgn_error *drgn_error_libdwfl(void) - __attribute__((__returns_nonnull__)); - /** * Create a @ref drgn_error with a type name. 
* diff --git a/libdrgn/examples/load_debug_info.c b/libdrgn/examples/load_debug_info.c index 953771df1..284c8b67e 100644 --- a/libdrgn/examples/load_debug_info.c +++ b/libdrgn/examples/load_debug_info.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -91,6 +92,8 @@ noreturn static void usage(bool error) int main(int argc, char **argv) { + setlocale(LC_ALL, ""); + struct option long_options[] = { {"kernel", no_argument, NULL, 'k'}, {"core", required_argument, NULL, 'c'}, diff --git a/libdrgn/handler.c b/libdrgn/handler.c index 10965ea02..2285b9d1c 100644 --- a/libdrgn/handler.c +++ b/libdrgn/handler.c @@ -69,8 +69,7 @@ struct drgn_error *drgn_handler_list_set_enabled(struct drgn_handler_list *list, size_t count, const char *what) { // Put all of the handlers in a hash table of tagged pointers. - _cleanup_(drgn_handler_table_deinit) - struct drgn_handler_table table = HASH_TABLE_INIT; + HASH_TABLE(drgn_handler_table, table); drgn_handler_list_for_each_registered(handler, list) { uintptr_t entry = (uintptr_t)handler; if (drgn_handler_table_insert(&table, &entry, NULL) < 0) @@ -106,8 +105,7 @@ struct drgn_error *drgn_handler_list_set_enabled(struct drgn_handler_list *list, } // The remaining handlers in the hash table are disabled. Insert them. - for (auto it = drgn_handler_table_first(&table); it.entry; - it = drgn_handler_table_next(it)) { + hash_table_for_each(drgn_handler_table, it, &table) { struct drgn_handler *handler = (struct drgn_handler *)*it.entry; handler->enabled = false; *handlerp = handler; @@ -135,3 +133,31 @@ struct drgn_error *drgn_handler_list_enabled(struct drgn_handler_list *list, *count_ret = n; return NULL; } + +bool drgn_handler_list_disable(struct drgn_handler_list *list, + const char *name) +{ + // Find an enabled handler with the given name. 
+ struct drgn_handler **handlerp = &list->head; + struct drgn_handler *handler = list->head; + for (;;) { + if (!handler || !handler->enabled) + return false; + if (strcmp(handler->name, name) == 0) + break; + handlerp = &handler->next; + handler = handler->next; + } + + // Disable the handler. + handler->enabled = false; + + // Move it to the appropriate part of the list (after all enabled + // handlers). + *handlerp = handler->next; + while (*handlerp && (*handlerp)->enabled) + handlerp = &(*handlerp)->next; + handler->next = *handlerp; + *handlerp = handler; + return true; +} diff --git a/libdrgn/handler.h b/libdrgn/handler.h index 3b7d8b7cb..630583039 100644 --- a/libdrgn/handler.h +++ b/libdrgn/handler.h @@ -51,6 +51,14 @@ struct drgn_error *drgn_handler_list_enabled(struct drgn_handler_list *list, const char ***names_ret, size_t *count_ret); +bool drgn_handler_list_disable(struct drgn_handler_list *list, + const char *name); + +static inline bool drgn_handler_is_last_enabled(struct drgn_handler *handler) +{ + return handler->enabled && (!handler->next || !handler->next->enabled); +} + // Helper to simplify the casting and naming in drgn_handler_list_deinit(). static inline struct drgn_handler * drgn_handler_free_and_next(struct drgn_handler *handler) diff --git a/libdrgn/hash_table.h b/libdrgn/hash_table.h index a524551ca..4e6b68584 100644 --- a/libdrgn/hash_table.h +++ b/libdrgn/hash_table.h @@ -291,6 +291,14 @@ hash_table_delete_iterator_hashed(struct hash_table *table, struct hash_table_iterator it, struct hash_pair hp); +/** + * Delete an entry in a @ref hash_table. + * + * @return An iterator pointing to the next entry in the table. See @ref + * hash_table_next(). + */ +bool hash_table_delete_entry(struct hash_table *table, const entry_type *entry); + /** * Get an iterator pointing to the first entry in a @ref hash_table. 
* @@ -1515,13 +1523,20 @@ static bool table##_delete_hashed(struct table *table, \ return true; \ } \ \ -__attribute__((__unused__)) \ static bool table##_delete(struct table *table, const table##_key_type *key) \ { \ return table##_delete_hashed(table, key, table##_hash(key)); \ } \ \ __attribute__((__unused__)) \ +static inline bool table##_delete_entry(struct table *table, \ + const table##_entry_type *entry) \ +{ \ + const table##_key_type key = table##_entry_to_key(entry); \ + return table##_delete(table, &key); \ +} \ + \ +__attribute__((__unused__)) \ static struct table##_iterator table##_first(struct table *table) \ { \ if (table##_vector_policy) { \ @@ -1679,6 +1694,25 @@ DEFINE_HASH_SET_FUNCTIONS(table, hash_func, eq_func) */ #define HASH_TABLE_INIT { hash_table_empty_chunk } +/** + * Define and initialize an empty @ref hash_table of type @p table_type named @p + * table that is automatically deinitialized when it goes out of scope. + */ +#define HASH_TABLE(table_type, table) \ + __attribute__((__cleanup__(table_type##_deinit))) \ + struct table_type table = HASH_TABLE_INIT + +/** + * Iterate over every entry in a @ref hash_table. + * + * @param[in] table_type Name of hash table type. + * @param[out] it Name of iterator variable. + * @param[in] table Hash table to iterate over. + */ +#define hash_table_for_each(table_type, it, table) \ + for (struct table_type##_iterator it = table_type##_first(table); \ + it.entry; it = table_type##_next(it)) + /** * @defgroup HashTableHelpers Hash table helpers * diff --git a/libdrgn/hexlify.c b/libdrgn/hexlify.c new file mode 100644 index 000000000..5a0d6e96e --- /dev/null +++ b/libdrgn/hexlify.c @@ -0,0 +1,58 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include +#include + +#include "hexlify.h" + +void hexlify(const void *in, size_t in_len, char *out) +{ + static const char nibble_to_hex_digit[] = "0123456789abcdef"; + for (size_t i = 0; i < in_len; i++) { + uint8_t byte = ((uint8_t *)in)[i]; + out[2 * i] = nibble_to_hex_digit[byte >> 4]; + out[2 * i + 1] = nibble_to_hex_digit[byte & 0xf]; + } +} + +char *ahexlify(const void *in, size_t in_len) +{ + size_t out_size; + if (__builtin_mul_overflow(in_len, 2U, &out_size) || + __builtin_add_overflow(out_size, 1U, &out_size)) + return NULL; + char *out = malloc(out_size); + if (!out) + return NULL; + hexlify(in, in_len, out); + out[out_size - 1] = '\0'; + return out; +} + +static inline bool hex_digit_to_nibble(char c, uint8_t *ret) +{ + if ('0' <= c && c <= '9') + *ret = c - '0'; + else if ('a' <= c && c <= 'f') + *ret = c - 'a' + 10; + else if ('A' <= c && c <= 'F') + *ret = c - 'A' + 10; + else + return false; + return true; +} + +bool unhexlify(const char *in, size_t in_len, void *out) +{ + if (in_len % 2) + return false; + for (size_t i = 0; i < in_len; i += 2) { + uint8_t lo, hi; + if (!hex_digit_to_nibble(in[i], &hi) || + !hex_digit_to_nibble(in[i + 1], &lo)) + return false; + ((uint8_t *)out)[i / 2] = (hi << 4) | lo; + } + return true; +} diff --git a/libdrgn/hexlify.h b/libdrgn/hexlify.h new file mode 100644 index 000000000..51421aead --- /dev/null +++ b/libdrgn/hexlify.h @@ -0,0 +1,71 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +/** + * @file + * + * Hexadecimal encoding/decoding. + * + * See @ref Hexlify. + */ + +#ifndef DRGN_HEXLIFY_H +#define DRGN_HEXLIFY_H + +#include +#include + +/** + * @ingroup Internals + * + * @defgroup Hexlify Hexlify + * + * Hexadecimal encoding/decoding. + * + * @{ + */ + +/** + * Encode binary data to a hexadecimal string. 
+ * + * The output string is an even number of lowercase hexadecimal characters with + * no separators. It is not null-terminated. + * + * @param[in] in Input binary data. + * @param[in] in_len Size of @p in in bytes. + * @param[out] out Output hexadecimal string of size `2 * in_len` characters. + * Not null-terminated. + */ +void hexlify(const void *in, size_t in_len, char *out); + +/** + * Allocate and encode binary data to a hexadecimal string. + * + * This is like @ref hexlify(), but it allocates the output string, including a + * terminating null byte. + * + * @param[in] in Input binary data. + * @param[in] in_len Size of @p in in bytes. + * @return Output hexadecimal string, or `NULL` on failure to allocate memory. + * Unlike @ref hexlify(), this *is* null-terminated. On success, it must be + * freed with `free()`. + */ +char *ahexlify(const void *in, size_t in_len); + +/** + * Decode hexadecimal string to binary data. + * + * The input string must be an even number of hexadecimal characters (either + * lowercase or uppercase) with no separators. + * + * @param[in] in Input hexadecimal string. Does not need to be null-terminated. + * @param[in] in_len Number of characters in @p in. + * @param[out] out Returned binary data of size `in_len / 2` bytes. + * @return `true` if data was successfully decoded, `false` if not (either + * because @p in_len was odd or @p in contained non-hexadecimal characters). 
+ */ +bool unhexlify(const char *in, size_t in_len, void *out); + +/** @} */ + +#endif /* DRGN_HEXLIFY_H */ diff --git a/libdrgn/kdump.c b/libdrgn/kdump.c index 09efd9658..ad6d07316 100644 --- a/libdrgn/kdump.c +++ b/libdrgn/kdump.c @@ -7,6 +7,7 @@ #include #include "linux_kernel.h" +#include "plugins.h" #include "program.h" // IWYU pragma: associated #include "util.h" @@ -271,6 +272,7 @@ struct drgn_error *drgn_program_set_kdump(struct drgn_program *prog) if (err) goto err_platform; prog->kdump_ctx = ctx; + drgn_call_plugins_prog("drgn_prog_set", prog); return NULL; err_platform: @@ -299,39 +301,36 @@ struct drgn_error *drgn_program_cache_kdump_threads(struct drgn_program *prog) } /* - * Note that in the following loop we never call kdump_attr_unref() on - * prstatus_ref, nor kdump_blob_unpin() on the prstatus blob that we get - * from libkdumpfile. Since drgn is completely read-only as a consumer - * of that library, we "leak" both the attribute reference and blob pin - * until kdump_free() is called which will clean up everything for us. + * Note that in the following loop we never call kdump_blob_unpin() on + * the prstatus blob that we get from libkdumpfile. Since drgn never + * modifies the PRSTATUS attributes (neither directly nor indirectly), + * we "leak" the blob pin until kdump_free() is called, which will + * clean up everything for us. */ for (i = 0; i < ncpus; i++) { - /* Enough for the longest possible PRSTATUS attribute name. */ - kdump_attr_ref_t prstatus_ref; kdump_attr_t prstatus_attr; void *prstatus_data; size_t prstatus_size; #define FORMAT "cpu.%" PRIuFAST64 ".PRSTATUS" + /* Enough for the longest possible PRSTATUS attribute name. 
*/ char attr_name[sizeof(FORMAT) - sizeof("%" PRIuFAST64) + max_decimal_length(uint_fast64_t) + 1]; snprintf(attr_name, sizeof(attr_name), FORMAT, i); #undef FORMAT - ks = kdump_attr_ref(prog->kdump_ctx, attr_name, &prstatus_ref); - if (ks != KDUMP_OK) { - return drgn_error_format(DRGN_ERROR_OTHER, - "kdump_attr_ref(%s): %s", - attr_name, - kdump_get_err(prog->kdump_ctx)); - } - - ks = kdump_attr_ref_get(prog->kdump_ctx, &prstatus_ref, - &prstatus_attr); +#if KDUMPFILE_VERSION >= KDUMPFILE_MKVER(0, 5, 5) + ks = kdump_get_typed_attr(prog->kdump_ctx, attr_name, + KDUMP_BLOB, &prstatus_attr.val); +#else + prstatus_attr.type = KDUMP_BLOB; + ks = kdump_get_typed_attr(prog->kdump_ctx, attr_name, + &prstatus_attr); +#endif if (ks != KDUMP_OK) { return drgn_error_format(DRGN_ERROR_OTHER, - "kdump_attr_ref_get(%s): %s", + "kdump_get_typed_attr(%s): %s", attr_name, kdump_get_err(prog->kdump_ctx)); } diff --git a/libdrgn/language_c.c b/libdrgn/language_c.c index 6229c46f4..448b2e9c8 100644 --- a/libdrgn/language_c.c +++ b/libdrgn/language_c.c @@ -1049,9 +1049,10 @@ compound_initializer_iter_next(struct initializer_iter *iter_, if (member->name || !(iter->flags & DRGN_FORMAT_OBJECT_MEMBER_NAMES) || !drgn_type_has_members(member_type.type)) { - err = drgn_object_slice(obj_ret, iter->obj, member_type, - bit_offset + member->bit_offset, - member_bit_field_size); + err = drgn_object_fragment(obj_ret, iter->obj, + member_type, + bit_offset + member->bit_offset, + member_bit_field_size); if (err) return err; @@ -1178,9 +1179,9 @@ c_format_compound_object(const struct drgn_object *obj, if (err) goto out; - err = drgn_object_slice(&member, obj, member_type, - new->end[-1].bit_offset, - member_bit_field_size); + err = drgn_object_fragment(&member, obj, member_type, + new->end[-1].bit_offset, + member_bit_field_size); if (err) goto out; @@ -1385,8 +1386,10 @@ array_initializer_iter_next(struct initializer_iter *iter_, if (iter->i >= iter->length) return &drgn_stop; - err = 
drgn_object_slice(obj_ret, iter->obj, iter->element_type, - iter->i * iter->element_bit_size, 0); + err = drgn_object_fragment(obj_ret, iter->obj, + iter->element_type, + iter->i * iter->element_bit_size, + 0); if (err) return err; iter->i++; @@ -1499,11 +1502,11 @@ c_format_array_object(const struct drgn_object *obj, do { bool zero; - err = drgn_object_slice(&element, obj, - iter.element_type, - (iter.length - 1) * - iter.element_bit_size, - 0); + err = drgn_object_fragment(&element, obj, + iter.element_type, + (iter.length - 1) + * iter.element_bit_size, + 0); if (err) return err; @@ -1533,6 +1536,19 @@ c_format_function_object(const struct drgn_object *obj, return NULL; } +static const char *drgn_absence_reason_str(enum drgn_absence_reason reason) +{ + SWITCH_ENUM (reason) { + case DRGN_ABSENCE_REASON_OPTIMIZED_OUT: + return ""; + case DRGN_ABSENCE_REASON_NOT_IMPLEMENTED: + return ""; + case DRGN_ABSENCE_REASON_OTHER: + default: + return ""; + } +} + static struct drgn_error * c_format_object_impl(const struct drgn_object *obj, size_t indent, size_t one_line_columns, size_t multi_line_columns, @@ -1571,7 +1587,8 @@ c_format_object_impl(const struct drgn_object *obj, size_t indent, } if (obj->kind == DRGN_OBJECT_ABSENT) { - if (!string_builder_append(sb, "")) + if (!string_builder_append(sb, + drgn_absence_reason_str(obj->absence_reason))) return &drgn_enomem; return NULL; } @@ -3395,7 +3412,8 @@ static struct drgn_error *c_op_cast(struct drgn_object *res, switch (drgn_type_kind(type.underlying_type)) { case DRGN_TYPE_VOID: - drgn_object_set_absent_internal(res, &type); + drgn_object_set_absent_internal(res, &type, + DRGN_ABSENCE_REASON_OTHER); return NULL; case DRGN_TYPE_BOOL: { bool truthy; @@ -3470,7 +3488,7 @@ c_op_implicit_convert(struct drgn_object *res, return err; if (!compatible) goto incompatible_type_error; - return drgn_object_slice_internal(res, obj, &type, 0, 0); + return drgn_object_fragment_internal(res, obj, &type, 0, 0); } case 
DRGN_TYPE_POINTER: { if (drgn_type_kind(obj_type.underlying_type) diff --git a/libdrgn/linux_kernel.c b/libdrgn/linux_kernel.c index 94bc2dc70..d1af14329 100644 --- a/libdrgn/linux_kernel.c +++ b/libdrgn/linux_kernel.c @@ -1,6 +1,7 @@ // Copyright (c) Meta Platforms, Inc. and affiliates. // SPDX-License-Identifier: LGPL-2.1-or-later +#include #include #include #include @@ -9,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -16,6 +18,7 @@ #include #include +#include "array.h" #include "binary_buffer.h" #include "cleanup.h" #include "debug_info.h" @@ -25,8 +28,10 @@ #include "error.h" #include "hash_table.h" #include "helpers.h" +#include "hexlify.h" #include "io.h" #include "linux_kernel.h" +#include "log.h" #include "platform.h" #include "program.h" #include "type.h" @@ -382,6 +387,24 @@ static struct drgn_error *linux_kernel_get_vmemmap(struct drgn_program *prog, #include "linux_kernel_object_find.inc" // IWYU pragma: keep +// Return whether the given kernel is from Fedora. We check whether the release +// matches the regular expression /.fc[0-9]+(.|$)/ +static bool is_fedora_kernel(const char *osrelease) +{ + const char *p = osrelease; + while ((p = strstr(p, ".fc"))) { + p += sizeof(".fc") - 1; + if (isdigit(*p)) { + do { + p++; + } while (isdigit(*p)); + if (*p == '.' || *p == '\0') + return true; + } + } + return false; +} + struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog) { struct drgn_error *err; @@ -393,1376 +416,1860 @@ struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog) return err; if (!prog->lang) prog->lang = &drgn_language_c; + + // At the time of writing, only Fedora's debuginfod server provides fast + // Linux kernel downloads. It's painfully slow everywhere else, so + // disable it. 
+ if (!is_fedora_kernel(prog->vmcoreinfo.osrelease) + && drgn_handler_list_disable(&prog->dbinfo.debug_info_finders, + "debuginfod")) + drgn_log_debug(prog, "disabled debuginfod for Linux kernel"); + return NULL; } -struct kernel_module_iterator { - char *name; - uint64_t start, end; - void *build_id_buf; - size_t build_id_buf_capacity; - /* `struct module` type. */ - struct drgn_qualified_type module_type; - /* Current `struct module` (not a pointer). */ - struct drgn_object mod; - /* `struct list_head *` in next module to return. */ - struct drgn_object node; - /* Temporary objects reused for various purposes. */ - struct drgn_object tmp1, tmp2, tmp3; - /* Address of `struct list_head modules`. */ - uint64_t head; - bool use_sys_module; - bool use_sys_module_sections; +/* + * /lib/modules/$(uname -r)/modules.dep.bin maps all installed kernel modules to + * their filesystem path (and dependencies, which we don't care about). It is + * generated by depmod; the format is a fairly simple serialized radix tree. + * + * modules.dep(5) contains a warning: "These files are not intended for editing + * or use by any additional utilities as their format is subject to change in + * the future." But, the format hasn't changed since 2009, and pulling in + * libkmod is overkill since we only need a very small subset of its + * functionality (plus our minimal parser is more efficient). If the format + * changes in the future, we can reevaluate this. 
+ */ + +static void depmod_index_deinit(struct depmod_index *depmod) +{ + if (depmod->len > 0) + munmap(depmod->addr, depmod->len); + free(depmod->path); +} + +struct depmod_index_buffer { + struct binary_buffer bb; + struct depmod_index *depmod; }; -static void kernel_module_iterator_deinit(struct kernel_module_iterator *it) +static struct drgn_error *depmod_index_buffer_error(struct binary_buffer *bb, + const char *pos, + const char *message) +{ + struct depmod_index_buffer *buffer = + container_of(bb, struct depmod_index_buffer, bb); + return drgn_error_format(DRGN_ERROR_OTHER, "%s: %#tx: %s", + buffer->depmod->path, + pos - (const char *)buffer->depmod->addr, + message); +} + +static void depmod_index_buffer_init(struct depmod_index_buffer *buffer, + struct depmod_index *depmod) { - drgn_object_deinit(&it->tmp3); - drgn_object_deinit(&it->tmp2); - drgn_object_deinit(&it->tmp1); - drgn_object_deinit(&it->node); - drgn_object_deinit(&it->mod); - free(it->build_id_buf); - free(it->name); + binary_buffer_init(&buffer->bb, depmod->addr, depmod->len, false, + depmod_index_buffer_error); + buffer->depmod = depmod; } -static struct drgn_error * -kernel_module_iterator_init(struct kernel_module_iterator *it, - struct drgn_program *prog, bool use_sys_module) +static struct drgn_error *depmod_index_validate(struct depmod_index *depmod) { struct drgn_error *err; - - it->name = NULL; - it->build_id_buf = NULL; - it->build_id_buf_capacity = 0; - it->use_sys_module = use_sys_module; - it->use_sys_module_sections = use_sys_module; - err = drgn_program_find_type(prog, "struct module", NULL, - &it->module_type); - if (err) + struct depmod_index_buffer buffer; + depmod_index_buffer_init(&buffer, depmod); + uint32_t magic; + if ((err = binary_buffer_next_u32(&buffer.bb, &magic))) + return err; + if (magic != 0xb007f457) { + return binary_buffer_error(&buffer.bb, + "invalid magic 0x%" PRIx32, magic); + } + uint32_t version; + if ((err = binary_buffer_next_u32(&buffer.bb, 
&version))) return err; + if (version != 0x00020001) { + return binary_buffer_error(&buffer.bb, + "unknown version 0x%" PRIx32, + version); + } + return NULL; +} - drgn_object_init(&it->mod, prog); - drgn_object_init(&it->node, prog); - drgn_object_init(&it->tmp1, prog); - drgn_object_init(&it->tmp2, prog); - drgn_object_init(&it->tmp3, prog); +static struct drgn_error *depmod_index_init(struct depmod_index *depmod, + char *_path, int fd) +{ + struct drgn_error *err; + _cleanup_free_ char *path = _path; // Take ownership of path. - err = drgn_program_find_object(prog, "modules", NULL, - DRGN_FIND_OBJECT_VARIABLE, &it->node); - if (err) - goto err; - if (it->node.kind != DRGN_OBJECT_REFERENCE) { - err = drgn_error_create(DRGN_ERROR_OTHER, - "can't get address of modules list"); - goto err; - } - it->head = it->node.address; - err = drgn_object_member(&it->node, &it->node, "next"); - if (err) - goto err; - err = drgn_object_read(&it->node, &it->node); - if (err) - goto err; + struct stat st; + if (fstat(fd, &st) == -1) + return drgn_error_create_os("fstat", errno, path); - return NULL; + if (st.st_size > SIZE_MAX) + return &drgn_enomem; + + void *addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (addr == MAP_FAILED) + return drgn_error_create_os("mmap", errno, path); -err: - kernel_module_iterator_deinit(it); + depmod->path = no_cleanup_ptr(path); + depmod->addr = addr; + depmod->len = st.st_size; + err = depmod_index_validate(depmod); + if (err) { + depmod_index_deinit(depmod); + depmod->path = NULL; + depmod->len = 0; + } return err; } -/** - * Get the the next loaded kernel module. - * - * After this is called, @c it->name is set to the name of the kernel module, - * and @c it->start and @c it->end are set to the address range of the kernel - * module. These are valid until the next time this is called or the iterator is - * destroyed. +/* + * Look up the path of the kernel module with the given name. 
* - * @return @c NULL on success, non-@c NULL on error. In particular, when there - * are no more modules, returns &@ref drgn_stop. + * @param[in] name Name of the kernel module. + * @param[out] path_ret Returned path of the kernel module, relative to + * /lib/modules/$(uname -r). This is @em not null-terminated. @c NULL if not + * found. + * @param[out] len_ret Returned length of @p path_ret. */ -static struct drgn_error * -kernel_module_iterator_next(struct kernel_module_iterator *it) +static struct drgn_error *depmod_index_find(struct depmod_index *depmod, + const char *name, + const char **path_ret, + size_t *len_ret) { - struct drgn_error *err; - struct drgn_program *prog = drgn_object_program(&it->mod); - - uint64_t addr; - err = drgn_object_read_unsigned(&it->node, &addr); - if (err) - return err; - if (addr == it->head) - return &drgn_stop; - - err = drgn_object_container_of(&it->mod, &it->node, it->module_type, - "list"); - if (err) - return err; - err = drgn_object_dereference(&it->mod, &it->mod); - if (err) - return err; - // We need several fields from the `struct module`. Especially for - // /proc/kcore, it is faster to read the entire structure (which is <1kB - // as of Linux 6.0) from the core dump all at once than it is to read - // each field individually. - err = drgn_object_read(&it->mod, &it->mod); - if (err) - return err; - err = drgn_object_member(&it->node, &it->mod, "list"); - if (err) - return err; - err = drgn_object_member(&it->node, &it->node, "next"); - if (err) - return err; + static const uint32_t INDEX_NODE_MASK = UINT32_C(0x0fffffff); + static const uint32_t INDEX_NODE_CHILDS = UINT32_C(0x20000000); + static const uint32_t INDEX_NODE_VALUES = UINT32_C(0x40000000); + static const uint32_t INDEX_NODE_PREFIX = UINT32_C(0x80000000); - // Set tmp1 to the module base address and tmp2 to the size. 
- err = drgn_object_member(&it->tmp1, &it->mod, "mem"); - if (!err) { - // Since Linux kernel commit ac3b43283923 ("module: replace - // module_layout with module_memory") (in v6.4), the base and - // size are in the `struct module_memory mem[MOD_TEXT]` member - // of `struct module`. - if (!prog->mod_text_cached) { - err = drgn_program_find_object(drgn_object_program(&it->mod), - "MOD_TEXT", NULL, - DRGN_FIND_OBJECT_CONSTANT, - &it->tmp2); - if (err) - return err; - union drgn_value mod_text_value; - err = drgn_object_read_integer(&it->tmp2, - &mod_text_value); - if (err) - return err; - prog->mod_text = mod_text_value.uvalue; - prog->mod_text_cached = true; - } + struct drgn_error *err; + struct depmod_index_buffer buffer; + depmod_index_buffer_init(&buffer, depmod); - err = drgn_object_subscript(&it->tmp1, &it->tmp1, - prog->mod_text); - if (err) - return err; - err = drgn_object_member(&it->tmp2, &it->tmp1, "size"); - if (err) - return err; - err = drgn_object_member(&it->tmp1, &it->tmp1, "base"); - if (err) + /* depmod_index_validate() already checked that this is within bounds. */ + buffer.bb.pos += 8; + uint32_t offset; + for (;;) { + if ((err = binary_buffer_next_u32(&buffer.bb, &offset))) return err; - } else if (err->code == DRGN_ERROR_LOOKUP) { - // Since Linux kernel commit 7523e4dc5057 ("module: use a - // structure to encapsulate layout.") (in v4.5), the base and - // size are in the `struct module_layout core_layout` member of - // `struct module`. 
- drgn_error_destroy(err); + if ((offset & INDEX_NODE_MASK) > depmod->len) { + return binary_buffer_error(&buffer.bb, + "offset is out of bounds"); + } + buffer.bb.pos = (const char *)depmod->addr + (offset & INDEX_NODE_MASK); - err = drgn_object_member(&it->tmp1, &it->mod, "core_layout"); - if (!err) { - err = drgn_object_member(&it->tmp2, &it->tmp1, "size"); - if (err) - return err; - err = drgn_object_member(&it->tmp1, &it->tmp1, "base"); - if (err) + if (offset & INDEX_NODE_PREFIX) { + const char *prefix; + size_t prefix_len; + if ((err = binary_buffer_next_string(&buffer.bb, + &prefix, + &prefix_len))) return err; - } else if (err->code == DRGN_ERROR_LOOKUP) { - // Before that, they are directly in the `struct - // module`. - drgn_error_destroy(err); + if (strncmp(name, prefix, prefix_len) != 0) + goto not_found; + name += prefix_len; + } - err = drgn_object_member(&it->tmp2, &it->mod, - "core_size"); - if (err) - return err; - err = drgn_object_member(&it->tmp1, &it->mod, - "module_core"); - if (err) + if (offset & INDEX_NODE_CHILDS) { + uint8_t first, last; + if ((err = binary_buffer_next_u8(&buffer.bb, &first)) || + (err = binary_buffer_next_u8(&buffer.bb, &last))) return err; + if (*name) { + uint8_t cur = *name; + if (cur < first || cur > last) + goto not_found; + if ((err = binary_buffer_skip(&buffer.bb, + 4 * (cur - first)))) + return err; + name++; + continue; + } else { + if ((err = binary_buffer_skip(&buffer.bb, + 4 * (last - first + 1)))) + return err; + break; + } + } else if (*name) { + goto not_found; } else { - return err; + break; } - } else { - return err; } - err = drgn_object_read_unsigned(&it->tmp1, &it->start); - if (err) - return err; - err = drgn_object_read_unsigned(&it->tmp2, &it->end); - if (err) - return err; - it->end += it->start; + if (!(offset & INDEX_NODE_VALUES)) + goto not_found; - err = drgn_object_member(&it->tmp2, &it->mod, "name"); - if (err) + uint32_t value_count; + if ((err = binary_buffer_next_u32(&buffer.bb, 
&value_count))) return err; - char *name; - err = drgn_object_read_c_string(&it->tmp2, &name); - if (err) + if (!value_count) + goto not_found; /* Or is this malformed? */ + + /* Skip over priority. */ + if ((err = binary_buffer_skip(&buffer.bb, 4))) return err; - free(it->name); - it->name = name; + + const char *colon = memchr(buffer.bb.pos, ':', + buffer.bb.end - buffer.bb.pos); + if (!colon) { + return binary_buffer_error(&buffer.bb, + "expected string containing ':'"); + } + *path_ret = buffer.bb.pos; + *len_ret = colon - buffer.bb.pos; + return NULL; + +not_found: + *path_ret = NULL; return NULL; } -static struct drgn_error * -kernel_module_iterator_gnu_build_id_live(struct kernel_module_iterator *it, - const void **build_id_ret, - size_t *build_id_len_ret) -{ - struct drgn_error *err; +DEFINE_VECTOR_FUNCTIONS(char_p_vector); - char *path; - if (asprintf(&path, "/sys/module/%s/notes", it->name) == -1) - return &drgn_enomem; - DIR *dir = opendir(path); - if (!dir) { - err = drgn_error_create_os("opendir", errno, path); - goto out_path; - } +DEFINE_HASH_MAP_FUNCTIONS(drgn_kmod_walk_module_map, c_string_key_hash_pair, + c_string_key_eq); - struct dirent *ent; - while ((errno = 0, ent = readdir(dir))) { - if (ent->d_type == DT_DIR) - continue; +struct drgn_kmod_walk_stack_entry { + DIR *dir; + size_t path_len; +}; - int fd = openat(dirfd(dir), ent->d_name, O_RDONLY); - if (fd == -1) { - err = drgn_error_format_os("openat", errno, "%s/%s", - path, ent->d_name); - goto out; - } +DEFINE_VECTOR_FUNCTIONS(drgn_kmod_walk_stack); - struct stat st; - if (fstat(fd, &st) < 0) { - err = drgn_error_format_os("fstat", errno, "%s/%s", - path, ent->d_name); - close(fd); - goto out; - } +static inline struct hash_pair +drgn_kmod_walk_inode_hash_pair(const struct drgn_kmod_walk_inode *entry) +{ + return hash_pair_from_avalanching_hash(hash_combine(entry->dev, entry->ino)); +} - if (st.st_size > SIZE_MAX || - !alloc_or_reuse(&it->build_id_buf, - &it->build_id_buf_capacity, 
st.st_size)) { - err = &drgn_enomem; - close(fd); - goto out; - } +static inline bool +drgn_kmod_walk_inode_eq(const struct drgn_kmod_walk_inode *a, + const struct drgn_kmod_walk_inode *b) +{ + return a->dev == b->dev && a->ino == b->ino; +} - ssize_t r = read_all(fd, it->build_id_buf, st.st_size); - if (r < 0) { - err = drgn_error_format_os("read", errno, "%s/%s", path, - ent->d_name); - close(fd); - goto out; - } - close(fd); - - *build_id_len_ret = - parse_gnu_build_id_from_notes(it->build_id_buf, r, 4, - false, build_id_ret); - if (*build_id_len_ret) { - err = NULL; - goto out; - } - } - if (errno) { - err = drgn_error_create_os("readdir", errno, path); - } else { - *build_id_ret = NULL; - *build_id_len_ret = 0; - err = NULL; - } +DEFINE_HASH_SET_FUNCTIONS(drgn_kmod_walk_inode_set, + drgn_kmod_walk_inode_hash_pair, + drgn_kmod_walk_inode_eq); -out: - closedir(dir); -out_path: - free(path); - return err; +static void +drgn_kmod_walk_module_map_entry_deinit(struct drgn_kmod_walk_module_map_entry *entry) +{ + vector_for_each(char_p_vector, path, &entry->value) + free(*path); + char_p_vector_deinit(&entry->value); } -static struct drgn_error * -kernel_module_iterator_gnu_build_id(struct kernel_module_iterator *it, - const void **build_id_ret, - size_t *build_id_len_ret) +static void +drgn_kmod_walk_state_deinit(struct drgn_kmod_walk_state *state) { - if (it->use_sys_module) { - return kernel_module_iterator_gnu_build_id_live(it, - build_id_ret, - build_id_len_ret); - } + drgn_kmod_walk_inode_set_deinit(&state->visited_dirs); + string_builder_deinit(&state->path); + vector_for_each(drgn_kmod_walk_stack, entry, &state->stack) + closedir(entry->dir); + drgn_kmod_walk_stack_deinit(&state->stack); + hash_table_for_each(drgn_kmod_walk_module_map, it, &state->modules) + drgn_kmod_walk_module_map_entry_deinit(it.entry); + drgn_kmod_walk_module_map_deinit(&state->modules); +} - struct drgn_error *err; - struct drgn_program *prog = drgn_object_program(&it->mod); - const bool 
bswap = drgn_platform_bswap(&prog->platform); - - DRGN_OBJECT(attrs, prog); - DRGN_OBJECT(attr, prog); - DRGN_OBJECT(tmp, prog); - - // n = mod->notes_attrs->notes - uint64_t n; - err = drgn_object_member(&attrs, &it->mod, "notes_attrs"); - if (err) - return err; - err = drgn_object_member_dereference(&tmp, &attrs, "notes"); - if (err) - return err; - err = drgn_object_read_unsigned(&tmp, &n); - if (err) - return err; +void +drgn_standard_debug_info_find_state_deinit(struct drgn_standard_debug_info_find_state *state) +{ + drgn_kmod_walk_state_deinit(&state->kmod_walk); + depmod_index_deinit(&state->modules_dep); +} - // attrs = mod->notes_attrs->attrs - err = drgn_object_member_dereference(&attrs, &attrs, "attrs"); - if (err) - return err; +static struct drgn_error * +drgn_module_try_vmlinux_in_debug_directories(struct drgn_module *module, + const struct drgn_debug_info_options *options, + struct string_builder *sb) +{ + struct drgn_error *err; + // Paths relative to the debug directory where vmlinux might be + // installed. 
+ static const char * const debug_dir_paths[] = { + // Debian, Ubuntu: + "/boot/vmlinux-%s", + // Fedora, CentOS: + "/lib/modules/%s/vmlinux", + // SUSE: + "/lib/modules/%s/vmlinux.debug", + }; + for (size_t i = 0; options->directories[i]; i++) { + const char *debug_dir = options->directories[i]; + sb->len = 0; + if (!string_builder_append(sb, debug_dir)) + return &drgn_enomem; + size_t debug_dir_len = sb->len; + array_for_each(format, debug_dir_paths) { + sb->len = debug_dir_len; + if (!string_builder_appendf(sb, *format, + module->prog->vmcoreinfo.osrelease) + || !string_builder_null_terminate(sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, options, + sb->str, -1, true, + NULL); + if (err || !drgn_module_wants_file(module)) + return err; + } + } + return NULL; +} - for (uint64_t i = 0; i < n; i++) { - // attr = attrs[i] - err = drgn_object_subscript(&attr, &attrs, i); - if (err) - return err; +struct drgn_error * +drgn_module_try_vmlinux_files(struct drgn_module *module, + const struct drgn_debug_info_options *options) +{ + struct drgn_error *err; + struct drgn_program *prog = module->prog; - // address = attr.private - err = drgn_object_member(&tmp, &attr, "private"); - if (err) - return err; - uint64_t address; - err = drgn_object_read_unsigned(&tmp, &address); - if (err) - return err; + const char *osrelease = prog->vmcoreinfo.osrelease; + STRING_BUILDER(sb); + for (size_t i = 0; options->kernel_directories[i]; i++) { + const char *kernel_dir = options->kernel_directories[i]; - // size = attr.size - err = drgn_object_member(&tmp, &attr, "size"); - if (err) - return err; - uint64_t size; - err = drgn_object_read_unsigned(&tmp, &size); - if (err) - return err; + if (kernel_dir[0]) { + sb.len = 0; + if (!string_builder_append(&sb, kernel_dir)) + return &drgn_enomem; + } else { + // Empty path. Try under the debug directories first. 
+ err = drgn_module_try_vmlinux_in_debug_directories(module, + options, + &sb); + if (err || !drgn_module_wants_file(module)) + return err; - if (size > SIZE_MAX || - !alloc_or_reuse(&it->build_id_buf, - &it->build_id_buf_capacity, size)) - return &drgn_enomem; + // Try /boot/vmlinux-$osrelease. + sb.len = 0; + if (!string_builder_append(&sb, "/boot/vmlinux-") + || !string_builder_append(&sb, osrelease) + || !string_builder_null_terminate(&sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, options, + sb.str, -1, true, + NULL); + if (err || !drgn_module_wants_file(module)) + return err; - err = drgn_program_read_memory(prog, it->build_id_buf, address, - size, false); - if (err) - return err; + // Try /lib/modules/$osrelease as the kernel directory. + sb.len = 0; + if (!string_builder_append(&sb, "/lib/modules/") + || !string_builder_append(&sb, osrelease)) + return &drgn_enomem; + } - *build_id_len_ret = - parse_gnu_build_id_from_notes(it->build_id_buf, size, 4, - bswap, build_id_ret); - if (*build_id_len_ret) - return NULL; + // Paths relative to the kernel directory where vmlinux might be + // installed. + static const char * const kernel_dir_paths[] = { + "/build/vmlinux", + "/vmlinux", + }; + size_t kernel_dir_len = sb.len; + array_for_each(path, kernel_dir_paths) { + if (!string_builder_append(&sb, *path) + || !string_builder_null_terminate(&sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, options, + sb.str, -1, true, + NULL); + if (err || !drgn_module_wants_file(module)) + return err; + sb.len = kernel_dir_len; + } } - *build_id_ret = NULL; - *build_id_len_ret = 0; + return NULL; } -struct kernel_module_section_iterator { - struct kernel_module_iterator *kmod_it; - bool yielded_percpu; - /* /sys/module/$module/sections directory or NULL. */ - DIR *sections_dir; - /* If not using /sys/module/$module/sections. 
*/ - uint64_t i; - uint64_t nsections; - char *name; -}; - static struct drgn_error * -kernel_module_section_iterator_init_no_sys_module(struct kernel_module_section_iterator *it, - struct kernel_module_iterator *kmod_it) +drgn_open_modules_dep(struct drgn_program *prog, + const struct drgn_debug_info_options *options, + struct depmod_index *modules_dep) { struct drgn_error *err; - it->sections_dir = NULL; - it->i = 0; - it->name = NULL; - /* it->nsections = mod->sect_attrs->nsections */ - err = drgn_object_member(&kmod_it->tmp1, &kmod_it->mod, "sect_attrs"); - if (err) - return err; - err = drgn_object_member_dereference(&kmod_it->tmp2, &kmod_it->tmp1, - "nsections"); - if (err) - return err; - err = drgn_object_read_unsigned(&kmod_it->tmp2, &it->nsections); - if (err) - return err; - /* kmod_it->tmp1 = mod->sect_attrs->attrs */ - return drgn_object_member_dereference(&kmod_it->tmp1, &kmod_it->tmp1, - "attrs"); + if (modules_dep->addr) + return NULL; + + STRING_BUILDER(sb); + _cleanup_close_ int fd = -1; + for (size_t i = 0; options->kernel_directories[i]; i++) { + const char *kernel_dir = options->kernel_directories[i]; + + sb.len = 0; + if (kernel_dir[0]) { + if (!string_builder_append(&sb, kernel_dir)) + return &drgn_enomem; + } else { + // Empty path. Try /lib/modules/$osrelease. + if (!string_builder_append(&sb, "/lib/modules/") + || !string_builder_append(&sb, + prog->vmcoreinfo.osrelease)) + return &drgn_enomem; + } + if (!string_builder_append(&sb, "/modules.dep.bin") + || !string_builder_null_terminate(&sb)) + return &drgn_enomem; + fd = open(sb.str, O_RDONLY); + if (fd >= 0) + break; + drgn_log_debug(prog, "%s: %m", sb.str); + } + if (fd < 0) { + drgn_log_debug(prog, "couldn't find depmod index"); +fail: + // Set addr so that we don't try again. 
+ modules_dep->addr = MAP_FAILED; + return NULL; + } + + err = depmod_index_init(modules_dep, string_builder_steal(&sb), fd); + if (err) { + if (drgn_error_is_fatal(err)) + return err; + drgn_error_log_warning(prog, err, + "couldn't open depmod index: "); + drgn_error_destroy(err); + goto fail; + } + drgn_log_debug(prog, "found depmod index %s", modules_dep->path); + return NULL; } static struct drgn_error * -kernel_module_section_iterator_init(struct kernel_module_section_iterator *it, - struct kernel_module_iterator *kmod_it) +drgn_module_try_depmod_in_debug_directories(struct drgn_module *module, + const struct drgn_debug_info_options *options, + struct string_builder *sb, + const char *depmod_path, size_t ko_len) { - it->kmod_it = kmod_it; - it->yielded_percpu = false; - if (kmod_it->use_sys_module_sections) { - char *path; - if (asprintf(&path, "/sys/module/%s/sections", - kmod_it->name) == -1) + struct drgn_error *err; + for (size_t i = 0; options->directories[i]; i++) { + const char *debug_dir = options->directories[i]; + sb->len = 0; + // Debian, Ubuntu: + // $debug_dir/lib/modules/$(uname -r)/$ko_name + if (!string_builder_append(sb, debug_dir) + || !string_builder_append(sb, "/lib/modules/") + || !string_builder_append(sb, + module->prog->vmcoreinfo.osrelease) + || !string_builder_appendc(sb, '/') + || !string_builder_appendn(sb, depmod_path, ko_len) + || !string_builder_null_terminate(sb)) return &drgn_enomem; - it->sections_dir = opendir(path); - free(path); - if (!it->sections_dir) { - return drgn_error_format_os("opendir", errno, - "/sys/module/%s/sections", - kmod_it->name); - } - return NULL; - } else { - return kernel_module_section_iterator_init_no_sys_module(it, kmod_it); + err = drgn_module_try_standard_file(module, options, sb->str, + -1, true, NULL); + if (err || !drgn_module_wants_file(module)) + return err; + + // Fedora, CentOS, SUSE: + // $debug_dir/lib/modules/$(uname -r)/$ko_name.debug + if (!string_builder_append(sb, ".debug") + || 
!string_builder_null_terminate(sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, options, sb->str, + -1, true, NULL); + if (err || !drgn_module_wants_file(module)) + return err; } + return NULL; } -static void -kernel_module_section_iterator_deinit(struct kernel_module_section_iterator *it) +static struct drgn_error * +drgn_module_try_linux_kmod_depmod(struct drgn_module *module, + const struct drgn_debug_info_options *options, + struct drgn_standard_debug_info_find_state *state) { - if (it->sections_dir) - closedir(it->sections_dir); + struct drgn_error *err; + struct drgn_program *prog = module->prog; + + const char *depmod_path; + size_t depmod_path_len; + err = depmod_index_find(&state->modules_dep, module->name, &depmod_path, + &depmod_path_len); + if (err) { + drgn_error_log_warning(prog, err, + "couldn't parse depmod index: "); + drgn_error_destroy(err); + return NULL; + } + if (!depmod_path) { + drgn_log_debug(prog, "couldn't find %s in depmod index", + module->name); + return NULL; + } + drgn_log_debug(prog, "found %.*s in depmod index", + depmod_path_len > INT_MAX + ? INT_MAX : (int)depmod_path_len, + depmod_path); + + // Get the length of the path with one extension after ".ko" removed if + // present (e.g., ".gz", ".xz", or ".zst"). 
+ const char *name = memrchr(depmod_path, '/', depmod_path_len); + if (name) + name = name + 1; else - free(it->name); + name = depmod_path; + const char *name_end = depmod_path + depmod_path_len; + size_t ko_len = depmod_path_len; + for (int j = 0; j < 2; j++) { + char *dot = memrchr(name, '.', name_end - name); + if (!dot) + break; + if (name_end - dot == 3 + && dot[1] == 'k' && dot[2] == 'o') { + ko_len = name_end - depmod_path; + break; + } + name_end = dot; + } + + STRING_BUILDER(sb); + for (size_t i = 0; options->kernel_directories[i]; i++) { + const char *kernel_dir = options->kernel_directories[i]; + + if (kernel_dir[0]) { + sb.len = 0; + if (!string_builder_append(&sb, kernel_dir)) + return &drgn_enomem; + } else { + // Empty path. Try under the debug directories first. + err = drgn_module_try_depmod_in_debug_directories(module, + options, + &sb, + depmod_path, + ko_len); + if (err || !drgn_module_wants_file(module)) + return err; + + // Try /lib/modules/$osrelease as the kernel directory. 
+ sb.len = 0; + if (!string_builder_append(&sb, "/lib/modules/") + || !string_builder_append(&sb, + prog->vmcoreinfo.osrelease)) + return &drgn_enomem; + } + if (!string_builder_appendc(&sb, '/') + || !string_builder_appendn(&sb, depmod_path, depmod_path_len) + || !string_builder_null_terminate(&sb)) + return &drgn_enomem; + err = drgn_module_try_standard_file(module, options, sb.str, -1, + true, NULL); + if (err || !drgn_module_wants_file(module)) + return err; + } + return NULL; } static struct drgn_error * -kernel_module_section_iterator_next_live(struct kernel_module_section_iterator *it, - const char **name_ret, - uint64_t *address_ret) +drgn_kmod_walk_next_dir(struct drgn_program *prog, + const struct drgn_debug_info_options *options, + struct drgn_kmod_walk_state *state) { - struct dirent *ent; - while ((errno = 0, ent = readdir(it->sections_dir))) { - if (ent->d_type == DT_DIR) - continue; - if (ent->d_type == DT_UNKNOWN) { - struct stat st; - - if (fstatat(dirfd(it->sections_dir), ent->d_name, &st, - 0) == -1) { - return drgn_error_format_os("fstatat", errno, - "/sys/module/%s/sections/%s", - it->kmod_it->name, - ent->d_name); + struct string_builder *path = &state->path; + for (;;) { + if (state->next_debug_dir) { + const char *debug_dir = *state->next_debug_dir++; + path->len = 0; + if (debug_dir) { + if (!string_builder_append(path, debug_dir)) + return &drgn_enomem; + } else { + state->next_debug_dir = NULL; } - if (S_ISDIR(st.st_mode)) + if (!string_builder_append(path, "/lib/modules/") + || !string_builder_append(path, + prog->vmcoreinfo.osrelease)) + return &drgn_enomem; + } else { + const char *kernel_dir = *state->next_kernel_dir; + if (!kernel_dir) + return &drgn_stop; + state->next_kernel_dir++; + if (kernel_dir[0]) { + path->len = 0; + if (!string_builder_append(path, kernel_dir)) + return &drgn_enomem; + } else { + state->next_debug_dir = options->directories; continue; + } } - int fd = openat(dirfd(it->sections_dir), ent->d_name, O_RDONLY); - 
if (fd == -1) { - return drgn_error_format_os("openat", errno, - "/sys/module/%s/sections/%s", - it->kmod_it->name, - ent->d_name); - } - FILE *file = fdopen(fd, "r"); - if (!file) { - close(fd); - return drgn_error_create_os("fdopen", errno, NULL); + if (!string_builder_null_terminate(path)) + return &drgn_enomem; + struct drgn_kmod_walk_stack_entry entry = { + .dir = opendir(path->str), + .path_len = path->len, + }; + if (!entry.dir) { + drgn_log_debug(prog, "opendir: %s: %m", path->str); + continue; } - int ret = fscanf(file, "%" SCNx64, address_ret); - fclose(file); - if (ret != 1) { - return drgn_error_format(DRGN_ERROR_OTHER, - "could not parse /sys/module/%s/sections/%s", - it->kmod_it->name, - ent->d_name); + if (!drgn_kmod_walk_stack_append(&state->stack, &entry)) { + closedir(entry.dir); + return &drgn_enomem; } - *name_ret = ent->d_name; + drgn_log_debug(prog, "searching for kernel modules in %s", + path->str); return NULL; } - if (errno) { - return drgn_error_format_os("readdir", errno, - "/sys/module/%s/sections", - it->kmod_it->name); - } else { - return &drgn_stop; - } } static struct drgn_error * -kernel_module_section_iterator_next(struct kernel_module_section_iterator *it, - const char **name_ret, - uint64_t *address_ret) +drgn_kmod_walk(struct drgn_program *prog, + const struct drgn_debug_info_options *options, + struct drgn_kmod_walk_state *state, + struct drgn_kmod_walk_module_map_entry *current) { struct drgn_error *err; - struct kernel_module_iterator *kmod_it = it->kmod_it; + struct string_builder *path = &state->path; - // As of Linux 6.0, the .data..percpu section is not included in the - // section attributes. (kernel/module/sysfs.c:add_sect_attrs() only - // creates attributes for sections with the SHF_ALLOC flag set, but - // kernel/module/main.c:layout_and_allocate() clears the SHF_ALLOC flag - // for the .data..percpu section.) However, we need this address so that - // global per-CPU variables will be relocated correctly. 
Get it from - // `struct module`. - if (!it->yielded_percpu) { - it->yielded_percpu = true; - err = drgn_object_member(&kmod_it->tmp2, &kmod_it->mod, - "percpu"); - if (!err) { - err = drgn_object_read_unsigned(&kmod_it->tmp2, address_ret); + for (;;) { + if (drgn_kmod_walk_stack_empty(&state->stack)) { + err = drgn_kmod_walk_next_dir(prog, options, state); if (err) return err; - // struct module::percpu is NULL if the module doesn't - // have any per-CPU data. - if (*address_ret) { - *name_ret = ".data..percpu"; - return NULL; + } + + struct drgn_kmod_walk_stack_entry *top = + drgn_kmod_walk_stack_last(&state->stack); + errno = 0; + struct dirent *ent = readdir(top->dir); + if (!ent) { + if (errno) { + path->str[top->path_len] = '\0'; + drgn_log_debug(prog, "%s: readdir: %m", + path->str); + } + closedir(top->dir); + drgn_kmod_walk_stack_pop(&state->stack); + continue; + } + + // Skip "." and "..". + if (ent->d_name[0] == '.' + && (!ent->d_name[1] + || (ent->d_name[1] == '.' && !ent->d_name[2]))) + continue; + + bool is_directory = false; + if (ent->d_type == DT_LNK || ent->d_type == DT_UNKNOWN) { + struct stat st; + if (fstatat(dirfd(top->dir), ent->d_name, &st, 0) < 0) { + path->str[top->path_len] = '\0'; + drgn_log_debug(prog, "%s/%s: fstatat: %m", + path->str, ent->d_name); + continue; + } + if (S_ISDIR(st.st_mode)) + is_directory = true; + else if (!S_ISREG(st.st_mode)) + continue; + } else if (ent->d_type == DT_DIR) { + is_directory = true; + } else if (ent->d_type != DT_REG) { + continue; + } + + if (is_directory) { + path->len = top->path_len; + if (!string_builder_appendc(path, '/') + || !string_builder_append(path, ent->d_name) + || !string_builder_null_terminate(path)) + return &drgn_enomem; + + _cleanup_close_ int fd = + openat(dirfd(top->dir), ent->d_name, + O_RDONLY | O_DIRECTORY); + if (fd < 0) { + drgn_log_debug(prog, "openat: %s: %m", + path->str); + continue; + } + + struct stat st; + if (fstat(fd, &st) < 0) { + drgn_log_debug(prog, "fstat: %s: 
%m", + path->str); + continue; + } + struct drgn_kmod_walk_inode inode = { + .dev = st.st_dev, + .ino = st.st_ino, + }; + int r = drgn_kmod_walk_inode_set_insert(&state->visited_dirs, + &inode, NULL); + if (r < 0) + return &drgn_enomem; + if (r == 0) { + drgn_log_debug(prog, + "%s is cycle or duplicate; skipping", + path->str); + continue; + } + + struct drgn_kmod_walk_stack_entry entry = { + .dir = fdopendir(fd), + .path_len = path->len, + }; + if (!entry.dir) { + drgn_log_debug(prog, "fdopendir: %s: %m", + path->str); + continue; + } + fd = -1; // entry.dir owns fd now. + if (!drgn_kmod_walk_stack_append(&state->stack, + &entry)) { + closedir(entry.dir); + return &drgn_enomem; } - } else if (err->code == DRGN_ERROR_LOOKUP) { - // struct module::percpu doesn't exist if !SMP. - drgn_error_destroy(err); } else { - return err; + // Match anything where the first extension is ".ko". + char *dot = strchr(ent->d_name, '.'); + if (!dot || dot[1] != 'k' || dot[2] != 'o' + || (dot[3] != '\0' && dot[3] != '.')) + continue; + + // Borrow the path string builder to build the module + // name (removing extensions and replacing '-' with + // '_'). + path->len = top->path_len; + if (!string_builder_appendn(path, ent->d_name, + dot - ent->d_name) + || !string_builder_null_terminate(path)) + return &drgn_enomem; + char *dash = &path->str[top->path_len]; + while ((dash = strchr(dash, '-'))) + *dash++ = '_'; + + // Find the module (if wanted). 
+ const char *module_name = &path->str[top->path_len]; + auto it = drgn_kmod_walk_module_map_search(&state->modules, + &module_name); + if (!it.entry) + continue; + + size_t name_len = strlen(ent->d_name); + size_t path_len; + if (__builtin_add_overflow(top->path_len, name_len, + &path_len) + || __builtin_add_overflow(path_len, 2, &path_len)) + return &drgn_enomem; + _cleanup_free_ char *file_path = malloc(path_len); + if (!file_path) + return &drgn_enomem; + memcpy(file_path, path->str, top->path_len); + file_path[top->path_len] = '/'; + memcpy(&file_path[top->path_len + 1], ent->d_name, + name_len + 1); + drgn_log_debug(prog, "found kernel module %s", file_path); + + if (!char_p_vector_append(&it.entry->value, &file_path)) + return &drgn_enomem; + file_path = NULL; // it.entry->value owns file_path now. + + // If the file matches the current module, return it. + // Otherwise, keep going. + if (it.entry == current) + return NULL; } } +} - if (it->sections_dir) { - err = kernel_module_section_iterator_next_live(it, name_ret, - address_ret); - if (err && err->code == DRGN_ERROR_OS && err->errnum == EACCES) { - closedir(it->sections_dir); - drgn_error_destroy(err); - it->kmod_it->use_sys_module_sections = false; - err = kernel_module_section_iterator_init_no_sys_module(it, it->kmod_it); - if (err) - return err; - } else { +struct drgn_error * +drgn_module_try_linux_kmod_files(struct drgn_module *module, + const struct drgn_debug_info_options *options, + struct drgn_standard_debug_info_find_state *state) +{ + struct drgn_error *err; + + if (options->try_kmod == DRGN_KMOD_SEARCH_NONE) + return NULL; + + if (options->try_kmod != DRGN_KMOD_SEARCH_WALK) { + err = drgn_open_modules_dep(module->prog, options, + &state->modules_dep); + if (err) return err; + if (state->modules_dep.len > 0) { + err = drgn_module_try_linux_kmod_depmod(module, options, + state); + if (err + || options->try_kmod != DRGN_KMOD_SEARCH_DEPMOD_AND_WALK + || !drgn_module_wants_file(module)) + return 
err; } + if (options->try_kmod == DRGN_KMOD_SEARCH_DEPMOD) + return NULL; } - if (it->i >= it->nsections) - return &drgn_stop; - err = drgn_object_subscript(&kmod_it->tmp2, &kmod_it->tmp1, it->i++); - if (err) - return err; - err = drgn_object_member(&kmod_it->tmp3, &kmod_it->tmp2, "address"); - if (err) - return err; - err = drgn_object_read_unsigned(&kmod_it->tmp3, address_ret); - if (err) - return err; - /* - * Since Linux kernel commit ed66f991bb19 ("module: Refactor section - * attr into bin attribute") (in v5.8), the section name is - * module_sect_attr.battr.attr.name. Before that, it is simply - * module_sect_attr.name. - */ - err = drgn_object_member(&kmod_it->tmp2, &kmod_it->tmp2, "battr"); - if (!err) { - err = drgn_object_member(&kmod_it->tmp2, &kmod_it->tmp2, - "attr"); + if (drgn_kmod_walk_module_map_empty(&state->kmod_walk.modules)) { + for (size_t i = 0; i < state->num_modules; i++) { + if (!drgn_module_wants_file(state->modules[i])) + continue; + struct drgn_kmod_walk_module_map_entry entry = { + .key = state->modules[i]->name, + .value = VECTOR_INIT, + }; + if (drgn_kmod_walk_module_map_insert(&state->kmod_walk.modules, + &entry, NULL) < 0) + return &drgn_enomem; + } + } + + const char *module_name = module->name; + auto it = drgn_kmod_walk_module_map_search(&state->kmod_walk.modules, + &module_name); + size_t i = 0; + for (;;) { + if (i >= char_p_vector_size(&it.entry->value)) { + // No matches remaining for this module. Clear the old + // matches and find another one. 
+ vector_for_each(char_p_vector, path, &it.entry->value) + free(*path); + char_p_vector_clear(&it.entry->value); + i = 0; + + err = drgn_kmod_walk(module->prog, options, + &state->kmod_walk, it.entry); + if (err == &drgn_stop) + break; + else if (err) + return err; + } + char *path = *char_p_vector_at(&it.entry->value, i++); + err = drgn_module_try_standard_file(module, options, path, -1, + true, NULL); if (err) return err; - } else { - if (err->code != DRGN_ERROR_LOOKUP) - return err; - drgn_error_destroy(err); + if (!drgn_module_wants_file(module)) + break; } - err = drgn_object_member(&kmod_it->tmp3, &kmod_it->tmp2, "name"); - if (err) - return err; - char *name; - err = drgn_object_read_c_string(&kmod_it->tmp3, &name); - if (err) - return err; - free(it->name); - *name_ret = it->name = name; + // We won't need any more matches for this module. + drgn_kmod_walk_module_map_entry_deinit(it.entry); + drgn_kmod_walk_module_map_delete_iterator(&state->kmod_walk.modules, + it); return NULL; } -/* - * /lib/modules/$(uname -r)/modules.dep.bin maps all installed kernel modules to - * their filesystem path (and dependencies, which we don't care about). It is - * generated by depmod; the format is a fairly simple serialized radix tree. - * - * modules.dep(5) contains a warning: "These files are not intended for editing - * or use by any additional utilities as their format is subject to change in - * the future." But, the format hasn't changed since 2009, and pulling in - * libkmod is overkill since we only need a very small subset of its - * functionality (plus our minimal parser is more efficient). If the format - * changes in the future, we can reevaluate this. - */ - -struct depmod_index { - void *addr; - size_t len; - char path[256]; -}; +// This has a weird calling convention so that the caller can call +// drgn_error_format_os() itself. 
+static const char *get_gnu_build_id_from_note_file(int fd, + void **bufp, + size_t *buf_capacityp, + const void **build_id_ret, + size_t *build_id_len_ret) +{ + struct stat st; + if (fstat(fd, &st) < 0) + return "fstat"; + + if (st.st_size > SSIZE_MAX + || !alloc_or_reuse(bufp, buf_capacityp, st.st_size)) + return ""; + + ssize_t r = read_all(fd, *bufp, st.st_size); + if (r < 0) + return "read"; + *build_id_len_ret = parse_gnu_build_id_from_notes(*bufp, r, 4, false, + build_id_ret); + return NULL; +} -static void depmod_index_deinit(struct depmod_index *depmod) +static struct drgn_error * +get_build_id_from_sys_kernel_notes(void **buf_ret, + const void **build_id_ret, + size_t *build_id_len_ret) { - munmap(depmod->addr, depmod->len); + static const char path[] = "/sys/kernel/notes"; + _cleanup_close_ int fd = open(path, O_RDONLY); + if (fd == -1) + return drgn_error_create_os("open", errno, path); + + _cleanup_free_ void *buf = NULL; + size_t buf_capacity = 0; + const char *message = get_gnu_build_id_from_note_file(fd, &buf, + &buf_capacity, + build_id_ret, + build_id_len_ret); + if (message && message[0]) + return drgn_error_create_os(message, errno, path); + else if (message) + return &drgn_enomem; + *buf_ret = no_cleanup_ptr(buf); + return NULL; } -struct depmod_index_buffer { - struct binary_buffer bb; - struct depmod_index *depmod; +// Arbitrary limit on the number iterations to make through the modules list in +// order to avoid getting stuck in a cycle. +static const int MAX_MODULE_LIST_ITERATIONS = 10000; + +struct linux_kernel_loaded_module_iterator { + struct drgn_module_iterator it; + bool yielded_vmlinux; + int module_list_iterations_remaining; + // `struct module` type. + struct drgn_qualified_type module_type; + // `struct list_head *` in next module to yield. + struct drgn_object node; + // Address of `struct list_head modules`. 
+ uint64_t modules_head; }; -static struct drgn_error *depmod_index_buffer_error(struct binary_buffer *bb, - const char *pos, - const char *message) -{ - struct depmod_index_buffer *buffer = - container_of(bb, struct depmod_index_buffer, bb); - return drgn_error_format(DRGN_ERROR_OTHER, "%s: %#tx: %s", - buffer->depmod->path, - pos - (const char *)buffer->depmod->addr, - message); -} - -static void depmod_index_buffer_init(struct depmod_index_buffer *buffer, - struct depmod_index *depmod) +static void +linux_kernel_loaded_module_iterator_destroy(struct drgn_module_iterator *_it) { - binary_buffer_init(&buffer->bb, depmod->addr, depmod->len, false, - depmod_index_buffer_error); - buffer->depmod = depmod; + struct linux_kernel_loaded_module_iterator *it = + container_of(_it, struct linux_kernel_loaded_module_iterator, it); + drgn_object_deinit(&it->node); + free(it); } -static struct drgn_error *depmod_index_validate(struct depmod_index *depmod) +static struct drgn_error * +yield_vmlinux(struct linux_kernel_loaded_module_iterator *it, + struct drgn_module **ret, bool *new_ret) { struct drgn_error *err; - struct depmod_index_buffer buffer; - depmod_index_buffer_init(&buffer, depmod); - uint32_t magic; - if ((err = binary_buffer_next_u32(&buffer.bb, &magic))) + struct drgn_program *prog = it->it.prog; + + _cleanup_(drgn_module_deletep) struct drgn_module *module = NULL; + bool new; + err = drgn_module_find_or_create_main(prog, "kernel", &module, &new); + if (err) return err; - if (magic != 0xb007f457) { - return binary_buffer_error(&buffer.bb, - "invalid magic 0x%" PRIx32, magic); + if (!new) { + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; } - uint32_t version; - if ((err = binary_buffer_next_u32(&buffer.bb, &version))) - return err; - if (version != 0x00020001) { - return binary_buffer_error(&buffer.bb, - "unknown version 0x%" PRIx32, - version); + + if (prog->vmcoreinfo.build_id_len > 0) { + // Since Linux kernel commit 0935288c6e00 
("kdump: append kernel + // build-id string to VMCOREINFO") (in v5.9), we can get the + // build ID from VMCOREINFO. + err = drgn_module_set_build_id(module, prog->vmcoreinfo.build_id, + prog->vmcoreinfo.build_id_len); + if (err) + return err; + drgn_log_debug(prog, + "found kernel build ID %s in VMCOREINFO", + module->build_id_str); + } else if (prog->flags & DRGN_PROGRAM_IS_LIVE) { + // Before that, on the live kernel, we can get the build ID from + // /sys/kernel/notes. + _cleanup_free_ void *build_id_buf = NULL; + const void *build_id; + size_t build_id_len; + err = get_build_id_from_sys_kernel_notes(&build_id_buf, + &build_id, + &build_id_len); + if (err) + return err; + if (build_id_len > 0) { + err = drgn_module_set_build_id(module, build_id, + build_id_len); + if (err) + return err; + drgn_log_debug(prog, + "found kernel build ID %s in /sys/kernel/notes", + module->build_id_str); + } else { + drgn_log_debug(prog, + "couldn't find kernel build ID in /sys/kernel/notes"); + } + } else { + // Otherwise, we can't get the build ID. + drgn_log_debug(prog, "couldn't find kernel build ID"); } + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; return NULL; } -static struct drgn_error *depmod_index_init(struct depmod_index *depmod, - const char *osrelease) +enum kernel_module_address_ranges_version { + // Since Linux kernel commit ac3b43283923 ("module: replace + // module_layout with module_memory") (in v6.4), `struct module` + // contains an array, `struct module_memory mem[]`, of discontiguous + // allocations per memory type (`module->mem[type].base` and + // `module->mem[type].size`). The module address is + // `module->mem[MOD_TEXT].base`. 
+ MODULE_MEMORY, + // Between that and Linux kernel commit 7523e4dc5057 ("module: use a + // structure to encapsulate layout.") (in v4.5), `struct module` + // contains a `struct module_layout core_layout` member with the base + // address (`module->core_layout.base`) and contiguous size + // (`module->core_layout.size`). + MODULE_LAYOUT, + // Before that, `struct module` contains the base address + // (`module->module_core`) and contiguous size (`module->core_size`) + // directly. + IN_MODULE, +}; + +static struct drgn_error * +kernel_module_address(const struct drgn_object *module_obj, + struct drgn_object *mem, + enum kernel_module_address_ranges_version *version_ret, + uint64_t *address_ret) { + struct drgn_program *prog = drgn_object_program(module_obj); struct drgn_error *err; - snprintf(depmod->path, sizeof(depmod->path), - "/lib/modules/%s/modules.dep.bin", osrelease); + DRGN_OBJECT(tmp, prog); + err = drgn_object_member(mem, module_obj, "mem"); + if (!err) { + *version_ret = MODULE_MEMORY; + if (!prog->mod_text_cached) { + err = drgn_program_find_object(prog, "MOD_TEXT", NULL, + DRGN_FIND_OBJECT_CONSTANT, + &tmp); + if (err) + return err; + union drgn_value mod_text_value; + err = drgn_object_read_integer(&tmp, &mod_text_value); + if (err) + return err; + prog->mod_text = mod_text_value.uvalue; + prog->mod_text_cached = true; + } + err = drgn_object_subscript(&tmp, mem, prog->mod_text); + if (err) + return err; + err = drgn_object_member(&tmp, &tmp, "base"); + } else if (drgn_error_catch(&err, DRGN_ERROR_LOOKUP)) { + err = drgn_object_member(mem, module_obj, "core_layout"); + if (!err) { + *version_ret = MODULE_LAYOUT; + err = drgn_object_member(&tmp, mem, "base"); + } else if (drgn_error_catch(&err, DRGN_ERROR_LOOKUP)) { + *version_ret = IN_MODULE; + err = drgn_object_member(&tmp, module_obj, + "module_core"); + } + } + if (err) + return err; + return drgn_object_read_unsigned(&tmp, address_ret); +} - int fd = open(depmod->path, O_RDONLY); - if (fd == -1) 
- return drgn_error_create_os("open", errno, depmod->path); +// If version is MODULE_MEMORY, mem is struct module::mem. If version is +// MODULE_LAYOUT, mem is struct module::core_layout. +static struct drgn_error * +kernel_module_set_address_ranges(struct drgn_module *module, + enum kernel_module_address_ranges_version version, + const struct drgn_object *module_obj, + const struct drgn_object *mem, + uint64_t address) +{ + struct drgn_program *prog = module->prog; + struct drgn_error *err; - struct stat st; - if (fstat(fd, &st) == -1) { - err = drgn_error_create_os("fstat", errno, depmod->path); - goto out; + DRGN_OBJECT(tmp, prog); + if (version != MODULE_MEMORY) { + if (version == IN_MODULE) + err = drgn_object_member(&tmp, module_obj, "core_size"); + else + err = drgn_object_member(&tmp, mem, "size"); + if (err) + return err; + uint64_t size; + err = drgn_object_read_unsigned(&tmp, &size); + if (err) + return err; + drgn_log_debug(prog, "module size is %" PRIu64, size); + return drgn_module_set_address_range(module, address, + address + size); } - if (st.st_size < 0 || st.st_size > SIZE_MAX) { - err = &drgn_enomem; - goto out; + struct drgn_type *mem_array_type = drgn_underlying_type(mem->type); + if (drgn_type_kind(mem_array_type) != DRGN_TYPE_ARRAY) { + return drgn_error_create(DRGN_ERROR_TYPE, + "struct module::mem is not an array"); } + uint64_t length = drgn_type_length(mem_array_type); - void *addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (addr == MAP_FAILED) { - err = drgn_error_create_os("mmap", errno, depmod->path); - goto out; - } + if (length > SIZE_MAX) + return &drgn_enomem; + _cleanup_free_ uint64_t (*ranges)[2] = + malloc_array(length, sizeof(*ranges)); + if (!ranges) + return &drgn_enomem; - depmod->addr = addr; - depmod->len = st.st_size; + DRGN_OBJECT(element, prog); + size_t num_ranges = 0; + for (size_t i = 0; i < length; i++) { + err = drgn_object_subscript(&element, mem, i); + if (err) + return err; - err = 
depmod_index_validate(depmod); - if (err) - depmod_index_deinit(depmod); -out: - close(fd); - return err; + err = drgn_object_member(&tmp, &element, "size"); + if (err) + return err; + uint64_t size; + err = drgn_object_read_unsigned(&tmp, &size); + if (err) + return err; + if (!size) + continue; + + err = drgn_object_member(&tmp, &element, "base"); + if (err) + return err; + uint64_t base; + err = drgn_object_read_unsigned(&tmp, &base); + if (err) + return err; + + drgn_log_debug(prog, "module has address range %" PRIu64 "-%" PRIu64, + base, base + size); + ranges[num_ranges][0] = base; + ranges[num_ranges][1] = base + size; + num_ranges++; + } + return drgn_module_set_address_ranges(module, ranges, num_ranges); } -/* - * Look up the path of the kernel module with the given name. - * - * @param[in] name Name of the kernel module. - * @param[out] path_ret Returned path of the kernel module, relative to - * /lib/modules/$(uname -r). This is @em not null-terminated. @c NULL if not - * found. - * @param[out] len_ret Returned length of @p path_ret. - */ -static struct drgn_error *depmod_index_find(struct depmod_index *depmod, - const char *name, - const char **path_ret, - size_t *len_ret) +static struct drgn_error * +kernel_module_set_build_id_live(struct drgn_module *module) { - static const uint32_t INDEX_NODE_MASK = UINT32_C(0x0fffffff); - static const uint32_t INDEX_NODE_CHILDS = UINT32_C(0x20000000); - static const uint32_t INDEX_NODE_VALUES = UINT32_C(0x40000000); - static const uint32_t INDEX_NODE_PREFIX = UINT32_C(0x80000000); - struct drgn_error *err; - struct depmod_index_buffer buffer; - depmod_index_buffer_init(&buffer, depmod); + struct drgn_program *prog = module->prog; - /* depmod_index_validate() already checked that this is within bounds. 
*/ - buffer.bb.pos += 8; - uint32_t offset; - for (;;) { - if ((err = binary_buffer_next_u32(&buffer.bb, &offset))) - return err; - if ((offset & INDEX_NODE_MASK) > depmod->len) { - return binary_buffer_error(&buffer.bb, - "offset is out of bounds"); + _cleanup_free_ char *path; + if (asprintf(&path, "/sys/module/%s/notes", module->name) < 0) { + path = NULL; + return &drgn_enomem; + } + _cleanup_closedir_ DIR *dir = opendir(path); + if (!dir) { + if (errno == ENOENT) { + drgn_log_debug(prog, "opendir: %s: %m", path); + return NULL; + } else { + return drgn_error_create_os("opendir", errno, path); } - buffer.bb.pos = (const char *)depmod->addr + (offset & INDEX_NODE_MASK); + } - if (offset & INDEX_NODE_PREFIX) { - const char *prefix; - size_t prefix_len; - if ((err = binary_buffer_next_string(&buffer.bb, - &prefix, - &prefix_len))) - return err; - if (strncmp(name, prefix, prefix_len) != 0) - goto not_found; - name += prefix_len; + _cleanup_free_ void *buf = NULL; + size_t capacity = 0; + + struct dirent *ent; + while ((errno = 0, ent = readdir(dir))) { + if (ent->d_type == DT_DIR) + continue; + + _cleanup_close_ int fd = openat(dirfd(dir), ent->d_name, + O_RDONLY); + if (fd < 0) { + return drgn_error_format_os("openat", errno, "%s/%s", + path, ent->d_name); } - if (offset & INDEX_NODE_CHILDS) { - uint8_t first, last; - if ((err = binary_buffer_next_u8(&buffer.bb, &first)) || - (err = binary_buffer_next_u8(&buffer.bb, &last))) - return err; - if (*name) { - uint8_t cur = *name; - if (cur < first || cur > last) - goto not_found; - if ((err = binary_buffer_skip(&buffer.bb, - 4 * (cur - first)))) - return err; - name++; - continue; - } else { - if ((err = binary_buffer_skip(&buffer.bb, - 4 * (last - first + 1)))) - return err; - break; + const void *build_id; + size_t build_id_len; + const char *message = + get_gnu_build_id_from_note_file(fd, &buf, &capacity, + &build_id, + &build_id_len); + if (message && message[0]) { + return drgn_error_format_os(message, errno, 
"%s/%s", + path, ent->d_name); + } else if (message) { + return &drgn_enomem; + } + if (build_id_len > 0) { + err = drgn_module_set_build_id(module, build_id, + build_id_len); + if (!err) { + drgn_log_debug(prog, + "found build ID %s in %s/%s", + module->build_id_str, path, + ent->d_name); } - } else if (*name) { - goto not_found; - } else { - break; + return err; } } - if (!(offset & INDEX_NODE_VALUES)) - goto not_found; + if (errno) + return drgn_error_create_os("readdir", errno, path); + drgn_log_debug(prog, "couldn't find build ID in %s", path); + return NULL; +} - uint32_t value_count; - if ((err = binary_buffer_next_u32(&buffer.bb, &value_count))) - return err; - if (!value_count) - goto not_found; /* Or is this malformed? */ +static struct drgn_error * +kernel_module_set_build_id(struct drgn_module *module, + const struct drgn_object *module_obj, + bool use_sys_module) +{ + if (use_sys_module) + return kernel_module_set_build_id_live(module); - /* Skip over priority. */ - if ((err = binary_buffer_skip(&buffer.bb, 4))) + struct drgn_error *err; + struct drgn_program *prog = module->prog; + const bool bswap = drgn_platform_bswap(&prog->platform); + + DRGN_OBJECT(attrs, prog); + DRGN_OBJECT(attr, prog); + DRGN_OBJECT(tmp, prog); + _cleanup_free_ void *buf = NULL; + size_t capacity = 0; + + err = drgn_object_member(&attrs, module_obj, "notes_attrs"); + if (err) return err; - const char *colon = memchr(buffer.bb.pos, ':', - buffer.bb.end - buffer.bb.pos); - if (!colon) { - return binary_buffer_error(&buffer.bb, - "expected string containing ':'"); + bool group = true; + uint64_t n; + err = drgn_object_member_dereference(&attrs, &attrs, "grp"); + if (!err) { + // Since Linux kernel commit 4723f16de64e ("module: sysfs: Add + // notes attributes through attribute_group") (in v6.14), we + // have to iterate over struct attribute_group::bin_attrs, a + // null-terminated array of struct bin_attribute pointers. 
+ + // attr = mod->notes_attrs->grp.bin_attrs + err = drgn_object_member(&attrs, &attrs, "bin_attrs"); + if (err) + return err; + } else if (drgn_error_catch(&err, DRGN_ERROR_LOOKUP)) { + // Before that, there was no struct attribute_group for notes, + // so we iterate over struct module_notes_attrs::attrs, an array + // of struct bin_attribute with a length given by struct + // module_notes_attrs::notes. + group = false; + // n = mod->notes_attrs->notes + err = drgn_object_member_dereference(&tmp, &attrs, "notes"); + if (err) + return err; + err = drgn_object_read_unsigned(&tmp, &n); + if (err) + return err; + + // attrs = mod->notes_attrs->attrs + err = drgn_object_member_dereference(&attrs, &attrs, "attrs"); + if (err) + return err; + } else { + return err; } - *path_ret = buffer.bb.pos; - *len_ret = colon - buffer.bb.pos; - return NULL; -not_found: - *path_ret = NULL; + // If we're not using struct attribute_group, we know how many + // attributes there are. + for (uint64_t i = 0; group || i < n; i++) { + // attr = attrs[i] + err = drgn_object_subscript(&attr, &attrs, i); + if (err) + return err; + + if (group) { + // If we're using struct attribute_group, we stop when + // we hit a NULL pointer. 
+ err = drgn_object_read(&attr, &attr); + if (err) + return err; + bool truthy; + err = drgn_object_bool(&attr, &truthy); + if (err) + return err; + if (!truthy) + break; + } else { + // attr = &attrs[i] + err = drgn_object_address_of(&attr, &attr); + if (err) + return err; + } + + // address = attr->private + err = drgn_object_member_dereference(&tmp, &attr, "private"); + if (err) + return err; + uint64_t address; + err = drgn_object_read_unsigned(&tmp, &address); + if (err) + return err; + + // size = attr->size + err = drgn_object_member_dereference(&tmp, &attr, "size"); + if (err) + return err; + uint64_t size; + err = drgn_object_read_unsigned(&tmp, &size); + if (err) + return err; + + if (size > SIZE_MAX || !alloc_or_reuse(&buf, &capacity, size)) + return &drgn_enomem; + + err = drgn_program_read_memory(prog, buf, address, size, false); + if (err) + return err; + + const void *build_id; + size_t build_id_len = + parse_gnu_build_id_from_notes(buf, size, 4, bswap, + &build_id); + if (build_id_len > 0) { + err = drgn_module_set_build_id(module, build_id, + build_id_len); + if (!err) { + drgn_log_debug(prog, + "found build ID %s in notes_attrs", + module->build_id_str); + } + return err; + } + } + drgn_log_debug(prog, + "couldn't find build ID in notes_attrs"); return NULL; } -/* - * Identify an ELF file as a kernel module, vmlinux, or neither. We classify a - * file as a kernel module if it has a section named .gnu.linkonce.this_module. - * If it doesn't, but it does have a section named .init.text, we classify it as - * vmlinux. 
- */ -static struct drgn_error *identify_kernel_elf(Elf *elf, - bool *is_vmlinux_ret, - bool *is_module_ret) +static struct drgn_error * +kernel_module_set_section_addresses_live(struct drgn_module *module) { - size_t shstrndx; - if (elf_getshdrstrndx(elf, &shstrndx)) - return drgn_error_libelf(); - - Elf_Scn *scn = NULL; - bool have_init_text = false; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr *shdr, shdr_mem; - const char *scnname; - - shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) + struct drgn_error *err; + struct drgn_program *prog = module->prog; + bool logged = false; + + _cleanup_free_ char *path; + if (asprintf(&path, "/sys/module/%s/sections", module->name) < 0) { + path = NULL; + return &drgn_enomem; + } + _cleanup_closedir_ DIR *dir = opendir(path); + if (!dir) + return drgn_error_create_os("opendir", errno, path); + + struct dirent *ent; + while ((errno = 0, ent = readdir(dir))) { + if (ent->d_type == DT_DIR) continue; - scnname = elf_strptr(elf, shstrndx, shdr->sh_name); - if (!scnname) - return drgn_error_libelf(); - if (strcmp(scnname, ".gnu.linkonce.this_module") == 0) { - *is_vmlinux_ret = false; - *is_module_ret = true; - return NULL; - } else if (strcmp(scnname, ".init.text") == 0) { - have_init_text = true; + _cleanup_close_ int fd = openat(dirfd(dir), ent->d_name, + O_RDONLY); + if (fd < 0) { + return drgn_error_format_os("openat", errno, "%s/%s", + path, ent->d_name); + } + + _cleanup_fclose_ FILE *file = fdopen(fd, "r"); + if (!file) + return drgn_error_create_os("fdopen", errno, NULL); + uint64_t address; + if (fscanf(file, "%" SCNx64, &address) != 1) { + return drgn_error_format(DRGN_ERROR_OTHER, + "could not parse %s/%s", + path, ent->d_name); + } + + if (!logged) { + drgn_log_debug(prog, + "getting section addresses from %s", + path); + logged = true; } + err = drgn_module_set_section_address(module, ent->d_name, + address); + if (err) + return err; } - *is_vmlinux_ret = have_init_text; - *is_module_ret = false; + if 
(errno) + return drgn_error_create_os("readdir", errno, path); return NULL; } -DEFINE_HASH_MAP(elf_scn_name_map, const char *, Elf_Scn *, - c_string_key_hash_pair, c_string_key_eq); - static struct drgn_error * -cache_kernel_module_sections(struct kernel_module_iterator *kmod_it, Elf *elf) +kernel_module_set_section_addresses(struct drgn_module *module, + const struct drgn_object *module_obj, + bool use_sys_module) { struct drgn_error *err; + struct drgn_program *prog = module->prog; - size_t shstrndx; - if (elf_getshdrstrndx(elf, &shstrndx)) - return drgn_error_libelf(); - - struct elf_scn_name_map scn_map = HASH_TABLE_INIT; - Elf_Scn *scn = NULL; - while ((scn = elf_nextscn(elf, scn))) { - GElf_Shdr shdr_mem; - GElf_Shdr *shdr = gelf_getshdr(scn, &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - goto out_scn_map; - } - - if (!(shdr->sh_flags & SHF_ALLOC)) - continue; - - struct elf_scn_name_map_entry entry = { - .key = elf_strptr(elf, shstrndx, shdr->sh_name), - .value = scn, - }; - if (!entry.key) { - err = drgn_error_libelf(); - goto out_scn_map; - } + DRGN_OBJECT(tmp, prog); - if (elf_scn_name_map_insert(&scn_map, &entry, NULL) == -1) { - err = &drgn_enomem; - goto out_scn_map; + // As of Linux 6.0, the .data..percpu section is not included in the + // section attributes. (kernel/module/sysfs.c:add_sect_attrs() only + // creates attributes for sections with the SHF_ALLOC flag set, but + // kernel/module/main.c:layout_and_allocate() clears the SHF_ALLOC flag + // for the .data..percpu section.) However, we need this address so that + // global per-CPU variables will be relocated correctly. Get it from + // `struct module`. + err = drgn_object_member(&tmp, module_obj, "percpu"); + if (!err) { + uint64_t address; + err = drgn_object_read_unsigned(&tmp, &address); + if (err) + return err; + drgn_log_debug(prog, "module percpu is 0x%" PRIx64, address); + // struct module::percpu is NULL if the module doesn't have any + // per-CPU data. 
+ if (address) { + err = drgn_module_set_section_address(module, + ".data..percpu", + address); + if (err) + return err; } + } else if (err->code == DRGN_ERROR_LOOKUP) { + // struct module::percpu doesn't exist if !SMP. + drgn_error_destroy(err); + } else { + return err; } - struct kernel_module_section_iterator section_it; - err = kernel_module_section_iterator_init(§ion_it, kmod_it); - if (err) - goto out_scn_map; - const char *name; - uint64_t address; - while (!(err = kernel_module_section_iterator_next(§ion_it, &name, - &address))) { - struct elf_scn_name_map_iterator it = - elf_scn_name_map_search(&scn_map, &name); - if (it.entry) { - GElf_Shdr shdr_mem; - GElf_Shdr *shdr = gelf_getshdr(it.entry->value, - &shdr_mem); - if (!shdr) { - err = drgn_error_libelf(); - break; - } - shdr->sh_addr = address; - if (!gelf_update_shdr(it.entry->value, shdr)) { - err = drgn_error_libelf(); - break; - } - } + if (use_sys_module) { + err = kernel_module_set_section_addresses_live(module); + // We could be debugging /proc/kcore without root privileges via + // an fd that we were passed. If we didn't have permission to + // access the files in /sys/module/$module/sections, fall back + // to the non-live path. + if (!err || err->code != DRGN_ERROR_OS || err->errnum != EACCES) + return err; + drgn_error_log_debug(prog, err, + "falling back to section addresses from sect_attrs: "); + drgn_error_destroy(err); + } else { + drgn_log_debug(prog, + "getting section addresses from sect_attrs"); } - if (err && err != &drgn_stop) - goto out_section_it; - err = NULL; -out_section_it: - kernel_module_section_iterator_deinit(§ion_it); -out_scn_map: - elf_scn_name_map_deinit(&scn_map); - return err; -} -struct kernel_module_file { - const char *path; - int fd; - Elf *elf; - /* - * Kernel module build ID. This is owned by the Elf handle. 
Because we - * use this as the key in the kernel_module_table, the file must always - * be removed from the table before it is reported to the DWARF index - * (which takes ownership of the Elf handle). - */ - const void *gnu_build_id; - size_t gnu_build_id_len; - /* Next file with the same build ID. */ - struct kernel_module_file *next; -}; - -static struct nstring -kernel_module_table_key(struct kernel_module_file * const *entry) -{ - return (struct nstring){ - (*entry)->gnu_build_id, (*entry)->gnu_build_id_len - }; -} + DRGN_OBJECT(attrs, prog); + DRGN_OBJECT(attr, prog); -DEFINE_HASH_TABLE(kernel_module_table, struct kernel_module_file *, - kernel_module_table_key, nstring_hash_pair, nstring_eq); + err = drgn_object_member(&attrs, module_obj, "sect_attrs"); + if (err) + return err; -static struct drgn_error * -report_loaded_kernel_module(struct drgn_debug_info_load_state *load, - struct kernel_module_iterator *kmod_it, - struct kernel_module_table *kmod_table) -{ - struct drgn_error *err; + bool group = true; + uint64_t nsections; + err = drgn_object_member_dereference(&tmp, &attrs, "nsections"); + if (drgn_error_catch(&err, DRGN_ERROR_LOOKUP)) { + // Since Linux kernel commit d8959b947a8d ("module: sysfs: Drop + // member 'module_sect_attrs::nsections'") (in v6.14), we have + // to iterate over struct attribute_group::bin_attrs, a + // null-terminated array of struct bin_attribute pointers. + + // attrs = mod->sect_attrs->grp.bin_attrs + err = drgn_object_member_dereference(&attrs, &attrs, "grp"); + if (err) + return err; + err = drgn_object_member(&attrs, &attrs, "bin_attrs"); + if (err) + return err; + } else if (!err) { + // Before that, struct module_sect_attrs::grp still exists. + // However, since Linux kernel commit ed66f991bb19 ("module: + // Refactor section attr into bin attribute") (in v5.8), the + // sections are in struct attribute_group::bin_attrs, and before + // that, they're in struct attribute_group::attrs. 
Additionally, + // we'd then have to get the containing struct module_sect_attr + // to get the section address. + // + // Instead, it's easier to iterate over struct + // module_sect_attrs::attrs, an array of struct module_sect_attr + // with a length given by struct module_sect_attrs::nsections. + group = false; + // nsections = mod->sect_attrs->nsections + err = drgn_object_read_unsigned(&tmp, &nsections); + if (err) + return err; - struct nstring key; - err = kernel_module_iterator_gnu_build_id(kmod_it, - (const void **)&key.str, - &key.len); - if (err || key.len == 0) { - return drgn_debug_info_report_error(load, kmod_it->name, - "could not find GNU build ID", - err); + // attrs = mod->sect_attrs->attrs + err = drgn_object_member_dereference(&attrs, &attrs, "attrs"); + if (err) + return err; + } else { + return err; } - struct hash_pair hp = kernel_module_table_hash(&key); - struct kernel_module_table_iterator it = - kernel_module_table_search_hashed(kmod_table, &key, hp); - if (!it.entry) - return &drgn_not_found; + // If we're not using struct attribute_group, we know how many + // attributes there are. + for (uint64_t i = 0; group || i < nsections; i++) { + // attr = attrs[i] + err = drgn_object_subscript(&attr, &attrs, i); + if (err) + return err; - struct kernel_module_file *kmod = *it.entry; - kernel_module_table_delete_iterator_hashed(kmod_table, it, hp); - do { - err = cache_kernel_module_sections(kmod_it, kmod->elf); - if (err) { - err = drgn_debug_info_report_error(load, kmod->path, - "could not get section addresses", - err); + if (group) { + // If we're using struct attribute_group, we stop when + // we hit a NULL pointer. 
+ err = drgn_object_read(&attr, &attr); + if (err) + return err; + bool truthy; + err = drgn_object_bool(&attr, &truthy); + if (err) + return err; + if (!truthy) + break; + // Since Linux kernel commit 4b2c11e4aaf7 ("module: + // sysfs: Drop member 'module_sect_attr::address'") (in + // v6.14), the section address is in struct + // bin_attribute::private. + err = drgn_object_member_dereference(&tmp, &attr, + "private"); + } else { + // Before that, the section address is in struct + // module_sect_attr::address. + err = drgn_object_member(&tmp, &attr, "address"); + if (err) + return err; + } + uint64_t address; + err = drgn_object_read_unsigned(&tmp, &address); + if (err) + return err; + + if (group) { + // attr = attr->attr + err = drgn_object_member_dereference(&attr, &attr, + "attr"); if (err) return err; - goto next; + } else { + // Since Linux kernel commit ed66f991bb19 ("module: + // Refactor section attr into bin attribute") (in v5.8), + // the section name is module_sect_attr.battr.attr.name. + // Before that, it is simply module_sect_attr.name. 
+ + // attr = attr.battr.attr + err = drgn_object_member(&attr, &attr, "battr"); + if (!err) { + err = drgn_object_member(&attr, &attr, "attr"); + if (err) + return err; + } else if (!drgn_error_catch(&err, DRGN_ERROR_LOOKUP)) { + return err; + } } + err = drgn_object_member(&tmp, &attr, "name"); + if (err) + return err; + _cleanup_free_ char *name = NULL; + err = drgn_object_read_c_string(&tmp, &name); + if (err) + return err; - err = drgn_debug_info_report_elf(load, kmod->path, kmod->fd, - kmod->elf, kmod_it->start, - kmod_it->end, kmod_it->name, - NULL); - kmod->elf = NULL; - kmod->fd = -1; + err = drgn_module_set_section_address(module, name, address); if (err) return err; -next: - kmod = kmod->next; - } while (kmod); + } return NULL; } static struct drgn_error * -report_default_kernel_module(struct drgn_debug_info_load_state *load, - struct kernel_module_iterator *kmod_it, - struct depmod_index *depmod) +kernel_module_find_or_create_internal(const struct drgn_object *module_ptr, + const struct drgn_object *module_obj, + struct drgn_module **ret, bool *new_ret, + bool create, bool log) { - static const char * const module_paths[] = { - "/usr/lib/debug/lib/modules/%s/%.*s", - "/usr/lib/debug/lib/modules/%s/%.*s.debug", - "/lib/modules/%s/%.*s%.*s", - NULL, - }; struct drgn_error *err; + struct drgn_program *prog = drgn_object_program(module_obj); - const char *depmod_path; - size_t depmod_path_len; - err = depmod_index_find(depmod, kmod_it->name, &depmod_path, - &depmod_path_len); - if (err) { - return drgn_debug_info_report_error(load, - "kernel modules", - "could not parse depmod", - err); - } else if (!depmod_path) { - return drgn_debug_info_report_error(load, kmod_it->name, - "could not find module in depmod", - NULL); - } - - size_t extension_len; - if (depmod_path_len >= 3 && - (memcmp(depmod_path + depmod_path_len - 3, ".gz", 3) == 0 || - memcmp(depmod_path + depmod_path_len - 3, ".xz", 3) == 0)) - extension_len = 3; - else - extension_len = 0; - char 
*path; - int fd; - Elf *elf; - err = find_elf_file(&path, &fd, &elf, module_paths, - load->dbinfo->prog->vmcoreinfo.osrelease, - depmod_path_len - extension_len, depmod_path, - extension_len, - depmod_path + depmod_path_len - extension_len); + uint64_t name_offset; + err = drgn_type_offsetof(module_obj->type, "name", &name_offset); if (err) - return drgn_debug_info_report_error(load, NULL, NULL, err); - if (!elf) { - return drgn_debug_info_report_error(load, kmod_it->name, - "could not find .ko", - NULL); - } - - err = cache_kernel_module_sections(kmod_it, elf); - if (err) { - err = drgn_debug_info_report_error(load, path, - "could not get section addresses", - err); - elf_end(elf); - close(fd); - free(path); return err; + if (name_offset >= drgn_object_size(module_obj) + || !memchr(drgn_object_buffer(module_obj) + name_offset, '\0', + drgn_object_size(module_obj) - name_offset)) { + return drgn_error_create(DRGN_ERROR_OTHER, + "couldn't read module name"); } + const char *name = drgn_object_buffer(module_obj) + name_offset; - err = drgn_debug_info_report_elf(load, path, fd, elf, kmod_it->start, - kmod_it->end, kmod_it->name, NULL); - free(path); - return err; -} - -static struct drgn_error * -report_loaded_kernel_modules(struct drgn_debug_info_load_state *load, - struct kernel_module_table *kmod_table, - struct depmod_index *depmod, bool use_sys_module) -{ - struct drgn_program *prog = load->dbinfo->prog; - struct drgn_error *err; + DRGN_OBJECT(mem, prog); + enum kernel_module_address_ranges_version version; + uint64_t address; + err = kernel_module_address(module_obj, &mem, &version, &address); + if (err) + return err; - struct kernel_module_iterator kmod_it; - err = kernel_module_iterator_init(&kmod_it, prog, use_sys_module); - if (err) { -kernel_module_iterator_error: - return drgn_debug_info_report_error(load, "kernel modules", - "could not find loaded kernel modules", - err); + if (log) { + drgn_log_debug(prog, "found loaded kernel module %s@0x%" PRIx64, + 
name, address); } - for (;;) { - err = kernel_module_iterator_next(&kmod_it); - if (err == &drgn_stop) { - err = NULL; - break; - } else if (err) { - kernel_module_iterator_deinit(&kmod_it); - goto kernel_module_iterator_error; - } - /* Look for an explicitly-reported file first. */ - if (kmod_table) { - err = report_loaded_kernel_module(load, &kmod_it, - kmod_table); - if (!err) - continue; - else if (err != &drgn_not_found) - break; - } + if (!create) { + *ret = drgn_module_find_relocatable(prog, name, address); + if (new_ret) + *new_ret = false; + return NULL; + } - /* - * If it was not reported explicitly and we're also reporting the - * defaults, look for the module at the standard locations unless we've - * already indexed that module. - */ - if (depmod && - !drgn_debug_info_is_indexed(load->dbinfo, kmod_it.name)) { - if (!depmod->addr) { - err = depmod_index_init(depmod, - prog->vmcoreinfo.osrelease); - if (err) { - depmod->addr = NULL; - err = drgn_debug_info_report_error(load, - "kernel modules", - "could not read depmod", - err); - if (err) - break; - depmod = NULL; - continue; - } - } - err = report_default_kernel_module(load, &kmod_it, - depmod); - if (err) - break; - } + _cleanup_(drgn_module_deletep) struct drgn_module *module = NULL; + bool new; + err = drgn_module_find_or_create_relocatable(prog, name, address, + &module, &new); + if (err) + return err; + if (!new) { + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; } - kernel_module_iterator_deinit(&kmod_it); - return err; -} -static struct drgn_error * -report_kernel_modules(struct drgn_debug_info_load_state *load, - struct kernel_module_file *kmods, size_t num_kmods, - bool vmlinux_is_pending) -{ - struct drgn_program *prog = load->dbinfo->prog; - struct drgn_error *err; + err = drgn_module_set_object(module, module_ptr); + if (err) + return err; - if (!num_kmods && !load->load_default) - return NULL; + err = kernel_module_set_address_ranges(module, version, 
module_obj, + &mem, address); + if (err) + return err; - /* - * If we're debugging the running kernel, we can use - * /sys/module/$module/notes and /sys/module/$module/sections instead of - * getting the equivalent information from the core dump. This fast path - * can be disabled via an environment variable for testing. It may also - * be disabled if we encounter permission issues using - * /sys/module/$module/sections. - */ + // If we're debugging the running kernel, we can use + // /sys/module/$module/notes and /sys/module/$module/sections instead of + // getting the equivalent information from the core dump. This fast path + // can be disabled via an environment variable for testing. It may also + // be disabled if we encounter permission issues using + // /sys/module/$module/sections. bool use_sys_module = false; if (prog->flags & DRGN_PROGRAM_IS_LOCAL) { char *env = getenv("DRGN_USE_SYS_MODULE"); use_sys_module = !env || atoi(env); } - /* - * We need to index vmlinux now so that we can walk the list of modules - * in the kernel. 
- */ - if (vmlinux_is_pending) { - err = drgn_debug_info_report_flush(load); - if (err) - return err; - } - - struct kernel_module_table kmod_table = HASH_TABLE_INIT; - struct depmod_index depmod; - depmod.addr = NULL; - struct kernel_module_table_iterator it; - for (size_t i = 0; i < num_kmods; i++) { - struct kernel_module_file *kmod = &kmods[i]; - - ssize_t build_id_len = - drgn_elf_gnu_build_id(kmod->elf, &kmod->gnu_build_id); - if (build_id_len < 0) { - err = drgn_debug_info_report_error(load, kmod->path, - NULL, - drgn_error_libelf()); - if (err) - goto out; - continue; - } - kmod->gnu_build_id_len = build_id_len; - - struct nstring key = kernel_module_table_key(&kmod); - struct hash_pair hp = kernel_module_table_hash(&key); - it = kernel_module_table_search_hashed(&kmod_table, &key, hp); - if (it.entry) { - kmod->next = *it.entry; - *it.entry = kmod; - } else { - if (kernel_module_table_insert_searched(&kmod_table, - &kmod, hp, - NULL) == -1) { - err = &drgn_enomem; - goto out; - } - kmod->next = NULL; - } - } - - err = report_loaded_kernel_modules(load, num_kmods ? &kmod_table : NULL, - load->load_default ? &depmod : NULL, - use_sys_module); + err = kernel_module_set_build_id(module, module_obj, use_sys_module); if (err) - goto out; - - /* Anything left over was not loaded. 
*/ - for (it = kernel_module_table_first(&kmod_table); it.entry; ) { - struct kernel_module_file *kmod = *it.entry; - it = kernel_module_table_delete_iterator(&kmod_table, it); - do { - err = drgn_debug_info_report_elf(load, kmod->path, - kmod->fd, kmod->elf, 0, - 0, kmod->path, NULL); - kmod->elf = NULL; - kmod->fd = -1; - if (err) - goto out; - kmod = kmod->next; - } while (kmod); - } - err = NULL; -out: - if (depmod.addr) - depmod_index_deinit(&depmod); - kernel_module_table_deinit(&kmod_table); - return err; + return err; + err = kernel_module_set_section_addresses(module, module_obj, + use_sys_module); + if (err) + return err; + + *ret = no_cleanup_ptr(module); + if (new_ret) + *new_ret = new; + return NULL; } static struct drgn_error * -report_vmlinux(struct drgn_debug_info_load_state *load, - bool *vmlinux_is_pending) +drgn_module_find_or_create_linux_kernel_loadable_internal(const struct drgn_object *module_ptr, + struct drgn_module **ret, + bool *new_ret, + bool create) { - static const char * const vmlinux_paths[] = { - /* - * The files under /usr/lib/debug should always have debug - * information, so check for those first. 
- */ - "/usr/lib/debug/boot/vmlinux-%s", - "/usr/lib/debug/lib/modules/%s/vmlinux", - "/boot/vmlinux-%s", - "/lib/modules/%s/build/vmlinux", - "/lib/modules/%s/vmlinux", - NULL, - }; - struct drgn_program *prog = load->dbinfo->prog; struct drgn_error *err; + struct drgn_program *prog = drgn_object_program(module_ptr); - char *path; - int fd; - Elf *elf; - err = find_elf_file(&path, &fd, &elf, vmlinux_paths, - prog->vmcoreinfo.osrelease); + if (drgn_type_kind(drgn_underlying_type(module_ptr->type)) + != DRGN_TYPE_POINTER) + return drgn_error_create(DRGN_ERROR_TYPE, + "struct module * is required"); + + DRGN_OBJECT(module_obj, prog); + err = drgn_object_dereference(&module_obj, module_ptr); if (err) - return drgn_debug_info_report_error(load, NULL, NULL, err); - if (!elf) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "could not find vmlinux for %s", - prog->vmcoreinfo.osrelease); - return drgn_debug_info_report_error(load, "kernel", NULL, err); - } + return err; - uint64_t start, end; - err = elf_address_range(elf, prog->vmcoreinfo.kaslr_offset, &start, - &end); - if (err) { - err = drgn_debug_info_report_error(load, path, NULL, err); - elf_end(elf); - close(fd); - free(path); + err = drgn_object_read(&module_obj, &module_obj); + if (err) return err; - } - err = drgn_debug_info_report_elf(load, path, fd, elf, start, end, - "kernel", vmlinux_is_pending); - free(path); - return err; + return kernel_module_find_or_create_internal(module_ptr, &module_obj, ret, new_ret, + create, false); } -struct drgn_error * -linux_kernel_report_debug_info(struct drgn_debug_info_load_state *load) +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_find_linux_kernel_loadable(const struct drgn_object *module_ptr, + struct drgn_module **ret) { - struct drgn_program *prog = load->dbinfo->prog; - struct drgn_error *err; + return drgn_module_find_or_create_linux_kernel_loadable_internal(module_ptr, ret, + NULL, false); +} - struct kernel_module_file *kmods; - if (load->num_paths) { - kmods = 
malloc_array(load->num_paths, sizeof(*kmods)); - if (!kmods) - return &drgn_enomem; - } else { - kmods = NULL; - } +LIBDRGN_PUBLIC struct drgn_error * +drgn_module_find_or_create_linux_kernel_loadable(const struct drgn_object *module_ptr, + struct drgn_module **ret, + bool *new_ret) +{ + return drgn_module_find_or_create_linux_kernel_loadable_internal(module_ptr, ret, + new_ret, true); +} - /* - * We may need to index vmlinux before we can properly report kernel - * modules. So, this sets aside kernel modules and reports everything - * else. - */ - size_t num_kmods = 0; - bool vmlinux_is_pending = false; - for (size_t i = 0; i < load->num_paths; i++) { - const char *path = load->paths[i]; - int fd; - Elf *elf; - err = open_elf_file(path, &fd, &elf); +static struct drgn_error * +yield_kernel_module(struct linux_kernel_loaded_module_iterator *it, + struct drgn_module **ret, bool *new_ret) +{ + struct drgn_error *err; + struct drgn_program *prog = it->it.prog; + + DRGN_OBJECT(mod, prog); + DRGN_OBJECT(mod_ptr, prog); + for (;;) { + uint64_t addr; + err = drgn_object_read_unsigned(&it->node, &addr); if (err) { - err = drgn_debug_info_report_error(load, path, NULL, - err); - if (err) - goto out; - continue; +list_walk_err: + if (!drgn_error_is_fatal(err)) { + drgn_error_log_warning(prog, err, + "can't find remaining kernel modules: " + "couldn't read next module: "); + drgn_error_destroy(err); + *ret = NULL; + err = NULL; + } + return err; + } + if (addr == it->modules_head) { + drgn_log_debug(prog, + "found end of loaded kernel module list"); + *ret = NULL; + return NULL; } - bool is_vmlinux, is_module; - err = identify_kernel_elf(elf, &is_vmlinux, &is_module); - if (err) { - err = drgn_debug_info_report_error(load, path, NULL, - err); - elf_end(elf); - close(fd); - if (err) - goto out; - continue; + if (it->module_list_iterations_remaining == 0) { + drgn_log_warning(prog, + "can't find remaining kernel modules: " + "too many entries or cycle in modules list"); + *ret 
= NULL; + return NULL; } - if (is_module) { - struct kernel_module_file *kmod = &kmods[num_kmods++]; - kmod->path = path; - kmod->fd = fd; - kmod->elf = elf; - } else if (is_vmlinux) { - uint64_t start, end; - err = elf_address_range(elf, - prog->vmcoreinfo.kaslr_offset, - &start, &end); - if (err) { - elf_end(elf); - close(fd); - err = drgn_debug_info_report_error(load, path, - NULL, err); - if (err) - goto out; - continue; - } + it->module_list_iterations_remaining--; - bool is_new; - err = drgn_debug_info_report_elf(load, path, fd, elf, - start, end, "kernel", - &is_new); - if (err) - goto out; - if (is_new) - vmlinux_is_pending = true; - } else { - err = drgn_debug_info_report_elf(load, path, fd, elf, 0, - 0, NULL, NULL); - if (err) - goto out; + err = drgn_object_container_of(&mod_ptr, &it->node, it->module_type, + "list"); + if (err) + goto list_walk_err; + + err = drgn_object_dereference(&mod, &mod_ptr); + if (err) + goto list_walk_err; + // We need several fields from the `struct module`. Especially + // for /proc/kcore, it is faster to read the entire structure + // (which is <2kB as of Linux 6.5) from the core dump all at + // once than it is to read each field individually. 
+ err = drgn_object_read(&mod, &mod); + if (err) + goto list_walk_err; + + err = drgn_object_member(&it->node, &mod, "list"); + if (err) + goto list_walk_err; + err = drgn_object_member(&it->node, &it->node, "next"); + if (err) + goto list_walk_err; + + err = kernel_module_find_or_create_internal(&mod_ptr, &mod, ret, + new_ret, true, true); + if (err && !drgn_error_is_fatal(err)) { + drgn_error_log_warning(prog, err, "ignoring module: "); + drgn_error_destroy(err); + continue; } + return err; } +} - if (load->load_main && !vmlinux_is_pending && - !drgn_debug_info_is_indexed(load->dbinfo, "kernel")) { - err = report_vmlinux(load, &vmlinux_is_pending); - if (err) - goto out; +static struct drgn_error * +linux_kernel_loaded_module_iterator_next(struct drgn_module_iterator *_it, + struct drgn_module **ret, + bool *new_ret) +{ + struct drgn_error *err; + struct linux_kernel_loaded_module_iterator *it = + container_of(_it, struct linux_kernel_loaded_module_iterator, it); + struct drgn_program *prog = it->it.prog; + + if (!it->yielded_vmlinux) { + it->yielded_vmlinux = true; + return yield_vmlinux(it, ret, new_ret); } - err = report_kernel_modules(load, kmods, num_kmods, vmlinux_is_pending); -out: - for (size_t i = 0; i < num_kmods; i++) { - elf_end(kmods[i].elf); - if (kmods[i].fd != -1) - close(kmods[i].fd); + // Start the module list walk if we haven't yet. 
+ if (!it->module_type.type) { + for (int attempt = 1; attempt <= 2; attempt++) { + err = drgn_program_find_type(prog, "struct module", + NULL, &it->module_type); + if (!err) { + err = drgn_program_find_object(prog, "modules", + NULL, + DRGN_FIND_OBJECT_VARIABLE, + &it->node); + } + if (err && err->code == DRGN_ERROR_LOOKUP) { + drgn_error_destroy(err); + if (attempt == 1 && prog->dbinfo.main_module) { + struct drgn_module *module = + prog->dbinfo.main_module; + if (module->debug_file_status + == DRGN_MODULE_FILE_DONT_WANT) { + module->debug_file_status = + DRGN_MODULE_FILE_WANT; + } + if (drgn_module_wants_debug_file(module)) { + err = drgn_load_module_debug_info(&module, + &(size_t){1}); + if (err) + return err; + continue; + } + } + if (!prog->dbinfo.main_module + || drgn_module_wants_debug_file(prog->dbinfo.main_module)) { + drgn_log(it->it.for_load_debug_info + ? DRGN_LOG_DEBUG + : DRGN_LOG_WARNING, + prog, + "can't find loaded modules without kernel debug info"); + } else { + drgn_log_debug(prog, + "kernel does not have loadable module support"); + } + *ret = NULL; + return NULL; + } else if (err) { + return err; + } + } + if (it->node.kind != DRGN_OBJECT_REFERENCE) { + drgn_log_warning(prog, + "can't find kernel modules: " + "can't get address of modules list"); + *ret = NULL; + return NULL; + } + it->modules_head = it->node.address; + err = drgn_object_member(&it->node, &it->node, "next"); + if (!err) + err = drgn_object_read(&it->node, &it->node); + if (err) { + if (drgn_error_is_fatal(err)) + return err; + drgn_error_log_warning(prog, err, + "can't find kernel modules: " + "couldn't read modules list: "); + drgn_error_destroy(err); + *ret = NULL; + return NULL; + } } - free(kmods); - return err; + + return yield_kernel_module(it, ret, new_ret); +} + +struct drgn_error * +linux_kernel_loaded_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret) +{ + struct linux_kernel_loaded_module_iterator *it = calloc(1, sizeof(*it)); + 
if (!it) + return &drgn_enomem; + drgn_module_iterator_init(&it->it, prog, + linux_kernel_loaded_module_iterator_destroy, + linux_kernel_loaded_module_iterator_next); + it->module_list_iterations_remaining = MAX_MODULE_LIST_ITERATIONS; + drgn_object_init(&it->node, prog); + *ret = &it->it; + return NULL; } diff --git a/libdrgn/linux_kernel.h b/libdrgn/linux_kernel.h index 78002f5aa..a7c04a30e 100644 --- a/libdrgn/linux_kernel.h +++ b/libdrgn/linux_kernel.h @@ -6,7 +6,8 @@ #include "drgn_internal.h" -struct drgn_debug_info_load_state; +struct drgn_debug_info_options; +struct drgn_standard_debug_info_find_state; struct drgn_error *drgn_program_finish_set_kernel(struct drgn_program *prog); @@ -24,7 +25,17 @@ struct drgn_error *proc_kallsyms_symbol_addr(const char *name, struct drgn_error *read_vmcoreinfo_fallback(struct drgn_program *prog); struct drgn_error * -linux_kernel_report_debug_info(struct drgn_debug_info_load_state *load); +linux_kernel_loaded_module_iterator_create(struct drgn_program *prog, + struct drgn_module_iterator **ret); + +struct drgn_error * +drgn_module_try_vmlinux_files(struct drgn_module *module, + const struct drgn_debug_info_options *options); + +struct drgn_error * +drgn_module_try_linux_kmod_files(struct drgn_module *module, + const struct drgn_debug_info_options *options, + struct drgn_standard_debug_info_find_state *state); #define KDUMP_SIGNATURE "KDUMP " #define KDUMP_SIG_LEN (sizeof(KDUMP_SIGNATURE) - 1) diff --git a/libdrgn/linux_kernel_helpers.c b/libdrgn/linux_kernel_helpers.c index 7a8ae0ac5..eb54cac41 100644 --- a/libdrgn/linux_kernel_helpers.c +++ b/libdrgn/linux_kernel_helpers.c @@ -320,25 +320,40 @@ struct drgn_error *linux_helper_task_cpu(const struct drgn_object *task, struct drgn_error *err; DRGN_OBJECT(tmp, drgn_object_program(task)); - // If CONFIG_THREAD_INFO_IN_TASK=y and since Linux kernel commit - // bcf9033e5449 ("sched: move CPU field back into thread_info if - // THREAD_INFO_IN_TASK=y") (in v5.16), the CPU is 
task->thread_info.cpu. + // The CPU may be task_thread_info(task)->cpu or task->cpu depending on + // the kernel version. If neither exists, then the kernel must be !SMP. // - // If CONFIG_THREAD_INFO_IN_TASK=y but before that commit, the cpu is - // task->cpu. + // Since Linux kernel commit bcf9033e5449 ("sched: move CPU field back + // into thread_info if THREAD_INFO_IN_TASK=y") (in v5.16), or if + // CONFIG_THREAD_INFO_IN_TASK=n, or before Linux kernel commit + // c65eacbe290b ("sched/core: Allow putting thread_info into + // task_struct") (in v4.9), the CPU is task_thread_info(task)->cpu. // - // If CONFIG_THREAD_INFO_IN_TASK=n or before Linux kernel commit + // Between Linux kernel commits bcf9033e5449 ("sched: move CPU field + // back into thread_info if THREAD_INFO_IN_TASK=y") (in v5.16) and // c65eacbe290b ("sched/core: Allow putting thread_info into - // task_struct") (in v4.9), the CPU is - // ((struct thread_info *)task->stack)->cpu. + // task_struct") (in v4.9), if CONFIG_THREAD_INFO_IN_TASK=y, then the + // CPU is task->cpu. // - // If none of those exist, then the kernel must be !SMP. - err = linux_helper_task_thread_info(&tmp, task); - if (err) - return err; - err = drgn_object_member_dereference(&tmp, &tmp, "cpu"); + // Note that between Linux kernel commit bcf9033e5449 ("sched: move CPU + // field back into thread_info if THREAD_INFO_IN_TASK=y") and commits + // 001430c1910d ("arm64: add CPU field to struct thread_info"), + // 5443f98fb9e0 ("x86: add CPU field to struct thread_info"), + // bd2e2632556a ("s390: add CPU field to struct thread_info"), and + // 227d735d889e ("powerpc: add CPU field to struct thread_info") (all in + // v5.16-rc1), if CONFIG_THREAD_INFO_IN_TASK=y, then + // struct thread_info::cpu may exist but task->cpu is still used. + // Therefore, we must check for task->cpu first. 
(Normally we don't care + // about commits in the middle of a release candidate, but CentOS Stream + // 9 and its derivatives apparently backported commit 5443f98fb9e0 + // without commit bcf9033e5449: + // https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/commit/6d09fbd042c8d99009e16ddba62af09c89358f80.) + err = drgn_object_member_dereference(&tmp, task, "cpu"); if (drgn_error_catch(&err, DRGN_ERROR_LOOKUP)) { - err = drgn_object_member_dereference(&tmp, task, "cpu"); + err = linux_helper_task_thread_info(&tmp, task); + if (err) + return err; + err = drgn_object_member_dereference(&tmp, &tmp, "cpu"); } if (!err) { union drgn_value value; diff --git a/libdrgn/log.c b/libdrgn/log.c index e43fc5314..f86b7c035 100644 --- a/libdrgn/log.c +++ b/libdrgn/log.c @@ -3,6 +3,7 @@ #include <stdarg.h> #include <stdio.h> +#include <sys/ioctl.h> #include "log.h" #include "program.h" @@ -81,3 +82,38 @@ void drgn_error_log(enum drgn_log_level level, struct drgn_program *prog, prog->log_fn(prog, prog->log_arg, level, format, ap, err); va_end(ap); } + +LIBDRGN_PUBLIC void drgn_program_set_progress_file(struct drgn_program *prog, + FILE *file) +{ + prog->progress_file = file; + prog->default_progress_file = false; +} + +FILE *drgn_program_get_progress_file(struct drgn_program *prog, + int *columns_ret) +{ + *columns_ret = -1; + + if (!prog->default_progress_file) { + if (prog->progress_file) { + int fd = fileno(prog->progress_file); + struct winsize winsize; + if (fd >= 0 && ioctl(fd, TIOCGWINSZ, &winsize) == 0) + *columns_ret = winsize.ws_col; + } + return prog->progress_file; + } + + if (drgn_log_is_enabled(prog, DRGN_LOG_WARNING) + && prog->log_fn == drgn_file_log_fn && prog->log_arg == stderr) { + int fd = fileno(stderr); + struct winsize winsize; + if (fd >= 0 && ioctl(fd, TIOCGWINSZ, &winsize) == 0) { + *columns_ret = winsize.ws_col; + return stderr; + } + } + + return NULL; +} diff --git a/libdrgn/log.h b/libdrgn/log.h index 23eaa5063..77cd54c06 100644 --- a/libdrgn/log.h +++ b/libdrgn/log.h 
@@ -86,4 +86,7 @@ void drgn_error_log(enum drgn_log_level level, struct drgn_program *prog, * @} */ +FILE *drgn_program_get_progress_file(struct drgn_program *prog, + int *columns_ret); + #endif /* DRGN_LOG_H */ diff --git a/libdrgn/m4/.gitignore b/libdrgn/m4/.gitignore index 5e048193b..8cb171c93 100644 --- a/libdrgn/m4/.gitignore +++ b/libdrgn/m4/.gitignore @@ -7,3 +7,4 @@ !/my_c_auto.m4 !/my_c_switch_enum.m4 !/my_check_va_args_comma_deletion.m4 +!/my_python_devel.m4 diff --git a/libdrgn/m4/my_python_devel.m4 b/libdrgn/m4/my_python_devel.m4 new file mode 100644 index 000000000..f69fc030c --- /dev/null +++ b/libdrgn/m4/my_python_devel.m4 @@ -0,0 +1,67 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +# MY_PYTHON_DEVEL([find-libpython=no]) +# Check for Python development files and define PYTHON_CPPFLAGS and PYTHON_LIBS +# accordingly. +AC_DEFUN([MY_PYTHON_DEVEL], +[ +AS_IF([test -z "$PYTHON_CPPFLAGS"], + [prog="import sysconfig +include = sysconfig.get_path('include') +platinclude = sysconfig.get_path('platinclude') +include_paths = [[include]] +if platinclude != include: + include_paths.append(platinclude) +print(' '.join('-I' + path for path in include_paths))" + PYTHON_CPPFLAGS=`"$PYTHON" -c "$prog"`]) + +save_CPPFLAGS="$CPPFLAGS" +CPPFLAGS="$CPPFLAGS $PYTHON_CPPFLAGS" + +AS_IF([test "x$1" = xyes], + [AS_IF([test -z "$PYTHON_LIBS"], + [prog="import sysconfig +print('-L' + sysconfig.get_config_var('LIBDIR') + + ' -lpython' + sysconfig.get_config_var('LDVERSION'))" + PYTHON_LIBS=`"$PYTHON" -c "$prog"`]) + + save_LIBS="$LIBS" + LIBS="$LIBS $PYTHON_LIBS" + + AC_MSG_CHECKING([for $PYTHON development headers and library]) + AC_LINK_IFELSE([AC_LANG_SOURCE([[ +#include <Python.h> + +int main(void) +{ + Py_Initialize(); +} +]])], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + AC_MSG_ERROR( +[Could not compile and link test program with Python headers and library. 
+ +You may need to install your distribution's Python development package (e.g., +python3-devel or python3-dev) or specify the location of the Python development +headers and/or library by setting the PYTHON_CPPFLAGS and PYTHON_LIBS +environment variables.])]) + + LIBS="$save_LIBS"], + [AC_MSG_CHECKING([for $PYTHON development headers]) + AC_COMPILE_IFELSE([AC_LANG_SOURCE([[#include <Python.h>]])], + [AC_MSG_RESULT([yes])], + [AC_MSG_RESULT([no]) + AC_MSG_ERROR( +[Could not compile test program with Python headers. + +You may need to install your distribution's Python development package (e.g., +python3-devel or python3-dev) or specify the location of the Python development +headers by setting the PYTHON_CPPFLAGS environment variable.])])]) + +CPPFLAGS="$save_CPPFLAGS" + +AC_SUBST(PYTHON_CPPFLAGS) +AC_SUBST(PYTHON_LIBS) +]) diff --git a/libdrgn/no_python.c b/libdrgn/no_python.c new file mode 100644 index 000000000..ea41b180e --- /dev/null +++ b/libdrgn/no_python.c @@ -0,0 +1,40 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +// Fallback implementations for builds without Python support. 
+ +#include "plugins.h" +#include "program.h" + +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_create(const struct drgn_platform *platform, + struct drgn_program **ret) +{ + struct drgn_program *prog = malloc(sizeof(*prog)); + if (!prog) + return &drgn_enomem; + drgn_program_init(prog, platform); + *ret = prog; + return NULL; +} + +LIBDRGN_PUBLIC void drgn_program_destroy(struct drgn_program *prog) +{ + if (prog) { + drgn_program_deinit(prog); + free(prog); + } +} + +void drgn_call_plugins_prog(const char *name, struct drgn_program *prog) +{ +} + +void *drgn_begin_blocking(void) +{ + return NULL; +} + +void drgn_end_blocking(void *state) +{ +} diff --git a/libdrgn/object.c b/libdrgn/object.c index 5820ce183..d74680b67 100644 --- a/libdrgn/object.c +++ b/libdrgn/object.c @@ -18,11 +18,12 @@ #include "type.h" #include "util.h" -#define DRGN_OBJECT_INITIALIZER(prog) \ - (struct drgn_object){ \ - .type = drgn_void_type(prog, NULL), \ - .encoding = DRGN_OBJECT_ENCODING_NONE, \ - .kind = DRGN_OBJECT_ABSENT, \ +#define DRGN_OBJECT_INITIALIZER(prog) \ + (struct drgn_object){ \ + .type = &(prog)->void_types[DRGN_LANGUAGE_C], \ + .encoding = DRGN_OBJECT_ENCODING_NONE, \ + .kind = DRGN_OBJECT_ABSENT, \ + .absence_reason = DRGN_ABSENCE_REASON_OTHER, \ } LIBDRGN_PUBLIC @@ -498,6 +499,7 @@ drgn_object_set_reference(struct drgn_object *res, LIBDRGN_PUBLIC struct drgn_error * drgn_object_set_absent(struct drgn_object *res, struct drgn_qualified_type qualified_type, + enum drgn_absence_reason reason, uint64_t bit_field_size) { struct drgn_error *err; @@ -505,7 +507,7 @@ drgn_object_set_absent(struct drgn_object *res, err = drgn_object_type(qualified_type, bit_field_size, &type); if (err) return err; - drgn_object_set_absent_internal(res, &type); + drgn_object_set_absent_internal(res, &type, reason); return NULL; } @@ -565,10 +567,10 @@ drgn_object_copy(struct drgn_object *res, const struct drgn_object *obj) } struct drgn_error * -drgn_object_slice_internal(struct drgn_object 
*res, - const struct drgn_object *obj, - const struct drgn_object_type *type, - uint64_t bit_offset, uint64_t bit_field_size) +drgn_object_fragment_internal(struct drgn_object *res, + const struct drgn_object *obj, + const struct drgn_object_type *type, + uint64_t bit_offset, uint64_t bit_field_size) { struct drgn_error *err; @@ -617,9 +619,9 @@ drgn_object_slice_internal(struct drgn_object *res, } LIBDRGN_PUBLIC struct drgn_error * -drgn_object_slice(struct drgn_object *res, const struct drgn_object *obj, - struct drgn_qualified_type qualified_type, - uint64_t bit_offset, uint64_t bit_field_size) +drgn_object_fragment(struct drgn_object *res, const struct drgn_object *obj, + struct drgn_qualified_type qualified_type, + uint64_t bit_offset, uint64_t bit_field_size) { struct drgn_error *err; if (drgn_object_program(res) != drgn_object_program(obj)) { @@ -630,8 +632,8 @@ drgn_object_slice(struct drgn_object *res, const struct drgn_object *obj, err = drgn_object_type(qualified_type, bit_field_size, &type); if (err) return err; - return drgn_object_slice_internal(res, obj, &type, bit_offset, - bit_field_size); + return drgn_object_fragment_internal(res, obj, &type, bit_offset, + bit_field_size); } LIBDRGN_PUBLIC struct drgn_error * @@ -1225,9 +1227,9 @@ drgn_compound_object_is_zero(const struct drgn_object *obj, if (err) return err; - err = drgn_object_slice(&member, obj, member_type, - members[i].bit_offset, - member_bit_field_size); + err = drgn_object_fragment(&member, obj, member_type, + members[i].bit_offset, + member_bit_field_size); if (err) return err; @@ -1254,8 +1256,8 @@ drgn_array_object_is_zero(const struct drgn_object *obj, DRGN_OBJECT(element, drgn_object_program(obj)); length = drgn_type_length(underlying_type); for (i = 0; i < length; i++) { - err = drgn_object_slice(&element, obj, element_type, - i * element_bit_size, 0); + err = drgn_object_fragment(&element, obj, element_type, + i * element_bit_size, 0); if (err) return err; @@ -1383,7 +1385,7 @@ 
drgn_object_reinterpret(struct drgn_object *res, struct drgn_qualified_type qualified_type, const struct drgn_object *obj) { - return drgn_object_slice(res, obj, qualified_type, 0, 0); + return drgn_object_fragment(res, obj, qualified_type, 0, 0); } LIBDRGN_PUBLIC struct drgn_error * @@ -1586,8 +1588,8 @@ drgn_object_subscript(struct drgn_object *res, const struct drgn_object *obj, index * element.bit_size, 0); } else { - return drgn_object_slice(res, obj, element.qualified_type, - index * element.bit_size, 0); + return drgn_object_fragment(res, obj, element.qualified_type, + index * element.bit_size, 0); } } @@ -1613,8 +1615,8 @@ drgn_object_member(struct drgn_object *res, const struct drgn_object *obj, err = drgn_member_type(member, &member_type, &member_bit_field_size); if (err) return err; - return drgn_object_slice(res, obj, member_type, member_bit_offset, - member_bit_field_size); + return drgn_object_fragment(res, obj, member_type, member_bit_offset, + member_bit_field_size); } LIBDRGN_PUBLIC struct drgn_error * diff --git a/libdrgn/object.h b/libdrgn/object.h index 12dcc8f57..d1a7c688d 100644 --- a/libdrgn/object.h +++ b/libdrgn/object.h @@ -203,16 +203,18 @@ drgn_object_set_reference_internal(struct drgn_object *res, */ static inline void drgn_object_set_absent_internal(struct drgn_object *res, - const struct drgn_object_type *type) + const struct drgn_object_type *type, + enum drgn_absence_reason reason) { drgn_object_reinit(res, type, DRGN_OBJECT_ABSENT); + res->absence_reason = reason; } struct drgn_error * -drgn_object_slice_internal(struct drgn_object *res, - const struct drgn_object *obj, - const struct drgn_object_type *type, - uint64_t bit_offset, uint64_t bit_field_size); +drgn_object_fragment_internal(struct drgn_object *res, + const struct drgn_object *obj, + const struct drgn_object_type *type, + uint64_t bit_offset, uint64_t bit_field_size); /** * Binary operator implementation. 
diff --git a/libdrgn/openmp.c b/libdrgn/openmp.c index f73fa592e..80ddc700d 100644 --- a/libdrgn/openmp.c +++ b/libdrgn/openmp.c @@ -46,7 +46,7 @@ static int drgn_num_online_cpu_cores(void) + 1]; int num_cores = 0; - _cleanup_(int_set_deinit) struct int_set cpus_seen = HASH_TABLE_INIT; + HASH_TABLE(int_set, cpus_seen); _cleanup_fclose_ FILE *online = fopen("/sys/devices/system/cpu/online", "r"); if (!online) diff --git a/libdrgn/orc_info.c b/libdrgn/orc_info.c index 809fd6b50..7015d0965 100644 --- a/libdrgn/orc_info.c +++ b/libdrgn/orc_info.c @@ -13,6 +13,7 @@ #include "debug_info.h" // IWYU pragma: associated #include "elf_file.h" #include "error.h" +#include "log.h" #include "orc.h" #include "platform.h" #include "program.h" @@ -20,6 +21,8 @@ DEFINE_VECTOR(uint64_range_vector, struct uint64_range); +#define ORC_HEADER_SIZE 20 + void drgn_module_orc_info_deinit(struct drgn_module *module) { free(module->orc.entries); @@ -34,7 +37,7 @@ static inline uint64_t drgn_raw_orc_pc(struct drgn_module *module, { int32_t offset; memcpy(&offset, &module->orc.pc_offsets[i], sizeof(offset)); - if (drgn_elf_file_bswap(module->debug_file)) + if (module->orc.bswap) offset = bswap_32(offset); return module->orc.pc_base + UINT64_C(4) * i + offset; } @@ -44,7 +47,7 @@ drgn_raw_orc_entry_is_terminator(struct drgn_module *module, unsigned int i) { uint16_t flags; memcpy(&flags, &module->orc.entries[i].flags, sizeof(flags)); - if (drgn_elf_file_bswap(module->debug_file)) + if (module->orc.bswap) flags = bswap_16(flags); if (module->orc.version >= 3) { // orc->type == ORC_TYPE_UNDEFINED @@ -63,7 +66,7 @@ drgn_raw_orc_entry_is_preferred(struct drgn_module *module, unsigned int i) { uint16_t flags; memcpy(&flags, &module->orc.entries[i].flags, sizeof(flags)); - if (drgn_elf_file_bswap(module->debug_file)) + if (module->orc.bswap) flags = bswap_16(flags); // ORC_REG_SP_INDIRECT is used for the stack switching pattern used in // the Linux kernel's call_on_stack()/call_on_irqstack() macros. 
See @@ -151,11 +154,15 @@ remove_fdes_from_orc(struct drgn_module *module, unsigned int *indices, unsigned int num_entries = *num_entriesp; unsigned int new_num_entries = 0; - uint64_t start_pc = drgn_raw_orc_pc(module, 0); + // ORC can be built-in or from the debug file. Because of that, we + // always store the biased/actual address at orc.pc_base. Since we are + // comparing to the unbiased addresses in the debug_frame FDEs, we need + // to subtract the bias from the ORC PC. + uint64_t start_pc = drgn_raw_orc_pc(module, 0) - module->debug_file_bias; uint64_t end_pc; for (unsigned int i = 0; i < num_entries; i++, start_pc = end_pc) { if (i < num_entries - 1) - end_pc = drgn_raw_orc_pc(module, i + 1); + end_pc = drgn_raw_orc_pc(module, i + 1) - module->debug_file_bias; else end_pc = UINT64_MAX; @@ -214,31 +221,28 @@ remove_fdes_from_orc(struct drgn_module *module, unsigned int *indices, return NULL; } -static int orc_version_from_header(Elf_Data *orc_header) +static int orc_version_from_header(const void *buffer) { - if (orc_header->d_size != 20) - return -1; - // Known version identifiers in .orc_header. 
These can be generated in // the kernel source tree with: // sh ./scripts/orc_hash.sh < arch/x86/include/asm/orc_types.h | sed -e 's/^#define ORC_HASH //' -e 's/,/, /g' // Linux kernel commit fb799447ae29 ("x86,objtool: Split // UNWIND_HINT_EMPTY in two") (in v6.4) - static const uint8_t orc_hash_6_4[20] = { + static const uint8_t orc_hash_6_4[ORC_HEADER_SIZE] = { 0xfe, 0x5d, 0x32, 0xbf, 0x58, 0x1b, 0xd6, 0x3b, 0x2c, 0xa9, 0xa5, 0xc6, 0x5b, 0xa5, 0xa6, 0x25, 0xea, 0xb3, 0xfe, 0x24, }; // Linux kernel commit ffb1b4a41016 ("x86/unwind/orc: Add 'signal' field // to ORC metadata") (in v6.3) - static const uint8_t orc_hash_6_3[20] = { + static const uint8_t orc_hash_6_3[ORC_HEADER_SIZE] = { 0xdb, 0x84, 0xae, 0xd4, 0x10, 0x3b, 0x31, 0xdd, 0x51, 0xe0, 0x17, 0xf8, 0xf7, 0x97, 0x83, 0xca, 0x98, 0x5c, 0x2c, 0x51, }; - if (memcmp(orc_header->d_buf, orc_hash_6_4, 20) == 0) + if (memcmp(buffer, orc_hash_6_4, ORC_HEADER_SIZE) == 0) return 3; - else if (memcmp(orc_header->d_buf, orc_hash_6_3, 20) == 0) + else if (memcmp(buffer, orc_hash_6_3, ORC_HEADER_SIZE) == 0) return 2; return -1; } @@ -302,6 +306,10 @@ static struct drgn_error *drgn_read_orc_sections(struct drgn_module *module) return NULL; } + err = drgn_elf_file_apply_relocations(module->debug_file); + if (err) + return err; + // Since Linux kernel b9f174c811e3 ("x86/unwind/orc: Add ELF section // with ORC version identifier") (in v6.4), which was also backported to // Linux 6.3.10, vmlinux and kernel modules have a .orc_header ELF @@ -314,7 +322,9 @@ static struct drgn_error *drgn_read_orc_sections(struct drgn_module *module) err = read_elf_section(orc_header_scn, &orc_header); if (err) return err; - module->orc.version = orc_version_from_header(orc_header); + module->orc.version = -1; + if (orc_header->d_size == ORC_HEADER_SIZE) + module->orc.version = orc_version_from_header(orc_header->d_buf); if (module->orc.version < 0) { return drgn_error_create(DRGN_ERROR_OTHER, "unrecognized .orc_header"); @@ -352,6 +362,165 @@ 
static struct drgn_error *drgn_read_orc_sections(struct drgn_module *module) return NULL; } +static struct drgn_error * +copy_builtin_orc_buffers(struct drgn_module *module, uint64_t num_entries, + uint64_t unwind, uint64_t unwind_ip, uint64_t header) +{ + uint8_t header_data[ORC_HEADER_SIZE]; + + struct drgn_error *err; + + if (header) { + err = drgn_program_read_memory(module->prog, header_data, + header, sizeof(header_data), + false); + + if (err) + return err; + + module->orc.version = orc_version_from_header(header_data); + if (module->orc.version < 0) + return drgn_error_create(DRGN_ERROR_OTHER, + "unrecognized .orc_header"); + } else { + module->orc.version = orc_version_from_osrelease(module->prog); + } + + _cleanup_free_ int32_t *pc_offsets = malloc_array(num_entries, + sizeof(pc_offsets[0])); + if (!pc_offsets) + return &drgn_enomem; + err = drgn_program_read_memory(module->prog, pc_offsets, unwind_ip, + num_entries * sizeof(pc_offsets[0]), false); + if (err) + return err; + + _cleanup_free_ struct drgn_orc_entry *entries = + malloc_array(num_entries, sizeof(entries[0])); + if (!entries) + return &drgn_enomem; + err = drgn_program_read_memory(module->prog, entries, unwind, + num_entries * sizeof(entries[0]), false); + if (err) + return err; + + module->orc.entries = no_cleanup_ptr(entries); + module->orc.pc_offsets = no_cleanup_ptr(pc_offsets); + module->orc.num_entries = num_entries; + module->orc.pc_base = unwind_ip; + drgn_log_debug(module->prog, "Loaded built-in ORC (v%d) for module %s", + module->orc.version, module->name); + return NULL; +} + +static struct drgn_error *drgn_read_vmlinux_orc(struct drgn_module *module) +{ + struct drgn_error *err; + struct drgn_symbol *sym; + + uint64_t unwind_ip_start, unwind_ip_end; + uint64_t unwind_start, unwind_end; + uint64_t header_start = 0, header_end = 0; + +#define get_symbol(name, var, optional) \ + err = drgn_program_find_symbol_by_name(module->prog, name, &sym); \ + if (!err) { \ + var = sym->address; \ 
+ drgn_symbol_destroy(sym); \ + sym = NULL; \ + } else if (optional && drgn_error_catch(&err, DRGN_ERROR_LOOKUP)) { \ + sym = NULL; \ + } else { \ + drgn_error_catch(&err, DRGN_ERROR_LOOKUP); \ + return err; \ + } + + get_symbol("__start_orc_unwind_ip", unwind_ip_start, false); + get_symbol("__stop_orc_unwind_ip", unwind_ip_end, false); + get_symbol("__start_orc_unwind", unwind_start, false); + get_symbol("__stop_orc_unwind", unwind_end, false); + get_symbol("__start_orc_header", header_start, true); + get_symbol("__stop_orc_header", header_end, true); +#undef get_symbol + + if ((unwind_ip_end - unwind_ip_start) % sizeof(int32_t)) + return drgn_error_create(DRGN_ERROR_OTHER, "invalid built-in orc_unwind_ip range"); + uint64_t num_entries = (unwind_ip_end - unwind_ip_start) / sizeof(int32_t); + if (num_entries > UINT_MAX) + return drgn_error_create(DRGN_ERROR_OTHER, + "built-in orc_unwind_ip range is too large"); + + if ((unwind_end - unwind_start) % sizeof(struct drgn_orc_entry) + || (unwind_end - unwind_start) / sizeof(struct drgn_orc_entry) != num_entries) + return drgn_error_create(DRGN_ERROR_OTHER, "invalid built-in orc_unwind range"); + + if (header_start && header_end && header_end - header_start != ORC_HEADER_SIZE) + return drgn_error_create(DRGN_ERROR_OTHER, "invalid built-in orc_header size"); + + return copy_builtin_orc_buffers(module, num_entries, unwind_start, + unwind_ip_start, header_start); +} + +static struct drgn_error *drgn_read_builtin_orc(struct drgn_module *module) +{ + if (!(module->prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL)) + return NULL; + if (module->kind == DRGN_MODULE_MAIN) + return drgn_read_vmlinux_orc(module); + else if (module->kind != DRGN_MODULE_RELOCATABLE) + return NULL; + else if (module->object.kind == DRGN_OBJECT_ABSENT) + return NULL; + + // num_entries is implied by the size of the arrays. We can get the + // array addresses from the section address info, but not their size. 
So + // we need to find num_orcs by reading it out of the arch-specific + // module info. + DRGN_OBJECT(tmp, module->prog); + struct drgn_error *err; + + err = drgn_object_dereference(&tmp, &module->object); + if (err) + return err; + + err = drgn_object_member(&tmp, &tmp, "arch"); + if (err) + return err; + + err = drgn_object_member(&tmp, &tmp, "num_orcs"); + // If the kernel does not support ORC (e.g. it is too old), this will be + // the first lookup error we encounter. Catch it and don't return any + // error. + if (drgn_error_catch(&err, DRGN_ERROR_LOOKUP) || err) + return err; + + uint64_t num_entries; + err = drgn_object_read_unsigned(&tmp, &num_entries); + if (err) + return err; + + // We'll still use the section addresses for everything else, because + // the orc_header is only present there, and it should be a bit faster + // to read data which we already parsed, rather than going back to read + // it from program memory. + uint64_t orc_unwind; + uint64_t orc_unwind_ip; + uint64_t orc_header = 0; + err = drgn_module_get_section_address(module, ".orc_unwind", &orc_unwind); + if (err) + return err; + err = drgn_module_get_section_address(module, ".orc_unwind_ip", &orc_unwind_ip); + if (err) + return err; + err = drgn_module_get_section_address(module, ".orc_header", &orc_header); + drgn_error_catch(&err, DRGN_ERROR_LOOKUP); + if (err) + return err; + + return copy_builtin_orc_buffers(module, num_entries, orc_unwind, + orc_unwind_ip, orc_header); +} + static inline void drgn_module_clear_orc(struct drgn_module **modulep) { if (*modulep) { @@ -360,22 +529,48 @@ static inline void drgn_module_clear_orc(struct drgn_module **modulep) } } -struct drgn_error *drgn_module_parse_orc(struct drgn_module *module) +struct drgn_error *drgn_module_parse_orc(struct drgn_module *module, + bool use_builtin) { struct drgn_error *err; - if (module->debug_file->platform.arch->arch != DRGN_ARCH_X86_64) + if (module->prog->platform.arch->arch != DRGN_ARCH_X86_64) return NULL; - 
// pc_offsets and entries point to the Elf_Data buffers until we're - // done. We don't want those freed by drgn_module_orc_info_deinit(), so - // clear them if anything goes wrong. + // When loading from the debug_file, pc_offsets and entries point to the + // Elf_Data buffers until the end of this function, when copies are made. + // When loading built-in ORC, we own both buffers. In either case, we + // don't want these pointers freed by drgn_module_orc_info_deinit() if + // we fail early. So we use this to reset the pointers to NULL on + // failure. _cleanup_(drgn_module_clear_orc) struct drgn_module *clear = module; - err = drgn_read_orc_sections(module); + // For the built-in ORC, we use these to clean up the memory allocated + // in drgn_read_builtin_orc(). This happens even on success, because the + // buffers are copied at the end of this function. + _cleanup_free_ void *cleanup_pc_offsets = NULL; + _cleanup_free_ void *cleanup_entries = NULL; + + if (use_builtin) { + err = drgn_read_builtin_orc(module); + cleanup_pc_offsets = module->orc.pc_offsets; + cleanup_entries = module->orc.entries; + } else { + err = drgn_read_orc_sections(module); + module->orc.pc_base += module->debug_file_bias; + } if (err || !module->orc.num_entries) return err; + // We may need to byte swap ORC entries. Rather than checking the + // debug_file's platform, use the program's platform (since they are the + // same) because it's possible there is no debug_file (e.g. for builtin + // ORC). 
+ bool bswap; + err = drgn_program_bswap(module->prog, &bswap); + if (err) + return err; + unsigned int num_entries = module->orc.num_entries; _cleanup_free_ unsigned int *indices = malloc_array(num_entries, sizeof(indices[0])); @@ -398,8 +593,7 @@ struct drgn_error *drgn_module_parse_orc(struct drgn_module *module) } } - _cleanup_(uint64_range_vector_deinit) - struct uint64_range_vector preferred = VECTOR_INIT; + VECTOR(uint64_range_vector, preferred); err = remove_fdes_from_orc(module, indices, &preferred, &num_entries); if (err) @@ -415,7 +609,6 @@ struct drgn_error *drgn_module_parse_orc(struct drgn_module *module) return &drgn_enomem; const int32_t *orig_offsets = module->orc.pc_offsets; const struct drgn_orc_entry *orig_entries = module->orc.entries; - const bool bswap = drgn_elf_file_bswap(module->debug_file); const int version = module->orc.version; for (unsigned int i = 0; i < num_entries; i++) { unsigned int index = indices[i]; @@ -472,6 +665,7 @@ struct drgn_error *drgn_module_parse_orc(struct drgn_module *module) module->orc.pc_offsets = no_cleanup_ptr(pc_offsets); module->orc.entries = no_cleanup_ptr(entries); module->orc.num_entries = num_entries; + module->orc.bswap = bswap; clear = NULL; return NULL; } @@ -497,11 +691,10 @@ drgn_module_find_orc_cfi(struct drgn_module *module, uint64_t pc, struct drgn_cfi_row **row_ret, bool *interrupted_ret, drgn_register_number *ret_addr_regno_ret) { - uint64_t unbiased_pc = pc - module->debug_file_bias; #define less_than_orc_pc(a, b) \ (*(a) < drgn_orc_pc(module, (b) - module->orc.pc_offsets)) size_t i = binary_search_gt(module->orc.pc_offsets, - module->orc.num_entries, &unbiased_pc, + module->orc.num_entries, &pc, less_than_orc_pc); #undef less_than_orc_pc // We can tell when the program counter is below the minimum program diff --git a/libdrgn/orc_info.h b/libdrgn/orc_info.h index 49c07076c..a95d234d1 100644 --- a/libdrgn/orc_info.h +++ b/libdrgn/orc_info.h @@ -41,7 +41,8 @@ struct drgn_module_orc_info { * Base 
for calculating program counter corresponding to an ORC unwinder * entry. * - * This is the address of the `.orc_unwind_ip` ELF section. + * This is the address of the `.orc_unwind_ip` ELF section. It is the + * actual loaded location, with any bias already applied. * * @sa drgn_module_orc_info::entries */ @@ -72,11 +73,14 @@ struct drgn_module_orc_info { unsigned int num_entries; /** Version of the ORC format. See @ref orc.h. */ int version; + /** Whether to byte swap data */ + bool bswap; }; void drgn_module_orc_info_deinit(struct drgn_module *module); -struct drgn_error *drgn_module_parse_orc(struct drgn_module *module); +struct drgn_error *drgn_module_parse_orc(struct drgn_module *module, + bool use_builtin); bool drgn_module_should_prefer_orc_cfi(struct drgn_module *module, uint64_t pc); diff --git a/libdrgn/platform.h b/libdrgn/platform.h index 18bc23cef..a3fc0dc16 100644 --- a/libdrgn/platform.h +++ b/libdrgn/platform.h @@ -194,6 +194,7 @@ typedef struct drgn_error * * - Define the following @ref drgn_architecture_info members: * - @ref default_dwarf_cfi_row (use @ref DRGN_CFI_ROW) * - @ref fallback_unwind + * - @ref bad_call_unwind * - @ref pt_regs_get_initial_registers * - @ref prstatus_get_initial_registers * - @ref linux_kernel_get_initial_registers @@ -343,6 +344,17 @@ struct drgn_architecture_info { struct drgn_error *(*fallback_unwind)(struct drgn_program *, struct drgn_register_state *, struct drgn_register_state **); + /** + * Try to unwind a stack frame assuming that a call was made to a bad + * program counter. + * + * This should typically undo the effects of a single call instruction + * and nothing more. If this has to read memory, translate @ref + * DRGN_ERROR_FAULT errors to &@ref drgn_stop. + */ + struct drgn_error *(*bad_call_unwind)(struct drgn_program *, + struct drgn_register_state *, + struct drgn_register_state **); /** * Create a @ref drgn_register_state from a Linux `struct pt_regs`. 
* diff --git a/libdrgn/plugins.h b/libdrgn/plugins.h new file mode 100644 index 000000000..5fd7f4e80 --- /dev/null +++ b/libdrgn/plugins.h @@ -0,0 +1,11 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#ifndef DRGN_PLUGINS_H +#define DRGN_PLUGINS_H + +struct drgn_program; + +void drgn_call_plugins_prog(const char *name, struct drgn_program *prog); + +#endif /* DRGN_PLUGINS_H */ diff --git a/libdrgn/program.c b/libdrgn/program.c index ab7783196..c4522c6f2 100644 --- a/libdrgn/program.c +++ b/libdrgn/program.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -28,10 +27,13 @@ #include "language.h" #include "log.h" #include "linux_kernel.h" +#include "log.h" #include "memory_reader.h" #include "minmax.h" #include "object.h" +#include "plugins.h" #include "program.h" +#include "serialize.h" #include "symbol.h" #include "util.h" #include "vector.h" @@ -77,7 +79,27 @@ drgn_program_platform(struct drgn_program *prog) LIBDRGN_PUBLIC const struct drgn_language * drgn_program_language(struct drgn_program *prog) { - return prog->lang ? 
prog->lang : &drgn_default_language; + if (prog->lang) + return prog->lang; + if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) { + prog->lang = &drgn_language_c; + return prog->lang; + } + if (!prog->tried_main_language) { + prog->tried_main_language = true; + prog->lang = drgn_debug_info_main_language(&prog->dbinfo); + if (prog->lang) { + drgn_log_debug(prog, + "set default language to %s from main()", + prog->lang->name); + return prog->lang; + } else { + drgn_log_debug(prog, + "couldn't find language of main(); defaulting to %s", + drgn_default_language.name); + } + } + return &drgn_default_language; } LIBDRGN_PUBLIC void drgn_program_set_language(struct drgn_program *prog, @@ -108,18 +130,19 @@ void drgn_program_init(struct drgn_program *prog, drgn_thread_set_init(&prog->thread_set); drgn_program_set_log_level(prog, DRGN_LOG_NONE); drgn_program_set_log_file(prog, stderr); + prog->default_progress_file = true; drgn_object_init(&prog->vmemmap, prog); } void drgn_program_deinit(struct drgn_program *prog) { drgn_thread_set_deinit(&prog->thread_set); - /* - * For userspace core dumps, main_thread and crashed_thread are in - * prog->thread_set and thus freed by the above call to - * drgn_thread_set_deinit(). - */ - if (!drgn_program_is_userspace_core(prog)) { + if (drgn_program_is_userspace_core(prog)) { + free(prog->core_dump_fname_cached); + } else { + // For userspace core dumps, main_thread and crashed_thread are + // in prog->thread_set and thus freed by the above call to + // drgn_thread_set_deinit(). 
drgn_thread_destroy(prog->crashed_thread); drgn_thread_destroy(prog->main_thread); } @@ -155,28 +178,6 @@ void drgn_program_deinit(struct drgn_program *prog) drgn_debug_info_deinit(&prog->dbinfo); } -LIBDRGN_PUBLIC struct drgn_error * -drgn_program_create(const struct drgn_platform *platform, - struct drgn_program **ret) -{ - struct drgn_program *prog; - - prog = malloc(sizeof(*prog)); - if (!prog) - return &drgn_enomem; - drgn_program_init(prog, platform); - *ret = prog; - return NULL; -} - -LIBDRGN_PUBLIC void drgn_program_destroy(struct drgn_program *prog) -{ - if (prog) { - drgn_program_deinit(prog); - free(prog); - } -} - LIBDRGN_PUBLIC struct drgn_error * drgn_program_add_memory_segment(struct drgn_program *prog, uint64_t address, uint64_t size, drgn_memory_read_fn read_fn, @@ -642,6 +643,7 @@ drgn_program_set_core_dump_fd_internal(struct drgn_program *prog, int fd, goto out_segments; } + drgn_call_plugins_prog("drgn_prog_set", prog); return NULL; out_segments: @@ -745,6 +747,8 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) prog->pid = pid; prog->flags |= DRGN_PROGRAM_IS_LIVE | DRGN_PROGRAM_IS_LOCAL; + + drgn_call_plugins_prog("drgn_prog_set", prog); return NULL; out_segments: @@ -759,63 +763,90 @@ drgn_program_set_pid(struct drgn_program *prog, pid_t pid) return err; } -/* Set the default language from the language of "main". 
*/ -static void drgn_program_set_language_from_main(struct drgn_program *prog) +struct drgn_error *drgn_program_cache_auxv(struct drgn_program *prog) { - struct drgn_error *err; + if (prog->auxv_cached) + return NULL; - if (prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - return; - const struct drgn_language *lang; - err = drgn_debug_info_main_language(&prog->dbinfo, &lang); - if (err) { - drgn_error_destroy(err); - return; + _cleanup_close_ int fd = -1; + const void *note; + size_t note_size; +#define FORMAT "/proc/%ld/auxv" + char path[sizeof(FORMAT) + - sizeof("%ld") + + max_decimal_length(long) + + 1]; + if (drgn_program_is_userspace_process(prog)) { + snprintf(path, sizeof(path), FORMAT, (long)prog->pid); +#undef FORMAT + fd = open(path, O_RDONLY); + if (fd < 0) + return drgn_error_create_os("open", errno, path); + drgn_log_debug(prog, "parsing %s", path); + } else { + assert(drgn_program_is_userspace_core(prog)); + if (find_elf_note(prog->core, "CORE", NT_AUXV, &note, + &note_size)) + return drgn_error_libelf(); + if (!note) { + return drgn_error_create(DRGN_ERROR_OTHER, + "core file is missing NT_AUXV"); + } + drgn_log_debug(prog, "parsing NT_AUXV"); } - if (lang) - prog->lang = lang; -} -static int drgn_set_platform_from_dwarf(Dwfl_Module *module, void **userdatap, - const char *name, Dwarf_Addr base, - Dwarf *dwarf, Dwarf_Addr bias, - void *arg) -{ - Elf *elf; - GElf_Ehdr ehdr_mem, *ehdr; - struct drgn_platform platform; + memset(&prog->auxv, 0, sizeof(prog->auxv)); - elf = dwarf_getelf(dwarf); - if (!elf) - return DWARF_CB_OK; - ehdr = gelf_getehdr(elf, &ehdr_mem); - if (!ehdr) - return DWARF_CB_OK; - drgn_platform_from_elf(ehdr, &platform); - drgn_program_set_platform(arg, &platform); - return DWARF_CB_ABORT; -} - -LIBDRGN_PUBLIC struct drgn_error * -drgn_program_load_debug_info(struct drgn_program *prog, const char **paths, - size_t n, bool load_default, bool load_main) -{ - struct drgn_error *err; - - if (!n && !load_default && !load_main) - return NULL; - -
drgn_blocking_guard(prog); - err = drgn_debug_info_load(&prog->dbinfo, paths, n, load_default, load_main); - if ((!err || err->code == DRGN_ERROR_MISSING_DEBUG_INFO)) { - if (!prog->lang) - drgn_program_set_language_from_main(prog); - if (!prog->has_platform) { - dwfl_getdwarf(prog->dbinfo.dwfl, - drgn_set_platform_from_dwarf, prog, 0); + bool is_64_bit = drgn_platform_is_64_bit(&prog->platform); + bool bswap = drgn_platform_bswap(&prog->platform); + size_t aux_size = is_64_bit ? 16 : 8; +#define visit_aux_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(a_type); \ + visit_scalar_member(a_un.a_val); \ +} while (0) + for (;;) { + Elf64_auxv_t auxv; + if (fd >= 0) { + ssize_t r = read_all(fd, &auxv, aux_size); + if (r < 0) + return drgn_error_create_os("read", errno, path); + if (r < aux_size) + break; + deserialize_struct64_inplace(&auxv, Elf32_auxv_t, + visit_aux_members, + is_64_bit, bswap); + } else { + if (note_size < aux_size) + break; + deserialize_struct64(&auxv, Elf32_auxv_t, + visit_aux_members, note, is_64_bit, + bswap); + note = (char *)note + aux_size; + note_size -= aux_size; + } + if (auxv.a_type == 0 && auxv.a_un.a_val == 0) + break; + switch (auxv.a_type) { + case AT_PHDR: + drgn_log_debug(prog, "found AT_PHDR 0x%" PRIx64, + auxv.a_un.a_val); + prog->auxv.at_phdr = auxv.a_un.a_val; + break; + case AT_PHNUM: + drgn_log_debug(prog, "found AT_PHNUM %" PRIu64, + auxv.a_un.a_val); + prog->auxv.at_phnum = auxv.a_un.a_val; + break; + case AT_SYSINFO_EHDR: + drgn_log_debug(prog, "found AT_SYSINFO_EHDR 0x%" PRIx64, + auxv.a_un.a_val); + prog->auxv.at_sysinfo_ehdr = auxv.a_un.a_val; + break; } } - return err; +#undef visit_aux_members + prog->auxv_cached = true; + return NULL; } static struct drgn_error *get_prstatus_pid(struct drgn_program *prog, const char *data, @@ -869,7 +900,7 @@ static struct drgn_error *get_prpsinfo_pid(struct drgn_program *prog, static struct drgn_error *get_prpsinfo_fname(struct drgn_program *prog, const 
char *data, size_t size, - const char **ret) + char **ret) { bool is_64_bit; struct drgn_error *err = drgn_program_is_64_bit(prog, &is_64_bit); @@ -883,15 +914,10 @@ static struct drgn_error *get_prpsinfo_fname(struct drgn_program *prog, return drgn_error_create(DRGN_ERROR_OTHER, "NT_PRPSINFO is truncated"); } - // No need to make a copy: the data returned by elf_getdata_rawchunk() - // is valid for the lifetime of the Elf handle, and prog->core is valid for - // the lifetime of prog. - const char *tmp = data + offset; - size_t len = strnlen(tmp, PR_FNAME_LEN); - if (len == PR_FNAME_LEN) + char *tmp = strndup(data + offset, PR_FNAME_LEN); #undef PR_FNAME_LEN - return drgn_error_create(DRGN_ERROR_OTHER, - "pr_fname is not null terminated"); + if (!tmp) + return &drgn_enomem; *ret = tmp; return NULL; } @@ -975,7 +1001,7 @@ drgn_program_cache_core_dump_threads(struct drgn_program *prog) uint32_t first_prstatus_tid; bool found_prpsinfo = false; uint32_t prpsinfo_pid; - const char *prpsinfo_fname = NULL; + _cleanup_free_ char *prpsinfo_fname = NULL; if (prog->core_dump_threads_cached) return NULL; @@ -1075,7 +1101,7 @@ drgn_program_cache_core_dump_threads(struct drgn_program *prog) &prpsinfo_pid); /* If the PID isn't found, then this is NULL. 
*/ prog->main_thread = it.entry; - prog->core_dump_fname_cached = prpsinfo_fname; + prog->core_dump_fname_cached = no_cleanup_ptr(prpsinfo_fname); } if (found_prstatus) { /* @@ -1692,18 +1718,14 @@ struct drgn_error *drgn_program_init_pid(struct drgn_program *prog, pid_t pid) LIBDRGN_PUBLIC struct drgn_error * drgn_program_from_core_dump(const char *path, struct drgn_program **ret) { - struct drgn_error *err; struct drgn_program *prog; + struct drgn_error *err = drgn_program_create(NULL, &prog); + if (err) + return err; - prog = malloc(sizeof(*prog)); - if (!prog) - return &drgn_enomem; - - drgn_program_init(prog, NULL); err = drgn_program_init_core_dump(prog, path); if (err) { - drgn_program_deinit(prog); - free(prog); + drgn_program_destroy(prog); return err; } @@ -1714,18 +1736,14 @@ drgn_program_from_core_dump(const char *path, struct drgn_program **ret) LIBDRGN_PUBLIC struct drgn_error * drgn_program_from_core_dump_fd(int fd, struct drgn_program **ret) { - struct drgn_error *err; struct drgn_program *prog; + struct drgn_error *err = drgn_program_create(NULL, &prog); + if (err) + return err; - prog = malloc(sizeof(*prog)); - if (!prog) - return &drgn_enomem; - - drgn_program_init(prog, NULL); err = drgn_program_init_core_dump_fd(prog, fd); if (err) { - drgn_program_deinit(prog); - free(prog); + drgn_program_destroy(prog); return err; } @@ -1736,18 +1754,14 @@ drgn_program_from_core_dump_fd(int fd, struct drgn_program **ret) LIBDRGN_PUBLIC struct drgn_error * drgn_program_from_kernel(struct drgn_program **ret) { - struct drgn_error *err; struct drgn_program *prog; + struct drgn_error *err = drgn_program_create(NULL, &prog); + if (err) + return err; - prog = malloc(sizeof(*prog)); - if (!prog) - return &drgn_enomem; - - drgn_program_init(prog, NULL); err = drgn_program_init_kernel(prog); if (err) { - drgn_program_deinit(prog); - free(prog); + drgn_program_destroy(prog); return err; } @@ -1758,18 +1772,14 @@ drgn_program_from_kernel(struct drgn_program **ret) 
LIBDRGN_PUBLIC struct drgn_error * drgn_program_from_pid(pid_t pid, struct drgn_program **ret) { - struct drgn_error *err; struct drgn_program *prog; + struct drgn_error *err = drgn_program_create(NULL, &prog); + if (err) + return err; - prog = malloc(sizeof(*prog)); - if (!prog) - return &drgn_enomem; - - drgn_program_init(prog, NULL); err = drgn_program_init_pid(prog, pid); if (err) { - drgn_program_deinit(prog); - free(prog); + drgn_program_destroy(prog); return err; } @@ -1808,7 +1818,7 @@ LIBDRGN_PUBLIC struct drgn_error * drgn_program_read_c_string(struct drgn_program *prog, uint64_t address, bool physical, size_t max_size, char **ret) { - _cleanup_(char_vector_deinit) struct char_vector str = VECTOR_INIT; + VECTOR(char_vector, str); for (;;) { struct drgn_error *err = drgn_program_untagged_addr(prog, &address); if (err) @@ -2093,38 +2103,3 @@ drgn_program_element_info(struct drgn_program *prog, struct drgn_type *type, ret->qualified_type = drgn_type_type(underlying_type); return drgn_type_bit_size(ret->qualified_type.type, &ret->bit_size); } - -LIBDRGN_PUBLIC void -drgn_program_set_blocking_callback(struct drgn_program *prog, - drgn_program_begin_blocking_fn *begin_callback, - drgn_program_end_blocking_fn *end_callback, - void *callback_arg) -{ - prog->begin_blocking_fn = begin_callback; - prog->end_blocking_fn = end_callback; - prog->blocking_arg = callback_arg; -} - -LIBDRGN_PUBLIC void -drgn_program_get_blocking_callback(struct drgn_program *prog, - drgn_program_begin_blocking_fn **begin_callback_ret, - drgn_program_end_blocking_fn **end_callback_ret, - void **callback_arg_ret) -{ - *begin_callback_ret = prog->begin_blocking_fn; - *end_callback_ret = prog->end_blocking_fn; - *callback_arg_ret = prog->blocking_arg; -} - -void *drgn_program_begin_blocking(struct drgn_program *prog) -{ - if (!prog->begin_blocking_fn) - return NULL; - return prog->begin_blocking_fn(prog, prog->blocking_arg); -} - -void drgn_program_end_blocking(struct drgn_program *prog, void 
*state) -{ - if (prog->end_blocking_fn) - prog->end_blocking_fn(prog, prog->blocking_arg, state); -} diff --git a/libdrgn/program.h b/libdrgn/program.h index 5e5fee714..17afe8630 100644 --- a/libdrgn/program.h +++ b/libdrgn/program.h @@ -30,7 +30,9 @@ #include "vector.h" struct drgn_object_finder; +struct drgn_symbol; struct drgn_symbol_finder; +struct drgn_type_finder; /** * @defgroup Internals Internals @@ -118,6 +120,11 @@ struct drgn_program { /* Default language of the program. */ const struct drgn_language *lang; struct drgn_platform platform; + /** + * Whether we have tried determining the default language from "main" + * since the last time that debug info was added. + */ + bool tried_main_language; bool has_platform; enum drgn_program_flags flags; @@ -146,7 +153,14 @@ struct drgn_program { */ struct { /** Cached `pr_fname` from `NT_PRPSINFO` note. */ - const char *core_dump_fname_cached; + char *core_dump_fname_cached; + /** Cache of important parts of auxiliary vector. */ + struct { + uint64_t at_phdr; + uint64_t at_phnum; + uint64_t at_sysinfo_ehdr; + } auxv; + bool auxv_cached; }; /* @@ -160,6 +174,8 @@ struct drgn_program { struct { /** `uname -r` */ char osrelease[128]; + /** Build ID. */ + char build_id[128]; /** `PAGE_SIZE` of the kernel. */ uint64_t page_size; /** @@ -193,6 +209,8 @@ struct drgn_program { bool have_crashtime; /** Whether `phys_base` was in the VMCOREINFO. */ bool have_phys_base; + /** Length of build ID. */ + unsigned int build_id_len; /** * `PAGE_SHIFT` of the kernel (derived from * `PAGE_SIZE`). @@ -240,14 +258,9 @@ struct drgn_program { */ drgn_log_fn *log_fn; void *log_arg; + FILE *progress_file; enum drgn_log_level log_level; - - /* - * Blocking callbacks. - */ - drgn_program_begin_blocking_fn *begin_blocking_fn; - drgn_program_end_blocking_fn *end_blocking_fn; - void *blocking_arg; + bool default_progress_file; }; /** Initialize a @ref drgn_program. 
*/ @@ -289,6 +302,8 @@ struct drgn_error *drgn_program_init_kernel(struct drgn_program *prog); */ struct drgn_error *drgn_program_init_pid(struct drgn_program *prog, pid_t pid); +struct drgn_error *drgn_program_cache_auxv(struct drgn_program *prog); + /** * Return whether a @ref drgn_program is a userspace process running on the * local machine. @@ -450,46 +465,32 @@ drgn_program_register_symbol_finder_impl(struct drgn_program *prog, /** * Call before a blocking (I/O or long-running) operation. * - * Must be paired with @ref drgn_program_end_blocking(). + * Must be paired with @ref drgn_end_blocking(). * - * @return Opaque pointer to pass to @ref drgn_program_end_blocking(). + * @return Opaque pointer to pass to @ref drgn_end_blocking(). */ -void *drgn_program_begin_blocking(struct drgn_program *prog); +void *drgn_begin_blocking(void); /** * Call after a blocking (I/O or long-running) operation. * - * @param[in] state Return value of @ref drgn_program_begin_blocking(). + * @param[in] state Return value of @ref drgn_begin_blocking(). */ -void drgn_program_end_blocking(struct drgn_program *prog, void *state); - -struct drgn_blocking_guard_struct { - struct drgn_program *prog; - void *state; -}; - -static inline struct drgn_blocking_guard_struct -drgn_blocking_guard_init(struct drgn_program *prog) -{ - return (struct drgn_blocking_guard_struct){ - prog, drgn_program_begin_blocking(prog), - }; -} +void drgn_end_blocking(void *state); -static inline void -drgn_blocking_guard_cleanup(struct drgn_blocking_guard_struct *guard) +static inline void drgn_blocking_guard_cleanup(void **statep) { - drgn_program_end_blocking(guard->prog, guard->state); + drgn_end_blocking(*statep); } /** - * Scope guard that wraps @ref drgn_program_begin_blocking() and @ref - * drgn_program_end_blocking(). + * Scope guard that wraps @ref drgn_begin_blocking() and @ref + * drgn_end_blocking(). 
*/ -#define drgn_blocking_guard(prog) \ - struct drgn_blocking_guard_struct PP_UNIQUE(guard) \ +#define drgn_blocking_guard() \ + void *PP_UNIQUE(guard) \ __attribute__((__cleanup__(drgn_blocking_guard_cleanup), __unused__)) = \ - drgn_blocking_guard_init(prog) + drgn_begin_blocking() /** * @} diff --git a/libdrgn/python/debug_info_options.c b/libdrgn/python/debug_info_options.c new file mode 100644 index 000000000..852c9aa0d --- /dev/null +++ b/libdrgn/python/debug_info_options.c @@ -0,0 +1,240 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "drgnpy.h" + +static PyObject *DebugInfoOptions_wrap_list(const char * const *list) +{ + if (!list) + Py_RETURN_NONE; + size_t n = 0; + while (list[n]) + n++; + _cleanup_pydecref_ PyObject *ret = PyTuple_New(n); + if (!ret) + return NULL; + for (size_t i = 0; i < n; i++) { + PyObject *item = PyUnicode_FromString(list[i]); + if (!item) + return NULL; + PyTuple_SET_ITEM(ret, i, item); + } + return_ptr(ret); +} + +#define DebugInfoOptions_SETTER(name) \ +static int DebugInfoOptions_set_##name(DebugInfoOptions *self, PyObject *value, \ + void *arg) \ +{ \ + SETTER_NO_DELETE(#name, value); \ + if (!DebugInfoOptions_##name##_converter(value, self->options)) \ + return -1; \ + return 0; \ +} + +#define LIST_OPTION(name) \ +static int DebugInfoOptions_##name##_converter(PyObject *o, void *p) \ +{ \ + PATH_SEQUENCE_ARG(list, .null_terminate = true); \ + if (!path_sequence_converter(o, &list)) \ + return 0; \ + struct drgn_error *err = \ + drgn_debug_info_options_set_##name(p, list.paths); \ + if (err) { \ + set_drgn_error(err); \ + return 0; \ + } \ + return 1; \ +} \ + \ +static PyObject *DebugInfoOptions_get_##name(DebugInfoOptions *self, void *arg) \ +{ \ + const char * const *list = \ + drgn_debug_info_options_get_##name(self->options); \ + return DebugInfoOptions_wrap_list(list); \ +} \ +DebugInfoOptions_SETTER(name) + +#define BOOL_OPTION(name, default_value) \ 
+static int DebugInfoOptions_##name##_converter(PyObject *o, void *p) \ +{ \ + int r = PyObject_IsTrue(o); \ + if (r < 0) \ + return 0; \ + drgn_debug_info_options_set_##name(p, r); \ + return 1; \ +} \ + \ +static PyObject *DebugInfoOptions_get_##name(DebugInfoOptions *self, void *arg) \ +{ \ + Py_RETURN_BOOL(drgn_debug_info_options_get_##name(self->options)); \ +} \ +DebugInfoOptions_SETTER(name) + +#define drgn_kmod_search_method_class KmodSearchMethod_class + +#define ENUM_OPTION(name, type, default_value) \ +static int DebugInfoOptions_##name##_converter(PyObject *o, void *p) \ +{ \ + if (!PyObject_TypeCheck(o, (PyTypeObject *)type##_class)) { \ + PyErr_Format(PyExc_TypeError, "%s must be %s", #name, \ + ((PyTypeObject *)type##_class)->tp_name); \ + return 0; \ + } \ + _cleanup_pydecref_ PyObject *value_obj = \ + PyObject_GetAttrString(o, "value"); \ + if (!value_obj) \ + return 0; \ + long value = PyLong_AsLong(value_obj); \ + if (value == -1 && PyErr_Occurred()) \ + return 0; \ + drgn_debug_info_options_set_##name(p, value); \ + return 1; \ +} \ + \ +static PyObject *DebugInfoOptions_get_##name(DebugInfoOptions *self, void *arg) \ +{ \ + return PyObject_CallFunction(type##_class, "i", \ + drgn_debug_info_options_get_##name(self->options));\ +} \ +DebugInfoOptions_SETTER(name) + +DRGN_DEBUG_INFO_OPTIONS + +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + +static inline void +drgn_debug_info_options_destroyp(struct drgn_debug_info_options **optionsp) +{ + drgn_debug_info_options_destroy(*optionsp); +} + +static DebugInfoOptions *DebugInfoOptions_new(PyTypeObject *subtype, + PyObject *args, PyObject *kwds) +{ + struct drgn_error *err; + + _cleanup_(drgn_debug_info_options_destroyp) + struct drgn_debug_info_options *options = NULL; + err = drgn_debug_info_options_create(&options); + if (err) + return set_drgn_error(err); + + // Parse the positional options argument manually so that we can parse + // the keyword arguments directly into the struct + // 
drgn_debug_info_options. + if (PyTuple_GET_SIZE(args) > 0) { + PyObject *source = PyTuple_GET_ITEM(args, 0); + if (source != Py_None) { + if (!PyObject_TypeCheck(source, + &DebugInfoOptions_type)) { + PyErr_SetString(PyExc_TypeError, + "options must be DebugInfoOptions"); + return NULL; + } + err = drgn_debug_info_options_copy(options, + ((DebugInfoOptions *)source)->options); + if (err) { + set_drgn_error(err); + return NULL; + } + } + } + +#define BOOL_OPTION(name, default_value) LIST_OPTION(name) +#define ENUM_OPTION(name, type, default_value) LIST_OPTION(name) + static char *keywords[] = { + "", +#define LIST_OPTION(name) #name, + DRGN_DEBUG_INFO_OPTIONS +#undef LIST_OPTION + NULL, + }; + PyObject *unused; + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "|O$" +#define LIST_OPTION(name) "O&" + DRGN_DEBUG_INFO_OPTIONS +#undef LIST_OPTION + ":DebugInfoOptions", keywords, &unused +#define LIST_OPTION(name) , DebugInfoOptions_##name##_converter, options + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef BOOL_OPTION +#undef LIST_OPTION + )) + return NULL; + + DebugInfoOptions *ret = + (DebugInfoOptions *)subtype->tp_alloc(subtype, 0); + if (ret) + ret->options = no_cleanup_ptr(options); + return ret; +} + +static void DebugInfoOptions_dealloc(DebugInfoOptions *self) +{ + PyObject_GC_UnTrack(self); + if (self->prog) + Py_DECREF(self->prog); + else + drgn_debug_info_options_destroy(self->options); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static int DebugInfoOptions_traverse(DebugInfoOptions *self, visitproc visit, + void *arg) +{ + Py_VISIT(self->prog); + return 0; +} + +static PyGetSetDef DebugInfoOptions_getset[] = { +#define LIST_OPTION(name) \ + {#name, (getter)DebugInfoOptions_get_##name, \ + (setter)DebugInfoOptions_set_##name, \ + drgn_DebugInfoOptions_##name##_DOC}, +#define BOOL_OPTION(name, default_value) LIST_OPTION(name) +#define ENUM_OPTION(name, type, default_value) LIST_OPTION(name) + DRGN_DEBUG_INFO_OPTIONS +#undef ENUM_OPTION +#undef 
BOOL_OPTION +#undef LIST_OPTION + {}, +}; + +static PyObject *DebugInfoOptions_repr(PyObject *self) +{ + _cleanup_pydecref_ PyObject *parts = PyList_New(0); + if (!parts) + return NULL; + if (append_string(parts, "DebugInfoOptions(")) + return NULL; + bool first = true; + for (size_t i = 0; DebugInfoOptions_getset[i].name; i++) { + if (append_format(parts, "%s%s=", first ? "" : ", ", + DebugInfoOptions_getset[i].name) + || append_attr_repr(parts, self, + DebugInfoOptions_getset[i].name)) + return NULL; + first = false; + } + if (append_string(parts, ")")) + return NULL; + return join_strings(parts); +} + +PyTypeObject DebugInfoOptions_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "_drgn.DebugInfoOptions", + .tp_dealloc = (destructor)DebugInfoOptions_dealloc, + .tp_basicsize = sizeof(DebugInfoOptions), + .tp_repr = DebugInfoOptions_repr, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_doc = drgn_DebugInfoOptions_DOC, + .tp_traverse = (traverseproc)DebugInfoOptions_traverse, + .tp_getset = DebugInfoOptions_getset, + .tp_new = (newfunc)DebugInfoOptions_new, +}; diff --git a/libdrgn/python/drgnpy.h b/libdrgn/python/drgnpy.h index 24ac5347c..fcccec739 100644 --- a/libdrgn/python/drgnpy.h +++ b/libdrgn/python/drgnpy.h @@ -19,31 +19,27 @@ #include "../pp.h" #include "../program.h" #include "../symbol.h" +#include "../vector.h" -/* These were added in Python 3.7. 
*/ -#ifndef Py_UNREACHABLE -#define Py_UNREACHABLE() abort() -#endif -#ifndef Py_RETURN_RICHCOMPARE -#define Py_RETURN_RICHCOMPARE(val1, val2, op) \ - do { \ - switch (op) { \ - case Py_EQ: if ((val1) == (val2)) Py_RETURN_TRUE; Py_RETURN_FALSE; \ - case Py_NE: if ((val1) != (val2)) Py_RETURN_TRUE; Py_RETURN_FALSE; \ - case Py_LT: if ((val1) < (val2)) Py_RETURN_TRUE; Py_RETURN_FALSE; \ - case Py_GT: if ((val1) > (val2)) Py_RETURN_TRUE; Py_RETURN_FALSE; \ - case Py_LE: if ((val1) <= (val2)) Py_RETURN_TRUE; Py_RETURN_FALSE; \ - case Py_GE: if ((val1) >= (val2)) Py_RETURN_TRUE; Py_RETURN_FALSE; \ - default: \ - Py_UNREACHABLE(); \ - } \ - } while (0) +#if PY_VERSION_HEX < 0x030900a1 +static inline PyObject *PyObject_CallNoArgs(PyObject *func) +{ + return PyObject_CallFunctionObjArgs(func, NULL); +} +static inline PyObject *PyObject_CallOneArg(PyObject *callable, PyObject *arg) +{ + return PyObject_CallFunctionObjArgs(callable, arg, NULL); +} #endif #if PY_VERSION_HEX < 0x030d00a1 #define PyThreadState_GetUnchecked _PyThreadState_UncheckedGet #endif +#if PY_VERSION_HEX < 0x030e00a5 +#define Py_HashPointer _Py_HashPointer +#endif + #define DRGNPY_PUBLIC __attribute__((__visibility__("default"))) // PyLong_From* and PyLong_As* for stdint.h types. These use _Generic for @@ -80,6 +76,18 @@ Py_RETURN_FALSE; \ } while (0) +/** + * Return from a PyGetSetDef setter with an error if attempting to delete the + * attribute. 
+ */ +#define SETTER_NO_DELETE(name, value) do { \ + if (!(value)) { \ + PyErr_Format(PyExc_AttributeError, \ + "can't delete '%s' attribute", (name)); \ + return -1; \ + } \ +} while (0) + static inline void pydecrefp(void *p) { Py_XDECREF(*(PyObject **)p); @@ -129,6 +137,26 @@ typedef struct { const struct drgn_language *language; } Language; +typedef struct { + PyObject_HEAD + struct drgn_module *module; +} Module; + +typedef struct { + PyObject_HEAD + struct drgn_module_iterator *it; +} ModuleIterator; + +typedef struct { + PyObject_HEAD + struct drgn_module *module; +} ModuleSectionAddresses; + +typedef struct { + PyObject_HEAD + struct drgn_module_section_address_iterator *it; +} ModuleSectionAddressesIterator; + typedef struct { PyObject_HEAD DrgnObject *obj; @@ -153,6 +181,14 @@ typedef struct { struct pyobjectp_set objects; } Program; +typedef struct { + PyObject_HEAD + struct drgn_debug_info_options *options; + // If this is a Program's default debug info options, the Program. + // Otherwise, NULL. 
+ Program *prog; +} DebugInfoOptions; + typedef struct { PyObject_HEAD struct drgn_thread thread; @@ -232,39 +268,60 @@ typedef struct { PyObject *is_default; } TypeTemplateParameter; +extern PyObject *AbsenceReason_class; extern PyObject *Architecture_class; extern PyObject *FindObjectFlags_class; +extern PyObject *KmodSearchMethod_class; +extern PyObject *ModuleFileStatus_class; +extern PyObject *ModuleSectionAddresses_class; extern PyObject *PlatformFlags_class; extern PyObject *PrimitiveType_class; extern PyObject *ProgramFlags_class; extern PyObject *Qualifiers_class; +extern PyObject *SupplementaryFileKind_class; extern PyObject *SymbolBinding_class; extern PyObject *SymbolKind_class; extern PyObject *TypeKind_class; +extern PyTypeObject DebugInfoOptions_type; extern PyTypeObject DrgnObject_type; extern PyTypeObject DrgnType_type; +extern PyTypeObject ExtraModule_type; extern PyTypeObject FaultError_type; extern PyTypeObject Language_type; +extern PyTypeObject MainModule_type; +extern PyTypeObject ModuleIteratorWithNew_type; +extern PyTypeObject ModuleIterator_type; +extern PyTypeObject ModuleSectionAddressesIterator_type; +extern PyTypeObject Module_type; extern PyTypeObject ObjectIterator_type; extern PyTypeObject Platform_type; extern PyTypeObject Program_type; extern PyTypeObject Register_type; +extern PyTypeObject RelocatableModule_type; +extern PyTypeObject SharedLibraryModule_type; extern PyTypeObject StackFrame_type; extern PyTypeObject StackTrace_type; -extern PyTypeObject Symbol_type; extern PyTypeObject SymbolIndex_type; -extern PyTypeObject Thread_type; +extern PyTypeObject Symbol_type; extern PyTypeObject ThreadIterator_type; +extern PyTypeObject Thread_type; extern PyTypeObject TypeEnumerator_type; -extern PyTypeObject TypeKindSet_type; extern PyTypeObject TypeKindSetIterator_type; +extern PyTypeObject TypeKindSet_type; extern PyTypeObject TypeMember_type; extern PyTypeObject TypeParameter_type; extern PyTypeObject TypeTemplateParameter_type; 
+extern PyTypeObject VdsoModule_type; extern PyObject *MissingDebugInfoError; extern PyObject *ObjectAbsentError; extern PyObject *OutOfBoundsError; +PyGILState_STATE drgn_initialize_python(bool *success_ret); + +#define drgn_initialize_python_guard(success_ret) \ + __attribute__((__cleanup__(PyGILState_Releasep), __unused__)) \ + PyGILState_STATE PP_UNIQUE(gstate) = drgn_initialize_python(success_ret) + int add_module_constants(PyObject *m); int init_logging(void); @@ -277,6 +334,16 @@ void *set_error_type_name(const char *format, #define call_tp_alloc(type) ((type *)type##_type.tp_alloc(&type##_type, 0)) +PyObject *Module_wrap(struct drgn_module *module); +static inline Program *Module_prog(Module *module) +{ + struct drgn_program *prog = drgn_module_program(module->module); + return container_of(prog, Program, prog); +} + +int add_WantedSupplementaryFile(PyObject *m); +int init_module_section_addresses(void); + PyObject *Language_wrap(const struct drgn_language *language); int language_converter(PyObject *o, void *p); int add_languages(void); @@ -341,7 +408,10 @@ DrgnType *Program_array_type(Program *self, PyObject *args, PyObject *kwds); DrgnType *Program_function_type(Program *self, PyObject *args, PyObject *kwds); int append_string(PyObject *parts, const char *s); +int append_u64_hex(PyObject *parts, uint64_t value); int append_format(PyObject *parts, const char *format, ...); +int append_attr_repr(PyObject *parts, PyObject *obj, const char *attr_name); +int append_attr_str(PyObject *parts, PyObject *obj, const char *attr_name); PyObject *join_strings(PyObject *parts); // Implementation of _repr_pretty_() for IPython/Jupyter that just calls str(). 
PyObject *repr_pretty_from_str(PyObject *self, PyObject *args, PyObject *kwds); @@ -375,6 +445,22 @@ void path_cleanup(struct path_arg *path); __attribute__((__cleanup__(path_cleanup))) \ struct path_arg name = { __VA_ARGS__ } +DEFINE_VECTOR_TYPE(path_arg_vector, struct path_arg); + +struct path_sequence_arg { + bool allow_none; + bool null_terminate; + struct path_arg_vector args; + const char **paths; +}; +int path_sequence_converter(PyObject *o, void *p); +void path_sequence_cleanup(struct path_sequence_arg *paths); +size_t path_sequence_size(struct path_sequence_arg *paths); + +#define PATH_SEQUENCE_ARG(name, ...) \ + __attribute__((__cleanup__(path_sequence_cleanup))) \ + struct path_sequence_arg name = { .args = VECTOR_INIT, __VA_ARGS__ } + struct enum_arg { PyObject *type; unsigned long value; diff --git a/libdrgn/python/error.c b/libdrgn/python/error.c index a5dfa307b..f4e11ddba 100644 --- a/libdrgn/python/error.c +++ b/libdrgn/python/error.c @@ -73,51 +73,74 @@ void clear_drgn_in_python(void) drgn_in_python = false; } -struct drgn_error *drgn_error_from_python(void) +static struct drgn_error *drgn_fault_error_from_python(PyObject *exc_value) { - PyObject *exc_type, *exc_value, *exc_traceback, *exc_message; - const char *type, *message; - struct drgn_error *err; + _cleanup_pydecref_ PyObject *py_message = + PyObject_GetAttrString(exc_value, "message"); + const char *message = py_message ? PyUnicode_AsUTF8(py_message) : NULL; + if (!message) + return NULL; + + _cleanup_pydecref_ PyObject *py_address = + PyObject_GetAttrString(exc_value, "address"); + uint64_t address = py_address ? 
PyLong_AsUint64(py_address) : (uint64_t)-1; + if (address == (uint64_t)-1 && PyErr_Occurred()) + return NULL; + + return drgn_error_create_fault(message, address); +} +struct drgn_error *drgn_error_from_python(void) +{ + _cleanup_pydecref_ PyObject *exc_type, *exc_value, *exc_traceback; PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); if (!exc_type) return NULL; + // Python FaultErrors should be translated back to drgn errors because + // they are frequently handled in libdrgn. They should be translated no + // matter how deeply nested we are, so we do this before checking + // drgn_in_python. + if ((PyTypeObject *)exc_type == &FaultError_type && exc_value) { + struct drgn_error *err = drgn_fault_error_from_python(exc_value); + if (err) + return err; + // A NULL return means that we encountered a Python error while + // trying to convert it. Clear the Python error and fall back to + // the standard code path. + PyErr_Clear(); + } + if (drgn_in_python) { PyErr_Restore(exc_type, exc_value, exc_traceback); + exc_type = exc_value = exc_traceback = NULL; return &drgn_error_python; } - type = ((PyTypeObject *)exc_type)->tp_name; + const char *type = ((PyTypeObject *)exc_type)->tp_name; + _cleanup_pydecref_ PyObject *exc_message = NULL; + const char *message; if (exc_value) { exc_message = PyObject_Str(exc_value); message = exc_message ? 
PyUnicode_AsUTF8(exc_message) : NULL; if (!message) { - err = drgn_error_format(DRGN_ERROR_OTHER, - "%s: ", type); - goto out; + PyErr_Clear(); + return drgn_error_format(DRGN_ERROR_OTHER, + "%s: ", type); } } else { - exc_message = NULL; message = ""; } if (message[0]) { - err = drgn_error_format(DRGN_ERROR_OTHER, "%s: %s", type, - message); + return drgn_error_format(DRGN_ERROR_OTHER, "%s: %s", type, + message); } else { - err = drgn_error_create(DRGN_ERROR_OTHER, type); + return drgn_error_create(DRGN_ERROR_OTHER, type); } - -out: - Py_XDECREF(exc_message); - Py_XDECREF(exc_traceback); - Py_XDECREF(exc_value); - Py_DECREF(exc_type); - return err; } -DRGNPY_PUBLIC void *set_drgn_error(struct drgn_error *err) +void *set_drgn_error(struct drgn_error *err) { if (err == &drgn_error_python) return NULL; diff --git a/libdrgn/python/language.c b/libdrgn/python/language.c index 1fa30370e..3981cb076 100644 --- a/libdrgn/python/language.c +++ b/libdrgn/python/language.c @@ -25,6 +25,7 @@ PyTypeObject Language_type = { .tp_name = "_drgn.Language", .tp_basicsize = sizeof(Language), .tp_repr = (reprfunc)Language_repr, + // Doesn't reference any objects, no GC needed. .tp_flags = Py_TPFLAGS_DEFAULT, .tp_doc = drgn_Language_DOC, .tp_getset = Language_getset, diff --git a/libdrgn/python/main.c b/libdrgn/python/main.c index c103f0dec..7d13b1579 100644 --- a/libdrgn/python/main.c +++ b/libdrgn/python/main.c @@ -2,9 +2,6 @@ // SPDX-License-Identifier: LGPL-2.1-or-later #include -#ifdef WITH_KDUMPFILE -#include -#endif #include "drgnpy.h" #include "../path.h" @@ -26,6 +23,16 @@ static int add_type(PyObject *module, PyTypeObject *type) return ret; } +static int add_bool(PyObject *module, const char *name, bool value) +{ + PyObject *obj = value ? 
Py_True : Py_False; + Py_INCREF(obj); + int ret = PyModule_AddObject(module, name, obj); + if (ret) + Py_DECREF(obj); + return ret; +} + PyObject *MissingDebugInfoError; static PyObject *NoDefaultProgramError; PyObject *ObjectAbsentError; @@ -244,17 +251,10 @@ static int add_type_aliases(PyObject *m) if (!typing_Union) return -1; - // This should be a subclass of typing.Protocol, but that is only - // available since Python 3.8. - PyObject *IntegerLike = PyType_FromSpec(&(PyType_Spec){ - .name = "_drgn.IntegerLike", - .flags = Py_TPFLAGS_DEFAULT, - .slots = (PyType_Slot []){{0, NULL}}, - }); - if (!IntegerLike) - return -1; - if (PyModule_AddObject(m, "IntegerLike", IntegerLike) == -1) { - Py_DECREF(IntegerLike); + PyObject *typing_SupportsIndex = + PyObject_GetAttrString(typing_module, "SupportsIndex"); + if (PyModule_AddObject(m, "IntegerLike", typing_SupportsIndex) == -1) { + Py_XDECREF(typing_SupportsIndex); return -1; } @@ -291,8 +291,20 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) }) if (add_module_constants(m) || + add_type(m, &DebugInfoOptions_type) || add_type(m, &Language_type) || add_languages() || add_type(m, &DrgnObject_type) || + add_type(m, &Module_type) || + add_type(m, &MainModule_type) || + add_type(m, &SharedLibraryModule_type) || + add_type(m, &VdsoModule_type) || + add_type(m, &RelocatableModule_type) || + add_type(m, &ExtraModule_type) || + PyType_Ready(&ModuleIterator_type) || + PyType_Ready(&ModuleIteratorWithNew_type) || + add_WantedSupplementaryFile(m) || + init_module_section_addresses() || + PyType_Ready(&ModuleSectionAddressesIterator_type) || PyType_Ready(&ObjectIterator_type) || add_type(m, &Platform_type) || add_type(m, &Program_type) || @@ -335,17 +347,35 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) dwfl_version(NULL))) goto err; - PyObject *with_libkdumpfile; + if (add_bool(m, "_have_debuginfod", drgn_have_debuginfod())) + goto err; + + if (add_bool(m, "_enable_dlopen_debuginfod", +#if ENABLE_DLOPEN_DEBUGINFOD + true 
+#else + false +#endif + )) + goto err; + + if (add_bool(m, "_with_libkdumpfile", #ifdef WITH_LIBKDUMPFILE - with_libkdumpfile = Py_True; + true #else - with_libkdumpfile = Py_False; + false #endif - Py_INCREF(with_libkdumpfile); - if (PyModule_AddObject(m, "_with_libkdumpfile", with_libkdumpfile)) { - Py_DECREF(with_libkdumpfile); + )) + goto err; + + if (add_bool(m, "_with_lzma", +#ifdef WITH_LZMA + true +#else + false +#endif + )) goto err; - } return m; @@ -353,3 +383,42 @@ DRGNPY_PUBLIC PyMODINIT_FUNC PyInit__drgn(void) Py_DECREF(m); return NULL; } + +// On return from this function, three things need to be true: +// +// 1. The Python interpreter needs to be initialized. +// 2. The GIL needs to be held (and the caller needs to know whether to release +// it to restore the original state). +// 3. The _drgn module needs to be initialized. +// +// This can be called from many possible contexts (drgn CLI, standalone +// application using libdrgn, etc.), so we have to handle every possible initial +// state. +PyGILState_STATE drgn_initialize_python(bool *success_ret) +{ + PyGILState_STATE gstate; + if (Py_IsInitialized()) { + gstate = PyGILState_Ensure(); + } else { + gstate = PyGILState_UNLOCKED; + // If the Python interpreter wasn't already initialized, then we + // are in a standalone application using libdrgn. Set our + // imports up. + PyImport_AppendInittab("_drgn", PyInit__drgn); + Py_InitializeEx(0); + // Note: we don't have a good place to call Py_Finalize(), so we + // don't call it. 
+		const char *env = getenv("PYTHONSAFEPATH");
+		if (!env || !env[0])
+			PyRun_SimpleString("import sys\nsys.path.insert(0, '')");
+	}
+
+	bool success = true;
+	if (!PyState_FindModule(&drgnmodule)) {
+		_cleanup_pydecref_ PyObject *m = PyImport_ImportModule("_drgn");
+		if (!m)
+			success = false;
+	}
+	*success_ret = success;
+	return gstate;
+}
diff --git a/libdrgn/python/module.c b/libdrgn/python/module.c
new file mode 100644
index 000000000..44c9e0ddb
--- /dev/null
+++ b/libdrgn/python/module.c
@@ -0,0 +1,731 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
+#include "drgnpy.h"
+#include "../util.h"
+
+static PyObject *WantedSupplementaryFile_class;
+
+// Create the WantedSupplementaryFile namedtuple class (fields: kind, path,
+// supplementary_path, checksum), keep a reference to it in
+// WantedSupplementaryFile_class, and add it to module m.
+//
+// Returns 0 on success and -1 with a Python exception set on failure.
+int add_WantedSupplementaryFile(PyObject *m)
+{
+	_cleanup_pydecref_ PyObject *collections =
+		PyImport_ImportModule("collections");
+	// PyObject_GetAttrString() must not be called on NULL, so stop here if
+	// the import failed (the exception is already set).
+	if (!collections)
+		return -1;
+	_cleanup_pydecref_ PyObject *namedtuple =
+		PyObject_GetAttrString(collections, "namedtuple");
+	if (!namedtuple)
+		return -1;
+	WantedSupplementaryFile_class =
+		PyObject_CallFunction(namedtuple, "s[ssss]",
+				      "WantedSupplementaryFile", "kind", "path",
+				      "supplementary_path", "checksum");
+	if (!WantedSupplementaryFile_class)
+		return -1;
+	// PyModule_AddObject() only steals a reference on success: take one for
+	// the module, and on failure drop both it and the one held by the
+	// static variable.
+	Py_INCREF(WantedSupplementaryFile_class);
+	if (PyModule_AddObject(m, "WantedSupplementaryFile",
+			       WantedSupplementaryFile_class) == -1) {
+		Py_DECREF(WantedSupplementaryFile_class);
+		Py_DECREF(WantedSupplementaryFile_class);
+		return -1;
+	}
+	return 0;
+}
+
+PyObject *Module_wrap(struct drgn_module *module)
+{
+	PyTypeObject *type;
+	SWITCH_ENUM(drgn_module_kind(module)) {
+	case DRGN_MODULE_MAIN:
+		type = &MainModule_type;
+		break;
+	case DRGN_MODULE_SHARED_LIBRARY:
+		type = &SharedLibraryModule_type;
+		break;
+	case DRGN_MODULE_VDSO:
+		type = &VdsoModule_type;
+		break;
+	case DRGN_MODULE_RELOCATABLE:
+		type = &RelocatableModule_type;
+		break;
+	case DRGN_MODULE_EXTRA:
+		type = &ExtraModule_type;
+		break;
+	default:
+		UNREACHABLE();
+	}
+	Module *ret = (Module *)type->tp_alloc(type, 0);
+ if (ret) { + struct drgn_program *prog = drgn_module_program(module); + Py_INCREF(container_of(prog, Program, prog)); + ret->module = module; + } + return (PyObject *)ret; +} + +static void Module_dealloc(Module *self) +{ + PyObject_GC_UnTrack(self); + if (self->module) + Py_DECREF(Module_prog(self)); + Py_TYPE(self)->tp_free((PyObject *)self); +} + +static int Module_traverse(Module *self, visitproc visit, void *arg) +{ + if (self->module) + Py_VISIT(Module_prog(self)); + return 0; +} + +static int append_module_repr_common(PyObject *parts, Module *self, + const char *method_name) +{ + if (append_format(parts, "prog.%s_module(name=", method_name) < 0 || + append_attr_repr(parts, (PyObject *)self, "name") < 0) + return -1; + return 0; +} + +static PyObject *Module_repr(Module *self) +{ + _cleanup_pydecref_ PyObject *parts = PyList_New(0); + if (!parts) + return NULL; + + SWITCH_ENUM(drgn_module_kind(self->module)) { + case DRGN_MODULE_MAIN: + if (append_module_repr_common(parts, self, "main") < 0) + return NULL; + break; + case DRGN_MODULE_SHARED_LIBRARY: + if (append_module_repr_common(parts, self, + "shared_library") + || append_string(parts, ", dynamic_address=") + || append_u64_hex(parts, drgn_module_info(self->module))) + return NULL; + break; + case DRGN_MODULE_VDSO: + if (append_module_repr_common(parts, self, "vdso") + || append_string(parts, ", dynamic_address=") + || append_u64_hex(parts, drgn_module_info(self->module))) + return NULL; + break; + case DRGN_MODULE_RELOCATABLE: + if (append_module_repr_common(parts, self, "relocatable") + || append_string(parts, ", address=") + || append_u64_hex(parts, drgn_module_info(self->module))) + return NULL; + break; + case DRGN_MODULE_EXTRA: + if (append_module_repr_common(parts, self, "extra") + || append_string(parts, ", id=") + || append_u64_hex(parts, drgn_module_info(self->module))) + return NULL; + break; + default: + UNREACHABLE(); + } + if (append_string(parts, ")")) + return NULL; + return 
join_strings(parts); +} + +static PyObject *Module_richcompare(Module *self, PyObject *other, int op) +{ + if ((op != Py_EQ && op != Py_NE) || + !PyObject_TypeCheck(other, &Module_type)) + Py_RETURN_NOTIMPLEMENTED; + int ret = self->module == ((Module *)other)->module; + if (op == Py_NE) + ret = !ret; + Py_RETURN_BOOL(ret); +} + +static Py_hash_t Module_hash(Module *self) +{ + return Py_HashPointer(self->module); +} + +static PyObject *Module_wanted_supplementary_debug_file(Module *self) +{ + const char *debug_file_path, *supplementary_path; + const void *checksum; + size_t checksum_len; + enum drgn_supplementary_file_kind kind = + drgn_module_wanted_supplementary_debug_file(self->module, + &debug_file_path, + &supplementary_path, + &checksum, + &checksum_len); + if (kind == DRGN_SUPPLEMENTARY_FILE_NONE) { + return PyErr_Format(PyExc_ValueError, + "module does not want supplementary debug file"); + } + return PyObject_CallFunction(WantedSupplementaryFile_class, + "NO&O&y#", + PyObject_CallFunction(SupplementaryFileKind_class, + "k", + (unsigned long)kind), + PyUnicode_DecodeFSDefault, debug_file_path, + PyUnicode_DecodeFSDefault, + supplementary_path, checksum, + (Py_ssize_t)checksum_len); +} + +static PyObject *Module_try_file(Module *self, PyObject *args, PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = { "path", "fd", "force", NULL }; + struct path_arg path = {}; + int fd = -1; + int force = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&|$ip:try_file", keywords, + path_converter, &path, &fd, &force)) + return NULL; + err = drgn_module_try_file(self->module, path.path, fd, force); + path_cleanup(&path); + if (err) + return set_drgn_error(err); + Py_RETURN_NONE; +} + +static Program *Module_get_prog(Module *self, void *arg) +{ + Program *prog = Module_prog(self); + Py_INCREF(prog); + return prog; +} + +static PyObject *Module_get_name(Module *self, void *arg) +{ + return PyUnicode_DecodeFSDefault(drgn_module_name(self->module)); +} + 
+// Getter for Module.address_ranges: None if
+// drgn_module_num_address_ranges() reports false, otherwise a tuple of
+// (start, end) tuples, one per address range.
+static PyObject *Module_get_address_ranges(Module *self, void *arg)
+{
+	size_t n;
+	if (!drgn_module_num_address_ranges(self->module, &n))
+		Py_RETURN_NONE;
+	_cleanup_pydecref_ PyObject *ret = PyTuple_New(n);
+	if (!ret)
+		return NULL;
+	for (size_t i = 0; i < n; i++) {
+		uint64_t start, end;
+		drgn_module_address_range(self->module, i, &start, &end);
+		PyObject *range = Py_BuildValue("KK", (unsigned long long)start,
+						(unsigned long long)end);
+		if (!range)
+			return NULL;
+		// PyTuple_SET_ITEM() steals the reference to range.
+		PyTuple_SET_ITEM(ret, i, range);
+	}
+	return_ptr(ret);
+}
+
+DEFINE_VECTOR(uint64_pair_vector, uint64_t [2]);
+
+// Setter for Module.address_ranges. Accepts None (which unsets the address
+// ranges) or an iterable of 2-tuples whose items support __index__().
+//
+// Returns 0 on success and -1 with a Python exception set on failure.
+static int Module_set_address_ranges(Module *self, PyObject *value, void *arg)
+{
+	SETTER_NO_DELETE("address_ranges", value);
+
+	if (value == Py_None) {
+		drgn_module_unset_address_ranges(self->module);
+		return 0;
+	}
+
+	struct drgn_error *err;
+	_cleanup_pydecref_ PyObject *it = PyObject_GetIter(value);
+	if (!it)
+		return -1;
+
+	// The hint is only used to pre-size the vector; 1 is the fallback when
+	// the object cannot provide one.
+	Py_ssize_t length_hint = PyObject_LengthHint(value, 1);
+	if (length_hint == -1)
+		return -1;
+
+	VECTOR(uint64_pair_vector, ranges);
+	if (!uint64_pair_vector_reserve(&ranges, length_hint)) {
+		PyErr_NoMemory();
+		return -1;
+	}
+
+	for (;;) {
+		// PyIter_Next() returns NULL both when the iterator is
+		// exhausted and on error; the error case is checked after the
+		// loop.
+		_cleanup_pydecref_ PyObject *item = PyIter_Next(it);
+		if (!item)
+			break;
+
+		if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
+			PyErr_SetString(PyExc_TypeError,
+					"address_ranges must be None or sequence of (int, int)");
+			return -1;
+		}
+		_cleanup_pydecref_ PyObject *start_obj =
+			PyNumber_Index(PyTuple_GET_ITEM(item, 0));
+		if (!start_obj)
+			return -1;
+		_cleanup_pydecref_ PyObject *end_obj =
+			PyNumber_Index(PyTuple_GET_ITEM(item, 1));
+		if (!end_obj)
+			return -1;
+
+		uint64_t range[2];
+		range[0] = PyLong_AsUint64(start_obj);
+		if (range[0] == UINT64_MAX && PyErr_Occurred())
+			return -1;
+		range[1] = PyLong_AsUint64(end_obj);
+		if (range[1] == UINT64_MAX && PyErr_Occurred())
+			return -1;
+
+		if (!uint64_pair_vector_append(&ranges, &range)) {
+			PyErr_NoMemory();
+			return -1;
+		}
+	}
+	if
(PyErr_Occurred()) + return -1; + + err = drgn_module_set_address_ranges(self->module, + uint64_pair_vector_begin(&ranges), + uint64_pair_vector_size(&ranges)); + if (err) { + set_drgn_error(err); + return -1; + } + return 0; +} + +static PyObject *Module_get_address_range(Module *self, void *arg) +{ + size_t n; + if (!drgn_module_num_address_ranges(self->module, &n)) + Py_RETURN_NONE; + if (n == 0) { + return Py_BuildValue("ii", 0, 0); + } else if (n == 1) { + uint64_t start, end; + drgn_module_address_range(self->module, 0, &start, &end); + return Py_BuildValue("KK", (unsigned long long)start, + (unsigned long long)end); + } else { + PyErr_SetString(PyExc_ValueError, + "module has multiple address ranges"); + return NULL; + } +} + +static int Module_set_address_range(Module *self, PyObject *value, void *arg) +{ + SETTER_NO_DELETE("address_range", value); + + if (value == Py_None) { + drgn_module_unset_address_ranges(self->module); + return 0; + } + + struct drgn_error *err; + if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) { + PyErr_SetString(PyExc_TypeError, + "address_range must be None or (int, int)"); + return -1; + } + _cleanup_pydecref_ PyObject *start_obj = + PyNumber_Index(PyTuple_GET_ITEM(value, 0)); + if (!start_obj) + return -1; + _cleanup_pydecref_ PyObject *end_obj = + PyNumber_Index(PyTuple_GET_ITEM(value, 1)); + if (!end_obj) + return -1; + + uint64_t start = PyLong_AsUint64(start_obj); + if (start == UINT64_MAX && PyErr_Occurred()) + return -1; + uint64_t end = PyLong_AsUint64(end_obj); + if (end == UINT64_MAX && PyErr_Occurred()) + return -1; + + if (start == 0 && end == 0) { + err = drgn_module_set_address_ranges(self->module, NULL, 0); + } else { + err = drgn_module_set_address_range(self->module, start, end); + } + if (err) { + set_drgn_error(err); + return -1; + } + return 0; +} + +static PyObject *Module_get_build_id(Module *self, void *arg) +{ + const void *build_id; + size_t build_id_len; + if 
(!drgn_module_build_id(self->module, &build_id, &build_id_len))
+		Py_RETURN_NONE;
+	return PyBytes_FromStringAndSize(build_id, build_id_len);
+}
+
+static int Module_set_build_id(Module *self, PyObject *value, void *arg)
+{
+	SETTER_NO_DELETE("build_id", value);
+	struct drgn_error *err;
+	if (value == Py_None) {
+		err = drgn_module_set_build_id(self->module, NULL, 0);
+	} else {
+		Py_buffer buffer;
+		int ret = PyObject_GetBuffer(value, &buffer, PyBUF_SIMPLE);
+		if (ret)
+			return ret;
+
+		if (buffer.len == 0) {
+			PyErr_SetString(PyExc_ValueError,
+					"build ID cannot be empty");
+			PyBuffer_Release(&buffer);
+			return -1;
+		}
+
+		err = drgn_module_set_build_id(self->module, buffer.buf,
+					       buffer.len);
+		PyBuffer_Release(&buffer);
+	}
+	if (err) {
+		set_drgn_error(err);
+		return -1;
+	}
+	return 0;
+}
+
+static DrgnObject *Module_get_object(Module *self, void *arg)
+{
+
+	Program *prog_obj = Module_prog(self);
+	_cleanup_pydecref_ DrgnObject *ret = DrgnObject_alloc(prog_obj);
+	if (!ret)
+		return NULL;
+
+	struct drgn_error *err = drgn_module_object(self->module, &ret->obj);
+	if (err)
+		return set_drgn_error(err);
+	return_ptr(ret);
+}
+
+// Setter for Module.object: passes the given drgn.Object to
+// drgn_module_set_object().
+//
+// Returns 0 on success, or -1 with a Python exception set if the attribute is
+// being deleted, value is not a drgn.Object, or libdrgn reports an error.
+static int Module_set_object(Module *self, PyObject *value, void *arg)
+{
+	SETTER_NO_DELETE("object", value);
+	if (!PyObject_TypeCheck(value, &DrgnObject_type)) {
+		PyErr_SetString(PyExc_TypeError, "object must be a drgn.Object");
+		return -1;
+	}
+	DrgnObject *object = (DrgnObject *)value;
+
+	struct drgn_error *err = drgn_module_set_object(self->module, &object->obj);
+	if (err) {
+		// A setter must return -1 whenever it leaves an exception set;
+		// returning 0 here would report success to the interpreter
+		// with the error still pending.
+		set_drgn_error(err);
+		return -1;
+	}
+	return 0;
+}
+
+#define MODULE_FILE_STATUS_GETSET(which)	\
+static PyObject *Module_wants_##which##_file(Module *self)	\
+{	\
+	Py_RETURN_BOOL(drgn_module_wants_##which##_file(self->module));	\
+}	\
+	\
+static PyObject *Module_get_##which##_file_status(Module *self, void *arg)	\
+{	\
+	return PyObject_CallFunction(ModuleFileStatus_class, "i",	\
+				     (int)drgn_module_##which##_file_status(self->module));\
+}	\
+	\
+static int
Module_set_##which##_file_status(Module *self, PyObject *value,	\
+					    void *arg)	\
+{	\
+	/* Name the real attribute ("<which>_file_status") in the error. */	\
+	SETTER_NO_DELETE(#which "_file_status", value);	\
+	if (!PyObject_TypeCheck(value,	\
+				(PyTypeObject *)ModuleFileStatus_class)) {	\
+		PyErr_SetString(PyExc_TypeError,	\
+				#which "_file_status must be ModuleFileStatus");\
+		return -1;	\
+	}	\
+	_cleanup_pydecref_ PyObject *value_obj =	\
+		PyObject_GetAttrString(value, "value");	\
+	if (!value_obj)	\
+		return -1;	\
+	long status = PyLong_AsLong(value_obj);	\
+	if (status == -1 && PyErr_Occurred())	\
+		return -1;	\
+	\
+	if (drgn_module_set_##which##_file_status(self->module, status))	\
+		return 0;	\
+	\
+	/* The change was rejected; report the old and requested values. */	\
+	_cleanup_pydecref_ PyObject *old_status =	\
+		Module_get_##which##_file_status(self, NULL);	\
+	if (!old_status)	\
+		return -1;	\
+	PyErr_Format(PyExc_ValueError,	\
+		     "cannot change " #which "_file_status from %S to %S",	\
+		     old_status, value);	\
+	return -1;	\
+}
+MODULE_FILE_STATUS_GETSET(loaded)
+MODULE_FILE_STATUS_GETSET(debug)
+
+static PyObject *Module_get_loaded_file_path(Module *self, void *arg)
+{
+	const char *path = drgn_module_loaded_file_path(self->module);
+	if (!path)
+		Py_RETURN_NONE;
+	return PyUnicode_DecodeFSDefault(path);
+}
+
+static PyObject *Module_get_loaded_file_bias(Module *self, void *arg)
+{
+	if (!drgn_module_loaded_file_path(self->module))
+		Py_RETURN_NONE;
+	return PyLong_FromUint64(drgn_module_loaded_file_bias(self->module));
+}
+
+static PyObject *Module_get_debug_file_path(Module *self, void *arg)
+{
+	const char *path = drgn_module_debug_file_path(self->module);
+	if (!path)
+		Py_RETURN_NONE;
+	return PyUnicode_DecodeFSDefault(path);
+}
+
+static PyObject *Module_get_debug_file_bias(Module *self, void *arg)
+{
+	if (!drgn_module_debug_file_path(self->module))
+		Py_RETURN_NONE;
+	return PyLong_FromUint64(drgn_module_debug_file_bias(self->module));
+}
+
+static PyObject *Module_get_supplementary_debug_file_kind(Module *self,
+							  void *arg)
+{
+	enum drgn_supplementary_file_kind kind =
+
drgn_module_supplementary_debug_file_kind(self->module);
+	// DRGN_SUPPLEMENTARY_FILE_NONE is exposed as None rather than as an
+	// enum member.
+	if (kind == DRGN_SUPPLEMENTARY_FILE_NONE)
+		Py_RETURN_NONE;
+	return PyObject_CallFunction(SupplementaryFileKind_class, "k",
+				     (unsigned long)kind);
+}
+
+// Getter for Module.supplementary_debug_file_path: the path decoded with the
+// filesystem encoding, or None when libdrgn reports no path.
+static PyObject *Module_get_supplementary_debug_file_path(Module *self,
+							  void *arg)
+{
+	const char *path =
+		drgn_module_supplementary_debug_file_path(self->module);
+	if (!path)
+		Py_RETURN_NONE;
+	return PyUnicode_DecodeFSDefault(path);
+}
+
+// Methods exposed on Module. The empty entry terminates the table.
+static PyMethodDef Module_methods[] = {
+	{"wants_loaded_file", (PyCFunction)Module_wants_loaded_file,
+	 METH_NOARGS, drgn_Module_wants_loaded_file_DOC},
+	{"wants_debug_file", (PyCFunction)Module_wants_debug_file, METH_NOARGS,
+	 drgn_Module_wants_debug_file_DOC},
+	{"wanted_supplementary_debug_file",
+	 (PyCFunction)Module_wanted_supplementary_debug_file, METH_NOARGS,
+	 drgn_Module_wanted_supplementary_debug_file_DOC},
+	{"try_file", (PyCFunction)Module_try_file,
+	 METH_VARARGS | METH_KEYWORDS, drgn_Module_try_file_DOC},
+	{},
+};
+
+// Attributes exposed on Module. Entries with a NULL setter are read-only. The
+// empty entry terminates the table.
+static PyGetSetDef Module_getset[] = {
+	{"prog", (getter)Module_get_prog, NULL, drgn_Module_prog_DOC},
+	{"name", (getter)Module_get_name, NULL, drgn_Module_name_DOC},
+	{"address_ranges", (getter)Module_get_address_ranges,
+	 (setter)Module_set_address_ranges, drgn_Module_address_ranges_DOC},
+	{"address_range", (getter)Module_get_address_range,
+	 (setter)Module_set_address_range, drgn_Module_address_range_DOC},
+	{"build_id", (getter)Module_get_build_id, (setter)Module_set_build_id,
+	 drgn_Module_build_id_DOC},
+	{"object", (getter)Module_get_object, (setter)Module_set_object,
+	 drgn_Module_object_DOC},
+	{"loaded_file_status", (getter)Module_get_loaded_file_status,
+	 (setter)Module_set_loaded_file_status,
+	 drgn_Module_loaded_file_status_DOC},
+	{"loaded_file_path", (getter)Module_get_loaded_file_path, NULL,
+	 drgn_Module_loaded_file_path_DOC},
+	{"loaded_file_bias", (getter)Module_get_loaded_file_bias, NULL,
+	 drgn_Module_loaded_file_bias_DOC},
+	{"debug_file_status",
(getter)Module_get_debug_file_status,
+	 (setter)Module_set_debug_file_status,
+	 drgn_Module_debug_file_status_DOC},
+	{"debug_file_path", (getter)Module_get_debug_file_path, NULL,
+	 drgn_Module_debug_file_path_DOC},
+	{"debug_file_bias", (getter)Module_get_debug_file_bias, NULL,
+	 drgn_Module_debug_file_bias_DOC},
+	{"supplementary_debug_file_kind",
+	 (getter)Module_get_supplementary_debug_file_kind, NULL,
+	 drgn_Module_supplementary_debug_file_kind_DOC},
+	{"supplementary_debug_file_path",
+	 (getter)Module_get_supplementary_debug_file_path, NULL,
+	 drgn_Module_supplementary_debug_file_path_DOC},
+	{},
+};
+
+// Type object for _drgn.Module. It is GC-tracked and subclassable
+// (Py_TPFLAGS_BASETYPE), and it is the tp_base of the module types defined
+// after it.
+PyTypeObject Module_type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name = "_drgn.Module",
+	.tp_basicsize = sizeof(Module),
+	.tp_dealloc = (destructor)Module_dealloc,
+	.tp_repr = (reprfunc)Module_repr,
+	.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
+	.tp_doc = drgn_Module_DOC,
+	.tp_traverse = (traverseproc)Module_traverse,
+	.tp_richcompare = (richcmpfunc)Module_richcompare,
+	.tp_hash = (hashfunc)Module_hash,
+	.tp_methods = Module_methods,
+	.tp_getset = Module_getset,
+};
+
+// Type object for _drgn.MainModule: a subtype of Module that adds no
+// attributes of its own.
+PyTypeObject MainModule_type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name = "_drgn.MainModule",
+	.tp_flags = Py_TPFLAGS_DEFAULT,
+	.tp_doc = drgn_MainModule_DOC,
+	.tp_base = &Module_type,
+};
+
+// Shared getter returning drgn_module_info() as an int. The subtypes below
+// expose it under different attribute names ("dynamic_address", "address",
+// "id").
+static PyObject *Module_get_info(Module *self, void *arg)
+{
+	return PyLong_FromUint64(drgn_module_info(self->module));
+}
+
+// Attributes added by SharedLibraryModule on top of Module's.
+static PyGetSetDef SharedLibraryModule_getset[] = {
+	{"dynamic_address", (getter)Module_get_info, NULL,
+	 drgn_SharedLibraryModule_dynamic_address_DOC},
+	{},
+};
+
+// Type object for _drgn.SharedLibraryModule, a subtype of Module.
+PyTypeObject SharedLibraryModule_type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name = "_drgn.SharedLibraryModule",
+	.tp_flags = Py_TPFLAGS_DEFAULT,
+	.tp_doc = drgn_SharedLibraryModule_DOC,
+	.tp_getset = SharedLibraryModule_getset,
+	.tp_base = &Module_type,
+};
+
+// Attributes added by VdsoModule on top of Module's.
+static PyGetSetDef VdsoModule_getset[] = {
+	{"dynamic_address", (getter)Module_get_info, NULL,
drgn_VdsoModule_dynamic_address_DOC},
+	{},
+};
+
+// Type object for _drgn.VdsoModule, a subtype of Module.
+PyTypeObject VdsoModule_type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name = "_drgn.VdsoModule",
+	.tp_flags = Py_TPFLAGS_DEFAULT,
+	.tp_doc = drgn_VdsoModule_DOC,
+	.tp_getset = VdsoModule_getset,
+	.tp_base = &Module_type,
+};
+
+// Getter for RelocatableModule.section_addresses: builds a new
+// ModuleSectionAddresses object by calling the class with this module.
+static PyObject *RelocatableModule_get_section_addresses(PyObject *self,
+							 void *arg)
+{
+	return PyObject_CallOneArg(ModuleSectionAddresses_class, self);
+}
+
+// Attributes added by RelocatableModule on top of Module's.
+static PyGetSetDef RelocatableModule_getset[] = {
+	{"address", (getter)Module_get_info, NULL,
+	 drgn_RelocatableModule_address_DOC},
+	{"section_addresses", RelocatableModule_get_section_addresses,
+	 NULL, drgn_RelocatableModule_section_addresses_DOC},
+	{},
+};
+
+// Type object for _drgn.RelocatableModule, a subtype of Module.
+PyTypeObject RelocatableModule_type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name = "_drgn.RelocatableModule",
+	.tp_flags = Py_TPFLAGS_DEFAULT,
+	.tp_doc = drgn_RelocatableModule_DOC,
+	.tp_getset = RelocatableModule_getset,
+	.tp_base = &Module_type,
+};
+
+// Attributes added by ExtraModule on top of Module's.
+static PyGetSetDef ExtraModule_getset[] = {
+	{"id", (getter)Module_get_info, NULL, drgn_ExtraModule_id_DOC},
+	{},
+};
+
+// Type object for _drgn.ExtraModule, a subtype of Module.
+PyTypeObject ExtraModule_type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name = "_drgn.ExtraModule",
+	.tp_flags = Py_TPFLAGS_DEFAULT,
+	.tp_doc = drgn_ExtraModule_DOC,
+	.tp_getset = ExtraModule_getset,
+	.tp_base = &Module_type,
+};
+
+// Deallocator shared by both module iterator types. When an iterator was
+// created, drops the reference on the Program that embeds the iterator's
+// struct drgn_program and destroys the libdrgn iterator.
+static void ModuleIterator_dealloc(ModuleIterator *self)
+{
+	PyObject_GC_UnTrack(self);
+	if (self->it) {
+		struct drgn_program *prog =
+			drgn_module_iterator_program(self->it);
+		// NOTE(review): the Program reference is dropped before the
+		// iterator is destroyed; confirm that
+		// drgn_module_iterator_destroy() never touches the program.
+		Py_DECREF(container_of(prog, Program, prog));
+		drgn_module_iterator_destroy(self->it);
+	}
+	Py_TYPE(self)->tp_free((PyObject *)self);
+}
+
+// GC traversal: the only Python object reached from the iterator is the
+// Program embedding the struct drgn_program it iterates over.
+static int ModuleIterator_traverse(ModuleIterator *self, visitproc visit,
+				   void *arg)
+{
+	if (self->it) {
+		struct drgn_program *prog =
+			drgn_module_iterator_program(self->it);
+		Py_VISIT(container_of(prog, Program, prog));
+	}
+	return 0;
+}
+
+// tp_iternext for _ModuleIterator: returns the next module wrapped as a Python
+// object.
+static PyObject *ModuleIterator_next(ModuleIterator *self)
+{
+	struct drgn_error *err;
struct drgn_module *module;
+	err = drgn_module_iterator_next(self->it, &module, NULL);
+	if (err)
+		return set_drgn_error(err);
+	// No module and no error: return NULL without an exception, which
+	// ends the iteration.
+	if (!module)
+		return NULL;
+	return Module_wrap(module);
+}
+
+// tp_iternext for _ModuleIteratorWithNew: like ModuleIterator_next(), but
+// yields (module, new) tuples where new is the bool reported by
+// drgn_module_iterator_next().
+static PyObject *ModuleIteratorWithNew_next(ModuleIterator *self)
+{
+	struct drgn_error *err;
+	struct drgn_module *module;
+	bool new;
+	err = drgn_module_iterator_next(self->it, &module, &new);
+	if (err)
+		return set_drgn_error(err);
+	if (!module)
+		return NULL;
+	// "N" hands the reference returned by Module_wrap() to the tuple; "O"
+	// takes its own reference on the bool.
+	return Py_BuildValue("NO", Module_wrap(module),
+			     new ? Py_True : Py_False);
+}
+
+// Type object for _drgn._ModuleIterator (yields modules).
+PyTypeObject ModuleIterator_type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name = "_drgn._ModuleIterator",
+	.tp_basicsize = sizeof(ModuleIterator),
+	.tp_dealloc = (destructor)ModuleIterator_dealloc,
+	.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
+	.tp_traverse = (traverseproc)ModuleIterator_traverse,
+	.tp_iter = PyObject_SelfIter,
+	.tp_iternext = (iternextfunc)ModuleIterator_next,
+};
+
+// Type object for _drgn._ModuleIteratorWithNew (yields (module, new) tuples).
+// It shares the ModuleIterator struct, deallocator and traversal.
+PyTypeObject ModuleIteratorWithNew_type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name = "_drgn._ModuleIteratorWithNew",
+	.tp_basicsize = sizeof(ModuleIterator),
+	.tp_dealloc = (destructor)ModuleIterator_dealloc,
+	.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
+	.tp_traverse = (traverseproc)ModuleIterator_traverse,
+	.tp_iter = PyObject_SelfIter,
+	.tp_iternext = (iternextfunc)ModuleIteratorWithNew_next,
+};
diff --git a/libdrgn/python/module_section_addresses.c b/libdrgn/python/module_section_addresses.c
new file mode 100644
index 000000000..0674a0c6a
--- /dev/null
+++ b/libdrgn/python/module_section_addresses.c
@@ -0,0 +1,286 @@
+// Copyright (c) Meta Platforms, Inc. and affiliates.
+// SPDX-License-Identifier: LGPL-2.1-or-later
+
+#include "drgnpy.h"
+#include "../cleanup.h"
+#include "../util.h"
+
+// The Python-level ModuleSectionAddresses class, created at runtime by
+// init_module_section_addresses().
+PyObject *ModuleSectionAddresses_class;
+
+// tp_new: takes a single Module argument ("module"). The new object stores the
+// raw struct drgn_module pointer and takes a reference on Module_prog(module),
+// not on the Module object itself.
+static ModuleSectionAddresses *ModuleSectionAddresses_new(PyTypeObject *subtype,
+							  PyObject *args,
+							  PyObject *kwds)
+{
+	static char *keywords[] = {"module", NULL};
+	Module *module;
+	if (!PyArg_ParseTupleAndKeywords(args, kwds,
+					 "O!:_ModuleSectionAddresses", keywords,
+					 &Module_type, &module))
+		return NULL;
+	ModuleSectionAddresses *ret =
+		(ModuleSectionAddresses *)subtype->tp_alloc(subtype, 0);
+	if (ret) {
+		Py_INCREF(Module_prog(module));
+		ret->module = module->module;
+	}
+	return ret;
+}
+
+// tp_dealloc: drops the reference on the Program embedding the module's
+// struct drgn_program, if a module was ever stored.
+static void ModuleSectionAddresses_dealloc(ModuleSectionAddresses *self)
+{
+	PyObject_GC_UnTrack(self);
+	if (self->module) {
+		struct drgn_program *prog = drgn_module_program(self->module);
+		Py_DECREF(container_of(prog, Program, prog));
+	}
+	Py_TYPE(self)->tp_free((PyObject *)self);
+}
+
+// GC traversal: the Program is the only Python object reached from here.
+static int ModuleSectionAddresses_traverse(ModuleSectionAddresses *self,
+					   visitproc visit, void *arg)
+{
+	if (self->module) {
+		struct drgn_program *prog = drgn_module_program(self->module);
+		Py_VISIT(container_of(prog, Program, prog));
+	}
+	return 0;
+}
+
+// Adapter so that _cleanup_() can destroy an iterator given a pointer to the
+// variable holding it.
+static inline void
+drgn_module_section_address_iterator_destroyp(struct drgn_module_section_address_iterator **itp)
+{
+	drgn_module_section_address_iterator_destroy(*itp);
+}
+
+// tp_repr: "ModuleSectionAddresses()" when there are no sections, otherwise
+// "ModuleSectionAddresses({<name repr>: <address>, ...})", with each address
+// appended by append_u64_hex().
+static PyObject *ModuleSectionAddresses_repr(ModuleSectionAddresses *self)
+{
+	struct drgn_error *err;
+
+	_cleanup_(drgn_module_section_address_iterator_destroyp)
+	struct drgn_module_section_address_iterator *it = NULL;
+	err = drgn_module_section_address_iterator_create(self->module, &it);
+	if (err)
+		return set_drgn_error(err);
+
+	_cleanup_pydecref_ PyObject *parts = PyList_New(0);
+	if (!parts)
+		return NULL;
+	if (append_string(parts, "ModuleSectionAddresses("))
+		return NULL;
+	// first selects between the opening "{" and the ", " separator, and
+	// later whether a closing "}" is needed.
+	bool first = true;
+	for (;;) {
+		const char *name;
+		uint64_t address;
+		err =
drgn_module_section_address_iterator_next(it, &name,
+							       &address);
+		if (err)
+			return set_drgn_error(err);
+		// A NULL name with no error marks the end of the sections.
+		if (!name)
+			break;
+
+		_cleanup_pydecref_ PyObject *name_obj =
+			PyUnicode_FromString(name);
+		if (!name_obj)
+			return NULL;
+		if (append_format(parts, "%s%R: ", first ? "{" : ", ", name_obj)
+		    || append_u64_hex(parts, address))
+			return NULL;
+		first = false;
+	}
+	if (append_string(parts, first ? ")" : "})"))
+		return NULL;
+	return join_strings(parts);
+}
+
+// mp_length: the count reported by drgn_module_num_section_addresses(), or -1
+// with a Python exception set on error.
+static Py_ssize_t ModuleSectionAddresses_length(ModuleSectionAddresses *self)
+{
+	size_t ret;
+	struct drgn_error *err =
+		drgn_module_num_section_addresses(self->module, &ret);
+	if (err) {
+		set_drgn_error(err);
+		return -1;
+	}
+	return ret;
+}
+
+// mp_subscript: looks up a section address by name. A key that is not a str,
+// or one that libdrgn fails to find (DRGN_ERROR_LOOKUP), raises KeyError with
+// the key; any other libdrgn error goes through set_drgn_error().
+static PyObject *ModuleSectionAddresses_subscript(ModuleSectionAddresses *self,
+						  PyObject *key)
+{
+	if (!PyUnicode_Check(key)) {
+		PyErr_SetObject(PyExc_KeyError, key);
+		return NULL;
+	}
+	const char *name = PyUnicode_AsUTF8(key);
+	if (!name)
+		return NULL;
+	uint64_t address;
+	struct drgn_error *err = drgn_module_get_section_address(self->module,
+								 name,
+								 &address);
+	if (err && err->code == DRGN_ERROR_LOOKUP) {
+		drgn_error_destroy(err);
+		PyErr_SetObject(PyExc_KeyError, key);
+		return NULL;
+	} else if (err) {
+		return set_drgn_error(err);
+	}
+	return PyLong_FromUint64(address);
+}
+
+// mp_ass_subscript: a non-NULL value sets the address of a section, and a NULL
+// value deletes the section. Setting with a key that is not a str raises
+// TypeError; deleting with one raises KeyError.
+static int ModuleSectionAddresses_ass_subscript(ModuleSectionAddresses *self,
+						PyObject *key,
+						PyObject *value)
+{
+	struct drgn_error *err;
+	if (value) {
+		if (!PyUnicode_Check(key)) {
+			PyErr_SetString(PyExc_TypeError,
+					"section_addresses key must be str");
+			return -1;
+		}
+		const char *name = PyUnicode_AsUTF8(key);
+		if (!name)
+			return -1;
+		uint64_t address = PyLong_AsUint64(value);
+		if (address == (uint64_t)-1 && PyErr_Occurred())
+			return -1;
+		err = drgn_module_set_section_address(self->module, name,
+						      address);
+	} else {
+		if (!PyUnicode_Check(key)) {
+			PyErr_SetObject(PyExc_KeyError, key);
+			return -1;
+		}
+		const char *name =
PyUnicode_AsUTF8(key);
+		if (!name)
+			return -1;
+		err = drgn_module_delete_section_address(self->module, name);
+		// Deleting a section that libdrgn cannot find is reported as
+		// KeyError with the key.
+		if (err && err->code == DRGN_ERROR_LOOKUP) {
+			drgn_error_destroy(err);
+			PyErr_SetObject(PyExc_KeyError, key);
+			return -1;
+		}
+	}
+	// Any remaining error from either branch goes through
+	// set_drgn_error().
+	if (err) {
+		set_drgn_error(err);
+		return -1;
+	}
+	return 0;
+}
+
+// tp_iter: creates a ModuleSectionAddressesIterator over this module's
+// sections. The Program reference is taken only after the libdrgn iterator has
+// been created, so an iterator object released on the error path has it->it
+// unset and holds no reference.
+static ModuleSectionAddressesIterator *
+ModuleSectionAddresses_iter(ModuleSectionAddresses *self)
+{
+	struct drgn_error *err;
+	_cleanup_pydecref_ ModuleSectionAddressesIterator *it =
+		call_tp_alloc(ModuleSectionAddressesIterator);
+	if (!it)
+		return NULL;
+	err = drgn_module_section_address_iterator_create(self->module,
+							  &it->it);
+	if (err)
+		return set_drgn_error(err);
+	struct drgn_program *prog = drgn_module_program(self->module);
+	Py_INCREF(container_of(prog, Program, prog));
+	return_ptr(it);
+}
+
+// We only define the bare minimum for collections.abc.MutableMapping,
+// which gives us naive implementations of the remaining methods. We can
+// define performance-sensitive ones as needed.
+static PyMappingMethods ModuleSectionAddressesMixin_as_mapping = {
+	.mp_length = (lenfunc)ModuleSectionAddresses_length,
+	.mp_subscript = (binaryfunc)ModuleSectionAddresses_subscript,
+	.mp_ass_subscript = (objobjargproc)ModuleSectionAddresses_ass_subscript,
+};
+
+// C half of the ModuleSectionAddresses class: the Python-visible class is
+// created in init_module_section_addresses() with this type as one of its
+// bases, which is why Py_TPFLAGS_BASETYPE is set.
+static PyTypeObject ModuleSectionAddressesMixin_type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name = "_drgn.ModuleSectionAddressesMixin",
+	.tp_dealloc = (destructor)ModuleSectionAddresses_dealloc,
+	.tp_basicsize = sizeof(ModuleSectionAddresses),
+	.tp_repr = (reprfunc)ModuleSectionAddresses_repr,
+	.tp_as_mapping = &ModuleSectionAddressesMixin_as_mapping,
+	.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE,
+	.tp_traverse = (traverseproc)ModuleSectionAddresses_traverse,
+	.tp_iter = (getiterfunc)ModuleSectionAddresses_iter,
+	.tp_new = (newfunc)ModuleSectionAddresses_new,
+};
+
+// tp_dealloc for the key iterator: drops the reference on the Program found
+// through the iterator's module and destroys the libdrgn iterator.
+static void
+ModuleSectionAddressesIterator_dealloc(ModuleSectionAddressesIterator *self)
+{
+	PyObject_GC_UnTrack(self);
+	if (self->it) {
+		struct drgn_module *module =
+			drgn_module_section_address_iterator_module(self->it);
+		struct drgn_program *prog = drgn_module_program(module);
+		// NOTE(review): the Program reference is dropped before the
+		// iterator is destroyed; confirm that the destroy call never
+		// touches the program or its modules.
+		Py_DECREF(container_of(prog, Program, prog));
+		drgn_module_section_address_iterator_destroy(self->it);
+	}
+	Py_TYPE(self)->tp_free((PyObject *)self);
+}
+
+// GC traversal: the Program is the only Python object reached from here.
+static int
+ModuleSectionAddressesIterator_traverse(ModuleSectionAddressesIterator *self,
+					visitproc visit, void *arg)
+{
+	if (self->it) {
+		struct drgn_module *module =
+			drgn_module_section_address_iterator_module(self->it);
+		struct drgn_program *prog = drgn_module_program(module);
+		Py_VISIT(container_of(prog, Program, prog));
+	}
+	return 0;
+}
+
+// tp_iternext: yields the next section name as a str. Only names are
+// requested from libdrgn (the address argument is NULL).
+static PyObject *
+ModuleSectionAddressesIterator_next(ModuleSectionAddressesIterator *self)
+{
+	struct drgn_error *err;
+	const char *name;
+	err = drgn_module_section_address_iterator_next(self->it, &name, NULL);
+	if (err)
+		return set_drgn_error(err);
+	// A NULL name with no error ends the iteration.
+	if (!name)
+		return NULL;
+	return
PyUnicode_FromString(name);
+}
+
+// Type object for _drgn._ModuleSectionAddressesIterator, the iterator returned
+// by ModuleSectionAddresses_iter().
+PyTypeObject ModuleSectionAddressesIterator_type = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	.tp_name = "_drgn._ModuleSectionAddressesIterator",
+	.tp_basicsize = sizeof(ModuleSectionAddressesIterator),
+	.tp_dealloc = (destructor)ModuleSectionAddressesIterator_dealloc,
+	.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
+	.tp_traverse = (traverseproc)ModuleSectionAddressesIterator_traverse,
+	.tp_iter = PyObject_SelfIter,
+	.tp_iternext = (iternextfunc)ModuleSectionAddressesIterator_next,
+};
+
+// Readies the mixin type and creates ModuleSectionAddresses_class by calling
+// type("ModuleSectionAddresses", (mixin, MutableMapping), {}). Returns 0 on
+// success and -1 with a Python exception set on failure.
+int init_module_section_addresses(void)
+{
+	if (PyType_Ready(&ModuleSectionAddressesMixin_type))
+		return -1;
+	_cleanup_pydecref_ PyObject *collections_abc =
+		PyImport_ImportModule("collections.abc");
+	if (!collections_abc)
+		return -1;
+	_cleanup_pydecref_ PyObject *MutableMapping =
+		PyObject_GetAttrString(collections_abc, "MutableMapping");
+	if (!MutableMapping)
+		return -1;
+	// We can't create a direct subclass of MutableMapping from C (see
+	// https://github.com/python/cpython/issues/103968). Use this multiple
+	// inheritance trick taken from cpython/Modules/_decimal/_decimal.c
+	// instead.
+	ModuleSectionAddresses_class =
+		PyObject_CallFunction((PyObject *)&PyType_Type, "s(OO){}",
+				      "ModuleSectionAddresses",
+				      &ModuleSectionAddressesMixin_type,
+				      MutableMapping);
+	if (!ModuleSectionAddresses_class)
+		return -1;
+	return 0;
+}
diff --git a/libdrgn/python/object.c b/libdrgn/python/object.c
index b0808dec6..9bd1f80eb 100644
--- a/libdrgn/python/object.c
+++ b/libdrgn/python/object.c
@@ -112,19 +112,13 @@ static int serialize_compound_value(struct drgn_program *prog, char *buf,
 		return -1;
 	}
 
-	_cleanup_pydecref_ PyObject *tmp = PyMapping_Items(value_obj);
-	if (!tmp)
-		return -1;
-	// Since Python 3.7, PyMapping_Items() always returns a list. However,
-	// before that, it could also return a tuple.
- _cleanup_pydecref_ PyObject *items = - PySequence_Fast(tmp, "items must be sequence"); + _cleanup_pydecref_ PyObject *items = PyMapping_Items(value_obj); if (!items) return -1; - Py_ssize_t num_items = PySequence_Fast_GET_SIZE(items); + Py_ssize_t num_items = PyList_GET_SIZE(items); for (Py_ssize_t i = 0; i < num_items; i++) { - PyObject *item = PySequence_Fast_GET_ITEM(items, i); + PyObject *item = PyList_GET_ITEM(items, i); if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) { PyErr_SetString(PyExc_TypeError, "invalid item"); return -1; @@ -365,21 +359,28 @@ static DrgnObject *DrgnObject_new(PyTypeObject *subtype, PyObject *args, PyObject *kwds) { static char *keywords[] = { - "prog", "type", "value", "address", "bit_offset", - "bit_field_size", NULL, + "prog", "type", "value", "address", "absence_reason", + "bit_offset", "bit_field_size", NULL, }; struct drgn_error *err; Program *prog; PyObject *type_obj = Py_None, *value_obj = Py_None; struct index_arg address = { .allow_none = true, .is_none = true }; + struct enum_arg absence_reason = { + .type = AbsenceReason_class, + // Sentinel value so we can tell when the argument was passed. 
+ .value = ULONG_MAX, + }; struct index_arg bit_offset = { .allow_none = true, .is_none = true }; struct index_arg bit_field_size = { .allow_none = true, .is_none = true }; struct drgn_qualified_type qualified_type; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|OO$O&O&O&:Object", + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|OO$O&O&O&O&:Object", keywords, &Program_type, &prog, &type_obj, &value_obj, index_converter, - &address, index_converter, &bit_offset, - index_converter, &bit_field_size)) + &address, enum_converter, + &absence_reason, index_converter, + &bit_offset, index_converter, + &bit_field_size)) return NULL; if (Program_type_arg(prog, type_obj, true, &qualified_type) == -1) @@ -394,9 +395,17 @@ static DrgnObject *DrgnObject_new(PyTypeObject *subtype, PyObject *args, _cleanup_pydecref_ DrgnObject *obj = DrgnObject_alloc(prog); if (!obj) return NULL; - if (!address.is_none && value_obj != Py_None) { - PyErr_SetString(PyExc_ValueError, - "object cannot have address and value"); + if (!address.is_none + + (value_obj != Py_None) + + (absence_reason.value != ULONG_MAX) > 1) { + PyErr_Format(PyExc_ValueError, + "object cannot have %s and %s", + !address.is_none + ? (value_obj != Py_None + && absence_reason.value != ULONG_MAX) + ? "address, value," : "address" : "value", + absence_reason.value != ULONG_MAX + ? "absence reason" : "value"); return NULL; } else if (!address.is_none) { if (!qualified_type.type) { @@ -532,6 +541,9 @@ static DrgnObject *DrgnObject_new(PyTypeObject *subtype, PyObject *args, return NULL; } err = drgn_object_set_absent(&obj->obj, qualified_type, + absence_reason.value == ULONG_MAX + ? 
DRGN_ABSENCE_REASON_OTHER + : absence_reason.value, bit_field_size.uvalue); } if (err) @@ -541,6 +553,7 @@ static DrgnObject *DrgnObject_new(PyTypeObject *subtype, PyObject *args, static void DrgnObject_dealloc(DrgnObject *self) { + PyObject_GC_UnTrack(self); Py_DECREF(DrgnObject_prog(self)); drgn_object_deinit(&self->obj); Py_TYPE(self)->tp_free((PyObject *)self); @@ -575,9 +588,9 @@ static PyObject *DrgnObject_compound_value(struct drgn_object *obj, if (err) return set_drgn_error(err); - err = drgn_object_slice(&member, obj, member_type, - members[i].bit_offset, - member_bit_field_size); + err = drgn_object_fragment(&member, obj, member_type, + members[i].bit_offset, + member_bit_field_size); if (err) return set_drgn_error(err); @@ -622,8 +635,8 @@ static PyObject *DrgnObject_array_value(struct drgn_object *obj, DRGN_OBJECT(element, drgn_object_program(obj)); for (uint64_t i = 0; i < length; i++) { - err = drgn_object_slice(&element, obj, element_type, - i * element_bit_size, 0); + err = drgn_object_fragment(&element, obj, element_type, + i * element_bit_size, 0); if (err) return set_drgn_error(err); @@ -872,6 +885,12 @@ static PyObject *DrgnObject_repr(DrgnObject *self) break; } case DRGN_OBJECT_ABSENT: + if (self->obj.absence_reason != DRGN_ABSENCE_REASON_OTHER) { + if (append_format(parts, ", absence_reason=") < 0 + || append_attr_str(parts, (PyObject *)self, + "absence_reason_") < 0) + return NULL; + } break; default: UNREACHABLE(); @@ -1001,6 +1020,14 @@ static PyObject *DrgnObject_get_absent(DrgnObject *self, void *arg) Py_RETURN_BOOL(self->obj.kind == DRGN_OBJECT_ABSENT); } +static PyObject *DrgnObject_get_absence_reason(DrgnObject *self, void *arg) +{ + if (self->obj.kind != DRGN_OBJECT_ABSENT) + Py_RETURN_NONE; + return PyObject_CallFunction(AbsenceReason_class, "i", + (int)self->obj.absence_reason); +} + static PyObject *DrgnObject_get_address(DrgnObject *self, void *arg) { if (self->obj.kind == DRGN_OBJECT_REFERENCE) @@ -1336,39 +1363,20 @@ static 
DrgnObject *DrgnObject_member(DrgnObject *self, PyObject *args, static PyObject *DrgnObject_getattro(DrgnObject *self, PyObject *attr_name) { struct drgn_error *err; - PyObject *attr; - const char *name; - DrgnObject *res; - - /* - * In Python 3.7 and newer, _PyObject_GenericGetAttrWithDict() can - * suppress the AttributeError if the attribute isn't found. This makes - * member lookups much more efficient. - */ -#define GETATTR_SUPPRESS (PY_VERSION_HEX >= 0x030700b1) -#if GETATTR_SUPPRESS - attr = _PyObject_GenericGetAttrWithDict((PyObject *)self, attr_name, - NULL, 1); - if (attr || PyErr_Occurred()) - return attr; -#else - PyObject *exc_type, *exc_value, *exc_traceback; - attr = PyObject_GenericGetAttr((PyObject *)self, attr_name); - if (attr || !PyErr_ExceptionMatches(PyExc_AttributeError)) + PyObject *attr = _PyObject_GenericGetAttrWithDict((PyObject *)self, + attr_name, NULL, 1); + if (attr || PyErr_Occurred()) return attr; - PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); -#endif - name = PyUnicode_AsUTF8(attr_name); - if (!name) { - res = NULL; - goto out; - } + const char *name = PyUnicode_AsUTF8(attr_name); + if (!name) + return NULL; - res = DrgnObject_alloc(DrgnObject_prog(self)); + _cleanup_pydecref_ DrgnObject *res = + DrgnObject_alloc(DrgnObject_prog(self)); if (!res) - goto out; + return NULL; if (self->obj.encoding == DRGN_OBJECT_ENCODING_UNSIGNED) { err = drgn_object_member_dereference(&res->obj, &self->obj, @@ -1376,38 +1384,20 @@ static PyObject *DrgnObject_getattro(DrgnObject *self, PyObject *attr_name) } else { err = drgn_object_member(&res->obj, &self->obj, name); } - if (err) { - Py_CLEAR(res); - if (err->code == DRGN_ERROR_TYPE) { - /* - * If the object doesn't have a compound type, raise a - * generic AttributeError (or restore the original one - * if we weren't able to suppress it). 
- */ -#if GETATTR_SUPPRESS - PyErr_Format(PyExc_AttributeError, - "'%s' object has no attribute '%U'", - Py_TYPE(self)->tp_name, attr_name); -#else - PyErr_Restore(exc_type, exc_value, exc_traceback); -#endif - drgn_error_destroy(err); - return NULL; - } else if (err->code == DRGN_ERROR_LOOKUP) { - PyErr_SetString(PyExc_AttributeError, err->message); - drgn_error_destroy(err); - } else { - set_drgn_error(err); - } + if (drgn_error_catch(&err, DRGN_ERROR_TYPE)) { + // If the object doesn't have a compound type, raise a generic + // AttributeError. + return PyErr_Format(PyExc_AttributeError, + "'%s' object has no attribute '%U'", + Py_TYPE(self)->tp_name, attr_name); + } else if (err && err->code == DRGN_ERROR_LOOKUP) { + PyErr_SetString(PyExc_AttributeError, err->message); + drgn_error_destroy(err); + return NULL; + } else if (err) { + return set_drgn_error(err); } -out: -#if !GETATTR_SUPPRESS - Py_XDECREF(exc_traceback); - Py_XDECREF(exc_value); - Py_DECREF(exc_type); -#endif -#undef GETATTR_SUPPRESS - return (PyObject *)res; + return (PyObject *)no_cleanup_ptr(res); } static Py_ssize_t DrgnObject_length(DrgnObject *self) @@ -1535,6 +1525,8 @@ static PyGetSetDef DrgnObject_getset[] = { {"type_", (getter)DrgnObject_get_type, NULL, drgn_Object_type__DOC}, {"absent_", (getter)DrgnObject_get_absent, NULL, drgn_Object_absent__DOC}, + {"absence_reason_", (getter)DrgnObject_get_absence_reason, NULL, + drgn_Object_absence_reason__DOC}, {"address_", (getter)DrgnObject_get_address, NULL, drgn_Object_address__DOC}, {"bit_offset_", (getter)DrgnObject_get_bit_offset, NULL, @@ -1601,6 +1593,12 @@ static PyMappingMethods DrgnObject_as_mapping = { .mp_subscript = (binaryfunc)DrgnObject_subscript, }; +static int DrgnObject_traverse(DrgnObject *self, visitproc visit, void *arg) +{ + Py_VISIT(DrgnObject_prog(self)); + return 0; +} + PyTypeObject DrgnObject_type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "_drgn.Object", @@ -1611,7 +1609,8 @@ PyTypeObject DrgnObject_type = { 
.tp_as_mapping = &DrgnObject_as_mapping, .tp_str = (reprfunc)DrgnObject_str, .tp_getattro = (getattrofunc)DrgnObject_getattro, - .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_traverse = (traverseproc)DrgnObject_traverse, .tp_doc = drgn_Object_DOC, .tp_richcompare = DrgnObject_richcompare, .tp_iter = (getiterfunc)DrgnObject_iter, @@ -1695,10 +1694,18 @@ DrgnObject *DrgnObject_container_of(PyObject *self, PyObject *args, static void ObjectIterator_dealloc(ObjectIterator *self) { + PyObject_GC_UnTrack(self); Py_DECREF(self->obj); Py_TYPE(self)->tp_free((PyObject *)self); } +static int ObjectIterator_traverse(ObjectIterator *self, visitproc visit, + void *arg) +{ + Py_VISIT(self->obj); + return 0; +} + static DrgnObject *ObjectIterator_next(ObjectIterator *self) { if (self->index >= self->length) @@ -1722,7 +1729,8 @@ PyTypeObject ObjectIterator_type = { .tp_name = "_drgn._ObjectIterator", .tp_basicsize = sizeof(ObjectIterator), .tp_dealloc = (destructor)ObjectIterator_dealloc, - .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_traverse = (traverseproc)ObjectIterator_traverse, .tp_iter = PyObject_SelfIter, .tp_iternext = (iternextfunc)ObjectIterator_next, .tp_methods = ObjectIterator_methods, diff --git a/libdrgn/python/platform.c b/libdrgn/python/platform.c index 0bced53fd..46b4c84a0 100644 --- a/libdrgn/python/platform.c +++ b/libdrgn/python/platform.c @@ -123,6 +123,7 @@ PyTypeObject Platform_type = { .tp_basicsize = sizeof(Platform), .tp_dealloc = (destructor)Platform_dealloc, .tp_repr = (reprfunc)Platform_repr, + // Doesn't reference any objects, no GC needed. .tp_flags = Py_TPFLAGS_DEFAULT, .tp_doc = drgn_Platform_DOC, .tp_richcompare = (richcmpfunc)Platform_richcompare, @@ -173,6 +174,7 @@ PyTypeObject Register_type = { .tp_name = "_drgn.Register", .tp_basicsize = sizeof(Register), .tp_repr = (reprfunc)Register_repr, + // Doesn't reference any objects, no GC needed. 
.tp_flags = Py_TPFLAGS_DEFAULT, .tp_doc = drgn_Register_DOC, .tp_richcompare = (richcmpfunc)Register_richcompare, diff --git a/libdrgn/python/plugins.c b/libdrgn/python/plugins.c new file mode 100644 index 000000000..2bf2b20a1 --- /dev/null +++ b/libdrgn/python/plugins.c @@ -0,0 +1,32 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "drgnpy.h" +#include "../plugins.h" + +void drgn_call_plugins_prog(const char *name, struct drgn_program *prog) +{ + PyGILState_guard(); + + static PyObject *call_plugins; + if (!call_plugins) { + _cleanup_pydecref_ PyObject *_drgn_util_plugins_module = + PyImport_ImportModule("_drgn_util.plugins"); + if (!_drgn_util_plugins_module) { + PyErr_WriteUnraisable(NULL); + return; + } + call_plugins = PyObject_GetAttrString(_drgn_util_plugins_module, + "call_plugins"); + if (!call_plugins) { + PyErr_WriteUnraisable(NULL); + return; + } + } + + Program *prog_obj = container_of(prog, Program, prog); + _cleanup_pydecref_ PyObject *res = + PyObject_CallFunction(call_plugins, "sO", name, prog_obj); + if (!res) + PyErr_WriteUnraisable(call_plugins); +} diff --git a/libdrgn/python/program.c b/libdrgn/python/program.c index 154b0a022..193022cfd 100644 --- a/libdrgn/python/program.c +++ b/libdrgn/python/program.c @@ -16,6 +16,7 @@ DEFINE_HASH_SET_FUNCTIONS(pyobjectp_set, ptr_key_hash_pair, scalar_key_eq); static PyObject *percent_s; +static PyObject *logging_StreamHandler; static PyObject *logger; static PyObject *logger_log; @@ -40,7 +41,7 @@ static void drgnpy_log_fn(struct drgn_program *prog, void *arg, PyErr_WriteUnraisable(logger_log); } -static int get_log_level(void) +static int get_logging_status(int *log_level_ret, bool *enable_progress_bar_ret) { // We don't use getEffectiveLevel() because that doesn't take // logging.disable() into account. 
@@ -57,38 +58,122 @@ static int get_log_level(void) if (ret) break; } - return level; + + *log_level_ret = level; + + if (level > DRGN_LOG_WARNING || !isatty(STDERR_FILENO)) { + *enable_progress_bar_ret = false; + return 0; + } + + PyObject *current_logger = logger; + _cleanup_pydecref_ PyObject *logger_to_decref = NULL; + do { + _cleanup_pydecref_ PyObject *handlers = + PyObject_GetAttrString(current_logger, "handlers"); + if (!handlers) + return -1; + + Py_ssize_t size = PySequence_Size(handlers); + if (size < 0) + return -1; + + for (Py_ssize_t i = 0; i < size; i++) { + _cleanup_pydecref_ PyObject *handler = + PySequence_GetItem(handlers, i); + if (!handler) + return -1; + + int r = PyObject_IsInstance(handler, + logging_StreamHandler); + if (r < 0) + return -1; + if (!r) + continue; + + _cleanup_pydecref_ PyObject *stream = + PyObject_GetAttrString(handler, "stream"); + if (!stream) + return -1; + + _cleanup_pydecref_ PyObject *fd_obj = + PyObject_CallMethod(stream, "fileno", NULL); + if (!fd_obj) { + // Ignore AttributeError, + // io.UnsupportedOperation, etc. + if (PyErr_ExceptionMatches(PyExc_Exception)) { + PyErr_Clear(); + continue; + } else { + return -1; + } + } + + long fd = PyLong_AsLong(fd_obj); + if (fd == -1 && PyErr_Occurred()) + return -1; + + if (fd == STDERR_FILENO) { + *enable_progress_bar_ret = true; + return 0; + } + } + + _cleanup_pydecref_ PyObject *propagate = + PyObject_GetAttrString(current_logger, "propagate"); + if (!propagate) + return -1; + int ret = PyObject_IsTrue(propagate); + if (ret < 0) + return -1; + if (!ret) + break; + + Py_XDECREF(logger_to_decref); + logger_to_decref = PyObject_GetAttrString(current_logger, + "parent"); + if (!logger_to_decref) + return -1; + current_logger = logger_to_decref; + } while (current_logger != Py_None); + + *enable_progress_bar_ret = false; + return 0; } -// This is slightly heinous. 
We need to sync the Python log level with the -// libdrgn log level, but the Python log level can change at any time, and there -// is no API to be notified of this. So, we monkey patch logger._cache.clear() -// to update the log level on every live program. This only works since CPython -// commit 78c18a9b9a14 ("bpo-30962: Added caching to Logger.isEnabledFor() -// (GH-2752)") (in v3.7), though. Before that, the best we can do is sync the -// level at the time that the program is created. -#if PY_VERSION_HEX >= 0x030700a1 +// This is slightly heinous. We need to sync the Python logging configuration +// with libdrgn, but the Python log level and handlers can change at any time, +// and there are no APIs to be notified of this. +// +// To sync the log level, we monkey patch logger._cache.clear() to update the +// libdrgn log level on every live program. +// +// We also check handlers in that monkey patch, which isn't the right place to +// hook but should work in practice in most cases. 
static int cached_log_level; +static bool cached_enable_progress_bar; static struct pyobjectp_set programs = HASH_TABLE_INIT; -static int cache_log_level(void) +static int cache_logging_status(void) { - int level = get_log_level(); - if (level < 0) - return level; - cached_log_level = level; - return 0; + return get_logging_status(&cached_log_level, + &cached_enable_progress_bar); } static PyObject *LoggerCacheWrapper_clear(PyObject *self) { PyDict_Clear(self); - if (cache_log_level()) - return NULL; - for (struct pyobjectp_set_iterator it = pyobjectp_set_first(&programs); - it.entry; it = pyobjectp_set_next(it)) { - Program *prog = (Program *)*it.entry; - drgn_program_set_log_level(&prog->prog, cached_log_level); + if (!pyobjectp_set_empty(&programs)) { + if (cache_logging_status()) + return NULL; + hash_table_for_each(pyobjectp_set, it, &programs) { + Program *prog = (Program *)*it.entry; + drgn_program_set_log_level(&prog->prog, + cached_log_level); + drgn_program_set_progress_file(&prog->prog, + cached_enable_progress_bar + ? stderr : NULL); + } } Py_RETURN_NONE; } @@ -114,19 +199,23 @@ static int init_logger_cache_wrapper(void) NULL); if (!cache_wrapper) return -1; - if (PyObject_SetAttrString(logger, "_cache", cache_wrapper)) - return -1; - - return cache_log_level(); + return PyObject_SetAttrString(logger, "_cache", cache_wrapper); } static int Program_init_logging(Program *prog) { + // The cache is only maintained while there are live programs, so if + // this is the only program, we need to update the cache. + if (pyobjectp_set_empty(&programs) && cache_logging_status()) + return -1; + PyObject *obj = (PyObject *)prog; if (pyobjectp_set_insert(&programs, &obj, NULL) < 0) return -1; drgn_program_set_log_callback(&prog->prog, drgnpy_log_fn, NULL); drgn_program_set_log_level(&prog->prog, cached_log_level); + drgn_program_set_progress_file(&prog->prog, + cached_enable_progress_bar ? 
stderr : NULL); return 0; } @@ -135,21 +224,6 @@ static void Program_deinit_logging(Program *prog) PyObject *obj = (PyObject *)prog; pyobjectp_set_delete(&programs, &obj); } -#else -static int init_logger_cache_wrapper(void) { return 0; } - -static int Program_init_logging(Program *prog) -{ - int level = get_log_level(); - if (level < 0) - return level; - drgn_program_set_log_callback(&prog->prog, drgnpy_log_fn, NULL); - drgn_program_set_log_level(&prog->prog, level); - return 0; -} - -static void Program_deinit_logging(Program *prog) {} -#endif int init_logging(void) { @@ -160,6 +234,10 @@ int init_logging(void) _cleanup_pydecref_ PyObject *logging = PyImport_ImportModule("logging"); if (!logging) return -1; + logging_StreamHandler = PyObject_GetAttrString(logging, + "StreamHandler"); + if (!logging_StreamHandler) + return -1; logger = PyObject_CallMethod(logging, "getLogger", "s", "drgn"); if (!logger) return -1; @@ -229,7 +307,7 @@ int Program_type_arg(Program *prog, PyObject *type_obj, bool can_be_none, return 0; } -static void *drgnpy_begin_blocking(struct drgn_program *prog, void *arg) +void *drgn_begin_blocking(void) { PyThreadState *state = PyThreadState_GetUnchecked(); if (state) @@ -237,12 +315,29 @@ static void *drgnpy_begin_blocking(struct drgn_program *prog, void *arg) return state; } -static void drgnpy_end_blocking(struct drgn_program *prog, void *arg, void *state) +void drgn_end_blocking(void *state) { if (state) PyEval_RestoreThread(state); } +static Program *Program_new_impl(const struct drgn_platform *platform) +{ + _cleanup_pydecref_ PyObject *cache = PyDict_New(); + if (!cache) + return NULL; + + _cleanup_pydecref_ Program *prog = call_tp_alloc(Program); + if (!prog) + return NULL; + prog->cache = no_cleanup_ptr(cache); + pyobjectp_set_init(&prog->objects); + drgn_program_init(&prog->prog, platform); + if (Program_init_logging(prog)) + return NULL; + return_ptr(prog); +} + static Program *Program_new(PyTypeObject *subtype, PyObject *args, 
PyObject *kwds) { @@ -265,37 +360,47 @@ static Program *Program_new(PyTypeObject *subtype, PyObject *args, "platform must be Platform or None"); return NULL; } - - _cleanup_pydecref_ PyObject *cache = PyDict_New(); - if (!cache) - return NULL; - - _cleanup_pydecref_ Program *prog = call_tp_alloc(Program); + _cleanup_pydecref_ Program *prog = Program_new_impl(platform); if (!prog) return NULL; - prog->cache = no_cleanup_ptr(cache); - pyobjectp_set_init(&prog->objects); - drgn_program_init(&prog->prog, platform); - drgn_program_set_blocking_callback(&prog->prog, drgnpy_begin_blocking, - drgnpy_end_blocking, NULL); if (vmcoreinfo) { struct drgn_error *err = drgn_program_parse_vmcoreinfo( &prog->prog, vmcoreinfo, vmcoreinfo_size); if (err) return set_drgn_error(err); } - if (Program_init_logging(prog)) - return NULL; return_ptr(prog); } +LIBDRGN_PUBLIC struct drgn_error * +drgn_program_create(const struct drgn_platform *platform, + struct drgn_program **ret) +{ + bool success; + drgn_initialize_python_guard(&success); + if (!success) + return drgn_error_from_python(); + Program *prog = Program_new_impl(platform); + if (!prog) + return drgn_error_from_python(); + *ret = &prog->prog; + return NULL; +} + +LIBDRGN_PUBLIC void drgn_program_destroy(struct drgn_program *prog) +{ + if (prog) { + PyGILState_guard(); + Py_DECREF(container_of(prog, Program, prog)); + } +} + static void Program_dealloc(Program *self) { + PyObject_GC_UnTrack(self); Program_deinit_logging(self); drgn_program_deinit(&self->prog); - for (struct pyobjectp_set_iterator it = - pyobjectp_set_first(&self->objects); it.entry; - it = pyobjectp_set_next(it)) + hash_table_for_each(pyobjectp_set, it, &self->objects) Py_DECREF(*it.entry); pyobjectp_set_deinit(&self->objects); Py_XDECREF(self->cache); @@ -304,9 +409,7 @@ static void Program_dealloc(Program *self) static int Program_traverse(Program *self, visitproc visit, void *arg) { - for (struct pyobjectp_set_iterator it = - 
pyobjectp_set_first(&self->objects); it.entry; - it = pyobjectp_set_next(it)) + hash_table_for_each(pyobjectp_set, it, &self->objects) Py_VISIT(*it.entry); Py_VISIT(self->cache); return 0; @@ -314,9 +417,7 @@ static int Program_traverse(Program *self, visitproc visit, void *arg) static int Program_clear(Program *self) { - for (struct pyobjectp_set_iterator it = - pyobjectp_set_first(&self->objects); it.entry; - it = pyobjectp_set_next(it)) + hash_table_for_each(pyobjectp_set, it, &self->objects) Py_DECREF(*it.entry); pyobjectp_set_deinit(&self->objects); pyobjectp_set_init(&self->objects); @@ -390,6 +491,28 @@ static PyObject *Program_add_memory_segment(Program *self, PyObject *args, Py_RETURN_NONE; } +static struct drgn_error * +py_debug_info_find_fn(struct drgn_module * const *modules, size_t num_modules, + void *arg) +{ + PyGILState_guard(); + + _cleanup_pydecref_ PyObject *modules_list = PyList_New(num_modules); + if (!modules_list) + return drgn_error_from_python(); + for (size_t i = 0; i < num_modules; i++) { + PyObject *module_obj = Module_wrap(modules[i]); + if (!module_obj) + return drgn_error_from_python(); + PyList_SET_ITEM(modules_list, i, module_obj); + } + _cleanup_pydecref_ PyObject *obj = + PyObject_CallOneArg(arg, modules_list); + if (!obj) + return drgn_error_from_python(); + return NULL; +} + static inline struct drgn_error * py_type_find_fn_common(PyObject *type_obj, void *arg, struct drgn_qualified_type *ret) @@ -580,6 +703,7 @@ py_symbol_find_fn(const char *name, uint64_t addr, return NULL; } +#define debug_info_finder_arg(self, fn) PyObject *arg = fn; #define type_finder_arg(self, fn) \ _cleanup_pydecref_ PyObject *arg = Py_BuildValue("OO", self, fn); \ if (!arg) \ @@ -727,6 +851,7 @@ static PyObject *Program_enabled_##which##_finders(Program *self) \ return_ptr(res); \ } +DEFINE_PROGRAM_FINDER_METHODS(debug_info) DEFINE_PROGRAM_FINDER_METHODS(type) DEFINE_PROGRAM_FINDER_METHODS(object) DEFINE_PROGRAM_FINDER_METHODS(symbol) @@ -866,75 
+991,316 @@ static PyObject *Program_set_pid(Program *self, PyObject *args, PyObject *kwds) Py_RETURN_NONE; } -DEFINE_VECTOR(path_arg_vector, struct path_arg); +static ModuleIterator *Program_modules(Program *self) +{ + struct drgn_error *err; + ModuleIterator *it = call_tp_alloc(ModuleIterator); + if (!it) + return NULL; + err = drgn_created_module_iterator_create(&self->prog, &it->it); + if (err) { + it->it = NULL; + Py_DECREF(it); + return set_drgn_error(err); + } + Py_INCREF(self); + return it; +} + +static ModuleIterator *Program_loaded_modules(Program *self) +{ + struct drgn_error *err; + ModuleIterator *it = + (ModuleIterator *)ModuleIteratorWithNew_type.tp_alloc( + &ModuleIteratorWithNew_type, 0); + if (!it) + return NULL; + err = drgn_loaded_module_iterator_create(&self->prog, &it->it); + if (err) { + it->it = NULL; + Py_DECREF(it); + return set_drgn_error(err); + } + Py_INCREF(self); + return it; +} + +static PyObject *Program_create_loaded_modules(Program *self) +{ + struct drgn_error *err = drgn_create_loaded_modules(&self->prog); + if (err) + return set_drgn_error(err); + Py_RETURN_NONE; +} + +static inline PyObject *Module_wrap_find(struct drgn_module *module) +{ + if (module) + return Module_wrap(module); + PyErr_SetString(PyExc_LookupError, "module not found"); + return NULL; +} -static void path_arg_vector_cleanup(struct path_arg_vector *path_args) +static PyObject *Program_main_module(Program *self, PyObject *args, + PyObject *kwds) { - vector_for_each(path_arg_vector, path_arg, path_args) - path_cleanup(path_arg); - path_arg_vector_deinit(path_args); + struct drgn_error *err; + static char *keywords[] = {"name", "create", NULL}; + PATH_ARG(name); + int create = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O&$p:main_module", + keywords, path_converter, &name, + &create)) + return NULL; + + if (create) { + if (!name.path) { + PyErr_SetString(PyExc_TypeError, + "name must be given if create=True"); + return NULL; + } + struct drgn_module 
*module; + err = drgn_module_find_or_create_main(&self->prog, name.path, + &module, NULL); + if (err) { + set_drgn_error(err); + return NULL; + } + return Module_wrap(module); + } else { + return Module_wrap_find(drgn_module_find_main(&self->prog, + name.path)); + } } -static PyObject *Program_load_debug_info(Program *self, PyObject *args, - PyObject *kwds) +static PyObject *Program_shared_library_module(Program *self, PyObject *args, + PyObject *kwds) { - static char *keywords[] = {"paths", "default", "main", NULL}; struct drgn_error *err; - PyObject *paths_obj = Py_None; - int load_default = 0; - int load_main = 0; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Opp:load_debug_info", - keywords, &paths_obj, &load_default, - &load_main)) + static char *keywords[] = {"name", "dynamic_address", "create", NULL}; + PATH_ARG(name); + struct index_arg dynamic_address = {}; + int create = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O&O&|$p:shared_library_module", + keywords, path_converter, &name, + index_converter, &dynamic_address, + &create)) return NULL; - _cleanup_(path_arg_vector_cleanup) - struct path_arg_vector path_args = VECTOR_INIT; - _cleanup_free_ const char **paths = NULL; - if (paths_obj != Py_None) { - _cleanup_pydecref_ PyObject *it = PyObject_GetIter(paths_obj); - if (!it) + if (create) { + struct drgn_module *module; + err = drgn_module_find_or_create_shared_library(&self->prog, + name.path, + dynamic_address.uvalue, + &module, NULL); + if (err) { + set_drgn_error(err); return NULL; + } + return Module_wrap(module); + } else { + return Module_wrap_find(drgn_module_find_shared_library(&self->prog, + name.path, + dynamic_address.uvalue)); + } +} + +static PyObject *Program_vdso_module(Program *self, PyObject *args, + PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = {"name", "dynamic_address", "create", NULL}; + PATH_ARG(name); + struct index_arg dynamic_address = {}; + int create = 0; + if 
(!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&|$p:vdso_module", + keywords, path_converter, &name, + index_converter, &dynamic_address, + &create)) + return NULL; - Py_ssize_t length_hint = PyObject_LengthHint(paths_obj, 1); - if (length_hint == -1) + if (create) { + struct drgn_module *module; + err = drgn_module_find_or_create_vdso(&self->prog, name.path, + dynamic_address.uvalue, + &module, NULL); + if (err) { + set_drgn_error(err); return NULL; - if (!path_arg_vector_reserve(&path_args, length_hint)) { - PyErr_NoMemory(); + } + return Module_wrap(module); + } else { + return Module_wrap_find(drgn_module_find_vdso(&self->prog, + name.path, + dynamic_address.uvalue)); + } +} + +static PyObject *Program_relocatable_module(Program *self, PyObject *args, + PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = {"name", "address", "create", NULL}; + PATH_ARG(name); + struct index_arg address = {}; + int create = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O&O&|$p:relocatable_module", keywords, + path_converter, &name, index_converter, + &address, &create)) + return NULL; + + if (create) { + struct drgn_module *module; + err = drgn_module_find_or_create_relocatable(&self->prog, + name.path, + address.uvalue, + &module, NULL); + if (err) { + set_drgn_error(err); return NULL; } + return Module_wrap(module); + } else { + return Module_wrap_find(drgn_module_find_relocatable(&self->prog, + name.path, + address.uvalue)); + } +} - for (;;) { - _cleanup_pydecref_ PyObject *item = PyIter_Next(it); - if (!item) - break; +static PyObject *Program_linux_kernel_loadable_module(Program *self, + PyObject *args, + PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = {"module_obj", "create", NULL}; + DrgnObject *module_obj; + int create = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O!|$p:linux_kernel_loadable_module", + keywords, &DrgnObject_type, + &module_obj, &create)) + return NULL; - struct path_arg *path_arg = - 
path_arg_vector_append_entry(&path_args); - if (!path_arg) { - PyErr_NoMemory(); - return NULL; - } - memset(path_arg, 0, sizeof(*path_arg)); - if (!path_converter(item, path_arg)) { - path_arg_vector_pop(&path_args); - return NULL; - } + if (DrgnObject_prog(module_obj) != self) { + PyErr_SetString(PyExc_ValueError, + "object is from different program"); + return NULL; + } + + struct drgn_module *module; + if (create) { + err = drgn_module_find_or_create_linux_kernel_loadable(&module_obj->obj, + &module, + NULL); + if (err) { + set_drgn_error(err); + return NULL; } - if (PyErr_Occurred()) + return Module_wrap(module); + } else { + err = drgn_module_find_linux_kernel_loadable(&module_obj->obj, + &module); + if (err) { + set_drgn_error(err); return NULL; + } + return Module_wrap_find(module); + } +} + +static PyObject *Program_extra_module(Program *self, PyObject *args, + PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = {"name", "id", "create", NULL}; + PATH_ARG(name); + struct index_arg id = {}; + int create = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&|O&$p:extra_module", + keywords, path_converter, &name, + index_converter, &id, &create)) + return NULL; - paths = malloc_array(path_arg_vector_size(&path_args), - sizeof(*paths)); - if (!paths) { - PyErr_NoMemory(); + if (create) { + struct drgn_module *module; + err = drgn_module_find_or_create_extra(&self->prog, name.path, + id.uvalue, &module, + NULL); + if (err) { + set_drgn_error(err); return NULL; } - for (size_t i = 0; i < path_arg_vector_size(&path_args); i++) - paths[i] = path_arg_vector_at(&path_args, i)->path; + return Module_wrap(module); + } else { + return Module_wrap_find(drgn_module_find_extra(&self->prog, + name.path, + id.uvalue)); } - err = drgn_program_load_debug_info(&self->prog, paths, - path_arg_vector_size(&path_args), +} + +static PyObject *Program_module(Program *self, PyObject *arg) +{ + struct drgn_module *module; + if (PyUnicode_Check(arg)) { + const char 
*name = PyUnicode_AsUTF8(arg); + if (!name) + return NULL; + module = drgn_module_find_by_name(&self->prog, name); + } else { + struct index_arg address = {}; + if (!index_converter(arg, &address)) + return NULL; + module = drgn_module_find_by_address(&self->prog, + address.uvalue); + } + return Module_wrap_find(module); +} + +static DebugInfoOptions *Program_get_debug_info_options(Program *self, void *arg) +{ + DebugInfoOptions *options = call_tp_alloc(DebugInfoOptions); + if (options) { + options->options = drgn_program_debug_info_options(&self->prog); + options->prog = self; + Py_INCREF(self); + } + return options; +} + +static int Program_set_debug_info_options(Program *self, PyObject *value, void *arg) +{ + SETTER_NO_DELETE("debug_info_options", value); + if (!PyObject_TypeCheck(value, &DebugInfoOptions_type)) { + PyErr_SetString(PyExc_TypeError, + "debug_info_options must be DebugInfoOptions"); + return -1; + } + struct drgn_error *err = + drgn_debug_info_options_copy(drgn_program_debug_info_options(&self->prog), + ((DebugInfoOptions *)value)->options); + if (err) { + set_drgn_error(err); + return -1; + } + return 0; +} + +static PyObject *Program_load_debug_info(Program *self, PyObject *args, + PyObject *kwds) +{ + static char *keywords[] = {"paths", "default", "main", NULL}; + struct drgn_error *err; + PATH_SEQUENCE_ARG(paths, .allow_none = true); + int load_default = 0; + int load_main = 0; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O&pp:load_debug_info", + keywords, path_sequence_converter, + &paths, &load_default, &load_main)) + return NULL; + err = drgn_program_load_debug_info(&self->prog, paths.paths, + path_sequence_size(&paths), load_default, load_main); if (err) { set_drgn_error(err); @@ -953,6 +1319,105 @@ static PyObject *Program_load_default_debug_info(Program *self) Py_RETURN_NONE; } +static PyObject *Program_load_module_debug_info(Program *self, PyObject *args) +{ + size_t num_modules = PyTuple_GET_SIZE(args); + _cleanup_free_ struct 
drgn_module **modules = + malloc_array(num_modules, sizeof(*modules)); + if (!modules) { + PyErr_NoMemory(); + return NULL; + } + + for (size_t i = 0; i < num_modules; i++) { + PyObject *item = PyTuple_GET_ITEM(args, i); + if (!PyObject_TypeCheck(item, &Module_type)) { + return PyErr_Format(PyExc_TypeError, + "expected Module, not %s", + Py_TYPE(item)->tp_name); + } + modules[i] = ((Module *)item)->module; + if (modules[i]->prog != &self->prog) { + PyErr_SetString(PyExc_ValueError, + "module from wrong program"); + return NULL; + } + } + + struct drgn_error *err = + drgn_load_module_debug_info(modules, &num_modules); + if (err) + return set_drgn_error(err); + Py_RETURN_NONE; +} + +DEFINE_VECTOR(drgn_module_vector, struct drgn_module *); + +static PyObject *Program_find_standard_debug_info(Program *self, PyObject *args, + PyObject *kwds) +{ + struct drgn_error *err; + static char *keywords[] = {"modules", "options", NULL}; + PyObject *modules_obj; + PyObject *options_obj = Py_None; + if (!PyArg_ParseTupleAndKeywords(args, kwds, + "O|O:find_standard_debug_info", + keywords, &modules_obj, &options_obj)) + return NULL; + + _cleanup_pydecref_ PyObject *it = PyObject_GetIter(modules_obj); + if (!it) + return NULL; + + Py_ssize_t length_hint = PyObject_LengthHint(modules_obj, 1); + if (length_hint == -1) + return 0; + + VECTOR(drgn_module_vector, modules); + if (!drgn_module_vector_reserve(&modules, length_hint)) + return PyErr_NoMemory(); + + for (;;) { + _cleanup_pydecref_ PyObject *item = PyIter_Next(it); + if (!item) + break; + + if (!PyObject_TypeCheck(item, &Module_type)) { + return PyErr_Format(PyExc_TypeError, + "expected Module, not %s", + Py_TYPE(item)->tp_name); + } + struct drgn_module *module = ((Module *)item)->module; + if (module->prog != &self->prog) { + PyErr_SetString(PyExc_ValueError, + "module from wrong program"); + return NULL; + } + if (!drgn_module_vector_append(&modules, &module)) + return PyErr_NoMemory(); + } + if (PyErr_Occurred()) + return 
NULL; + + struct drgn_debug_info_options *options; + if (options_obj == Py_None) { + options = NULL; + } else if (PyObject_TypeCheck(options_obj, &DebugInfoOptions_type)) { + options = ((DebugInfoOptions *)options_obj)->options; + } else { + PyErr_SetString(PyExc_TypeError, + "options must be DebugInfoOptions or None"); + return NULL; + } + + err = drgn_find_standard_debug_info(drgn_module_vector_begin(&modules), + drgn_module_vector_size(&modules), + options); + if (err) + return set_drgn_error(err); + Py_RETURN_NONE; +} + static PyObject *Program_read(Program *self, PyObject *args, PyObject *kwds) { static char *keywords[] = {"address", "size", "physical", NULL}; @@ -1433,6 +1898,7 @@ static PyObject *Program_get_language(Program *self, void *arg) static int Program_set_language(Program *self, PyObject *value, void *arg) { + SETTER_NO_DELETE("language", value); if (!PyObject_TypeCheck(value, &Language_type)) { PyErr_SetString(PyExc_TypeError, "language must be Language"); return -1; @@ -1460,6 +1926,7 @@ static int Program_set_language(Program *self, PyObject *value, void *arg) static PyMethodDef Program_methods[] = { {"add_memory_segment", (PyCFunction)Program_add_memory_segment, METH_VARARGS | METH_KEYWORDS, drgn_Program_add_memory_segment_DOC}, + PROGRAM_FINDER_METHOD_DEFS(debug_info), PROGRAM_FINDER_METHOD_DEFS(type), PROGRAM_FINDER_METHOD_DEFS(object), PROGRAM_FINDER_METHOD_DEFS(symbol), @@ -1473,11 +1940,39 @@ static PyMethodDef Program_methods[] = { drgn_Program_set_kernel_DOC}, {"set_pid", (PyCFunction)Program_set_pid, METH_VARARGS | METH_KEYWORDS, drgn_Program_set_pid_DOC}, + {"modules", (PyCFunction)Program_modules, METH_NOARGS, + drgn_Program_modules_DOC}, + {"loaded_modules", (PyCFunction)Program_loaded_modules, METH_NOARGS, + drgn_Program_loaded_modules_DOC}, + {"create_loaded_modules", (PyCFunction)Program_create_loaded_modules, + METH_NOARGS, drgn_Program_create_loaded_modules_DOC}, + {"main_module", (PyCFunction)Program_main_module, + METH_VARARGS 
| METH_KEYWORDS, drgn_Program_main_module_DOC}, + {"shared_library_module", (PyCFunction)Program_shared_library_module, + METH_VARARGS | METH_KEYWORDS, drgn_Program_shared_library_module_DOC}, + {"vdso_module", (PyCFunction)Program_vdso_module, + METH_VARARGS | METH_KEYWORDS, drgn_Program_vdso_module_DOC}, + {"relocatable_module", (PyCFunction)Program_relocatable_module, + METH_VARARGS | METH_KEYWORDS, drgn_Program_relocatable_module_DOC}, + {"linux_kernel_loadable_module", + (PyCFunction)Program_linux_kernel_loadable_module, + METH_VARARGS | METH_KEYWORDS, + drgn_Program_linux_kernel_loadable_module_DOC}, + {"extra_module", (PyCFunction)Program_extra_module, + METH_VARARGS | METH_KEYWORDS, drgn_Program_extra_module_DOC}, + {"module", (PyCFunction)Program_module, METH_O, + drgn_Program_module_DOC}, {"load_debug_info", (PyCFunction)Program_load_debug_info, METH_VARARGS | METH_KEYWORDS, drgn_Program_load_debug_info_DOC}, {"load_default_debug_info", (PyCFunction)Program_load_default_debug_info, METH_NOARGS, drgn_Program_load_default_debug_info_DOC}, + {"load_module_debug_info", (PyCFunction)Program_load_module_debug_info, + METH_VARARGS, drgn_Program_load_module_debug_info_DOC}, + {"find_standard_debug_info", + (PyCFunction)Program_find_standard_debug_info, + METH_VARARGS | METH_KEYWORDS, + drgn_Program_find_standard_debug_info_DOC}, {"__getitem__", (PyCFunction)Program_subscript, METH_O | METH_COEXIST, drgn_Program___getitem___DOC}, {"__contains__", (PyCFunction)Program_contains, METH_O | METH_COEXIST, @@ -1559,6 +2054,9 @@ static PyGetSetDef Program_getset[] = { drgn_Program_platform_DOC}, {"language", (getter)Program_get_language, (setter)Program_set_language, drgn_Program_language_DOC}, + {"debug_info_options", (getter)Program_get_debug_info_options, + (setter)Program_set_debug_info_options, + drgn_Program_debug_info_options_DOC}, {}, }; diff --git a/libdrgn/python/stack_trace.c b/libdrgn/python/stack_trace.c index 1112a8092..96ab16985 100644 --- 
a/libdrgn/python/stack_trace.c +++ b/libdrgn/python/stack_trace.c @@ -16,12 +16,22 @@ PyObject *StackTrace_wrap(struct drgn_stack_trace *trace) { static void StackTrace_dealloc(StackTrace *self) { - struct drgn_program *prog = self->trace->prog; - drgn_stack_trace_destroy(self->trace); - Py_XDECREF(container_of(prog, Program, prog)); + PyObject_GC_UnTrack(self); + if (self->trace) { + struct drgn_program *prog = self->trace->prog; + drgn_stack_trace_destroy(self->trace); + Py_DECREF(container_of(prog, Program, prog)); + } Py_TYPE(self)->tp_free((PyObject *)self); } +static int StackTrace_traverse(StackTrace *self, visitproc visit, void *arg) +{ + if (self->trace) + Py_VISIT(container_of(self->trace->prog, Program, prog)); + return 0; +} + static Program *StackTrace_get_prog(StackTrace *self, void *arg) { Program *prog = container_of(drgn_stack_trace_program(self->trace), @@ -84,18 +94,26 @@ PyTypeObject StackTrace_type = { .tp_dealloc = (destructor)StackTrace_dealloc, .tp_as_sequence = &StackTrace_as_sequence, .tp_str = (reprfunc)StackTrace_str, - .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, .tp_doc = drgn_StackTrace_DOC, + .tp_traverse = (traverseproc)StackTrace_traverse, .tp_methods = StackTrace_methods, .tp_getset = StackTrace_getset, }; static void StackFrame_dealloc(StackFrame *self) { + PyObject_GC_UnTrack(self); Py_XDECREF(self->trace); Py_TYPE(self)->tp_free((PyObject *)self); } +static int StackFrame_traverse(StackFrame *self, visitproc visit, void *arg) +{ + Py_VISIT(self->trace); + return 0; +} + static PyObject *StackFrame_str(StackFrame *self) { struct drgn_error *err; @@ -272,9 +290,20 @@ static PyObject *StackFrame_registers(StackFrame *self) static PyObject *StackFrame_get_name(StackFrame *self, void *arg) { - const char *name = drgn_stack_frame_name(self->trace->trace, self->i); - if (name) - return PyUnicode_FromString(name); + _cleanup_free_ char *name = NULL; + struct drgn_error *err = 
drgn_stack_frame_name(self->trace->trace, + self->i, &name); + if (err) + return set_drgn_error(err); + return PyUnicode_FromString(name); +} + +static PyObject *StackFrame_get_function_name(StackFrame *self, void *arg) +{ + const char *function_name = + drgn_stack_frame_function_name(self->trace->trace, self->i); + if (function_name) + return PyUnicode_FromString(function_name); else Py_RETURN_NONE; } @@ -336,6 +365,8 @@ static PyMethodDef StackFrame_methods[] = { static PyGetSetDef StackFrame_getset[] = { {"name", (getter)StackFrame_get_name, NULL, drgn_StackFrame_name_DOC}, + {"function_name", (getter)StackFrame_get_function_name, NULL, + drgn_StackFrame_function_name_DOC}, {"is_inline", (getter)StackFrame_get_is_inline, NULL, drgn_StackFrame_is_inline_DOC}, {"interrupted", (getter)StackFrame_get_interrupted, NULL, @@ -361,8 +392,9 @@ PyTypeObject StackFrame_type = { .tp_as_sequence = &StackFrame_as_sequence, .tp_as_mapping = &StackFrame_as_mapping, .tp_str = (reprfunc)StackFrame_str, - .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, .tp_doc = drgn_StackFrame_DOC, + .tp_traverse = (traverseproc)StackFrame_traverse, .tp_methods = StackFrame_methods, .tp_getset = StackFrame_getset, }; diff --git a/libdrgn/python/symbol.c b/libdrgn/python/symbol.c index d0e84e1bf..dd7a8264c 100644 --- a/libdrgn/python/symbol.c +++ b/libdrgn/python/symbol.c @@ -156,6 +156,7 @@ PyTypeObject Symbol_type = { .tp_basicsize = sizeof(Symbol), .tp_dealloc = (destructor)Symbol_dealloc, .tp_repr = (reprfunc)Symbol_repr, + // Only references str objects, no GC needed. 
.tp_flags = Py_TPFLAGS_DEFAULT, .tp_doc = drgn_Symbol_DOC, .tp_richcompare = (richcmpfunc)Symbol_richcompare, diff --git a/libdrgn/python/symbol_index.c b/libdrgn/python/symbol_index.c index d19467352..f91b8691c 100644 --- a/libdrgn/python/symbol_index.c +++ b/libdrgn/python/symbol_index.c @@ -115,6 +115,7 @@ PyTypeObject SymbolIndex_type = { .tp_name = "_drgn.SymbolIndex", .tp_basicsize = sizeof(SymbolIndex), .tp_dealloc = (destructor)SymbolIndex_dealloc, + // Doesn't reference any objects, no GC needed. .tp_flags = Py_TPFLAGS_DEFAULT, .tp_doc = drgn_SymbolIndex_DOC, .tp_call = (ternaryfunc)SymbolIndex_call, diff --git a/libdrgn/python/thread.c b/libdrgn/python/thread.c index 6de0f0013..21c1b6b6a 100644 --- a/libdrgn/python/thread.c +++ b/libdrgn/python/thread.c @@ -27,6 +27,7 @@ PyObject *Thread_wrap(struct drgn_thread *thread) static void Thread_dealloc(Thread *self) { + PyObject_GC_UnTrack(self); if (self->thread.prog) { Program *prog = Thread_prog(self); drgn_thread_deinit(&self->thread); @@ -35,6 +36,13 @@ static void Thread_dealloc(Thread *self) Py_TYPE(self)->tp_free((PyObject *)self); } +static int Thread_traverse(Thread *self, visitproc visit, void *arg) +{ + if (self->thread.prog) + Py_VISIT(Thread_prog(self)); + return 0; +} + static PyObject *Thread_get_tid(Thread *self) { return PyLong_FromUint32(self->thread.tid); @@ -99,19 +107,28 @@ PyTypeObject Thread_type = { .tp_name = "_drgn.Thread", .tp_basicsize = sizeof(Thread), .tp_dealloc = (destructor)Thread_dealloc, - .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, .tp_doc = drgn_Thread_DOC, + .tp_traverse = (traverseproc)Thread_traverse, .tp_getset = Thread_getset, .tp_methods = Thread_methods, }; static void ThreadIterator_dealloc(ThreadIterator *self) { + PyObject_GC_UnTrack(self); drgn_thread_iterator_destroy(self->iterator); Py_XDECREF(self->prog); Py_TYPE(self)->tp_free((PyObject *)self); } +static int ThreadIterator_traverse(ThreadIterator *self, visitproc 
visit, + void *arg) +{ + Py_VISIT(self->prog); + return 0; +} + static PyObject *ThreadIterator_next(ThreadIterator *self) { struct drgn_error *err; @@ -127,7 +144,8 @@ PyTypeObject ThreadIterator_type = { .tp_name = "_drgn._ThreadIterator", .tp_basicsize = sizeof(ThreadIterator), .tp_dealloc = (destructor)ThreadIterator_dealloc, - .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_traverse = (traverseproc)ThreadIterator_traverse, .tp_iter = PyObject_SelfIter, .tp_iternext = (iternextfunc)ThreadIterator_next, }; diff --git a/libdrgn/python/type.c b/libdrgn/python/type.c index b073b9b56..4a92dd80e 100644 --- a/libdrgn/python/type.c +++ b/libdrgn/python/type.c @@ -465,6 +465,7 @@ static PyGetSetDef DrgnType_getset[] = { static void DrgnType_dealloc(DrgnType *self) { + PyObject_GC_UnTrack(self); Py_XDECREF(self->attr_cache); if (self->type) Py_DECREF(DrgnType_prog(self)); @@ -746,6 +747,7 @@ static TypeEnumerator *TypeEnumerator_new(PyTypeObject *subtype, PyObject *args, static void TypeEnumerator_dealloc(TypeEnumerator *self) { + PyObject_GC_UnTrack(self); Py_XDECREF(self->value); Py_XDECREF(self->name); Py_TYPE(self)->tp_free((PyObject *)self); @@ -809,6 +811,12 @@ static PyMemberDef TypeEnumerator_members[] = { {}, }; +static int LazyObject_traverse(LazyObject *self, visitproc visit, void *arg) +{ + Py_VISIT(self->obj); + return 0; +} + PyTypeObject TypeEnumerator_type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "_drgn.TypeEnumerator", @@ -816,7 +824,8 @@ PyTypeObject TypeEnumerator_type = { .tp_dealloc = (destructor)TypeEnumerator_dealloc, .tp_repr = (reprfunc)TypeEnumerator_repr, .tp_as_sequence = &TypeEnumerator_as_sequence, - .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_traverse = (traverseproc)LazyObject_traverse, .tp_doc = drgn_TypeEnumerator_DOC, .tp_richcompare = (richcmpfunc)TypeEnumerator_richcompare, .tp_members = TypeEnumerator_members, @@ -829,7 +838,8 @@ static 
DrgnObject *DrgnType_to_absent_DrgnObject(DrgnType *type) if (!obj) return NULL; struct drgn_error *err = - drgn_object_set_absent(&obj->obj, DrgnType_unwrap(type), 0); + drgn_object_set_absent(&obj->obj, DrgnType_unwrap(type), + DRGN_ABSENCE_REASON_OTHER, 0); if (err) return set_drgn_error(err); return_ptr(obj); @@ -1026,6 +1036,7 @@ static TypeMember *TypeMember_new(PyTypeObject *subtype, PyObject *args, static void TypeMember_dealloc(TypeMember *self) { + PyObject_GC_UnTrack(self); Py_XDECREF(self->bit_offset); Py_XDECREF(self->name); LazyObject_dealloc((LazyObject *)self); @@ -1098,7 +1109,8 @@ PyTypeObject TypeMember_type = { .tp_basicsize = sizeof(TypeMember), .tp_dealloc = (destructor)TypeMember_dealloc, .tp_repr = (reprfunc)TypeMember_repr, - .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_traverse = (traverseproc)LazyObject_traverse, .tp_doc = drgn_TypeMember_DOC, .tp_members = TypeMember_members, .tp_getset = TypeMember_getset, @@ -1141,6 +1153,7 @@ static TypeParameter *TypeParameter_new(PyTypeObject *subtype, PyObject *args, static void TypeParameter_dealloc(TypeParameter *self) { + PyObject_GC_UnTrack(self); Py_XDECREF(self->name); LazyObject_dealloc((LazyObject *)self); } @@ -1181,7 +1194,8 @@ PyTypeObject TypeParameter_type = { .tp_basicsize = sizeof(TypeParameter), .tp_dealloc = (destructor)TypeParameter_dealloc, .tp_repr = (reprfunc)TypeParameter_repr, - .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_traverse = (traverseproc)LazyObject_traverse, .tp_doc = drgn_TypeParameter_DOC, .tp_members = TypeParameter_members, .tp_getset = TypeParameter_getset, @@ -1229,6 +1243,7 @@ static TypeTemplateParameter *TypeTemplateParameter_new(PyTypeObject *subtype, static void TypeTemplateParameter_dealloc(TypeTemplateParameter *self) { + PyObject_GC_UnTrack(self); Py_XDECREF(self->is_default); Py_XDECREF(self->name); LazyObject_dealloc((LazyObject *)self); @@ -1287,7 +1302,8 
@@ PyTypeObject TypeTemplateParameter_type = { .tp_basicsize = sizeof(TypeTemplateParameter), .tp_dealloc = (destructor)TypeTemplateParameter_dealloc, .tp_repr = (reprfunc)TypeTemplateParameter_repr, - .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, + .tp_traverse = (traverseproc)LazyObject_traverse, .tp_doc = drgn_TypeTemplateParameter_DOC, .tp_members = TypeTemplateParameter_members, .tp_getset = TypeTemplateParameter_getset, diff --git a/libdrgn/python/type_kind_set.c b/libdrgn/python/type_kind_set.c index 51b0ced4b..63918c346 100644 --- a/libdrgn/python/type_kind_set.c +++ b/libdrgn/python/type_kind_set.c @@ -309,6 +309,7 @@ PyTypeObject TypeKindSet_type = { .tp_as_number = &TypeKindSet_as_number, .tp_as_sequence = &TypeKindSet_as_sequence, .tp_hash = (hashfunc)TypeKindSet_hash, + // Doesn't reference any objects, no GC needed. .tp_flags = Py_TPFLAGS_DEFAULT, .tp_doc = drgn_TypeKindSet_DOC, .tp_richcompare = (richcmpfunc)TypeKindSet_richcompare, @@ -341,6 +342,7 @@ PyTypeObject TypeKindSetIterator_type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "_drgn._TypeKindSetIterator", .tp_basicsize = sizeof(TypeKindSetIterator), + // Doesn't reference any objects, no GC needed. .tp_flags = Py_TPFLAGS_DEFAULT, .tp_iter = PyObject_SelfIter, .tp_iternext = (iternextfunc)TypeKindSetIterator_next, diff --git a/libdrgn/python/util.c b/libdrgn/python/util.c index 40b09f36f..ac087aa72 100644 --- a/libdrgn/python/util.c +++ b/libdrgn/python/util.c @@ -1,9 +1,11 @@ // Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: LGPL-2.1-or-later +#include #include #include "drgnpy.h" +#include "../vector.h" int append_string(PyObject *parts, const char *s) { @@ -13,6 +15,13 @@ int append_string(PyObject *parts, const char *s) return PyList_Append(parts, str); } +int append_u64_hex(PyObject *parts, uint64_t value) +{ + char buf[19]; + snprintf(buf, sizeof(buf), "0x%" PRIx64, value); + return append_string(parts, buf); +} + static int append_formatv(PyObject *parts, const char *format, va_list ap) { _cleanup_pydecref_ PyObject *str = PyUnicode_FromFormatV(format, ap); @@ -32,6 +41,30 @@ int append_format(PyObject *parts, const char *format, ...) return ret; } +int append_attr_repr(PyObject *parts, PyObject *obj, const char *attr_name) +{ + _cleanup_pydecref_ PyObject *attr = + PyObject_GetAttrString(obj, attr_name); + if (!attr) + return -1; + _cleanup_pydecref_ PyObject *str = PyObject_Repr(attr); + if (!str) + return -1; + return PyList_Append(parts, str); +} + +int append_attr_str(PyObject *parts, PyObject *obj, const char *attr_name) +{ + _cleanup_pydecref_ PyObject *attr = + PyObject_GetAttrString(obj, attr_name); + if (!attr) + return -1; + _cleanup_pydecref_ PyObject *str = PyObject_Str(attr); + if (!str) + return -1; + return PyList_Append(parts, str); +} + PyObject *join_strings(PyObject *parts) { _cleanup_pydecref_ PyObject *sep = PyUnicode_New(0, 0); @@ -142,6 +175,89 @@ void path_cleanup(struct path_arg *path) Py_CLEAR(path->object); } +DEFINE_VECTOR_FUNCTIONS(path_arg_vector); + +int path_sequence_converter(PyObject *o, void *p) +{ + if (o == NULL) { + path_sequence_cleanup(p); + return 1; + } + + struct path_sequence_arg *paths = p; + + if (paths->allow_none && o == Py_None) + return 1; + + _cleanup_pydecref_ PyObject *it = PyObject_GetIter(o); + if (!it) + return 0; + + Py_ssize_t length_hint = PyObject_LengthHint(o, 1); + if (length_hint == -1) + return 0; + if (!path_arg_vector_reserve(&paths->args, length_hint)) { + PyErr_NoMemory(); + return 0; 
+ } + + for (;;) { + _cleanup_pydecref_ PyObject *item = PyIter_Next(it); + if (!item) + break; + + struct path_arg *path_arg = + path_arg_vector_append_entry(&paths->args); + if (!path_arg) { + PyErr_NoMemory(); + return 0; + } + memset(path_arg, 0, sizeof(*path_arg)); + if (!path_converter(item, path_arg)) { + path_arg_vector_pop(&paths->args); + return 0; + } + } + if (PyErr_Occurred()) + return 0; + + size_t n = path_arg_vector_size(&paths->args); + if (paths->null_terminate) { + if (n == SIZE_MAX) { + PyErr_NoMemory(); + return 0; + } + n++; + } + paths->paths = malloc_array(n, sizeof(paths->paths[0])); + if (!paths->paths) { + PyErr_NoMemory(); + return 0; + } + + for (size_t i = 0; i < path_arg_vector_size(&paths->args); i++) + paths->paths[i] = path_arg_vector_at(&paths->args, i)->path; + if (paths->null_terminate) + paths->paths[path_arg_vector_size(&paths->args)] = NULL; + + return Py_CLEANUP_SUPPORTED; +} + +void path_sequence_cleanup(struct path_sequence_arg *paths) +{ + free(paths->paths); + paths->paths = NULL; + vector_for_each(path_arg_vector, path_arg, &paths->args) + path_cleanup(path_arg); + path_arg_vector_deinit(&paths->args); + path_arg_vector_init(&paths->args); +} + +size_t path_sequence_size(struct path_sequence_arg *paths) +{ + return path_arg_vector_size(&paths->args); +} + int enum_converter(PyObject *o, void *p) { struct enum_arg *arg = p; diff --git a/libdrgn/register_state.c b/libdrgn/register_state.c index d6c6d3c55..0c9d30e11 100644 --- a/libdrgn/register_state.c +++ b/libdrgn/register_state.c @@ -1,7 +1,6 @@ // Copyright (c) Meta Platforms, Inc. and affiliates. 
// SPDX-License-Identifier: LGPL-2.1-or-later -#include #include #include "debug_info.h" @@ -65,6 +64,13 @@ static void drgn_register_state_set_known(struct drgn_register_state *regs, bitset[i / CHAR_BIT] |= 1 << (i % CHAR_BIT); } +static void drgn_register_state_set_unknown(struct drgn_register_state *regs, + uint32_t i) +{ + unsigned char *bitset = drgn_register_state_known_bitset(regs); + bitset[i / CHAR_BIT] &= ~(1 << (i % CHAR_BIT)); +} + bool drgn_register_state_has_register(const struct drgn_register_state *regs, drgn_register_number regno) { @@ -90,6 +96,13 @@ drgn_register_state_set_has_register_range(struct drgn_register_state *regs, drgn_register_state_set_known(regs, regno + 2); } +void drgn_register_state_unset_has_register(struct drgn_register_state *regs, + drgn_register_number regno) +{ + if (regno < regs->num_regs) + drgn_register_state_set_unknown(regs, (uint32_t)regno + 2); +} + struct optional_uint64 drgn_register_state_get_pc(const struct drgn_register_state *regs) { @@ -105,14 +118,8 @@ void drgn_register_state_set_pc(struct drgn_program *prog, pc &= drgn_platform_address_mask(&prog->platform); regs->_pc = pc; drgn_register_state_set_known(regs, 0); - Dwfl_Module *dwfl_module = dwfl_addrmodule(prog->dbinfo.dwfl, + regs->module = drgn_module_find_by_address(prog, pc - !regs->interrupted); - if (dwfl_module) { - void **userdatap; - dwfl_module_info(dwfl_module, &userdatap, NULL, NULL, - NULL, NULL, NULL, NULL); - regs->module = *userdatap; - } } struct optional_uint64 diff --git a/libdrgn/register_state.h b/libdrgn/register_state.h index fbefbe952..f6f7fbf28 100644 --- a/libdrgn/register_state.h +++ b/libdrgn/register_state.h @@ -177,6 +177,14 @@ drgn_register_state_set_has_register_range(struct drgn_register_state *regs, drgn_register_number first_regno, drgn_register_number last_regno); +/** + * Mark a register as unknown in a @ref drgn_register_state. + * + * @param[in] regno Register number to mark as unknown. 
+ */ +void drgn_register_state_unset_has_register(struct drgn_register_state *regs, + drgn_register_number regno); + /** A `uint64_t` which may or may not be present. */ struct optional_uint64 { uint64_t value; diff --git a/libdrgn/serialize.h b/libdrgn/serialize.h index 5c995dcbd..e23f69df2 100644 --- a/libdrgn/serialize.h +++ b/libdrgn/serialize.h @@ -12,11 +12,13 @@ #ifndef DRGN_SERIALIZE_H #define DRGN_SERIALIZE_H +#include #include #include #include #include "minmax.h" +#include "util.h" /** * @ingroup Internals @@ -175,6 +177,181 @@ void serialize_bits(void *buf, uint64_t bit_offset, uint64_t uvalue, uint64_t deserialize_bits(const void *buf, uint64_t bit_offset, uint8_t bit_size, bool little_endian); +#define struct64_assign_member(member) do { \ + typeof_member(_struct64_src_type, member) _struct64_tmp; \ + memcpy(&_struct64_tmp, \ + _struct64_src + offsetof(_struct64_src_type, member), \ + sizeof(_struct64_tmp)); \ + _struct64_dst->member = _struct64_tmp; \ +} while (0) + +#define struct64_bswap_member(member) do { \ + typeof_member(_struct64_src_type, member) _struct64_swapped; \ + _Static_assert(sizeof(_struct64_swapped) == 8 || \ + sizeof(_struct64_swapped) == 4 || \ + sizeof(_struct64_swapped) == 2 || \ + sizeof(_struct64_swapped) == 1, \ + "scalar member has invalid size"); \ + if (sizeof(_struct64_swapped) == 8) { \ + uint64_t _struct64_tmp; \ + memcpy(&_struct64_tmp, \ + _struct64_src + offsetof(_struct64_src_type, member), \ + sizeof(_struct64_tmp)); \ + _struct64_tmp = bswap_64(_struct64_tmp); \ + memcpy(&_struct64_swapped, &_struct64_tmp, \ + sizeof(_struct64_tmp)); \ + } else if (sizeof(_struct64_swapped) == 4) { \ + uint32_t _struct64_tmp; \ + memcpy(&_struct64_tmp, \ + _struct64_src + offsetof(_struct64_src_type, member), \ + sizeof(_struct64_tmp)); \ + _struct64_tmp = bswap_32(_struct64_tmp); \ + memcpy(&_struct64_swapped, &_struct64_tmp, \ + sizeof(_struct64_tmp)); \ + } else if (sizeof(_struct64_swapped) == 2) { \ + uint16_t _struct64_tmp; 
\ + memcpy(&_struct64_tmp, \ + _struct64_src + offsetof(_struct64_src_type, member), \ + sizeof(_struct64_tmp)); \ + _struct64_tmp = bswap_16(_struct64_tmp); \ + memcpy(&_struct64_swapped, &_struct64_tmp, \ + sizeof(_struct64_tmp)); \ + } else { \ + memcpy(&_struct64_swapped, \ + _struct64_src + offsetof(_struct64_src_type, member), \ + sizeof(_struct64_swapped)); \ + } \ + _struct64_dst->member = _struct64_swapped; \ +} while (0) + +#define struct64_bswap_member_inplace(member) do { \ + _Static_assert(sizeof(_struct64_dst->member) == 8 || \ + sizeof(_struct64_dst->member) == 4 || \ + sizeof(_struct64_dst->member) == 2 || \ + sizeof(_struct64_dst->member) == 1, \ + "scalar member has invalid size"); \ + if (sizeof(_struct64_dst->member) == 8) { \ + uint64_t _struct64_tmp; \ + memcpy(&_struct64_tmp, &_struct64_dst->member, \ + sizeof(_struct64_tmp)); \ + _struct64_tmp = bswap_64(_struct64_tmp); \ + memcpy(&_struct64_dst->member, &_struct64_tmp, \ + sizeof(_struct64_tmp)); \ + } else if (sizeof(_struct64_dst->member) == 4) { \ + uint32_t _struct64_tmp; \ + memcpy(&_struct64_tmp, &_struct64_dst->member, \ + sizeof(_struct64_tmp)); \ + _struct64_tmp = bswap_32(_struct64_tmp); \ + memcpy(&_struct64_dst->member, &_struct64_tmp, \ + sizeof(_struct64_tmp)); \ + } else if (sizeof(_struct64_dst->member) == 2) { \ + uint16_t _struct64_tmp; \ + memcpy(&_struct64_tmp, &_struct64_dst->member, \ + sizeof(_struct64_tmp)); \ + _struct64_tmp = bswap_16(_struct64_tmp); \ + memcpy(&_struct64_dst->member, &_struct64_tmp, \ + sizeof(_struct64_tmp)); \ + } \ +} while (0) + +#define struct64_memcpy_member(member) do { \ + _Static_assert(sizeof(_struct64_dst->member) \ + == sizeof_member(_struct64_src_type, member), \ + "64-bit and 32-bit members have different sizes"); \ + memcpy(&_struct64_dst->member, \ + _struct64_src + offsetof(_struct64_src_type, member), \ + sizeof(_struct64_dst->member)); \ +} while (0) + +#define struct64_ignore_member(member) + +#ifdef DOXYGEN +/** + * 
Deserialize a structure from a memory buffer, where the structure has + * different 64-bit and 32-bit formats, may have a different byte order, and may + * be unaligned. + * + * @param[out] struct64p Returned 64-bit structure in host byte order. + * @param[in] T32 32-bit structure type. + * @param[in] visit_members Macro with signature + * `visit_members(visit_scalar_member, visit_raw_member)`. + * `visit_scalar_member()` is a macro that should be called with the name of + * each scalar member of the structure. `visit_raw_member()` is a macro that + * should be called with the name of each member that is identical regardless of + * 64-/32-bit format or byte order. + * @param[in] buf Source buffer. Must not overlap with @p struct64p. + * @param[in] is_64_bit Whether the source is in the 64-bit format or the 32-bit + * format. + * @param[in] bswap Whether the source has a different byte order than the host + * system. + */ +void deserialize_struct64(T64 * restrict struct64p, T32, visit_members, + const void * restrict buf, bool is_64_bit, + bool bswap); + +/** + * Like @ref deserialize_struct64(), but the source and destination are the + * same. + * + * @param[in,out] struct64p Initially the source buffer, then the returned + * 64-bit structure in host byte order. + */ +void deserialize_struct64_inplace(T64 *struct64p, T32, bool visit_members, + bool is_64_bit, bool bswap); + +#else +#define deserialize_struct64(struct64p, type32, visit_members, buf, is_64_bit, \ + bswap) \ +do { \ + __auto_type _struct64_dst = (struct64p); \ + /* \ + * We want to type check buf like a function parameter, so do two \ + * implicit conversions instead of an explicit cast. 
\ + */ \ + const void *_struct64_buf = (buf); \ + const char *_struct64_src = _struct64_buf; \ + if (is_64_bit) { \ + if (bswap) { \ + typedef typeof(*_struct64_dst) _struct64_src_type; \ + visit_members(struct64_bswap_member, \ + struct64_memcpy_member); \ + } else { \ + memcpy(_struct64_dst, buf, sizeof(*_struct64_dst)); \ + } \ + } else { \ + typedef typeof(type32) _struct64_src_type; \ + if (bswap) { \ + visit_members(struct64_bswap_member, \ + struct64_memcpy_member); \ + } else { \ + visit_members(struct64_assign_member, \ + struct64_memcpy_member); \ + } \ + } \ +} while (0) + +#define deserialize_struct64_inplace(struct64p, type32, visit_members, \ + is_64_bit, bswap) do { \ + __auto_type _struct64_dst = (struct64p); \ + if (!(is_64_bit)) { \ + typedef typeof(type32) _struct64_src_type; \ + _Alignas(_struct64_src_type) char \ + _struct64_src[sizeof(_struct64_src_type)]; \ + memcpy(_struct64_src, _struct64_dst, sizeof(_struct64_src)); \ + if (bswap) { \ + visit_members(struct64_bswap_member, \ + struct64_memcpy_member); \ + } else { \ + visit_members(struct64_assign_member, \ + struct64_memcpy_member); \ + } \ + } else if (bswap) { \ + visit_members(struct64_bswap_member_inplace, \ + struct64_ignore_member); \ + } \ +} while (0) +#endif + /** @} */ #endif /* DRGN_SERIALIZE_H */ diff --git a/libdrgn/stack_trace.c b/libdrgn/stack_trace.c index c1fe3c595..c85c9fc19 100644 --- a/libdrgn/stack_trace.c +++ b/libdrgn/stack_trace.c @@ -117,9 +117,10 @@ drgn_format_stack_trace(struct drgn_stack_trace *trace, char **ret) struct drgn_register_state *regs = trace->frames[frame].regs; struct optional_uint64 pc; - const char *name = drgn_stack_frame_name(trace, frame); - if (name) { - if (!string_builder_append(&str, name)) + const char *function_name = + drgn_stack_frame_function_name(trace, frame); + if (function_name) { + if (!string_builder_append(&str, function_name)) return &drgn_enomem; } else if ((pc = drgn_register_state_get_pc(regs)).has_value) { 
_cleanup_symbol_ struct drgn_symbol *sym = NULL; @@ -198,8 +199,9 @@ drgn_format_stack_frame(struct drgn_stack_trace *trace, size_t frame, char **ret return &drgn_enomem; } - const char *name = drgn_stack_frame_name(trace, frame); - if (name && !string_builder_appendf(&str, " in %s", name)) + const char *function_name = drgn_stack_frame_function_name(trace, frame); + if (function_name + && !string_builder_appendf(&str, " in %s", function_name)) return &drgn_enomem; int line, column; @@ -224,8 +226,42 @@ drgn_format_stack_frame(struct drgn_stack_trace *trace, size_t frame, char **ret return NULL; } -LIBDRGN_PUBLIC const char *drgn_stack_frame_name(struct drgn_stack_trace *trace, - size_t frame) +LIBDRGN_PUBLIC +struct drgn_error *drgn_stack_frame_name(struct drgn_stack_trace *trace, + size_t frame, char **ret) +{ + struct drgn_error *err; + char *name; + const char *function_name = drgn_stack_frame_function_name(trace, frame); + if (function_name) { + name = strdup(function_name); + } else { + struct drgn_register_state *regs = trace->frames[frame].regs; + struct optional_uint64 pc = drgn_register_state_get_pc(regs); + if (pc.has_value) { + _cleanup_symbol_ struct drgn_symbol *sym = NULL; + err = drgn_program_find_symbol_by_address_internal(trace->prog, + pc.value - !regs->interrupted, + &sym); + if (err) + return err; + if (sym) + name = strdup(sym->name); + else if (asprintf(&name, "0x%" PRIx64, pc.value) < 0) + name = NULL; + } else { + name = strdup("???"); + } + } + if (!name) + return &drgn_enomem; + *ret = name; + return NULL; +} + +LIBDRGN_PUBLIC +const char *drgn_stack_frame_function_name(struct drgn_stack_trace *trace, + size_t frame) { Dwarf_Die *scopes = trace->frames[frame].scopes; size_t num_scopes = trace->frames[frame].num_scopes; @@ -463,11 +499,12 @@ drgn_stack_frame_find_object(struct drgn_stack_trace *trace, size_t frame_i, } if (!die.addr) { not_found:; - const char *frame_name = drgn_stack_frame_name(trace, frame_i); - if (frame_name) { + const 
char *function_name = + drgn_stack_frame_function_name(trace, frame_i); + if (function_name) { return drgn_error_format(DRGN_ERROR_LOOKUP, "could not find '%s' in '%s'", - name, frame_name); + name, function_name); } else { return drgn_error_format(DRGN_ERROR_LOOKUP, "could not find '%s'", name); @@ -733,8 +770,14 @@ drgn_get_initial_registers(struct drgn_program *prog, uint32_t tid, if (err) return err; if (!found) { - return drgn_error_create(DRGN_ERROR_LOOKUP, - "task not found"); + if (tid == 0) { + return drgn_error_create(DRGN_ERROR_LOOKUP, + "task not found; " + "use stack_trace(idle_task(cpu)) for PID 0"); + } else { + return drgn_error_create(DRGN_ERROR_LOOKUP, + "task not found"); + } } } @@ -780,6 +823,9 @@ drgn_get_initial_registers(struct drgn_program *prog, uint32_t tid, } return prog->platform.arch->linux_kernel_get_initial_registers(&obj, ret); + } else if (drgn_program_is_userspace_process(prog)) { + return drgn_error_create(DRGN_ERROR_NOT_IMPLEMENTED, + "stack unwinding is not yet supported for live processes"); } else { struct nstring prstatus; err = drgn_program_find_prstatus(prog, tid, &prstatus); @@ -1024,6 +1070,10 @@ drgn_unwind_one_register(struct drgn_program *prog, struct drgn_elf_file *file, } case DRGN_CFI_RULE_AT_DWARF_EXPRESSION: case DRGN_CFI_RULE_DWARF_EXPRESSION: + // It is possible for file to be NULL when using built-in ORC. + // However, it should be impossible to encounter a DWARF + // expression for built-in ORC. + assert(file != NULL); err = drgn_eval_cfi_dwarf_expression(prog, file, rule, regs, buf, size); break; @@ -1139,6 +1189,17 @@ drgn_unwind_with_cfi(struct drgn_program *prog, struct drgn_cfi_row **row, return NULL; } +static bool drgn_is_bad_call(const struct drgn_register_state *regs) +{ + // If the program counter is 0, it's likely that a NULL function pointer + // was called. 
Other than that, it's difficult to differentiate a bad + // program counter from a valid program counter that we don't know about + // (e.g., because it's JIT compiled). We can add heuristics in the + // future. + struct optional_uint64 pc = drgn_register_state_get_pc(regs); + return pc.has_value && pc.value == 0; +} + static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, uint32_t tid, const struct drgn_object *obj, @@ -1151,14 +1212,6 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, return drgn_error_create(DRGN_ERROR_INVALID_ARGUMENT, "cannot unwind stack without platform"); } - if (drgn_program_is_userspace_process(prog)) { - return drgn_error_create(DRGN_ERROR_NOT_IMPLEMENTED, - "stack unwinding is not yet supported for live processes"); - } else if (!(prog->flags & DRGN_PROGRAM_IS_LINUX_KERNEL) - && !drgn_program_is_userspace_core(prog)) { - return drgn_error_create(DRGN_ERROR_NOT_IMPLEMENTED, - "stack unwinding is not supported for this program"); - } size_t trace_capacity = 1; struct drgn_stack_trace *trace = @@ -1190,8 +1243,16 @@ static struct drgn_error *drgn_get_stack_trace(struct drgn_program *prog, err = drgn_unwind_with_cfi(prog, &row, regs, &regs); if (err == &drgn_not_found) { - err = prog->platform.arch->fallback_unwind(prog, regs, - &regs); + if (drgn_is_bad_call(regs) + && prog->platform.arch->bad_call_unwind) { + err = prog->platform.arch->bad_call_unwind(prog, + regs, + &regs); + } else { + err = prog->platform.arch->fallback_unwind(prog, + regs, + &regs); + } } if (err == &drgn_stop) break; diff --git a/libdrgn/string_builder.c b/libdrgn/string_builder.c index 0bd852189..fc5152ae8 100644 --- a/libdrgn/string_builder.c +++ b/libdrgn/string_builder.c @@ -52,6 +52,8 @@ bool string_builder_appendc(struct string_builder *sb, char c) bool string_builder_appendn(struct string_builder *sb, const char *str, size_t len) { + if (len == 0) + return true; if (!string_builder_reserve_for_append(sb, len)) return false; 
memcpy(&sb->str[sb->len], str, len); diff --git a/libdrgn/symbol.c b/libdrgn/symbol.c index 51177deb0..5b7b3779f 100644 --- a/libdrgn/symbol.c +++ b/libdrgn/symbol.c @@ -34,26 +34,6 @@ LIBDRGN_PUBLIC void drgn_symbols_destroy(struct drgn_symbol **syms, free(syms); } -void drgn_symbol_from_elf(const char *name, uint64_t address, - const GElf_Sym *elf_sym, struct drgn_symbol *ret) -{ - ret->name = name; - ret->name_lifetime = DRGN_LIFETIME_STATIC; - ret->lifetime = DRGN_LIFETIME_OWNED; - ret->address = address; - ret->size = elf_sym->st_size; - int binding = GELF_ST_BIND(elf_sym->st_info); - if (binding <= STB_WEAK || binding == STB_GNU_UNIQUE) - ret->binding = binding + 1; - else - ret->binding = DRGN_SYMBOL_BINDING_UNKNOWN; - int type = GELF_ST_TYPE(elf_sym->st_info); - if (type <= STT_TLS || type == STT_GNU_IFUNC) - ret->kind = type; - else - ret->kind = DRGN_SYMBOL_KIND_UNKNOWN; -} - struct drgn_error * drgn_symbol_copy(struct drgn_symbol *dst, struct drgn_symbol *src) { @@ -142,6 +122,57 @@ drgn_symbol_result_builder_add(struct drgn_symbol_result_builder *builder, return true; } +static void drgn_symbol_from_elf(const char *name, uint64_t address, + const GElf_Sym *elf_sym, + struct drgn_symbol *ret) +{ + ret->name = name; + ret->name_lifetime = DRGN_LIFETIME_STATIC; + ret->lifetime = DRGN_LIFETIME_OWNED; + ret->address = address; + ret->size = elf_sym->st_size; + int binding = GELF_ST_BIND(elf_sym->st_info); + if (binding <= STB_WEAK || binding == STB_GNU_UNIQUE) + ret->binding = binding + 1; + else + ret->binding = DRGN_SYMBOL_BINDING_UNKNOWN; + int type = GELF_ST_TYPE(elf_sym->st_info); + if (type <= STT_TLS || type == STT_GNU_IFUNC) + ret->kind = type; + else + ret->kind = DRGN_SYMBOL_KIND_UNKNOWN; +} + +bool +drgn_symbol_result_builder_add_from_elf(struct drgn_symbol_result_builder *builder, + const char *name, uint64_t address, + const GElf_Sym *elf_sym) +{ + if (builder->one) { + // As an optimization, reuse the existing symbol allocation if + // we can. 
+ if (!builder->single + || builder->single->lifetime == DRGN_LIFETIME_STATIC) { + builder->single = malloc(sizeof(*builder->single)); + if (!builder->single) + return false; + } else if (builder->single->name_lifetime == DRGN_LIFETIME_OWNED) { + free((char *)builder->single->name); + } + drgn_symbol_from_elf(name, address, elf_sym, builder->single); + } else { + struct drgn_symbol *sym = malloc(sizeof(*sym)); + if (!sym) + return false; + drgn_symbol_from_elf(name, address, elf_sym, sym); + if (!symbolp_vector_append(&builder->vector, &sym)) { + free(sym); + return false; + } + } + return true; +} + LIBDRGN_PUBLIC size_t drgn_symbol_result_builder_count(const struct drgn_symbol_result_builder *builder) { diff --git a/libdrgn/symbol.h b/libdrgn/symbol.h index c3dd75ca7..3bd0c508c 100644 --- a/libdrgn/symbol.h +++ b/libdrgn/symbol.h @@ -46,10 +46,6 @@ static inline void drgn_symbol_cleanup(struct drgn_symbol **p) drgn_symbol_destroy(*p); } -/** Initialize a @ref drgn_symbol from an ELF symbol. */ -void drgn_symbol_from_elf(const char *name, uint64_t address, - const GElf_Sym *elf_sym, struct drgn_symbol *ret); - /** Destroy the contents of the result builder */ void drgn_symbol_result_builder_abort(struct drgn_symbol_result_builder *builder); @@ -57,6 +53,16 @@ void drgn_symbol_result_builder_abort(struct drgn_symbol_result_builder *builder void drgn_symbol_result_builder_init(struct drgn_symbol_result_builder *builder, bool one); +/** + * Convert an ELF symbol to a @ref drgn_symbol and add it to a result builder. + * + * @return @c true on success, @c false on failure to allocate memory. 
+ */ +bool +drgn_symbol_result_builder_add_from_elf(struct drgn_symbol_result_builder *builder, + const char *name, uint64_t address, + const GElf_Sym *elf_sym); + /** Return single result */ struct drgn_symbol * drgn_symbol_result_builder_single(struct drgn_symbol_result_builder *builder); diff --git a/libdrgn/tests/crc32.c.in b/libdrgn/tests/crc32.c.in new file mode 100644 index 000000000..fb8fe662a --- /dev/null +++ b/libdrgn/tests/crc32.c.in @@ -0,0 +1,40 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include + +#include "test_util.h" +#include "../crc32.h" + +static uint32_t string_crc32(const char *s) +{ + return ~crc32_update(~0, s, strlen(s)); +} + +#suite crc32 + +#tcase crc32 + +#test empty +{ + ck_assert_uint_eq(string_crc32(""), 0); +} + +#test simple +{ + // https://reveng.sourceforge.io/crc-catalogue/17plus.htm#crc.cat.crc-32-iso-hdlc + ck_assert_uint_eq(string_crc32("123456789"), 0xcbf43926); + // http://www.febooti.com/products/filetweak/members/hash-and-crc/test-vectors/ + ck_assert_uint_eq(string_crc32("The quick brown fox jumps over the lazy dog"), + 0x414fa339); +} + +#test update +{ + uint32_t crc = ~0; + crc = crc32_update(crc, "12", 2); + crc = crc32_update(crc, "345", 3); + crc = crc32_update(crc, "6789", 4); + crc = ~crc; + ck_assert_uint_eq(crc, 0xcbf43926); +} diff --git a/libdrgn/tests/hexlify.c.in b/libdrgn/tests/hexlify.c.in new file mode 100644 index 000000000..82ab50eed --- /dev/null +++ b/libdrgn/tests/hexlify.c.in @@ -0,0 +1,52 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include + +#include "test_util.h" +#include "../cleanup.h" +#include "../hexlify.h" + +static const uint8_t binary[] = { + 0x19, 0x29, 0x1d, 0x9a, 0xc4, 0xf3, 0x4c, 0x42, 0x01, 0xee, + 0xdf, 0x9e, 0x8d, 0x1e, 0x59, 0x68, 0xf7, 0xd5, 0x48, 0x19, +}; +static const char str[] = "19291d9ac4f34c4201eedf9e8d1e5968f7d54819"; + +#suite hexlify + +#tcase hexlify + +#test hexlify_simple +{ + char out[2 * sizeof(binary) + 1]; + out[sizeof(out) - 1] = '~'; + hexlify(binary, sizeof(binary), out); + ck_assert_mem_eq(out, str, sizeof(out) - 1); + // Test that the string wasn't null-terminated. + ck_assert_int_eq(out[sizeof(out) - 1], '~'); +} + +#test ahexlify_simple +{ + _cleanup_free_ char *out = ahexlify(binary, sizeof(binary)); + ck_assert_ptr_nonnull(out); + ck_assert_str_eq(out, str); +} + +#test unhexlify_simple +{ + uint8_t out[(sizeof(str) - 1) / 2]; + ck_assert(unhexlify(str, sizeof(str) - 1, out)); + ck_assert_mem_eq(out, binary, sizeof(binary)); +} + +#test unhexlify_odd +{ + ck_assert(!unhexlify("abc", 3, (uint8_t [1]){})); +} + +#test unhexlify_non_hex +{ + ck_assert(!unhexlify("foobar", 6, (uint8_t [3]){})); +} diff --git a/libdrgn/tests/serialize.c.in b/libdrgn/tests/serialize.c.in new file mode 100644 index 000000000..96143c812 --- /dev/null +++ b/libdrgn/tests/serialize.c.in @@ -0,0 +1,196 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "test_util.h" +#include "../serialize.h" +#include "../util.h" + +struct foo64 { + uint64_t big; + uint32_t medium; + uint16_t small; + uint8_t tiny; + uint8_t array[3]; + // Add padding so size is consistent on all architectures. + uint8_t pad[6]; +}; + +struct foo32 { + uint32_t big; + uint16_t medium; + uint8_t small; + uint8_t tiny; + uint8_t array[3]; + // Add padding so size is consistent on all architectures. 
+ uint8_t pad; +}; + +#define visit_foo_members(visit_scalar_member, visit_raw_member) do { \ + visit_scalar_member(big); \ + visit_scalar_member(medium); \ + visit_scalar_member(small); \ + visit_scalar_member(tiny); \ + visit_raw_member(array); \ +} while (0) + +#suite serialize + +#tcase deserialize_struct64 + +#test deserialize_struct64_64le +{ + uint8_t buf[24] = { + 0x00, 0xf2, 0x05, 0x2a, 0x01, 0x00, 0x00, 0x00, + 0xa0, 0x86, 0x01, 0x00, + 0x20, 0x03, + 0x0a, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + deserialize_struct64(&foo, struct foo32, visit_foo_members, buf, true, + !HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 5000000000); + ck_assert_uint_eq(foo.medium, 100000); + ck_assert_uint_eq(foo.small, 800); + ck_assert_uint_eq(foo.tiny, 10); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#test deserialize_struct64_64be +{ + uint8_t buf[24] = { + 0x00, 0x00, 0x00, 0x01, 0x2a, 0x05, 0xf2, 0x00, + 0x00, 0x01, 0x86, 0xa0, + 0x03, 0x20, + 0x0a, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + deserialize_struct64(&foo, struct foo32, visit_foo_members, buf, true, + HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 5000000000); + ck_assert_uint_eq(foo.medium, 100000); + ck_assert_uint_eq(foo.small, 800); + ck_assert_uint_eq(foo.tiny, 10); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#test deserialize_struct64_32le +{ + uint8_t buf[12] = { + 0x00, 0x5e, 0xd0, 0xb2, + 0x10, 0x27, + 0x64, + 0x0d, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + deserialize_struct64(&foo, struct foo32, visit_foo_members, buf, false, + !HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 3000000000); + ck_assert_uint_eq(foo.medium, 10000); + ck_assert_uint_eq(foo.small, 100); + ck_assert_uint_eq(foo.tiny, 13); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#test deserialize_struct64_32be +{ + uint8_t buf[12] = { + 0xb2, 0xd0, 0x5e, 0x00, + 0x27, 0x10, + 0x64, + 0x0d, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + deserialize_struct64(&foo, struct foo32, visit_foo_members, buf, 
false, + HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 3000000000); + ck_assert_uint_eq(foo.medium, 10000); + ck_assert_uint_eq(foo.small, 100); + ck_assert_uint_eq(foo.tiny, 13); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#tcase deserialize_struct64_inplace + +#test deserialize_struct64_inplace_64le +{ + uint8_t buf[24] = { + 0x00, 0xf2, 0x05, 0x2a, 0x01, 0x00, 0x00, 0x00, + 0xa0, 0x86, 0x01, 0x00, + 0x20, 0x03, + 0x0a, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + memcpy(&foo, buf, sizeof(buf)); + deserialize_struct64_inplace(&foo, struct foo32, visit_foo_members, + true, !HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 5000000000); + ck_assert_uint_eq(foo.medium, 100000); + ck_assert_uint_eq(foo.small, 800); + ck_assert_uint_eq(foo.tiny, 10); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#test deserialize_struct64_inplace_64be +{ + uint8_t buf[24] = { + 0x00, 0x00, 0x00, 0x01, 0x2a, 0x05, 0xf2, 0x00, + 0x00, 0x01, 0x86, 0xa0, + 0x03, 0x20, + 0x0a, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + memcpy(&foo, buf, sizeof(buf)); + deserialize_struct64_inplace(&foo, struct foo32, visit_foo_members, + true, HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 5000000000); + ck_assert_uint_eq(foo.medium, 100000); + ck_assert_uint_eq(foo.small, 800); + ck_assert_uint_eq(foo.tiny, 10); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#test deserialize_struct64_inplace_32le +{ + uint8_t buf[12] = { + 0x00, 0x5e, 0xd0, 0xb2, + 0x10, 0x27, + 0x64, + 0x0d, + 0x41, 0x42, 0x43, + }; + struct foo64 foo; + memcpy(&foo, buf, sizeof(buf)); + deserialize_struct64_inplace(&foo, struct foo32, visit_foo_members, + false, !HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 3000000000); + ck_assert_uint_eq(foo.medium, 10000); + ck_assert_uint_eq(foo.small, 100); + ck_assert_uint_eq(foo.tiny, 13); + ck_assert_mem_eq(foo.array, "ABC", 3); +} + +#test deserialize_struct64_inplace_32be +{ + uint8_t buf[12] = { + 0xb2, 0xd0, 0x5e, 0x00, + 0x27, 0x10, + 0x64, + 0x0d, + 0x41, 0x42, 0x43, 
+ }; + struct foo64 foo; + memcpy(&foo, buf, sizeof(buf)); + deserialize_struct64_inplace(&foo, struct foo32, visit_foo_members, + false, HOST_LITTLE_ENDIAN); + ck_assert_uint_eq(foo.big, 3000000000); + ck_assert_uint_eq(foo.medium, 10000); + ck_assert_uint_eq(foo.small, 100); + ck_assert_uint_eq(foo.tiny, 13); + ck_assert_mem_eq(foo.array, "ABC", 3); +} diff --git a/libdrgn/type.c b/libdrgn/type.c index c0806e7d0..41232cde5 100644 --- a/libdrgn/type.c +++ b/libdrgn/type.c @@ -1353,9 +1353,7 @@ void drgn_program_deinit_types(struct drgn_program *prog) } drgn_typep_vector_deinit(&prog->created_types); - for (struct drgn_dedupe_type_set_iterator it = - drgn_dedupe_type_set_first(&prog->dedupe_types); - it.entry; it = drgn_dedupe_type_set_next(it)) + hash_table_for_each(drgn_dedupe_type_set, it, &prog->dedupe_types) free(*it.entry); drgn_dedupe_type_set_deinit(&prog->dedupe_types); diff --git a/libdrgn/util.h b/libdrgn/util.h index 243c7e842..07297a880 100644 --- a/libdrgn/util.h +++ b/libdrgn/util.h @@ -18,6 +18,8 @@ #include #include +#define _unused_ __attribute__((__unused__)) + #ifndef LIBDRGN_PUBLIC #define LIBDRGN_PUBLIC __attribute__((__visibility__("default"))) #endif @@ -69,6 +71,10 @@ _Generic(sizeof(struct { _Static_assert(assert_expression, message); int _; }),\ default: (eval_expression)) +#define sizeof_member(type, member) sizeof(((type *)0)->member) + +#define typeof_member(type, member) typeof(((type *)0)->member) + #define container_of(ptr, type, member) \ static_assert_expression( \ types_compatible(*(ptr), ((type *)0)->member) \ @@ -127,6 +133,18 @@ static inline void *malloc64(uint64_t size) return malloc(size); } +// glibc added reallocarray() in 2.26, but since it's so trivial, it's easier to +// duplicate it here than it is to do feature detection. 
+static inline void *realloc_array(void *ptr, size_t nmemb, size_t size) +{ + size_t bytes; + if (__builtin_mul_overflow(nmemb, size, &bytes)) { + errno = ENOMEM; + return NULL; + } + return realloc(ptr, bytes); +} + static inline void *memdup(const void *ptr, size_t size) { void *copy = malloc(size); diff --git a/libdrgn/vector.h b/libdrgn/vector.h index 8467cf633..93d039529 100644 --- a/libdrgn/vector.h +++ b/libdrgn/vector.h @@ -594,6 +594,14 @@ DEFINE_VECTOR_FUNCTIONS(vector) */ #define VECTOR_INIT { { 0 } } +/** + * Define and initialize an empty @ref vector of type @p vector_type named @p + * vector that is automatically deinitialized when it goes out of scope. + */ +#define VECTOR(vector_type, vector) \ + __attribute__((__cleanup__(vector_type##_deinit))) \ + struct vector_type vector = VECTOR_INIT + /** * Iterate over every entry in a @ref vector. * diff --git a/scripts/build_manylinux_in_docker.sh b/scripts/build_manylinux_in_docker.sh index 91787cd3d..7c1c083c6 100755 --- a/scripts/build_manylinux_in_docker.sh +++ b/scripts/build_manylinux_in_docker.sh @@ -13,6 +13,8 @@ sed -i -e 's/mirrorlist/#mirrorlist/g' \ yum install -y \ bzip2-devel \ + json-c-devel \ + libcurl-devel \ libzstd-devel \ lzo-devel \ snappy-devel \ @@ -33,27 +35,21 @@ fi # Install a recent version of elfutils instead of whatever is in the manylinux # image. -elfutils_version=0.192 +elfutils_version=0.193 elfutils_url=https://sourceware.org/elfutils/ftp/$elfutils_version/elfutils-$elfutils_version.tar.bz2 mkdir /tmp/elfutils cd /tmp/elfutils curl -L "$elfutils_url" | tar -xj --strip-components=1 -# We don't bother with debuginfod support for a few reasons: -# -# 1. It depends on libcurl, which would pull in a bunch of transitive -# dependencies. -# 2. libdw loads libdebuginfod with dlopen(), which auditwheel misses. -# 3. drgn hasn't been tested with debuginfod. 
-./configure --disable-libdebuginfod --disable-debuginfod +./configure --enable-libdebuginfod --disable-debuginfod --with-zlib --with-bzlib --with-lzma --with-zstd make -j$(($(nproc) + 1)) make install -libkdumpfile_version=0.5.4 +libkdumpfile_version=0.5.5 libkdumpfile_url=https://github.com/ptesarik/libkdumpfile/releases/download/v$libkdumpfile_version/libkdumpfile-$libkdumpfile_version.tar.gz mkdir /tmp/libkdumpfile cd /tmp/libkdumpfile curl -L "$libkdumpfile_url" | tar -xz --strip-components=1 -./configure --with-libzstd --with-lzo2 --with-snappy --with-zlib --without-python +./configure --with-libzstd --with-lzo2 --with-snappy --with-zlib --without-python --disable-kdumpid make -j$(($(nproc) + 1)) make install @@ -79,13 +75,14 @@ build_for_python() { "$1" -c "import sys; sys.exit(sys.version_info[:2] != $BUILD_ONLY_PYTHON)" else # Build for all supported Pythons - "$1" -c 'import sys; sys.exit(sys.version_info < (3, 6))' + "$1" -c 'import sys; sys.exit(sys.version_info < (3, 8))' fi } for pybin in /opt/python/cp*/bin; do if build_for_python "$pybin/python"; then - "$pybin/pip" wheel . --no-deps -w /tmp/wheels/ + CONFIGURE_FLAGS="--with-debuginfod --disable-dlopen-debuginfod --with-libkdumpfile --with-lzma" \ + "$pybin/pip" wheel . --no-deps -w /tmp/wheels/ fi done diff --git a/scripts/crashme/Makefile b/scripts/crashme/Makefile new file mode 100644 index 000000000..b9b5c5a8c --- /dev/null +++ b/scripts/crashme/Makefile @@ -0,0 +1,65 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# SPDX-License-Identifier: LGPL-2.1-or-later + +# Makefile used to generate tests/resources/crashme* + +.PHONY: all cores clean + +.DELETE_ON_ERROR: + +EXECUTABLES := crashme crashme_pie crashme_static crashme_static_pie +CORES := $(addsuffix .core, $(EXECUTABLES)) $(addsuffix _no_headers.core, $(EXECUTABLES)) +BINARIES := crashme.so $(EXECUTABLES) crashme.dwz crashme.so.dwz crashme.alt +ZSTD_BINARIES := $(addsuffix .zst, $(BINARIES)) +ZSTD_CORES := $(addsuffix .zst, $(CORES)) + +all: $(BINARIES) cores $(ZSTD_BINARIES) $(ZSTD_CORES) + +clean: + rm -f $(BINARIES) $(CORES) $(ZSTD_BINARIES) $(ZSTD_CORES) + +crashme.so: crashme.c common.c + gcc -g -Os -fpic -shared $^ -o $@ + +crashme: main.c common.c crashme.so + gcc -g -Os -fno-pie -no-pie $(filter-out crashme.so,$^) -o $@ -L . -l:crashme.so -Wl,-rpath,$(CURDIR) + +crashme_pie: main.c common.c crashme.so + gcc -g -Os -fpie -pie $(filter-out crashme.so,$^) -o $@ -L . -l:crashme.so -Wl,-rpath,$(CURDIR) + +crashme_static: main.c common.c crashme.c + musl-gcc -g -Os -fno-pie -static $^ -o $@ + +crashme_static_pie: main.c common.c crashme.c + musl-gcc -g -Os -fpie -static-pie $^ -o $@ + +crashme.dwz crashme.so.dwz crashme.alt &: crashme crashme.so + cp crashme crashme.dwz + cp crashme.so crashme.so.dwz + dwz -m crashme.alt -r crashme.dwz crashme.so.dwz + +cores: $(CORES) + +.NOTPARALLEL: cores + +define CORE_COMMAND +flock /proc/sys/kernel/core_pattern sh -e -c '\ +ulimit -c unlimited; \ +echo "$$COREDUMP_FILTER" > /proc/$$$$/coredump_filter; \ +old_pattern=$$(cat /proc/sys/kernel/core_pattern); \ +restore_core_pattern() { \ + echo "$$old_pattern" > /proc/sys/kernel/core_pattern; \ +}; \ +trap restore_core_pattern EXIT; \ +echo "$$PWD/core.%p" > /proc/sys/kernel/core_pattern; \ +su "$$SUDO_USER" -c "env -i sh -l -c \"exec ./$<\" & wait; mv core.\$$! 
$@"' +endef + +%.core: % + sudo env COREDUMP_FILTER=0x33 $(CORE_COMMAND) + +%_no_headers.core: % + sudo env COREDUMP_FILTER=0x23 $(CORE_COMMAND) + +%.zst: % + zstd -19 $< -o $@ diff --git a/scripts/crashme/common.c b/scripts/crashme/common.c new file mode 100644 index 000000000..98b13c615 --- /dev/null +++ b/scripts/crashme/common.c @@ -0,0 +1,10 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "crashme.h" + +__attribute__((__visibility__("hidden"))) +int *crashme_ptr(void) +{ + return (int *)0xabc; +} diff --git a/scripts/crashme/crashme.c b/scripts/crashme/crashme.c new file mode 100644 index 000000000..8edf2f5d9 --- /dev/null +++ b/scripts/crashme/crashme.c @@ -0,0 +1,25 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "crashme.h" + +__attribute__((__noipa__)) static int c(struct crashme *cm) +{ + *cm->ptr = 0xdeadbeef; + return 3; +} + +__attribute__((__noipa__)) static int b(struct crashme *cm) +{ + return c(cm) - 1; +} + +__attribute__((__noipa__)) static int a(struct crashme *cm) +{ + return b(cm) - 1; +} + +int crashme(struct crashme *cm) +{ + return cm->ptr == crashme_ptr() ? a(cm) - 1 : 1; +} diff --git a/scripts/crashme/crashme.h b/scripts/crashme/crashme.h new file mode 100644 index 000000000..75ab6e1cb --- /dev/null +++ b/scripts/crashme/crashme.h @@ -0,0 +1,15 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// SPDX-License-Identifier: LGPL-2.1-or-later + +#ifndef CRASHME_H +#define CRASHME_H + +int *crashme_ptr(void); + +struct crashme { + int *ptr; +}; + +int crashme(struct crashme *cm); + +#endif /* CRASHME_H */ diff --git a/scripts/crashme/main.c b/scripts/crashme/main.c new file mode 100644 index 000000000..06c65a758 --- /dev/null +++ b/scripts/crashme/main.c @@ -0,0 +1,10 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. 
+// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "crashme.h" + +int main(void) +{ + struct crashme cm = { crashme_ptr() }; + return !!crashme(&cm); +} diff --git a/setup.py b/setup.py index 6535bde97..8a61c2918 100755 --- a/setup.py +++ b/setup.py @@ -92,7 +92,8 @@ def _run_configure(self): args = [ os.path.relpath("libdrgn/configure", self.build_temp), "--disable-static", - "--enable-python", + "--disable-libdrgn", + "--enable-python-extension", ] try: args.extend(shlex.split(os.environ["CONFIGURE_FLAGS"])) @@ -465,7 +466,7 @@ def get_version(): "test": test, }, entry_points={"console_scripts": ["drgn=drgn.cli:_main"]}, - python_requires=">=3.6", + python_requires=">=3.8", author="Omar Sandoval", author_email="osandov@osandov.com", description="Programmable debugger", diff --git a/tests/__init__.py b/tests/__init__.py index 12a5d3264..735d2a4d6 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -3,6 +3,7 @@ import contextlib import functools +import logging import os import sys from typing import Any, Mapping, NamedTuple, Optional @@ -10,12 +11,14 @@ from unittest.mock import Mock from drgn import ( + AbsenceReason, Architecture, FindObjectFlags, Language, Object, Platform, PlatformFlags, + PrimitiveType, Program, Type, TypeEnumerator, @@ -124,7 +127,9 @@ def assertReprPrettyEqualsStr(obj): _IDENTICAL_EQ_TYPES = ( type(None), + AbsenceReason, Language, + PrimitiveType, Program, TypeEnumerator, TypeKind, @@ -196,6 +201,7 @@ def _identical(a, b): "prog_", "type_", "address_", + "absence_reason_", "bit_offset_", "bit_field_size_", ), @@ -290,57 +296,8 @@ def __eq__(self, other): return identical(self._obj, other) -if sys.version_info < (3, 8): - - # Class cleanups need to be called even if setUpClass() fails. - # Unfortunately, we need to wrap setUpClass() to do that reliably. 
- def classCleanups(setUpClass): - @functools.wraps(setUpClass) - def wrapper(cls): - cls._class_cleanups = [] - try: - setUpClass(cls) - except Exception: - cls.doClassCleanups() - raise - - return wrapper - -else: - - def classCleanups(setUpClass): - return setUpClass - - class TestCase(unittest.TestCase): - # "Backport" addClassCleanup(), doClassCleanups(), enterContext(), and - # enterClassContext(). - if sys.version_info < (3, 8): - - @classmethod - def addClassCleanup(cls, function, *args, **kwargs): - # Note that this will fail if the @classCleanups decorator wasn't - # used. This is intentional. - cls._class_cleanups.append((function, args, kwargs)) - - @classmethod - def doClassCleanups(cls): - if hasattr(cls, "_class_cleanups"): - exceptions = [] - while cls._class_cleanups: - function, args, kwargs = cls._class_cleanups.pop() - try: - function(*args, **kwargs) - except Exception as e: - exceptions.append(e) - if exceptions: - raise Exception(exceptions) - - @classmethod - def tearDownClass(cls): - cls.doClassCleanups() - super().tearDownClass() - + # "Backport" enterContext() and enterClassContext(). 
if sys.version_info < (3, 11): def enterContext(self, cm): @@ -455,3 +412,14 @@ def modifyenv(vars: Mapping[str, Optional[str]]): del os.environ[key] else: os.environ[key] = old_value + + +@contextlib.contextmanager +def drgn_log_level(level: int): + logger = logging.getLogger("drgn") + old_level = logger.getEffectiveLevel() + logger.setLevel(level) + try: + yield + finally: + logger.setLevel(old_level) diff --git a/tests/assembler.py b/tests/assembler.py index c1ac523e4..dedc996ed 100644 --- a/tests/assembler.py +++ b/tests/assembler.py @@ -5,6 +5,7 @@ def _append_uleb128(buf, value): + assert value >= 0 while True: byte = value & 0x7F value >>= 7 diff --git a/tests/dwarfwriter.py b/tests/dwarfwriter.py index b496bf2b7..1fcc378c9 100644 --- a/tests/dwarfwriter.py +++ b/tests/dwarfwriter.py @@ -3,7 +3,7 @@ from collections import OrderedDict import os.path -from typing import Any, NamedTuple, Optional, Sequence, Union +from typing import Any, Dict, NamedTuple, Optional, Sequence, Union import zlib from _drgn_util.elf import ET, SHF, SHT @@ -24,13 +24,14 @@ class DwarfLabel(NamedTuple): class DwarfDie(NamedTuple): tag: DW_TAG - attribs: Sequence[DwarfAttrib] + attribs: Sequence[DwarfAttrib] = () children: Sequence[Union["DwarfDie", DwarfLabel]] = () class DwarfUnit(NamedTuple): type: DW_UT die: DwarfDie + die_label: Optional[str] = None dwo_id: Optional[int] = None type_signature: Optional[int] = None type_offset: Optional[str] = None @@ -66,20 +67,18 @@ def aux(die): def _compile_debug_info(units, little_endian, bits, version, use_dw_form_indirect): + offset_size = 4 # We only emit the 32-bit format for now. byteorder = "little" if little_endian else "big" - all_labels = set() labels = {} - relocations = [] + references = [] + unit_references = [] code = 1 decl_file = 1 def aux(buf, die, depth): if isinstance(die, DwarfLabel): - # For now, labels are only supported within a unit, but make sure - # they're unique across all units. 
- if die.name in all_labels: + if die.name in labels: raise ValueError(f"duplicate label {die.name!r}") - all_labels.add(die.name) labels[die.name] = len(buf) return @@ -100,9 +99,9 @@ def aux(buf, die, depth): buf.append(value) elif attrib.form == DW_FORM.data2: buf.extend(value.to_bytes(2, byteorder)) - elif attrib.form == DW_FORM.data4: + elif attrib.form in (DW_FORM.data4, DW_FORM.ref_sup4): buf.extend(value.to_bytes(4, byteorder)) - elif attrib.form == DW_FORM.data8: + elif attrib.form in (DW_FORM.data8, DW_FORM.ref_sup8): buf.extend(value.to_bytes(8, byteorder)) elif attrib.form == DW_FORM.udata: _append_uleb128(buf, value) @@ -114,16 +113,53 @@ def aux(buf, die, depth): elif attrib.form == DW_FORM.block1: buf.append(len(value)) buf.extend(value) + elif attrib.form in (DW_FORM.strp, DW_FORM.GNU_ref_alt): + buf.extend(value.to_bytes(offset_size, byteorder)) elif attrib.form == DW_FORM.string: buf.extend(value.encode()) buf.append(0) + elif attrib.form == DW_FORM.ref1: + if isinstance(value, str): + unit_references.append((len(buf), 1, value)) + buf.append(0) + else: + buf.extend(value.to_bytes(1, byteorder)) + elif attrib.form == DW_FORM.ref2: + if isinstance(value, str): + unit_references.append((len(buf), 2, value)) + buf.extend(bytes(2)) + else: + buf.extend(value.to_bytes(2, byteorder)) elif attrib.form == DW_FORM.ref4: - relocations.append((len(buf), value)) - buf.extend(b"\0\0\0\0") + if isinstance(value, str): + unit_references.append((len(buf), 4, value)) + buf.extend(bytes(4)) + else: + buf.extend(value.to_bytes(4, byteorder)) + elif attrib.form == DW_FORM.ref8: + if isinstance(value, str): + unit_references.append((len(buf), 8, value)) + buf.extend(bytes(8)) + else: + buf.extend(value.to_bytes(8, byteorder)) + elif attrib.form == DW_FORM.ref_udata: + if isinstance(value, str): + assert ( + value in labels + ), "DW_FORM_ref_udata can only be used for backreferences" + _append_uleb128(buf, labels[value] - unit_offset) + else: + _append_uleb128(buf, 
value) + elif attrib.form == DW_FORM.ref_addr: + if isinstance(value, str): + references.append((len(buf), offset_size, value)) + buf.extend(bytes(offset_size)) + else: + buf.extend(value.to_bytes(offset_size, byteorder)) elif attrib.form == DW_FORM.ref_sig8: buf.extend(value.to_bytes(8, byteorder)) elif attrib.form == DW_FORM.sec_offset: - buf.extend(b"\0\0\0\0") + buf.extend(bytes(offset_size)) elif attrib.form == DW_FORM.flag_present: pass elif attrib.form == DW_FORM.exprloc: @@ -139,14 +175,13 @@ def aux(buf, die, depth): debug_info = bytearray() debug_types = bytearray() for unit in units: - labels.clear() - relocations.clear() + unit_references.clear() decl_file = 1 if version == 4 and unit.type in (DW_UT.type, DW_UT.split_type): buf = debug_types else: buf = debug_info - orig_len = len(buf) + unit_offset = len(buf) buf.extend(b"\0\0\0\0") # unit_length buf.extend(version.to_bytes(2, byteorder)) # version if version >= 5: @@ -162,21 +197,27 @@ def aux(buf, die, depth): assert unit.dwo_id is None if unit.type in (DW_UT.type, DW_UT.split_type): buf.extend(unit.type_signature.to_bytes(8, byteorder)) # type_signature - relocations.append((len(buf), unit.type_offset)) - buf.extend(b"\0\0\0\0") # type_offset + unit_references.append((len(buf), offset_size, unit.type_offset)) + buf.extend(bytes(offset_size)) # type_offset else: assert unit.type_signature is None assert unit.type_offset is None + if unit.die_label is not None: + aux(buf, DwarfLabel(unit.die_label), 0) aux(buf, unit.die, 0) - unit_length = len(buf) - orig_len - 4 - buf[orig_len : orig_len + 4] = unit_length.to_bytes(4, byteorder) + unit_length = len(buf) - unit_offset - 4 + buf[unit_offset : unit_offset + 4] = unit_length.to_bytes(4, byteorder) - for offset, label in relocations: - die_offset = labels[label] - orig_len - buf[offset : offset + 4] = die_offset.to_bytes(4, byteorder) - return debug_info, debug_types + for offset, size, label in unit_references: + die_offset = labels[label] - unit_offset 
+ buf[offset : offset + size] = die_offset.to_bytes(size, byteorder) + + for offset, size, label in references: + buf[offset : offset + size] = labels[label].to_bytes(size, byteorder) + + return debug_info, debug_types, labels def _compile_debug_line(units, little_endian, bits, version): @@ -295,19 +336,27 @@ def collect_file_names(die): return buf -_UNIT_TAGS = frozenset({DW_TAG.type_unit, DW_TAG.compile_unit}) +_UNIT_TAGS = frozenset({DW_TAG.type_unit, DW_TAG.compile_unit, DW_TAG.partial_unit}) + +class DwarfResult(NamedTuple): + data: bytes + labels: Dict[str, int] -def dwarf_sections( + +def compile_dwarf( units_or_dies, - little_endian=True, - bits=64, *, version=4, lang=None, use_dw_form_indirect=False, compress=None, split=None, + sections=(), + little_endian=True, + bits=64, + allow_any_unit_die=False, + **kwargs, ): assert compress in (None, "zlib-gnu", "zlib-gabi") assert split in (None, "dwo") @@ -326,7 +375,7 @@ def dwarf_sections( DwarfUnit(DW_UT.compile, DwarfDie(DW_TAG.compile_unit, (), units_or_dies)), ) assert all(isinstance(unit, DwarfUnit) for unit in units) - assert all(unit.die.tag in _UNIT_TAGS for unit in units) + assert allow_any_unit_die or all(unit.die.tag in _UNIT_TAGS for unit in units) unit_attribs = [] if lang is not None: @@ -346,7 +395,7 @@ def dwarf_sections( if not split: debug_line = _compile_debug_line(units, little_endian, bits, version) - debug_info, debug_types = _compile_debug_info( + debug_info, debug_types, labels = _compile_debug_info( units, little_endian, bits, version, use_dw_form_indirect ) @@ -368,7 +417,7 @@ def debug_section(name, data): ) return name - sections = [ + dwarf_sections = [ debug_section( ".debug_abbrev", _compile_debug_abbrev(units, use_dw_form_indirect) ), @@ -376,41 +425,21 @@ def debug_section(name, data): debug_section(".debug_str", b"\0"), ] if not split: - sections.append(debug_section(".debug_line", debug_line)) + dwarf_sections.append(debug_section(".debug_line", debug_line)) if debug_types: - 
sections.append(debug_section(".debug_types", debug_types)) - return sections + dwarf_sections.append(debug_section(".debug_types", debug_types)) + + return DwarfResult( + data=create_elf_file( + ET.EXEC, + sections=[*sections, *dwarf_sections], + little_endian=little_endian, + bits=bits, + **kwargs, + ), + labels=labels, + ) -def compile_dwarf( - dies, - little_endian=True, - bits=64, - *, - version=4, - lang=None, - use_dw_form_indirect=False, - compress=None, - split=None, - sections=(), - build_id=None, -): - return create_elf_file( - ET.EXEC, - sections=[ - *sections, - *dwarf_sections( - dies, - little_endian=little_endian, - bits=bits, - version=version, - lang=lang, - use_dw_form_indirect=use_dw_form_indirect, - compress=compress, - split=split, - ), - ], - build_id=build_id, - little_endian=little_endian, - bits=bits, - ) +def create_dwarf_file(*args, **kwargs): + return compile_dwarf(*args, **kwargs).data diff --git a/tests/elfwriter.py b/tests/elfwriter.py index 430432aac..dfb03c9e1 100644 --- a/tests/elfwriter.py +++ b/tests/elfwriter.py @@ -1,8 +1,9 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later +import os import struct -from typing import List, NamedTuple, Optional, Sequence +from typing import List, NamedTuple, Optional, Sequence, Tuple, Union import zlib from _drgn_util.elf import ET, PT, SHF, SHN, SHT, STB, STT, STV @@ -59,10 +60,19 @@ def st_info(self) -> int: def _create_symtab( sections: List[ElfSection], symbols: Sequence[ElfSymbol], + *, + dynamic: bool = False, little_endian: bool, bits: int, ): - assert not any(section.name in (".symtab", ".strtab") for section in sections) + symtab_name = ".dynsym" if dynamic else ".symtab" + strtab_name = ".dynstr" if dynamic else ".strtab" + assert not any(section.name in (symtab_name, strtab_name) for section in sections) + + # An empty symbol name is a placeholder for the implicit 0-index entry in + # the symbol table. 
It's used to create a valid, but empty symbol table. + if symbols and symbols[0].name == "": + symbols = symbols[1:] endian = "<" if little_endian else ">" if bits == 64: @@ -104,15 +114,15 @@ def symbol_fields(sym: ElfSymbol): sections.append( ElfSection( - name=".symtab", - sh_type=SHT.SYMTAB, + name=symtab_name, + sh_type=SHT.DYNSYM if dynamic else SHT.SYMTAB, data=symtab_data, sh_link=sum((1 for section in sections if section.name is not None), 2), sh_info=sh_info, sh_entsize=symbol_struct.size, ) ) - sections.append(ElfSection(name=".strtab", sh_type=SHT.STRTAB, data=strtab_data)) + sections.append(ElfSection(name=strtab_name, sh_type=SHT.STRTAB, data=strtab_data)) def create_elf_file( @@ -120,7 +130,14 @@ def create_elf_file( sections: Sequence[ElfSection] = (), symbols: Sequence[ElfSymbol] = (), *, + dynamic_symbols: Sequence[ElfSymbol] = (), build_id: Optional[bytes] = None, + gnu_debuglink: Optional[ + Tuple[Union[str, bytes, "os.PathLike[str]", "os.PathLike[bytes]"], int] + ] = None, + gnu_debugaltlink: Optional[ + Tuple[Union[str, bytes, "os.PathLike[str]", "os.PathLike[bytes]"], bytes] + ] = None, little_endian: bool = True, bits: int = 64, ): @@ -141,6 +158,14 @@ def create_elf_file( nhdr_struct = struct.Struct(endian + "3I") sections = list(sections) + if dynamic_symbols: + _create_symtab( + sections, + dynamic_symbols, + dynamic=True, + little_endian=little_endian, + bits=bits, + ) if symbols: _create_symtab(sections, symbols, little_endian=little_endian, bits=bits) if build_id is not None: @@ -157,6 +182,32 @@ def create_elf_file( sections.append( ElfSection(name=".note.gnu.build-id", sh_type=SHT.NOTE, data=build_id_note) ) + + if gnu_debuglink is not None: + gnu_debuglink_path, gnu_debuglink_crc = gnu_debuglink + gnu_debuglink_path = os.fsencode(gnu_debuglink_path) + sections.append( + ElfSection( + name=".gnu_debuglink", + sh_type=SHT.PROGBITS, + data=gnu_debuglink_path + + bytes(4 - len(gnu_debuglink_path) % 4) + + gnu_debuglink_crc.to_bytes(4, 
"little"), + ) + ) + + if gnu_debugaltlink is not None: + gnu_debugaltlink_path, gnu_debugaltlink_build_id = gnu_debugaltlink + sections.append( + ElfSection( + name=".gnu_debugaltlink", + sh_type=SHT.PROGBITS, + data=os.fsencode(gnu_debugaltlink_path) + + b"\0" + + gnu_debugaltlink_build_id, + ) + ) + shnum = 0 phnum = 0 shstrtab = bytearray(1) diff --git a/tests/helpers/common/test_format.py b/tests/helpers/common/test_format.py index d57421531..d6b9c6f51 100644 --- a/tests/helpers/common/test_format.py +++ b/tests/helpers/common/test_format.py @@ -1,11 +1,15 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later +import io + from drgn import Program, TypeEnumerator from drgn.helpers.common.format import ( + CellFormat, decode_enum_type_flags, decode_flags, number_in_binary_units, + print_table, ) from tests import MOCK_PLATFORM, TestCase @@ -124,3 +128,46 @@ def test_precision(self): def test_huge(self): self.assertEqual(number_in_binary_units(1024**8 * 1.5), "1.5Y") self.assertEqual(number_in_binary_units(1024**10), "1048576Y") + + +class TestPrintTable(TestCase): + def assert_print_table(self, rows, expected, **kwargs): + f = io.StringIO() + print_table(rows, file=f) + self.assertEqual(f.getvalue(), expected) + + def test_empty(self): + self.assert_print_table([], "") + + def test_one_row(self): + self.assert_print_table([["abc", "de", "fghi"]], "abc de fghi\n") + + def test_align(self): + self.assert_print_table( + [[2, 2000, 4], [13, 3, 19]], + """\ + 2 2000 4 +13 3 19 +""", + ) + + def test_empty_cell(self): + self.assert_print_table( + [[2, 2000, 4], ["", 3, 13, 19]], + """\ +2 2000 4 + 3 13 19 +""", + ) + + def test_cell_format(self): + self.assert_print_table( + [ + ["DECIMAL", "HEXADECIMAL"], + [CellFormat(10, "<"), CellFormat(10, "= 0 except ImportError: have_pyroute2 = False -@unittest.skipUnless(have_pyroute2, "pyroute2 not found") +@unittest.skipUnless(have_pyroute2, "pyroute2 >= 0.6.10 not found") class 
TestTc(LinuxKernelTestCase): @classmethod - @classCleanups def setUpClass(cls): super().setUpClass() cls.ns = None @@ -33,7 +34,7 @@ def setUpClass(cls): cls.name = "".join( random.choice(string.ascii_letters) for _ in range(16) ) - cls.ns = NetNS(cls.name, flags=os.O_CREAT | os.O_EXCL) + cls.ns = pyroute2.NetNS(cls.name, flags=os.O_CREAT | os.O_EXCL) except FileExistsError: pass cls.addClassCleanup(cls.ns.remove) @@ -41,7 +42,7 @@ def setUpClass(cls): def test_qdisc_lookup(self): try: self.ns.link("add", ifname="dummy0", kind="dummy") - except NetlinkError: + except pyroute2.NetlinkError: self.skipTest("kernel does not support dummy interface (CONFIG_DUMMY)") dummy = self.ns.link_lookup(ifname="dummy0")[0] @@ -57,7 +58,7 @@ def test_qdisc_lookup(self): bands=3, priomap=[1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1], ) - except NetlinkError: + except pyroute2.NetlinkError: self.ns.link("delete", ifname="dummy0") self.skipTest( "kernel does not support Multi Band Priority Queueing (CONFIG_NET_SCH_PRIO)" @@ -65,7 +66,7 @@ def test_qdisc_lookup(self): # tc qdisc add dev dummy0 parent 1:1 handle 10: sfq try: self.ns.tc("add", kind="sfq", index=dummy, parent="1:1", handle="10:") - except NetlinkError: + except pyroute2.NetlinkError: self.ns.link("delete", ifname="dummy0") self.skipTest( "kernel does not support Stochastic Fairness Queueing (CONFIG_NET_SCH_SFQ)" @@ -82,7 +83,7 @@ def test_qdisc_lookup(self): burst=1600, limit=3000, ) - except NetlinkError: + except pyroute2.NetlinkError: self.ns.link("delete", ifname="dummy0") self.skipTest( "kernel does not support Token Bucket Filter (CONFIG_NET_SCH_TBF)" @@ -92,7 +93,7 @@ def test_qdisc_lookup(self): # tc qdisc add dev dummy0 ingress try: self.ns.tc("add", kind="ingress", index=dummy) - except NetlinkError: + except pyroute2.NetlinkError: self.ns.link("delete", ifname="dummy0") self.skipTest( "kernel does not support ingress Qdisc (CONFIG_NET_SCH_INGRESS)" diff --git a/tests/linux_kernel/helpers/test_timekeeping.py 
b/tests/linux_kernel/helpers/test_timekeeping.py new file mode 100644 index 000000000..2a83ecd36 --- /dev/null +++ b/tests/linux_kernel/helpers/test_timekeeping.py @@ -0,0 +1,128 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +from pathlib import Path +import time + +from drgn import cast +from drgn.helpers.linux.timekeeping import ( + ktime_get_boottime_seconds, + ktime_get_clocktai_seconds, + ktime_get_coarse_boottime_ns, + ktime_get_coarse_clocktai_ns, + ktime_get_coarse_ns, + ktime_get_coarse_real_ns, + ktime_get_real_seconds, + ktime_get_seconds, + uptime, + uptime_pretty, +) +from tests.linux_kernel import LinuxKernelTestCase, skip_unless_have_test_kmod + +# These aren't available in the time module as of Python 3.14. +CLOCK_REALTIME_COARSE = getattr(time, "CLOCK_REALTIME_COARSE", 5) +CLOCK_MONOTONIC_COARSE = getattr(time, "CLOCK_MONOTONIC_COARSE", 6) + + +class TestTimekeeping(LinuxKernelTestCase): + def assert_in_range(self, a, b, c): + self.assertTrue(a <= b <= c, f"{b} is not in range [{a}, {c}]") + + def test_ktime_get_seconds(self): + t1 = int(time.clock_gettime(CLOCK_MONOTONIC_COARSE)) + t2 = ktime_get_seconds(self.prog) + t3 = int(time.clock_gettime(CLOCK_MONOTONIC_COARSE)) + + self.assert_in_range(t1, t2.value_(), t3) + self.assertIdentical(t2, cast("time64_t", t2)) + + def test_ktime_get_coarse_ns(self): + t1 = time.clock_gettime_ns(CLOCK_MONOTONIC_COARSE) + t2 = ktime_get_coarse_ns(self.prog) + t3 = time.clock_gettime_ns(CLOCK_MONOTONIC_COARSE) + + self.assert_in_range(t1, t2.value_(), t3) + self.assertIdentical(t2, cast("u64", t2)) + + def test_ktime_get_real_seconds(self): + t1 = int(time.clock_gettime(CLOCK_REALTIME_COARSE)) + t2 = ktime_get_real_seconds(self.prog) + t3 = int(time.clock_gettime(CLOCK_REALTIME_COARSE)) + + self.assert_in_range(t1, t2.value_(), t3) + self.assertIdentical(t2, cast("time64_t", t2)) + + def test_ktime_get_coarse_real_ns(self): + t1 = 
time.clock_gettime_ns(CLOCK_REALTIME_COARSE) + t2 = ktime_get_coarse_real_ns(self.prog) + t3 = time.clock_gettime_ns(CLOCK_REALTIME_COARSE) + + self.assert_in_range(t1, t2.value_(), t3) + self.assertIdentical(t2, cast("u64", t2)) + + @skip_unless_have_test_kmod + def test_ktime_get_boottime_seconds(self): + # There is no CLOCK_BOOTTIME_COARSE, so the test module exposes it in + # sysfs. + path = Path("/sys/kernel/drgn_test/boottime_seconds") + + t1 = int(path.read_text()) + t2 = ktime_get_boottime_seconds(self.prog) + t3 = int(path.read_text()) + + self.assert_in_range(t1, t2.value_(), t3) + self.assertIdentical(t2, cast("time64_t", t2)) + + @skip_unless_have_test_kmod + def test_ktime_get_coarse_boottime_ns(self): + # There is no CLOCK_BOOTTIME_COARSE, so the test module exposes it in + # sysfs. + path = Path("/sys/kernel/drgn_test/coarse_boottime_ns") + + t1 = int(path.read_text()) + t2 = ktime_get_coarse_boottime_ns(self.prog) + t3 = int(path.read_text()) + + self.assert_in_range(t1, t2.value_(), t3) + self.assertIdentical(t2, cast("u64", t2)) + + @skip_unless_have_test_kmod + def test_ktime_get_clocktai_seconds(self): + # There is no CLOCK_TAI_COARSE, so the test module exposes it in sysfs. + path = Path("/sys/kernel/drgn_test/clocktai_seconds") + + t1 = int(path.read_text()) + t2 = ktime_get_clocktai_seconds(self.prog) + t3 = int(path.read_text()) + + self.assert_in_range(t1, t2.value_(), t3) + self.assertIdentical(t2, cast("time64_t", t2)) + + @skip_unless_have_test_kmod + def test_ktime_get_coarse_clocktai_ns(self): + # There is no CLOCK_TAI_COARSE, so the test module exposes it in sysfs. 
+ path = Path("/sys/kernel/drgn_test/coarse_clocktai_ns") + + t1 = int(path.read_text()) + t2 = ktime_get_coarse_clocktai_ns(self.prog) + t3 = int(path.read_text()) + + self.assert_in_range(t1, t2.value_(), t3) + self.assertIdentical(t2, cast("u64", t2)) + + @skip_unless_have_test_kmod + def test_uptime(self): + # There is no CLOCK_BOOTTIME_COARSE, so the test module exposes it in + # sysfs. + path = Path("/sys/kernel/drgn_test/coarse_boottime_ns") + + t1 = int(path.read_text()) + t2 = uptime(self.prog) + t3 = int(path.read_text()) + + self.assert_in_range(t1 / 1e9, t2, t3 / 1e9) + self.assertIsInstance(t2, float) + + def test_uptime_pretty(self): + # Just test that it succeeds. + uptime_pretty(self.prog) diff --git a/tests/linux_kernel/kmod/drgn_test.c b/tests/linux_kernel/kmod/drgn_test.c index d417b44a3..8ce5b5ac3 100644 --- a/tests/linux_kernel/kmod/drgn_test.c +++ b/tests/linux_kernel/kmod/drgn_test.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -36,6 +37,8 @@ #ifdef CONFIG_STACKDEPOT #include #endif +#include +#include #include #include #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 20, 0) @@ -45,6 +48,66 @@ #define HAVE_XARRAY 0 #endif +// Page pools were added in Linux kernel commit ff7d6b27f894 ("page_pool: +// refurbish version of page_pool code") (in v4.18) and may not be enabled. +#ifdef CONFIG_PAGE_POOL +#define HAVE_PAGE_POOL 1 +// The header file was moved in Linux kernel commit a9ca9f9ceff3 ("page_pool: +// split types and declarations from page_pool.h") (in v6.6). +#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 6, 0) +#include +#else +#include +#endif +#else +#define HAVE_PAGE_POOL 0 +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0) +// These were added in b9ff604cff11 ("timekeeping: Add +// ktime_get_coarse_with_offset") (in v4.18-rc1). 
+static inline ktime_t ktime_get_coarse_boottime(void) +{ + struct timespec64 ts = get_monotonic_coarse64(); + + return ktime_mono_to_any(timespec64_to_ktime(ts), TK_OFFS_BOOT); +} + +static inline ktime_t ktime_get_coarse_clocktai(void) +{ + struct timespec64 ts = get_monotonic_coarse64(); + + return ktime_mono_to_any(timespec64_to_ktime(ts), TK_OFFS_TAI); +} + +// These were added in Linux kernel commit 06aa376903b6 ("timekeeping: Add more +// coarse clocktai/boottime interfaces") (in v4.18). +static inline time64_t ktime_get_boottime_seconds(void) +{ + return ktime_divns(ktime_get_coarse_boottime(), NSEC_PER_SEC); +} + +static inline time64_t ktime_get_clocktai_seconds(void) +{ + return ktime_divns(ktime_get_coarse_clocktai(), NSEC_PER_SEC); +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 3, 0) +// These were added in 4c54294d01e6 ("timekeeping: Add missing _ns functions for +// coarse accessors") (in v5.3). +static inline u64 ktime_get_coarse_boottime_ns(void) +{ + return ktime_to_ns(ktime_get_coarse_boottime()); +} + +static inline u64 ktime_get_coarse_clocktai_ns(void) +{ + return ktime_to_ns(ktime_get_coarse_clocktai()); +} +#endif + + // Convert a 4-character string to a seed for drgn_test_prng32(). 
static inline u32 drgn_test_prng32_seed(const char *s) { @@ -473,6 +536,56 @@ static void drgn_test_net_exit(void) dev_put(drgn_test_netdev); } +// page_pool + +const int drgn_test_have_page_pool = HAVE_PAGE_POOL; + +#if HAVE_PAGE_POOL +struct page_pool *drgn_test_page_pool; +struct page *drgn_test_page_pool_page; +#endif + +static int drgn_test_page_pool_init(void) +{ +#if HAVE_PAGE_POOL + struct page_pool_params params = { + .order = 0, + .flags = 0, + .pool_size = 1, + .nid = NUMA_NO_NODE, + }; + struct page_pool *pool; + + pool = page_pool_create(¶ms); + if (IS_ERR(pool)) + return PTR_ERR(pool); + drgn_test_page_pool = pool; + + drgn_test_page_pool_page = page_pool_alloc_pages(pool, GFP_KERNEL); + if (!drgn_test_page_pool_page) + return -ENOMEM; +#endif + return 0; +} + +static void drgn_test_page_pool_exit(void) +{ +#if HAVE_PAGE_POOL + if (drgn_test_page_pool_page) { + // page_pool_put_page() changed in Linux kernel commit + // 458de8a97f10 ("net: page_pool: API cleanup and comments") (in + // v5.7). + page_pool_put_page(drgn_test_page_pool, + drgn_test_page_pool_page, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 7, 0) + 0, +#endif + true); + } + page_pool_destroy(drgn_test_page_pool); +#endif +} + // percpu DEFINE_PER_CPU(u32, drgn_test_percpu_static); @@ -968,7 +1081,8 @@ static void drgn_test_stack_trace_exit(void) static int drgn_test_stack_trace_init(void) { - drgn_test_kthread = kthread_create(drgn_test_kthread_fn, NULL, + drgn_test_kthread = kthread_create(drgn_test_kthread_fn, + (void *)0xb0ba000, "drgn_test_kthread"); if (!drgn_test_kthread) return -1; @@ -1228,6 +1342,9 @@ int drgn_test_function(int x) return x + 1; } +char drgn_test_data[] = "abc"; +const char drgn_test_rodata[] = "def"; + // kmodify #ifdef __x86_64__ @@ -1385,13 +1502,130 @@ DEFINE_KMODIFY_TEST_ARGS( ) #endif +#ifdef CONFIG_SYSFS + +// Crash from an IRQ handler on architectures where drgn supports unwinding +// through IRQ handlers. 
+#ifdef __x86_64__ +#define DRGN_TEST_IRQ_CRASH +#endif + +static __noreturn noinline_for_stack void drgn_test_crash_func(struct irq_work *work) +{ + panic("drgn_test\n"); +} + +#ifdef DRGN_TEST_IRQ_CRASH +static DEFINE_IRQ_WORK(drgn_test_crash_irq_work, drgn_test_crash_func); +#endif + +static ssize_t drgn_test_crash_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int ret, val; + + ret = kstrtoint(buf, 0, &val); + if (ret < 0) + return ret; + if (val != 1) + return -EINVAL; + +#ifdef DRGN_TEST_IRQ_CRASH + preempt_disable(); + irq_work_queue(&drgn_test_crash_irq_work); + // Spin until we get interrupted and crash. + while (1); +#else + drgn_test_crash_func(NULL); +#endif +} + +static struct kobj_attribute drgn_test_crash_attr = + __ATTR(crash, 0200, NULL, drgn_test_crash_store); + +static ssize_t drgn_test_boottime_seconds_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%lld\n", ktime_get_boottime_seconds()); +} + +static struct kobj_attribute drgn_test_boottime_seconds_attr = + __ATTR(boottime_seconds, 0444, drgn_test_boottime_seconds_show, NULL); + +static ssize_t drgn_test_coarse_boottime_ns_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%llu\n", ktime_get_coarse_boottime_ns()); +} + +static struct kobj_attribute drgn_test_coarse_boottime_ns_attr = + __ATTR(coarse_boottime_ns, 0444, drgn_test_coarse_boottime_ns_show, + NULL); + +static ssize_t drgn_test_clocktai_seconds_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%lld\n", ktime_get_clocktai_seconds()); +} + +static struct kobj_attribute drgn_test_clocktai_seconds_attr = + __ATTR(clocktai_seconds, 0444, drgn_test_clocktai_seconds_show, NULL); + +static ssize_t drgn_test_coarse_clocktai_ns_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%llu\n", 
ktime_get_coarse_clocktai_ns()); +} + +static struct kobj_attribute drgn_test_coarse_clocktai_ns_attr = + __ATTR(coarse_clocktai_ns, 0444, drgn_test_coarse_clocktai_ns_show, + NULL); + +static struct attribute_group drgn_test_attr_group = { + .attrs = (struct attribute *[]){ + &drgn_test_crash_attr.attr, + &drgn_test_boottime_seconds_attr.attr, + &drgn_test_coarse_boottime_ns_attr.attr, + &drgn_test_clocktai_seconds_attr.attr, + &drgn_test_coarse_clocktai_ns_attr.attr, + NULL, + }, +}; + +static struct kobject *drgn_test_kobj; + +static int __init drgn_test_sysfs_init(void) +{ + drgn_test_kobj = kobject_create_and_add("drgn_test", kernel_kobj); + if (!drgn_test_kobj) + return -ENOMEM; + + return sysfs_create_group(drgn_test_kobj, &drgn_test_attr_group); +} + +static void drgn_test_sysfs_exit(void) +{ + kobject_put(drgn_test_kobj); +} +#else +static inline int drgn_test_sysfs_init(void) { return 0; } +static inline void drgn_test_sysfs_exit(void) {} +#endif + static void drgn_test_exit(void) { + drgn_test_sysfs_exit(); drgn_test_slab_exit(); drgn_test_percpu_exit(); drgn_test_maple_tree_exit(); drgn_test_mm_exit(); drgn_test_net_exit(); + drgn_test_page_pool_exit(); drgn_test_stack_trace_exit(); drgn_test_radix_tree_exit(); drgn_test_xarray_exit(); @@ -1413,6 +1647,9 @@ static int __init drgn_test_init(void) if (ret) goto out; ret = drgn_test_net_init(); + if (ret) + goto out; + ret = drgn_test_page_pool_init(); if (ret) goto out; ret = drgn_test_percpu_init(); @@ -1436,6 +1673,9 @@ static int __init drgn_test_init(void) if (ret) goto out; ret = drgn_test_idr_init(); + if (ret) + goto out; + ret = drgn_test_sysfs_init(); out: if (ret) drgn_test_exit(); diff --git a/tests/linux_kernel/test_debug_info.py b/tests/linux_kernel/test_debug_info.py index 75ccabf50..6dd1d37cd 100644 --- a/tests/linux_kernel/test_debug_info.py +++ b/tests/linux_kernel/test_debug_info.py @@ -3,48 +3,146 @@ import os from pathlib import Path -import unittest +import tempfile -from drgn import 
Program +from drgn import ( + DebugInfoOptions, + KmodSearchMethod, + MainModule, + Program, + RelocatableModule, +) +from drgn.helpers.linux.module import find_module from tests import modifyenv from tests.linux_kernel import LinuxKernelTestCase, skip_unless_have_test_kmod -KALLSYMS_PATH = Path("/proc/kallsyms") +def iter_proc_modules(): + try: + f = open("/proc/modules", "r") + except FileNotFoundError: + return + with f: + for line in f: + tokens = line.split() + yield tokens[0], int(tokens[5], 16) -@unittest.skipUnless( - KALLSYMS_PATH.exists(), "kernel does not have kallsyms (CONFIG_KALLSYMS)" -) -@skip_unless_have_test_kmod -class TestModuleDebugInfo(LinuxKernelTestCase): - # Arbitrary symbol that we can use to check that the module debug info was - # loaded. - SYMBOL = "drgn_test_function" - - def setUp(self): - super().setUp() - with KALLSYMS_PATH.open() as f: - for line in f: - tokens = line.split() - if tokens[2] == self.SYMBOL: - self.symbol_address = int(tokens[0], 16) + +class TestDebugInfo(LinuxKernelTestCase): + def test_debug_info(self): + # This is actually two test cases squished into one to avoid indexing + # vmlinux another time. 
+ prog = Program() + prog.set_kernel() + prog.set_enabled_debug_info_finders([]) + + with self.subTest("vmlinux_no_build_id"): + for module, _ in prog.loaded_modules(): + if isinstance(module, MainModule): + module.build_id = None break else: - self.fail(f"{self.SYMBOL!r} symbol not found") + self.fail("main module not found") + prog.load_debug_info([self.prog.main_module().debug_file_path]) + self.assertEqual( + prog.main_module().debug_file_path, + self.prog.main_module().debug_file_path, + ) + + with self.subTest("kmod_walk"), tempfile.TemporaryDirectory() as temp_dir: + temp_dir = Path(temp_dir) + found_modules = set() + for i, module in enumerate(self.prog.modules()): + if isinstance(module, RelocatableModule) and module.debug_file_path: + found_modules.add(module.name) + link = temp_dir / str(i) / (module.name + ".ko") + link.parent.mkdir() + link.symlink_to(module.debug_file_path) - def _test_module_debug_info(self, use_sys_module): - old_use_sys_module = int(os.environ.get("DRGN_USE_SYS_MODULE", "1")) != 0 - with modifyenv({"DRGN_USE_SYS_MODULE": "1" if use_sys_module else "0"}): - if old_use_sys_module == use_sys_module: - prog = self.prog + modules = [ + module + for module, _ in prog.loaded_modules() + if module.name in found_modules + ] + prog.find_standard_debug_info( + modules, + options=DebugInfoOptions( + kernel_directories=(temp_dir,), try_kmod=KmodSearchMethod.WALK + ), + ) + for module in modules: + with self.subTest(module=module.name): + self.assertIsNotNone(module.debug_file_path) + + +class TestModule(LinuxKernelTestCase): + def test_loaded_modules(self): + expected = [("kernel", None), *iter_proc_modules()] + + loaded_modules = [] + for module, _ in self.prog.loaded_modules(): + if isinstance(module, RelocatableModule): + loaded_modules.append((module.name, module.address)) else: - prog = Program() - prog.set_kernel() - self._load_debug_info(prog) - self.assertEqual(prog.symbol(self.SYMBOL).address, self.symbol_address) + 
loaded_modules.append((module.name, None)) + + self.assertCountEqual(loaded_modules, expected) - def test_module_debug_info_use_proc_and_sys(self): - self._test_module_debug_info(True) + @skip_unless_have_test_kmod + def test_find(self): + self.assertEqual(self.prog.main_module().name, "kernel") + for name, address in iter_proc_modules(): + if name == "drgn_test": + self.assertEqual( + self.prog.relocatable_module(name, address).name, "drgn_test" + ) + break + else: + self.fail("test module not found") + + @skip_unless_have_test_kmod + def test_find_by_obj(self): + for module in self.prog.modules(): + if module.name == "drgn_test": + break + else: + self.fail("test module not found") + + module_obj = find_module(self.prog, "drgn_test") + self.assertEqual(self.prog.linux_kernel_loadable_module(module_obj), module) + self.assertEqual( + self.prog.linux_kernel_loadable_module(module_obj, create=True), module + ) + + def test_no_sys_module(self): + # Test that we get the same modules with and without using /sys/module. 
+ + def module_dict(prog): + return { + (module.name, module.address): ( + ( + None + if module.address_ranges is None + else sorted(module.address_ranges) + ), + module.build_id, + dict(module.section_addresses), + ) + for module, _ in prog.loaded_modules() + if isinstance(module, RelocatableModule) + } + + use_sys_module = int(os.environ.get("DRGN_USE_SYS_MODULE", "1")) != 0 + + with modifyenv({"DRGN_USE_SYS_MODULE": str(int(not use_sys_module))}): + prog = Program() + prog.set_kernel() + + if use_sys_module: + with_sys_module = module_dict(self.prog) + without_sys_module = module_dict(prog) + else: + with_sys_module = module_dict(prog) + without_sys_module = module_dict(self.prog) - def test_module_debug_info_use_core_dump(self): - self._test_module_debug_info(False) + self.assertEqual(with_sys_module, without_sys_module) diff --git a/tests/linux_kernel/test_stack_trace.py b/tests/linux_kernel/test_stack_trace.py index d414306a9..870c75d66 100644 --- a/tests/linux_kernel/test_stack_trace.py +++ b/tests/linux_kernel/test_stack_trace.py @@ -1,12 +1,15 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later +import logging import os +import re import unittest from _drgn_util.platform import NORMALIZED_MACHINE_NAME -from drgn import Object, Program, reinterpret -from tests import assertReprPrettyEqualsStr, modifyenv +from drgn import Object, Program, TypeMember, reinterpret +from drgn.helpers.linux import load_module_kallsyms, load_vmlinux_kallsyms +from tests import assertReprPrettyEqualsStr, drgn_log_level, modifyenv from tests.linux_kernel import ( LinuxKernelTestCase, fork_and_stop, @@ -59,6 +62,60 @@ def test_by_pid_dwarf(self): def test_by_pid_orc(self): self._test_by_pid(True) + def _check_logged_orc_message(self, captured_logs, module): + # To be sure that we actually used ORC to unwind through the drgn_test + # stack frames, search for the log output. 
We don't know which ORC + # version is used, so just ensure that we have a log line that mentions + # loading ORC. + expr = re.compile( + r"DEBUG:drgn:Loaded built-in ORC \(v\d+\) for module " + module + ) + for line in captured_logs.output: + if expr.fullmatch(line): + break + else: + self.fail(f"Did not load built-in ORC for {module}") + + @unittest.skipUnless( + NORMALIZED_MACHINE_NAME == "x86_64", + f"{NORMALIZED_MACHINE_NAME} does not use ORC", + ) + @skip_unless_have_test_kmod + def test_by_pid_builtin_orc(self): + # ORC was introduced in kernel 4.14. Detect the presence of ORC or skip + # the test. + try: + self.prog.symbol("__start_orc_unwind") + except LookupError: + ver = self.prog["UTS_RELEASE"].string_().decode() + self.skipTest(f"ORC is not available for {ver}") + + with drgn_log_level(logging.DEBUG): + # Create a program with the core kernel debuginfo loaded, + # but without module debuginfo. Load a symbol finder using + # kallsyms so that the module's stack traces can still have + # usable frame names. + prog = Program() + prog.set_kernel() + prog.load_debug_info(main=True) + # Now that vmlinux is loaded, enumerate all the kernel modules so + # that a drgn_module is created to hold the ORC data + prog.create_loaded_modules() + kallsyms = load_module_kallsyms(prog) + prog.register_symbol_finder("module_kallsyms", kallsyms, enable_index=1) + for thread in prog.threads(): + if b"drgn_test_kthread".startswith(thread.object.comm.string_()): + pid = thread.tid + break + else: + self.fail("couldn't find drgn_test_kthread") + # We must set drgn's log level manually, because it won't log messages + # to the logger if it isn't enabled for them. 
+ with self.assertLogs("drgn", logging.DEBUG) as log: + self._test_drgn_test_kthread_trace(prog.stack_trace(pid)) + + self._check_logged_orc_message(log, "drgn_test") + @skip_unless_have_test_kmod def test_by_pt_regs(self): pt_regs = self.prog["drgn_test_kthread_pt_regs"] @@ -104,6 +161,84 @@ def test_locals(self): else: self.fail("Couldn't find drgn_test_kthread_fn3 frame") + @unittest.skipUnless( + NORMALIZED_MACHINE_NAME == "x86_64", + f"{NORMALIZED_MACHINE_NAME} does not use ORC", + ) + def test_vmlinux_builtin_orc(self): + # ORC was introduced in kernel 4.14. Detect the presence of ORC or skip + # the test. + try: + self.prog.symbol("__start_orc_unwind") + except LookupError: + ver = self.prog["UTS_RELEASE"].string_().decode() + self.skipTest(f"ORC is not available for {ver}") + + with drgn_log_level(logging.DEBUG): + # It is difficult to test stack unwinding in a program without also + # loading types, which necessarily will also make DWARF CFI and ORC + # available in the debug file. The way we get around this is by creating + # a new program with no debuginfo, getting a pt_regs from the program + # that has debuginfo, and then using that to unwind the kernel. We still + # need a symbol finder, and we'll need the Module API to recognize the + # kernel address range correctly. + prog = Program() + prog.set_kernel() + prog.register_symbol_finder( + "vmlinux_kallsyms", load_vmlinux_kallsyms(prog), enable_index=0 + ) + main = prog.main_module(name="kernel", create=True) + main.address_range = self.prog.main_module().address_range + + # Luckily, all drgn cares about for x86_64 pt_regs is that it is a + # structure. Rather than creating a matching struct pt_regs definition, + # we can just create a dummy one of the correct size: + # struct pt_regs { unsigned char[size]; }; + # Drgn will happily use that and reinterpret the bytes correctly. 
+ real_pt_regs_type = self.prog.type("struct pt_regs") + fake_pt_regs_type = prog.struct_type( + tag="pt_regs", + size=real_pt_regs_type.size, + members=[ + TypeMember( + prog.array_type( + prog.int_type("unsigned char", 1, False), + real_pt_regs_type.size, + ), + "data", + ), + ], + ) + + with fork_and_stop() as pid: + trace = self.prog.stack_trace(pid) + regs_dict = trace[0].registers() + pt_regs_obj = Object( + self.prog, + real_pt_regs_type, + { + "bp": regs_dict["rbp"], + "sp": regs_dict["rsp"], + "ip": regs_dict["rip"], + "r15": regs_dict["r15"], + }, + ) + fake_pt_regs_obj = Object.from_bytes_( + prog, fake_pt_regs_type, pt_regs_obj.to_bytes_() + ) + # We must set drgn's log level manually, because it won't log messages + # to the logger if it isn't enabled for them. + with self.assertLogs("drgn", logging.DEBUG) as log: + no_debuginfo_trace = prog.stack_trace(fake_pt_regs_obj) + + dwarf_pcs = [] + for frame in trace: + if not dwarf_pcs or dwarf_pcs[-1] != frame.pc: + dwarf_pcs.append(frame.pc) + orc_pcs = [frame.pc for frame in no_debuginfo_trace] + self.assertEqual(dwarf_pcs, orc_pcs) + self._check_logged_orc_message(log, "kernel") + def test_registers(self): # Smoke test that we get at least one register and that # StackFrame.registers() agrees with StackFrame.register(). 
diff --git a/tests/linux_kernel/test_symbol.py b/tests/linux_kernel/test_symbol.py index 702e874af..b6724923b 100644 --- a/tests/linux_kernel/test_symbol.py +++ b/tests/linux_kernel/test_symbol.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: LGPL-2.1-or-later from drgn import SymbolBinding, SymbolKind -from tests.linux_kernel import LinuxKernelTestCase +from tests.linux_kernel import LinuxKernelTestCase, skip_unless_have_test_kmod class TestSymbol(LinuxKernelTestCase): @@ -11,3 +11,39 @@ def test_global_symbol(self): self.assertEqual(symbol.name, "jiffies") self.assertEqual(symbol.binding, SymbolBinding.GLOBAL) self.assertEqual(symbol.kind, SymbolKind.OBJECT) + + @skip_unless_have_test_kmod + def test_module_function_symbol(self): + symbol = self.prog.symbol("drgn_test_function") + self.assertEqual(symbol.name, "drgn_test_function") + self.assertEqual(symbol.binding, SymbolBinding.GLOBAL) + self.assertEqual(symbol.kind, SymbolKind.FUNC) + + symbol = self.prog.symbol(symbol.address) + self.assertEqual(symbol.name, "drgn_test_function") + self.assertEqual(symbol.binding, SymbolBinding.GLOBAL) + self.assertEqual(symbol.kind, SymbolKind.FUNC) + + @skip_unless_have_test_kmod + def test_module_data_symbol(self): + symbol = self.prog.symbol("drgn_test_data") + self.assertEqual(symbol.name, "drgn_test_data") + self.assertEqual(symbol.binding, SymbolBinding.GLOBAL) + self.assertEqual(symbol.kind, SymbolKind.OBJECT) + + symbol = self.prog.symbol(symbol.address) + self.assertEqual(symbol.name, "drgn_test_data") + self.assertEqual(symbol.binding, SymbolBinding.GLOBAL) + self.assertEqual(symbol.kind, SymbolKind.OBJECT) + + @skip_unless_have_test_kmod + def test_module_rodata_symbol(self): + symbol = self.prog.symbol("drgn_test_rodata") + self.assertEqual(symbol.name, "drgn_test_rodata") + self.assertEqual(symbol.binding, SymbolBinding.GLOBAL) + self.assertEqual(symbol.kind, SymbolKind.OBJECT) + + symbol = self.prog.symbol(symbol.address) + self.assertEqual(symbol.name, 
"drgn_test_rodata") + self.assertEqual(symbol.binding, SymbolBinding.GLOBAL) + self.assertEqual(symbol.kind, SymbolKind.OBJECT) diff --git a/tests/linux_kernel/test_threads.py b/tests/linux_kernel/test_threads.py index f363403c7..aa778a42c 100644 --- a/tests/linux_kernel/test_threads.py +++ b/tests/linux_kernel/test_threads.py @@ -13,11 +13,8 @@ def test_threads(self): NUM_PROCS = 12 barrier = Barrier(NUM_PROCS + 1) - def proc_func(): - barrier.wait() - try: - procs = [Process(target=proc_func) for _ in range(NUM_PROCS)] + procs = [Process(target=barrier.wait) for _ in range(NUM_PROCS)] for proc in procs: proc.start() pids = {thread.tid for thread in self.prog.threads()} diff --git a/tests/linux_kernel/tools/test_fsrefs.py b/tests/linux_kernel/tools/test_fsrefs.py index db6260224..7d44333fe 100644 --- a/tests/linux_kernel/tools/test_fsrefs.py +++ b/tests/linux_kernel/tools/test_fsrefs.py @@ -16,6 +16,7 @@ import tempfile import unittest +from drgn import container_of from drgn.helpers.linux.fs import fget from drgn.helpers.linux.pid import find_task from tests.linux_kernel import ( @@ -231,6 +232,50 @@ def test_super_block_on_block_device(self): ), ) + @skip_unless_have_test_disk + def test_btrfs_subvolume(self): + disk = os.environ["DRGN_TEST_DISK"] + with contextlib.ExitStack() as exit_stack: + subprocess.check_call(["mkfs.btrfs", "-qf", disk]) + + mount(disk, self._tmp, "btrfs") + exit_stack.callback(umount, self._tmp) + + subvol = self._tmp / "subvol" + subprocess.check_call(["btrfs", "subvolume", "create", subvol]) + + top_file = self._tmp / "file" + top_fd = os.open(top_file, os.O_CREAT | os.O_WRONLY, 0o600) + exit_stack.callback(os.close, top_fd) + top_regex = rf"pid {os.getpid()} \(.*\) fd {top_fd} \(struct file \*\)0x[0-9a-f]+ {re.escape(str(top_file))}" + + subvol_file = subvol / "file" + subvol_fd = os.open(subvol_file, os.O_CREAT | os.O_WRONLY, 0o600) + exit_stack.callback(os.close, subvol_fd) + subvol_regex = rf"pid {os.getpid()} \(.*\) fd {subvol_fd} 
\(struct file \*\)0x[0-9a-f]+ {re.escape(str(subvol_file))}" + + subvol_output = self.run_and_capture( + "--check", "tasks", "--btrfs-subvolume", str(subvol) + ) + self.assertRegex(subvol_output, subvol_regex) + self.assertNotRegex(subvol_output, top_regex) + + top_root = container_of( + fget(find_task(self.prog, os.getpid()), top_fd).f_inode, + "struct btrfs_inode", + "vfs_inode", + ).root + top_output = self.run_and_capture( + "--check", "tasks", "--btrfs-subvolume-pointer", hex(top_root) + ) + self.assertRegex(top_output, top_regex) + self.assertNotRegex(top_output, subvol_regex) + + def test_not_btrfs(self): + with self.assertRaises(SystemExit) as cm: + main(self.prog, ["--check", "tasks", "--btrfs-subvolume", "/proc"]) + self.assertIn("not on Btrfs", cm.exception.code) + def test_binfmt_misc(self): for mnt in iter_mounts(): if mnt.fstype == "binfmt_misc": diff --git a/tests/linux_kernel/vmcore/test_vmcore.py b/tests/linux_kernel/vmcore/test_vmcore.py index 7ad7f75eb..595de1216 100644 --- a/tests/linux_kernel/vmcore/test_vmcore.py +++ b/tests/linux_kernel/vmcore/test_vmcore.py @@ -53,18 +53,37 @@ def test_crashed_thread(self): # why anyone would run these tests from kdump otherwise. self.assertEqual(crashed_thread.object.comm.string_(), b"selfdestruct") + def _test_crashed_thread_stack_trace(self, trace): + # This assumes that we crashed using the drgn_test kmod. Note that on + # supported architectures, drgn_test_crash_func() is called from an IRQ + # handler that interrupts drgn_test_crash_store(). 
+ trace_iter = iter(trace) + for frame in trace_iter: + if frame.name == "drgn_test_crash_func": + break + else: + self.fail("drgn_test_crash_func frame not found") + + for frame in trace_iter: + if frame.name == "drgn_test_crash_store": + break + else: + self.fail( + "drgn_test_crash_store frame not found below drgn_test_crash_func" + ) + def test_crashed_thread_stack_trace(self): self._skip_if_cpu0_on_s390x() - self.assertIn("sysrq", str(self.prog.crashed_thread().stack_trace())) + self._test_crashed_thread_stack_trace(self.prog.crashed_thread().stack_trace()) def test_crashed_thread_stack_trace_by_tid(self): self._skip_if_cpu0_on_s390x() - self.assertIn( - "sysrq", str(self.prog.stack_trace(self.prog.crashed_thread().tid)) + self._test_crashed_thread_stack_trace( + self.prog.stack_trace(self.prog.crashed_thread().tid) ) def test_crashed_thread_stack_trace_by_task_struct(self): self._skip_if_cpu0_on_s390x() - self.assertIn( - "sysrq", str(self.prog.stack_trace(self.prog.crashed_thread().object)) + self._test_crashed_thread_stack_trace( + self.prog.stack_trace(self.prog.crashed_thread().object) ) diff --git a/tests/resources/crashme.alt.zst b/tests/resources/crashme.alt.zst new file mode 100644 index 000000000..1aab40796 Binary files /dev/null and b/tests/resources/crashme.alt.zst differ diff --git a/tests/resources/crashme.core.zst b/tests/resources/crashme.core.zst new file mode 100644 index 000000000..215fe38e8 Binary files /dev/null and b/tests/resources/crashme.core.zst differ diff --git a/tests/resources/crashme.dwz.zst b/tests/resources/crashme.dwz.zst new file mode 100755 index 000000000..79dd123fd Binary files /dev/null and b/tests/resources/crashme.dwz.zst differ diff --git a/tests/resources/crashme.so.dwz.zst b/tests/resources/crashme.so.dwz.zst new file mode 100755 index 000000000..df14e0fd9 Binary files /dev/null and b/tests/resources/crashme.so.dwz.zst differ diff --git a/tests/resources/crashme.so.zst b/tests/resources/crashme.so.zst new file mode 
100755 index 000000000..a31a002ad Binary files /dev/null and b/tests/resources/crashme.so.zst differ diff --git a/tests/resources/crashme.zst b/tests/resources/crashme.zst new file mode 100755 index 000000000..670858840 Binary files /dev/null and b/tests/resources/crashme.zst differ diff --git a/tests/resources/crashme_pie.core.zst b/tests/resources/crashme_pie.core.zst new file mode 100644 index 000000000..a57ce2a64 Binary files /dev/null and b/tests/resources/crashme_pie.core.zst differ diff --git a/tests/resources/crashme_pie.zst b/tests/resources/crashme_pie.zst new file mode 100755 index 000000000..126347c11 Binary files /dev/null and b/tests/resources/crashme_pie.zst differ diff --git a/tests/resources/crashme_pie_no_headers.core.zst b/tests/resources/crashme_pie_no_headers.core.zst new file mode 100644 index 000000000..db67b9136 Binary files /dev/null and b/tests/resources/crashme_pie_no_headers.core.zst differ diff --git a/tests/resources/crashme_static.core.zst b/tests/resources/crashme_static.core.zst new file mode 100644 index 000000000..db207fbc6 Binary files /dev/null and b/tests/resources/crashme_static.core.zst differ diff --git a/tests/resources/crashme_static.zst b/tests/resources/crashme_static.zst new file mode 100755 index 000000000..891ead1e2 Binary files /dev/null and b/tests/resources/crashme_static.zst differ diff --git a/tests/resources/crashme_static_pie.core.zst b/tests/resources/crashme_static_pie.core.zst new file mode 100644 index 000000000..2e2ef463b Binary files /dev/null and b/tests/resources/crashme_static_pie.core.zst differ diff --git a/tests/resources/crashme_static_pie.zst b/tests/resources/crashme_static_pie.zst new file mode 100755 index 000000000..03fc71cac Binary files /dev/null and b/tests/resources/crashme_static_pie.zst differ diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 000000000..0660a754c --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,138 @@ +# Copyright (c) 2025, Oracle and/or its 
affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + + +import os +import sys +import tempfile +import traceback +import types + +import drgn.cli +from tests import TestCase + + +class TestCli(TestCase): + def run_cli(self, args, *, input=None): + stdout_r, stdout_w = os.pipe() + stderr_r, stderr_w = os.pipe() + if input is not None: + stdin_r, stdin_w = os.pipe() + + pid = os.fork() + if pid == 0: + try: + os.close(stdout_r) + sys.stdout = open(stdout_w, "w") + os.close(stderr_r) + sys.stderr = open(stderr_w, "w") + + if input is not None: + os.close(stdin_w) + sys.stdin = open(stdin_r, "r") + + sys.argv = ["drgn"] + args + + drgn.cli._main() + finally: + exception = sys.exc_info()[1] is not None + if exception: + traceback.print_exc() + sys.stdout.flush() + sys.stderr.flush() + os._exit(1 if exception else 0) + + os.close(stdout_w) + os.close(stderr_w) + + if input is not None: + os.close(stdin_r) + with open(stdin_w, "w") as f: + f.write(input) + + with open(stdout_r, "r") as f: + stdout = f.read() + with open(stderr_r, "r") as f: + stderr = f.read() + + _, wstatus = os.waitpid(pid, 0) + if not os.WIFEXITED(wstatus) or os.WEXITSTATUS(wstatus) != 0: + if os.WIFEXITED(wstatus): + msg = f"Exited with status {os.WEXITSTATUS(wstatus)}" + elif os.WIFSIGNALED(wstatus): + msg = f"Terminated by signal {os.WTERMSIG(wstatus)}" + else: + msg = "Exited abnormally" + self.fail( + f"""\ +{msg} +STDOUT: +{stdout} +STDERR: +{stderr} +""" + ) + + return types.SimpleNamespace(stdout=stdout, stderr=stderr) + + def test_e(self): + script = r""" +import sys + +assert drgn.get_default_prog() is prog +assert __name__ == "__main__" +assert "__file__" not in globals() +assert sys.path[0] == "" +print(sys.argv) +""" + proc = self.run_cli( + ["--quiet", "--pid", "0", "--no-default-symbols", "-e", script, "pass"] + ) + self.assertEqual(proc.stdout, "['-e', 'pass']\n") + + def test_e_empty(self): + self.run_cli( + ["--quiet", "--pid", "0", "--no-default-symbols", "-e", ""], + # This 
shouldn't be executed. + input="raise Exception('-e was ignored')", + ) + + def test_script(self): + with tempfile.NamedTemporaryFile() as f: + f.write( + rb""" +assert "drgn" not in globals() + +import drgn +import os.path +import sys + +assert drgn.get_default_prog() is prog +assert __name__ == "__main__" +assert __file__ == sys.argv[0] +assert sys.path[0] == os.path.dirname(__file__) +print(sys.argv) +""" + ) + f.flush() + proc = self.run_cli( + ["--quiet", "--pid", "0", "--no-default-symbols", f.name, "pass"] + ) + self.assertEqual(proc.stdout, f"[{f.name!r}, 'pass']\n") + + def test_pipe(self): + script = r""" +import sys + +assert drgn.get_default_prog() is prog +assert __name__ == "__main__" +assert __file__ == "" +assert sys.path[0] == "" +# Dummy if statement to test handling of multi-line blocks. +if True: + print(sys.argv) +""" + proc = self.run_cli( + ["--quiet", "--pid", "0", "--no-default-symbols"], input=script + ) + self.assertEqual(proc.stdout, "['']\n") diff --git a/tests/test_debug_info.py b/tests/test_debug_info.py new file mode 100644 index 000000000..4f3a9f53b --- /dev/null +++ b/tests/test_debug_info.py @@ -0,0 +1,2776 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# SPDX-License-Identifier: LGPL-2.1-or-later + + +import binascii +import contextlib +import http.server +import os +import os.path +from pathlib import Path +import re +import shutil +import socket +import socketserver +import tempfile +import threading +import unittest +import unittest.mock + +from _drgn_util.elf import ET, PT, SHF, SHT +from drgn import ( + DebugInfoOptions, + MainModule, + MissingDebugInfoError, + ModuleFileStatus, + Program, + SharedLibraryModule, + SupplementaryFileKind, + VdsoModule, +) +from tests import TestCase, modifyenv +from tests.dwarfwriter import create_dwarf_file +from tests.elfwriter import ElfSection, create_elf_file +from tests.resources import get_resource + +ALLOCATED_SECTION = ElfSection( + name=".bss", + sh_type=SHT.PROGBITS, + sh_flags=SHF.ALLOC, + p_type=PT.LOAD, + vaddr=0x10000000, + memsz=0x1000, +) + + +@contextlib.contextmanager +def NamedTemporaryElfFile(*, loadable=True, debug=True, sections=(), **kwargs): + if loadable: + sections = (ALLOCATED_SECTION,) + sections + with tempfile.NamedTemporaryFile() as f: + if debug: + f.write(create_dwarf_file((), sections=sections, **kwargs)) + else: + f.write(create_elf_file(ET.EXEC, sections=sections, **kwargs)) + f.flush() + yield f + + +class TestModuleTryFile(TestCase): + def setUp(self): + self.prog = Program() + self.prog.set_enabled_debug_info_finders([]) + + def test_want_both(self): + module = self.prog.extra_module("/foo/bar", create=True) + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + for status in set(ModuleFileStatus) - {ModuleFileStatus.HAVE}: + for file in ("loaded", "debug"): + with self.subTest(file=file): + self.assertEqual(getattr(module, f"wants_{file}_file")(), False) + # Test that we can't unset the 
file once it's set. + status_attr = file + "_file_status" + with self.subTest(from_=ModuleFileStatus.HAVE, to=status): + self.assertRaises( + ValueError, setattr, module, status_attr, status + ) + self.assertEqual( + getattr(module, status_attr), ModuleFileStatus.HAVE + ) + + def test_want_both_not_loadable(self): + module = self.prog.extra_module("/foo/bar", create=True) + with NamedTemporaryElfFile(loadable=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + def test_want_both_no_debug(self): + module = self.prog.extra_module("/foo/bar", create=True) + with NamedTemporaryElfFile(debug=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + + def test_want_both_is_neither(self): + module = self.prog.extra_module("/foo/bar", create=True) + with NamedTemporaryElfFile(loadable=False, debug=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertIsNone(module.debug_file_path) + + def test_only_want_loaded(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.debug_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.debug_file_path) + + def 
test_only_want_loaded_not_loadable(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.debug_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile(loadable=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.debug_file_path) + + def test_only_want_loaded_no_debug(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.debug_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile(debug=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.debug_file_path) + + def test_only_want_loaded_is_neither(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.debug_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile(loadable=False, debug=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.debug_file_path) + + def test_only_want_debug(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.loaded_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + def test_only_want_debug_not_loadable(self): + module = self.prog.extra_module("/foo/bar", create=True) + 
module.loaded_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile(loadable=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + def test_only_want_debug_no_debug(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.loaded_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile(debug=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + + def test_only_want_debug_is_neither(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.loaded_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile(loadable=False, debug=False) as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + + def test_want_neither(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.loaded_file_status = ModuleFileStatus.DONT_WANT + module.debug_file_status = ModuleFileStatus.DONT_WANT + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_WANT) + self.assertIsNone(module.debug_file_path) + + def test_separate_files_loaded_first(self): + module = self.prog.extra_module("/foo/bar", create=True) + with NamedTemporaryElfFile(debug=False) as f1: + 
module.try_file(f1.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f1.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + + with NamedTemporaryElfFile(loadable=False) as f2: + module.try_file(f2.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f1.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f2.name) + + def test_separate_files_debug_first(self): + module = self.prog.extra_module("/foo/bar", create=True) + with NamedTemporaryElfFile(loadable=False) as f1: + module.try_file(f1.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f1.name) + + with NamedTemporaryElfFile(debug=False) as f2: + module.try_file(f2.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f2.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f1.name) + + def test_loadable_then_both(self): + module = self.prog.extra_module("/foo/bar", create=True) + with NamedTemporaryElfFile(debug=False) as f1: + module.try_file(f1.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f1.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + + with NamedTemporaryElfFile() as f2: + module.try_file(f2.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f1.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + 
self.assertEqual(module.debug_file_path, f2.name) + + def test_debug_then_both(self): + module = self.prog.extra_module("/foo/bar", create=True) + with NamedTemporaryElfFile(loadable=False) as f1: + module.try_file(f1.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f1.name) + + with NamedTemporaryElfFile() as f2: + module.try_file(f2.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f2.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f1.name) + + def test_no_build_id_force(self): + module = self.prog.extra_module("/foo/bar", create=True) + with NamedTemporaryElfFile() as f: + module.try_file(f.name, force=True) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + def test_no_build_id_file_has_build_id(self): + module = self.prog.extra_module("/foo/bar", create=True) + with NamedTemporaryElfFile(build_id=b"\x01\x23\x45\x67\x89\xab\xcd\xef") as f: + module.try_file(f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_no_build_id_file_has_build_id_force(self): + module = self.prog.extra_module("/foo/bar", create=True) + with NamedTemporaryElfFile(build_id=b"\x01\x23\x45\x67\x89\xab\xcd\xef") as f: + module.try_file(f.name, force=True) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_build_id_match(self): + module = self.prog.extra_module("/foo/bar", create=True) + 
module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + with NamedTemporaryElfFile(build_id=b"\x01\x23\x45\x67\x89\xab\xcd\xef") as f: + module.try_file(f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_build_id_match_force(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + with NamedTemporaryElfFile(build_id=b"\x01\x23\x45\x67\x89\xab\xcd\xef") as f: + module.try_file(f.name, force=True) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_build_id_mismatch(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + with NamedTemporaryElfFile(build_id=b"\xff\xff\xff\xff") as f: + module.try_file(f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_build_id_mismatch_force(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + with NamedTemporaryElfFile(build_id=b"\xff\xff\xff\xff") as f: + module.try_file(f.name, force=True) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_build_id_missing(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + 
self.assertIsNone(module.debug_file_path) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_build_id_missing_force(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + with NamedTemporaryElfFile() as f: + module.try_file(f.name, force=True) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + def test_gnu_debugaltlink(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + create_dwarf_file( + (), + sections=(ALLOCATED_SECTION,), + build_id=build_id, + gnu_debugaltlink=(alt_path, alt_build_id), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + module.build_id = build_id + + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertRaises(ValueError, module.wanted_supplementary_debug_file) + + module.try_file(binary_path) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual(module.wants_debug_file(), True) + self.assertIsNone(module.debug_file_path) + self.assertIsNone(module.supplementary_debug_file_kind) + self.assertIsNone(module.supplementary_debug_file_path) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(binary_path), + str(alt_path), + alt_build_id, + ), + ) + + with self.assertRaises(ValueError): + module.debug_file_status = 
ModuleFileStatus.HAVE + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + module.debug_file_status = ModuleFileStatus.WANT_SUPPLEMENTARY + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + + module.try_file(alt_path) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, str(binary_path)) + self.assertEqual( + module.supplementary_debug_file_kind, + SupplementaryFileKind.GNU_DEBUGALTLINK, + ) + self.assertEqual(module.supplementary_debug_file_path, str(alt_path)) + self.assertRaises(ValueError, module.wanted_supplementary_debug_file) + + def test_gnu_debugaltlink_build_id_mismatch(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id[::-1])) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + create_dwarf_file( + (), + sections=(ALLOCATED_SECTION,), + build_id=build_id, + gnu_debugaltlink=(alt_path, alt_build_id), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + module.build_id = build_id + + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertRaises(ValueError, module.wanted_supplementary_debug_file) + + module.try_file(binary_path) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertIsNone(module.debug_file_path) + self.assertIsNone(module.supplementary_debug_file_kind) + self.assertIsNone(module.supplementary_debug_file_path) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(binary_path), + str(alt_path), + 
alt_build_id, + ), + ) + + module.try_file(alt_path) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertIsNone(module.debug_file_path) + self.assertIsNone(module.supplementary_debug_file_kind) + self.assertIsNone(module.supplementary_debug_file_path) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(binary_path), + str(alt_path), + alt_build_id, + ), + ) + + def test_gnu_debugaltlink_then_both(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + module.build_id = build_id + with NamedTemporaryElfFile( + build_id=build_id, + gnu_debugaltlink=(alt_path, alt_build_id), + ) as f1: + module.try_file(f1.name) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + + with NamedTemporaryElfFile(build_id=build_id) as f2: + module.try_file(f2.name) + + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f1.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f2.name) + + def test_gnu_debugaltlink_cancel(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(create_dwarf_file((), 
build_id=alt_build_id)) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + module.build_id = build_id + with NamedTemporaryElfFile( + build_id=build_id, + gnu_debugaltlink=(alt_path, alt_build_id), + ) as f: + module.try_file(f.name) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + + module.debug_file_status = ModuleFileStatus.WANT + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.wants_debug_file(), True) + self.assertRaises(ValueError, module.wanted_supplementary_debug_file) + + def test_extra_module_no_address_range(self): + module = self.prog.extra_module("/foo/bar", create=True) + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertIsNone(module.address_range) + self.assertEqual(module.loaded_file_bias, 0) + self.assertEqual(module.debug_file_bias, 0) + + def test_extra_module_address_range(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.address_range = (0x40000000, 0x40001000) + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.address_range, (0x40000000, 0x40001000)) + self.assertEqual(module.loaded_file_bias, 0x30000000) + self.assertEqual(module.debug_file_bias, 0x30000000) + + def test_extra_module_empty_address_range(self): + module = self.prog.extra_module("/foo/bar", create=True) + module.address_range = (0, 0) + with NamedTemporaryElfFile() as f: + module.try_file(f.name) + self.assertEqual(module.address_range, (0, 0)) + self.assertEqual(module.loaded_file_bias, 0) + self.assertEqual(module.debug_file_bias, 0) + + +class TestLinuxUserspaceCoreDump(TestCase): + def setUp(self): + self.prog = Program() + self.prog.debug_info_options.directories = () + self.prog.debug_info_options.debug_link_directories = () + self.prog.set_enabled_debug_info_finders(["standard"]) + + def test_loaded_modules(self): + self.prog.set_core_dump(get_resource("crashme.core")) + + 
loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, True) + loaded_modules.append(module) + found_modules = [] + + with self.subTest(module="main"): + module = self.prog.main_module() + found_modules.append(module) + self.assertEqual(module.name, "/home/osandov/crashme") + self.assertEqual(module.address_range, (0x400000, 0x404010)) + self.assertEqual( + module.build_id.hex(), "99a6524c4df01fbff9b43a6ead3d8e8e6201568b" + ) + + with self.subTest(module="crashme"): + module = self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F6112CACE08 + ) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7F6112CA9000, 0x7F6112CAD010)) + self.assertEqual( + module.build_id.hex(), "7bd58f10e741c3c8fbcf2031aa65f830f933d616" + ) + + with self.subTest(module="libc"): + module = self.prog.shared_library_module("/lib64/libc.so.6", 0x7F6112C94960) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7F6112AAE000, 0x7F6112C9EB70)) + self.assertEqual( + module.build_id.hex(), "77c77fee058b19c6f001cf2cb0371ce3b8341211" + ) + + with self.subTest(module="ld-linux"): + module = self.prog.shared_library_module( + "/lib64/ld-linux-x86-64.so.2", 0x7F6112CEAE68 + ) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7F6112CB6000, 0x7F6112CEC2D8)) + self.assertEqual( + module.build_id.hex(), "91dcd0244204201b616bbf59427771b3751736ce" + ) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7F6112CB4438) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7F6112CB4000, 0x7F6112CB590F)) + self.assertEqual( + module.build_id.hex(), "fdc3e4d463911345fbc6d9cc432e5ebc276e8e03" + ) + + self.assertCountEqual(loaded_modules, found_modules) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, False) + loaded_modules.append(module) + self.assertCountEqual(loaded_modules, 
found_modules) + + def _try_vdso_in_core(self, module): + module.debug_file_status = ModuleFileStatus.DONT_WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + + def test_bias(self): + self.prog.set_core_dump(get_resource("crashme.core")) + self.prog.create_loaded_modules() + + with self.subTest(module="main"): + module = self.prog.main_module() + module.try_file(get_resource("crashme")) + self.assertEqual(module.loaded_file_bias, 0) + self.assertEqual(module.debug_file_bias, 0) + + with self.subTest(module="crashme"): + module = self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F6112CACE08 + ) + module.try_file(get_resource("crashme.so")) + self.assertEqual(module.loaded_file_bias, 0x7F6112CA9000) + self.assertEqual(module.debug_file_bias, 0x7F6112CA9000) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7F6112CB4438) + self._try_vdso_in_core(module) + self.assertEqual(module.loaded_file_bias, 0x7F6112CB4000) + self.assertIsNone(module.debug_file_bias) + + def test_loaded_modules_pie(self): + self.prog.set_core_dump(get_resource("crashme_pie.core")) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, True) + loaded_modules.append(module) + found_modules = [] + + with self.subTest(module="main"): + module = self.prog.main_module() + found_modules.append(module) + self.assertEqual(module.name, "/home/osandov/crashme_pie") + self.assertEqual(module.address_range, (0x557ED343D000, 0x557ED3441018)) + self.assertEqual( + module.build_id.hex(), "eb4ad7aaded3815ab133a6d7784a2c95a4e52998" + ) + + with self.subTest(module="crashme"): + module = self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7FAB2C38DE08 + ) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7FAB2C38A000, 0x7FAB2C38E010)) + self.assertEqual( + module.build_id.hex(), 
"7bd58f10e741c3c8fbcf2031aa65f830f933d616" + ) + + with self.subTest(module="libc"): + module = self.prog.shared_library_module("/lib64/libc.so.6", 0x7FAB2C375960) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7FAB2C18F000, 0x7FAB2C37FB70)) + self.assertEqual( + module.build_id.hex(), "77c77fee058b19c6f001cf2cb0371ce3b8341211" + ) + + with self.subTest(module="ld-linux"): + module = self.prog.shared_library_module( + "/lib64/ld-linux-x86-64.so.2", 0x7FAB2C3CBE68 + ) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7FAB2C397000, 0x7FAB2C3CD2D8)) + self.assertEqual( + module.build_id.hex(), "91dcd0244204201b616bbf59427771b3751736ce" + ) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7FAB2C395438) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7FAB2C395000, 0x7FAB2C39690F)) + self.assertEqual( + module.build_id.hex(), "fdc3e4d463911345fbc6d9cc432e5ebc276e8e03" + ) + + self.assertCountEqual(loaded_modules, found_modules) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, False) + loaded_modules.append(module) + self.assertCountEqual(loaded_modules, found_modules) + + def test_bias_pie(self): + self.prog.set_core_dump(get_resource("crashme_pie.core")) + self.prog.create_loaded_modules() + + with self.subTest(module="main"): + module = self.prog.main_module() + module.try_file(get_resource("crashme_pie")) + self.assertEqual(module.loaded_file_bias, 0x557ED343D000) + self.assertEqual(module.debug_file_bias, 0x557ED343D000) + + with self.subTest(module="crashme"): + module = self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7FAB2C38DE08 + ) + module.try_file(get_resource("crashme.so")) + self.assertEqual(module.loaded_file_bias, 0x7FAB2C38A000) + self.assertEqual(module.debug_file_bias, 0x7FAB2C38A000) + + with self.subTest(module="vdso"): + module = 
self.prog.vdso_module("linux-vdso.so.1", 0x7FAB2C395438) + self._try_vdso_in_core(module) + self.assertEqual(module.loaded_file_bias, 0x7FAB2C395000) + self.assertIsNone(module.debug_file_bias) + + def test_loaded_modules_static(self): + self.prog.set_core_dump(get_resource("crashme_static.core")) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, True) + loaded_modules.append(module) + found_modules = [] + + with self.subTest(module="main"): + module = self.prog.main_module() + found_modules.append(module) + self.assertEqual(module.name, "/home/osandov/crashme_static") + self.assertEqual(module.address_range, (0x400000, 0x4042B8)) + self.assertEqual( + module.build_id.hex(), "a0b6befad9f0883c52c475ba3cee9c549cd082cf" + ) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7FBC73A66438) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7FBC73A66000, 0x7FBC73A6790F)) + self.assertEqual( + module.build_id.hex(), "fdc3e4d463911345fbc6d9cc432e5ebc276e8e03" + ) + + self.assertCountEqual(loaded_modules, found_modules) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, False) + loaded_modules.append(module) + self.assertCountEqual(loaded_modules, found_modules) + + def test_bias_static(self): + self.prog.set_core_dump(get_resource("crashme_static.core")) + self.prog.create_loaded_modules() + + with self.subTest(module="main"): + module = self.prog.main_module() + module.try_file(get_resource("crashme_static")) + self.assertEqual(module.loaded_file_bias, 0x0) + self.assertEqual(module.debug_file_bias, 0x0) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7FBC73A66438) + self._try_vdso_in_core(module) + self.assertEqual(module.loaded_file_bias, 0x7FBC73A66000) + self.assertIsNone(module.debug_file_bias) + + def test_loaded_modules_static_pie(self): + 
self.prog.set_core_dump(get_resource("crashme_static_pie.core")) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, True) + loaded_modules.append(module) + found_modules = [] + + with self.subTest(module="main"): + module = self.prog.main_module() + found_modules.append(module) + self.assertEqual(module.name, "/home/osandov/crashme_static_pie") + self.assertEqual(module.address_range, (0x7FD981DC9000, 0x7FD981DCD278)) + self.assertEqual( + module.build_id.hex(), "3e0bc47f80d7e64724e11fc021a251ed0d35bc2c" + ) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7FD981DC7438) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7FD981DC7000, 0x7FD981DC890F)) + self.assertEqual( + module.build_id.hex(), "fdc3e4d463911345fbc6d9cc432e5ebc276e8e03" + ) + + self.assertCountEqual(loaded_modules, found_modules) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, False) + loaded_modules.append(module) + self.assertCountEqual(loaded_modules, found_modules) + + def test_bias_static_pie(self): + self.prog.set_core_dump(get_resource("crashme_static_pie.core")) + self.prog.create_loaded_modules() + + with self.subTest(module="main"): + module = self.prog.main_module() + module.try_file(get_resource("crashme_static_pie")) + self.assertEqual(module.loaded_file_bias, 0x7FD981DC9000) + self.assertEqual(module.debug_file_bias, 0x7FD981DC9000) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7FD981DC7438) + self._try_vdso_in_core(module) + self.assertEqual(module.loaded_file_bias, 0x7FD981DC7000) + self.assertIsNone(module.debug_file_bias) + + def test_loaded_modules_pie_no_headers(self): + self.prog.set_core_dump(get_resource("crashme_pie_no_headers.core")) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, True) + loaded_modules.append(module) + 
found_modules = [] + + # Without ELF headers saved in the core dump, and without the main ELF + # file, only the main module (with limited information) and vDSO can be + # found. + with self.subTest(module="main"): + module = self.prog.main_module() + found_modules.append(module) + self.assertEqual(module.name, "/home/osandov/crashme_pie") + self.assertIsNone(module.address_range) + self.assertIsNone(module.build_id) + + with self.subTest(module="vdso"): + module = self.prog.vdso_module("linux-vdso.so.1", 0x7F299F607438) + found_modules.append(module) + self.assertEqual(module.address_range, (0x7F299F607000, 0x7F299F60890F)) + self.assertEqual( + module.build_id.hex(), "fdc3e4d463911345fbc6d9cc432e5ebc276e8e03" + ) + + self.assertCountEqual(loaded_modules, found_modules) + + loaded_modules = [] + for module, new in self.prog.loaded_modules(): + self.assertEqual(new, False) + loaded_modules.append(module) + self.assertCountEqual(loaded_modules, found_modules) + + # If we can read the file headers (specifically, the program header + # table and the interpreter path), then we should be able to get all of + # the modules (with limited information). 
+ exe_file = self.enterContext(open(get_resource("crashme_pie"), "rb")) + + def read_headers(address, count, offset, physical): + exe_file.seek(offset) + return exe_file.read(count) + + self.prog.add_memory_segment(0x5623363D6000, 4096, read_headers, False) + + old_loaded_modules = [] + new_loaded_modules = [] + for module, new in self.prog.loaded_modules(): + (new_loaded_modules if new else old_loaded_modules).append(module) + new_found_modules = [] + + with self.subTest(module="main2"): + module = self.prog.main_module() + self.assertIsNone(module.address_range) + self.assertIsNone(module.build_id) + + with self.subTest(module="crashme"): + module = self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F299F5FFE08 + ) + new_found_modules.append(module) + self.assertIsNone(module.address_range) + self.assertIsNone(module.build_id) + + with self.subTest(module="libc"): + module = self.prog.shared_library_module("/lib64/libc.so.6", 0x7F299F5E7960) + new_found_modules.append(module) + self.assertIsNone(module.address_range) + self.assertIsNone(module.build_id) + + with self.subTest(module="ld-linux"): + module = self.prog.shared_library_module( + "/lib64/ld-linux-x86-64.so.2", 0x7F299F63DE68 + ) + new_found_modules.append(module) + self.assertIsNone(module.address_range) + self.assertIsNone(module.build_id) + + self.assertCountEqual(old_loaded_modules, loaded_modules) + self.assertCountEqual(new_loaded_modules, new_found_modules) + + +class TestLoadDebugInfo(TestCase): + def setUp(self): + self.prog = Program() + self.prog.set_core_dump(get_resource("crashme.core")) + self.prog.set_enabled_debug_info_finders([]) + self.finder = unittest.mock.Mock() + self.prog.register_debug_info_finder("mock", self.finder, enable_index=0) + + def test_nothing(self): + self.prog.load_debug_info(None, default=False, main=False) + self.assertFalse(list(self.prog.modules())) + self.finder.assert_not_called() + + def test_empty_list(self): + self.prog.load_debug_info([], 
default=False, main=False) + self.assertFalse(list(self.prog.modules())) + self.finder.assert_not_called() + + def test_no_such_file(self): + with tempfile.TemporaryDirectory() as tmp_dir: + self.prog.load_debug_info([Path(tmp_dir) / "file"]) + self.assertFalse(list(self.prog.modules())) + self.finder.assert_not_called() + + def test_not_elf(self): + with tempfile.NamedTemporaryFile() as f: + f.write(b"hello, world\n") + f.flush() + self.prog.load_debug_info([f.name]) + self.assertFalse(list(self.prog.modules())) + self.finder.assert_not_called() + + def test_no_build_id(self): + with NamedTemporaryElfFile() as f: + self.prog.load_debug_info([f.name]) + self.assertFalse(list(self.prog.modules())) + self.finder.assert_not_called() + + def test_only_main_path(self): + crashme_path = get_resource("crashme") + + self.prog.load_debug_info([crashme_path]) + + # The main module should be created. + self.assertIn(self.prog.main_module(), list(self.prog.modules())) + # The provided path should be used for the main module. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_path), + ) + # Finders shouldn't be called. + self.finder.assert_not_called() + + def test_only_paths(self): + crashme_path = get_resource("crashme") + crashme_so_path = get_resource("crashme.so") + + self.prog.load_debug_info([crashme_path, crashme_so_path]) + + modules = list(self.prog.modules()) + # All loaded modules should be created. + self.assertEqual(len(modules), 5) + # The provided files should be used for their respective modules. 
+ self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_path), + ) + crashme_so_module = next( + module for module in modules if module.name == "/home/osandov/crashme.so" + ) + self.assertEqual( + crashme_so_module.loaded_file_path, + str(crashme_so_path), + ) + self.assertEqual( + crashme_so_module.debug_file_path, + str(crashme_so_path), + ) + # The rest should not have a file. + for module in modules: + if module.name not in ("/home/osandov/crashme", "/home/osandov/crashme.so"): + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + # Finders shouldn't be called. + self.finder.assert_not_called() + + def test_main_by_path(self): + crashme_path = get_resource("crashme") + + self.prog.load_debug_info([crashme_path], main=True) + + # The main module should be created. + self.assertIn(self.prog.main_module(), list(self.prog.modules())) + # The provided path should be used for the main module. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_path), + ) + # Finders shouldn't be called. + self.finder.assert_not_called() + + def test_main_by_finder(self): + crashme_path = get_resource("crashme") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme": + module.try_file(crashme_path) + + self.finder.side_effect = finder + + self.prog.load_debug_info(main=True) + + # The main module should be created. + self.assertIn(self.prog.main_module(), list(self.prog.modules())) + # The finder should be called and set the file for the main module. 
+ self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_path), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_default_by_paths(self): + crashme_path = get_resource("crashme") + crashme_so_path = get_resource("crashme.so") + + self.assertRaises( + MissingDebugInfoError, + self.prog.load_debug_info, + [crashme_path, crashme_so_path], + default=True, + ) + + # All loaded modules should be created. + modules = list(self.prog.modules()) + self.assertEqual(len(modules), 5) + # The provided files should be used for their respective modules. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_path), + ) + crashme_so_module = next( + module for module in modules if module.name == "/home/osandov/crashme.so" + ) + self.assertEqual( + crashme_so_module.loaded_file_path, + str(crashme_so_path), + ) + self.assertEqual( + crashme_so_module.debug_file_path, + str(crashme_so_path), + ) + # The rest should not have a file. + missing_modules = [] + for module in modules: + if module.name not in ("/home/osandov/crashme", "/home/osandov/crashme.so"): + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + missing_modules.append(module) + self.assertEqual(len(missing_modules), 3) + # The finder should be called for the rest. 
+ self.finder.assert_called_once() + self.assertCountEqual(self.finder.call_args[0][0], missing_modules) + + def test_default_by_finder(self): + crashme_path = get_resource("crashme") + crashme_so_path = get_resource("crashme.so") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme": + module.try_file(crashme_path) + elif module.name == "/home/osandov/crashme.so": + module.try_file(crashme_so_path) + + self.finder.side_effect = finder + + self.assertRaises( + MissingDebugInfoError, self.prog.load_debug_info, default=True + ) + + # All loaded modules should be created. + modules = list(self.prog.modules()) + self.assertEqual(len(modules), 5) + # The finder should be called and set the files for the matching + # modules. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_path), + ) + crashme_so_module = next( + module for module in modules if module.name == "/home/osandov/crashme.so" + ) + self.assertEqual( + crashme_so_module.loaded_file_path, + str(crashme_so_path), + ) + self.assertEqual( + crashme_so_module.debug_file_path, + str(crashme_so_path), + ) + # The rest should not have a file. + for module in modules: + if module.name not in ("/home/osandov/crashme", "/home/osandov/crashme.so"): + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + # The finder should be called for all loaded modules. + self.finder.assert_called_once() + self.assertCountEqual(self.finder.call_args[0][0], modules) + + def test_main_gnu_debugaltlink_by_path(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_alt_path = get_resource("crashme.alt") + + self.prog.load_debug_info([crashme_dwz_path, crashme_alt_path], main=True) + + # The main module should be created. 
+ self.assertIn(self.prog.main_module(), list(self.prog.modules())) + # The provided paths should be used for the main module. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + # Finders shouldn't be called. + self.finder.assert_not_called() + + def test_main_gnu_debugaltlink_by_finder(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_alt_path = get_resource("crashme.alt") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme": + module.try_file(crashme_dwz_path) + module.try_file(crashme_alt_path) + + self.finder.side_effect = finder + + self.prog.load_debug_info(main=True) + + # The main module should be created. + self.assertIn(self.prog.main_module(), list(self.prog.modules())) + # The finder should be called and set the files for the main module. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_main_by_path_gnu_debugaltlink_not_found(self): + crashme_dwz_path = get_resource("crashme.dwz") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme": + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + + self.finder.side_effect = finder + + self.assertRaises( + MissingDebugInfoError, + self.prog.load_debug_info, + [crashme_dwz_path], + main=True, + ) + + # The main module should be created. 
+ self.assertIn(self.prog.main_module(), list(self.prog.modules())) + # The provided path should be used for the loaded file. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + # The finder should be called and fail to find the supplementary file + # for the main module. + self.assertEqual( + self.prog.main_module().debug_file_status, + ModuleFileStatus.WANT_SUPPLEMENTARY, + ) + self.assertEqual( + self.prog.main_module().wanted_supplementary_debug_file()[:3], + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(crashme_dwz_path), + "crashme.alt", + ), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_main_by_finder_gnu_debugaltlink_not_found(self): + crashme_dwz_path = get_resource("crashme.dwz") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme": + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + module.try_file(crashme_dwz_path) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + + self.finder.side_effect = finder + + self.assertRaises(MissingDebugInfoError, self.prog.load_debug_info, main=True) + + # The main module should be created. + self.assertIn(self.prog.main_module(), list(self.prog.modules())) + # The finder should be called and set the loaded file for the main + # module but fail to find the supplementary file. 
+ self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_status, + ModuleFileStatus.WANT_SUPPLEMENTARY, + ) + self.assertEqual( + self.prog.main_module().wanted_supplementary_debug_file()[:3], + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(crashme_dwz_path), + "crashme.alt", + ), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_main_by_path_gnu_debugaltlink_by_finder(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_alt_path = get_resource("crashme.alt") + + def finder(modules): + for module in modules: + if ( + module.name == "/home/osandov/crashme" + and module.debug_file_status == ModuleFileStatus.WANT_SUPPLEMENTARY + ): + module.try_file(crashme_alt_path) + + self.finder.side_effect = finder + + self.prog.load_debug_info([crashme_dwz_path], main=True) + + # The main module should be created. + self.assertIn(self.prog.main_module(), list(self.prog.modules())) + # The provided path should be used for the main module. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_dwz_path), + ) + # The finder should be called and set the supplementary file for the + # main module. 
+ self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_main_by_finder_gnu_debugaltlink_by_path(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_alt_path = get_resource("crashme.alt") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme": + module.try_file(crashme_dwz_path) + + self.finder.side_effect = finder + + self.prog.load_debug_info([crashme_alt_path], main=True) + + # The provided path should be used for the supplementary file for the + # main module. + self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + # The finder should be called and set the file for the main module. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_dwz_path), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_main_wants_gnu_debugaltlink_by_path(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_alt_path = get_resource("crashme.alt") + + for module, _ in self.prog.loaded_modules(): + if isinstance(module, MainModule): + module.try_file(crashme_dwz_path) + break + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_status, + ModuleFileStatus.WANT_SUPPLEMENTARY, + ) + + self.prog.load_debug_info([crashme_alt_path], main=True) + + # The main module should be created. + self.assertIn(self.prog.main_module(), list(self.prog.modules())) + # The provided path should be used for the supplementary file. + self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + # Finders shouldn't be called. 
+ self.finder.assert_not_called() + + def test_main_wants_gnu_debugaltlink_by_finder(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_alt_path = get_resource("crashme.alt") + + for module, _ in self.prog.loaded_modules(): + if isinstance(module, MainModule): + module.try_file(crashme_dwz_path) + break + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_status, + ModuleFileStatus.WANT_SUPPLEMENTARY, + ) + + def finder(modules): + for module in modules: + if ( + module.name == "/home/osandov/crashme" + and module.debug_file_status == ModuleFileStatus.WANT_SUPPLEMENTARY + ): + module.try_file(crashme_alt_path) + + self.finder.side_effect = finder + + self.prog.load_debug_info(main=True) + + # The main module should be created. + self.assertIn(self.prog.main_module(), list(self.prog.modules())) + # The finder should be called and set the supplementary file for the + # main module. + self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_main_wants_gnu_debugaltlink_not_found(self): + crashme_dwz_path = get_resource("crashme.dwz") + + for module, _ in self.prog.loaded_modules(): + if isinstance(module, MainModule): + module.try_file(crashme_dwz_path) + break + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_status, + ModuleFileStatus.WANT_SUPPLEMENTARY, + ) + + self.assertRaises(MissingDebugInfoError, self.prog.load_debug_info, main=True) + + # The main module should be created. + self.assertIn(self.prog.main_module(), list(self.prog.modules())) + # The finder should be called and fail to find the supplementary file + # for the main module, but the supplementary file should still be + # wanted. 
+ self.assertEqual( + self.prog.main_module().debug_file_status, + ModuleFileStatus.WANT_SUPPLEMENTARY, + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_default_gnu_debugaltlink_by_paths(self): + crashme_dwz_path = get_resource("crashme.dwz") + crashme_so_dwz_path = get_resource("crashme.so.dwz") + crashme_alt_path = get_resource("crashme.alt") + + self.assertRaises( + MissingDebugInfoError, + self.prog.load_debug_info, + [crashme_dwz_path, crashme_so_dwz_path, crashme_alt_path], + default=True, + ) + + # All loaded modules should be created. + modules = list(self.prog.modules()) + self.assertEqual(len(modules), 5) + # The provided files should be used for their respective modules. + self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_dwz_path), + ) + self.assertEqual( + self.prog.main_module().supplementary_debug_file_path, + str(crashme_alt_path), + ) + crashme_so_module = next( + module for module in modules if module.name == "/home/osandov/crashme.so" + ) + self.assertEqual( + crashme_so_module.loaded_file_path, + str(crashme_so_dwz_path), + ) + self.assertEqual( + crashme_so_module.debug_file_path, + str(crashme_so_dwz_path), + ) + self.assertEqual( + crashme_so_module.supplementary_debug_file_path, + str(crashme_alt_path), + ) + # The rest should not have a file. + missing_modules = [] + for module in modules: + if module.name not in ("/home/osandov/crashme", "/home/osandov/crashme.so"): + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + missing_modules.append(module) + self.assertEqual(len(missing_modules), 3) + # The finder should be called for the rest. 
+ self.finder.assert_called_once() + self.assertCountEqual(self.finder.call_args[0][0], missing_modules) + + def test_dont_want(self): + for module, _ in self.prog.loaded_modules(): + if isinstance(module, MainModule): + module.loaded_file_status = ModuleFileStatus.DONT_WANT + module.debug_file_status = ModuleFileStatus.DONT_WANT + break + # DONT_WANT should be reset to WANT. + self.assertRaises(MissingDebugInfoError, self.prog.load_debug_info, main=True) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.finder.assert_called_once_with([self.prog.main_module()]) + + def test_dont_need(self): + for module, _ in self.prog.loaded_modules(): + if isinstance(module, MainModule): + module.loaded_file_status = ModuleFileStatus.DONT_NEED + module.debug_file_status = ModuleFileStatus.DONT_NEED + break + # DONT_NEED should be preserved. + self.prog.load_debug_info(main=True) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_NEED) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_NEED) + self.finder.assert_not_called() + + def test_unmatched(self): + self.prog.load_debug_info([get_resource("crashme_static")]) + modules = list(self.prog.modules()) + # All loaded modules should be created. + self.assertEqual(len(modules), 5) + # None of them should have files. 
+ for module in modules: + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.finder.assert_not_called() + + +class TestLoadDebugInfoCoreNoHeaders(TestCase): + def setUp(self): + self.prog = Program() + self.prog.set_core_dump(get_resource("crashme_pie_no_headers.core")) + self.prog.set_enabled_debug_info_finders([]) + self.finder = unittest.mock.Mock() + self.prog.register_debug_info_finder("mock", self.finder, enable_index=0) + + def test_main_by_finder(self): + crashme_pie_path = get_resource("crashme_pie") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme_pie": + module.try_file(crashme_pie_path) + + self.finder.side_effect = finder + + self.prog.load_debug_info(main=True) + + # The main module should be created. + self.assertIn(self.prog.main_module(), list(self.prog.modules())) + # The finder should be called and set the files, address range, and + # build ID for the main module. 
+ self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_pie_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_pie_path), + ) + self.assertEqual( + self.prog.main_module().address_range, (0x5623363D6000, 0x5623363DA018) + ) + self.assertEqual( + self.prog.main_module().build_id.hex(), + "eb4ad7aaded3815ab133a6d7784a2c95a4e52998", + ) + self.finder.assert_called_once_with([self.prog.main_module()]) + + @unittest.expectedFailure # Issue #291 + def test_default_by_finder(self): + crashme_pie_path = get_resource("crashme_pie") + crashme_so_path = get_resource("crashme.so") + + def finder(modules): + for module in modules: + if module.name == "/home/osandov/crashme_pie": + module.try_file(crashme_pie_path) + elif module.name == "/home/osandov/crashme.so": + module.try_file(crashme_so_path) + else: + module.loaded_file_status = ModuleFileStatus.DONT_NEED + module.debug_file_status = ModuleFileStatus.DONT_NEED + + self.finder.side_effect = finder + + self.prog.load_debug_info(default=True) + + # All loaded modules should be created (except ld-linux.so; see + # tests.test_module.TestLinuxUserspaceCoreDump.test_loaded_modules_pie_no_headers). + self.assertCountEqual( + list(self.prog.modules()), + [ + self.prog.main_module(), + self.prog.vdso_module("linux-vdso.so.1", 0x7F299F607438), + self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F299F5FFE08 + ), + self.prog.shared_library_module("/lib64/libc.so.6", 0x7F299F5E7960), + self.prog.shared_library_module( + "/lib64/ld-linux-x86-64.so.2", 0x7F299F63DE68 + ), + ], + ) + # The finder should be called and set the files, address range, and + # build ID for the main and crashme.so modules. 
+ self.assertEqual( + self.prog.main_module().loaded_file_path, + str(crashme_pie_path), + ) + self.assertEqual( + self.prog.main_module().debug_file_path, + str(crashme_pie_path), + ) + self.assertEqual( + self.prog.main_module().address_range, (0x5623363D6000, 0x5623363DA018) + ) + self.assertEqual( + self.prog.main_module().build_id.hex(), + "eb4ad7aaded3815ab133a6d7784a2c95a4e52998", + ) + self.assertEqual( + self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F299F5FFE08 + ).loaded_file_path, + str(crashme_so_path), + ) + self.assertEqual( + self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F299F5FFE08 + ).debug_file_path, + str(crashme_so_path), + ) + self.assertEqual( + self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F299F5FFE08 + ).address_range, + (0x7F299F5FC000, 0x7F299F600010), + ) + self.assertEqual( + self.prog.shared_library_module( + "/home/osandov/crashme.so", 0x7F299F5FFE08 + ).build_id.hex(), + "7bd58f10e741c3c8fbcf2031aa65f830f933d616", + ) + self.finder.assert_called() + + +class TestLoadModuleDebugInfo(TestCase): + def setUp(self): + self.prog = Program() + self.prog.set_enabled_debug_info_finders([]) + self.finder = unittest.mock.Mock() + self.prog.register_debug_info_finder("mock", self.finder, enable_index=0) + + def test_empty(self): + self.prog.load_module_debug_info() + self.finder.assert_not_called() + + def test_multiple(self): + self.prog.load_module_debug_info( + self.prog.extra_module("/foo/bar", create=True), + self.prog.extra_module("/foo/baz", create=True), + ) + self.finder.assert_called_once() + self.assertCountEqual( + self.finder.call_args[0][0], + [ + self.prog.extra_module("/foo/bar"), + self.prog.extra_module("/foo/baz"), + ], + ) + + def test_wrong_program(self): + self.assertRaisesRegex( + ValueError, + "module from wrong program", + self.prog.load_module_debug_info, + self.prog.extra_module("/foo/bar", create=True), + Program().extra_module("/foo/baz", create=True), + ) + 
+ def test_type_error(self): + self.assertRaises( + TypeError, + self.prog.load_module_debug_info, + self.prog.extra_module("/foo/bar", create=True), + None, + ) + + +class TestStandardDebugInfoFinder(TestCase): + def setUp(self): + self.prog = Program() + self.prog.debug_info_options.directories = () + self.prog.debug_info_options.debug_link_directories = () + self.prog.set_enabled_debug_info_finders(["standard"]) + + def test_by_module_name(self): + with NamedTemporaryElfFile() as f: + module = self.prog.extra_module(f.name, create=True) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_path, f.name) + + def test_by_module_name_with_build_id(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with NamedTemporaryElfFile(build_id=build_id) as f: + module = self.prog.extra_module(f.name, create=True) + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_path, f.name) + + def test_by_module_name_missing_build_id(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with NamedTemporaryElfFile() as f: + module = self.prog.extra_module(f.name, create=True) + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_by_module_name_build_id_mismatch(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with NamedTemporaryElfFile(build_id=build_id[::-1]) as f: + module = self.prog.extra_module(f.name, create=True) + module.build_id = 
build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_reuse_loaded_file(self): + with NamedTemporaryElfFile() as f: + module = self.prog.extra_module(f.name, create=True) + module.debug_file_status = ModuleFileStatus.DONT_WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.DONT_WANT) + + module.debug_file_status = ModuleFileStatus.WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + def test_reuse_debug_file(self): + with NamedTemporaryElfFile() as f: + module = self.prog.extra_module(f.name, create=True) + module.loaded_file_status = ModuleFileStatus.DONT_WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + module.loaded_file_status = ModuleFileStatus.WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + + def test_reuse_wanted_supplementary_debug_file(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with NamedTemporaryElfFile( + gnu_debugaltlink=("alt.debug", alt_build_id), + ) as f: + module = self.prog.extra_module(f.name, create=True) + module.loaded_file_status = 
ModuleFileStatus.DONT_WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.DONT_WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY) + + module.loaded_file_status = ModuleFileStatus.WANT + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, f.name) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY) + + def test_vdso_in_core(self): + self.prog.set_core_dump(get_resource("crashme.core")) + for module, _ in self.prog.loaded_modules(): + if isinstance(module, VdsoModule): + break + else: + self.fail("vDSO module not found") + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, "[vdso]") + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_main_by_proc(self): + self.prog.set_pid(os.getpid()) + for module, _ in self.prog.loaded_modules(): + if isinstance(module, MainModule): + break + else: + self.fail("main module not found") + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + + def test_vdso_by_proc(self): + self.prog.set_pid(os.getpid()) + for module, _ in self.prog.loaded_modules(): + if isinstance(module, VdsoModule): + break + else: + self.skipTest("vDSO module not found") + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, "[vdso]") + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_shared_library_by_proc(self): + self.prog.set_pid(os.getpid()) + for module, _ in self.prog.loaded_modules(): + if isinstance(module, SharedLibraryModule): + break + else: + self.skipTest("shared library module not found") + 
self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + + def test_by_build_id(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + for i, relative in enumerate((False, True)): + with self.subTest(relative=relative): + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + build_id_dir = debug_dir / ".build-id" / build_id.hex()[:2] + build_id_dir.mkdir(parents=True) + binary_path = build_id_dir / build_id.hex()[2:] + binary_path.write_bytes( + create_dwarf_file((), sections=(ALLOCATED_SECTION,)) + ) + + module = self.prog.extra_module(bin_dir / "binary", i, create=True) + module.build_id = build_id + + self.prog.debug_info_options.directories = ( + os.path.relpath(debug_dir) if relative else str(debug_dir), + ) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(binary_path)) + self.assertEqual(module.debug_file_path, str(binary_path)) + + def test_by_build_id_separate(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + build_id_dir = debug_dir / ".build-id" / build_id.hex()[:2] + build_id_dir.mkdir(parents=True) + loadable_path = build_id_dir / build_id.hex()[2:] + loadable_path.write_bytes( + create_elf_file(ET.EXEC, sections=(ALLOCATED_SECTION,)) + ) + debug_path = build_id_dir / (build_id.hex()[2:] + ".debug") + debug_path.write_bytes(create_dwarf_file(())) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + module.build_id = build_id + + self.prog.debug_info_options.directories = 
(str(debug_dir),) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + self.assertEqual(module.debug_file_path, str(debug_path)) + + def test_by_build_id_from_loaded(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, sections=(ALLOCATED_SECTION,), build_id=build_id + ) + ) + build_id_dir = debug_dir / ".build-id" / build_id.hex()[:2] + build_id_dir.mkdir(parents=True) + debug_path = build_id_dir / (build_id.hex()[2:] + ".debug") + debug_path.write_bytes(create_dwarf_file(())) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + + self.prog.debug_info_options.directories = (str(debug_dir),) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + self.assertEqual(module.debug_file_path, str(debug_path)) + + def test_by_build_id_method(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + build_id_dir = debug_dir / ".build-id" / build_id.hex()[:2] + build_id_dir.mkdir(parents=True) + binary_path = build_id_dir / build_id.hex()[2:] + binary_path.write_bytes( + create_dwarf_file((), sections=(ALLOCATED_SECTION,)) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + module.build_id = 
build_id + + self.prog.find_standard_debug_info( + [module], + options=DebugInfoOptions(directories=(str(debug_dir),)), + ) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(binary_path)) + self.assertEqual(module.debug_file_path, str(binary_path)) + + def test_by_gnu_debuglink(self): + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + debug_file_contents = create_dwarf_file(()) + crc = binascii.crc32(debug_file_contents) + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, + sections=(ALLOCATED_SECTION,), + gnu_debuglink=("binary.debug", crc), + ) + ) + + self.prog.debug_info_options.directories = (str(debug_dir),) + self.prog.debug_info_options.debug_link_directories = ( + "$ORIGIN", + "$ORIGIN/.debug", + "", + ) + for i, debug_path in enumerate( + ( + bin_dir / "binary.debug", + bin_dir / ".debug" / "binary.debug", + debug_dir / bin_dir.relative_to("/") / "binary.debug", + ) + ): + with self.subTest(debug_path=debug_path): + try: + debug_path.parent.mkdir(parents=True, exist_ok=True) + debug_path.write_bytes(debug_file_contents) + + module = self.prog.extra_module( + bin_dir / "binary", i, create=True + ) + + self.prog.load_module_debug_info(module) + self.assertEqual( + module.loaded_file_status, ModuleFileStatus.HAVE + ) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.HAVE + ) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + self.assertEqual(module.debug_file_path, str(debug_path)) + finally: + try: + debug_path.unlink() + except FileNotFoundError: + pass + + def test_by_gnu_debuglink_absolute(self): + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as 
debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + debug_file_contents = create_dwarf_file(()) + crc = binascii.crc32(debug_file_contents) + debug_path = debug_dir / "binary.debug" + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, + sections=(ALLOCATED_SECTION,), + gnu_debuglink=(debug_path, crc), + ) + ) + + debug_path.parent.mkdir(parents=True, exist_ok=True) + debug_path.write_bytes(debug_file_contents) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + self.assertEqual(module.debug_file_path, str(debug_path)) + + def test_by_gnu_debuglink_origin_with_braces(self): + with tempfile.TemporaryDirectory(prefix="bin-") as bin_dir: + bin_dir = Path(bin_dir) + + debug_file_contents = create_dwarf_file(()) + crc = binascii.crc32(debug_file_contents) + debug_path = bin_dir / "binary.debug" + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, + sections=(ALLOCATED_SECTION,), + gnu_debuglink=("binary.debug", crc), + ) + ) + + debug_path.parent.mkdir(parents=True, exist_ok=True) + debug_path.write_bytes(debug_file_contents) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + + self.prog.debug_info_options.debug_link_directories = ("${ORIGIN}",) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + self.assertEqual(module.debug_file_path, str(debug_path)) + + def test_by_gnu_debuglink_not_origin(self): + # Test that strings other than $ORIGIN followed by a word boundary are + # not replaced. 
+ for i, subdir in enumerate(("$ORIGINAL", "$foo", "$")): + with self.subTest(subdir=subdir): + with tempfile.TemporaryDirectory(prefix="bin-") as bin_dir: + bin_dir = Path(bin_dir) + debug_dir = bin_dir / subdir + + debug_file_contents = create_dwarf_file(()) + crc = binascii.crc32(debug_file_contents) + debug_path = debug_dir / "binary.debug" + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, + sections=(ALLOCATED_SECTION,), + gnu_debuglink=("binary.debug", crc), + ) + ) + + debug_path.parent.mkdir(parents=True, exist_ok=True) + debug_path.write_bytes(debug_file_contents) + + module = self.prog.extra_module(bin_dir / "binary", i, create=True) + + self.prog.debug_info_options.debug_link_directories = ( + str(debug_dir), + ) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + self.assertEqual(module.debug_file_path, str(debug_path)) + + def test_by_gnu_debuglink_origin_multiple(self): + # Pathological case combining the cases above. 
+ with tempfile.TemporaryDirectory(prefix="bin-") as bin_dir: + bin_dir = Path(bin_dir) + debug_dir = ( + bin_dir + / "$ORIGINx" + / (bin_dir.parent / (bin_dir.name + "x")).relative_to("/") + / "$" + / bin_dir.relative_to("/") + / "$O" + ) + + debug_file_contents = create_dwarf_file(()) + crc = binascii.crc32(debug_file_contents) + debug_path = debug_dir / "binary.debug" + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, + sections=(ALLOCATED_SECTION,), + gnu_debuglink=("binary.debug", crc), + ) + ) + + debug_path.parent.mkdir(parents=True, exist_ok=True) + debug_path.write_bytes(debug_file_contents) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + + self.prog.debug_info_options.debug_link_directories = ( + str(bin_dir) + "/$ORIGINx${ORIGIN}x/$$ORIGIN/$O", + ) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + self.assertEqual(module.debug_file_path, str(debug_path)) + + def test_by_gnu_debuglink_crc_mismatch(self): + with tempfile.TemporaryDirectory(prefix="bin-") as bin_dir: + bin_dir = Path(bin_dir) + + debug_file_contents = create_dwarf_file(()) + crc = binascii.crc32(debug_file_contents) + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, + sections=(ALLOCATED_SECTION,), + gnu_debuglink=("binary.debug", crc ^ 1), + ) + ) + + debug_path = bin_dir / "binary.debug" + debug_path.write_bytes(debug_file_contents) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + self.prog.debug_info_options.debug_link_directories = ("$ORIGIN",) + self.prog.load_module_debug_info(module) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_invalid_gnu_debuglink(self): + with tempfile.TemporaryDirectory(prefix="bin-") as bin_dir: + 
bin_dir = Path(bin_dir) + + loadable_path = bin_dir / "binary" + loadable_path.write_bytes( + create_elf_file( + ET.EXEC, + sections=( + ALLOCATED_SECTION, + ElfSection( + name=".gnu_debuglink", sh_type=SHT.PROGBITS, data=b"foo" + ), + ), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.loaded_file_path, str(loadable_path)) + + def test_gnu_debugaltlink_absolute(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + create_dwarf_file( + (), + sections=(ALLOCATED_SECTION,), + gnu_debugaltlink=(alt_path, alt_build_id), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(binary_path)) + self.assertEqual(module.debug_file_path, str(binary_path)) + self.assertEqual(module.supplementary_debug_file_path, str(alt_path)) + + def test_gnu_debugaltlink_not_found(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + create_dwarf_file( + (), + sections=(ALLOCATED_SECTION,), + 
gnu_debugaltlink=(debug_dir / "alt.debug", alt_build_id), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(binary_path), + str(debug_dir / "alt.debug"), + alt_build_id, + ), + ) + self.assertEqual(module.loaded_file_path, str(binary_path)) + + def test_only_gnu_debugaltlink_absolute(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + create_dwarf_file( + (), + sections=(ALLOCATED_SECTION,), + gnu_debugaltlink=(alt_path, alt_build_id), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + module.try_file(binary_path) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual(module.loaded_file_path, str(binary_path)) + + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, str(binary_path)) + self.assertEqual(module.supplementary_debug_file_path, str(alt_path)) + + def test_only_gnu_debugaltlink_not_found(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, 
tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + create_dwarf_file( + (), + sections=(ALLOCATED_SECTION,), + gnu_debugaltlink=(debug_dir / "alt.debug", alt_build_id), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + module.try_file(binary_path) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual(module.loaded_file_path, str(binary_path)) + + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(binary_path), + str(debug_dir / "alt.debug"), + alt_build_id, + ), + ) + + def test_gnu_debugaltlink_relative(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + create_dwarf_file( + (), + sections=(ALLOCATED_SECTION,), + gnu_debugaltlink=( + Path(os.path.relpath(alt_path, bin_dir)), + alt_build_id, + ), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(binary_path)) + self.assertEqual(module.debug_file_path, str(binary_path)) + 
self.assertEqual(module.supplementary_debug_file_path, str(alt_path)) + + def test_gnu_debugaltlink_debug_directories(self): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / ".dwz/alt.debug" + alt_path.parent.mkdir() + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id)) + + self.prog.debug_info_options.directories = (str(debug_dir),) + for i, debugaltlink in enumerate( + ( + bin_dir / "debug/.dwz/alt.debug", + Path("debug/.dwz/alt.debug"), + ) + ): + with self.subTest(debugaltlink=debugaltlink): + binary_path = bin_dir / f"binary{i}" + binary_path.write_bytes( + create_dwarf_file( + (), + sections=(ALLOCATED_SECTION,), + gnu_debugaltlink=(debugaltlink, alt_build_id), + ) + ) + + module = self.prog.extra_module(binary_path, create=True) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.loaded_file_path, str(binary_path)) + self.assertEqual(module.debug_file_path, str(binary_path)) + self.assertEqual( + module.supplementary_debug_file_path, str(alt_path) + ) + + def test_gnu_debugaltlink_build_id_mismatch(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with tempfile.TemporaryDirectory( + prefix="bin-" + ) as bin_dir, tempfile.TemporaryDirectory(prefix="debug-") as debug_dir: + bin_dir = Path(bin_dir) + debug_dir = Path(debug_dir) + + alt_path = debug_dir / "alt.debug" + alt_path.write_bytes(create_dwarf_file((), build_id=alt_build_id[::-1])) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + create_dwarf_file( + (), + sections=(ALLOCATED_SECTION,), + build_id=build_id, + gnu_debugaltlink=(alt_path, 
alt_build_id), + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(binary_path), + str(alt_path), + alt_build_id, + ), + ) + self.assertEqual(module.loaded_file_path, str(binary_path)) + + def test_invalid_gnu_debugaltlink(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with tempfile.TemporaryDirectory(prefix="bin-") as bin_dir: + bin_dir = Path(bin_dir) + + binary_path = bin_dir / "binary" + binary_path.write_bytes( + create_dwarf_file( + (), + sections=( + ALLOCATED_SECTION, + ElfSection( + name=".gnu_debugaltlink", + sh_type=SHT.PROGBITS, + data=b"foo", + ), + ), + build_id=build_id, + ) + ) + + module = self.prog.extra_module(bin_dir / "binary", create=True) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.loaded_file_path, str(binary_path)) + + +class _DebuginfodHTTPHandler(http.server.BaseHTTPRequestHandler): + def do_GET(self): + match = re.fullmatch( + r"/buildid/((?:[0-9a-fA-F][0-9a-fA-F])+)/(executable|debuginfo)", self.path + ) + if not match: + self.send_error(http.HTTPStatus.BAD_REQUEST) + return + + build_id = bytes.fromhex(match.group(1)) + type = match.group(2) + + try: + file_path = self.server.build_ids[build_id][type] + except KeyError: + self.send_error(http.HTTPStatus.NOT_FOUND) + return + + try: + f = open(file_path, "rb") + except OSError: + self.send_error(http.HTTPStatus.INTERNAL_SERVER_ERROR) + return + + with f: + self.send_response(http.HTTPStatus.OK) + st = os.fstat(f.fileno()) + self.send_header("Content-Type", 
"application/octet-stream") + self.send_header("Content-Length", str(st.st_size)) + self.send_header("X-Debuginfod-Size", str(st.st_size)) + self.send_header("Last-Modified", self.date_time_string(st.st_mtime)) + self.end_headers() + shutil.copyfileobj(f, self.wfile) + + +class TestDebuginfodDebugInfoFinder(TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.server = socketserver.TCPServer(("localhost", 0), _DebuginfodHTTPHandler) + cls.server.build_ids = {} + cls.server_thread = threading.Thread( + target=cls.server.serve_forever, daemon=True + ) + cls.server_thread.start() + + @classmethod + def tearDownClass(cls): + # By default, serve_forever() only checks if it should shut down every + # 0.5 seconds. Shutting down the socket makes it check immediately. + cls.server.socket.shutdown(socket.SHUT_RD) + cls.server.shutdown() + cls.server_thread.join() + cls.server.server_close() + + def setUp(self): + self.prog = Program() + try: + self.prog.set_enabled_debug_info_finders(["debuginfod"]) + except ValueError: + self.skipTest("no debuginfod support") + + self.server.build_ids.clear() + self.cache_dir = Path( + self.enterContext(tempfile.TemporaryDirectory(prefix="debuginfod-cache-")) + ) + self.enterContext( + modifyenv( + { + "DEBUGINFOD_URLS": "http://{}:{}/".format( + *self.server.server_address + ), + "DEBUGINFOD_CACHE_PATH": str(self.cache_dir), + } + ) + ) + + def test_no_build_id(self): + module = self.prog.extra_module("foo", create=True) + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_separate(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with NamedTemporaryElfFile( + loadable=True, debug=False, build_id=build_id + ) as loadable_file, NamedTemporaryElfFile( + loadable=False, debug=True, build_id=build_id + ) as debug_file: + self.server.build_ids[build_id] = { + "executable": 
loadable_file.name, + "debuginfo": debug_file.name, + } + + module = self.prog.extra_module("foo", create=True) + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.loaded_file_path, + str(self.cache_dir / build_id.hex() / "executable"), + ) + self.assertEqual( + module.debug_file_path, + str(self.cache_dir / build_id.hex() / "debuginfo"), + ) + + def test_no_servers(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with NamedTemporaryElfFile( + loadable=True, debug=False, build_id=build_id + ) as loadable_file, NamedTemporaryElfFile( + loadable=False, debug=True, build_id=build_id + ) as debug_file, modifyenv( + {"DEBUGINFOD_URLS": None} + ): + self.server.build_ids[build_id] = { + "executable": loadable_file.name, + "debuginfo": debug_file.name, + } + + module = self.prog.extra_module("foo", create=True) + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + + def test_cache_hit(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + + with NamedTemporaryElfFile( + loadable=False, debug=True, build_id=build_id + ) as debug_file: + self.server.build_ids[build_id] = {"debuginfo": debug_file.name} + + for i in range(2): + module = self.prog.extra_module("foo", i, create=True) + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_path, + str(self.cache_dir / build_id.hex() / "debuginfo"), + ) + + def test_gnu_debugaltlink(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with NamedTemporaryElfFile( + loadable=True, debug=False, 
build_id=build_id + ) as loadable_file, NamedTemporaryElfFile( + loadable=False, + debug=True, + build_id=build_id, + gnu_debugaltlink=("alt.debug", alt_build_id), + ) as debug_file, NamedTemporaryElfFile( + loadable=False, debug=True, build_id=alt_build_id + ) as alt_f: + self.server.build_ids[build_id] = { + "executable": loadable_file.name, + "debuginfo": debug_file.name, + } + self.server.build_ids[alt_build_id] = {"debuginfo": alt_f.name} + + module = self.prog.extra_module("foo", create=True) + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.loaded_file_path, + str(self.cache_dir / build_id.hex() / "executable"), + ) + self.assertEqual( + module.debug_file_path, + str(self.cache_dir / build_id.hex() / "debuginfo"), + ) + self.assertEqual( + module.supplementary_debug_file_path, + str(self.cache_dir / alt_build_id.hex() / "debuginfo"), + ) + + def test_gnu_debugaltlink_not_found(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with NamedTemporaryElfFile( + loadable=True, debug=False, build_id=build_id + ) as loadable_file, NamedTemporaryElfFile( + loadable=False, + debug=True, + build_id=build_id, + gnu_debugaltlink=("alt.debug", alt_build_id), + ) as debug_file: + self.server.build_ids[build_id] = { + "executable": loadable_file.name, + "debuginfo": debug_file.name, + } + + module = self.prog.extra_module("foo", create=True) + module.build_id = build_id + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + str(self.cache_dir / build_id.hex() / "debuginfo"), + 
"alt.debug", + alt_build_id, + ), + ) + self.assertEqual( + module.loaded_file_path, + str(self.cache_dir / build_id.hex() / "executable"), + ) + + def test_only_gnu_debugaltlink(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with NamedTemporaryElfFile( + build_id=build_id, + gnu_debugaltlink=("alt.debug", alt_build_id), + ) as f, NamedTemporaryElfFile( + loadable=False, debug=True, build_id=alt_build_id + ) as alt_f: + self.server.build_ids[alt_build_id] = {"debuginfo": alt_f.name} + + module = self.prog.extra_module("foo", create=True) + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual(module.loaded_file_path, f.name) + + self.prog.load_module_debug_info(module) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_status, ModuleFileStatus.HAVE) + self.assertEqual(module.debug_file_path, f.name) + self.assertEqual( + module.supplementary_debug_file_path, + str(self.cache_dir / alt_build_id.hex() / "debuginfo"), + ) + + def test_only_gnu_debugaltlink_not_found(self): + build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + with NamedTemporaryElfFile( + build_id=build_id, + gnu_debugaltlink=("alt.debug", alt_build_id), + ) as f: + module = self.prog.extra_module("foo", create=True) + module.try_file(f.name) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) + self.assertEqual( + module.wanted_supplementary_debug_file(), + ( + SupplementaryFileKind.GNU_DEBUGALTLINK, + f.name, + "alt.debug", + alt_build_id, + ), + ) + self.assertEqual(module.loaded_file_path, f.name) + + self.prog.load_module_debug_info(module) + 
self.assertEqual(module.loaded_file_status, ModuleFileStatus.HAVE) + self.assertEqual( + module.debug_file_status, ModuleFileStatus.WANT_SUPPLEMENTARY + ) diff --git a/tests/test_debug_info_options.py b/tests/test_debug_info_options.py new file mode 100644 index 000000000..00493c7b3 --- /dev/null +++ b/tests/test_debug_info_options.py @@ -0,0 +1,99 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +from drgn import DebugInfoOptions, KmodSearchMethod, Program +from tests import TestCase + + +class TestDebugInfoOptions(TestCase): + def test_list_default(self): + self.assertEqual(DebugInfoOptions().directories, ("/usr/lib/debug",)) + + def test_list_init(self): + self.assertEqual( + DebugInfoOptions(directories=["foo", "bar"]).directories, ("foo", "bar") + ) + self.assertRaises(TypeError, DebugInfoOptions, directories=None) + + def test_list_copy(self): + self.assertEqual( + DebugInfoOptions(DebugInfoOptions(directories=["foo", "bar"])).directories, + ("foo", "bar"), + ) + + def test_list_set(self): + options = DebugInfoOptions() + options.directories = ("foo", "bar") + self.assertEqual(options.directories, ("foo", "bar")) + with self.assertRaises(TypeError): + DebugInfoOptions().directories = None + + def test_directories_empty_string(self): + with self.assertRaises(ValueError): + DebugInfoOptions().directories = ("",) + + def test_bool_default(self): + self.assertIs(DebugInfoOptions().try_build_id, True) + + def test_bool_init(self): + self.assertIs(DebugInfoOptions(try_build_id=False).try_build_id, False) + + def test_bool_copy(self): + self.assertIs( + DebugInfoOptions(DebugInfoOptions(try_build_id=False)).try_build_id, False + ) + + def test_bool_set(self): + options = DebugInfoOptions() + options.try_build_id = False + self.assertIs(options.try_build_id, False) + + def test_enum_default(self): + self.assertEqual(DebugInfoOptions().try_kmod, KmodSearchMethod.DEPMOD_OR_WALK) + + def test_enum_init(self): + 
self.assertEqual( + DebugInfoOptions(try_kmod=KmodSearchMethod.WALK).try_kmod, + KmodSearchMethod.WALK, + ) + self.assertRaises(TypeError, DebugInfoOptions, try_kmod=False) + + def test_enum_copy(self): + self.assertEqual( + DebugInfoOptions( + DebugInfoOptions(try_kmod=KmodSearchMethod.DEPMOD) + ).try_kmod, + KmodSearchMethod.DEPMOD, + ) + + def test_enum_set(self): + options = DebugInfoOptions() + options.try_kmod = KmodSearchMethod.DEPMOD_AND_WALK + self.assertEqual(options.try_kmod, KmodSearchMethod.DEPMOD_AND_WALK) + with self.assertRaises(TypeError): + options.try_kmod = False + + def test_del(self): + with self.assertRaises(AttributeError): + del DebugInfoOptions().directories + + def test_repr(self): + self.assertIn("directories=()", repr(DebugInfoOptions(directories=()))) + + +class TestProgramDebugInfoOptions(TestCase): + def test_default(self): + self.assertEqual( + Program().debug_info_options.directories, DebugInfoOptions().directories + ) + + def test_assign(self): + prog = Program() + prog.debug_info_options.directories = ("foo", "bar") + prog.debug_info_options = DebugInfoOptions(directories=("bar", "baz")) + self.assertEqual(prog.debug_info_options.directories, ("bar", "baz")) + + def test_assign_list(self): + prog = Program() + prog.debug_info_options.directories = ("bar", "foo") + self.assertEqual(prog.debug_info_options.directories, ("bar", "foo")) diff --git a/tests/test_dwarf.py b/tests/test_dwarf.py index 7658cd67f..fcdb9891a 100644 --- a/tests/test_dwarf.py +++ b/tests/test_dwarf.py @@ -10,6 +10,7 @@ import drgn from drgn import ( + AbsenceReason, FaultError, FindObjectFlags, Language, @@ -48,6 +49,7 @@ DwarfLabel, DwarfUnit, compile_dwarf, + create_dwarf_file, ) bool_die = DwarfDie( @@ -202,12 +204,28 @@ labeled_float_die = (DwarfLabel("float_die"), float_die) -def dwarf_program(*args, segments=None, **kwds): +def add_extra_dwarf(prog, path, supplementary_path=None): + module = prog.extra_module(path, create=True) + module.try_file(path, 
force=True) + if module.debug_file_status == drgn.ModuleFileStatus.WANT_SUPPLEMENTARY: + module.try_file(supplementary_path) + else: + assert supplementary_path is None + assert not module.wants_debug_file() + + +def dwarf_program(*args, segments=None, gnu_debugaltlink=None, **kwds): prog = Program() with tempfile.NamedTemporaryFile() as f: - f.write(compile_dwarf(*args, **kwds)) + f.write(create_dwarf_file(*args, gnu_debugaltlink=gnu_debugaltlink, **kwds)) f.flush() - prog.load_debug_info([f.name]) + add_extra_dwarf( + prog, + f.name, + supplementary_path=( + None if gnu_debugaltlink is None else gnu_debugaltlink[0] + ), + ) if segments is not None: add_mock_memory_segments(prog, segments) @@ -245,6 +263,44 @@ def wrapper(self): return wrapper +class TestInvalidDwarf(TestCase): + def test_name_out_of_bounds(self): + with self.assertRaisesRegex(Exception, "name is out of bounds"): + "foo" in dwarf_program( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.signed), + DwarfAttrib(DW_AT.name, DW_FORM.strp, 0xDEADBEEF), + ), + ) + ) + + def test_sibling_out_of_bounds(self): + with self.assertRaisesRegex(Exception, "DW_AT_sibling is out of bounds"): + "foo" in dwarf_program( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.signed), + DwarfAttrib(DW_AT.sibling, DW_FORM.ref4, 0xDEADBEEF), + ), + ) + ) + + def test_sibling_points_backwards(self): + with self.assertRaisesRegex(Exception, "DW_AT_sibling points backwards"): + "foo" in dwarf_program( + DwarfDie( + DW_TAG.base_type, + ( + DwarfAttrib(DW_AT.encoding, DW_FORM.data1, DW_ATE.signed), + DwarfAttrib(DW_AT.sibling, DW_FORM.ref1, 0), + ), + ) + ) + + class TestTypes(TestCase): def test_unknown_tag(self): prog = dwarf_program(wrap_test_type_dies(DwarfDie(0x9999, ()))) @@ -324,6 +380,32 @@ def test_unknown_base_type_encoding(self): ) self.assertRaisesRegex(Exception, "unknown DWARF encoding", prog.type, "TEST") + def test_reference_forms(self): 
+ for form in ( + DW_FORM.ref1, + DW_FORM.ref2, + DW_FORM.ref4, + DW_FORM.ref8, + DW_FORM.ref_udata, + DW_FORM.ref_addr, + ): + with self.subTest(form=form): + prog = dwarf_program( + ( + *labeled_int_die, + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"), + DwarfAttrib(DW_AT.type, form, "int_die"), + ), + ), + ) + ) + self.assertIdentical( + prog.type("TEST").type, prog.int_type("int", 4, True) + ) + def test_int_type_byteorder(self): prog = dwarf_program( wrap_test_type_dies( @@ -4505,12 +4587,17 @@ def test_function_no_address(self): ) ) self.assertIdentical( - prog.object("abort"), Object(prog, prog.function_type(prog.void_type(), ())) + prog.object("abort"), + Object( + prog, + prog.function_type(prog.void_type(), ()), + absence_reason=AbsenceReason.OPTIMIZED_OUT, + ), ) def test_function_concrete_out_of_line_instance(self): prog = dwarf_program( - wrap_test_type_dies( + ( *labeled_int_die, DwarfLabel("abstract_instance_root"), DwarfDie( @@ -4569,6 +4656,118 @@ def test_function_concrete_out_of_line_instance(self): ), ) + def test_member_function_specification(self): + prog = dwarf_program( + ( + DwarfDie( + DW_TAG.class_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "Foo"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 0), + ), + ( + DwarfLabel("declaration"), + DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "bar"), + DwarfAttrib( + DW_AT.declaration, DW_FORM.flag_present, True + ), + ), + ), + ), + ), + # This is how GCC and Clang do it: the declaration is in the + # correct scope, and the definition is at the top level. 
+ DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib(DW_AT.specification, DW_FORM.ref4, "declaration"), + DwarfAttrib(DW_AT.low_pc, DW_FORM.addr, 0x7FC3EB9B1C30), + ), + ), + DwarfDie( + DW_TAG.subprogram, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "main"),), + ), + ), + lang=DW_LANG.C_plus_plus, + ) + self.assertIdentical( + prog["Foo::bar"], + Object( + prog, + prog.function_type( + prog.void_type(), + (), + ), + address=0x7FC3EB9B1C30, + ), + ) + self.assertNotIn("bar", prog) + + def test_member_function_concrete_out_of_line_instance(self): + prog = dwarf_program( + ( + DwarfDie( + DW_TAG.class_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "Foo"), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 0), + ), + ( + DwarfLabel("declaration"), + DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "bar"), + DwarfAttrib( + DW_AT.declaration, DW_FORM.flag_present, True + ), + ), + ), + ), + ), + DwarfLabel("abstract_instance_root"), + DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib(DW_AT.specification, DW_FORM.ref4, "declaration"), + DwarfAttrib( + DW_AT.inline, DW_FORM.data1, DW_INL.declared_inlined + ), + ), + ), + DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib( + DW_AT.specification, DW_FORM.ref4, "abstract_instance_root" + ), + DwarfAttrib(DW_AT.low_pc, DW_FORM.addr, 0x7FC3EB9B1C30), + ), + ), + DwarfDie( + DW_TAG.subprogram, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "main"),), + ), + ), + lang=DW_LANG.C_plus_plus, + ) + self.assertIdentical( + prog["Foo::bar"], + Object( + prog, + prog.function_type( + prog.void_type(), + (), + ), + address=0x7FC3EB9B1C30, + ), + ) + self.assertNotIn("bar", prog) + def test_variable(self): prog = dwarf_program( wrap_test_type_dies( @@ -4645,7 +4844,10 @@ def test_variable_no_address(self): ), ) ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT), + ) def 
test_variable_expr_empty(self): prog = dwarf_program( @@ -4661,7 +4863,10 @@ def test_variable_expr_empty(self): ), ) ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT), + ) def test_variable_expr_bit_piece(self): prog = dwarf_program( @@ -4898,7 +5103,10 @@ def test_variable_expr_implicit_value_piece_empty(self): ), ), ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT), + ) def test_variable_expr_stack_value(self): for little_endian in (True, False): @@ -5028,7 +5236,10 @@ def test_variable_expr_stack_value_piece_empty(self): ), ), ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT), + ) def test_variable_expr_contiguous_piece_addresses(self): prog = dwarf_program( @@ -5314,7 +5525,10 @@ def test_variable_expr_address_empty_piece(self): ), ), ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT), + ) def test_variable_expr_absent_empty_piece(self): prog = dwarf_program( @@ -5337,7 +5551,10 @@ def test_variable_expr_absent_empty_piece(self): ), ), ) - self.assertIdentical(prog.object("x"), Object(prog, "int")) + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT), + ) def test_variable_expr_unknown(self): prog = dwarf_program( @@ -5353,8 +5570,9 @@ def test_variable_expr_unknown(self): ), ) ) - self.assertRaisesRegex( - Exception, "unknown DWARF expression opcode", prog.object, "x" + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.NOT_IMPLEMENTED), ) def 
test_variable_expr_unknown_after_location(self): @@ -5380,8 +5598,9 @@ def test_variable_expr_unknown_after_location(self): ), ) ) - self.assertRaisesRegex( - Exception, "unknown DWARF expression opcode", prog.object, "x" + self.assertIdentical( + prog.object("x"), + Object(prog, "int", absence_reason=AbsenceReason.NOT_IMPLEMENTED), ) def _eval_dwarf_expr(self, ops, **kwds): @@ -6870,7 +7089,7 @@ def test_dwo4(self): with tempfile.TemporaryDirectory() as temp_dir: with open(os.path.join(temp_dir, "split.dwo"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.split_compile, DwarfDie( @@ -6890,7 +7109,7 @@ def test_dwo4(self): ) with open(os.path.join(temp_dir, "skeleton"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.skeleton, DwarfDie( @@ -6909,7 +7128,7 @@ def test_dwo4(self): ) ) ) - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, f.name) self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) def test_dwo4_not_found(self): @@ -6917,7 +7136,7 @@ def test_dwo4_not_found(self): with tempfile.TemporaryDirectory() as temp_dir: with open(os.path.join(temp_dir, "skeleton"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.skeleton, DwarfDie( @@ -6937,7 +7156,12 @@ def test_dwo4_not_found(self): ) ) with self.assertLogs(logging.getLogger("drgn"), "WARNING") as log: - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, f.name) + # Force debug info to be indexed. 
+ try: + prog["foo"] + except KeyError: + pass self.assertTrue( any( "split DWARF file split.dwo not found" in output @@ -6950,7 +7174,7 @@ def test_dwo4_id_mismatch(self): with tempfile.TemporaryDirectory() as temp_dir: with open(os.path.join(temp_dir, "split.dwo"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.split_compile, DwarfDie( @@ -6969,7 +7193,7 @@ def test_dwo4_id_mismatch(self): ) with open(os.path.join(temp_dir, "skeleton"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.skeleton, DwarfDie( @@ -6989,7 +7213,12 @@ def test_dwo4_id_mismatch(self): ) ) with self.assertLogs(logging.getLogger("drgn"), "WARNING") as log: - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, f.name) + # Force debug info to be indexed. + try: + prog["foo"] + except KeyError: + pass self.assertTrue( any( "split DWARF file split.dwo not found" in output @@ -7002,7 +7231,7 @@ def test_dwo5(self): with tempfile.TemporaryDirectory() as temp_dir: with open(os.path.join(temp_dir, "split.dwo"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.split_compile, DwarfDie( @@ -7018,7 +7247,7 @@ def test_dwo5(self): ) with open(os.path.join(temp_dir, "skeleton"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.skeleton, DwarfDie( @@ -7034,7 +7263,7 @@ def test_dwo5(self): version=5, ) ) - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, f.name) self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) def test_dwo5_not_found(self): @@ -7042,7 +7271,7 @@ def test_dwo5_not_found(self): with tempfile.TemporaryDirectory() as temp_dir: with open(os.path.join(temp_dir, "skeleton"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.skeleton, DwarfDie( @@ -7059,7 +7288,12 @@ def test_dwo5_not_found(self): ) ) with self.assertLogs(logging.getLogger("drgn"), "WARNING") as log: - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, 
f.name) + # Force debug info to be indexed. + try: + prog["foo"] + except KeyError: + pass self.assertTrue( any( "split DWARF file split.dwo not found" in output @@ -7072,7 +7306,7 @@ def test_dwo5_id_mismatch(self): with tempfile.TemporaryDirectory() as temp_dir: with open(os.path.join(temp_dir, "split.dwo"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.split_compile, DwarfDie( @@ -7088,7 +7322,7 @@ def test_dwo5_id_mismatch(self): ) with open(os.path.join(temp_dir, "skeleton"), "wb") as f: f.write( - compile_dwarf( + create_dwarf_file( DwarfUnit( DW_UT.skeleton, DwarfDie( @@ -7105,10 +7339,1436 @@ def test_dwo5_id_mismatch(self): ) ) with self.assertLogs(logging.getLogger("drgn"), "WARNING") as log: - prog.load_debug_info([f.name]) + add_extra_dwarf(prog, f.name) + # Force debug info to be indexed. + try: + prog["foo"] + except KeyError: + pass self.assertTrue( any( "split DWARF file split.dwo not found" in output for output in log.output ) ) + + +class TestImportedUnit(TestCase): + alt_build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + + def test_global(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies(int_die), + ), + die_label="partial_unit", + ), + ), + ) + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + + def test_global_alt(self): + with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies(int_die), + ), + die_label="alt_unit", + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + 
DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["alt_unit"], + ), + ), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + + def test_global_nested(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, DW_FORM.ref_addr, "partial_unit2" + ), + ), + ), + ), + ), + die_label="partial_unit", + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies(int_die), + ), + die_label="partial_unit2", + ), + ), + ) + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + + def test_global_nested_alt(self): + with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, DW_FORM.ref_addr, "alt_unit2" + ), + ), + ), + ), + ), + die_label="alt_unit", + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies(int_die), + ), + die_label="alt_unit2", + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, 
+ ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["alt_unit"], + ), + ), + ), + ), + ), + die_label="partial_unit", + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + + def test_enumeration_type(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies( + DwarfDie( + DW_TAG.enumeration_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "color"), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "unsigned_int_die" + ), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 4), + ), + ( + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "RED" + ), + DwarfAttrib( + DW_AT.const_value, DW_FORM.data1, 0 + ), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "GREEN" + ), + DwarfAttrib( + DW_AT.const_value, DW_FORM.data1, 1 + ), + ), + ), + DwarfDie( + DW_TAG.enumerator, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "BLUE" + ), + DwarfAttrib( + DW_AT.const_value, DW_FORM.data1, 2 + ), + ), + ), + ), + ), + *labeled_unsigned_int_die, + ), + ), + die_label="partial_unit", + ), + ), + ) + self.assertIdentical( + prog.type("TEST").type, + prog.enum_type( + "color", + prog.int_type("unsigned int", 4, False), + ( + TypeEnumerator("RED", 0), + TypeEnumerator("GREEN", 1), + TypeEnumerator("BLUE", 2), + ), + ), + ) + + def test_namespace(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + ( + # TODO: in practice, partial units don't seem to + # have a language set, and it's supposed to be + # inherited from the unit that imports it. We don't + # handle that yet. 
+ DwarfAttrib( + DW_AT.language, + DW_FORM.data1, + DW_LANG.C_plus_plus, + ), + ), + ( + *labeled_int_die, + DwarfDie( + DW_TAG.namespace, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"),), + ( + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "TEST" + ), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "int_die" + ), + ), + ), + ), + ), + ), + ), + die_label="partial_unit", + ), + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + ( + DwarfAttrib( + DW_AT.language, + DW_FORM.data1, + DW_LANG.C_plus_plus, + ), + ), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + DwarfDie( + DW_TAG.subprogram, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "main"),), + ), + ), + ), + ), + ), + ) + self.assertIdentical(prog.type("foo::TEST").type, prog.int_type("int", 4, True)) + + def test_namespace_alt(self): + with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + ( + # See above re: language in partial units. 
+ DwarfAttrib( + DW_AT.language, + DW_FORM.data1, + DW_LANG.C_plus_plus, + ), + ), + ( + *labeled_int_die, + DwarfDie( + DW_TAG.namespace, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"),), + ( + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "TEST" + ), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "int_die" + ), + ), + ), + ), + ), + ), + ), + die_label="alt_unit", + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + ( + DwarfAttrib( + DW_AT.language, + DW_FORM.data1, + DW_LANG.C_plus_plus, + ), + ), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["alt_unit"], + ), + ), + ), + DwarfDie( + DW_TAG.subprogram, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "main"),), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + self.assertIdentical( + prog.type("foo::TEST").type, prog.int_type("int", 4, True) + ) + + def test_specification_imported(self): + # DW_AT_specification in an imported unit referring to a + # DW_AT_declaration DIE in a normal CU. 
+ prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + wrap_test_type_dies( + DwarfDie( + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "incomplete_struct_die", + ), + ), + ), + DwarfLabel("incomplete_struct_die"), + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib( + DW_AT.declaration, + DW_FORM.flag_present, + True, + ), + ), + ), + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib( + DW_AT.specification, + DW_FORM.ref_addr, + "incomplete_struct_die", + ), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "x" + ), + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.data1, + 0, + ), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "int_die", + ), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "y" + ), + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.data1, + 4, + ), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "int_die", + ), + ), + ), + ), + ), + *labeled_int_die, + ), + ), + die_label="partial_unit", + ), + ), + ) + self.assertIdentical( + prog.type("TEST").type, + prog.pointer_type( + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x"), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ) + ), + ) + + def test_declaration_and_specification_imported(self): + # DW_AT_specification in an imported unit referring to a + # DW_AT_declaration DIE in the same imported unit. 
+ prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies( + DwarfDie( + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "incomplete_struct_die", + ), + ), + ), + DwarfLabel("incomplete_struct_die"), + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "point"), + DwarfAttrib( + DW_AT.declaration, + DW_FORM.flag_present, + True, + ), + ), + ), + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib( + DW_AT.specification, + DW_FORM.ref4, + "incomplete_struct_die", + ), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "x" + ), + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.data1, + 0, + ), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "int_die", + ), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "y" + ), + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.data1, + 4, + ), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "int_die", + ), + ), + ), + ), + ), + *labeled_int_die, + ), + ), + die_label="partial_unit", + ), + ), + ) + self.assertIdentical( + prog.type("TEST").type, + prog.pointer_type( + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x"), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ) + ), + ) + + def test_declaration_and_specification_alt(self): + # DW_AT_specification in an imported unit from a .gnu_debugaltlink file + # referring to a DW_AT_declaration DIE in the same imported unit. 
+ with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies( + DwarfDie( + DW_TAG.pointer_type, + ( + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + DwarfAttrib( + DW_AT.type, + DW_FORM.ref4, + "incomplete_struct_die", + ), + ), + ), + DwarfLabel("incomplete_struct_die"), + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "point" + ), + DwarfAttrib( + DW_AT.declaration, + DW_FORM.flag_present, + True, + ), + ), + ), + DwarfDie( + DW_TAG.structure_type, + ( + DwarfAttrib( + DW_AT.specification, + DW_FORM.ref4, + "incomplete_struct_die", + ), + DwarfAttrib(DW_AT.byte_size, DW_FORM.data1, 8), + ), + ( + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "x" + ), + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.data1, + 0, + ), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "int_die" + ), + ), + ), + DwarfDie( + DW_TAG.member, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "y" + ), + DwarfAttrib( + DW_AT.data_member_location, + DW_FORM.data1, + 4, + ), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "int_die" + ), + ), + ), + ), + ), + *labeled_int_die, + ), + ), + die_label="alt_unit", + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["alt_unit"], + ), + ), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + self.assertIdentical( + prog.type("TEST").type, + prog.pointer_type( + prog.struct_type( + "point", + 8, + ( + TypeMember(prog.int_type("int", 4, True), "x"), + TypeMember(prog.int_type("int", 4, True), "y", 32), + ), + ) + ), + ) + + def test_function_alt(self): + with tempfile.NamedTemporaryFile() as 
alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + *labeled_int_die, + # DWP puts a subprogram DIE without + # DW_AT_low_pc/DW_AT_ranges in the + # supplementary file and another subprogram DIE + # that references it with DW_AT_abstract_origin + # in the main debug file. + DwarfLabel("abstract_instance_root"), + DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "abs"), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "int_die" + ), + ), + ( + DwarfLabel("abstract_instance_parameter"), + DwarfDie( + DW_TAG.formal_parameter, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "x" + ), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "int_die" + ), + ), + ), + ), + ), + ), + ), + die_label="alt_unit", + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["alt_unit"], + ), + ), + ), + DwarfDie( + DW_TAG.subprogram, + ( + DwarfAttrib( + DW_AT.abstract_origin, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["abstract_instance_root"], + ), + DwarfAttrib( + DW_AT.low_pc, + DW_FORM.addr, + 0x7FC3EB9B1C30, + ), + ), + ( + DwarfDie( + DW_TAG.formal_parameter, + ( + DwarfAttrib( + DW_AT.abstract_origin, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels[ + "abstract_instance_parameter" + ], + ), + ), + ), + ), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + self.assertIdentical( + prog["abs"], + Object( + prog, + prog.function_type( + prog.int_type("int", 4, True), + (TypeParameter(prog.int_type("int", 4, True), "x"),), + False, + ), + address=0x7FC3EB9B1C30, + ), + ) + + def test_unused_partial_unit(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + 
DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"),), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "UNUSED"),), + ), + ), + ), + ), + ), + ) + self.assertIdentical(prog.type("TEST").type, prog.void_type()) + self.assertRaises(LookupError, prog.type, "UNUSED") + + def test_unused_partial_unit_alt(self): + with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST"),), + ), + ), + ), + die_label="alt_unit", + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "UNUSED" + ), + ), + ), + ), + ), + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["alt_unit"], + ), + ), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + self.assertIdentical(prog.type("TEST").type, prog.void_type()) + self.assertRaises(LookupError, prog.type, "UNUSED") + + def test_imported_unit_with_children(self): + # DW_TAG_imported_unit shouldn't have children. Test that we ignore the + # children properly and continue where we left off. 
+ prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ( + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib( + DW_AT.name, DW_FORM.string, "UNUSED" + ), + ), + ), + ), + ), + *labeled_unsigned_int_die, + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST2"), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "unsigned_int_die" + ), + ), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies(int_die), + ), + die_label="partial_unit", + ), + ), + ) + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + self.assertIdentical( + prog.type("TEST2").type, prog.int_type("unsigned int", 4, False) + ) + self.assertRaises(LookupError, prog.type, "UNUSED") + + def test_imported_unit_with_sibling(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + DwarfAttrib( + DW_AT.sibling, + DW_FORM.ref4, + "TEST2_die", + ), + ), + ), + DwarfLabel("TEST2_die"), + DwarfDie( + DW_TAG.typedef, + ( + DwarfAttrib(DW_AT.name, DW_FORM.string, "TEST2"), + DwarfAttrib( + DW_AT.type, DW_FORM.ref4, "unsigned_int_die" + ), + ), + ), + *labeled_unsigned_int_die, + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + wrap_test_type_dies(int_die), + ), + die_label="partial_unit", + ), + ), + ) + self.assertIdentical(prog.type("TEST").type, prog.int_type("int", 4, True)) + self.assertIdentical( + prog.type("TEST2").type, prog.int_type("unsigned int", 4, False) + ) + + def test_top_level_imported_unit_with_children(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + 
DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ( + DwarfDie( + DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "foo"),), + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "UNUSED"),), + ), + ), + ), + die_label="partial_unit", + ), + ), + allow_any_unit_die=True, + ) + self.assertRaises(LookupError, prog.type, "UNUSED") + + def test_top_level_imported_unit(self): + prog = dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref_addr, + "partial_unit", + ), + ), + ), + ), + DwarfUnit( + DW_UT.partial, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.typedef, + (DwarfAttrib(DW_AT.name, DW_FORM.string, "UNUSED"),), + ), + ), + ), + die_label="partial_unit", + ), + ), + allow_any_unit_die=True, + ) + self.assertRaises(LookupError, prog.type, "UNUSED") + + def test_missing_import(self): + with self.assertRaisesRegex( + Exception, "DW_TAG_imported_unit is missing DW_AT_import" + ): + "foo" in dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + (DwarfDie(DW_TAG.imported_unit),), + ), + ), + ), + ) + + def test_out_of_bounds(self): + with self.assertRaisesRegex(Exception, "reference is out of bounds"): + "foo" in dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref4, + 0x100000, + ), + ), + ), + ), + ), + ), + ), + ) + + def test_out_of_bounds_alt(self): + with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + (), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + with self.assertRaisesRegex(Exception, "reference is out of bounds"): + "foo" in dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + 
DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + 0x100000, + ), + ), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) + + def test_cycle(self): + with self.assertRaisesRegex( + Exception, "maximum DWARF imported unit depth exceeded" + ): + "foo" in dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref4, + "cycle_unit", + ), + ), + ), + ), + ), + die_label="cycle_unit", + ), + ), + ) + + def test_cycle_alt(self): + with tempfile.NamedTemporaryFile() as alt_f: + alt_dwarf = compile_dwarf( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.partial_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.ref4, + "cycle_unit", + ), + ), + ), + ), + ), + die_label="cycle_unit", + ), + ), + build_id=self.alt_build_id, + ) + alt_f.write(alt_dwarf.data) + alt_f.flush() + + with self.assertRaisesRegex( + Exception, "maximum DWARF imported unit depth exceeded" + ): + "foo" in dwarf_program( + ( + DwarfUnit( + DW_UT.compile, + DwarfDie( + DW_TAG.compile_unit, + (), + ( + DwarfDie( + DW_TAG.imported_unit, + ( + DwarfAttrib( + DW_AT.import_, + DW_FORM.GNU_ref_alt, + alt_dwarf.labels["cycle_unit"], + ), + ), + ), + ), + ), + ), + ), + gnu_debugaltlink=(alt_f.name, self.alt_build_id), + ) diff --git a/tests/test_language_c.py b/tests/test_language_c.py index 448feb326..9af7825ce 100644 --- a/tests/test_language_c.py +++ b/tests/test_language_c.py @@ -4,6 +4,7 @@ import operator from drgn import ( + AbsenceReason, Object, Qualifiers, Type, @@ -3059,6 +3060,12 @@ def test_absent(self): type_name = type_ self.assertEqual(str(Object(self.prog, type_)), f"({type_name})") + def test_optimized_out(self): + self.assertEqual( + str(Object(self.prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT)), + "(int)", 
+ ) + def test_bigint(self): segment = bytearray(16) self.add_memory_segment(segment, virt_addr=0xFFFF0000) diff --git a/tests/test_logging.py b/tests/test_logging.py index c5b9aa516..656d9f0cd 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -2,8 +2,6 @@ # SPDX-License-Identifier: LGPL-2.1-or-later import logging -import sys -import unittest from drgn import Program from tests import TestCase @@ -19,9 +17,6 @@ def test_set_level_before(self): prog._log(0, "foo") self.assertIn("DEBUG:drgn:foo", cm.output) - @unittest.skipIf( - sys.version_info < (3, 7), "syncing log level only works since Python 3.7" - ) def test_set_level_after(self): prog = Program() logger = logging.getLogger("drgn") diff --git a/tests/test_module.py b/tests/test_module.py new file mode 100644 index 000000000..ca3aaf7c2 --- /dev/null +++ b/tests/test_module.py @@ -0,0 +1,587 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +from pathlib import Path + +from drgn import ( + ExtraModule, + MainModule, + ModuleFileStatus, + Program, + RelocatableModule, + SharedLibraryModule, + VdsoModule, +) +from tests import TestCase + + +class IntWrapper: + def __init__(self, value): + self._value = value + + def __index__(self): + return self._value + + +class TestModule(TestCase): + def _test_module_init_common(self, module): + self.assertIsNone(module.address_ranges) + self.assertIsNone(module.address_range) + self.assertIsNone(module.build_id) + self.assertEqual(module.loaded_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.loaded_file_path) + self.assertIsNone(module.loaded_file_bias) + self.assertEqual(module.debug_file_status, ModuleFileStatus.WANT) + self.assertIsNone(module.debug_file_path) + self.assertIsNone(module.debug_file_bias) + self.assertIsNone(module.supplementary_debug_file_kind) + self.assertIsNone(module.supplementary_debug_file_path) + + def test_main_module(self): + prog = Program() + + 
self.assertRaises(LookupError, prog.main_module) + self.assertRaises(LookupError, prog.main_module, "/foo/bar") + + module = prog.main_module("/foo/bar", create=True) + self.assertIsInstance(module, MainModule) + + self.assertEqual(prog.main_module(), module) + self.assertEqual(prog.main_module(create=False), module) + self.assertEqual(prog.main_module("/foo/bar"), module) + self.assertEqual(prog.main_module(b"/foo/bar"), module) + self.assertEqual(prog.main_module(Path("/foo/bar")), module) + self.assertEqual(prog.main_module("/foo/bar", create=True), module) + + self.assertRaises(LookupError, prog.main_module, "/foo/baz") + self.assertRaises(LookupError, prog.main_module, "/foo/baz", create=True) + + self.assertIs(module.prog, prog) + self.assertEqual(module.name, "/foo/bar") + self._test_module_init_common(module) + + def test_main_module_invalid(self): + prog = Program() + self.assertRaises(TypeError, prog.main_module, None) + self.assertRaises(TypeError, prog.main_module, create=True) + self.assertRaises(TypeError, prog.main_module, "/foo/bar", True) + + def test_shared_library_module(self): + prog = Program() + + self.assertRaises( + LookupError, prog.shared_library_module, "/foo/bar", 0x10000000 + ) + + module = prog.shared_library_module("/foo/bar", 0x10000000, create=True) + self.assertIsInstance(module, SharedLibraryModule) + + self.assertEqual(prog.shared_library_module("/foo/bar", 0x10000000), module) + self.assertEqual(prog.shared_library_module(b"/foo/bar", 0x10000000), module) + self.assertEqual( + prog.shared_library_module(Path("/foo/bar"), IntWrapper(0x10000000)), module + ) + self.assertEqual( + prog.shared_library_module("/foo/bar", 0x10000000, create=True), module + ) + + self.assertRaises( + LookupError, prog.shared_library_module, "/foo/bar", 0x20000000 + ) + self.assertRaises( + LookupError, prog.shared_library_module, "/foo/baz", 0x10000000 + ) + + self.assertNotEqual( + prog.shared_library_module("/foo/bar", 0x20000000, create=True), 
module + ) + self.assertNotEqual( + prog.shared_library_module("/foo/baz", 0x10000000, create=True), module + ) + self.assertNotEqual( + prog.vdso_module("/foo/bar", 0x10000000, create=True), module + ) + + self.assertIs(module.prog, prog) + self.assertEqual(module.name, "/foo/bar") + self.assertEqual(module.dynamic_address, 0x10000000) + self._test_module_init_common(module) + + def test_shared_library_module_invalid(self): + prog = Program() + self.assertRaises(TypeError, prog.shared_library_module) + self.assertRaises(TypeError, prog.shared_library_module, "/foo/bar") + self.assertRaises(TypeError, prog.shared_library_module, "/foo/bar", None) + self.assertRaises(TypeError, prog.shared_library_module, None, 0) + self.assertRaises( + TypeError, prog.shared_library_module, "/foo/bar", 0x10000000, True + ) + + def test_vdso_module(self): + prog = Program() + + self.assertRaises(LookupError, prog.vdso_module, "/foo/bar", 0x10000000) + + module = prog.vdso_module("/foo/bar", 0x10000000, create=True) + self.assertIsInstance(module, VdsoModule) + + self.assertEqual(prog.vdso_module("/foo/bar", 0x10000000), module) + self.assertEqual(prog.vdso_module(b"/foo/bar", 0x10000000), module) + self.assertEqual( + prog.vdso_module(Path("/foo/bar"), IntWrapper(0x10000000)), module + ) + self.assertEqual(prog.vdso_module("/foo/bar", 0x10000000, create=True), module) + + self.assertRaises(LookupError, prog.vdso_module, "/foo/bar", 0x20000000) + self.assertRaises(LookupError, prog.vdso_module, "/foo/baz", 0x10000000) + + self.assertNotEqual( + prog.vdso_module("/foo/bar", 0x20000000, create=True), module + ) + self.assertNotEqual( + prog.vdso_module("/foo/baz", 0x10000000, create=True), module + ) + self.assertNotEqual( + prog.shared_library_module("/foo/bar", 0x10000000, create=True), module + ) + + self.assertIs(module.prog, prog) + self.assertEqual(module.name, "/foo/bar") + self.assertEqual(module.dynamic_address, 0x10000000) + self._test_module_init_common(module) + + def 
test_vdso_module_invalid(self): + prog = Program() + self.assertRaises(TypeError, prog.vdso_module) + self.assertRaises(TypeError, prog.vdso_module, "/foo/bar") + self.assertRaises(TypeError, prog.vdso_module, "/foo/bar", None) + self.assertRaises(TypeError, prog.vdso_module, None, 0) + self.assertRaises(TypeError, prog.vdso_module, "/foo/bar", 0x10000000, True) + + def test_relocatable_module(self): + prog = Program() + + self.assertRaises(LookupError, prog.relocatable_module, "/foo/bar", 0x10000000) + + module = prog.relocatable_module("/foo/bar", 0x10000000, create=True) + self.assertIsInstance(module, RelocatableModule) + + self.assertEqual(prog.relocatable_module("/foo/bar", 0x10000000), module) + self.assertEqual(prog.relocatable_module(b"/foo/bar", 0x10000000), module) + self.assertEqual( + prog.relocatable_module(Path("/foo/bar"), IntWrapper(0x10000000)), module + ) + self.assertEqual( + prog.relocatable_module("/foo/bar", 0x10000000, create=True), module + ) + + self.assertRaises(LookupError, prog.relocatable_module, "/foo/bar", 0x20000000) + self.assertRaises(LookupError, prog.relocatable_module, "/foo/baz", 0x10000000) + + self.assertNotEqual( + prog.relocatable_module("/foo/bar", 0x20000000, create=True), module + ) + self.assertNotEqual( + prog.relocatable_module("/foo/baz", 0x10000000, create=True), module + ) + self.assertNotEqual( + prog.shared_library_module("/foo/bar", 0x10000000, create=True), module + ) + + self.assertIs(module.prog, prog) + self.assertEqual(module.name, "/foo/bar") + self.assertEqual(module.address, 0x10000000) + self._test_module_init_common(module) + + def test_section_addresses(self): + prog = Program() + module = prog.relocatable_module("/foo/bar", 0x10000000, create=True) + + self.assertNotIn(".text", module.section_addresses) + self.assertNotIn(1, module.section_addresses) + + with self.assertRaises(KeyError): + module.section_addresses[".text"] + with self.assertRaises(KeyError): + module.section_addresses[1] + + with 
self.assertRaises(KeyError): + del module.section_addresses[".text"] + with self.assertRaises(KeyError): + del module.section_addresses[1] + + module.section_addresses[".text"] = 0x10000000 + self.assertIn(".text", module.section_addresses) + self.assertEqual(module.section_addresses[".text"], 0x10000000) + + self.assertEqual(len(module.section_addresses), 1) + self.assertCountEqual(list(module.section_addresses), [".text"]) + self.assertCountEqual(list(module.section_addresses.keys()), [".text"]) + self.assertCountEqual(list(module.section_addresses.values()), [0x10000000]) + self.assertCountEqual( + list(module.section_addresses.items()), [(".text", 0x10000000)] + ) + + module.section_addresses[".data"] = 0x10001000 + + self.assertEqual(len(module.section_addresses), 2) + self.assertCountEqual(list(module.section_addresses), [".text", ".data"]) + self.assertCountEqual(list(module.section_addresses.keys()), [".text", ".data"]) + self.assertCountEqual( + list(module.section_addresses.values()), [0x10000000, 0x10001000] + ) + self.assertCountEqual( + list(module.section_addresses.items()), + [(".text", 0x10000000), (".data", 0x10001000)], + ) + + del module.section_addresses[".data"] + + self.assertEqual(len(module.section_addresses), 1) + self.assertCountEqual(list(module.section_addresses), [".text"]) + self.assertCountEqual(list(module.section_addresses.keys()), [".text"]) + self.assertCountEqual(list(module.section_addresses.values()), [0x10000000]) + self.assertCountEqual( + list(module.section_addresses.items()), [(".text", 0x10000000)] + ) + + def test_relocatable_module_invalid(self): + prog = Program() + self.assertRaises(TypeError, prog.relocatable_module) + self.assertRaises(TypeError, prog.relocatable_module, "/foo/bar") + self.assertRaises(TypeError, prog.relocatable_module, "/foo/bar", None) + self.assertRaises(TypeError, prog.relocatable_module, None, 0) + self.assertRaises( + TypeError, prog.relocatable_module, "/foo/bar", 0x10000000, True + ) + + 
def test_extra_module(self): + prog = Program() + + self.assertRaises(LookupError, prog.extra_module, "/foo/bar", 1234) + + module = prog.extra_module("/foo/bar", 1234, create=True) + self.assertIsInstance(module, ExtraModule) + + self.assertEqual(prog.extra_module("/foo/bar", 1234), module) + self.assertEqual(prog.extra_module(b"/foo/bar", 1234), module) + self.assertEqual(prog.extra_module(Path("/foo/bar"), IntWrapper(1234)), module) + self.assertEqual(prog.extra_module("/foo/bar", 1234, create=True), module) + + self.assertRaises(LookupError, prog.extra_module, "/foo/bar", 5678) + self.assertRaises(LookupError, prog.extra_module, "/foo/baz", 1234) + + self.assertNotEqual(prog.extra_module("/foo/bar", 5678, create=True), module) + self.assertNotEqual(prog.extra_module("/foo/baz", 1234, create=True), module) + self.assertNotEqual( + prog.shared_library_module("/foo/bar", 1234, create=True), module + ) + self.assertEqual(prog.extra_module("/foo/bar", create=True).id, 0) + + self.assertIs(module.prog, prog) + self.assertEqual(module.name, "/foo/bar") + self.assertEqual(module.id, 1234) + self._test_module_init_common(module) + + def test_extra_module_invalid(self): + prog = Program() + self.assertRaises(TypeError, prog.extra_module) + self.assertRaises(TypeError, prog.extra_module, "/foo/bar", None) + self.assertRaises(TypeError, prog.extra_module, None, 0) + self.assertRaises(TypeError, prog.extra_module, "/foo/bar", 1234, True) + + def test_address_range(self): + module = Program().extra_module("/foo/bar", create=True) + + module.address_range = (0x10000000, 0x10010000) + self.assertEqual(module.address_range, (0x10000000, 0x10010000)) + + module.address_range = (0x20000000, 0x20020000) + self.assertEqual(module.address_range, (0x20000000, 0x20020000)) + + module.address_range = None + self.assertIsNone(module.address_range) + + module.address_range = None + self.assertIsNone(module.address_range) + + def test_address_range_empty(self): + module = 
Program().extra_module("/foo/bar", create=True) + + module.address_range = (0, 0) + self.assertEqual(module.address_range, (0, 0)) + + def test_address_range_type_error(self): + module = Program().extra_module("/foo/bar", create=True) + + with self.assertRaises(TypeError): + module.address_range = 1 + + with self.assertRaises(TypeError): + module.address_range = (1,) + + with self.assertRaises(TypeError): + module.address_range = (1, 2, 3) + + with self.assertRaises(TypeError): + module.address_range = ("foo", 1) + + with self.assertRaises(TypeError): + module.address_range = (1, "bar") + + def test_address_range_invalid(self): + module = Program().extra_module("/foo/bar", create=True) + + with self.assertRaisesRegex(ValueError, "invalid module address range"): + module.address_range = (0x10010000, 0x10000000) + + with self.assertRaisesRegex(ValueError, "invalid module address range"): + module.address_range = (1, 1) + + with self.assertRaisesRegex(ValueError, "invalid module address range"): + module.address_range = (2**64 - 1, 1) + + with self.assertRaisesRegex(ValueError, "invalid module address range"): + module.address_range = (2**64 - 1, 2**64 - 1) + + def test_address_range_del(self): + module = Program().extra_module("/foo/bar", create=True) + with self.assertRaises(AttributeError): + del module.address_range + + def test_address_ranges_single(self): + module = Program().extra_module("/foo/bar", create=True) + + module.address_ranges = [(0x10000000, 0x10010000)] + self.assertEqual(module.address_range, (0x10000000, 0x10010000)) + self.assertCountEqual(module.address_ranges, [(0x10000000, 0x10010000)]) + + module.address_range = (0x20000000, 0x20010000) + self.assertCountEqual(module.address_ranges, [(0x20000000, 0x20010000)]) + + def test_address_ranges_multiple(self): + module = Program().extra_module("/foo/bar", create=True) + + module.address_ranges = [ + (0x10000000, 0x10010000), + (0x20000000, 0x20010000), + ] + with self.assertRaisesRegex(ValueError, 
"module has multiple address ranges"): + module.address_range + self.assertCountEqual( + module.address_ranges, [(0x10000000, 0x10010000), (0x20000000, 0x20010000)] + ) + + def test_address_ranges_empty(self): + module = Program().extra_module("/foo/bar", create=True) + + module.address_ranges = () + self.assertEqual(module.address_range, (0, 0)) + self.assertCountEqual(module.address_ranges, ()) + + module.address_range = (0, 0) + self.assertCountEqual(module.address_ranges, ()) + + def test_address_ranges_type_error(self): + module = Program().extra_module("/foo/bar", create=True) + + with self.assertRaises(TypeError): + module.address_ranges = 1 + + with self.assertRaises(TypeError): + module.address_ranges = (1,) + + with self.assertRaises(TypeError): + module.address_ranges = ((1,),) + + with self.assertRaises(TypeError): + module.address_ranges = ((1, 2, 3),) + + with self.assertRaises(TypeError): + module.address_ranges = (("foo", 1),) + + with self.assertRaises(TypeError): + module.address_ranges = ((1, "bar"),) + + def test_address_ranges_invalid(self): + module = Program().extra_module("/foo/bar", create=True) + + with self.assertRaisesRegex(ValueError, "invalid module address range"): + module.address_ranges = ((0x10010000, 0x10000000),) + + with self.assertRaisesRegex(ValueError, "invalid module address range"): + module.address_ranges = ((1, 1),) + + with self.assertRaisesRegex(ValueError, "invalid module address range"): + module.address_ranges = ((0, 0),) + + def test_build_id(self): + module = Program().extra_module("/foo/bar", create=True) + + module.build_id = b"\x01\x23\x45\x67\x89\xab\xcd\xef" + self.assertEqual(module.build_id, b"\x01\x23\x45\x67\x89\xab\xcd\xef") + + module.build_id = b"\xfe\xdc\xba\x98\x76\x54\x32\x10" + self.assertEqual(module.build_id, b"\xfe\xdc\xba\x98\x76\x54\x32\x10") + + module.build_id = None + self.assertIsNone(module.build_id) + + module.build_id = None + self.assertIsNone(module.build_id) + + def 
test_build_id_type_error(self): + module = Program().extra_module("/foo/bar", create=True) + with self.assertRaises(TypeError): + module.build_id = "abcd" + + def test_build_id_invalid_empty(self): + module = Program().extra_module("/foo/bar", create=True) + with self.assertRaisesRegex(ValueError, "build ID cannot be empty"): + module.build_id = b"" + + def test_build_id_del(self): + module = Program().extra_module("/foo/bar", create=True) + with self.assertRaises(AttributeError): + del module.build_id + + def test_find_by_name(self): + prog = Program() + self.assertRaises(LookupError, prog.module, "foo") + + module1 = prog.extra_module("foo", create=True) + self.assertEqual(prog.module("foo"), module1) + + module2 = prog.main_module("foo", create=True) + self.assertIn(prog.module("foo"), (module1, module2)) + + self.assertRaises(LookupError, prog.module, "bar") + + def test_find_by_address(self): + prog = Program() + module1 = prog.extra_module("/foo/bar", create=True) + module1.address_range = (0x10000000, 0x10010000) + module2 = prog.extra_module("/asdf/jkl", create=True) + module2.address_range = (0x20000000, 0x20020000) + + self.assertRaises(LookupError, prog.module, 0x0FFFFFFF) + self.assertEqual(prog.module(0x10000000), module1) + self.assertEqual(prog.module(0x10000001), module1) + self.assertEqual(prog.module(0x1000FFFF), module1) + self.assertRaises(LookupError, prog.module, 0x10010000) + + self.assertRaises(LookupError, prog.module, 0x1FFFFFFF) + self.assertEqual(prog.module(0x20000000), module2) + self.assertEqual(prog.module(0x20000001), module2) + self.assertEqual(prog.module(0x2001FFFF), module2) + self.assertRaises(LookupError, prog.module, 0x20020000) + + # Test all of the state transitions that we can without setting a file. 
+ def _test_file_status(self, which): + module = Program().extra_module("/foo/bar", create=True) + + status_attr = which + "_file_status" + wants_file = getattr(module, f"wants_{which}_file") + + self.assertRaises(TypeError, setattr, module, status_attr, 1) + + setattr(module, status_attr, ModuleFileStatus.WANT) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.WANT) + self.assertEqual(wants_file(), True) + for status in set(ModuleFileStatus) - { + ModuleFileStatus.WANT, + ModuleFileStatus.DONT_WANT, + ModuleFileStatus.DONT_NEED, + }: + with self.subTest(from_=ModuleFileStatus.WANT, to=status): + self.assertRaises(ValueError, setattr, module, status_attr, status) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.WANT) + + setattr(module, status_attr, ModuleFileStatus.DONT_WANT) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.DONT_WANT) + self.assertEqual(wants_file(), False) + for status in set(ModuleFileStatus) - { + ModuleFileStatus.WANT, + ModuleFileStatus.DONT_WANT, + ModuleFileStatus.DONT_NEED, + }: + with self.subTest(from_=ModuleFileStatus.DONT_WANT, to=status): + self.assertRaises(ValueError, setattr, module, status_attr, status) + self.assertEqual( + getattr(module, status_attr), ModuleFileStatus.DONT_WANT + ) + + setattr(module, status_attr, ModuleFileStatus.DONT_NEED) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.DONT_NEED) + self.assertEqual(wants_file(), False) + for status in set(ModuleFileStatus) - { + ModuleFileStatus.WANT, + ModuleFileStatus.DONT_WANT, + ModuleFileStatus.DONT_NEED, + }: + with self.subTest(from_=ModuleFileStatus.DONT_NEED, to=status): + self.assertRaises(ValueError, setattr, module, status_attr, status) + self.assertEqual( + getattr(module, status_attr), ModuleFileStatus.DONT_NEED + ) + + setattr(module, status_attr, ModuleFileStatus.DONT_WANT) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.DONT_WANT) + + setattr(module, status_attr, 
ModuleFileStatus.WANT) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.WANT) + + setattr(module, status_attr, ModuleFileStatus.DONT_NEED) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.DONT_NEED) + + setattr(module, status_attr, ModuleFileStatus.WANT) + self.assertEqual(getattr(module, status_attr), ModuleFileStatus.WANT) + + self.assertRaises(AttributeError, delattr, module, status_attr) + + def test_loaded_file_status(self): + self._test_file_status("loaded") + + def test_debug_file_status(self): + self._test_file_status("debug") + + +class TestCreatedModules(TestCase): + def test_empty(self): + self.assertEqual(list(Program().modules()), []) + + def test_one(self): + module = Program().extra_module("/foo/bar", create=True) + self.assertEqual(list(module.prog.modules()), [module]) + + def test_multiple(self): + prog = Program() + modules = [ + prog.extra_module("/foo/bar", create=True), + prog.extra_module("/asdf/jkl", create=True), + prog.extra_module("/123/456", create=True), + ] + self.assertCountEqual(list(prog.modules()), modules) + + def test_same_name(self): + prog = Program() + modules = [ + prog.extra_module("foo", id=0, create=True), + prog.main_module("foo", create=True), + ] + actual = list(prog.modules()) + self.assertCountEqual(actual, modules) + self.assertEqual(actual[0], prog.main_module()) + + modules.append(prog.extra_module("foo", id=1, create=True)) + actual = list(prog.modules()) + self.assertCountEqual(actual, modules) + self.assertEqual(actual[0], prog.main_module()) + + def test_change_during_iteration(self): + prog = Program() + prog.extra_module("/foo/bar", create=True) + with self.assertRaisesRegex(Exception, "modules changed during iteration"): + for module in prog.modules(): + prog.extra_module("/asdf/jkl", create=True) + prog.extra_module("/123/456", create=True) diff --git a/tests/test_object.py b/tests/test_object.py index 5c21eb422..5b0509482 100644 --- a/tests/test_object.py +++ 
b/tests/test_object.py @@ -6,6 +6,7 @@ import struct from drgn import ( + AbsenceReason, FaultError, Object, ObjectAbsentError, @@ -43,7 +44,7 @@ def test_type(self): ValueError, "absent object must have type", Object, self.prog ) - def test_address_nand_value(self): + def test_address_value_absence_reason_nand(self): self.assertRaisesRegex( ValueError, "object cannot have address and value", @@ -62,6 +63,34 @@ def test_address_nand_value(self): value=0, address=0, ) + self.assertRaisesRegex( + ValueError, + "object cannot have address and absence reason", + Object, + self.prog, + "int", + address=0, + absence_reason=AbsenceReason.OTHER, + ) + self.assertRaisesRegex( + ValueError, + "object cannot have value and absence reason", + Object, + self.prog, + "int", + value=0, + absence_reason=AbsenceReason.OTHER, + ) + self.assertRaisesRegex( + ValueError, + "object cannot have address, value, and absence reason", + Object, + self.prog, + "int", + value=0, + address=0, + absence_reason=AbsenceReason.OTHER, + ) def test_integer_address(self): self.assertRaises(TypeError, Object, self.prog, "int", address="NULL") @@ -644,6 +673,7 @@ def test_signed(self): self.assertIs(obj.prog_, self.prog) self.assertIdentical(obj.type_, self.prog.type("int")) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertIsNone(obj.bit_offset_) self.assertIsNone(obj.bit_field_size_) @@ -677,6 +707,7 @@ def test_unsigned(self): self.assertIs(obj.prog_, self.prog) self.assertIdentical(obj.type_, self.prog.type("unsigned int")) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertIsNone(obj.bit_offset_) self.assertIsNone(obj.bit_field_size_) @@ -766,6 +797,7 @@ def test_signed_big(self): self.assertIs(obj.prog_, self.prog) self.assertIdentical(obj.type_, self.prog.int_type("__int128", 16, True)) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) 
self.assertIsNone(obj.address_) self.assertIsNone(obj.bit_offset_) self.assertIsNone(obj.bit_field_size_) @@ -799,6 +831,7 @@ def test_unsigned_big(self): obj.type_, self.prog.int_type("unsigned __int128", 16, False) ) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertIsNone(obj.bit_offset_) self.assertIsNone(obj.bit_field_size_) @@ -863,6 +896,7 @@ def test_float(self): self.assertIs(obj.prog_, self.prog) self.assertIdentical(obj.type_, self.prog.type("double")) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertEqual(obj.value_(), 3.14) self.assertEqual(repr(obj), "Object(prog, 'double', value=3.14)") @@ -1118,6 +1152,7 @@ def truncate(x, bit_size): def test_pointer(self): obj = Object(self.prog, "int *", value=0xFFFF0000) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertEqual(obj.value_(), 0xFFFF0000) self.assertEqual(repr(obj), "Object(prog, 'int *', value=0xffff0000)") @@ -1129,6 +1164,7 @@ def test_pointer_typedef(self): value=0xFFFF0000, ) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertEqual(obj.value_(), 0xFFFF0000) self.assertEqual(repr(obj), "Object(prog, 'INTP', value=0xffff0000)") @@ -1136,6 +1172,7 @@ def test_pointer_typedef(self): def test_array(self): obj = Object(self.prog, "int [2]", value=[1, 2]) self.assertFalse(obj.absent_) + self.assertIsNone(obj.absence_reason_) self.assertIsNone(obj.address_) self.assertIdentical(obj[0], Object(self.prog, "int", value=1)) @@ -1215,6 +1252,9 @@ def test_basic(self): self.assertIs(obj.prog_, self.prog) self.assertIdentical(obj.type_, self.prog.type("int")) self.assertTrue(obj.absent_) + self.assertEqual( + Object(self.prog, "int").absence_reason_, AbsenceReason.OTHER + ) self.assertIsNone(obj.address_) self.assertIsNone(obj.bit_offset_) 
self.assertIsNone(obj.bit_field_size_) @@ -1223,6 +1263,13 @@ def test_basic(self): self.assertRaises(ObjectAbsentError, obj.read_) + def test_reason(self): + obj = Object(self.prog, "int", absence_reason=AbsenceReason.OPTIMIZED_OUT) + self.assertEqual(obj.absence_reason_, AbsenceReason.OPTIMIZED_OUT) + self.assertEqual( + repr(obj), "Object(prog, 'int', absence_reason=AbsenceReason.OPTIMIZED_OUT)" + ) + def test_bit_field(self): obj = Object(self.prog, "int", bit_field_size=1) self.assertIs(obj.prog_, self.prog) diff --git a/tests/test_plugins.py b/tests/test_plugins.py new file mode 100644 index 000000000..2794aa42b --- /dev/null +++ b/tests/test_plugins.py @@ -0,0 +1,218 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# SPDX-License-Identifier: LGPL-2.1-or-later + +import logging +import os +from pathlib import Path +import sys +import tempfile +import unittest.mock + +import _drgn_util.plugins +from _drgn_util.plugins import call_plugins +from tests import TestCase, modifyenv + + +class TestPlugins(TestCase): + def setUp(self): + # Clear the plugin and hook caches before each test. + _drgn_util.plugins._plugins = None + _drgn_util.plugins._hooks.clear() + + # pkg_resources caches distributions on import. Delete it before each + # test so that it is reloaded. + sys.modules.pop("pkg_resources", None) + + # These tests change these environment variables and sys.path, so + # restore them after each test. + self.enterContext( + modifyenv({"DRGN_PLUGINS": None, "DRGN_DISABLE_PLUGINS": None}) + ) + self.addCleanup(setattr, sys, "path", list(sys.path)) + + # Delete modules imported by each test so that we can reuse the same + # module names. 
+ def restore_modules(old_modules): + for new_module in set(sys.modules) - old_modules: + sys.modules.pop(new_module, None) + + self.addCleanup(restore_modules, set(sys.modules)) + + @staticmethod + def _create_plugin(dir): + plugin_path = Path(dir) / "test_plugin.py" + plugin_path.write_text( + """\ +def drgn_test_hook(call_me): + call_me() +""" + ) + return plugin_path + + @staticmethod + def _create_dist_info(dir, module_name="test_plugin", entry_point_name="test"): + dist_info_dir = Path(dir) / f"{module_name}-1.0.dist-info" + dist_info_dir.mkdir() + (dist_info_dir / "METADATA").write_text( + f"""\ +Metadata-Version: 1.1 +Name: {module_name} +Version: 1.0 +""" + ) + (dist_info_dir / "entry_points.txt").write_text( + f"""\ +[drgn.plugins] +{entry_point_name} = {module_name} +""" + ) + + def test_entry_point(self): + with tempfile.TemporaryDirectory() as temp_dir: + self._create_plugin(temp_dir) + self._create_dist_info(temp_dir) + sys.path.insert(0, temp_dir) + + call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + call_me.assert_called_once() + + def test_drgn_disable_plugins_envvar_all(self): + with tempfile.TemporaryDirectory() as temp_dir: + self._create_plugin(temp_dir) + self._create_dist_info(temp_dir) + sys.path.insert(0, temp_dir) + os.environ["DRGN_DISABLE_PLUGINS"] = "*" + + call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + call_me.assert_not_called() + + def test_drgn_disable_plugins_envvar_specific(self): + with tempfile.TemporaryDirectory() as temp_dir: + self._create_plugin(temp_dir) + self._create_dist_info(temp_dir) + sys.path.insert(0, temp_dir) + os.environ["DRGN_DISABLE_PLUGINS"] = "foo,test" + + call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + call_me.assert_not_called() + + def test_drgn_plugins_envvar_path(self): + with tempfile.TemporaryDirectory() as temp_dir: + plugin_path = self._create_plugin(temp_dir) + os.environ["DRGN_PLUGINS"] = f"test={plugin_path}" + + 
call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + call_me.assert_called_once() + + def test_drgn_plugins_envvar_module(self): + with tempfile.TemporaryDirectory() as temp_dir: + plugin_path = self._create_plugin(temp_dir) + sys.path.insert(0, temp_dir) + os.environ["DRGN_PLUGINS"] = f"test={plugin_path.stem}" + + call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + call_me.assert_called_once() + + def test_drgn_plugins_envvar_precedence(self): + with tempfile.TemporaryDirectory() as temp_dir: + self._create_plugin(temp_dir) + self._create_dist_info(temp_dir) + sys.path.insert(0, temp_dir) + os.environ["DRGN_DISABLE_PLUGINS"] = "*" + os.environ["DRGN_PLUGINS"] = "test" + + call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + call_me.assert_called_once() + + def test_priority(self): + with tempfile.TemporaryDirectory() as temp_dir: + (Path(temp_dir) / "test_plugin1.py").write_text( + """\ +def drgn_test_hook(call_me): + call_me(1) +drgn_test_hook.drgn_priority = 75 +""" + ) + (Path(temp_dir) / "test_plugin2.py").write_text( + """\ +def drgn_test_hook(call_me): + call_me(2) +drgn_test_hook.drgn_priority = 25 +""" + ) + (Path(temp_dir) / "test_plugin3.py").write_text( + """\ +def drgn_test_hook(call_me): + call_me(3) +""" + ) + self._create_dist_info(temp_dir, "test_plugin1", "test1") + self._create_dist_info(temp_dir, "test_plugin2", "test2") + self._create_dist_info(temp_dir, "test_plugin3", "test3") + sys.path.insert(0, temp_dir) + + call_me = unittest.mock.Mock() + call_plugins("drgn_test_hook", call_me) + self.assertEqual( + call_me.call_args_list, + [unittest.mock.call(2), unittest.mock.call(3), unittest.mock.call(1)], + ) + + def test_plugin_exception(self): + with tempfile.TemporaryDirectory() as temp_dir: + (Path(temp_dir) / "test_plugin.py").write_text('raise Exception("foo")\n') + self._create_dist_info(temp_dir) + sys.path.insert(0, temp_dir) + + with 
self.assertLogs(logging.getLogger("drgn.plugins"), "WARNING") as cm: + call_plugins("drgn_test_hook") + self.assertTrue( + any( + message.startswith("WARNING:drgn.plugins:failed to load 'test") + for message in cm.output + ), + msg=f"no match in {cm.output}", + ) + + def test_hook_exception(self): + with tempfile.TemporaryDirectory() as temp_dir: + (Path(temp_dir) / "test_plugin.py").write_text( + """\ +def drgn_test_hook(): + raise Exception("foo") +""" + ) + self._create_dist_info(temp_dir) + sys.path.insert(0, temp_dir) + + with self.assertLogs(logging.getLogger("drgn.plugins"), "WARNING") as cm: + call_plugins("drgn_test_hook") + self.assertTrue( + any( + message.startswith( + "WARNING:drgn.plugins:'test' drgn_test_hook failed:" + ) + for message in cm.output + ), + msg=f"no match in {cm.output}", + ) + + def test_missing_entry_point(self): + os.environ["DRGN_PLUGINS"] = "__non__existent__entrypoint__" + with self.assertLogs(logging.getLogger("drgn.plugins"), "WARNING") as cm: + call_plugins("drgn_test_hook") + self.assertTrue( + any( + message.startswith( + "WARNING:drgn.plugins:not found: '__non__existent__entrypoint__'" + ) + for message in cm.output + ), + msg=f"no match in {cm.output}", + ) diff --git a/tests/test_program.py b/tests/test_program.py index b3b22483b..18ae3aec6 100644 --- a/tests/test_program.py +++ b/tests/test_program.py @@ -162,6 +162,10 @@ def test_language(self): TypeError, "language must be Language", setattr, prog, "language", "CPP" ) + def test_language_del(self): + with self.assertRaises(AttributeError): + del Program().language + class TestMemory(TestCase): def test_simple_read(self): @@ -403,6 +407,42 @@ def test_invalid_read_fn(self): 8, ) + def test_python_fault_error(self): + def fault_memory_reader(address, count, offset, physical): + raise FaultError("fault from Python", address) + + prog = Program(MOCK_PLATFORM) + prog.add_memory_segment(0xFFFF0000, 8, fault_memory_reader) + + with self.assertRaises(FaultError) as cm: + 
Object(prog, "int", address=0xFFFF0004).read_() + self.assertEqual(cm.exception.message, "fault from Python") + self.assertEqual(cm.exception.address, 0xFFFF0004) + + # If the FaultError from Python is translated to a drgn_error + # correctly, then this shouldn't raise an exception. + str(Object(prog, "int *", 0xFFFF0004)) + + def test_python_fault_error_invalid_message(self): + def fault_memory_reader(address, count, offset, physical): + raise FaultError(None, address) + + prog = Program(MOCK_PLATFORM) + prog.add_memory_segment(0xFFFF0000, 8, fault_memory_reader) + + # Just test that it doesn't crash. + self.assertRaises(Exception, Object(prog, "int", address=0xFFFF0004).read_) + + def test_python_fault_error_invalid_address(self): + def fault_memory_reader(address, count, offset, physical): + raise FaultError("fault from Python", None) + + prog = Program(MOCK_PLATFORM) + prog.add_memory_segment(0xFFFF0000, 8, fault_memory_reader) + + # Just test that it doesn't crash. + self.assertRaises(Exception, Object(prog, "int", address=0xFFFF0004).read_) + class TestTypeFinder(TestCase): def test_register(self): diff --git a/tests/test_stack_trace.py b/tests/test_stack_trace.py new file mode 100644 index 000000000..2f05dd452 --- /dev/null +++ b/tests/test_stack_trace.py @@ -0,0 +1,33 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. 
+# SPDX-License-Identifier: LGPL-2.1-or-later + +from drgn import Program +from tests import TestCase +from tests.resources import get_resource + + +class TestLinuxUserspaceCoreDump(TestCase): + @classmethod + def setUpClass(cls): + cls.prog = Program() + cls.prog.set_enabled_debug_info_finders([]) + cls.prog.set_core_dump(get_resource("crashme.core")) + cls.prog.load_debug_info([get_resource("crashme"), get_resource("crashme.so")]) + cls.trace = cls.prog.crashed_thread().stack_trace() + + @classmethod + def tearDownClass(cls): + del cls.trace + del cls.prog + + def test_stack_frame_name(self): + self.assertEqual(self.trace[0].name, "c") + self.assertEqual(self.trace[5].name, "0x7f6112ad8088") + self.assertEqual(self.trace[7].name, "_start") + self.assertEqual(self.trace[8].name, "???") + + def test_stack_frame_function_name(self): + self.assertEqual(self.trace[0].function_name, "c") + self.assertIsNone(self.trace[5].function_name) + self.assertIsNone(self.trace[7].function_name) + self.assertIsNone(self.trace[8].function_name) diff --git a/tests/test_symbol.py b/tests/test_symbol.py index d9cc3dd94..3c8598fb9 100644 --- a/tests/test_symbol.py +++ b/tests/test_symbol.py @@ -1,46 +1,150 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later + +import itertools +import lzma import tempfile +import unittest -from _drgn_util.elf import ET, PT, SHT, STB, STT +from _drgn_util.elf import ET, PT, SHF, SHT, STB, STT +import drgn from drgn import Program, Symbol, SymbolBinding, SymbolIndex, SymbolKind from tests import TestCase -from tests.dwarfwriter import dwarf_sections +from tests.dwarfwriter import create_dwarf_file from tests.elfwriter import ElfSection, ElfSymbol, create_elf_file -def create_elf_symbol_file(symbols): - # We need some DWARF data so that libdwfl will load the file. 
- sections = dwarf_sections(()) +def add_shndx(symbols, shndx): + return [ + symbol._replace(shindex=shndx if symbol.shindex is None else symbol.shindex) + for symbol in symbols + ] + + +def create_elf_symbol_file( + symbols=(), + dynamic_symbols=(), + gnu_debugdata_symbols=(), + dwarf=False, + loadable=True, +): + def symbols_start(symbols): + return min(symbol.value for symbol in symbols) + + def symbols_end(symbols): + return max(symbol.value + max(symbol.size, 1) for symbol in symbols) + + assert symbols or dynamic_symbols or gnu_debugdata_symbols + start = float("inf") + end = float("-inf") + if symbols: + start = min(start, symbols_start(symbols)) + end = max(end, symbols_end(symbols)) + if dynamic_symbols: + start = min(start, symbols_start(dynamic_symbols)) + end = max(end, symbols_end(dynamic_symbols)) + if gnu_debugdata_symbols: + start = min(start, symbols_start(gnu_debugdata_symbols)) + end = max(end, symbols_end(gnu_debugdata_symbols)) + + start &= ~7 + end = (end + 7) & ~7 + # Create a section for the symbols to reference and the corresponding - # segment for address lookups. - min_address = min(symbol.value for symbol in symbols) - max_address = max(symbol.value + symbol.size for symbol in symbols) - sections.append( + # segment for address lookups. It must be SHF_ALLOC and must not be + # SHT_NOBITS or SHT_NOTE for the file to be loadable. 
+ size = end - start + assert size <= 4096, "symbols are too far apart; file would be too large" + sections = [ ElfSection( - name=".foo", - sh_type=SHT.NOBITS, + name=".data", + sh_type=SHT.PROGBITS, + sh_flags=SHF.ALLOC if loadable else 0, p_type=PT.LOAD, - vaddr=min_address, - memsz=max_address - min_address, + vaddr=start, + memsz=size, + data=bytes(size), + ), + ] + symbols = add_shndx(symbols, len(sections)) + dynamic_symbols = add_shndx(dynamic_symbols, len(sections)) + + if gnu_debugdata_symbols: + gds_sections = [ + ElfSection( + name=".data", + sh_type=SHT.NOBITS, + sh_flags=SHF.ALLOC, + p_type=PT.LOAD, + vaddr=start, + memsz=size, + ), + ] + gds_contents = create_elf_file( + ET.EXEC, + sections=gds_sections, + symbols=add_shndx(gnu_debugdata_symbols, len(gds_sections)), ) - ) - symbols = [ - symbol._replace( - shindex=len(sections) if symbol.shindex is None else symbol.shindex + compressor = lzma.LZMACompressor() + gds_compressed = compressor.compress(gds_contents) + compressor.flush() + sections.append( + ElfSection( + name=".gnu_debugdata", + sh_type=SHT.PROGBITS, + memsz=len(gds_compressed), + data=gds_compressed, + ) ) - for symbol in symbols - ] - return create_elf_file(ET.EXEC, sections, symbols) + + if dwarf: + contents = create_dwarf_file( + (), + sections=sections, + symbols=symbols, + dynamic_symbols=dynamic_symbols, + ) + else: + contents = create_elf_file( + ET.EXEC, + sections=sections, + symbols=symbols, + dynamic_symbols=dynamic_symbols, + ) + + return contents, start, end + + +def module_set_elf_symbol_file(module, **kwargs): + contents, start, end = create_elf_symbol_file(**kwargs) + + with tempfile.NamedTemporaryFile() as f: + f.write(contents) + f.flush() + + if module.address_range is None: + for other_module in module.prog.modules(): + other_address_range = other_module.address_range + if other_address_range is not None: + other_start, other_end = other_address_range + assert ( + end <= other_start or start >= other_end + ), 
f"{module.name} overlaps {other_module.name}" + module.address_range = (start, end) + else: + assert (start, end) == module.address_range + + module.try_file(f.name, force=True) + + +def program_add_elf_symbol_file(prog, name, **kwargs): + module = prog.extra_module(name, create=True) + module_set_elf_symbol_file(module, **kwargs) def elf_symbol_program(*modules): prog = Program() - for symbols in modules: - with tempfile.NamedTemporaryFile() as f: - f.write(create_elf_symbol_file(symbols)) - f.flush() - prog.load_debug_info([f.name]) + for i, symbols in enumerate(modules): + program_add_elf_symbol_file(prog, f"module{i}", symbols=symbols) return prog @@ -78,59 +182,164 @@ def test_by_address(self): self.assert_symbols_equal_unordered(prog.symbols(0xFFFF000C), [second]) self.assertRaises(LookupError, prog.symbol, 0xFFFF0010) - def test_by_address_precedence(self): - precedence = (STB.GLOBAL, STB.WEAK, STB.LOCAL) - drgn_precedence = ( - SymbolBinding.GLOBAL, - SymbolBinding.WEAK, - SymbolBinding.LOCAL, + def test_by_address_closest(self): + # If two symbols contain the given address, then the one whose start + # address is closest to the given address should be preferred + # (regardless of the binding of either symbol). 
+ elf_closest = ElfSymbol("closest", 0xFFFF0008, 0x8, STT.OBJECT, STB.WEAK) + elf_furthest = ElfSymbol("furthest", 0xFFFF0000, 0xC, STT.OBJECT, STB.GLOBAL) + closest = Symbol( + "closest", 0xFFFF0008, 0x8, SymbolBinding.WEAK, SymbolKind.OBJECT + ) + furthest = Symbol( + "furthest", 0xFFFF0000, 0xC, SymbolBinding.GLOBAL, SymbolKind.OBJECT ) - def assert_find_higher(*modules): - self.assertEqual( - elf_symbol_program(*modules).symbol(0xFFFF0000).name, "foo" + def test(elf_symbols): + prog = elf_symbol_program(elf_symbols) + self.assertEqual(prog.symbol(0xFFFF000B), closest) + self.assert_symbols_equal_unordered( + prog.symbols(0xFFFF000B), [closest, furthest] ) - def assert_finds_both(symbols, *modules): + with self.subTest("closest first"): + test([elf_closest, elf_furthest]) + + with self.subTest("furthest first"): + test([elf_furthest, elf_closest]) + + def test_by_address_closest_end(self): + # If two symbols contain the given address and have the same start + # address, then the one whose end address is closest to the given + # address should be preferred (regardless of the binding of either + # symbol). 
+ elf_closest = ElfSymbol("closest", 0xFFFF0000, 0xC, STT.OBJECT, STB.WEAK) + elf_furthest = ElfSymbol("furthest", 0xFFFF0000, 0x10, STT.OBJECT, STB.GLOBAL) + closest = Symbol( + "closest", 0xFFFF0000, 0xC, SymbolBinding.WEAK, SymbolKind.OBJECT + ) + furthest = Symbol( + "furthest", 0xFFFF0000, 0x10, SymbolBinding.GLOBAL, SymbolKind.OBJECT + ) + + def test(elf_symbols): + prog = elf_symbol_program(elf_symbols) + self.assertEqual(prog.symbol(0xFFFF000B), closest) self.assert_symbols_equal_unordered( - elf_symbol_program(*modules).symbols(0xFFFF0000), - symbols, + prog.symbols(0xFFFF000B), [closest, furthest] ) - for i in range(len(precedence) - 1): - higher_binding = precedence[i] - higher_binding_drgn = drgn_precedence[i] - for j in range(i + 1, len(precedence)): - lower_binding = precedence[j] - lower_binding_drgn = drgn_precedence[j] - with self.subTest(higher=higher_binding, lower=lower_binding): - higher = ElfSymbol( - "foo", 0xFFFF0000, 0x8, STT.OBJECT, higher_binding - ) - lower = ElfSymbol("bar", 0xFFFF0000, 0x8, STT.OBJECT, lower_binding) - symbols = [ - Symbol( - "foo", - 0xFFFF0000, - 0x8, - higher_binding_drgn, - SymbolKind.OBJECT, - ), - Symbol( - "bar", - 0xFFFF0000, - 0x8, - lower_binding_drgn, - SymbolKind.OBJECT, - ), - ] - # Local symbols must be before global symbols. 
- if lower_binding != STB.LOCAL: - with self.subTest("higher before lower"): - assert_find_higher((higher, lower)) - with self.subTest("lower before higher"): - assert_find_higher((lower, higher)) - assert_finds_both(symbols, (lower, higher)) + with self.subTest("closest first"): + test([elf_closest, elf_furthest]) + + with self.subTest("furthest first"): + test([elf_furthest, elf_closest]) + + def test_by_address_sizeless(self): + label = ElfSymbol("label", 0xFFFF0008, 0x0, STT.FUNC, STB.LOCAL) + less = ElfSymbol("less", 0xFFFF0000, 0x4, STT.FUNC, STB.LOCAL) + greater = ElfSymbol("greater", 0xFFFF0010, 0x4, STT.FUNC, STB.LOCAL) + + expected = Symbol( + "label", 0xFFFF0008, 0x0, SymbolBinding.LOCAL, SymbolKind.FUNC + ) + + # Test every permutation of every combination of symbols that includes + # "label". + for elf_symbols in itertools.chain.from_iterable( + itertools.permutations((label,) + extra_elf_symbols) + for r in range(3) + for extra_elf_symbols in itertools.combinations((less, greater), r) + ): + with self.subTest(elf_symbols=[sym.name for sym in elf_symbols]): + prog = elf_symbol_program(elf_symbols) + self.assertEqual(prog.symbol(0xFFFF0009), expected) + self.assertEqual(prog.symbols(0xFFFF0009), [expected]) + + def test_by_address_sizeless_subsumed(self): + label = ElfSymbol("label", 0xFFFF0008, 0x0, STT.FUNC, STB.LOCAL) + subsume = ElfSymbol("subsume", 0xFFFF0004, 0x8, STT.FUNC, STB.LOCAL) + less = ElfSymbol("less", 0xFFFF0000, 0x4, STT.FUNC, STB.LOCAL) + greater = ElfSymbol("greater", 0xFFFF0010, 0x4, STT.FUNC, STB.LOCAL) + + expected = Symbol( + "subsume", 0xFFFF0004, 0x8, SymbolBinding.LOCAL, SymbolKind.FUNC + ) + + # Test every permutation of every combination of symbols that includes + # "label" and "subsume". 
+ for elf_symbols in itertools.chain.from_iterable( + itertools.permutations((label, subsume) + extra_elf_symbols) + for r in range(3) + for extra_elf_symbols in itertools.combinations((less, greater), r) + ): + with self.subTest(elf_symbols=[sym.name for sym in elf_symbols]): + prog = elf_symbol_program(elf_symbols) + self.assertEqual(prog.symbol(0xFFFF0009), expected) + self.assertEqual(prog.symbols(0xFFFF0009), [expected]) + + def test_by_address_sizeless_wrong_section(self): + prog = elf_symbol_program( + (ElfSymbol("label", 0xFFFF0008, 0x0, STT.FUNC, STB.LOCAL),) + ) + for module in prog.modules(): + start, end = module.address_range + module.address_range = (start, 0xFFFFFF00) + self.assertRaises(LookupError, prog.symbol, 0xFFFFFE00) + + def test_by_address_binding_precedence(self): + precedence = ( + (STB.GLOBAL, STB.GNU_UNIQUE), + (STB.WEAK,), + (STB.LOCAL, STB.HIPROC), + ) + + def assert_find_higher(*modules, both): + prog = elf_symbol_program(*modules) + self.assertEqual(prog.symbol(0xFFFF0000).name, "foo") + # Test that symbols() finds both if expected or either one if not. + if both: + self.assertCountEqual( + [sym.name for sym in prog.symbols(0xFFFF0000)], ["foo", "bar"] + ) + else: + self.assertIn( + [sym.name for sym in prog.symbols(0xFFFF0000)], (["foo"], ["bar"]) + ) + + for size in (8, 0): + with self.subTest(size=size): + for i in range(len(precedence) - 1): + for higher_binding in precedence[i]: + for j in range(i + 1, len(precedence)): + for lower_binding in precedence[j]: + with self.subTest( + higher=higher_binding, lower=lower_binding + ): + higher = ElfSymbol( + "foo", + 0xFFFF0000, + size, + STT.OBJECT, + higher_binding, + ) + lower = ElfSymbol( + "bar", + 0xFFFF0000, + size, + STT.OBJECT, + lower_binding, + ) + # Local symbols must be before global symbols. 
+ if lower_binding not in precedence[-1]: + with self.subTest("higher before lower"): + assert_find_higher( + (higher, lower), both=size > 0 + ) + with self.subTest("lower before higher"): + assert_find_higher( + (lower, higher), both=size > 0 + ) def test_by_name(self): elf_first = ElfSymbol("first", 0xFFFF0000, 0x8, STT.OBJECT, STB.GLOBAL) @@ -156,7 +365,7 @@ def test_by_name(self): self.assert_symbols_equal_unordered(prog.symbols("second"), [second]) self.assertEqual(prog.symbols("third"), []) - def test_by_name_precedence(self): + def test_by_name_binding_precedence(self): precedence = ( (STB.GLOBAL, STB.GNU_UNIQUE), (STB.WEAK,), @@ -170,10 +379,9 @@ def assert_find_higher(*modules): prog = elf_symbol_program(*modules) self.assertEqual(prog.symbol("foo").address, expected) # assert symbols() always finds both - symbols = sorted(prog.symbols("foo"), key=lambda s: s.address) - self.assertEqual(len(symbols), 2) - self.assertEqual(symbols[0].address, other) - self.assertEqual(symbols[1].address, expected) + self.assertCountEqual( + [sym.address for sym in prog.symbols("foo")], [expected, other] + ) for i in range(len(precedence) - 1): for higher_binding in precedence[i]: @@ -264,6 +472,297 @@ def test_all_symbols(self): prog = elf_symbol_program(*elf_syms) self.assert_symbols_equal_unordered(prog.symbols(), syms) + def test_dynsym(self): + prog = Program() + program_add_elf_symbol_file( + prog, + "module0", + dynamic_symbols=[ + ElfSymbol("sym", 0xFFFF0000, 0x8, STT.OBJECT, STB.LOCAL), + ], + ) + + sym = Symbol("sym", 0xFFFF0000, 0x8, SymbolBinding.LOCAL, SymbolKind.OBJECT) + self.assertEqual(prog.symbol("sym"), sym) + self.assertEqual(prog.symbol(0xFFFF0004), sym) + + def test_ignore_dynsym_same_file(self): + # Test that .dynsym is ignored in a file with both .symtab and .dynsym. + prog = Program() + program_add_elf_symbol_file( + prog, + "module0", + # Normally .symtab is a superset of .dynsym, but to test that we + # ignore .dynsym, make them distinct. 
+ symbols=[ + ElfSymbol("full", 0xFFFF0000, 0x8, STT.OBJECT, STB.LOCAL), + ], + dynamic_symbols=[ + ElfSymbol("partial", 0xFFFF0000, 0x8, STT.OBJECT, STB.LOCAL), + ], + ) + + self.assertRaises(LookupError, prog.symbol, "partial") + + full = Symbol("full", 0xFFFF0000, 0x8, SymbolBinding.LOCAL, SymbolKind.OBJECT) + self.assertEqual(prog.symbol("full"), full) + self.assertEqual(prog.symbol(0xFFFF0004), full) + + def test_ignore_dynsym_separate_files(self): + # Same as test_ignore_dynsym_same_file(), except .symtab and .dynsym + # are in different files. + prog = Program() + program_add_elf_symbol_file( + prog, + "module0", + dynamic_symbols=[ + ElfSymbol("partial", 0xFFFF0000, 0x8, STT.OBJECT, STB.LOCAL), + ], + ) + program_add_elf_symbol_file( + prog, + "module0", + symbols=[ + ElfSymbol("full", 0xFFFF0000, 0x8, STT.OBJECT, STB.LOCAL), + ], + dwarf=True, + ) + + self.assertRaises(LookupError, prog.symbol, "partial") + + full = Symbol("full", 0xFFFF0000, 0x8, SymbolBinding.LOCAL, SymbolKind.OBJECT) + self.assertEqual(prog.symbol("full"), full) + self.assertEqual(prog.symbol(0xFFFF0004), full) + + def test_override_dynsym(self): + # Same as test_ignore_dynsym_separate_files(), except we do a lookup in + # .dynsym before we have .symtab. 
+ prog = Program() + program_add_elf_symbol_file( + prog, + "module0", + dynamic_symbols=[ + ElfSymbol("partial", 0xFFFF0000, 0x8, STT.OBJECT, STB.LOCAL), + ], + ) + + partial = Symbol( + "partial", 0xFFFF0000, 0x8, SymbolBinding.LOCAL, SymbolKind.OBJECT + ) + self.assertEqual(prog.symbol("partial"), partial) + self.assertEqual(prog.symbol(0xFFFF0004), partial) + + program_add_elf_symbol_file( + prog, + "module0", + symbols=[ + ElfSymbol("full", 0xFFFF0000, 0x8, STT.OBJECT, STB.LOCAL), + ], + dwarf=True, + ) + + self.assertRaises(LookupError, prog.symbol, "partial") + + full = Symbol("full", 0xFFFF0000, 0x8, SymbolBinding.LOCAL, SymbolKind.OBJECT) + self.assertEqual(prog.symbol("full"), full) + self.assertEqual(prog.symbol(0xFFFF0004), full) + + +@unittest.skipUnless(drgn._with_lzma, "built without lzma support") +class TestGnuDebugdata(TestCase): + + def assert_all_symbols_found_by_name(self, prog, symbols): + for symbol in symbols: + self.assertEqual(prog.symbol(symbol.name), symbol) + + def assert_all_symbols_found_by_address(self, prog, symbols): + for symbol in symbols: + self.assertEqual(prog.symbol(symbol.address), symbol) + self.assertEqual(prog.symbol(symbol.address + symbol.size - 1), symbol) + + def assert_all_symbols_returned_by_lookup(self, prog, symbols): + def sort_key(sym): + return (sym.address, sym.name) + + expected = sorted(symbols, key=sort_key) + actual = prog.symbols() + actual.sort(key=sort_key) + self.assertEqual(expected, actual) + + def test_gnu_debugdata_and_dynamic_lookup(self): + gnu_symbols = [ + ElfSymbol("first", 0xFFFF0000, 0x8, STT.FUNC, STB.LOCAL), + ElfSymbol("second", 0xFFFF0018, 0x8, STT.FUNC, STB.LOCAL), + ] + dynamic_symbols = [ + ElfSymbol("third", 0xFFFF0010, 0x8, STT.FUNC, STB.LOCAL), + ElfSymbol("fourth", 0xFFFF0008, 0x8, STT.FUNC, STB.LOCAL), + ] + prog = Program() + program_add_elf_symbol_file( + prog, + "module0", + dynamic_symbols=dynamic_symbols, + gnu_debugdata_symbols=gnu_symbols, + ) + drgn_symbols = [ + 
Symbol("first", 0xFFFF0000, 0x8, SymbolBinding.LOCAL, SymbolKind.FUNC), + Symbol("second", 0xFFFF0018, 0x8, SymbolBinding.LOCAL, SymbolKind.FUNC), + Symbol("third", 0xFFFF0010, 0x8, SymbolBinding.LOCAL, SymbolKind.FUNC), + Symbol("fourth", 0xFFFF0008, 0x8, SymbolBinding.LOCAL, SymbolKind.FUNC), + ] + self.assert_all_symbols_found_by_name(prog, drgn_symbols) + self.assert_all_symbols_found_by_address(prog, drgn_symbols) + self.assert_all_symbols_returned_by_lookup(prog, drgn_symbols) + + def test_sizeless_symbols_gnu_debugdata(self): + gnu_symbols = [ + ElfSymbol("zero", 0xFFFF0000, 0x0, STT.FUNC, STB.LOCAL), + ElfSymbol("two", 0xFFFF0002, 0x4, STT.FUNC, STB.LOCAL), + ElfSymbol("ten", 0xFFFF000A, 0x0, STT.FUNC, STB.LOCAL), + ] + dynamic_symbols = [ + ElfSymbol("four", 0xFFFF0004, 0x0, STT.FUNC, STB.LOCAL), + ElfSymbol("eight", 0xFFFF0008, 0x0, STT.FUNC, STB.LOCAL), + ] + drgn_symbols = { + s.name: s + for s in ( + Symbol("zero", 0xFFFF0000, 0x0, SymbolBinding.LOCAL, SymbolKind.FUNC), + Symbol("two", 0xFFFF0002, 0x4, SymbolBinding.LOCAL, SymbolKind.FUNC), + Symbol("four", 0xFFFF0004, 0x0, SymbolBinding.LOCAL, SymbolKind.FUNC), + Symbol("eight", 0xFFFF0008, 0x0, SymbolBinding.LOCAL, SymbolKind.FUNC), + Symbol("ten", 0xFFFF000A, 0x0, SymbolBinding.LOCAL, SymbolKind.FUNC), + ) + } + + for swap in (False, True): + prog = Program() + program_add_elf_symbol_file( + prog, + "module0", + dynamic_symbols=gnu_symbols if swap else dynamic_symbols, + gnu_debugdata_symbols=dynamic_symbols if swap else gnu_symbols, + ) + + self.assert_all_symbols_found_by_name(prog, drgn_symbols.values()) + self.assert_all_symbols_returned_by_lookup(prog, drgn_symbols.values()) + + # Address 9 has a best match in .dynsym, despite other sizeless matches + # in .gnu_debugdata. + self.assertEqual(drgn_symbols["eight"], prog.symbol(0xFFFF0009)) + + # Address 5 is conained by symbol "two" in .gnu_debugdata, despite + # "four" being a sizeless match in .dynsym. 
+ self.assertEqual(drgn_symbols["two"], prog.symbol(0xFFFF0005)) + + # Address 11 has a best sizeless match of "ten" in .gnu_debugdata, + # despite having a sizeless match of "eight" in .dynsym. + self.assertEqual(drgn_symbols["ten"], prog.symbol(0xFFFF000B)) + + def test_file_preferences(self): + # We need to be careful to make the address range the same for both + # files: so the minimum and maximum address for gnu + dynamic must be + # the same as for symtab. + # Normally a debug file would contain the same symbols as the loaded + # file, plus more. For testing, give them different names to + # distinguish. + loaded = [ + ElfSymbol("loaded_lo", 0xFFFF0000, 0x4, STT.FUNC, STB.LOCAL), + ElfSymbol("loaded_hi", 0xFFFF0004, 0x4, STT.FUNC, STB.LOCAL), + ] + debug = [ + ElfSymbol("symtab_lo", 0xFFFF0000, 0x4, STT.OBJECT, STB.LOCAL), + ElfSymbol("symtab_hi", 0xFFFF0004, 0x4, STT.OBJECT, STB.LOCAL), + ] + empty = [ElfSymbol("", 0xFFFF0000, 0, 0, 0, 0, 0)] + loaded_file_symbols = [ + Symbol("loaded_lo", 0xFFFF0000, 0x4, SymbolBinding.LOCAL, SymbolKind.FUNC), + Symbol("loaded_hi", 0xFFFF0004, 0x4, SymbolBinding.LOCAL, SymbolKind.FUNC), + ] + debug_file_symbols = [ + Symbol( + "symtab_lo", 0xFFFF0000, 0x4, SymbolBinding.LOCAL, SymbolKind.OBJECT + ), + Symbol( + "symtab_hi", 0xFFFF0004, 0x4, SymbolBinding.LOCAL, SymbolKind.OBJECT + ), + ] + file_choices = { + "loaded": ( + {"gnu_debugdata_symbols": loaded[:1], "dynamic_symbols": loaded[1:]}, + loaded_file_symbols, + ), + "loaded_dyn": ( + {"dynamic_symbols": loaded}, + loaded_file_symbols, + ), + "loaded_gnu": ( + {"gnu_debugdata_symbols": loaded}, + loaded_file_symbols, + ), + "loaded_gnu_dynempty": ( + {"gnu_debugdata_symbols": loaded, "dynamic_symbols": empty}, + loaded_file_symbols, + ), + "debug": ( + {"symbols": debug, "dwarf": True, "loadable": False}, + debug_file_symbols, + ), + "debug_dyn": ( + {"dynamic_symbols": debug, "dwarf": True, "loadable": False}, + debug_file_symbols, + ), + } + + # First file, second 
file, whether or not the symtab should be replaced. + # Combining the symbol table is possible in a corner case (.dynsym from + # the debug file, plus .gnu_debugdata from the loaded, if the loaded + # file has no .dynsym of its own). This really ought not to happen in + # practice, but it's worth ensuring that it's handled safely. + cases = [ + ("loaded", "debug", "replace"), + ("loaded_dyn", "debug", "replace"), + ("loaded_gnu", "debug", "replace"), + ("loaded_gnu_dynempty", "debug", "replace"), + ("debug", "loaded", None), + ("debug", "loaded_dyn", None), + ("debug", "loaded_gnu", None), + ("debug", "loaded_gnu_dynempty", None), + ("loaded", "debug_dyn", None), + ("loaded_dyn", "debug_dyn", None), + ("loaded_gnu", "debug_dyn", "combine"), + ("loaded_gnu_dynempty", "debug_dyn", None), + # We will replace a .dynsym with another .dynsym only if the file + # also has a .gnu_debugdata + ("debug_dyn", "loaded", "replace"), + ("debug_dyn", "loaded_dyn", None), + ("debug_dyn", "loaded_gnu", "combine"), + ("debug_dyn", "loaded_gnu_dynempty", "replace"), + ] + + for first, second, action in cases: + with self.subTest(f"{first}, {second}"): + prog = Program() + module = prog.extra_module("module0", create=True) + module_set_elf_symbol_file(module, **file_choices[first][0]) + expected = file_choices[first][1] + self.assert_all_symbols_found_by_name(prog, expected) + self.assert_all_symbols_found_by_address(prog, expected) + self.assert_all_symbols_returned_by_lookup(prog, expected) + + module_set_elf_symbol_file(module, **file_choices[second][0]) + if action == "replace": + expected = file_choices[second][1] + elif action == "combine": + expected = expected + file_choices[second][1] + self.assert_all_symbols_found_by_name(prog, expected) + # We end up with overlapping symbols when tables get combined. + # Don't bother checking address lookup there. 
+ if action != "combine": + self.assert_all_symbols_found_by_address(prog, expected) + self.assert_all_symbols_returned_by_lookup(prog, expected) + class TestSymbolFinder(TestCase): TEST_SYMS = [ diff --git a/tests/test_thread.py b/tests/test_thread.py index 023410439..cce1b46a7 100644 --- a/tests/test_thread.py +++ b/tests/test_thread.py @@ -3,6 +3,7 @@ import os import os.path +import tempfile from drgn import Program from tests import TestCase @@ -16,6 +17,10 @@ def setUpClass(cls): cls.prog = Program() cls.prog.set_pid(os.getpid()) + @classmethod + def tearDownClass(cls): + del cls.prog + def test_threads(self): tids = [thread.tid for thread in self.prog.threads()] self.assertIn(os.getpid(), tids) @@ -71,6 +76,10 @@ def setUpClass(cls): cls.prog = Program() cls.prog.set_core_dump(get_resource("multithreaded.core")) + @classmethod + def tearDownClass(cls): + del cls.prog + def test_threads(self): self.assertSequenceEqual( sorted(thread.tid for thread in self.prog.threads()), @@ -95,3 +104,24 @@ def test_thread_name(self): for tid in self.TIDS: if tid != self.MAIN_TID: self.assertIsNone(self.prog.thread(tid).name) + + +class TestCoreDumpLongName(TestCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.prog = Program() + with open(get_resource("crashme_static_pie.core"), "rb") as f: + data = f.read() + data = data.replace(b"crashme_static_\x00", b"crashme_static_p") + with tempfile.NamedTemporaryFile("wb") as f: + f.write(data) + f.flush() + cls.prog.set_core_dump(f.name) + + @classmethod + def tearDownClass(cls): + del cls.prog + + def test_thread_name(self): + self.assertEqual(self.prog.main_thread().name, "crashme_static_p") diff --git a/tools/fsrefs.py b/tools/fsrefs.py index cef1a5892..5513db708 100755 --- a/tools/fsrefs.py +++ b/tools/fsrefs.py @@ -89,6 +89,29 @@ def visit_path(self, path: Object) -> Optional[str]: return path.format_(**format_args) +def _format_file_with_path(file: Object) -> str: + match = file.format_(**format_args) + 
with ignore_fault: + match += " " + os.fsdecode(d_path(file.f_path)) + return match + + +def _format_inode_with_path(inode: Object) -> str: + match = inode.format_(**format_args) + with ignore_fault: + path = inode_path(inode) + if path: + match += " " + os.fsdecode(path) + return match + + +def _format_path_with_path(path: Object) -> str: + match = path.format_(**format_args) + with ignore_fault: + match += " " + os.fsdecode(d_path(path)) + return match + + class SuperBlockVisitor: def __init__(self, sb: Object) -> None: self._sb = sb.read_() @@ -96,28 +119,52 @@ def __init__(self, sb: Object) -> None: def visit_file(self, file: Object) -> Optional[str]: if file.f_inode.i_sb != self._sb: return None - match = file.format_(**format_args) - with ignore_fault: - match += " " + os.fsdecode(d_path(file.f_path)) - return match + return _format_file_with_path(file) def visit_inode(self, inode: Object) -> Optional[str]: if inode.i_sb != self._sb: return None - match = inode.format_(**format_args) - with ignore_fault: - path = inode_path(inode) - if path: - match += " " + os.fsdecode(path) - return match + return _format_inode_with_path(inode) def visit_path(self, path: Object) -> Optional[str]: if path.mnt.mnt_sb != self._sb: return None - match = path.format_(**format_args) - with ignore_fault: - match += " " + os.fsdecode(d_path(path)) - return match + return _format_path_with_path(path) + + +class BtrfsSubvolumeVisitor: + def __init__(self, root: Object) -> None: + self._root = root.read_() + # We compare the super block first to easily filter out non-Btrfs + # inodes. 
+ self._sb = self._root.fs_info.sb.read_() + + def visit_file(self, file: Object) -> Optional[str]: + f_inode = file.f_inode.read_() + if ( + f_inode.i_sb != self._sb + or container_of(f_inode, "struct btrfs_inode", "vfs_inode").root + != self._root + ): + return None + return _format_file_with_path(file) + + def visit_inode(self, inode: Object) -> Optional[str]: + if ( + inode.i_sb != self._sb + or container_of(inode, "struct btrfs_inode", "vfs_inode").root != self._root + ): + return None + return _format_inode_with_path(inode) + + def visit_path(self, path: Object) -> Optional[str]: + inode = path.dentry.d_inode.read_() + if ( + inode.i_sb != self._sb + or container_of(inode, "struct btrfs_inode", "vfs_inode").root != self._root + ): + return None + return _format_path_with_path(path) def super_block_on_bdev(bdev: Object) -> Optional[Object]: @@ -526,6 +573,17 @@ def main(prog: Program, argv: Sequence[str]) -> None: type=hexint, help="find references to the given struct super_block pointer", ) + object_group.add_argument( + "--btrfs-subvolume", + metavar="PATH", + help="find references to the Btrfs subvolume containing the given path", + ) + object_group.add_argument( + "--btrfs-subvolume-pointer", + metavar="ADDRESS", + type=hexint, + help="find references to the given struct btrfs_root pointer", + ) CHECKS = [ "binfmt_misc", @@ -598,6 +656,26 @@ def main(prog: Program, argv: Sequence[str]) -> None: visitor = SuperBlockVisitor( Object(prog, "struct super_block *", args.super_block_pointer) ) + elif args.btrfs_subvolume is not None: + fd = os.open( + args.btrfs_subvolume, os.O_PATH | (0 if args.dereference else os.O_NOFOLLOW) + ) + try: + inode = fget(find_task(prog, os.getpid()), fd).f_inode.read_() + # It'd be better to check the filesystem type directly from + # userspace using fstatfs(2), but Python doesn't provide + # {,f}statfs() (and os.statvfs() doesn't include f_type). 
+ if inode.i_sb.s_type.name.string_() != b"btrfs": + sys.exit(f"{args.btrfs_subvolume} is not on Btrfs") + visitor = BtrfsSubvolumeVisitor( + container_of(inode, "struct btrfs_inode", "vfs_inode").root + ) + finally: + os.close(fd) + elif args.btrfs_subvolume_pointer is not None: + visitor = BtrfsSubvolumeVisitor( + Object(prog, "struct btrfs_root *", args.btrfs_subvolume_pointer) + ) else: assert False diff --git a/vmtest/__main__.py b/vmtest/__main__.py index d910e424d..bc97a1a3e 100644 --- a/vmtest/__main__.py +++ b/vmtest/__main__.py @@ -45,6 +45,9 @@ def __init__(self, file: TextIO) -> None: self._passed: Dict[str, List[str]] = {} self._failed: Dict[str, List[str]] = {} + def succeeded(self) -> bool: + return not self._failed + def _green(self, s: str) -> str: if self._color: return "\033[32m" + s + "\033[0m" @@ -331,7 +334,7 @@ def add_kernel(arch: Architecture, pattern: str) -> None: else: python_executable = "/usr/bin/python3" # Skip excessively slow tests when emulating. - tests_expression = "-k 'not test_slab_cache_for_each_allocated_object'" + tests_expression = "-k 'not test_slab_cache_for_each_allocated_object and not test_mtree_load_three_levels'" if _kdump_works(kernel): kdump_command = """\ @@ -374,3 +377,4 @@ def add_kernel(arch: Architecture, pattern: str) -> None: if in_github_actions: shutil.rmtree(kernel.path) progress.update(kernel.arch.name, kernel.release, status == 0) + sys.exit(0 if progress.succeeded() else 1) diff --git a/vmtest/config.py b/vmtest/config.py index c5f014f40..6c346e44d 100644 --- a/vmtest/config.py +++ b/vmtest/config.py @@ -9,10 +9,14 @@ from typing import Dict, Mapping, NamedTuple, Sequence from _drgn_util.platform import NORMALIZED_MACHINE_NAME +from util import KernelVersion # Kernel versions that we run tests on and therefore support. Keep this in sync # with docs/support_matrix.rst. 
SUPPORTED_KERNEL_VERSIONS = ( + "6.16", + "6.15", + "6.14", "6.13", "6.12", "6.11", @@ -107,6 +111,9 @@ # For testing kernel core dumps from QEMU's dump-guest-memory command. CONFIG_FW_CFG_SYSFS=y +# kmodify breakpoints need kprobes. +CONFIG_KPROBES=y + # For BPF tests. CONFIG_BPF_SYSCALL=y CONFIG_BPF_JIT=y @@ -284,7 +291,7 @@ class Architecture(NamedTuple): """, }, kernel_org_compiler_name="aarch64-linux", - qemu_options=("-M", "virt", "-cpu", "cortex-a57"), + qemu_options=("-M", "virt", "-cpu", "cortex-a76"), qemu_console="ttyAMA0", ), Architecture( @@ -426,13 +433,25 @@ def kconfig_localversion(arch: Architecture, flavor: KernelFlavor, version: str) vmtest_kernel_version = [ # Increment the major version to rebuild every # architecture/flavor/version combination. - 34, + 35, # The minor version makes the default flavor the "latest" version. 1 if flavor.name == "default" else 0, ] patch_level = 0 # If only specific architecture/flavor/version combinations need to be # rebuilt, conditionally increment the patch level here. + kver = KernelVersion(version) + if KernelVersion("4.18") <= kver < KernelVersion("5.18"): + patch_level += 1 + if ( + (KernelVersion("6.6") <= kver < KernelVersion("6.6.3")) + or (KernelVersion("6.2") <= kver < KernelVersion("6.5.13")) + or (KernelVersion("5.16") <= kver < KernelVersion("6.1.64")) + or (KernelVersion("5.11") <= kver < KernelVersion("5.15.140")) + or (KernelVersion("5.5") <= kver < KernelVersion("5.10.202")) + or (kver < KernelVersion("5.4.262")) + ): + patch_level += 1 if patch_level: vmtest_kernel_version.append(patch_level) diff --git a/vmtest/enter_kdump.py b/vmtest/enter_kdump.py index 674006af2..b9175fcc2 100644 --- a/vmtest/enter_kdump.py +++ b/vmtest/enter_kdump.py @@ -65,6 +65,14 @@ def main() -> None: if cpus: os.sched_setaffinity(0, cpus) + # Try the drgn_test kmod crash method first. 
+ try: + with open("/sys/kernel/drgn_test/crash", "w") as f: + f.write("1") + except FileNotFoundError: + pass + + # Fall back to sysrq-trigger. with open("/proc/sysrq-trigger", "w") as f: f.write("c") diff --git a/vmtest/githubapi.py b/vmtest/githubapi.py index 95914a73c..af0a7aceb 100644 --- a/vmtest/githubapi.py +++ b/vmtest/githubapi.py @@ -121,26 +121,34 @@ def _request( ) -> Any: if params: url += "?" + urllib.parse.urlencode(params) - return urllib.request.urlopen( - urllib.request.Request( - url, - data=data, - headers={} if headers is None else headers, - method=method, - ) + req = urllib.request.Request( + url, + data=data, + headers={} if headers is None else headers, + method=method, ) + # Work around python/cpython#77842. + if req.has_header("Authorization"): + authorization = req.get_header("Authorization") + req.remove_header("Authorization") + req.add_unredirected_header("Authorization", authorization) + return urllib.request.urlopen(req) def _cached_get_json(self, endpoint: str, cache: _CACHE) -> Any: cached = self._read_cache(cache) if self._trust_cache(cached): return cached["body"] + req = urllib.request.Request( + self._HOST + "/" + endpoint, + headers=self._cached_get_headers(cached), + ) + # Work around python/cpython#77842. 
+ if req.has_header("Authorization"): + authorization = req.get_header("Authorization") + req.remove_header("Authorization") + req.add_unredirected_header("Authorization", authorization) try: - with urllib.request.urlopen( - urllib.request.Request( - self._HOST + "/" + endpoint, - headers=self._cached_get_headers(cached), - ) - ) as resp: + with urllib.request.urlopen(req) as resp: body = json.load(resp) self._write_cache(cache, body, resp.headers) return body diff --git a/vmtest/kbuild.py b/vmtest/kbuild.py index f71c20394..1fd494cfc 100644 --- a/vmtest/kbuild.py +++ b/vmtest/kbuild.py @@ -51,10 +51,6 @@ class _Patch(NamedTuple): name="proc-kcore-allow-enabling-CONFIG_PROC_KCORE-on-ARM.patch", versions=((None, None),), ), - _Patch( - name="9p-fix-slab-cache-name-creation-for-real.patch", - versions=((KernelVersion("6.12"), None),), - ), _Patch( name="filelock-fix-name-of-file_lease-slab-cache.patch", versions=((KernelVersion("6.9"), KernelVersion("6.10")),), @@ -146,6 +142,44 @@ class _Patch(NamedTuple): name="lib-raid6-add-option-to-skip-algo-benchmarking.patch", versions=((None, KernelVersion("5.0")),), ), + _Patch( + name="5.18-Revert-Makefile-link-with-z-noexecstack-no-warn-rwx-.patch", + versions=((KernelVersion("5.18.18"), KernelVersion("5.19")),), + ), + # We could backport this further, but we currently only need it between + # Linux kernel commits 50428fdc53ba ("powerpc: Add a ppc_inst_as_str() + # helper") (in v5.9) and 2a83afe72a2b ("powerpc/64: Drop + # ppc_inst_as_str()") (in v6.0). 
+ _Patch( + name="gcc-12-disable-Wdangling-pointer-warning-for-now.patch", + versions=((KernelVersion("5.16"), KernelVersion("5.18.6")),), + ), + _Patch( + name="5.10-gcc-12-disable-Wdangling-pointer-warning-for-now.patch", + versions=( + (KernelVersion("5.11"), KernelVersion("5.15.49")), + (KernelVersion("5.9"), KernelVersion("5.10.183")), + ), + ), + _Patch( + name="5.17-page_pool-enable-CONFIG_PAGE_POOL-by-default.patch", + versions=((KernelVersion("5.5"), KernelVersion("5.18")),), + ), + _Patch( + name="5.4-page_pool-enable-CONFIG_PAGE_POOL-by-default.patch", + versions=((KernelVersion("4.18"), KernelVersion("5.5")),), + ), + _Patch( + name="kernel-reboot-emergency_restart-Set-correct-system_s.patch", + versions=( + (KernelVersion("6.6"), KernelVersion("6.6.3")), + (KernelVersion("6.2"), KernelVersion("6.5.13")), + (KernelVersion("5.16"), KernelVersion("6.1.64")), + (KernelVersion("5.11"), KernelVersion("5.15.140")), + (KernelVersion("5.5"), KernelVersion("5.10.202")), + (None, KernelVersion("5.4.262")), + ), + ), ) diff --git a/vmtest/patches/5.10-gcc-12-disable-Wdangling-pointer-warning-for-now.patch b/vmtest/patches/5.10-gcc-12-disable-Wdangling-pointer-warning-for-now.patch new file mode 100644 index 000000000..ad9c67305 --- /dev/null +++ b/vmtest/patches/5.10-gcc-12-disable-Wdangling-pointer-warning-for-now.patch @@ -0,0 +1,72 @@ +From 1d8693376aaa10f12bbd67cfecb72a26a83058c5 Mon Sep 17 00:00:00 2001 +Message-ID: <1d8693376aaa10f12bbd67cfecb72a26a83058c5.1744698211.git.osandov@fb.com> +From: Linus Torvalds +Date: Thu, 9 Jun 2022 09:41:42 -0700 +Subject: [PATCH] gcc-12: disable '-Wdangling-pointer' warning for now +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +commit f7d63b50898172b9eb061b9e2daad61b428792d0 upstream. 
+ +[ Upstream commit 49beadbd47c270a00754c107a837b4f29df4c822 ] + +While the concept of checking for dangling pointers to local variables +at function exit is really interesting, the gcc-12 implementation is not +compatible with reality, and results in false positives. + +For example, gcc sees us putting things on a local list head allocated +on the stack, which involves exactly those kinds of pointers to the +local stack entry: + + In function ‘__list_add’, + inlined from ‘list_add_tail’ at include/linux/list.h:102:2, + inlined from ‘rebuild_snap_realms’ at fs/ceph/snap.c:434:2: + include/linux/list.h:74:19: warning: storing the address of local variable ‘realm_queue’ in ‘*&realm_27(D)->rebuild_item.prev’ [-Wdangling-pointer=] + 74 | new->prev = prev; + | ~~~~~~~~~~^~~~~~ + +But then gcc - understandably - doesn't really understand the big +picture how the doubly linked list works, so doesn't see how we then end +up emptying said list head in a loop and the pointer we added has been +removed. + +Gcc also complains about us (intentionally) using this as a way to store +a kind of fake stack trace, eg + + drivers/acpi/acpica/utdebug.c:40:38: warning: storing the address of local variable ‘current_sp’ in ‘acpi_gbl_entry_stack_pointer’ [-Wdangling-pointer=] + 40 | acpi_gbl_entry_stack_pointer = &current_sp; + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~ + +which is entirely reasonable from a compiler standpoint, and we may want +to change those kinds of patterns, but not not. + +So this is one of those "it would be lovely if the compiler were to +complain about us leaving dangling pointers to the stack", but not this +way.
+ +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + Makefile | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/Makefile b/Makefile +index 2f0efde21902..209d5ae9ddb5 100644 +--- a/Makefile ++++ b/Makefile +@@ -808,6 +808,10 @@ endif + KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) + + KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) ++ ++# These result in bogus false positives ++KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer) ++ + ifdef CONFIG_FRAME_POINTER + KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls + else +-- +2.49.0 + diff --git a/vmtest/patches/5.17-page_pool-enable-CONFIG_PAGE_POOL-by-default.patch b/vmtest/patches/5.17-page_pool-enable-CONFIG_PAGE_POOL-by-default.patch new file mode 100644 index 000000000..ce86617bd --- /dev/null +++ b/vmtest/patches/5.17-page_pool-enable-CONFIG_PAGE_POOL-by-default.patch @@ -0,0 +1,32 @@ +From e74439d0a3a12b7c90826dd4f8bded6cbe0e142a Mon Sep 17 00:00:00 2001 +Message-ID: +From: Omar Sandoval +Date: Mon, 2 Jun 2025 15:26:09 -0700 +Subject: [PATCH] page_pool: enable CONFIG_PAGE_POOL by default + +Since commit b530e9e1063e ("bpf: Add "live packet" mode for XDP in +BPF_PROG_RUN") (in v5.18), CONFIG_BPF_SYSCALL selects CONFIG_PAGE_POOL. +Before that, only certain network drivers enabled it. In order to test +page pool drgn helpers without enabling unnecessary network drivers, +always enable CONFIG_PAGE_POOL. 
+ +Signed-off-by: Omar Sandoval +--- + net/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/Kconfig b/net/Kconfig +index 8a1f9d0287de..865f93e259da 100644 +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -433,6 +433,7 @@ config NET_DEVLINK + + config PAGE_POOL + bool ++ default y + + config FAILOVER + tristate "Generic failover module" +-- +2.49.0 + diff --git a/vmtest/patches/5.18-Revert-Makefile-link-with-z-noexecstack-no-warn-rwx-.patch b/vmtest/patches/5.18-Revert-Makefile-link-with-z-noexecstack-no-warn-rwx-.patch new file mode 100644 index 000000000..7fa450bb9 --- /dev/null +++ b/vmtest/patches/5.18-Revert-Makefile-link-with-z-noexecstack-no-warn-rwx-.patch @@ -0,0 +1,42 @@ +From 656f46da75e198bec61e4f2cc425d9fc9b2679cc Mon Sep 17 00:00:00 2001 +Message-ID: <656f46da75e198bec61e4f2cc425d9fc9b2679cc.1741383572.git.osandov@osandov.com> +From: Omar Sandoval +Date: Fri, 7 Mar 2025 13:38:13 -0800 +Subject: [PATCH] Revert "Makefile: link with -z noexecstack + --no-warn-rwx-segments" + +This reverts commit d81aa6bfff835ceea33c192d394f03e4a59cd12c. + +This results in a missing build ID on arm64 on v5.18-stable starting +with v5.18.18 if CONFIG_MODVERSIONS=y. In mainline and LTS kernels, this +was fixed properly by commit 99cb0d917ffa ("arch: fix broken BuildID for +arm64 and riscv") (in v6.2), but that had a bunch of followup fixes. +Commit 7b4537199a4a ("kbuild: link symbol CRCs at final link, removing +CONFIG_MODULE_REL_CRCS") (in v5.19) also somehow works around it. Older +stable branches didn't get this noexecstack change, so that leaves us +with just v5.18 that needs this revert. 
+ +Signed-off-by: Omar Sandoval +--- + Makefile | 5 ----- + 1 file changed, 5 deletions(-) + +diff --git a/Makefile b/Makefile +index fc7efcdab0a2..31e952dc676d 100644 +--- a/Makefile ++++ b/Makefile +@@ -1031,11 +1031,6 @@ KBUILD_CFLAGS += $(KCFLAGS) + KBUILD_LDFLAGS_MODULE += --build-id=sha1 + LDFLAGS_vmlinux += --build-id=sha1 + +-KBUILD_LDFLAGS += -z noexecstack +-ifeq ($(CONFIG_LD_IS_BFD),y) +-KBUILD_LDFLAGS += $(call ld-option,--no-warn-rwx-segments) +-endif +- + ifeq ($(CONFIG_STRIP_ASM_SYMS),y) + LDFLAGS_vmlinux += $(call ld-option, -X,) + endif +-- +2.48.1 + diff --git a/vmtest/patches/5.4-page_pool-enable-CONFIG_PAGE_POOL-by-default.patch b/vmtest/patches/5.4-page_pool-enable-CONFIG_PAGE_POOL-by-default.patch new file mode 100644 index 000000000..8bb5cb272 --- /dev/null +++ b/vmtest/patches/5.4-page_pool-enable-CONFIG_PAGE_POOL-by-default.patch @@ -0,0 +1,32 @@ +From c3216ba4d0bb84edf0529e9ad99cd2ae8362a2c2 Mon Sep 17 00:00:00 2001 +Message-ID: +From: Omar Sandoval +Date: Mon, 2 Jun 2025 15:26:09 -0700 +Subject: [PATCH] page_pool: enable CONFIG_PAGE_POOL by default + +Since commit b530e9e1063e ("bpf: Add "live packet" mode for XDP in +BPF_PROG_RUN") (in v5.18), CONFIG_BPF_SYSCALL selects CONFIG_PAGE_POOL. +Before that, only certain network drivers enabled it. In order to test +page pool drgn helpers without enabling unnecessary network drivers, +always enable CONFIG_PAGE_POOL. 
+ +Signed-off-by: Omar Sandoval +--- + net/Kconfig | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/Kconfig b/net/Kconfig +index 48ed37cdd22f..9161d523bdd3 100644 +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -435,6 +435,7 @@ config NET_DEVLINK + + config PAGE_POOL + bool ++ default y + + config FAILOVER + tristate "Generic failover module" +-- +2.49.0 + diff --git a/vmtest/patches/9p-fix-slab-cache-name-creation-for-real.patch b/vmtest/patches/9p-fix-slab-cache-name-creation-for-real.patch deleted file mode 100644 index db8207078..000000000 --- a/vmtest/patches/9p-fix-slab-cache-name-creation-for-real.patch +++ /dev/null @@ -1,50 +0,0 @@ -From a360f311f57a36e96d88fa8086b749159714dcd2 Mon Sep 17 00:00:00 2001 -Message-ID: -From: Linus Torvalds -Date: Mon, 21 Oct 2024 11:57:38 -0700 -Subject: [PATCH] 9p: fix slab cache name creation for real - -This was attempted by using the dev_name in the slab cache name, but as -Omar Sandoval pointed out, that can be an arbitrary string, eg something -like "/dev/root". Which in turn trips verify_dirent_name(), which fails -if a filename contains a slash. - -So just make it use a sequence counter, and make it an atomic_t to avoid -any possible races or locking issues. 
- -Reported-and-tested-by: Omar Sandoval -Link: https://lore.kernel.org/all/ZxafcO8KWMlXaeWE@telecaster.dhcp.thefacebook.com/ -Fixes: 79efebae4afc ("9p: Avoid creating multiple slab caches with the same name") -Acked-by: Vlastimil Babka -Cc: Dominique Martinet -Cc: Thorsten Leemhuis -Signed-off-by: Linus Torvalds ---- - net/9p/client.c | 4 +++- - 1 file changed, 3 insertions(+), 1 deletion(-) - -diff --git a/net/9p/client.c b/net/9p/client.c -index 9e7b9151816d..09f8ced9f8bb 100644 ---- a/net/9p/client.c -+++ b/net/9p/client.c -@@ -977,6 +977,7 @@ static int p9_client_version(struct p9_client *c) - struct p9_client *p9_client_create(const char *dev_name, char *options) - { - int err; -+ static atomic_t seqno = ATOMIC_INIT(0); - struct p9_client *clnt; - char *client_id; - char *cache_name; -@@ -1036,7 +1037,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) - if (err) - goto close_trans; - -- cache_name = kasprintf(GFP_KERNEL, "9p-fcall-cache-%s", dev_name); -+ cache_name = kasprintf(GFP_KERNEL, -+ "9p-fcall-cache-%u", atomic_inc_return(&seqno)); - if (!cache_name) { - err = -ENOMEM; - goto close_trans; --- -2.47.0 - diff --git a/vmtest/patches/gcc-12-disable-Wdangling-pointer-warning-for-now.patch b/vmtest/patches/gcc-12-disable-Wdangling-pointer-warning-for-now.patch new file mode 100644 index 000000000..2e547c2c9 --- /dev/null +++ b/vmtest/patches/gcc-12-disable-Wdangling-pointer-warning-for-now.patch @@ -0,0 +1,65 @@ +From 49beadbd47c270a00754c107a837b4f29df4c822 Mon Sep 17 00:00:00 2001 +Message-ID: <49beadbd47c270a00754c107a837b4f29df4c822.1744656535.git.osandov@fb.com> +From: Linus Torvalds +Date: Thu, 9 Jun 2022 09:41:42 -0700 +Subject: [PATCH] gcc-12: disable '-Wdangling-pointer' warning for now +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +While the concept of checking for dangling pointers to local variables +at function exit is really interesting, the gcc-12 implementation is 
not +compatible with reality, and results in false positives. + +For example, gcc sees us putting things on a local list head allocated +on the stack, which involves exactly those kinds of pointers to the +local stack entry: + + In function ‘__list_add’, + inlined from ‘list_add_tail’ at include/linux/list.h:102:2, + inlined from ‘rebuild_snap_realms’ at fs/ceph/snap.c:434:2: + include/linux/list.h:74:19: warning: storing the address of local variable ‘realm_queue’ in ‘*&realm_27(D)->rebuild_item.prev’ [-Wdangling-pointer=] + 74 | new->prev = prev; + | ~~~~~~~~~~^~~~~~ + +But then gcc - understandably - doesn't really understand the big +picture how the doubly linked list works, so doesn't see how we then end +up emptying said list head in a loop and the pointer we added has been +removed. + +Gcc also complains about us (intentionally) using this as a way to store +a kind of fake stack trace, eg + + drivers/acpi/acpica/utdebug.c:40:38: warning: storing the address of local variable ‘current_sp’ in ‘acpi_gbl_entry_stack_pointer’ [-Wdangling-pointer=] + 40 | acpi_gbl_entry_stack_pointer = &current_sp; + | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~ + +which is entirely reasonable from a compiler standpoint, and we may want +to change those kinds of patterns, but not not. + +So this is one of those "it would be lovely if the compiler were to +complain about us leaving dangling pointers to the stack", but not this +way.
+ +Signed-off-by: Linus Torvalds +--- + Makefile | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/Makefile b/Makefile +index c43d825a3c4c..09208ffca353 100644 +--- a/Makefile ++++ b/Makefile +@@ -805,6 +805,9 @@ endif + KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) + KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) + ++# These result in bogus false positives ++KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer) ++ + ifdef CONFIG_FRAME_POINTER + KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls + else +-- +2.49.0 + diff --git a/vmtest/patches/kernel-reboot-emergency_restart-Set-correct-system_s.patch b/vmtest/patches/kernel-reboot-emergency_restart-Set-correct-system_s.patch new file mode 100644 index 000000000..e0c39e125 --- /dev/null +++ b/vmtest/patches/kernel-reboot-emergency_restart-Set-correct-system_s.patch @@ -0,0 +1,50 @@ +From 60466c067927abbcaff299845abd4b7069963139 Mon Sep 17 00:00:00 2001 +Message-ID: <60466c067927abbcaff299845abd4b7069963139.1748973068.git.osandov@osandov.com> +From: Benjamin Bara +Date: Sat, 15 Jul 2023 09:53:23 +0200 +Subject: [PATCH] kernel/reboot: emergency_restart: Set correct system_state + +As the emergency restart does not call kernel_restart_prepare(), the +system_state stays in SYSTEM_RUNNING. + +Since bae1d3a05a8b, this hinders i2c_in_atomic_xfer_mode() from becoming +active, and therefore might lead to avoidable warnings in the restart +handlers, e.g.: + +[ 12.667612] WARNING: CPU: 1 PID: 1 at kernel/rcu/tree_plugin.h:318 rcu_note_context_switch+0x33c/0x6b0 +[ 12.676926] Voluntary context switch within RCU read-side critical section! +... +[ 12.742376] schedule_timeout from wait_for_completion_timeout+0x90/0x114 +[ 12.749179] wait_for_completion_timeout from tegra_i2c_wait_completion+0x40/0x70 +... 
+[ 12.994527] atomic_notifier_call_chain from machine_restart+0x34/0x58 +[ 13.001050] machine_restart from panic+0x2a8/0x32c + +Avoid these by setting the correct system_state. + +Fixes: bae1d3a05a8b ("i2c: core: remove use of in_atomic()") +Cc: stable@vger.kernel.org # v5.2+ +Reviewed-by: Dmitry Osipenko +Tested-by: Nishanth Menon +Signed-off-by: Benjamin Bara +Link: https://lore.kernel.org/r/20230327-tegra-pmic-reboot-v7-1-18699d5dcd76@skidata.com +Signed-off-by: Lee Jones +--- + kernel/reboot.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/reboot.c b/kernel/reboot.c +index 3bba88c7ffc6..6ebef11c8876 100644 +--- a/kernel/reboot.c ++++ b/kernel/reboot.c +@@ -74,6 +74,7 @@ void __weak (*pm_power_off)(void); + void emergency_restart(void) + { + kmsg_dump(KMSG_DUMP_EMERG); ++ system_state = SYSTEM_RESTART; + machine_emergency_restart(); + } + EXPORT_SYMBOL_GPL(emergency_restart); +-- +2.49.0 + diff --git a/vmtest/rootfsbuild.py b/vmtest/rootfsbuild.py index e7eaefaac..da62ce1c5 100644 --- a/vmtest/rootfsbuild.py +++ b/vmtest/rootfsbuild.py @@ -4,15 +4,8 @@ import logging from pathlib import Path import subprocess -import sys import tempfile -import typing - -if typing.TYPE_CHECKING: - if sys.version_info < (3, 8): - from typing_extensions import Literal - else: - from typing import Literal # novermin +from typing import Literal from vmtest.config import ARCHITECTURES, HOST_ARCHITECTURE, Architecture @@ -28,6 +21,7 @@ "libdw-dev", "libelf-dev", "libkdumpfile-dev", + "liblzma-dev", "libtool", "make", "pkgconf", @@ -37,6 +31,7 @@ "python3-setuptools", # Test dependencies. 
"btrfs-progs", + "check", "iproute2", "kexec-tools", "kmod", @@ -50,7 +45,7 @@ def build_rootfs( arch: Architecture, path: Path, *, - btrfs: "Literal['never', 'always', 'auto']" = "auto", + btrfs: Literal["never", "always", "auto"] = "auto", ) -> None: if path.exists(): logger.info("%s already exists", path) diff --git a/vmtest/vm.py b/vmtest/vm.py index 83eb6ca70..8362a1737 100644 --- a/vmtest/vm.py +++ b/vmtest/vm.py @@ -1,13 +1,13 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later +import contextlib import enum import os from pathlib import Path import re import shlex import shutil -import signal import socket import subprocess import sys @@ -41,10 +41,10 @@ {kdump_needs_nosmp} # On exit, power off. We don't use the poweroff command because very minimal -# installations don't have it (e.g., the debootstrap minbase variant). The -# magic SysRq returns immediately without waiting for the poweroff, so we sleep -# for a while and panic if it takes longer than that. -trap 'echo o > /proc/sysrq-trigger && sleep 60' exit +# installations don't have it (e.g., the debootstrap minbase variant). We don't +# use the "o" magic SysRq because it returns immediately. Since we run QEMU +# with -no-reboot, we can use the "b" magic SysRq, which is synchronous. +trap 'echo b > /proc/sysrq-trigger' exit umask 022 @@ -195,6 +195,18 @@ def _build_onoatimehack(dir: Path) -> Path: return onoatimehack_so +def _have_setpriv_pdeathsig() -> bool: + # util-linux supports setpriv --pdeathsig since v2.33. BusyBox doesn't + # support it as of v1.37. 
+ try: + help = subprocess.run( + ["setpriv", "--help"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ).stdout + except FileNotFoundError: + return False + return b"--pdeathsig" in help + + class TestKmodMode(enum.Enum): NONE = 0 BUILD = 1 @@ -213,6 +225,7 @@ def run_in_vm( *, extra_qemu_options: Sequence[str] = (), test_kmod: TestKmodMode = TestKmodMode.NONE, + interactive: bool = False, ) -> int: if root_dir is None: if kernel.arch is HOST_ARCHITECTURE: @@ -239,6 +252,14 @@ def run_in_vm( onoatimehack_so = _build_onoatimehack(build_dir) env["LD_PRELOAD"] = f"{str(onoatimehack_so)}:{env.get('LD_PRELOAD', '')}" + # Kill the child QEMU process if we die. If we die between the fork() and + # the prctl(PR_SET_PDEATHSIG), then the signal won't be delivered, but then + # QEMU will fail to connect to our socket and exit. + if _have_setpriv_pdeathsig(): + setpriv_args = ["setpriv", "--pdeathsig=TERM"] + else: + setpriv_args = [] + kvm_args = [] if HOST_ARCHITECTURE is not None and kernel.arch.name == HOST_ARCHITECTURE.name: if os.access("/dev/kvm", os.R_OK | os.W_OK): @@ -249,17 +270,30 @@ def run_in_vm( file=sys.stderr, ) + if interactive: + serial_args = ["-serial", "mon:stdio"] + infile = None + else: + serial_args = [ + "-chardev", + "stdio,id=stdio,signal=off", + "-serial", + "chardev:stdio", + ] + infile = subprocess.DEVNULL + virtfs_options = "security_model=none,readonly=on" # multidevs was added in QEMU 4.2.0. 
if qemu_version >= (4, 2): virtfs_options += ",multidevs=remap" _9pfs_mount_options = f"trans=virtio,cache=loose,msize={1024 * 1024}" - with tempfile.TemporaryDirectory(prefix="drgn-vmtest-") as temp_dir, socket.socket( - socket.AF_UNIX - ) as server_sock: - temp_path = Path(temp_dir) + with contextlib.ExitStack() as exit_stack: + temp_path = Path( + exit_stack.enter_context(tempfile.TemporaryDirectory(prefix="drgn-vmtest-")) + ) socket_path = temp_path / "socket" + server_sock = exit_stack.enter_context(socket.socket(socket.AF_UNIX)) server_sock.bind(str(socket_path)) server_sock.listen() @@ -302,30 +336,26 @@ def run_in_vm( else: stty_command = "" - with init_path.open("w") as init_file: - init_file.write( - _INIT_TEMPLATE.format( - cwd=shlex.quote(host_dir_prefix + os.getcwd()), - kernel_dir=shlex.quote( - host_dir_prefix + str(kernel.path.resolve()) - ), - command=shlex.quote(command), - kdump_needs_nosmp="" if kvm_args else "export KDUMP_NEEDS_NOSMP=1", - test_kmod=test_kmod_command, - stty=stty_command, - ) + init_path.write_text( + _INIT_TEMPLATE.format( + cwd=shlex.quote(host_dir_prefix + os.getcwd()), + kernel_dir=shlex.quote(host_dir_prefix + str(kernel.path.resolve())), + command=shlex.quote(command), + kdump_needs_nosmp="" if kvm_args else "export KDUMP_NEEDS_NOSMP=1", + test_kmod=test_kmod_command, + stty=stty_command, ) + ) init_path.chmod(0o755) disk_path = temp_path / "disk" with disk_path.open("wb") as f: os.ftruncate(f.fileno(), 1024 * 1024 * 1024) - signal.signal(signal.SIGTERM, lambda *_: sys.exit(1)) - proc = subprocess.Popen( [ # fmt: off + *setpriv_args, *unshare_args, qemu_exe, *kvm_args, @@ -333,7 +363,7 @@ def run_in_vm( # Limit the number of cores to 8, otherwise we can reach an OOM troubles. "-smp", str(min(nproc(), 8)), "-m", "2G", - "-display", "none", "-serial", "mon:stdio", + "-display", "none", *serial_args, # This along with -append panic=-1 ensures that we exit on a # panic instead of hanging. 
@@ -362,27 +392,25 @@ def run_in_vm( # fmt: on ], env=env, + stdin=infile, ) try: server_sock.settimeout(5) try: - sock = server_sock.accept()[0] + sock = exit_stack.enter_context(server_sock.accept()[0]) except socket.timeout: raise LostVMError( f"QEMU did not connect within {server_sock.gettimeout()} seconds" ) - try: - status_buf = bytearray() - while True: - try: - buf = sock.recv(4) - except ConnectionResetError: - buf = b"" - if not buf: - break - status_buf.extend(buf) - finally: - sock.close() + status_buf = bytearray() + while True: + try: + buf = sock.recv(4) + except ConnectionResetError: + buf = b"" + if not buf: + break + status_buf.extend(buf) except BaseException: proc.terminate() raise @@ -518,6 +546,7 @@ def __call__( args.directory, extra_qemu_options=args.qemu_options, test_kmod=args.test_kmod, + interactive=True, ) ) except LostVMError as e: