diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py index 41f8163f..569f6d4f 100644 --- a/elftools/dwarf/dwarfinfo.py +++ b/elftools/dwarf/dwarfinfo.py @@ -7,7 +7,7 @@ # This code is in the public domain #------------------------------------------------------------------------------- import os -from collections import namedtuple +from collections import namedtuple, OrderedDict from bisect import bisect_right from ..construct.lib.container import Container @@ -16,6 +16,7 @@ parse_cstring_from_stream) from .structs import DWARFStructs from .compileunit import CompileUnit +from .typeunit import TypeUnit from .abbrevtable import AbbrevTable from .lineprogram import LineProgram from .callframe import CallFrameInfo @@ -82,7 +83,8 @@ def __init__(self, debug_rnglists_sec, debug_sup_sec, gnu_debugaltlink_sec, - gnu_debuglink_sec + gnu_debuglink_sec, + debug_types_sec ): """ config: A DwarfConfig object @@ -112,6 +114,7 @@ def __init__(self, self.debug_sup_sec = debug_sup_sec self.gnu_debugaltlink_sec = gnu_debugaltlink_sec self.gnu_debuglink_sec = gnu_debuglink_sec + self.debug_types_sec = debug_types_sec # Sets the supplementary_dwarfinfo to None. Client code can set this # to something else, typically a DWARFInfo file read from an ELFFile @@ -136,6 +139,9 @@ def __init__(self, self._cu_cache = [] self._cu_offsets_map = [] + # DWARF v4 type units by sig8 - OrderedDict created on Reference + self._type_units_by_sig = None + @property def has_debug_info(self): """ Return whether this contains debug information. @@ -145,6 +151,11 @@ def has_debug_info(self): """ return bool(self.debug_info_sec) + def has_debug_types(self): + """ Return whether this contains debug types information. + """ + return bool(self.debug_types_sec) + def get_DIE_from_lut_entry(self, lut_entry): """ Get the DIE from the pubnames or putbtypes lookup table entry. 
@@ -223,11 +234,32 @@ def get_CU_at(self, offset): return self._cached_CU_at_offset(offset) + def get_TU_by_sig8(self, sig8): + """ Find and return a Type Unit referenced by its signature + + sig8: + The 8 byte unique signature (as a 64-bit unsigned integer) + + Returns the TU with the given type signature by parsing the + .debug_types section. + + """ + self._parse_debug_types() + tu = self._type_units_by_sig.get(sig8) + if tu is None: + raise KeyError("Signature %016x not found in .debug_types" % sig8) + return tu + def iter_CUs(self): """ Yield all the compile units (CompileUnit objects) in the debug info """ return self._parse_CUs_iter() + def iter_TUs(self): + """Yield all the type units (TypeUnit objects) in the debug_types + """ + return self._parse_TUs_iter() + def get_abbrev_table(self, offset): """ Get an AbbrevTable from the given offset in the debug_abbrev section. @@ -416,11 +448,53 @@ def _parse_CUs_iter(self, offset=0): # Compute the offset of the next CU in the section. The unit_length # field of the CU header contains its size not including the length # field itself. - offset = ( offset + - cu['unit_length'] + - cu.structs.initial_length_field_size()) + offset = (offset + + cu['unit_length'] + + cu.structs.initial_length_field_size()) yield cu + def _parse_TUs_iter(self, offset=0): + """ Iterate Type Unit objects in order of appearance in the debug_types section. + + offset: + The offset of the first TU to yield. Additional iterations + will return the sequential unit objects. + + See .iter_TUs(). + """ + if self.debug_types_sec is None: + return + + while offset < self.debug_types_sec.size: + tu = self._parse_TU_at_offset(offset) + # Compute the offset of the next TU in the section. The unit_length + # field of the TU header contains its size not including the length + # field itself. 
+ offset = (offset + + tu['unit_length'] + + tu.structs.initial_length_field_size()) + + yield tu + + def _parse_debug_types(self): + """ Check if the .debug_types section is previously parsed. If not, + parse all TUs and store them in an OrderedDict using their unique + 64-bit signature as the key. + + See .get_TU_by_sig8(). + """ + if self._type_units_by_sig is not None: + return + self._type_units_by_sig = OrderedDict() + + if self.debug_types_sec is None: + return + + # Collect all Type Units in the .debug_types section for access using + # their 8-byte unique signature + for tu in self._parse_TUs_iter(): + self._type_units_by_sig[tu['signature']] = tu + def _cached_CU_at_offset(self, offset): """ Return the CU with unit header at the given offset into the debug_info section from the cache. If not present, the unit is @@ -493,6 +567,50 @@ def _parse_CU_at_offset(self, offset): cu_offset=offset, cu_die_offset=cu_die_offset) + def _parse_TU_at_offset(self, offset): + """ Parse and return a Type Unit (TU) at the given offset in the debug_types stream. + """ + # Section 7.4 (32-bit and 64-bit DWARF Formats) of the DWARF spec v4 + # states that the first 32-bit word of the TU header determines + # whether the TU is represented with 32-bit or 64-bit DWARF format. + # + # So we peek at the first word in the TU header to determine its + # dwarf format. Based on it, we then create a new DWARFStructs + # instance suitable for this TU and use it to parse the rest. + # + initial_length = struct_parse( + self.structs.the_Dwarf_uint32, self.debug_types_sec.stream, offset) + dwarf_format = 64 if initial_length == 0xFFFFFFFF else 32 + + # Temporary structs for parsing the header + # The structs for the rest of the TUs depend on the header data. 
+ tu_structs = DWARFStructs( + little_endian=self.config.little_endian, + dwarf_format=dwarf_format, + address_size=4, + dwarf_version=2) + + tu_header = struct_parse( + tu_structs.Dwarf_TU_header, self.debug_types_sec.stream, offset) + + # structs for the rest of the TU, taking into account bit-width and DWARF version + tu_structs = DWARFStructs( + little_endian=self.config.little_endian, + dwarf_format=dwarf_format, + address_size=tu_header['address_size'], + dwarf_version=tu_header['version']) + + tu_die_offset = self.debug_types_sec.stream.tell() + dwarf_assert( + self._is_supported_version(tu_header['version']), + "Expected supported DWARF version. Got '%s'" % tu_header['version']) + return TypeUnit( + header=tu_header, + dwarfinfo=self, + structs=tu_structs, + tu_offset=offset, + tu_die_offset=tu_die_offset) + def _is_supported_version(self, version): """ DWARF version supported by this parser """ diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py index 0cc51c44..d3f65615 100644 --- a/elftools/dwarf/structs.py +++ b/elftools/dwarf/structs.py @@ -53,6 +53,9 @@ class DWARFStructs(object): Dwarf_CU_header (+): Compilation unit header + Dwarf_TU_header (+): + Type unit header + Dwarf_abbrev_declaration (+): Abbreviation table declaration - doesn't include the initial code, only the contents. 
@@ -160,6 +163,7 @@ def _create_structs(self): self._create_initial_length() self._create_leb128() self._create_cu_header() + self._create_tu_header() self._create_abbrev_declaration() self._create_dw_form() self._create_lineprog_header() @@ -237,6 +241,15 @@ def _create_cu_header(self): Embed(dwarfv4_CU_header), )) + def _create_tu_header(self): + self.Dwarf_TU_header = Struct('Dwarf_TU_header', + self.Dwarf_initial_length('unit_length'), + self.Dwarf_uint16('version'), + self.Dwarf_offset('debug_abbrev_offset'), + self.Dwarf_uint8('address_size'), + self.Dwarf_uint64('signature'), + self.Dwarf_offset('type_offset')) + def _create_abbrev_declaration(self): self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry', Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG), diff --git a/elftools/dwarf/typeunit.py b/elftools/dwarf/typeunit.py new file mode 100644 index 00000000..d23bffd5 --- /dev/null +++ b/elftools/dwarf/typeunit.py @@ -0,0 +1,243 @@ +#------------------------------------------------------------------------------- +# elftools: dwarf/typeunit.py +# +# DWARF type unit +# +# Dinkar Khandalekar (contact@dinkar.dev) +# This code is in the public domain +#------------------------------------------------------------------------------- +from bisect import bisect_right +from .die import DIE +from ..common.utils import dwarf_assert + + +class TypeUnit(object): + """ A DWARF type unit (TU). + + A type unit contains type definition entries that can be + referenced by debugging information entries in other compilation + units and other type units. Each type unit must be uniquely + identified by a 64-bit signature. (DWARFv4 section 3.1.3) + + Type units are stored in the .debug_types section.
This section was + introduced by the DWARFv4 standard (and removed in the DWARFv5 standard; + the underlying type units were relocated to the .debug_info + section - DWARFv5 section 1.4) + + Serves as a container and context to DIEs that describe type definitions + referenced from compilation units and other type units. + + TU header entries can be accessed as dict keys from this object, i.e. + tu = TypeUnit(...) + tu['version'] # version field of the TU header + + To get the top-level DIE describing the type unit, call the + get_top_DIE method. + """ + def __init__(self, header, dwarfinfo, structs, tu_offset, tu_die_offset): + """ header: + TU header for this type unit + + dwarfinfo: + The DWARFInfo context object which created this one + + structs: + A DWARFStructs instance suitable for this type unit + + tu_offset: + Offset in the stream to the beginning of this TU (its header) + + tu_die_offset: + Offset in the stream of the top DIE of this TU + """ + self.dwarfinfo = dwarfinfo + self.header = header + self.structs = structs + self.tu_offset = tu_offset + self.tu_die_offset = tu_die_offset + + # The abbreviation table for this TU. Filled lazily when DIEs are + # requested. + self._abbrev_table = None + + # A list of DIEs belonging to this TU. + # This list is lazily constructed as DIEs are iterated over. + self._dielist = [] + # A list of file offsets, corresponding (by index) to the DIEs + # in `self._dielist`. This list exists separately from + # `self._dielist` to make it binary searchable, enabling the + # DIE population strategy used in `iter_DIE_children`. + # Like `self._dielist`, this list is lazily constructed + # as DIEs are iterated over. 
+ self._diemap = [] + + @property + def cu_offset(self): + """Simulates the cu_offset attribute required by the DIE by returning the tu_offset instead + """ + return self.tu_offset + + @property + def cu_die_offset(self): + """Simulates the cu_die_offset attribute required by the DIE by returning the tu_die_offset instead + """ + return self.tu_die_offset + + def dwarf_format(self): + """ Get the DWARF format (32 or 64) for this TU + """ + return self.structs.dwarf_format + + def get_abbrev_table(self): + """ Get the abbreviation table (AbbrevTable object) for this TU + """ + if self._abbrev_table is None: + self._abbrev_table = self.dwarfinfo.get_abbrev_table( + self['debug_abbrev_offset']) + return self._abbrev_table + + def get_top_DIE(self): + """ Get the top DIE (which is the DW_TAG_type_unit entry) of this TU + """ + + # Note that a top DIE always has minimal offset and is therefore + # at the beginning of our lists, so no bisect is required. + if len(self._diemap) > 0: + return self._dielist[0] + + top = DIE( + cu=self, + stream=self.dwarfinfo.debug_types_sec.stream, + offset=self.tu_die_offset) + + self._dielist.insert(0, top) + self._diemap.insert(0, self.tu_die_offset) + + top._translate_indirect_attributes() # Can't translate indirect attributes until the top DIE has been parsed to the end + + return top + + def has_top_DIE(self): + """ Returns whether the top DIE in this TU has already been parsed and cached. + No parsing on demand! + """ + return len(self._diemap) > 0 + + @property + def size(self): + return self['unit_length'] + self.structs.initial_length_field_size() + + def iter_DIEs(self): + """ Iterate over all the DIEs in the TU, in order of their appearance. + Note that null DIEs will also be returned. + """ + return self._iter_DIE_subtree(self.get_top_DIE()) + + def iter_DIE_children(self, die): + """ Given a DIE, yields either its children, without null DIE list + terminator, or nothing, if that DIE has no children.
+ + The null DIE terminator is saved in that DIE when iteration ends. + """ + if not die.has_children: + return + + # `cur_offset` tracks the stream offset of the next DIE to yield + # as we iterate over our children. + cur_offset = die.offset + die.size + + while True: + child = self._get_cached_DIE(cur_offset) + + child.set_parent(die) + + if child.is_null(): + die._terminator = child + return + + yield child + + if not child.has_children: + cur_offset += child.size + elif "DW_AT_sibling" in child.attributes: + sibling = child.attributes["DW_AT_sibling"] + if sibling.form in ('DW_FORM_ref1', 'DW_FORM_ref2', + 'DW_FORM_ref4', 'DW_FORM_ref8', + 'DW_FORM_ref', 'DW_FORM_ref_udata'): + cur_offset = sibling.value + self.tu_offset + elif sibling.form == 'DW_FORM_ref_addr': + cur_offset = sibling.value + else: + raise NotImplementedError('sibling in form %s' % sibling.form) + else: + # If no DW_AT_sibling attribute is provided by the producer + # then the whole child subtree must be parsed to find its next + # sibling. A single zero byte (the null DIE) terminates each + # children list; it is used to locate the bounds of the child + # subtree. + + # If the children are not parsed yet, the loop below recurses + # into this function, which as a side effect sets the + # `_terminator` attribute of the `child`. + if child._terminator is None: + for _ in self.iter_DIE_children(child): + pass + + cur_offset = child._terminator.offset + child._terminator.size + + #------ PRIVATE ------# + + def __getitem__(self, name): + """ Implement dict-like access to header entries + """ + return self.header[name] + + def _iter_DIE_subtree(self, die): + """ Given a DIE, this yields it with its subtree including null DIEs + (child list terminators).
+ """ + # If the die is an imported unit, replace it with what it refers to if + # we can + if die.tag == 'DW_TAG_imported_unit' and self.dwarfinfo.supplementary_dwarfinfo: + die = die.get_DIE_from_attribute('DW_AT_import') + yield die + if die.has_children: + for c in die.iter_children(): + for d in die.cu._iter_DIE_subtree(c): + yield d + yield die._terminator + + def _get_cached_DIE(self, offset): + """ Given a DIE offset, look it up in the cache. If not present, + parse the DIE and insert it into the cache. + + offset: + The offset of the DIE in the debug_types section to retrieve. + + The stream reference is copied from the top DIE. The top die will + also be parsed and cached if needed. + + See also get_DIE_from_refaddr(self, refaddr). + """ + # The top die must be in the cache if any DIE is in the cache. + # The stream is the same for all DIEs in this TU, so populate + # the top DIE and obtain a reference to its stream. + top_die_stream = self.get_top_DIE().stream + + # `offset` is the offset in the stream of the DIE we want to return. + # The map is maintained as a parallel array to the list. We call + # bisect each time to ensure new DIEs are inserted in the correct + # order within both `self._dielist` and `self._diemap`. + i = bisect_right(self._diemap, offset) + + # Note that `self._diemap` cannot be empty because the top DIE + # was inserted by the call to .get_top_DIE(). Also it has the minimal + # offset, so the bisect_right insert point will always be at least 1.
+ if offset == self._diemap[i - 1]: + die = self._dielist[i - 1] + else: + die = DIE(cu=self, stream=top_die_stream, offset=offset) + self._dielist.insert(i, die) + self._diemap.insert(i, offset) + + return die diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py index c7154439..034741b1 100644 --- a/elftools/elf/elffile.py +++ b/elftools/elf/elffile.py @@ -260,7 +260,8 @@ def get_dwarf_info(self, relocate_dwarf_sections=True, follow_links=True): '.debug_pubnames', '.debug_addr', '.debug_str_offsets', '.debug_line_str', '.debug_loclists', '.debug_rnglists', - '.debug_sup', '.gnu_debugaltlink', '.gnu_debuglink') + '.debug_sup', '.gnu_debugaltlink', '.gnu_debuglink', + '.debug_types') compressed = bool(self.get_section_by_name('.zdebug_info')) if compressed: @@ -274,7 +275,8 @@ def get_dwarf_info(self, relocate_dwarf_sections=True, follow_links=True): debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name, debug_pubnames_name, debug_addr_name, debug_str_offsets_name, debug_line_str_name, debug_loclists_sec_name, debug_rnglists_sec_name, - debug_sup_name, gnu_debugaltlink_name, gnu_debuglink, eh_frame_sec_name) = section_names + debug_sup_name, gnu_debugaltlink_name, gnu_debuglink, debug_types_sec_name, + eh_frame_sec_name) = section_names debug_sections = {} for secname in section_names: @@ -316,7 +318,8 @@ def get_dwarf_info(self, relocate_dwarf_sections=True, follow_links=True): debug_rnglists_sec=debug_sections[debug_rnglists_sec_name], debug_sup_sec=debug_sections[debug_sup_name], gnu_debugaltlink_sec=debug_sections[gnu_debugaltlink_name], - gnu_debuglink_sec=debug_sections[gnu_debuglink] + gnu_debuglink_sec=debug_sections[gnu_debuglink], + debug_types_sec=debug_sections[debug_types_sec_name] ) if follow_links: dwarfinfo.supplementary_dwarfinfo = self.get_supplementary_dwarfinfo(dwarfinfo) diff --git a/scripts/readelf.py b/scripts/readelf.py index da021dd0..3b187ee3 100755 --- a/scripts/readelf.py +++ b/scripts/readelf.py @@ -915,6 +915,7 @@ def 
display_debug_dump(self, dump_what): if dump_what == 'info': self._dump_debug_info() + self._dump_debug_types() elif dump_what == 'decodedline': self._dump_debug_line_programs() elif dump_what == 'frames': @@ -1196,6 +1197,58 @@ def _dump_debug_info(self): self._emitline() + def _dump_debug_types(self): + """Dump the debug types section + """ + if not self._dwarfinfo.has_debug_info: + return + if self._dwarfinfo.debug_types_sec is None: + return + self._emitline('Contents of the %s section:\n' % self._dwarfinfo.debug_types_sec.name) + + # Offset of the .debug_types section in the stream + section_offset = self._dwarfinfo.debug_types_sec.global_offset + + for tu in self._dwarfinfo.iter_TUs(): + self._emitline(' Compilation Unit @ offset %s:' % + self._format_hex(tu.tu_offset, alternate=True)) + self._emitline(' Length: %s (%s)' % (self._format_hex(tu['unit_length']), + '%s-bit' % tu.dwarf_format())) + self._emitline(' Version: %s' % tu['version']) + self._emitline(' Abbrev Offset: %s' % (self._format_hex(tu['debug_abbrev_offset'], alternate=True))) + self._emitline(' Pointer Size: %s' % tu['address_size']) + self._emitline(' Signature: 0x%x' % tu['signature']) + self._emitline(' Type Offset: 0x%x' % tu['type_offset']) + + die_depth = 0 + for die in tu.iter_DIEs(): + self._emitline(' <%s><%x>: Abbrev Number: %s%s' % ( + die_depth, + die.offset, + die.abbrev_code, + (' (%s)' % die.tag) if not die.is_null() else '')) + if die.is_null(): + die_depth -= 1 + continue + + for attr in die.attributes.values(): + name = attr.name + # Unknown attribute values are passed-through as integers + if isinstance(name, int): + name = 'Unknown AT value: %x' % name + + attr_desc = describe_attr_value(attr, die, section_offset) + + self._emitline(' <%x> %-18s: %s' % ( + attr.offset, + name, + attr_desc)) + + if die.has_children: + die_depth += 1 + + self._emitline() + def _dump_debug_line_programs(self): """ Dump the (decoded) line programs from .debug_line The programs are dumped in the 
order of the CUs they belong to. diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py index 9c6747fb..8c7e5221 100755 --- a/test/run_readelf_tests.py +++ b/test/run_readelf_tests.py @@ -72,6 +72,13 @@ def run_test_on_file(filename, verbose=False, opt=None): else: options = [opt] + if filename.endswith('dwarf_debug_types.elf'): + # TODO: skip the frame dumps for the binary with a .debug_types + # section until the FDE length calculation in binutils bug #31973 is fixed + # https://sourceware.org/bugzilla/show_bug.cgi?id=31973 + options.remove('--debug-dump=frames') + options.remove('--debug-dump=frames-interp') + for option in options: if verbose: testlog.info("..option='%s'" % option) diff --git a/test/test_refaddr_bitness.py b/test/test_refaddr_bitness.py index 4fc89888..df05685d 100644 --- a/test/test_refaddr_bitness.py +++ b/test/test_refaddr_bitness.py @@ -51,7 +51,8 @@ def test_main(self): debug_rnglists_sec = None, debug_sup_sec = None, gnu_debugaltlink_sec = None, - gnu_debuglink_sec = None + gnu_debuglink_sec = None, + debug_types_sec=None ) CUs = [cu for cu in di.iter_CUs()] diff --git a/test/testfiles_for_readelf/dwarf_debug_types.elf b/test/testfiles_for_readelf/dwarf_debug_types.elf new file mode 100644 index 00000000..aac6af5c Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_debug_types.elf differ diff --git a/test/testfiles_for_unittests/dwarf_debug_types.elf b/test/testfiles_for_unittests/dwarf_debug_types.elf new file mode 100644 index 00000000..aac6af5c Binary files /dev/null and b/test/testfiles_for_unittests/dwarf_debug_types.elf differ