1#!/usr/bin/python3
2# ELF support functionality for Python.
3# Copyright (C) 2022 Free Software Foundation, Inc.
4# This file is part of the GNU C Library.
5#
6# The GNU C Library is free software; you can redistribute it and/or
7# modify it under the terms of the GNU Lesser General Public
8# License as published by the Free Software Foundation; either
9# version 2.1 of the License, or (at your option) any later version.
10#
11# The GNU C Library is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14# Lesser General Public License for more details.
15#
16# You should have received a copy of the GNU Lesser General Public
17# License along with the GNU C Library; if not, see
18# <https://www.gnu.org/licenses/>.
19
20"""Basic ELF parser.
21
22Use Image.readfile(path) to read an ELF file into memory and begin
23parsing it.
24
25"""
26
27import collections
28import enum
29import struct
30
# enum.IntFlag is the base class of the flag types defined below (Pf,
# Shf, ...).  If the enum module of the running interpreter lacks it,
# print a warning and terminate with exit status 77 right away instead
# of failing later with an AttributeError.
# NOTE(review): 77 is presumably the "unsupported" status expected by
# the calling test harness -- confirm.
if not hasattr(enum, 'IntFlag'):
    import sys
    sys.stdout.write(
        'warning: glibcelf.py needs Python 3.6 for enum support\n')
    sys.exit(77)
36
class _OpenIntEnum(enum.IntEnum):
    """IntEnum variant that accepts integers without a declared member.

    Looking up an undeclared value yields an unnamed pseudo-member
    instead of raising ValueError.

    """
    @classmethod
    def _missing_(cls, value):
        # Same approach as enum.IntFlag._create_pseudo_member_: build
        # an int instance of the enum class by hand and leave it
        # without a name.
        member = int.__new__(cls, value)
        member._name_ = None
        member._value_ = value
        return member

    def __repr__(self):
        if self._name_ is None:
            return '{}({})'.format(self.__class__.__name__, self._value_)
        # Member names carry prefixes such as SHT_ which already imply
        # the type, so the class name is left out.
        return self._name_

    def __str__(self):
        # Named members print as their name, pseudo-members as the
        # plain number.
        return str(self._value_) if self._name_ is None else self._name_
60
class ElfClass(_OpenIntEnum):
    """ELF word size.  Type of EI_CLASS values.

    Ident.unpack converts the ei_class field to this type.  Together
    with ElfData it forms the key of the layouts dicts.

    """
    ELFCLASSNONE = 0
    ELFCLASS32 = 1
    ELFCLASS64 = 2
66
class ElfData(_OpenIntEnum):
    """ELF endianness.  Type of EI_DATA values.

    Ident.unpack converts the ei_data field to this type.
    _define_layouts maps ELFDATA2LSB to the '<' and ELFDATA2MSB to the
    '>' struct byte order prefix.

    """
    ELFDATANONE = 0
    ELFDATA2LSB = 1
    ELFDATA2MSB = 2
72
class Machine(_OpenIntEnum):
    """ELF machine type.  Type of values in Ehdr.e_machine field.

    The numbering has gaps (for example 11 to 14).  Such values, like
    any other undeclared integer, are still accepted and turn into
    unnamed members because this is an _OpenIntEnum.

    """
    EM_NONE = 0
    EM_M32 = 1
    EM_SPARC = 2
    EM_386 = 3
    EM_68K = 4
    EM_88K = 5
    EM_IAMCU = 6
    EM_860 = 7
    EM_MIPS = 8
    EM_S370 = 9
    EM_MIPS_RS3_LE = 10
    EM_PARISC = 15
    EM_VPP500 = 17
    EM_SPARC32PLUS = 18
    EM_960 = 19
    EM_PPC = 20
    EM_PPC64 = 21
    EM_S390 = 22
    EM_SPU = 23
    EM_V800 = 36
    EM_FR20 = 37
    EM_RH32 = 38
    EM_RCE = 39
    EM_ARM = 40
    EM_FAKE_ALPHA = 41
    EM_SH = 42
    EM_SPARCV9 = 43
    EM_TRICORE = 44
    EM_ARC = 45
    EM_H8_300 = 46
    EM_H8_300H = 47
    EM_H8S = 48
    EM_H8_500 = 49
    EM_IA_64 = 50
    EM_MIPS_X = 51
    EM_COLDFIRE = 52
    EM_68HC12 = 53
    EM_MMA = 54
    EM_PCP = 55
    EM_NCPU = 56
    EM_NDR1 = 57
    EM_STARCORE = 58
    EM_ME16 = 59
    EM_ST100 = 60
    EM_TINYJ = 61
    EM_X86_64 = 62
    EM_PDSP = 63
    EM_PDP10 = 64
    EM_PDP11 = 65
    EM_FX66 = 66
    EM_ST9PLUS = 67
    EM_ST7 = 68
    EM_68HC16 = 69
    EM_68HC11 = 70
    EM_68HC08 = 71
    EM_68HC05 = 72
    EM_SVX = 73
    EM_ST19 = 74
    EM_VAX = 75
    EM_CRIS = 76
    EM_JAVELIN = 77
    EM_FIREPATH = 78
    EM_ZSP = 79
    EM_MMIX = 80
    EM_HUANY = 81
    EM_PRISM = 82
    EM_AVR = 83
    EM_FR30 = 84
    EM_D10V = 85
    EM_D30V = 86
    EM_V850 = 87
    EM_M32R = 88
    EM_MN10300 = 89
    EM_MN10200 = 90
    EM_PJ = 91
    EM_OPENRISC = 92
    EM_ARC_COMPACT = 93
    EM_XTENSA = 94
    EM_VIDEOCORE = 95
    EM_TMM_GPP = 96
    EM_NS32K = 97
    EM_TPC = 98
    EM_SNP1K = 99
    EM_ST200 = 100
    EM_IP2K = 101
    EM_MAX = 102
    EM_CR = 103
    EM_F2MC16 = 104
    EM_MSP430 = 105
    EM_BLACKFIN = 106
    EM_SE_C33 = 107
    EM_SEP = 108
    EM_ARCA = 109
    EM_UNICORE = 110
    EM_EXCESS = 111
    EM_DXP = 112
    EM_ALTERA_NIOS2 = 113
    EM_CRX = 114
    EM_XGATE = 115
    EM_C166 = 116
    EM_M16C = 117
    EM_DSPIC30F = 118
    EM_CE = 119
    EM_M32C = 120
    EM_TSK3000 = 131
    EM_RS08 = 132
    EM_SHARC = 133
    EM_ECOG2 = 134
    EM_SCORE7 = 135
    EM_DSP24 = 136
    EM_VIDEOCORE3 = 137
    EM_LATTICEMICO32 = 138
    EM_SE_C17 = 139
    EM_TI_C6000 = 140
    EM_TI_C2000 = 141
    EM_TI_C5500 = 142
    EM_TI_ARP32 = 143
    EM_TI_PRU = 144
    EM_MMDSP_PLUS = 160
    EM_CYPRESS_M8C = 161
    EM_R32C = 162
    EM_TRIMEDIA = 163
    EM_QDSP6 = 164
    EM_8051 = 165
    EM_STXP7X = 166
    EM_NDS32 = 167
    EM_ECOG1X = 168
    EM_MAXQ30 = 169
    EM_XIMO16 = 170
    EM_MANIK = 171
    EM_CRAYNV2 = 172
    EM_RX = 173
    EM_METAG = 174
    EM_MCST_ELBRUS = 175
    EM_ECOG16 = 176
    EM_CR16 = 177
    EM_ETPU = 178
    EM_SLE9X = 179
    EM_L10M = 180
    EM_K10M = 181
    EM_AARCH64 = 183
    EM_AVR32 = 185
    EM_STM8 = 186
    EM_TILE64 = 187
    EM_TILEPRO = 188
    EM_MICROBLAZE = 189
    EM_CUDA = 190
    EM_TILEGX = 191
    EM_CLOUDSHIELD = 192
    EM_COREA_1ST = 193
    EM_COREA_2ND = 194
    EM_ARCV2 = 195
    EM_OPEN8 = 196
    EM_RL78 = 197
    EM_VIDEOCORE5 = 198
    EM_78KOR = 199
    EM_56800EX = 200
    EM_BA1 = 201
    EM_BA2 = 202
    EM_XCORE = 203
    EM_MCHP_PIC = 204
    EM_INTELGT = 205
    EM_KM32 = 210
    EM_KMX32 = 211
    EM_EMX16 = 212
    EM_EMX8 = 213
    EM_KVARC = 214
    EM_CDP = 215
    EM_COGE = 216
    EM_COOL = 217
    EM_NORC = 218
    EM_CSR_KALIMBA = 219
    EM_Z80 = 220
    EM_VISIUM = 221
    EM_FT32 = 222
    EM_MOXIE = 223
    EM_AMDGPU = 224
    EM_RISCV = 243
    EM_BPF = 247
    EM_CSKY = 252
    EM_LOONGARCH = 258
    # NOTE(review): EM_NUM (one past EM_LOONGARCH) looks like a "number
    # of entries" marker rather than a real machine -- confirm against
    # <elf.h>.
    EM_NUM = 259
    # Far outside the otherwise small numbering above.
    EM_ALPHA = 0x9026
258
class Et(_OpenIntEnum):
    """ELF file type.  Type of ET_* values and the Ehdr.e_type field.

    Values without a declared name are accepted as unnamed members.

    """
    ET_NONE = 0
    ET_REL = 1
    ET_EXEC = 2
    ET_DYN = 3
    ET_CORE = 4
266
class Shn(_OpenIntEnum):
    """ELF reserved section indices.

    Used as the converter for Ehdr.e_shstrndx, Shdr.sh_link and
    Sym.st_shndx.  Indices without a declared name (ordinary section
    indices) show up as unnamed members.

    """
    SHN_UNDEF = 0
    SHN_BEFORE = 0xff00
    SHN_AFTER = 0xff01
    SHN_ABS = 0xfff1
    SHN_COMMON = 0xfff2
    SHN_XINDEX = 0xffff
275
class ShnMIPS(enum.Enum):
    """Supplemental SHN_* constants for EM_MIPS.

    This is a plain enum.Enum, not an _OpenIntEnum: members do not
    compare equal to ints (use .value), and looking up an undeclared
    value raises ValueError.  0xff00 and 0xff01 are numerically the
    same as Shn.SHN_BEFORE and Shn.SHN_AFTER.

    """
    SHN_MIPS_ACOMMON = 0xff00
    SHN_MIPS_TEXT = 0xff01
    SHN_MIPS_DATA = 0xff02
    SHN_MIPS_SCOMMON = 0xff03
    SHN_MIPS_SUNDEFINED = 0xff04
283
class ShnPARISC(enum.Enum):
    """Supplemental SHN_* constants for EM_PARISC.

    Plain enum.Enum: use .value to obtain the raw index.  Both values
    are numerically the same as Shn.SHN_BEFORE and Shn.SHN_AFTER.

    """
    SHN_PARISC_ANSI_COMMON = 0xff00
    SHN_PARISC_HUGE_COMMON = 0xff01
288
class Sht(_OpenIntEnum):
    """ELF section types.  Type of SHT_* values.

    Used as the converter for Shdr.sh_type.  Section types without a
    declared name here (such as the values in the architecture-specific
    Sht* classes) are represented as unnamed members.

    """
    SHT_NULL = 0
    SHT_PROGBITS = 1
    SHT_SYMTAB = 2
    SHT_STRTAB = 3
    SHT_RELA = 4
    SHT_HASH = 5
    SHT_DYNAMIC = 6
    SHT_NOTE = 7
    SHT_NOBITS = 8
    SHT_REL = 9
    SHT_SHLIB = 10
    SHT_DYNSYM = 11
    SHT_INIT_ARRAY = 14
    SHT_FINI_ARRAY = 15
    SHT_PREINIT_ARRAY = 16
    SHT_GROUP = 17
    SHT_SYMTAB_SHNDX = 18
    SHT_RELR = 19
    SHT_GNU_ATTRIBUTES = 0x6ffffff5
    SHT_GNU_HASH = 0x6ffffff6
    SHT_GNU_LIBLIST = 0x6ffffff7
    SHT_CHECKSUM = 0x6ffffff8
    SHT_SUNW_move = 0x6ffffffa
    SHT_SUNW_COMDAT = 0x6ffffffb
    SHT_SUNW_syminfo = 0x6ffffffc
    SHT_GNU_verdef = 0x6ffffffd
    SHT_GNU_verneed = 0x6ffffffe
    SHT_GNU_versym = 0x6fffffff
319
class ShtALPHA(enum.Enum):
    """Supplemental SHT_* constants for EM_ALPHA.

    Plain enum.Enum: use .value to obtain the raw section type.  The
    same numbers are also used by other architectures' Sht* classes.

    """
    SHT_ALPHA_DEBUG = 0x70000001
    SHT_ALPHA_REGINFO = 0x70000002
324
class ShtARM(enum.Enum):
    """Supplemental SHT_* constants for EM_ARM.

    Plain enum.Enum: use .value to obtain the raw section type.  The
    same numbers are also used by other architectures' Sht* classes.

    """
    SHT_ARM_EXIDX = 0x70000001
    SHT_ARM_PREEMPTMAP = 0x70000002
    SHT_ARM_ATTRIBUTES = 0x70000003
330
class ShtCSKY(enum.Enum):
    """Supplemental SHT_* constants for EM_CSKY.

    Plain enum.Enum: use .value to obtain the raw section type.

    """
    SHT_CSKY_ATTRIBUTES = 0x70000001
334
class ShtIA_64(enum.Enum):
    """Supplemental SHT_* constants for EM_IA_64.

    Plain enum.Enum: use .value to obtain the raw section type.  The
    values coincide with ShtPARISC.SHT_PARISC_EXT and
    ShtPARISC.SHT_PARISC_UNWIND.

    """
    SHT_IA_64_EXT = 0x70000000
    SHT_IA_64_UNWIND = 0x70000001
339
class ShtMIPS(enum.Enum):
    """Supplemental SHT_* constants for EM_MIPS.

    Plain enum.Enum: use .value to obtain the raw section type.  The
    numbering is not contiguous (for example, 0x7000000a, 0x7000000e,
    0x7000000f and 0x7000002a have no member), and looking up such a
    value raises ValueError.

    """
    SHT_MIPS_LIBLIST = 0x70000000
    SHT_MIPS_MSYM = 0x70000001
    SHT_MIPS_CONFLICT = 0x70000002
    SHT_MIPS_GPTAB = 0x70000003
    SHT_MIPS_UCODE = 0x70000004
    SHT_MIPS_DEBUG = 0x70000005
    SHT_MIPS_REGINFO = 0x70000006
    SHT_MIPS_PACKAGE = 0x70000007
    SHT_MIPS_PACKSYM = 0x70000008
    SHT_MIPS_RELD = 0x70000009
    SHT_MIPS_IFACE = 0x7000000b
    SHT_MIPS_CONTENT = 0x7000000c
    SHT_MIPS_OPTIONS = 0x7000000d
    SHT_MIPS_SHDR = 0x70000010
    SHT_MIPS_FDESC = 0x70000011
    SHT_MIPS_EXTSYM = 0x70000012
    SHT_MIPS_DENSE = 0x70000013
    SHT_MIPS_PDESC = 0x70000014
    SHT_MIPS_LOCSYM = 0x70000015
    SHT_MIPS_AUXSYM = 0x70000016
    SHT_MIPS_OPTSYM = 0x70000017
    SHT_MIPS_LOCSTR = 0x70000018
    SHT_MIPS_LINE = 0x70000019
    SHT_MIPS_RFDESC = 0x7000001a
    SHT_MIPS_DELTASYM = 0x7000001b
    SHT_MIPS_DELTAINST = 0x7000001c
    SHT_MIPS_DELTACLASS = 0x7000001d
    SHT_MIPS_DWARF = 0x7000001e
    SHT_MIPS_DELTADECL = 0x7000001f
    SHT_MIPS_SYMBOL_LIB = 0x70000020
    SHT_MIPS_EVENTS = 0x70000021
    SHT_MIPS_TRANSLATE = 0x70000022
    SHT_MIPS_PIXIE = 0x70000023
    SHT_MIPS_XLATE = 0x70000024
    SHT_MIPS_XLATE_DEBUG = 0x70000025
    SHT_MIPS_WHIRL = 0x70000026
    SHT_MIPS_EH_REGION = 0x70000027
    SHT_MIPS_XLATE_OLD = 0x70000028
    SHT_MIPS_PDR_EXCEPTION = 0x70000029
    SHT_MIPS_XHASH = 0x7000002b
382
class ShtPARISC(enum.Enum):
    """Supplemental SHT_* constants for EM_PARISC.

    Plain enum.Enum: use .value to obtain the raw section type.  The
    same numbers are also used by other architectures' Sht* classes.

    """
    SHT_PARISC_EXT = 0x70000000
    SHT_PARISC_UNWIND = 0x70000001
    SHT_PARISC_DOC = 0x70000002
388
class ShtRISCV(enum.Enum):
    """Supplemental SHT_* constants for EM_RISCV.

    Plain enum.Enum: use .value to obtain the raw section type.  The
    value is numerically the same as ShtARM.SHT_ARM_ATTRIBUTES.

    """
    SHT_RISCV_ATTRIBUTES = 0x70000003
392
class Pf(enum.IntFlag):
    """Program header flags.  Type of Phdr.p_flags values.

    Being an IntFlag, members are ints and can be combined with | and
    tested with &.

    """
    PF_X = 1
    PF_W = 2
    PF_R = 4
398
class PfARM(enum.IntFlag):
    """Supplemental PF_* flags for EM_ARM.

    Separate IntFlag class (not merged into Pf).  0x10000000 is also
    used by PfMIPS.PF_MIPS_LOCAL.

    """
    PF_ARM_SB = 0x10000000
    PF_ARM_PI = 0x20000000
    PF_ARM_ABS = 0x40000000
404
class PfPARISC(enum.IntFlag):
    """Supplemental PF_* flags for EM_PARISC.

    Separate IntFlag class (not merged into Pf).  Note that the member
    names use the PF_HP_ prefix, not PF_PARISC_.

    """
    PF_HP_PAGE_SIZE = 0x00100000
    PF_HP_FAR_SHARED = 0x00200000
    PF_HP_NEAR_SHARED = 0x00400000
    PF_HP_CODE = 0x01000000
    PF_HP_MODIFY = 0x02000000
    PF_HP_LAZYSWAP = 0x04000000
    PF_HP_SBP = 0x08000000
414
class PfIA_64(enum.IntFlag):
    """Supplemental PF_* flags for EM_IA_64.

    Separate IntFlag class (not merged into Pf).  The single flag is
    the top bit of a 32-bit flags word.

    """
    PF_IA_64_NORECOV = 0x80000000
418
class PfMIPS(enum.IntFlag):
    """Supplemental PF_* flags for EM_MIPS.

    Separate IntFlag class (not merged into Pf).  0x10000000 is also
    used by PfARM.PF_ARM_SB.

    """
    PF_MIPS_LOCAL = 0x10000000
422
class Shf(enum.IntFlag):
    """Section flags.  Type of Shdr.sh_flags values.

    Every member is a single bit.  The values are spelled as 32-bit
    hexadecimal masks, like in the architecture-specific Shf* classes.

    """
    SHF_WRITE = 0x00000001
    SHF_ALLOC = 0x00000002
    SHF_EXECINSTR = 0x00000004
    SHF_MERGE = 0x00000010
    SHF_STRINGS = 0x00000020
    SHF_INFO_LINK = 0x00000040
    SHF_LINK_ORDER = 0x00000080
    SHF_OS_NONCONFORMING = 0x00000100
    SHF_GROUP = 0x00000200
    SHF_TLS = 0x00000400
    SHF_COMPRESSED = 0x00000800
    SHF_GNU_RETAIN = 0x00200000
    SHF_ORDERED = 0x40000000
    SHF_EXCLUDE = 0x80000000
439
class ShfALPHA(enum.IntFlag):
    """Supplemental SHF_* constants for EM_ALPHA.

    Separate IntFlag class (not merged into Shf).  0x10000000 is also
    used by the ARM, IA-64 and MIPS classes.

    """
    SHF_ALPHA_GPREL = 0x10000000
443
class ShfARM(enum.IntFlag):
    """Supplemental SHF_* constants for EM_ARM.

    Separate IntFlag class (not merged into Shf).  0x80000000 is
    numerically the same bit as Shf.SHF_EXCLUDE (1 << 31).

    """
    SHF_ARM_ENTRYSECT = 0x10000000
    SHF_ARM_COMDEF = 0x80000000
448
class ShfIA_64(enum.IntFlag):
    """Supplemental SHF_* constants for EM_IA_64.

    Separate IntFlag class (not merged into Shf).  The same bits are
    also used by other architectures' Shf* classes.

    """
    SHF_IA_64_SHORT  = 0x10000000
    SHF_IA_64_NORECOV = 0x20000000
453
class ShfMIPS(enum.IntFlag):
    """Supplemental SHF_* constants for EM_MIPS.

    Separate IntFlag class (not merged into Shf).  The members are not
    listed in ascending order.  0x40000000 and 0x80000000 are
    numerically the same bits as Shf.SHF_ORDERED and Shf.SHF_EXCLUDE.

    """
    SHF_MIPS_GPREL = 0x10000000
    SHF_MIPS_MERGE = 0x20000000
    SHF_MIPS_ADDR = 0x40000000
    SHF_MIPS_STRINGS = 0x80000000
    SHF_MIPS_NOSTRIP = 0x08000000
    SHF_MIPS_LOCAL = 0x04000000
    SHF_MIPS_NAMES = 0x02000000
    SHF_MIPS_NODUPE = 0x01000000
464
class ShfPARISC(enum.IntFlag):
    """Supplemental SHF_* constants for EM_PARISC.

    Separate IntFlag class (not merged into Shf).  0x40000000 and
    0x80000000 are numerically the same bits as Shf.SHF_ORDERED and
    Shf.SHF_EXCLUDE.

    """
    SHF_PARISC_SHORT = 0x20000000
    SHF_PARISC_HUGE = 0x40000000
    SHF_PARISC_SBP = 0x80000000
470
class Stb(_OpenIntEnum):
    """ELF symbol binding type.

    StInfo stores the bits of st_info above the low four bits as a
    value of this type (StInfo.bind).

    """
    STB_LOCAL = 0
    STB_GLOBAL = 1
    STB_WEAK = 2
    STB_GNU_UNIQUE = 10
    STB_MIPS_SPLIT_COMMON = 13
478
class Stt(_OpenIntEnum):
    """ELF symbol type.

    StInfo stores the low four bits of st_info as a value of this type
    (StInfo.type).  Architecture-specific values (see the Stt* classes)
    appear as unnamed members.

    """
    STT_NOTYPE = 0
    STT_OBJECT = 1
    STT_FUNC = 2
    STT_SECTION = 3
    STT_FILE = 4
    STT_COMMON = 5
    STT_TLS = 6
    STT_GNU_IFUNC = 10
489
class SttARM(enum.Enum):
    """Supplemental STT_* constants for EM_ARM.

    Plain enum.Enum: use .value to obtain the raw symbol type.  The
    value 13 is also used by the PARISC and SPARC classes.

    """
    STT_ARM_TFUNC = 13
    STT_ARM_16BIT = 15
494
class SttPARISC(enum.Enum):
    """Supplemental STT_* constants for EM_PARISC.

    Plain enum.Enum: use .value to obtain the raw symbol type.  The
    value 13 is also used by the ARM and SPARC classes.

    """
    STT_HP_OPAQUE = 11
    STT_HP_STUB = 12
    STT_PARISC_MILLICODE = 13
500
class SttSPARC(enum.Enum):
    """Supplemental STT_* constants for EM_SPARC.

    Plain enum.Enum: use .value to obtain the raw symbol type.  The
    value 13 is also used by the ARM and PARISC classes.

    """
    STT_SPARC_REGISTER = 13
504
class SttX86_64(enum.Enum):
    """Supplemental STT_* constants for EM_X86_64.

    Plain enum.Enum: use .value to obtain the raw value.

    """
    # NOTE(review): the only member has an SHT_ (section type) prefix
    # and a value in the 0x70000000 range used by the Sht* classes,
    # although this class is named and documented as STT_ (symbol
    # type).  It looks misfiled; confirm and consider providing it as
    # ShtX86_64 instead.
    SHT_X86_64_UNWIND = 0x70000001
508
class Pt(_OpenIntEnum):
    """ELF program header types.  Type of Phdr.p_type.

    Segment types without a declared name here (such as the values in
    the architecture-specific Pt* classes) are represented as unnamed
    members.

    """
    PT_NULL = 0
    PT_LOAD = 1
    PT_DYNAMIC = 2
    PT_INTERP = 3
    PT_NOTE = 4
    PT_SHLIB = 5
    PT_PHDR = 6
    PT_TLS = 7
    # NOTE(review): PT_NUM (one past PT_TLS) looks like a "number of
    # entries" marker rather than a real segment type -- confirm
    # against <elf.h>.
    PT_NUM = 8
    PT_GNU_EH_FRAME = 0x6474e550
    PT_GNU_STACK = 0x6474e551
    PT_GNU_RELRO = 0x6474e552
    PT_GNU_PROPERTY = 0x6474e553
    PT_SUNWBSS = 0x6ffffffa
    PT_SUNWSTACK = 0x6ffffffb
526
class PtAARCH64(enum.Enum):
    """Supplemental PT_* constants for EM_AARCH64.

    Plain enum.Enum: use .value to obtain the raw segment type.  The
    value is numerically the same as PtMIPS.PT_MIPS_OPTIONS.

    """
    PT_AARCH64_MEMTAG_MTE = 0x70000002
530
class PtARM(enum.Enum):
    """Supplemental PT_* constants for EM_ARM.

    Plain enum.Enum: use .value to obtain the raw segment type.  The
    value is also used by other architectures' Pt* classes.

    """
    PT_ARM_EXIDX = 0x70000001
534
class PtIA_64(enum.Enum):
    """Supplemental PT_* constants for EM_IA_64.

    Plain enum.Enum: use .value to obtain the raw segment type.  The
    three PT_IA_64_HP_* values are numerically the same as
    PtPARISC.PT_HP_OPT_ANNOT, PT_HP_HSL_ANNOT and PT_HP_STACK.

    """
    PT_IA_64_HP_OPT_ANOT = 0x60000012
    PT_IA_64_HP_HSL_ANOT = 0x60000013
    PT_IA_64_HP_STACK = 0x60000014
    PT_IA_64_ARCHEXT = 0x70000000
    PT_IA_64_UNWIND = 0x70000001
542
class PtMIPS(enum.Enum):
    """Supplemental PT_* constants for EM_MIPS.

    Plain enum.Enum: use .value to obtain the raw segment type.  The
    same numbers are also used by other architectures' Pt* classes.

    """
    PT_MIPS_REGINFO = 0x70000000
    PT_MIPS_RTPROC = 0x70000001
    PT_MIPS_OPTIONS = 0x70000002
    PT_MIPS_ABIFLAGS = 0x70000003
549
class PtPARISC(enum.Enum):
    """Supplemental PT_* constants for EM_PARISC.

    Plain enum.Enum: use .value to obtain the raw segment type.  Most
    member names use the PT_HP_ prefix.  The values are hexadecimal,
    so 0x60000010 follows 0x60000009 with a gap (0x6000000a to
    0x6000000f have no member).

    """
    PT_HP_TLS = 0x60000000
    PT_HP_CORE_NONE = 0x60000001
    PT_HP_CORE_VERSION = 0x60000002
    PT_HP_CORE_KERNEL = 0x60000003
    PT_HP_CORE_COMM = 0x60000004
    PT_HP_CORE_PROC = 0x60000005
    PT_HP_CORE_LOADABLE = 0x60000006
    PT_HP_CORE_STACK = 0x60000007
    PT_HP_CORE_SHM = 0x60000008
    PT_HP_CORE_MMF = 0x60000009
    PT_HP_PARALLEL = 0x60000010
    PT_HP_FASTBIND = 0x60000011
    PT_HP_OPT_ANNOT = 0x60000012
    PT_HP_HSL_ANNOT = 0x60000013
    PT_HP_STACK = 0x60000014
    PT_PARISC_ARCHEXT = 0x70000000
    PT_PARISC_UNWIND = 0x70000001
569
class PtRISCV(enum.Enum):
    """Supplemental PT_* constants for EM_RISCV.

    Plain enum.Enum: use .value to obtain the raw segment type.  The
    value is numerically the same as PtMIPS.PT_MIPS_ABIFLAGS.

    """
    PT_RISCV_ATTRIBUTES = 0x70000003
573
class Dt(_OpenIntEnum):
    """ELF dynamic segment tags.  Type of Dyn.d_tag.

    Tags without a declared name here (such as the values in the
    architecture-specific Dt* classes) are represented as unnamed
    members.  Note that 31 has no member.

    """
    DT_NULL = 0
    DT_NEEDED = 1
    DT_PLTRELSZ = 2
    DT_PLTGOT = 3
    DT_HASH = 4
    DT_STRTAB = 5
    DT_SYMTAB = 6
    DT_RELA = 7
    DT_RELASZ = 8
    DT_RELAENT = 9
    DT_STRSZ = 10
    DT_SYMENT = 11
    DT_INIT = 12
    DT_FINI = 13
    DT_SONAME = 14
    DT_RPATH = 15
    DT_SYMBOLIC = 16
    DT_REL = 17
    DT_RELSZ = 18
    DT_RELENT = 19
    DT_PLTREL = 20
    DT_DEBUG = 21
    DT_TEXTREL = 22
    DT_JMPREL = 23
    DT_BIND_NOW = 24
    DT_INIT_ARRAY = 25
    DT_FINI_ARRAY = 26
    DT_INIT_ARRAYSZ = 27
    DT_FINI_ARRAYSZ = 28
    DT_RUNPATH = 29
    DT_FLAGS = 30
    DT_PREINIT_ARRAY = 32
    DT_PREINIT_ARRAYSZ = 33
    DT_SYMTAB_SHNDX = 34
    DT_RELRSZ = 35
    DT_RELR = 36
    DT_RELRENT = 37
    DT_GNU_PRELINKED = 0x6ffffdf5
    DT_GNU_CONFLICTSZ = 0x6ffffdf6
    DT_GNU_LIBLISTSZ = 0x6ffffdf7
    DT_CHECKSUM = 0x6ffffdf8
    DT_PLTPADSZ = 0x6ffffdf9
    DT_MOVEENT = 0x6ffffdfa
    DT_MOVESZ = 0x6ffffdfb
    DT_FEATURE_1 = 0x6ffffdfc
    DT_POSFLAG_1 = 0x6ffffdfd
    DT_SYMINSZ = 0x6ffffdfe
    DT_SYMINENT = 0x6ffffdff
    DT_GNU_HASH = 0x6ffffef5
    DT_TLSDESC_PLT = 0x6ffffef6
    DT_TLSDESC_GOT = 0x6ffffef7
    DT_GNU_CONFLICT = 0x6ffffef8
    DT_GNU_LIBLIST = 0x6ffffef9
    DT_CONFIG = 0x6ffffefa
    DT_DEPAUDIT = 0x6ffffefb
    DT_AUDIT = 0x6ffffefc
    DT_PLTPAD = 0x6ffffefd
    DT_MOVETAB = 0x6ffffefe
    DT_SYMINFO = 0x6ffffeff
    DT_VERSYM = 0x6ffffff0
    DT_RELACOUNT = 0x6ffffff9
    DT_RELCOUNT = 0x6ffffffa
    DT_FLAGS_1 = 0x6ffffffb
    DT_VERDEF = 0x6ffffffc
    DT_VERDEFNUM = 0x6ffffffd
    DT_VERNEED = 0x6ffffffe
    DT_VERNEEDNUM = 0x6fffffff
    DT_AUXILIARY = 0x7ffffffd
    DT_FILTER = 0x7fffffff
645
class DtAARCH64(enum.Enum):
    """Supplemental DT_* constants for EM_AARCH64.

    Plain enum.Enum: use .value to obtain the raw tag.  The same
    numbers are also used by other architectures' Dt* classes.

    """
    DT_AARCH64_BTI_PLT = 0x70000001
    DT_AARCH64_PAC_PLT = 0x70000003
    DT_AARCH64_VARIANT_PCS = 0x70000005
651
class DtALPHA(enum.Enum):
    """Supplemental DT_* constants for EM_ALPHA.

    Plain enum.Enum: use .value to obtain the raw tag.  The value is
    also used by other architectures' Dt* classes.

    """
    DT_ALPHA_PLTRO = 0x70000000
655
class DtALTERA_NIOS2(enum.Enum):
    """Supplemental DT_* constants for EM_ALTERA_NIOS2.

    Plain enum.Enum: use .value to obtain the raw tag.  Note that the
    member name uses the DT_NIOS2_ prefix, not DT_ALTERA_NIOS2_.

    """
    DT_NIOS2_GP = 0x70000002
659
class DtIA_64(enum.Enum):
    """Supplemental DT_* constants for EM_IA_64.

    Plain enum.Enum: use .value to obtain the raw tag.  The value is
    also used by other architectures' Dt* classes.

    """
    DT_IA_64_PLT_RESERVE = 0x70000000
663
class DtMIPS(enum.Enum):
    """Supplemental DT_* constants for EM_MIPS.

    Plain enum.Enum: use .value to obtain the raw tag.  The numbering
    is not contiguous (for example, 0x7000000c to 0x7000000f,
    0x70000015, 0x7000001f and 0x70000033 have no member), and looking
    up such a value raises ValueError.

    """
    DT_MIPS_RLD_VERSION = 0x70000001
    DT_MIPS_TIME_STAMP = 0x70000002
    DT_MIPS_ICHECKSUM = 0x70000003
    DT_MIPS_IVERSION = 0x70000004
    DT_MIPS_FLAGS = 0x70000005
    DT_MIPS_BASE_ADDRESS = 0x70000006
    DT_MIPS_MSYM = 0x70000007
    DT_MIPS_CONFLICT = 0x70000008
    DT_MIPS_LIBLIST = 0x70000009
    DT_MIPS_LOCAL_GOTNO = 0x7000000a
    DT_MIPS_CONFLICTNO = 0x7000000b
    DT_MIPS_LIBLISTNO = 0x70000010
    DT_MIPS_SYMTABNO = 0x70000011
    DT_MIPS_UNREFEXTNO = 0x70000012
    DT_MIPS_GOTSYM = 0x70000013
    DT_MIPS_HIPAGENO = 0x70000014
    DT_MIPS_RLD_MAP = 0x70000016
    DT_MIPS_DELTA_CLASS = 0x70000017
    DT_MIPS_DELTA_CLASS_NO = 0x70000018
    DT_MIPS_DELTA_INSTANCE = 0x70000019
    DT_MIPS_DELTA_INSTANCE_NO = 0x7000001a
    DT_MIPS_DELTA_RELOC = 0x7000001b
    DT_MIPS_DELTA_RELOC_NO = 0x7000001c
    DT_MIPS_DELTA_SYM = 0x7000001d
    DT_MIPS_DELTA_SYM_NO = 0x7000001e
    DT_MIPS_DELTA_CLASSSYM = 0x70000020
    DT_MIPS_DELTA_CLASSSYM_NO = 0x70000021
    DT_MIPS_CXX_FLAGS = 0x70000022
    DT_MIPS_PIXIE_INIT = 0x70000023
    DT_MIPS_SYMBOL_LIB = 0x70000024
    DT_MIPS_LOCALPAGE_GOTIDX = 0x70000025
    DT_MIPS_LOCAL_GOTIDX = 0x70000026
    DT_MIPS_HIDDEN_GOTIDX = 0x70000027
    DT_MIPS_PROTECTED_GOTIDX = 0x70000028
    DT_MIPS_OPTIONS = 0x70000029
    DT_MIPS_INTERFACE = 0x7000002a
    DT_MIPS_DYNSTR_ALIGN = 0x7000002b
    DT_MIPS_INTERFACE_SIZE = 0x7000002c
    DT_MIPS_RLD_TEXT_RESOLVE_ADDR = 0x7000002d
    DT_MIPS_PERF_SUFFIX = 0x7000002e
    DT_MIPS_COMPACT_SIZE = 0x7000002f
    DT_MIPS_GP_VALUE = 0x70000030
    DT_MIPS_AUX_DYNAMIC = 0x70000031
    DT_MIPS_PLTGOT = 0x70000032
    DT_MIPS_RWPLT = 0x70000034
    DT_MIPS_RLD_MAP_REL = 0x70000035
    DT_MIPS_XHASH = 0x70000036
713
class DtPPC(enum.Enum):
    """Supplemental DT_* constants for EM_PPC.

    Plain enum.Enum: use .value to obtain the raw tag.  The same
    numbers are also used by other architectures' Dt* classes.

    """
    DT_PPC_GOT = 0x70000000
    DT_PPC_OPT = 0x70000001
718
class DtPPC64(enum.Enum):
    """Supplemental DT_* constants for EM_PPC64.

    Plain enum.Enum: use .value to obtain the raw tag.  The same
    numbers are also used by other architectures' Dt* classes.

    """
    DT_PPC64_GLINK = 0x70000000
    DT_PPC64_OPD = 0x70000001
    DT_PPC64_OPDSZ = 0x70000002
    DT_PPC64_OPT = 0x70000003
725
class DtRISCV(enum.Enum):
    """Supplemental DT_* constants for EM_RISCV.

    Plain enum.Enum: use .value to obtain the raw tag.  The value is
    also used by other architectures' Dt* classes.

    """
    DT_RISCV_VARIANT_CC = 0x70000001
729
class DtSPARC(enum.Enum):
    """Supplemental DT_* constants for EM_SPARC.

    Plain enum.Enum: use .value to obtain the raw tag.  The value is
    also used by other architectures' Dt* classes.

    """
    DT_SPARC_REGISTER = 0x70000001
733
class StInfo:
    """ELF symbol binding and type.  Type of the Sym.st_info field.

    Construct either from the raw st_info value, StInfo(raw), or from
    separate binding and type values, StInfo(bind, type).  The parts
    are available as the bind (Stb) and type (Stt) attributes.

    """
    def __init__(self, arg0, arg1=None):
        if isinstance(arg0, int) and arg1 is None:
            # Raw st_info value: the symbol type is in the low four
            # bits, the binding in the bits above.
            self.bind = Stb(arg0 >> 4)
            self.type = Stt(arg0 & 15)
        else:
            self.bind = Stb(arg0)
            self.type = Stt(arg1)

    def value(self):
        """Returns the raw value for the bind/type combination."""
        # Enum members expose their number as the .value attribute; it
        # is not a method, so it must not be called.
        return (self.bind.value << 4) | self.type.value
747
# Type in an ELF file.  Used for deserialization.
# unpack: function which turns a blob of raw bytes into the parsed
#   named tuple.
# size: number of bytes in the file covered by one object.  Image.read
#   passes exactly this many bytes to unpack.
_Layout = collections.namedtuple('_Layout', 'unpack size')
750
def _define_layouts(baseclass: type, layout32: str, layout64: str,
                    types=None, fields32=None):
    """Assign the layouts dict to baseclass.

    The dict is stored in baseclass.layouts.  It is indexed by
    (ElfClass, ElfData) pairs, and its values are _Layout instances
    whose unpack function turns a blob into a baseclass tuple.

    baseclass: the named tuple class describing the ELF structure.
    layout32, layout64: struct format strings, without byte order
      prefix, for the 32-bit and the 64-bit variant of the structure.
    types: optional dict mapping field names to classes (or other
      callables with a __name__) which convert the raw field value.
    fields32: optional order of the fields in the 32-bit variant, for
      structures where it differs from the order in baseclass.  The
      64-bit variant always uses the order in baseclass.

    Raises ValueError if a layout does not produce one value per field,
    if types names a field which does not exist, or if fields32 is not
    a permutation of the fields.

    """
    fields = baseclass._fields
    types = types or {}

    # Check that the struct formats yield the right number of components.
    for layout in (layout32, layout64):
        s = struct.Struct(layout)
        example = s.unpack(b' ' * s.size)
        if len(example) != len(fields):
            raise ValueError('{!r} yields wrong field count: {} != {}'.format(
                s.format, len(example), len(fields)))

    # Check that field names in types are correct.
    for n in types:
        if n not in fields:
            raise ValueError('{} does not have field {!r}'.format(
                baseclass.__name__, n))

    # Comparing sorted lists (instead of sets) also rejects duplicated
    # field names.
    if fields32 is not None and sorted(fields32) != sorted(fields):
        raise ValueError('{!r} is not a permutation of the fields {!r}'.format(
            fields32, fields))

    # Names which are visible inside the generated functions.  The
    # helper names chosen below must not clash with any of them.
    used_names = {baseclass.__name__}
    used_names.update(fields)
    used_names.update(cls.__name__ for cls in types.values())

    def unique_name(name):
        """Find a name that is not used for a class or field name."""
        candidate = name
        n = 0
        while candidate in used_names:
            n += 1
            candidate = '{}{}'.format(name, n)
        used_names.add(candidate)
        return candidate

    blob_name = unique_name('blob')
    struct_unpack_name = unique_name('struct_unpack')

    indent = ' ' * 4
    field_names = ', '.join(fields)

    layouts = {}
    for (bits, elfclass, layout, order) in (
            (32, ElfClass.ELFCLASS32, layout32, fields32),
            (64, ElfClass.ELFCLASS64, layout64, None),
    ):
        for (elfdata, structprefix, funcsuffix) in (
                (ElfData.ELFDATA2LSB, '<', 'LE'),
                (ElfData.ELFDATA2MSB, '>', 'BE'),
        ):
            # Compile the format including the byte order prefix, so
            # that the reported size uses the same standard sizes
            # (without native alignment) as the unpacking itself.
            compiled = struct.Struct(structprefix + layout)

            env = {
                baseclass.__name__: baseclass,
                struct_unpack_name: compiled.unpack,
            }
            # Add the type converters.
            env.update((cls.__name__, cls) for cls in types.values())

            funcname = 'unpack_{}{}{}'.format(
                baseclass.__name__, bits, funcsuffix)
            unpack_call = '{}({})'.format(struct_unpack_name, blob_name)

            lines = ['def {}({}):'.format(funcname, blob_name)]
            if not types and order is None:
                # No conversion or reordering needed: pass the
                # unpacked values straight to the named tuple.
                lines.append('{}return {}(*{})'.format(
                    indent, baseclass.__name__, unpack_call))
            else:
                # Destructuring assignment, in file order.  The list
                # target also works for a single field.
                lines.append('{}[{}] = {}'.format(
                    indent,
                    field_names if order is None else ', '.join(order),
                    unpack_call))
                # Perform the type conversions.
                for n in fields:
                    if n in types:
                        lines.append('{}{} = {}({})'.format(
                            indent, n, types[n].__name__, n))
                # Create the named tuple.
                lines.append('{}return {}({})'.format(
                    indent, baseclass.__name__, field_names))

            # The source text is built from the arguments of this
            # function only, never from file contents.
            exec('\n'.join(lines) + '\n', env)
            layouts[(elfclass, elfdata)] = _Layout(
                env[funcname], compiled.size)
    baseclass.layouts = layouts
855
856
# Corresponds to EI_* indices into Elf*_Ehdr.e_ident.
class Ident(collections.namedtuple('Ident',
    'ei_mag ei_class ei_data ei_version ei_osabi ei_abiversion ei_pad')):
    """The 16 identification bytes at the start of an ELF header.

    Ident(blob) parses the raw bytes; Ident(ei_mag, ..., ei_pad)
    constructs the tuple from its seven fields.  The class also offers
    the unpack/size interface of _Layout, so it can be passed to
    Image.read directly.

    """

    def __new__(cls, *args):
        """Construct an object from a blob or its constituent fields."""
        if len(args) == 1:
            return cls.unpack(args[0])
        # Use super() instead of cls.__base__: for a subclass of Ident,
        # cls.__base__ is Ident itself, so this method would call
        # itself without end.
        return super().__new__(cls, *args)

    @staticmethod
    def unpack(blob: memoryview) -> 'Ident':
        """Parse raw data into a tuple."""
        ei_mag, ei_class, ei_data, ei_version, ei_osabi, ei_abiversion, \
            ei_pad = struct.unpack('4s5B7s', blob)
        return Ident(ei_mag, ElfClass(ei_class), ElfData(ei_data),
                     ei_version, ei_osabi, ei_abiversion, ei_pad)
    # Number of bytes expected by unpack (4 + 5 + 7).
    size = 16
875
# Corresponds to Elf32_Ehdr and Elf64_Ehdr.
# The two layouts differ only in the width of e_entry, e_phoff and
# e_shoff (32-bit 'I' versus 64-bit 'Q').  The 16 bytes of e_ident are
# parsed further by Ident.
Ehdr = collections.namedtuple('Ehdr',
   'e_ident e_type e_machine e_version e_entry e_phoff e_shoff e_flags'
    + ' e_ehsize e_phentsize e_phnum e_shentsize e_shnum e_shstrndx')
_define_layouts(Ehdr,
                layout32='16s2H5I6H',
                layout64='16s2HI3QI6H',
                types=dict(e_ident=Ident,
                           e_machine=Machine,
                           e_type=Et,
                           e_shstrndx=Shn))
887
# Corresponds to Elf32_Phdr and Elf64_Phdr.  Order follows the latter.
# In the 32-bit structure, p_flags comes after p_memsz instead of
# directly after p_type; fields32 lists the fields in that order.
Phdr = collections.namedtuple('Phdr',
    'p_type p_flags p_offset p_vaddr p_paddr p_filesz p_memsz p_align')
_define_layouts(Phdr,
                layout32='8I',
                fields32=('p_type', 'p_offset', 'p_vaddr', 'p_paddr',
                          'p_filesz', 'p_memsz', 'p_flags', 'p_align'),
                layout64='2I6Q',
            types=dict(p_type=Pt, p_flags=Pf))
897
898
# Corresponds to Elf32_Shdr and Elf64_Shdr.
class Shdr(collections.namedtuple('Shdr', (
        'sh_name', 'sh_type', 'sh_flags', 'sh_addr', 'sh_offset',
        'sh_size', 'sh_link', 'sh_info', 'sh_addralign', 'sh_entsize'))):
    """ELF section header."""
    def resolve(self, strtab: 'StringTable') -> 'Shdr':
        """Return a copy with sh_name looked up in the string table.

        The current value of sh_name is passed to strtab.get, and the
        result replaces it.  All other fields are left unchanged.

        """
        name = strtab.get(self.sh_name)
        return self._replace(sh_name=name)
# 32-bit variant: ten 32-bit words.  In the 64-bit variant, sh_flags,
# sh_addr, sh_offset, sh_size, sh_addralign and sh_entsize are 64 bits
# wide; sh_name, sh_type, sh_link and sh_info stay at 32 bits.
_define_layouts(Shdr,
                layout32='10I',
                layout64='2I4Q2I2Q',
                types=dict(sh_type=Sht,
                           sh_flags=Shf,
                           sh_link=Shn))
912
# Corresponds to Elf32_Dyn and Elf64_Dyn.  The nesting through the
# d_un union is skipped, and d_ptr is missing (its representation in
# Python would be identical to d_val).
# 'i' and 'q' are signed struct formats, so both d_tag and d_val are
# read as signed integers.
Dyn = collections.namedtuple('Dyn', 'd_tag d_val')
_define_layouts(Dyn,
                layout32='2i',
                layout64='2q',
                types=dict(d_tag=Dt))
921
# Corresponds to Elf32_Sym and Elf64_Sym.
class Sym(collections.namedtuple('Sym', (
        'st_name', 'st_info', 'st_other', 'st_shndx', 'st_value',
        'st_size'))):
    """ELF symbol table entry."""
    def resolve(self, strtab: 'StringTable') -> 'Sym':
        """Return a copy with st_name looked up in the string table.

        The current value of st_name is passed to strtab.get, and the
        result replaces it.  All other fields are left unchanged.

        """
        name = strtab.get(self.st_name)
        return self._replace(st_name=name)
# The named tuple follows the 64-bit field order.  In the 32-bit
# structure, st_value and st_size come directly after st_name (see
# fields32).  st_info is split into binding and type by StInfo.
_define_layouts(Sym,
                layout32='3I2BH',
                layout64='I2BH2Q',
                fields32=('st_name', 'st_value', 'st_size', 'st_info',
                          'st_other', 'st_shndx'),
                types=dict(st_shndx=Shn,
                           st_info=StInfo))
935
# Corresponds to Elf32_Rel and Elf64_Rel.
# Both fields are read as unsigned integers.  r_info is kept as the
# raw number; it is not split into its components here.
Rel = collections.namedtuple('Rel', 'r_offset r_info')
_define_layouts(Rel,
                layout32='2I',
                layout64='2Q')
941
# Corresponds to Elf32_Rela and Elf64_Rela.
Rela = collections.namedtuple('Rela', 'r_offset r_info r_addend')
# r_addend is a signed quantity (Elf32_Sword/Elf64_Sxword), so it is
# read with the signed 'i'/'q' formats.  Otherwise negative addends
# would show up as large positive numbers.  r_offset and r_info are
# unsigned.
_define_layouts(Rela,
                layout32='2Ii',
                layout64='2Qq')
947
class StringTable:
    """ELF string table, addressed by byte offset."""
    def __init__(self, blob):
        """Wrap the blob as a string table.

        blob: a memoryview-like object

        The blob is stored as is, without making a copy.

        """
        self.blob = blob

    def get(self, index) -> bytes:
        """Returns the null-terminated byte string at the index."""
        data = self.blob
        end = index
        # Advance to the terminating null byte, which is not part of
        # the result.
        while data[end] != 0:
            end += 1
        return bytes(data[index:end])
966
class Image:
    """ELF image parser.

    After construction, the attributes Ehdr, Phdr, Shdr, Dyn, Sym,
    Rel and Rela refer to the layout variants matching the class and
    data encoding found in the ELF identification, or None if there
    is no layout for that combination.  The attribute ehdr is the
    parsed ELF header (None if no matching Ehdr layout exists).

    """
    def __init__(self, image):
        """Create an ELF image from binary image data.

        image: a memoryview-like object that supports efficient range
        subscripting.

        """
        self.image = image
        ident = self.read(Ident, 0)
        classdata = (ident.ei_class, ident.ei_data)
        # Set self.Ehdr etc. to the subtypes with the right parsers.
        for typ in (Ehdr, Phdr, Shdr, Dyn, Sym, Rel, Rela):
            setattr(self, typ.__name__, typ.layouts.get(classdata, None))

        if self.Ehdr is not None:
            self.ehdr = self.read(self.Ehdr, 0)
            self._shdr_num = self._compute_shdr_num()
        else:
            self.ehdr = None
            self._shdr_num = 0

        # Caches: section index -> section header, and section index
        # -> StringTable.
        self._section = {}
        self._stringtab = {}

        if self._shdr_num > 0:
            self._shdr_strtab = self._find_shdr_strtab()
        else:
            self._shdr_strtab = None

    @staticmethod
    def readfile(path: str) -> 'Image':
        """Reads the ELF file at the specified path.

        The whole file is read into memory.

        """
        with open(path, 'rb') as inp:
            return Image(memoryview(inp.read()))

    def _compute_shdr_num(self) -> int:
        """Computes the actual number of section headers."""
        shnum = self.ehdr.e_shnum
        if shnum == 0:
            if self.ehdr.e_shoff == 0 or self.ehdr.e_shentsize == 0:
                # No section headers.
                return 0
            # Otherwise the extension mechanism is used (which may be
            # needed because e_shnum is just 16 bits): the count is
            # stored in the sh_size field of the first section header.
            return self.read(self.Shdr, self.ehdr.e_shoff).sh_size
        return shnum

    def _find_shdr_strtab(self) -> StringTable:
        """Finds the section header string table (maybe via extensions)."""
        shstrndx = self.ehdr.e_shstrndx
        if shstrndx == Shn.SHN_XINDEX:
            # The real index is stored in the sh_link field of the
            # first section header.
            shstrndx = self.read(self.Shdr, self.ehdr.e_shoff).sh_link
        return self._find_stringtab(shstrndx)

    def read(self, typ: type, offset: int):
        """Reads an object at a specific offset.

        The type must provide a size attribute (the number of bytes to
        read) and an unpack function which converts these bytes to
        the result.

        """
        return typ.unpack(self.image[offset: offset + typ.size])

    def phdrs(self) -> Phdr:
        """Generator iterating over the program headers.

        Nothing is produced if the ELF header could not be parsed or
        if the file has no program headers.  Raises ValueError if the
        program header entry size in the ELF header does not match
        the Phdr layout.

        """
        if self.ehdr is None or self.ehdr.e_phnum == 0:
            # Without program headers (for example, in relocatable
            # object files), e_phentsize is typically zero, so the
            # size check below must be skipped.  This matches how
            # shdrs treats files without section headers.
            return
        size = self.ehdr.e_phentsize
        if size != self.Phdr.size:
            raise ValueError('Unexpected Phdr size in ELF header: {} != {}'
                             .format(size, self.Phdr.size))

        offset = self.ehdr.e_phoff
        for _ in range(self.ehdr.e_phnum):
            yield self.read(self.Phdr, offset)
            offset += size

    def shdrs(self, resolve: bool=True) -> Shdr:
        """Generator iterating over the section headers.

        If resolve, section names are automatically translated
        using the section header string table.

        Raises ValueError if the section header entry size in the ELF
        header does not match the Shdr layout.

        """
        if self._shdr_num == 0:
            return

        size = self.ehdr.e_shentsize
        if size != self.Shdr.size:
            raise ValueError('Unexpected Shdr size in ELF header: {} != {}'
                             .format(size, self.Shdr.size))

        offset = self.ehdr.e_shoff
        for _ in range(self._shdr_num):
            shdr = self.read(self.Shdr, offset)
            if resolve:
                shdr = shdr.resolve(self._shdr_strtab)
            yield shdr
            offset += size

    def dynamic(self) -> Dyn:
        """Generator iterating over the dynamic segment.

        Only the first PT_DYNAMIC program header is considered.  All
        entries of the segment are produced.  Raises ValueError if
        the segment size is not a multiple of the Dyn entry size.

        """
        for phdr in self.phdrs():
            if phdr.p_type == Pt.PT_DYNAMIC:
                # Pick the first dynamic segment, like the loader.
                if phdr.p_filesz == 0:
                    # Probably separated debuginfo.
                    return
                offset = phdr.p_offset
                end = offset + phdr.p_memsz
                size = self.Dyn.size
                while True:
                    next_offset = offset + size
                    if next_offset > end:
                        raise ValueError(
                            'Dynamic segment size {} is not a multiple of Dyn size {}'.format(
                                phdr.p_memsz, size))
                    yield self.read(self.Dyn, offset)
                    if next_offset == end:
                        return
                    offset = next_offset

    def syms(self, shdr: Shdr, resolve: bool=True) -> Sym:
        """A generator iterating over a symbol table.

        shdr: the section header of the symbol table (its type must
        be SHT_SYMTAB).

        If resolve, symbol names are automatically translated using
        the string table for the symbol table.

        Raises ValueError if the entry size recorded in the section
        header does not match the Sym layout, or if the section size
        is not a multiple of the entry size.

        """
        assert shdr.sh_type == Sht.SHT_SYMTAB
        size = shdr.sh_entsize
        if size != self.Sym.size:
            raise ValueError('Invalid symbol table entry size {}'.format(size))
        offset = shdr.sh_offset
        end = shdr.sh_offset + shdr.sh_size
        # The string table is only needed (and only validated) if
        # names are resolved.
        strtab = self._find_stringtab(shdr.sh_link) if resolve else None
        while offset < end:
            sym = self.read(self.Sym, offset)
            if resolve:
                sym = sym.resolve(strtab)
            yield sym
            offset += size
        if offset != end:
            raise ValueError('Symbol table is not a multiple of entry size')

    def lookup_string(self, strtab_index: int, strtab_offset: int) -> bytes:
        """Looks up a string in a string table identified by its link index.

        Raises ValueError if strtab_index is out of range or does not
        refer to a string table section.

        """
        # _find_stringtab consults the string table cache itself.
        return self._find_stringtab(strtab_index).get(strtab_offset)

    def find_section(self, shndx: Shn) -> Shdr:
        """Returns the section header for the indexed section.

        The section name is not resolved.  Results are cached per
        index.

        Raises ValueError if shndx is one of the reserved Shn
        constants, or if it is not in the range [0, number of
        section headers).

        """
        try:
            return self._section[shndx]
        except KeyError:
            pass
        if shndx in Shn:
            raise ValueError('Reserved section index {}'.format(shndx))
        idx = shndx.value
        # The upper bound is exclusive: valid indices end at
        # self._shdr_num - 1.
        if idx < 0 or idx >= self._shdr_num:
            raise ValueError('Section index {} out of range [0, {})'.format(
                idx, self._shdr_num))
        shdr = self.read(
            self.Shdr, self.ehdr.e_shoff + idx * self.Shdr.size)
        self._section[shndx] = shdr
        return shdr

    def _find_stringtab(self, sh_link: int) -> StringTable:
        """Returns the string table stored in the section sh_link.

        String tables are cached per section index.

        Raises ValueError if sh_link is not in the range [0, number
        of section headers), or if the section is not of type
        SHT_STRTAB.

        """
        try:
            # Return the cached table itself, not the cache dictionary.
            return self._stringtab[sh_link]
        except KeyError:
            pass
        if sh_link < 0 or sh_link >= self._shdr_num:
            raise ValueError('Section index {} out of range [0, {})'.format(
                sh_link, self._shdr_num))
        shdr = self.read(
            self.Shdr, self.ehdr.e_shoff + sh_link * self.Shdr.size)
        if shdr.sh_type != Sht.SHT_STRTAB:
            raise ValueError(
                'Section {} is not a string table: {}'.format(
                    sh_link, shdr.sh_type))
        strtab = StringTable(
            self.image[shdr.sh_offset:shdr.sh_offset + shdr.sh_size])
        # This could retain essentially arbitrary amounts of data,
        # but caching string tables seems important for performance.
        self._stringtab[sh_link] = strtab
        return strtab
1160
1161
# Export every module-level name that starts with an uppercase letter
# (the public classes and types); helpers that start with an
# underscore or a lowercase letter are left out.
__all__ = list(filter(lambda name: name[0].isupper(), dir()))
1163