1#!/usr/bin/python3 2# Check that use of symbols declared in a given header does not result 3# in any symbols being brought in that are not reserved with external 4# linkage for the given standard. 5# Copyright (C) 2014-2022 Free Software Foundation, Inc. 6# This file is part of the GNU C Library. 7# 8# The GNU C Library is free software; you can redistribute it and/or 9# modify it under the terms of the GNU Lesser General Public 10# License as published by the Free Software Foundation; either 11# version 2.1 of the License, or (at your option) any later version. 12# 13# The GNU C Library is distributed in the hope that it will be useful, 14# but WITHOUT ANY WARRANTY; without even the implied warranty of 15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16# Lesser General Public License for more details. 17# 18# You should have received a copy of the GNU Lesser General Public 19# License along with the GNU C Library; if not, see 20# <https://www.gnu.org/licenses/>. 21 22import argparse 23from collections import defaultdict 24import os.path 25import re 26import subprocess 27import sys 28import tempfile 29 30import glibcconform 31 32# The following whitelisted symbols are also allowed for now. 33# 34# * Bug 17576: stdin, stdout, stderr only reserved with external 35# linkage when stdio.h included (and possibly not then), not 36# generally. 37# 38# * Bug 18442: re_syntax_options wrongly brought in by regcomp and 39# used by re_comp. 
#
# WHITELIST is merged into the set of permitted symbols read from the
# --stdsyms file.
WHITELIST = {'stdin', 'stdout', 'stderr', 're_syntax_options'}

# Architecture-specific st_other bits appear inside [] and disrupt the
# format of readelf output, so they are stripped before splitting.
_ST_OTHER_RE = re.compile(r'\[.*?\]')
# Only names consisting entirely of these characters are reported.
_PLAIN_SYM_RE = re.compile('[A-Za-z0-9_]+')


def list_syms(filename):
    """Return information about GLOBAL and WEAK symbols listed in readelf
    -s output.

    FILENAME names a text file containing the readelf output.  Lines
    starting with 'File: ' switch the current object name to the last
    '/'-separated component of the rest of the line; until the first
    such line the current object name is FILENAME itself.

    The result is a list of tuples (object, symbol, binding, defined)
    in input order, where binding is 'GLOBAL' or 'WEAK' and defined is
    False exactly when the section index column reads 'UND'.  Lines
    with fewer than eight fields, other bindings, or symbol names
    containing characters outside [A-Za-z0-9_] are ignored.
    """
    ret = []
    cur_file = filename
    with open(filename, 'r') as syms_file:
        for line in syms_file:
            line = line.rstrip()
            if line.startswith('File: '):
                cur_file = line[len('File: '):].split('/')[-1]
                continue
            fields = _ST_OTHER_RE.sub('', line).split()
            if len(fields) < 8:
                continue
            bind = fields[4]
            ndx = fields[6]
            sym = fields[7]
            if bind not in ('GLOBAL', 'WEAK'):
                continue
            if not _PLAIN_SYM_RE.fullmatch(sym):
                continue
            ret.append((cur_file, sym, bind, ndx != 'UND'))
    return ret


def _parse_args():
    """Parse the command line.

    Every option is needed further on, so each is marked required and
    argparse reports a missing one with a usage message.
    """
    parser = argparse.ArgumentParser(description='Check link-time namespace.')
    parser.add_argument('--header', metavar='HEADER', required=True,
                        help='name of header')
    parser.add_argument('--standard', metavar='STD', required=True,
                        help='standard to use when processing header')
    parser.add_argument('--cc', metavar='CC', required=True,
                        help='C compiler to use')
    parser.add_argument('--flags', metavar='CFLAGS', required=True,
                        help='Compiler flags to use with CC')
    parser.add_argument('--stdsyms', metavar='FILE', required=True,
                        help='File with list of standard symbols')
    parser.add_argument('--libsyms', metavar='FILE', required=True,
                        help='File with symbol information from libraries')
    parser.add_argument('--readelf', metavar='READELF', required=True,
                        help='readelf program to use')
    return parser.parse_args()


def _load_stdsyms(path):
    """Return the symbols that are OK: one per line of PATH, plus
    WHITELIST."""
    with open(path, 'r') as stdsyms_file:
        stdsyms = {line.rstrip() for line in stdsyms_file}
    return stdsyms | WHITELIST


def _load_lib_syms(path):
    """Index the GLOBAL and WEAK symbols defined or used in the standard
    libraries, as listed in the readelf output stored in PATH.

    Returns a tuple (seen_syms, strong_undef_syms, sym_objs) of
    defaultdict(list) objects.
    """
    # Symbols from a given object, except for weak defined symbols.
    seen_syms = defaultdict(list)
    # Strong undefined symbols from a given object.
    strong_undef_syms = defaultdict(list)
    # Objects defining a given symbol (strongly or weakly).
    sym_objs = defaultdict(list)
    for obj, name, bind, defined in list_syms(path):
        if defined:
            sym_objs[name].append(obj)
        if bind == 'GLOBAL' or not defined:
            seen_syms[obj].append(name)
        if bind == 'GLOBAL' and not defined:
            strong_undef_syms[obj].append(name)
    return seen_syms, strong_undef_syms, sym_objs


def _initial_undef_syms(args, c_syms):
    """Return the strong undefined symbols of an object that takes the
    address of every name in C_SYMS after including the header.

    The test source is compiled with args.cc and args.flags and the
    resulting object is listed with args.readelf; both commands run
    through the shell and raise subprocess.CalledProcessError on
    failure.
    """
    # Function-scope import: only the generated paths need quoting.
    import shlex

    with tempfile.TemporaryDirectory() as temp_dir:
        cincfile_name = os.path.join(temp_dir, 'undef.c')
        cincfile_o_name = os.path.join(temp_dir, 'undef.o')
        cincfile_sym_name = os.path.join(temp_dir, 'undef.sym')
        cincfile_text = ('#include <%s>\n%s\n'
                         % (args.header,
                            '\n'.join('void *__glibc_test_%s = (void *) &%s;'
                                      % (sym, sym)
                                      for sym in sorted(c_syms))))
        with open(cincfile_name, 'w') as cincfile:
            cincfile.write(cincfile_text)
        # args.cc, args.flags and args.readelf are shell fragments and
        # are deliberately left unquoted; the temporary paths are quoted
        # so that a TMPDIR containing spaces or shell metacharacters
        # does not break the commands.
        cmd = ('%s %s -D_ISOMAC %s -c %s -o %s'
               % (args.cc, args.flags, glibcconform.CFLAGS[args.standard],
                  shlex.quote(cincfile_name),
                  shlex.quote(cincfile_o_name)))
        subprocess.check_call(cmd, shell=True)
        cmd = ('LC_ALL=C %s -W -s %s > %s'
               % (args.readelf, shlex.quote(cincfile_o_name),
                  shlex.quote(cincfile_sym_name)))
        subprocess.check_call(cmd, shell=True)
        return [name
                for _, name, bind, defined in list_syms(cincfile_sym_name)
                if bind == 'GLOBAL' and not defined]


def _trace_symbols(initial, seen_syms, strong_undef_syms, sym_objs):
    """Follow strong undefined references starting from the names in
    INITIAL and return a dict mapping every symbol brought in to a text
    describing the chain of references through which it was first
    reached."""
    seen_where = {}
    all_undef = {}
    current_undef = {}
    for name in initial:
        sym_text = '[initial] %s' % name
        seen_where[name] = sym_text
        all_undef[name] = sym_text
        current_undef[name] = sym_text

    # Each object is expanded at most once, by whichever symbol reaches
    # it first.
    files_seen = set()
    while current_undef:
        new_undef = {}
        for sym, cu_sym in sorted(current_undef.items()):
            for obj in sym_objs.get(sym, ()):
                if obj in files_seen:
                    continue
                files_seen.add(obj)
                for ssym in seen_syms.get(obj, ()):
                    if ssym not in seen_where:
                        seen_where[ssym] = ('%s -> [%s] %s'
                                            % (cu_sym, obj, ssym))
                for usym in strong_undef_syms.get(obj, ()):
                    if usym not in all_undef:
                        usym_text = '%s -> [%s] %s' % (cu_sym, obj, usym)
                        all_undef[usym] = usym_text
                        new_undef[usym] = usym_text
        current_undef = new_undef
    return seen_where


def main():
    """The main entry point.

    Prints the reference chain of every symbol brought in that neither
    starts with '_' nor is a permitted symbol, and exits with status 1
    if any was printed, 0 otherwise.
    """
    args = _parse_args()

    # Load the list of symbols that are OK.
    stdsyms = _load_stdsyms(args.stdsyms)

    # Load information about GLOBAL and WEAK symbols defined or used
    # in the standard libraries.
    seen_syms, strong_undef_syms, sym_objs = _load_lib_syms(args.libsyms)

    # Determine what ELF-level symbols are brought in by use of C-level
    # symbols declared in the given header.
    #
    # The rules followed are heuristic and so may produce false
    # positives and false negatives.
    #
    # * All undefined symbols are considered of significance, but it is
    #   possible that (a) any standard library definition is weak, so
    #   can be overridden by the user's definition, and (b) the symbol
    #   is only used conditionally and not if the program is limited to
    #   standard functionality.
    #
    # * If a symbol reference is only brought in by the user using a
    #   data symbol rather than a function from the standard library,
    #   this will not be detected.
    #
    # * If a symbol reference is only brought in by crt*.o or libgcc,
    #   this will not be detected.
    #
    # * If a symbol reference is only brought in through __builtin_foo
    #   in a standard macro being compiled to call foo, this will not be
    #   detected.
    #
    # * Header inclusions should be compiled several times with
    #   different options such as -O2, -D_FORTIFY_SOURCE and
    #   -D_FILE_OFFSET_BITS=64 to find out what symbols are undefined
    #   from such a compilation; this is not yet implemented.
    #
    # * This script finds symbols referenced through use of macros on
    #   the basis that if a macro calls an internal function, that
    #   function must also be declared in the header.  However, the
    #   header might also declare implementation-namespace functions
    #   that are not called by any standard macro in the header,
    #   resulting in false positives for any symbols brought in only
    #   through use of those implementation-namespace functions.
    #
    # * Namespace issues can apply for dynamic linking as well as
    #   static linking, when a call is from one shared library to
    #   another or uses a PLT entry for a call within a shared library;
    #   such issues are only detected by this script if the same
    #   namespace issue applies for static linking.
    compiler = '%s %s' % (args.cc, args.flags)
    c_syms = glibcconform.list_exported_functions(compiler, args.standard,
                                                  args.header)
    initial = _initial_undef_syms(args, c_syms)
    seen_where = _trace_symbols(initial, seen_syms, strong_undef_syms,
                                sym_objs)

    ret = 0
    for sym in sorted(seen_where):
        if sym.startswith('_'):
            continue
        if sym in stdsyms:
            continue
        print(seen_where[sym])
        ret = 1
    sys.exit(ret)


if __name__ == '__main__':
    main()