1#!/usr/bin/python3
2# Check that use of symbols declared in a given header does not result
3# in any symbols being brought in that are not reserved with external
4# linkage for the given standard.
5# Copyright (C) 2014-2022 Free Software Foundation, Inc.
6# This file is part of the GNU C Library.
7#
8# The GNU C Library is free software; you can redistribute it and/or
9# modify it under the terms of the GNU Lesser General Public
10# License as published by the Free Software Foundation; either
11# version 2.1 of the License, or (at your option) any later version.
12#
13# The GNU C Library is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16# Lesser General Public License for more details.
17#
18# You should have received a copy of the GNU Lesser General Public
19# License along with the GNU C Library; if not, see
20# <https://www.gnu.org/licenses/>.
21
22import argparse
23from collections import defaultdict
24import os.path
25import re
26import subprocess
27import sys
28import tempfile
29
30import glibcconform
31
# Symbols that are tolerated for the time being even though they are
# not reserved with external linkage:
#
# * stdin, stdout, stderr (bug 17576): only reserved with external
#   linkage when stdio.h is included (and possibly not even then),
#   not generally.
#
# * re_syntax_options (bug 18442): wrongly brought in by regcomp and
#   used by re_comp.
#
WHITELIST = {
    'stdin',
    'stdout',
    'stderr',
    're_syntax_options',
}
42
43
def list_syms(filename):
    """Parse saved 'readelf -s' output and collect its GLOBAL and WEAK
    symbols.

    FILENAME names a text file holding the readelf output.  The
    result is a list of (object, symbol, binding, defined) tuples in
    the order the symbols appear: OBJECT is the last path component
    from the most recent 'File: ' line (or FILENAME itself before any
    such line), BINDING is 'GLOBAL' or 'WEAK', and DEFINED is False
    exactly when the section index column reads 'UND'.  Symbols whose
    names contain anything but letters, digits and underscores are
    left out."""
    file_marker = 'File: '
    found = []
    origin = filename
    with open(filename, 'r') as listing:
        for raw_line in listing:
            text = raw_line.rstrip()
            if text.startswith(file_marker):
                # Remember only the final component of the path.
                origin = text[len(file_marker):].rsplit('/', 1)[-1]
                continue
            # Drop any bracketed architecture-specific st_other
            # annotation so the remaining columns line up.
            columns = re.sub(r'\[.*?\]', '', text).split()
            if len(columns) < 8:
                continue
            binding, section, symbol = columns[4], columns[6], columns[7]
            if (binding in ('GLOBAL', 'WEAK')
                    and re.fullmatch('[A-Za-z0-9_]+', symbol)):
                found.append((origin, symbol, binding, section != 'UND'))
    return found
71
72
def _parse_args():
    """Parse and return the command-line arguments.

    Every option is needed by the rest of the script, so each one is
    marked as required; a missing option is reported as a usage error
    instead of failing later on a None value."""
    parser = argparse.ArgumentParser(description='Check link-time namespace.')
    parser.add_argument('--header', metavar='HEADER', required=True,
                        help='name of header')
    parser.add_argument('--standard', metavar='STD', required=True,
                        help='standard to use when processing header')
    parser.add_argument('--cc', metavar='CC', required=True,
                        help='C compiler to use')
    parser.add_argument('--flags', metavar='CFLAGS', required=True,
                        help='Compiler flags to use with CC')
    parser.add_argument('--stdsyms', metavar='FILE', required=True,
                        help='File with list of standard symbols')
    parser.add_argument('--libsyms', metavar='FILE', required=True,
                        help='File with symbol information from libraries')
    parser.add_argument('--readelf', metavar='READELF', required=True,
                        help='readelf program to use')
    return parser.parse_args()


def _load_allowed_symbols(path):
    """Return the set of symbols that are OK: one name per line of the
    file PATH, plus the entries of WHITELIST."""
    with open(path, 'r') as stdsyms_file:
        allowed = {line.rstrip() for line in stdsyms_file}
    return allowed | WHITELIST


def _index_library_symbols(path):
    """Load information about GLOBAL and WEAK symbols defined or used
    in the standard libraries from the readelf output in PATH.

    Returns a tuple (seen_syms, strong_undef_syms, sym_objs) of
    dictionaries of lists:

    * seen_syms maps an object to its symbols, except for weak
      defined symbols;
    * strong_undef_syms maps an object to its strong undefined
      symbols;
    * sym_objs maps a symbol to the objects defining it (strongly or
      weakly)."""
    seen_syms = defaultdict(list)
    strong_undef_syms = defaultdict(list)
    sym_objs = defaultdict(list)
    for file, name, bind, defined in list_syms(path):
        if defined:
            sym_objs[name].append(file)
        if bind == 'GLOBAL' or not defined:
            seen_syms[file].append(name)
        if bind == 'GLOBAL' and not defined:
            strong_undef_syms[file].append(name)
    return seen_syms, strong_undef_syms, sym_objs


def _initial_undefined_symbols(args):
    """Return the strong undefined symbols of a test object built from
    the header, as a dictionary mapping each symbol name to its
    '[initial] NAME' description.

    The test source includes the header and takes the address of each
    name returned by glibcconform.list_exported_functions; it is
    compiled with the given compiler and flags, and the symbol table
    of the resulting object is read back through readelf.  Raises
    subprocess.CalledProcessError if the compiler or readelf fails."""
    compiler = '%s %s' % (args.cc, args.flags)
    c_syms = glibcconform.list_exported_functions(compiler, args.standard,
                                                  args.header)
    initial = {}
    with tempfile.TemporaryDirectory() as temp_dir:
        cincfile_name = os.path.join(temp_dir, 'undef.c')
        cincfile_o_name = os.path.join(temp_dir, 'undef.o')
        cincfile_sym_name = os.path.join(temp_dir, 'undef.sym')
        cincfile_text = ('#include <%s>\n%s\n'
                         % (args.header,
                            '\n'.join('void *__glibc_test_%s = (void *) &%s;'
                                      % (sym, sym) for sym in sorted(c_syms))))
        with open(cincfile_name, 'w') as cincfile:
            cincfile.write(cincfile_text)
        # CC and CFLAGS may each consist of several words, so the
        # commands are handed to the shell as single strings.
        cmd = ('%s %s -D_ISOMAC %s -c %s -o %s'
               % (args.cc, args.flags, glibcconform.CFLAGS[args.standard],
                  cincfile_name, cincfile_o_name))
        subprocess.check_call(cmd, shell=True)
        cmd = ('LC_ALL=C %s -W -s %s > %s'
               % (args.readelf, cincfile_o_name, cincfile_sym_name))
        subprocess.check_call(cmd, shell=True)
        for _, name, bind, defined in list_syms(cincfile_sym_name):
            if bind == 'GLOBAL' and not defined:
                initial[name] = '[initial] %s' % name
    return initial


def _trace_symbol_origins(initial_undef, seen_syms, strong_undef_syms,
                          sym_objs):
    """Follow undefined symbols through the objects that define them.

    Starting from INITIAL_UNDEF, each object defining a currently
    undefined symbol is visited once; its symbols are recorded and its
    strong undefined symbols are queued for the next round, until no
    new undefined symbols turn up.  Returns a dictionary mapping every
    symbol seen to a description of the first chain of references
    found leading to it."""
    seen_where = dict(initial_undef)
    all_undef = dict(initial_undef)
    current_undef = dict(initial_undef)
    files_seen = set()
    while current_undef:
        new_undef = {}
        # Sorted so that the reported chains do not depend on
        # dictionary ordering.
        for sym, cu_sym in sorted(current_undef.items()):
            for file in sym_objs[sym]:
                if file in files_seen:
                    continue
                files_seen.add(file)
                for ssym in seen_syms[file]:
                    # Only the first chain reaching a symbol is kept.
                    if ssym not in seen_where:
                        seen_where[ssym] = ('%s -> [%s] %s'
                                            % (cu_sym, file, ssym))
                for usym in strong_undef_syms[file]:
                    if usym not in all_undef:
                        usym_text = '%s -> [%s] %s' % (cu_sym, file, usym)
                        all_undef[usym] = usym_text
                        new_undef[usym] = usym_text
        current_undef = new_undef
    return seen_where


def main():
    """The main entry point.

    Prints the reference chain for every symbol brought in that does
    not start with an underscore and is not in the allowed set, then
    exits with status 1 if anything was printed and 0 otherwise."""
    args = _parse_args()

    # Load the list of symbols that are OK.
    stdsyms = _load_allowed_symbols(args.stdsyms)

    # Load information about GLOBAL and WEAK symbols defined or used
    # in the standard libraries.
    seen_syms, strong_undef_syms, sym_objs = _index_library_symbols(
        args.libsyms)

    # Determine what ELF-level symbols are brought in by use of C-level
    # symbols declared in the given header.
    #
    # The rules followed are heuristic and so may produce false
    # positives and false negatives.
    #
    # * All undefined symbols are considered of significance, but it is
    # possible that (a) any standard library definition is weak, so
    # can be overridden by the user's definition, and (b) the symbol
    # is only used conditionally and not if the program is limited to
    # standard functionality.
    #
    # * If a symbol reference is only brought in by the user using a
    # data symbol rather than a function from the standard library,
    # this will not be detected.
    #
    # * If a symbol reference is only brought in by crt*.o or libgcc,
    # this will not be detected.
    #
    # * If a symbol reference is only brought in through __builtin_foo
    # in a standard macro being compiled to call foo, this will not be
    # detected.
    #
    # * Header inclusions should be compiled several times with
    # different options such as -O2, -D_FORTIFY_SOURCE and
    # -D_FILE_OFFSET_BITS=64 to find out what symbols are undefined
    # from such a compilation; this is not yet implemented.
    #
    # * This script finds symbols referenced through use of macros on
    # the basis that if a macro calls an internal function, that
    # function must also be declared in the header.  However, the
    # header might also declare implementation-namespace functions
    # that are not called by any standard macro in the header,
    # resulting in false positives for any symbols brought in only
    # through use of those implementation-namespace functions.
    #
    # * Namespace issues can apply for dynamic linking as well as
    # static linking, when a call is from one shared library to
    # another or uses a PLT entry for a call within a shared library;
    # such issues are only detected by this script if the same
    # namespace issue applies for static linking.
    initial_undef = _initial_undefined_symbols(args)
    seen_where = _trace_symbol_origins(initial_undef, seen_syms,
                                       strong_undef_syms, sym_objs)

    ret = 0
    for sym in sorted(seen_where):
        # Names starting with an underscore and names in the allowed
        # set are not reported.
        if sym.startswith('_') or sym in stdsyms:
            continue
        print(seen_where[sym])
        ret = 1
    sys.exit(ret)
214
215
# Run the check only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()
218