1#!/usr/bin/perl
2# Generate the Summary of Library Facilities (summary.texi).
3
4# Copyright (C) 2017-2022 Free Software Foundation, Inc.
5# This file is part of the GNU C Library.
6
7# The GNU C Library is free software; you can redistribute it and/or
8# modify it under the terms of the GNU Lesser General Public License
9# as published by the Free Software Foundation; either version 2.1 of
10# the License, or (at your option) any later version.
11
12# The GNU C Library is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15# Lesser General Public License for more details.
16
17# You should have received a copy of the GNU Lesser General Public
18# License along with the GNU C Library; if not, see
19# <https://www.gnu.org/licenses/>.
20
21# Anything declared in a header or defined in a standard should have
# its origins annotated using the @standards macro (see macros.texi).
23# This script checks all such elements in the manual (generally,
24# @def|item*-commands), ensuring annotations are present and correct.
25# If any errors are detected, they are all reported at the end and
26# failure is indicated.
27
28use strict;
29use warnings;
30use locale;
31use File::Basename;
32
# Flush output after every write rather than buffering it.
$| = 1;
my $script = basename $0;

# Only look at the first argument when one was actually given, so that
# running the script without arguments does not compare an undefined
# value under "use warnings".
help() if @ARGV && $ARGV[0] eq "--help"; # Will exit(0).

# Every command-line argument is treated as a Texinfo file to scan.
my @texis = @ARGV;

# Various regexes.  All of them are anchored at the start of a line.
my $nde = qr/^\@node /;
my $def = qr/^\@def/;
my $itm = qr/^\@item /;
my $itms = qr/^\@itemx? /; # Don't match @itemize.
my $ann = qr/^\@(def\w+|item)x? /; # Annotatable.
my $std = qr/^\@standards\{/;
my $stx = qr/^\@standardsx\{/;
my $stds = qr/^\@standardsx?\{/;
# The strict forms require exactly two (@standards) or three
# (@standardsx) ", "-separated fields and nothing after the brace.
my $strict_std = qr/^\@standards\{([^,]+, )[^,\}]+\}$/;
my $strict_stx = qr/^\@standardsx\{([^,]+, ){2}[^,\}]+\}$/;
# The capture group is relied upon: callers pass $1 (the list type) to
# process_list after matching $list.
my $lcon = qr/([vf]?table|itemize|enumerate)/;
my $list = qr/^\@${lcon}/;
my $endl = qr/^\@end ${lcon}/;
my $ign = qr/^\@ignore/;
my $eig = qr/^\@end ignore/;

# Global scope.
my $node;     # Name of the most recently seen @node.
# A package variable on purpose: it is used as a foreach loop variable
# and read by record_error, which a lexical loop variable would hide.
our $texi;
my $input;    # Handle of the file currently being scanned.
my %entries;  # element => prototype => list of [header, standard, node].
my %errors;   # file => error kind => list of offending lines.
63
# Scan every Texinfo source line by line, tracking the current @node and
# dispatching annotatable constructs to their handlers.  $texi is the
# package variable, so record_error sees the file being processed.
for $texi (@texis) {
    open $input, '<', $texi or die "open $texi: $!";
    while (my $line = <$input>) {
	if ($line =~ $nde) {
	    $node = get_node($line);
	} elsif ($line =~ $def) {
	    process_annotation($line);
	} elsif ($line =~ $list) {
	    process_list($1); # @items occur in list or table context.
	} elsif ($line =~ $stds) {
	    record_error("Misplaced annotation", ["[$.] ".$line]);
	} elsif ($line =~ $ign) {
	    # Skip everything up to and including "@end ignore".  The
	    # read is checked for definedness so that an unterminated
	    # @ignore block ends at end of file instead of looping
	    # forever on undef.
	    while (defined(my $skipped = <$input>)) {
		last if $skipped =~ $eig;
	    }
	}
    }
    close $input or die "close $texi: $!";
}
81
# Disabled until annotations are complete: the constant 0 keeps the
# condition false, so collected errors are never reported here.
if (%errors && 0) {
    print_errors(); # Will exit(1).
}

# Header of the generated file, followed by one block per element in
# sorted order.
print "\@c DO NOT EDIT THIS FILE!\n",
      "\@c This file is generated by $script from the Texinfo sources.\n",
      "\@c The \@items are \@include'd from a \@table in header.texi.\n\n";

for my $element (sort keys %entries) {
    print_entry($element);
}
90
# Processes an annotatable element, including any subsequent elements
# in an @*x chain, ensuring @standards are present, with valid syntax,
# either recording any errors detected or creating Summary entries.
# This function is the heart of the script.
#
# Takes the line that opened the element and continues reading from
# the global $input handle.  Prototypes and standards are gathered
# into separate lists and used to evaluate the completeness and
# correctness of annotations before generating the Summary entries.
# "Prototype" is used to refer to an element's entire definition while
# avoiding conflation with @def*-commands.  "Element" is strictly used
# here to refer to the name extracted from the prototype, as used in
# @standardsx, for sorting the Summary.
#
# Returns 0 (via record_error) as soon as an error is recorded;
# otherwise the gathered lists are handed to add_entries.
#
# End of file can be reached at any point while gathering, so every
# line read is checked for definedness before it is matched.
sub process_annotation
{
    my $line = shift;
    my (@prototypes, @standards);

    # Gather prototypes and standards.
    push @prototypes, $line;
    while (defined($line = <$input>)) {
	last if $line !~ $ann;
	push @prototypes, $line;
    }
    # Running out of input counts as a missing annotation too.
    if (!defined $line || $line !~ $stds) { # The fundamental error.
	return record_error('Missing annotation', \@prototypes);
    }
    push @standards, $line;
    while (defined($line = <$input>) && $line =~ $stds) {
	push @standards, $line;
    }

    # If next line is an @item, seek back to catch it on the next
    # iteration.  This avoids imposing a non-Texinfo syntax
    # requirement of blank lines between consecutive annotated @items.
    if (defined $line && $line =~ $itm) {
	seek $input, -length($line), 1 or die "seek: $!";
	# The line will be read a second time; take it off the line
	# counter so reported line numbers do not drift.
	$.--;
    }

    # Strict check for syntax errors.  Other matches are loose, which
    # aids error detection and reporting by ensuring things that look
    # like standards aren't simply passed over, but caught here.
    my @invalid
	= grep { $_ !~ $strict_std && $_ !~ $strict_stx } @standards;
    return record_error('Invalid syntax', \@invalid) if @invalid;

    if (@prototypes == 1) {
	# @standardsx should not be in non-@*x chains.
	return record_error('Misplaced @standardsx', \@prototypes)
	    if grep { $_ =~ $stx } @standards;
    } else {
	# @standards may only occur once in @*x chains, at the beginning.
	return record_error('Misplaced @standards', \@prototypes)
	    if grep { $_ =~ $std } @standards[1 .. $#standards];
    }

    # The @standards are aligned.
    add_entries(\@prototypes, \@standards);
}
156
# Turns validated prototypes and annotations into Summary entries.
#
# Takes references to the list of prototype lines and the list of
# @standards/@standardsx lines gathered by process_annotation.  Each
# prototype is cleaned up, its element name is extracted, and the
# element is paired with the annotations that apply to it.  In an @*x
# chain an element uses its own @standardsx lines when it has any and
# falls back to the leading @standards otherwise; a lone prototype
# gets one entry per @standards line.
sub add_entries
{
    my ($prototypes, $standards) = @_;
    my $chained = @{$prototypes} > 1;
    my $only_x = $standards->[0] =~ $stx;
    my ($default_std, $default_hdr, %by_element);

    # Grab the default annotation and index any @standardsx.  Take
    # care in case there is no default.
    if ($chained) {
	my @extended = @{$standards};
	if (!$only_x) {
	    my $default = shift @extended;
	    ($default_std, $default_hdr)
		= $default =~ /${std}([^,]+), (.*)\}$/;
	}
	for my $annotation (@extended) {
	    my ($name, $standard, $header)
		= $annotation =~ /${stx}([^,]+), ([^,]+), (.*)\}$/;
	    push @{$by_element{$name}{hs}}, [$header, $standard];
	}
    }

    for my $definition (@{$prototypes}) {
	my $element = get_element($definition);
	my $prototype = get_prototype($definition);
	if ($chained && exists $by_element{$element}) {
	    # One entry per @standardsx naming this element.
	    for my $pair (@{$by_element{$element}{hs}}) {
		my ($header, $standard) = @{$pair};
		record_entry($element, $prototype, $header, $standard,
			     $node);
		++$by_element{$element}{seen};
	    }
	} elsif ($chained && $only_x) {
	    # No default to fall back on.
	    record_error('Missing annotation', [$definition]);
	} elsif ($chained) {
	    record_entry($element, $prototype, $default_hdr, $default_std,
			 $node);
	} else {
	    for my $annotation (@{$standards}) {
		my ($standard, $header)
		    = $annotation =~ /${std}([^,]+), ([^,\}]+)\}$/;
		record_entry($element, $prototype, $header, $standard,
			     $node);
	    }
	}
    }

    # Check if there were any unmatched @standardsx.
    for my $name (keys %by_element) {
	next if exists $by_element{$name}{seen};
	record_error('Spurious @standardsx', [$name."\n"]);
    }
}
211
# Adds one Summary entry to %entries.
#
# Arguments are, in order: element name, prototype, header, standard
# and node.  Entries are keyed on the element and then on the
# prototype, as some elements have multiple prototypes (see isnan in
# arith.texi for one example).  May be called several times for the
# same element when it has several header and standard annotations.
sub record_entry
{
    my ($element, $prototype) = @_;
    my @origin = @_[2 .. 4]; # Header, standard, node.
    push @{$entries{$element}{$prototype}}, \@origin;
}
221
# Consumes a list or table context from $input, recursing for nested
# lists, until the matching @end line (or end of file) is reached.
#
# Takes the list type as captured from the opening line.  @item and
# @itemx lines are annotatable only inside a @vtable; @def*-commands
# are annotatable in any list.
sub process_list
{
    my $type = shift;
    my $annotatable_items = $type eq "vtable";

    while (my $line = <$input>) {
	return if $line =~ $endl; # All done.

	if ($line =~ $list) {
	    process_list($1); # Nested list.
	    next;
	}
	if ($line =~ $stds) {
	    record_error('Misplaced annotation', ["[$.] ".$line]);
	    next;
	}

	# Anything else is only interesting when it is annotatable.
	next unless $line =~ $def
	    || ($annotatable_items && $line =~ $itms);
	process_annotation($line);
    }
}
243
# Extracts the node name from an @node line: the text between the
# "@node " command and the first comma.  Used for referencing from the
# Summary.
sub get_node
{
    my ($line) = @_;
    chomp $line;
    (my $names = $line) =~ s/$nde//;
    my ($current) = split ',', $names;
    return $current;
}
254
# Returns the cleaned up prototype from @def|item* lines.
#
# The line is reduced to what should be shown in the Summary: the
# newline and the leading @-command are dropped, whitespace runs are
# collapsed, a leading category label such as "Macro" or "Data Type"
# is scraped off, and grouping braces are removed.
sub get_prototype
{
    my ($text) = @_;
    chomp $text;

    $text =~ s/\s+/ /g; # Collapse whitespace.
    $text =~ s/ \{([^\}]*)\} / $1 /g; # Remove grouping braces.
    $text =~ s/^\@\S+ //; # Remove @-command.

    # Scrape off cruft.  The labels are tried one after another, in
    # this order, each against the result of the previous one.
    my @cruft = (
	qr/^Macro /i,
	qr/^Data Type /i,
	qr/^Variable /i,
	qr/^Deprecated Function /i,
	qr/^SVID Macro /i,
	qr/^Obsolete function /i,
	qr/^Constant /i,
	qr/^Type /i,
	qr/^Function /i,
    );
    for my $label (@cruft) {
	$text =~ s/$label//;
    }

    $text =~ s/^\{(.*)\}$/$1/; # Debrace yourself.
    $text =~ s/^\{([^\}]*)\} /$1 /; # These ones too.
    return $text;
}
276
# Returns an annotated element's name.
#
# Takes a line defining an annotatable element (e.g., @def|item*),
# splitting it on whitespace.  The element is generally detected as
# the member immediately preceding the first parenthesized expression
# (e.g., a function), or the last token in the list.  Some additional
# cleanup is applied to the element before returning it: a leading
# '*', surrounding braces, and function pointer syntax are stripped.
#
# The scan stops only when the tokens run out, not when a token
# happens to be false, so an element that is literally "0" is found.
# The line is expected to start with its @-command.
sub get_element
{
    my @toks = split /\s+/, shift;
    my $i = 0;
    # tzname array uses '['; don't match function pointers.
    ++$i while defined($toks[$i]) && $toks[$i] !~ /^[\(\[](?!\*)/;
    my $element = $toks[$i-1];
    $element =~ s/^\*//; # Strip pointer type syntax.
    $element =~ s/^\{?([^\}]+)\}?$/$1/; # Strip braces.
    $element =~ s/^\(\*([^\)]+)\)$/$1/; # Function pointers.
    return $element;
}
295
# Files away @standards-related syntax errors found in the manual.
#
# Takes the error description and a reference to the list of offending
# lines.  The lines are stored under the file currently being scanned
# ($texi) and then under the error, to make it easier to track down
# exactly where and what the problems are.  Always returns 0.
sub record_error
{
    my ($kind, $lines) = @_;
    # Nothing is created for an empty list.
    push @{$errors{$texi}{$kind}}, @{$lines} if @{$lines};
    return 0;
}
305
# Writes every recorded error to STDERR, grouped by file and then by
# error, and exits with status 1.  Indentation is used for
# readability, and "ERROR" is used for visibility.
sub print_errors
{
    for my $file (sort keys %errors) {
	print STDERR "ERRORS in $file:\n";
	my $by_kind = $errors{$file};
	for my $kind (sort keys %{$by_kind}) {
	    print STDERR "  $kind:\n";
	    for my $offender (@{$by_kind->{$kind}}) {
		print STDERR "    $offender";
	    }
	}
    }
    print STDERR "\nFor a description of expected syntax, see ",
	"\`$script --help'\n\n";
    exit 1;
}
321
# Prints the Summary entry (or entries) for one element.
#
# For each prototype recorded for the element, in sorted order, this
# emits a @comment with the element name, an @item with the prototype,
# and then one line per header and standard combination with a @ref to
# the documenting node.  The @comment is arguably unnecessary, but it
# is useful for seeing the sorting order and the extracted element
# names, and it maintains the format established by summary.awk.
#
# All the blank lines in summary.texi may seem strange at first, but
# they have significant impact on how Texinfo renders the output.
# Essentially, each line is its own paragraph.
sub print_entry
{
    my ($element) = @_;

    for my $prototype (sort keys %{$entries{$element}}) {
	print "\@comment $element\n\@item $prototype\n\n";

	for my $origin (@{$entries{$element}{$prototype}}) {
	    my ($header, $standard, $node) = @{$origin}[0 .. 2];

	    # Mark up the header; '???' is passed through untouched.
	    if ($header =~ /^\(none\)$/i) {
		$header = '@emph{no header}';
	    } elsif ($header =~ /\(optional\)$/) {
		$header =~ s/^(\S+) \((.*)\)$/\@file{$1} \@emph{$2}/;
	    } elsif ($header ne '???') {
		$header = '@file{' . $header . '}';
	    }

	    print "$header ($standard):  \@ref{$node}.\n\n";
	}
    }
}
354
# Prints the usage text, which documents the syntax of @standards and
# the meaning of each error the script reports, then exits with
# status 0.
sub help
{
    print "$script ", <<'EOH';
generates the Summary of Library Facilities (summary.texi)
from @standards and @standardsx macros in the Texinfo sources (see
macros.texi).  While generating the Summary, it also checks that
@standards are used, correctly.

In general, any @def*-command or @item in a @vtable is considered
annotatable.  "Misplaced annotation" refers to @standards macros
detected outside an annotatable context.  "Missing annotation" refers
to annotatable elements without @standards.  @standards are expected
to immediately follow the elements being annotated.  In @*x lists,
@standards sets the default annotation and may only occur as the first
annotation ("Misplaced @standards").  @standardsx may not be used
outside @*x lists ("Misplaced @standardsx").  "Spurious @standardsx"
refers to otherwise valid @standardsx macros that were not matched to
an element in an @*x list.  "Invalid syntax" means just that.

The syntax of @standards annotations is designed to accommodate
multiple header and standards annotations, as necessary.

Examples:

  @deftp FOO
  @standards{STD, HDR}

  @defvar BAR
  @standards{STD, HDR1}
  @standards{STD, HDR2}

  @deftypefun foo
  @deftypefunx fool
  @standards{STD, HDR}

  @item bar
  @itemx baz
  @standardsx{bar, STD1, HDR1}
  @standardsx{baz, STD1, HDR1}
  @standardsx{baz, STD2, HDR2}

Note that @standardsx deviates from the usual Texinfo syntax in that
it is optional and may be used without @standards.
EOH
    exit 0;
}
403