1#!/usr/bin/env python3
2# SPDX-License-Identifier: LGPL-2.1-or-later
3
4import sys
5import collections
6import re
7from xml_helper import xml_parse, xml_print, tree
8from copy import deepcopy
9
# Text of the colophon paragraph of the generated index.  It is filled in
# with str.format() using the keys `count`, `sections` and `pages`.
COLOPHON = '''\
This index contains {count} entries in {sections} sections,
referring to {pages} individual manual pages.
'''
14
def _index_variablelists(t, directive_groups, formatting, ref):
    """Index the terms of every <variablelist> found in document *t*.

    Terms matched by the list's ``xpath`` attribute (default
    ``./varlistentry/term/varname``) are filed under the group named by the
    list's ``class`` attribute, or under 'miscellaneous' when it has none.
    <option> terms are filed under the same group when a class is given and
    under 'options' otherwise.  *ref* is the (pagename, section) tuple that
    is recorded for every hit.
    """
    storopt = directive_groups['options']
    for variablelist in t.iterfind('.//variablelist'):
        klass = variablelist.attrib.get('class')
        searchpath = variablelist.attrib.get('xpath', './varlistentry/term/varname')
        storvar = directive_groups[klass or 'miscellaneous']
        # <option>s go in OPTIONS, unless class is specified
        optstor = storvar if klass else storopt
        for xpath, stor in ((searchpath, storvar),
                            ('./varlistentry/term/option', optstor)):
            for name in variablelist.iterfind(xpath):
                # Cut everything after the first '=' or ' ' (the character
                # itself is kept), then drop trailing whitespace.
                text = re.sub(r'([= ]).*', r'\1', name.text).rstrip()
                if text.startswith('-'):
                    # for options, merge options with and without mandatory arg
                    text = text.partition('=')[0]
                stor[text].append(ref)
                if text not in formatting:
                    # use element as formatted display
                    if name.text[-1] in "= '":
                        # clear() also drops child elements and attributes
                        name.clear()
                    else:
                        name.tail = ''
                    name.text = text
                    formatting[text] = name
        extra = variablelist.attrib.get('extra-ref')
        if extra:
            # The extra reference is filed in the same group as the list's
            # <option> terms.  This used to rely on the inner loop variable
            # still being bound after the loop; the group is now named
            # explicitly.
            optstor[extra].append(ref)
            if extra not in formatting:
                elt = tree.Element("varname")
                elt.text = extra
                formatting[extra] = elt


def _index_filenames(t, directive_groups, formatting, ref):
    """Index <filename>/<command> elements of *t* under 'filenames'.

    Everything inside <refsynopsisdiv> is considered; elsewhere in the
    document only <filename> elements whose text starts with '/' are.
    Elements carrying ``index="false"`` are skipped.
    """
    storfile = directive_groups['filenames']
    for xpath, absolute_only in (('.//refsynopsisdiv//filename', False),
                                 ('.//refsynopsisdiv//command', False),
                                 ('.//filename', True)):
        for name in t.iterfind(xpath):
            if absolute_only and not (name.text and name.text.startswith('/')):
                continue
            if name.attrib.get('index') == 'false':
                continue
            name.tail = ''
            if name.text:
                if name.text.endswith('*'):
                    name.text = name.text[:-1]
                # names starting with '.' are not indexed
                if not name.text.startswith('.'):
                    # keep only the part before the first space
                    text = name.text.partition(' ')[0]
                    if text != name.text:
                        name.clear()
                        name.text = text
                    # the index key has no trailing slash; the displayed
                    # element keeps it
                    if text.endswith('/'):
                        text = text[:-1]
                    storfile[text].append(ref)
                    if text not in formatting:
                        # use element as formatted display
                        formatting[text] = name
            else:
                # no leading text: key on the text of all descendants
                text = ' '.join(name.itertext())
                storfile[text].append(ref)
                formatting[text] = name


def _index_constants(t, directive_groups, formatting, ref):
    """Index every <constant> of *t* not marked ``index="false"``.

    The group is taken from the element's ``class`` attribute and defaults
    to 'constants'.
    """
    for name in t.iterfind('.//constant'):
        if name.attrib.get('index') == 'false':
            continue
        name.tail = ''
        if name.text.startswith('('): # a cast, strip it
            name.text = name.text.partition(' ')[2]
        klass = name.attrib.get('class') or 'constants'
        storconst = directive_groups[klass]
        storconst[name.text].append(ref)
        formatting[name.text] = name


def _index_specifiers(t, directive_groups, formatting, ref):
    """Index '%' specifiers of *t* under 'specifiers'.

    Two sources are scanned: <literal> children of entries in tables with
    ``class="specifiers"``, and any <literal class="specifiers">.
    """
    storspec = directive_groups['specifiers']
    for name in t.iterfind(".//table[@class='specifiers']//entry/literal"):
        # Only literals that start with '%' and whose parent element has no
        # leading text are taken from the table.
        if name.text[0] != '%' or name.getparent().text is not None:
            continue
        if name.attrib.get('index') == 'false':
            continue
        storspec[name.text].append(ref)
        formatting[name.text] = name
    for name in t.iterfind(".//literal[@class='specifiers']"):
        storspec[name.text].append(ref)
        formatting[name.text] = name


def _extract_directives(directive_groups, formatting, page):
    """Collect the index entries of one manual page.

    Args:
        directive_groups: mapping of group name to a mapping of directive
            text to a list of (pagename, section) tuples; updated in place.
            The inner mappings are expected to create missing entries on
            lookup (make_page() passes defaultdict(list)).
        formatting: dict mapping directive text to the XML element used to
            display it; updated in place.  Elements taken from the page
            are modified (text/tail trimmed) before being stored.
        page: source of the page, handed to xml_parse() unchanged.

    Returns:
        None.

    Any error (e.g. a group name that is not in directive_groups, or an
    indexed element without text) propagates to the caller.
    """
    t = xml_parse(page)
    section = t.find('./refmeta/manvolnum').text
    pagename = t.find('./refmeta/refentrytitle').text
    ref = (pagename, section)

    _index_variablelists(t, directive_groups, formatting, ref)
    _index_filenames(t, directive_groups, formatting, ref)
    _index_constants(t, directive_groups, formatting, ref)
    _index_specifiers(t, directive_groups, formatting, ref)
102
def _make_section(template, name, directives, formatting):
    """Fill the template element whose id is *name* with index entries.

    One <varlistentry> is appended per directive, in sorted order.  Its
    <term> holds a copy of the display element from *formatting*, and its
    <listitem>/<para> holds a comma-separated list of <citerefentry>
    elements, one per distinct (manpage, manvolume) pair, also sorted.
    """
    target_list = template.find(".//*[@id='{}']".format(name))
    for directive in sorted(directives):
        item = tree.SubElement(target_list, 'varlistentry')
        heading = tree.SubElement(item, 'term')
        # copy, so the element stored in `formatting` stays untouched
        heading.append(deepcopy(formatting[directive]))

        listitem = tree.SubElement(item, 'listitem')
        paragraph = tree.SubElement(listitem, 'para')

        citations = []
        for title, volume in sorted(set(directives[directive])):
            citation = tree.SubElement(paragraph, 'citerefentry')
            title_elt = tree.SubElement(citation, 'refentrytitle')
            title_elt.text = title
            title_elt.attrib['target'] = directive
            tree.SubElement(citation, 'manvolnum').text = volume
            citations.append(citation)

        # separator after every citation except the last one
        for citation in citations[:-1]:
            citation.tail = ', '

        item.tail = '\n\n'
124
def _make_colophon(template, groups):
    """Write the summary text into the template's colophon paragraph.

    *groups* is a sized iterable of mappings from directive to a list of
    page references.  The text reports the total number of directives,
    the number of groups and the number of distinct page references.
    """
    entry_total = 0
    referenced = set()
    for group in groups:
        entry_total += len(group)
        # merge every reference list of this group into the set
        referenced.update(*group.values())

    paragraph = template.find(".//para[@id='colophon']")
    paragraph.text = COLOPHON.format(
        count=entry_total,
        sections=len(groups),
        pages=len(referenced))
137
def _make_page(template, directive_groups, formatting):
    """Populate *template* from directive_groups and return it.

    directive_groups maps each group name to the directives collected for
    it, every directive carrying the list of pages that mention it:

    directive_groups = {
       'class': {'variable': [('manpage', 'manvolume'), ...],
                 'variable2': ...},
       ...
    }

    Each group is rendered into the template element of the same id, then
    the colophon is filled in.
    """
    for section_id in directive_groups:
        _make_section(template, section_id,
                      directive_groups[section_id], formatting)

    _make_colophon(template, directive_groups.values())

    return template
153
def make_page(template_path, xml_files):
    """Extract directives from xml_files and return XML index tree.

    Args:
        template_path: template document, handed to xml_parse().  The id
            of each of its <variablelist> elements names one index group.
        xml_files: iterable of manual page sources (strings), each handed
            to xml_parse() in turn.

    Returns:
        The parsed template with the index entries and colophon filled in.

    Raises:
        ValueError: a page could not be processed.  The original exception
            is attached as ``__cause__`` so the real reason shows up in
            the traceback.
    """
    template = xml_parse(template_path)
    names = [vl.get('id') for vl in template.iterfind('.//variablelist')]
    directive_groups = {name: collections.defaultdict(list)
                        for name in names}
    formatting = {}
    for page in xml_files:
        try:
            _extract_directives(directive_groups, formatting, page)
        except Exception as err:
            # Name the offending page, and chain explicitly so the root
            # cause is reported as the direct cause of this error.
            raise ValueError("failed to process " + page) from err

    return _make_page(template, directive_groups, formatting)
168
if __name__ == '__main__':
    # Usage: <script> OUTPUT TEMPLATE [PAGE...]
    output_path = sys.argv[1]
    template_path = sys.argv[2]
    xml_files = sys.argv[3:]

    # Generate the complete document before opening the output file, so
    # that a failure while processing the inputs does not leave a freshly
    # truncated, empty output file behind.
    xml = make_page(template_path, xml_files)
    data = xml_print(xml)  # written in binary mode; presumably bytes

    with open(output_path, 'wb') as f:
        f.write(data)
175