#!/usr/bin/env python3
# SPDX-License-Identifier: LGPL-2.1-or-later
"""Build an index of directives from a set of XML manual pages.

Usage: SCRIPT OUTPUT TEMPLATE [PAGE...]

Every PAGE is scanned for variables, options, file names, constants and
specifiers.  The names found are inserted, together with references to the
pages that mention them, into the <variablelist> elements of TEMPLATE, and
the resulting document is written to OUTPUT.
"""

import sys
import collections
import re
from xml_helper import xml_parse, xml_print, tree
from copy import deepcopy

COLOPHON = '''\
This index contains {count} entries in {sections} sections,
referring to {pages} individual manual pages.
'''

# Matches the first '=' or ' ' and everything after it (up to end of line);
# substituting r'\1' keeps the separator itself and drops the argument.
_ARGUMENT_RE = re.compile(r'([= ]).*')


def _extract_directives(directive_groups, formatting, page):
    """Collect every indexable name found in one manual page.

    directive_groups -- mapping of group name to a mapping of
        name -> [(pagename, section), ...]; updated in place.  The groups
        'options', 'filenames' and 'specifiers' are always looked up;
        'miscellaneous', 'constants' and any group named by a class=
        attribute are looked up when needed.
    formatting -- mapping of name -> XML element used to display that name
        in the index; updated in place.
    page -- passed unchanged to xml_parse().

    The elements of the parsed page are modified while names are
    normalised (text trimmed, tails cleared); the page itself is not
    written back by this function.
    """
    t = xml_parse(page)
    section = t.find('./refmeta/manvolnum').text
    pagename = t.find('./refmeta/refentrytitle').text
    reference = (pagename, section)

    storopt = directive_groups['options']
    for variablelist in t.iterfind('.//variablelist'):
        klass = variablelist.attrib.get('class')
        searchpath = variablelist.attrib.get('xpath',
                                             './varlistentry/term/varname')
        storvar = directive_groups[klass or 'miscellaneous']
        # <option>s go in OPTIONS, unless class is specified
        option_store = storvar if klass else storopt

        for xpath, stor in ((searchpath, storvar),
                            ('./varlistentry/term/option', option_store)):
            for name in variablelist.iterfind(xpath):
                # Drop the argument but keep the '=' so that "Foo=" style
                # names stay recognisable; a kept ' ' is stripped again.
                text = _ARGUMENT_RE.sub(r'\1', name.text).rstrip()
                if text.startswith('-'):
                    # for options, merge options with and without mandatory arg
                    text = text.partition('=')[0]
                stor[text].append(reference)
                if text not in formatting:
                    # use element as formatted display
                    if name.text[-1] in "= '":
                        # Something follows the name inside the element
                        # (presumably the argument markup): drop it all.
                        name.clear()
                    else:
                        name.tail = ''
                    name.text = text
                    formatting[text] = name

        extra = variablelist.attrib.get('extra-ref')
        if extra:
            # The extra reference is filed in the same store as the list's
            # <option>s.
            option_store[extra].append(reference)
            if extra not in formatting:
                elt = tree.Element("varname")
                elt.text = extra
                formatting[extra] = elt

    storfile = directive_groups['filenames']
    for xpath, absolute_only in (('.//refsynopsisdiv//filename', False),
                                 ('.//refsynopsisdiv//command', False),
                                 ('.//filename', True)):
        for name in t.iterfind(xpath):
            if absolute_only and not (name.text and name.text.startswith('/')):
                continue
            if name.attrib.get('index') == 'false':
                continue
            name.tail = ''
            if name.text:
                if name.text.endswith('*'):
                    name.text = name.text[:-1]
                # Names starting with '.' are not indexed.
                if not name.text.startswith('.'):
                    # Only the first word is the name proper.
                    text = name.text.partition(' ')[0]
                    if text != name.text:
                        name.clear()
                        name.text = text
                    # Index directories without the trailing slash; the
                    # displayed element keeps it.
                    if text.endswith('/'):
                        text = text[:-1]
                    storfile[text].append(reference)
                    if text not in formatting:
                        # use element as formatted display
                        formatting[text] = name
            else:
                # No leading text: the name is made up of child elements.
                text = ' '.join(name.itertext())
                storfile[text].append(reference)
                formatting[text] = name

    for name in t.iterfind('.//constant'):
        if name.attrib.get('index') == 'false':
            continue
        name.tail = ''
        if name.text.startswith('('):  # a cast, strip it
            name.text = name.text.partition(' ')[2]
        klass = name.attrib.get('class') or 'constants'
        storconst = directive_groups[klass]
        storconst[name.text].append(reference)
        formatting[name.text] = name

    storspec = directive_groups['specifiers']
    for name in t.iterfind(".//table[@class='specifiers']//entry/literal"):
        # Only a literal that starts with '%' and is the first thing in its
        # table entry is a specifier.  A literal without leading text is
        # not one either, and is skipped instead of raising on text[0].
        if (not (name.text or '').startswith('%')
                or name.getparent().text is not None):
            continue
        if name.attrib.get('index') == 'false':
            continue
        storspec[name.text].append(reference)
        formatting[name.text] = name
    for name in t.iterfind(".//literal[@class='specifiers']"):
        storspec[name.text].append(reference)
        formatting[name.text] = name


def _make_section(template, name, directives, formatting):
    """Fill the template element whose id is *name* with index entries.

    One <varlistentry> is appended per key of *directives*, in sorted
    order.  Its <term> holds a copy of formatting[key] and its <listitem>
    a comma-separated list of <citerefentry> elements, one for each
    distinct (manpage, manvolume) pair, also sorted.
    """
    varlist = template.find(".//*[@id='{}']".format(name))
    for varname, manpages in sorted(directives.items()):
        entry = tree.SubElement(varlist, 'varlistentry')
        term = tree.SubElement(entry, 'term')
        # Copy the element: the original still belongs to the tree of the
        # page it was taken from.
        term.append(deepcopy(formatting[varname]))

        para = tree.SubElement(tree.SubElement(entry, 'listitem'), 'para')

        previous = None
        for manpage, manvolume in sorted(set(manpages)):
            if previous is not None:
                # The separator is the tail of the preceding reference, so
                # the last reference is not followed by a comma.
                previous.tail = ', '
            previous = tree.SubElement(para, 'citerefentry')
            title = tree.SubElement(previous, 'refentrytitle')
            title.text = manpage
            title.attrib['target'] = varname
            volume = tree.SubElement(previous, 'manvolnum')
            volume.text = manvolume
        entry.tail = '\n\n'


def _make_colophon(template, groups):
    """Write the summary text into the template's <para id='colophon'>.

    *groups* is a sized iterable of name -> [(manpage, manvolume), ...]
    mappings.  The text reports the total number of names, the number of
    groups and the number of distinct pages referenced.
    """
    count = 0
    pages = set()
    for group in groups:
        count += len(group)
        for pagelist in group.values():
            pages.update(pagelist)

    para = template.find(".//para[@id='colophon']")
    para.text = COLOPHON.format(count=count,
                                sections=len(groups),
                                pages=len(pages))


def _make_page(template, directive_groups, formatting):
    """Create an XML tree from directive_groups.

    directive_groups = {
       'class': {'variable': [('manpage', 'manvolume'), ...],
                 'variable2': ...},
       ...
    }

    The template is modified in place and returned.
    """
    for name, directives in directive_groups.items():
        _make_section(template, name, directives, formatting)

    _make_colophon(template, directive_groups.values())

    return template


def make_page(template_path, xml_files):
    """Extract directives from xml_files and return XML index tree.

    One group is created for the id of every <variablelist> found in the
    template.  Raises ValueError naming the offending page when a page
    cannot be processed; the underlying exception is attached as its
    cause.
    """
    template = xml_parse(template_path)
    names = [vl.get('id') for vl in template.iterfind('.//variablelist')]
    directive_groups = {name: collections.defaultdict(list)
                        for name in names}
    formatting = {}
    for page in xml_files:
        try:
            _extract_directives(directive_groups, formatting, page)
        except Exception as exc:
            raise ValueError("failed to process " + page) from exc

    return _make_page(template, directive_groups, formatting)


if __name__ == '__main__':
    output_path = sys.argv[1]
    template_path = sys.argv[2]
    xml_files = sys.argv[3:]
    # Build and serialise the index before touching the output file, so
    # that a failure does not leave a truncated file behind.
    xml = make_page(template_path, xml_files)
    data = xml_print(xml)
    with open(output_path, 'wb') as f:
        f.write(data)