1#!/usr/bin/python3
2# -*- coding: utf-8 -*-
3#
4# Generate a translit_circle file from a UnicodeData file.
5# Copyright (C) 2015-2022 Free Software Foundation, Inc.
6# This file is part of the GNU C Library.
7#
8# The GNU C Library is free software; you can redistribute it and/or
9# modify it under the terms of the GNU Lesser General Public
10# License as published by the Free Software Foundation; either
11# version 2.1 of the License, or (at your option) any later version.
12#
13# The GNU C Library is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16# Lesser General Public License for more details.
17#
18# You should have received a copy of the GNU Lesser General Public
19# License along with the GNU C Library; if not, see
20# <https://www.gnu.org/licenses/>.
21
22'''
23Generate a translit_circle file from UnicodeData.txt
24
25To see how this script is used, call it with the “-h” option:
26
    $ ./gen_translit_circle.py -h
28    … prints usage message …
29'''
30
31import argparse
32import time
33import unicode_utils
34
def read_input_file(filename):
    '''Read the original glibc translit_circle file and return its
    head and tail.

    Only the part of the file between “translit_start” and
    “translit_end” is meant to be replaced, so everything around it
    is returned for copying into the new file.

    filename: path of the existing translit_circle file.

    Returns a tuple (head, tail) of strings:
      head: all lines up to and including the first line starting
            with “translit_start” (the whole file if there is no
            such line).
      tail: the first following line starting with “translit_end”
            and everything after it (empty if there is no such line).
    '''
    head_lines = []
    tail_lines = []
    # Decode explicitly as UTF-8 so that the result does not depend on
    # the locale the script happens to be run in.
    with open(filename, mode='r', encoding='utf-8') as translit_file:
        # All three loops share one iterator, so each continues where
        # the previous one stopped.
        for line in translit_file:
            head_lines.append(line)
            if line.startswith('translit_start'):
                break
        # Skip the old transliteration rules.
        for line in translit_file:
            if line.startswith('translit_end'):
                tail_lines.append(line)
                break
        # Whatever is left after “translit_end” belongs to the tail.
        tail_lines.extend(translit_file)
    return (''.join(head_lines), ''.join(tail_lines))
55
def output_head(translit_file, unicode_version, head=''):
    '''Write the header of the output file, i.e. the part of the file
    up to and including the “translit_start” line.

    translit_file: writable text file object receiving the output.
    unicode_version: Unicode version string mentioned in the generated
        header comment; only used when “head” is empty.
    head: header text taken from an existing translit_circle file.
        If non-empty it is written unchanged, otherwise a default
        header is generated.
    '''
    # “head” alone selects the header.  The script only passes a
    # non-empty head when an input file was given, so consulting the
    # command line arguments here is unnecessary and would make the
    # function unusable outside of the script's main block.
    if head:
        translit_file.write(head)
        return
    translit_file.write('escape_char /\n')
    translit_file.write('comment_char %\n')
    translit_file.write(unicode_utils.COMMENT_HEADER)
    translit_file.write('\n')
    translit_file.write('% Transliterations of encircled characters.\n')
    translit_file.write('% Generated automatically from UnicodeData.txt '
                        + 'by gen_translit_circle.py '
                        + 'on {:s} '.format(time.strftime('%Y-%m-%d'))
                        + 'for Unicode {:s}.\n'.format(unicode_version))
    translit_file.write('\n')
    translit_file.write('LC_CTYPE\n')
    translit_file.write('\n')
    translit_file.write('translit_start\n')
76
def output_tail(translit_file, tail=''):
    '''Write the tail of the output file, i.e. the part starting with
    the “translit_end” line.

    translit_file: writable text file object receiving the output.
    tail: tail text taken from an existing translit_circle file.  If
        non-empty it is written unchanged, otherwise a default tail
        closing the LC_CTYPE section is generated.
    '''
    # “tail” alone selects the output.  The script only passes a
    # non-empty tail when an input file was given, so the command line
    # arguments need not be consulted here.
    if tail:
        translit_file.write(tail)
        return
    translit_file.write('translit_end\n')
    translit_file.write('\n')
    translit_file.write('END LC_CTYPE\n')
85
def output_transliteration(translit_file):
    '''Write the transliteration rules for encircled characters.

    Walks unicode_utils.UNICODE_ATTRIBUTES in ascending code point
    order.  For every entry whose decomposition is tagged “<circle>”
    a comment line with the character name is written, followed by a
    rule mapping the character to its decomposition enclosed in
    <U0028> and <U0029> (left and right parenthesis).  The rules are
    preceded and followed by an empty line.

    translit_file: writable text file object receiving the output.
    '''
    attributes = unicode_utils.UNICODE_ATTRIBUTES
    to_symbol = unicode_utils.ucs_symbol
    circle_tag = '<circle>'
    # The tag is followed by one separating space before the hex values.
    payload_start = len(circle_tag) + 1

    translit_file.write('\n')
    for code_point in sorted(attributes):
        entry = attributes[code_point]
        name = entry['name']
        decomposition = entry['decomposition']
        if not decomposition.startswith(circle_tag):
            continue
        hex_fields = decomposition[payload_start:].split(' ')
        decomposed_code_points = [int(field, 16) for field in hex_fields]
        translit_file.write('% {:s}\n'.format(name))
        source_symbol = to_symbol(code_point)
        expansion = ''.join(
            '{:s}'.format(to_symbol(decomposed))
            for decomposed in decomposed_code_points)
        translit_file.write('{:s} "<U0028>{:s}<U0029>"\n'.format(
            source_symbol, expansion))
    translit_file.write('\n')
105
106
if __name__ == "__main__":
    # Command line interface.  ARGS is kept as a module level name
    # because other functions of this script may refer to it.
    PARSER = argparse.ArgumentParser(
        description='''
        Generate a translit_circle file from UnicodeData.txt.
        ''')
    PARSER.add_argument(
        '-u', '--unicode_data_file',
        nargs='?',
        type=str,
        default='UnicodeData.txt',
        help=('The UnicodeData.txt file to read, '
              + 'default: %(default)s'))
    PARSER.add_argument(
        '-i', '--input_file',
        nargs='?',
        type=str,
        help=''' The original glibc/localedata/locales/translit_circle
        file.''')
    PARSER.add_argument(
        '-o', '--output_file',
        nargs='?',
        type=str,
        default='translit_circle.new',
        help='''The new translit_circle file, default: %(default)s.  If the
        original glibc/localedata/locales/translit_circle file has
        been given as an option, the header up to the
        “translit_start” line and the tail from the “translit_end”
        line to the end of the file will be copied unchanged into the
        output file.  ''')
    PARSER.add_argument(
        '--unicode_version',
        nargs='?',
        required=True,
        type=str,
        help='The Unicode version of the input files used.')
    ARGS = PARSER.parse_args()

    # Load the character attributes used to generate the rules.
    unicode_utils.fill_attributes(ARGS.unicode_data_file)
    # Head and tail stay empty unless an existing file was supplied; the
    # output functions then fall back to their default text.
    HEAD = TAIL = ''
    if ARGS.input_file:
        (HEAD, TAIL) = read_input_file(ARGS.input_file)
    # Write UTF-8 explicitly so that a head or tail containing non-ASCII
    # text can be written regardless of the current locale.
    with open(ARGS.output_file, mode='w', encoding='utf-8') as TRANSLIT_FILE:
        output_head(TRANSLIT_FILE, ARGS.unicode_version, head=HEAD)
        output_transliteration(TRANSLIT_FILE)
        output_tail(TRANSLIT_FILE, tail=TAIL)
152