1#!/usr/bin/python3
2# -*- coding: utf-8 -*-
3#
4# Generate a translit_font file from a UnicodeData file.
5# Copyright (C) 2015-2022 Free Software Foundation, Inc.
6# This file is part of the GNU C Library.
7#
8# The GNU C Library is free software; you can redistribute it and/or
9# modify it under the terms of the GNU Lesser General Public
10# License as published by the Free Software Foundation; either
11# version 2.1 of the License, or (at your option) any later version.
12#
13# The GNU C Library is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16# Lesser General Public License for more details.
17#
18# You should have received a copy of the GNU Lesser General Public
19# License along with the GNU C Library; if not, see
20# <https://www.gnu.org/licenses/>.
21
22'''
23Generate a translit_font file from UnicodeData.txt
24
25To see how this script is used, call it with the “-h” option:
26
    $ ./gen_translit_font.py -h
28    … prints usage message …
29'''
30
31import argparse
32import time
33import unicode_utils
34
def read_input_file(filename):
    '''Split an existing translit_font file into the parts to keep.

    Only the section between “translit_start” and “translit_end” is
    regenerated, so everything around it is returned for reuse.

    Returns a tuple (head, tail):

    head: all lines up to and including the first line starting with
          “translit_start”, or the whole file if there is no such line.
    tail: the first line starting with “translit_end” found after the
          “translit_start” line and everything following it, or the
          empty string if there is no such line.
    '''
    with open(filename, mode='r') as translit_file:
        lines = translit_file.readlines()

    # Index of the “translit_start” line; without one the whole file
    # is head and there is nothing left for the tail.
    start = next(
        (index for index, line in enumerate(lines)
         if line.startswith('translit_start')),
        None)
    if start is None:
        return (''.join(lines), '')
    head = ''.join(lines[:start + 1])

    # Only lines after “translit_start” are searched for “translit_end”.
    end = next(
        (index for index in range(start + 1, len(lines))
         if lines[index].startswith('translit_end')),
        None)
    if end is None:
        return (head, '')
    return (head, ''.join(lines[end:]))
55
def output_head(translit_file, unicode_version, head=''):
    '''Write everything that precedes the transliteration rules, up to
    and including the “translit_start” line.

    If an input file was given on the command line and head is not
    empty, head is written unchanged.  Otherwise a default header is
    generated which mentions the current date and unicode_version.
    '''
    emit = translit_file.write
    if ARGS.input_file and head:
        emit(head)
        return
    emit('escape_char /\n')
    emit('comment_char %\n')
    emit(unicode_utils.COMMENT_HEADER)
    emit('\n')
    emit('% Transliterations of font equivalents.\n')
    generated_on = time.strftime('%Y-%m-%d')
    emit('% Generated automatically from UnicodeData.txt '
         'by gen_translit_font.py '
         'on {:s} for Unicode {:s}.\n'.format(generated_on, unicode_version))
    emit('\n')
    emit('LC_CTYPE\n')
    emit('\n')
    emit('translit_start\n')
76
def output_tail(translit_file, tail=''):
    '''Write the part of the output file that follows the
    transliteration rules, starting with the “translit_end” line.

    If an input file was given on the command line and tail is not
    empty, tail is written unchanged.  Otherwise a default tail is
    generated.
    '''
    if not (ARGS.input_file and tail):
        for line in ('translit_end\n', '\n', 'END LC_CTYPE\n'):
            translit_file.write(line)
        return
    translit_file.write(tail)
85
def output_transliteration(translit_file):
    '''Write the new transliteration to the output file

    For each code point in unicode_utils.UNICODE_ATTRIBUTES (in sorted
    order) whose “decomposition” attribute starts with “<font>”, one
    line of the form

        SYMBOL REPLACEMENT % NAME

    is written, where SYMBOL is unicode_utils.ucs_symbol() of the code
    point, REPLACEMENT is the concatenation of the symbols of the
    decomposed code points (enclosed in double quotes if there is more
    than one) and NAME is the “name” attribute of the code point.  The
    block of rules is preceded and followed by an empty line.

    Entries whose “<font>” tag is not followed by any code point are
    skipped.
    '''
    translit_file.write('\n')
    for code_point in sorted(unicode_utils.UNICODE_ATTRIBUTES):
        attributes = unicode_utils.UNICODE_ATTRIBUTES[code_point]
        decomposition = attributes['decomposition']
        if not decomposition.startswith('<font>'):
            continue
        # split() without a separator discards leading and repeated
        # blanks and yields an empty list when nothing follows the
        # tag.  split(' ') would yield [''] in that case and
        # int('', 16) would raise ValueError, so the emptiness check
        # below could never trigger.
        decomposed_code_points = [
            int(x, 16) for x in decomposition[len('<font>'):].split()]
        if not decomposed_code_points:
            continue
        replacement = ''.join(
            unicode_utils.ucs_symbol(decomposed_code_point)
            for decomposed_code_point in decomposed_code_points)
        # A replacement made of several symbols is written as a
        # quoted string.
        if len(decomposed_code_points) > 1:
            replacement = '"' + replacement + '"'
        translit_file.write('{:s} {:s} % {:s}\n'.format(
            unicode_utils.ucs_symbol(code_point),
            replacement,
            attributes['name']))
    translit_file.write('\n')
112
if __name__ == "__main__":
    # Command line interface.  ARGS is kept at module level because
    # output_head() and output_tail() read ARGS.input_file.
    PARSER = argparse.ArgumentParser(
        description='''
        Generate a translit_font file from UnicodeData.txt.
        ''')
    PARSER.add_argument(
        '-u', '--unicode_data_file',
        nargs='?',
        type=str,
        default='UnicodeData.txt',
        help='The UnicodeData.txt file to read, '
             'default: %(default)s')
    PARSER.add_argument(
        '-i', '--input_file',
        nargs='?',
        type=str,
        help=''' The original glibc/localedata/locales/translit_font
        file.''')
    PARSER.add_argument(
        '-o', '--output_file',
        nargs='?',
        type=str,
        default='translit_font.new',
        help='''The new translit_font file, default: %(default)s.  If the
        original glibc/localedata/locales/translit_font file has
        been given as an option, the header up to the
        “translit_start” line and the tail from the “translit_end”
        line to the end of the file will be copied unchanged into the
        output file.  ''')
    PARSER.add_argument(
        '--unicode_version',
        nargs='?',
        required=True,
        type=str,
        help='The Unicode version of the input files used.')
    ARGS = PARSER.parse_args()

    # Load the character attributes used by output_transliteration().
    unicode_utils.fill_attributes(ARGS.unicode_data_file)

    # Reuse head and tail of the original file if one was given,
    # otherwise leave them empty so that defaults are generated.
    if ARGS.input_file:
        HEAD, TAIL = read_input_file(ARGS.input_file)
    else:
        HEAD, TAIL = '', ''

    with open(ARGS.output_file, mode='w') as TRANSLIT_FILE:
        output_head(TRANSLIT_FILE, ARGS.unicode_version, head=HEAD)
        output_transliteration(TRANSLIT_FILE)
        output_tail(TRANSLIT_FILE, tail=TAIL)
158