1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 /* gunicode.c - Unicode manipulation functions
3  *
4  *  Copyright (C) 1999, 2000 Tom Tromey
5  *  Copyright © 2000, 2005 Red Hat, Inc.
6  */
7 
8 #include "gunicode.h"
9 
10 #define unichar uint32_t
11 
/**
 * utf8_prev_char:
 * @p: a pointer to a position within a UTF-8 encoded string
 *
 * Steps backwards from @p to the start of the preceding UTF-8 character.
 *
 * @p may point into the middle of a character. The only test applied is
 * whether a byte looks like a continuation byte (10xxxxxx); the character
 * found is not validated in any other way. The scan has no lower bound,
 * so the caller must guarantee that a non-continuation byte exists
 * somewhere before @p. In particular, @p must not be the very start of
 * the string.
 *
 * Return value: a pointer to the found character.
 **/
char *
utf8_prev_char (const char *p)
{
  /* Always move back at least one byte, then keep going for as long as
   * we are sitting on a continuation byte. */
  do
    p--;
  while ((*p & 0xc0) == 0x80);

  return (char *) p;
}
35 
36 struct Interval
37 {
38   unichar start, end;
39 };
40 
41 static int
interval_compare(const void * key,const void * elt)42 interval_compare (const void *key, const void *elt)
43 {
44   unichar c = (unichar) (long) (key);
45   struct Interval *interval = (struct Interval *)elt;
46 
47   if (c < interval->start)
48     return -1;
49   if (c > interval->end)
50     return +1;
51 
52   return 0;
53 }
54 
55 /*
56  * NOTE:
57  *
58  * The tables for g_unichar_iswide() and g_unichar_iswide_cjk() are
59  * generated from the Unicode Character Database's file
 * extracted/DerivedEastAsianWidth.txt using the gen-iswide-table.py
 * script, invoked like this:
62  *
63  *   ./gen-iswide-table.py < path/to/ucd/extracted/DerivedEastAsianWidth.txt | fmt
64  *
65  * Last update for Unicode 6.0.
66  */
67 
68 /**
69  * g_unichar_iswide:
70  * @c: a Unicode character
71  *
72  * Determines if a character is typically rendered in a double-width
73  * cell.
74  *
75  * Return value: %TRUE if the character is wide
76  **/
77 bool
unichar_iswide(unichar c)78 unichar_iswide (unichar c)
79 {
80   /* See NOTE earlier for how to update this table. */
81   static const struct Interval wide[] = {
82     {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3},
83     {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3000, 0x303E}, {0x3041, 0x3096},
84     {0x3099, 0x30FF}, {0x3105, 0x312D}, {0x3131, 0x318E}, {0x3190, 0x31BA},
85     {0x31C0, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0x32FE},
86     {0x3300, 0x4DBF}, {0x4E00, 0xA48C}, {0xA490, 0xA4C6}, {0xA960, 0xA97C},
87     {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
88     {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60}, {0xFFE0, 0xFFE6},
89     {0x1B000, 0x1B001}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23A},
90     {0x1F240, 0x1F248}, {0x1F250, 0x1F251},
91     {0x1F300, 0x1F567}, /* Miscellaneous Symbols and Pictographs */
92     {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD},
93   };
94 
95   if (bsearch ((void *)(uintptr_t)c, wide, (sizeof (wide) / sizeof ((wide)[0])), sizeof wide[0],
96                interval_compare))
97     return true;
98 
99   return false;
100 }
101 
/*
 * Indexed by the value of a byte; gives the number of bytes to advance
 * when that byte is the first byte of a UTF-8 sequence.  Bytes that
 * cannot start a multi-byte sequence (ASCII, continuation bytes
 * 0x80-0xBF, and 0xFE/0xFF) map to 1.
 */
const char utf8_skip_data[256] = {
  /* 0x00 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x10 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x20 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x40 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x60 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x80 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x90 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xA0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xB0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xC0 */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xD0 */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xE0 */ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /* 0xF0 */ 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
};
112