/* Access functions for GB2312 conversion.
   Copyright (C) 1998-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#ifndef _GB2312_H
#define _GB2312_H	1

#include <gconv.h>
#include <stdint.h>
#include <assert.h>

/* Conversion table, indexed by (row * 94 + column) as computed in
   gb2312_to_ucs4 below.  A zero entry means "no mapping".  */
extern const uint16_t __gb2312_to_ucs[];


/* Decode one two-byte GB2312 sequence starting at *S.

   OFFSET is subtracted from both input bytes before they are range
   checked and turned into a table index.  AVAIL is the number of input
   bytes available at *S; the first byte is read unconditionally, so the
   caller must supply at least one.

   Returns:
     - the mapped value from __gb2312_to_ucs, with *S advanced by 2;
     - 0 if the first byte is acceptable but AVAIL < 2 (*S unchanged);
     - __UNKNOWN_10646_CHAR if either byte is out of range or the table
       has no entry for the pair (*S unchanged).  */
static inline uint32_t
__attribute ((always_inline))
gb2312_to_ucs4 (const unsigned char **s, size_t avail, unsigned char offset)
{
  const unsigned char *in = *s;
  unsigned char lead = in[0];

  /* The first byte, less OFFSET, must lie in 0x21..0x77.  */
  if (lead < offset)
    return __UNKNOWN_10646_CHAR;

  int row = lead - offset;
  if (row <= 0x20 || row > 0x77)
    return __UNKNOWN_10646_CHAR;

  /* The first byte looks fine but the second one is not there yet.  */
  if (avail < 2)
    return 0;

  /* The second byte, less OFFSET, must lie in 0x21..0x7e.  The
     subtraction is done in int, so a byte below OFFSET yields a negative
     value and is rejected by the lower bound.  */
  int col = in[1] - offset;
  if (col <= 0x20 || col >= 0x7f)
    return __UNKNOWN_10646_CHAR;

  /* 94 columns per row; both coordinates are rebased to zero.  */
  int idx = (row - 0x21) * 94 + (col - 0x21);
  if (idx > 0x1ff1)
    return __UNKNOWN_10646_CHAR;

  uint32_t ucs = __gb2312_to_ucs[idx];
  if (ucs == 0)
    /* Hole in the table: report it and leave the input untouched.  */
    return __UNKNOWN_10646_CHAR;

  /* Only consume the two bytes once the lookup has succeeded.  */
  *s = in + 2;
  return ucs;
}


/* Reverse tables; each entry is a two-byte sequence and an entry whose
   first byte is '\0' marks an unmapped code point (see the check at the
   end of ucs4_to_gb2312).  */
extern const char __gb2312_from_ucs4_tab1[][2];
extern const char __gb2312_from_ucs4_tab2[][2];
extern const char __gb2312_from_ucs4_tab3[][2];
extern const char __gb2312_from_ucs4_tab4[][2];
extern const char __gb2312_from_ucs4_tab5[][2];
extern const char __gb2312_from_ucs4_tab6[][2];
extern const char __gb2312_from_ucs4_tab7[][2];
extern const char __gb2312_from_ucs4_tab8[][2];
extern const char __gb2312_from_ucs4_tab9[][2];


/* Encode the code point WCH as a two-byte sequence written to S.

   AVAIL is the number of bytes that may be written at S.

   Returns:
     - 2 after storing both bytes in S[0] and S[1];
     - 0 if WCH has a mapping but AVAIL < 2 (nothing is written);
     - __UNKNOWN_10646_CHAR if WCH has no mapping (nothing is written).

   Note that "no mapping" is reported before the space check, so an
   unmappable WCH yields __UNKNOWN_10646_CHAR even when AVAIL < 2.  */
static inline size_t
__attribute ((always_inline))
ucs4_to_gb2312 (uint32_t wch, unsigned char *s, size_t avail)
{
  unsigned int code = (unsigned int) wch;
  char pair[2];			/* Scratch for the computed ranges.  */
  const char *src = pair;	/* Where the two output bytes come from.  */

  switch (code)
    {
    /* Contiguous ranges served by lookup tables.  */
    case 0xa4 ... 0x101:	src = __gb2312_from_ucs4_tab1[code - 0xa4]; break;

    /* Isolated code points with a fixed two-byte encoding.  */
    case 0x113:	src = "\x28\x25"; break;
    case 0x11b:	src = "\x28\x27"; break;
    case 0x12b:	src = "\x28\x29"; break;
    case 0x14d:	src = "\x28\x2d"; break;
    case 0x16b:	src = "\x28\x31"; break;
    case 0x1ce:	src = "\x28\x23"; break;
    case 0x1d0:	src = "\x28\x2b"; break;
    case 0x1d2:	src = "\x28\x2f"; break;
    case 0x1d4:	src = "\x28\x33"; break;
    case 0x1d6:	src = "\x28\x35"; break;
    case 0x1d8:	src = "\x28\x36"; break;
    case 0x1da:	src = "\x28\x37"; break;
    case 0x1dc:	src = "\x28\x38"; break;
    case 0x2c7:	src = "\x21\x26"; break;
    case 0x2c9:	src = "\x21\x25"; break;

    case 0x391 ... 0x3c9:	src = __gb2312_from_ucs4_tab2[code - 0x391]; break;
    case 0x401 ... 0x451:	src = __gb2312_from_ucs4_tab3[code - 0x401]; break;
    case 0x2015 ... 0x203b:	src = __gb2312_from_ucs4_tab4[code - 0x2015]; break;
    case 0x2103 ... 0x22a5:	src = __gb2312_from_ucs4_tab5[code - 0x2103]; break;

    case 0x2312:	src = "\x21\x50"; break;

    case 0x2460 ... 0x249b:	src = __gb2312_from_ucs4_tab6[code - 0x2460]; break;

    case 0x2500 ... 0x254b:
      /* Computed mapping: second byte is 0x24 plus the low byte of the
	 code point.  SRC still points at PAIR.  */
      pair[0] = '\x29';
      pair[1] = '\x24' + (code % 256);
      break;

    case 0x25a0:	src = "\x21\x76"; break;
    case 0x25a1:	src = "\x21\x75"; break;
    case 0x25b2:	src = "\x21\x78"; break;
    case 0x25b3:	src = "\x21\x77"; break;
    case 0x25c6:	src = "\x21\x74"; break;
    case 0x25c7:	src = "\x21\x73"; break;
    case 0x25cb:	src = "\x21\x70"; break;
    case 0x25ce:	src = "\x21\x72"; break;
    case 0x25cf:	src = "\x21\x71"; break;
    case 0x2605:	src = "\x21\x6f"; break;
    case 0x2606:	src = "\x21\x6e"; break;
    case 0x2640:	src = "\x21\x62"; break;
    case 0x2642:	src = "\x21\x61"; break;

    case 0x3000 ... 0x3129:	src = __gb2312_from_ucs4_tab7[code - 0x3000]; break;

    case 0x3220 ... 0x3229:
      /* Computed mapping: second byte is 0x65 plus the distance from
	 the start of the range.  SRC still points at PAIR.  */
      pair[0] = '\x22';
      pair[1] = '\x65' + (code - 0x3220);
      break;

    case 0x4e00 ... 0x9fa0:	src = __gb2312_from_ucs4_tab8[code - 0x4e00]; break;
    case 0xff01 ... 0xff5e:	src = __gb2312_from_ucs4_tab9[code - 0xff01]; break;

    case 0xffe0:	src = "\x21\x69"; break;
    case 0xffe1:	src = "\x21\x6a"; break;
    case 0xffe3:	src = "\x23\x7e"; break;
    case 0xffe5:	src = "\x23\x24"; break;

    default:
      /* Outside every range handled above.  */
      return __UNKNOWN_10646_CHAR;
    }

  /* A table slot whose first byte is NUL is a hole in the mapping.  */
  if (src[0] == '\0')
    return __UNKNOWN_10646_CHAR;

  /* A mapped entry is expected to have a non-NUL second byte as well.  */
  assert (src[1] != '\0');

  /* The character is representable but the output has no room for it.  */
  if (avail < 2)
    return 0;

  s[0] = src[0];
  s[1] = src[1];

  return 2;
}

#endif /* gb2312.h */