1 /* Conversion from and to IBM937. 2 Copyright (C) 2000-2022 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, see 17 <https://www.gnu.org/licenses/>. */ 18 19 /* IBM937 is designed for the representation of Traditional Chinese 20 using a stateful EBCDIC encoding scheme. It is also known as 21 CCSID 937 or CP937. See: 22 https://www-01.ibm.com/software/globalization/ccsid/ccsid937.html */ 23 24 #include <dlfcn.h> 25 #include <stdint.h> 26 #include <wchar.h> 27 #include <byteswap.h> 28 #include "ibm937.h" 29 30 /* The shift sequences for this charset (it does not use ESC). */ 31 #define SI 0x0F /* Shift In, host code to turn DBCS off. */ 32 #define SO 0x0E /* Shift Out, host code to turn DBCS on. */ 33 34 /* Definitions used in the body of the `gconv' function. */ 35 #define CHARSET_NAME "IBM937//" 36 #define FROM_LOOP from_ibm937 37 #define TO_LOOP to_ibm937 38 #define ONE_DIRECTION 0 39 #define FROM_LOOP_MIN_NEEDED_FROM 1 40 #define FROM_LOOP_MAX_NEEDED_FROM 2 41 #define FROM_LOOP_MIN_NEEDED_TO 4 42 #define FROM_LOOP_MAX_NEEDED_TO 4 43 #define TO_LOOP_MIN_NEEDED_FROM 4 44 #define TO_LOOP_MAX_NEEDED_FROM 4 45 #define TO_LOOP_MIN_NEEDED_TO 1 46 #define TO_LOOP_MAX_NEEDED_TO 3 47 #define PREPARE_LOOP \ 48 int save_curcs; \ 49 int *curcsp = &data->__statep->__count; 50 #define EXTRA_LOOP_ARGS , curcsp 51 52 /* Definitions of initialization and destructor function. */ 53 #define DEFINE_INIT 1 54 #define DEFINE_FINI 1 55 56 57 /* Since this is a stateful encoding we have to provide code which resets 58 the output state to the initial state. This has to be done during the 59 flushing. */ 60 #define EMIT_SHIFT_TO_INIT \ 61 if ((data->__statep->__count & ~7) != sb) \ 62 { \ 63 if (FROM_DIRECTION) \ 64 data->__statep->__count &= 7; \ 65 else \ 66 { \ 67 /* We are not in the initial state. To switch back we have \ 68 to emit `SI'. */ \ 69 if (__glibc_unlikely (outbuf >= outend)) \ 70 /* We don't have enough room in the output buffer. */ \ 71 status = __GCONV_FULL_OUTPUT; \ 72 else \ 73 { \ 74 /* Write out the shift sequence. */ \ 75 *outbuf++ = SI; \ 76 data->__statep->__count &= 7; \ 77 } \ 78 } \ 79 } 80 81 82 /* Since we might have to reset input pointer we must be able to save 83 and retore the state. */ 84 #define SAVE_RESET_STATE(Save) \ 85 if (Save) \ 86 save_curcs = *curcsp; \ 87 else \ 88 *curcsp = save_curcs 89 90 91 /* Current codeset type. */ 92 enum 93 { 94 sb = 0, 95 db = 64 96 }; 97 98 /* First, define the conversion function from IBM-937 to UCS4. */ 99 #define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM 100 #define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM 101 #define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO 102 #define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO 103 #define LOOPFCT FROM_LOOP 104 #define BODY \ 105 { \ 106 uint32_t ch = *inptr; \ 107 uint32_t res; \ 108 \ 109 if (__builtin_expect (ch, 0) == SO) \ 110 { \ 111 /* Shift OUT, change to DBCS converter (redundant escape okay). */ \ 112 curcs = db; \ 113 ++inptr; \ 114 continue; \ 115 } \ 116 else if (__builtin_expect (ch, 0) == SI) \ 117 { \ 118 /* Shift IN, change to SBCS converter (redundant escape okay). */ \ 119 curcs = sb; \ 120 ++inptr; \ 121 continue; \ 122 } \ 123 \ 124 if (curcs == sb) \ 125 { \ 126 /* Use the IBM937 table for single byte. */ \ 127 res = __ibm937sb_to_ucs4[ch]; \ 128 if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0') \ 129 { \ 130 /* This is an illegal character. */ \ 131 STANDARD_FROM_LOOP_ERR_HANDLER (1); \ 132 } \ 133 else \ 134 { \ 135 put32 (outptr, res); \ 136 outptr += 4; \ 137 } \ 138 ++inptr; \ 139 } \ 140 else \ 141 { \ 142 const struct gap *rp2 = __ibm937db_to_ucs4_idx; \ 143 \ 144 assert (curcs == db); \ 145 \ 146 /* Use the IBM937 table for double byte. */ \ 147 if (__glibc_unlikely (inptr + 1 >= inend)) \ 148 { \ 149 /* The second character is not available. \ 150 Store the intermediate result. */ \ 151 result = __GCONV_INCOMPLETE_INPUT; \ 152 break; \ 153 } \ 154 \ 155 ch = (ch * 0x100) + inptr[1]; \ 156 while (ch > rp2->end) \ 157 ++rp2; \ 158 \ 159 if (__builtin_expect (rp2->start == 0xffff, 0) \ 160 || __builtin_expect (ch < rp2->start, 0) \ 161 || (res = __ibm937db_to_ucs4[ch + rp2->idx], \ 162 __builtin_expect (res, L'\1') == L'\0' && ch != '\0')) \ 163 { \ 164 /* This is an illegal character. */ \ 165 STANDARD_FROM_LOOP_ERR_HANDLER (2); \ 166 } \ 167 else \ 168 { \ 169 put32 (outptr, res); \ 170 outptr += 4; \ 171 } \ 172 inptr += 2; \ 173 } \ 174 } 175 #define LOOP_NEED_FLAGS 176 #define EXTRA_LOOP_DECLS , int *curcsp 177 #define INIT_PARAMS int curcs = *curcsp & ~7 178 #define UPDATE_PARAMS *curcsp = curcs 179 #include <iconv/loop.c> 180 181 /* Next, define the other direction. */ 182 #define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM 183 #define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM 184 #define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO 185 #define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO 186 #define LOOPFCT TO_LOOP 187 #define BODY \ 188 { \ 189 uint32_t ch = get32 (inptr); \ 190 const struct gap *rp1 = __ucs4_to_ibm937sb_idx; \ 191 const struct gap *rp2 = __ucs4_to_ibm937db_idx; \ 192 const char *cp; \ 193 \ 194 if (__glibc_unlikely (ch >= 0xffff)) \ 195 { \ 196 UNICODE_TAG_HANDLER (ch, 4); \ 197 \ 198 STANDARD_TO_LOOP_ERR_HANDLER (4); \ 199 } \ 200 \ 201 while (ch > rp1->end) \ 202 ++rp1; \ 203 \ 204 /* Use the UCS4 table for single byte. */ \ 205 if (__builtin_expect (ch < rp1->start, 0) \ 206 || (cp = __ucs4_to_ibm937sb[ch + rp1->idx], \ 207 __builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0')) \ 208 { \ 209 /* Use the UCS4 table for double byte. */ \ 210 while (ch > rp2->end) \ 211 ++rp2; \ 212 \ 213 if (__builtin_expect (ch < rp2->start, 0) \ 214 || (cp = __ucs4_to_ibm937db[ch + rp2->idx], \ 215 __builtin_expect (cp[0], L'\1')==L'\0' && ch != '\0')) \ 216 { \ 217 /* This is an illegal character. */ \ 218 STANDARD_TO_LOOP_ERR_HANDLER (4); \ 219 } \ 220 else \ 221 { \ 222 if (curcs == sb) \ 223 { \ 224 if (__glibc_unlikely (outptr + 1 > outend)) \ 225 { \ 226 result = __GCONV_FULL_OUTPUT; \ 227 break; \ 228 } \ 229 *outptr++ = SO; \ 230 curcs = db; \ 231 } \ 232 \ 233 if (__glibc_unlikely (outptr + 2 > outend)) \ 234 { \ 235 result = __GCONV_FULL_OUTPUT; \ 236 break; \ 237 } \ 238 *outptr++ = cp[0]; \ 239 *outptr++ = cp[1]; \ 240 } \ 241 } \ 242 else \ 243 { \ 244 if (curcs == db) \ 245 { \ 246 if (__glibc_unlikely (outptr + 1 > outend)) \ 247 { \ 248 result = __GCONV_FULL_OUTPUT; \ 249 break; \ 250 } \ 251 *outptr++ = SI; \ 252 curcs = sb; \ 253 } \ 254 \ 255 if (__glibc_unlikely (outptr + 1 > outend)) \ 256 { \ 257 result = __GCONV_FULL_OUTPUT; \ 258 break; \ 259 } \ 260 *outptr++ = cp[0]; \ 261 } \ 262 \ 263 /* Now that we wrote the output increment the input pointer. */ \ 264 inptr += 4; \ 265 } 266 #define LOOP_NEED_FLAGS 267 #define EXTRA_LOOP_DECLS , int *curcsp 268 #define INIT_PARAMS int curcs = *curcsp & ~7 269 #define REINIT_PARAMS curcs = *curcsp & ~7 270 #define UPDATE_PARAMS *curcsp = curcs 271 #include <iconv/loop.c> 272 273 /* Now define the toplevel functions. */ 274 #include <iconv/skeleton.c> 275