/* Conversion from and to IBM933.
   Copyright (C) 2000-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* IBM933 is designed for the representation of Korean using a stateful
   EBCDIC encoding scheme.  It is also known as CCSID 933 or CP933.  See:
   https://www-01.ibm.com/software/globalization/ccsid/ccsid933.html  */

#include <dlfcn.h>
#include <stdint.h>
#include <wchar.h>
#include <byteswap.h>
#include "ibm933.h"

/* The shift sequences for this charset (it does not use ESC).  */
#define SI 0x0F /* Shift In, host code to turn DBCS off.  */
#define SO 0x0E /* Shift Out, host code to turn DBCS on.  */

/* Definitions used in the body of the `gconv' function.

   The FROM direction (IBM933 -> UCS4) reads one or two input bytes per
   character and writes four output bytes.  The TO direction
   (UCS4 -> IBM933) reads four input bytes and writes between one and
   three output bytes; the three-byte case is a shift byte followed by a
   double-byte character.  */
#define CHARSET_NAME "IBM933//"
#define FROM_LOOP from_ibm933
#define TO_LOOP to_ibm933
#define ONE_DIRECTION 0
#define FROM_LOOP_MIN_NEEDED_FROM 1
#define FROM_LOOP_MAX_NEEDED_FROM 2
#define FROM_LOOP_MIN_NEEDED_TO 4
#define FROM_LOOP_MAX_NEEDED_TO 4
#define TO_LOOP_MIN_NEEDED_FROM 4
#define TO_LOOP_MAX_NEEDED_FROM 4
#define TO_LOOP_MIN_NEEDED_TO 1
#define TO_LOOP_MAX_NEEDED_TO 3
/* Declares the variables shared by the macros below: `curcsp' points at
   the state word (data->__statep->__count) that holds the current
   codeset, and `save_curcs' is the slot used by SAVE_RESET_STATE.  */
#define PREPARE_LOOP \
  int save_curcs; \
  int *curcsp = &data->__statep->__count;
/* Passes the state pointer on to the loop functions; it is received
   there through EXTRA_LOOP_DECLS.  */
#define EXTRA_LOOP_ARGS , curcsp

/* Definitions of initialization and destructor function.  */
#define DEFINE_INIT 1
#define DEFINE_FINI 1


/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The codeset is kept in the bits of __count above the low three, so the
   test masks with ~7 and the reset keeps only the low three bits.  In the
   FROM direction nothing has to be written; the state bits are simply
   cleared.  In the TO direction an `SI' byte is written first, and the
   state is only cleared once that byte has actually been stored.  */
#define EMIT_SHIFT_TO_INIT \
  if ((data->__statep->__count & ~7) != sb) \
    { \
      if (FROM_DIRECTION) \
        data->__statep->__count &= 7; \
      else \
        { \
          /* We are not in the initial state.  To switch back we have \
             to emit `SI'.  */ \
          if (__glibc_unlikely (outbuf >= outend)) \
            /* We don't have enough room in the output buffer.  */ \
            status = __GCONV_FULL_OUTPUT; \
          else \
            { \
              /* Write out the shift sequence.  */ \
              *outbuf++ = SI; \
              data->__statep->__count &= 7; \
            } \
        } \
    }


/* Since we might have to reset input pointer we must be able to save
   and restore the state.  A nonzero SAVE copies the state word into
   `save_curcs'; a zero SAVE writes the saved value back.  */
#define SAVE_RESET_STATE(Save) \
  if (Save) \
    save_curcs = *curcsp; \
  else \
    *curcsp = save_curcs


/* Current codeset type.  Both values have their low three bits clear,
   so they compare correctly against a state word masked with ~7.  */
enum
{
  sb = 0,	/* Single-byte mode (the initial state).  */
  db = 64	/* Double-byte mode, entered by SO and left by SI.  */
};

/* First, define the conversion function from IBM-933 to UCS4.

   Each iteration looks at one input byte.  SO and SI only switch the
   current codeset and produce no output.  Any other byte is converted
   through the single-byte table when in `sb' mode, or combined with the
   following byte and converted through the double-byte table when in
   `db' mode.  */
#define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY \
  { \
    uint32_t ch = *inptr; \
    uint32_t res; \
    \
    if (__builtin_expect (ch, 0) == SO) \
      { \
        /* Shift OUT, change to DBCS converter (redundant escape okay).  */ \
        curcs = db; \
        ++inptr; \
        continue; \
      } \
    else if (__builtin_expect (ch, 0) == SI) \
      { \
        /* Shift IN, change to SBCS converter (redundant escape okay).  */ \
        curcs = sb; \
        ++inptr; \
        continue; \
      } \
    \
    if (curcs == sb) \
      { \
        /* Use the IBM933 table for single byte.  A zero table entry for \
           a nonzero input byte means there is no mapping.  */ \
        res = __ibm933sb_to_ucs4[ch]; \
        if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0') \
          { \
            /* This is an illegal character.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        else \
          { \
            put32 (outptr, res); \
            outptr += 4; \
          } \
        /* NOTE(review): presumably only reached on the success path, \
           i.e. the error handler leaves the iteration itself -- confirm \
           against iconv/loop.c.  */ \
        ++inptr; \
      } \
    else \
      { \
        const struct gap *rp2 = __ibm933db_to_ucs4_idx; \
        \
        assert (curcs == db); \
        \
        /* Use the IBM933 table for double byte.  */ \
        if (__glibc_unlikely (inptr + 1 >= inend)) \
          { \
            /* The second character is not available.  Store the \
               intermediate result.  */ \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        \
        /* Combine both bytes, first byte in the high position, and walk \
           the index until the entry whose `end' is not below the value.  */ \
        ch = (ch * 0x100) + inptr[1]; \
        while (ch > rp2->end) \
          ++rp2; \
        \
        /* The character is rejected if the walk stopped at an entry \
           whose start is 0xffff (presumably the terminating entry of \
           the index -- see ibm933.h), if the value lies below the start \
           of the entry found, or if the table entry is zero for a \
           nonzero value.  */ \
        if (__builtin_expect (rp2->start == 0xffff, 0) \
            || __builtin_expect (ch < rp2->start, 0) \
            || (res = __ibm933db_to_ucs4[ch + rp2->idx], \
                __builtin_expect (res, L'\1') == L'\0' && ch != '\0')) \
          { \
            /* This is an illegal character.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (2); \
          } \
        else \
          { \
            put32 (outptr, res); \
            outptr += 4; \
            inptr += 2; \
          } \
      } \
  }
#define LOOP_NEED_FLAGS
/* The loop function receives the state pointer as an extra parameter.
   `curcs' is a local copy of the state word with the low three bits
   masked off; UPDATE_PARAMS stores it back into the state word.  */
#define EXTRA_LOOP_DECLS , int *curcsp
#define INIT_PARAMS int curcs = *curcsp & ~7
#define UPDATE_PARAMS *curcsp = curcs
#include <iconv/loop.c>

/* Next, define the other direction.

   Each UCS4 character is first looked up in the single-byte table and,
   if that yields nothing, in the double-byte table.  Before the
   character is written, an SO or SI byte is emitted whenever the table
   that matched differs from the current codeset.  */
#define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT TO_LOOP
#define BODY \
  { \
    uint32_t ch = get32 (inptr); \
    const struct gap *rp1 = __ucs4_to_ibm933sb_idx; \
    const struct gap *rp2 = __ucs4_to_ibm933db_idx; \
    \
    /* NOTE(review): the table walks below are only safe for values \
       at or above 0xffff if STANDARD_TO_LOOP_ERR_HANDLER always leaves \
       the current iteration -- confirm against iconv/loop.c.  */ \
    if (__glibc_unlikely (ch >= 0xffff)) \
      { \
        UNICODE_TAG_HANDLER (ch, 4); \
        \
        STANDARD_TO_LOOP_ERR_HANDLER (4); \
      } \
    \
    while (ch > rp1->end) \
      ++rp1; \
    \
    /* Use the UCS4 table for single byte.  */ \
    unsigned char sbconv; \
    if (__builtin_expect (ch < rp1->start, 0) \
        || (sbconv = __ucs4_to_ibm933sb[ch + rp1->idx], \
            __builtin_expect (sbconv, L'\1') == L'\0' && ch != '\0')) \
      { \
        /* Use the UCS4 table for double byte.  */ \
        while (ch > rp2->end) \
          ++rp2; \
        \
        const char *cp; \
        if (__builtin_expect (ch < rp2->start, 0) \
            || (cp = __ucs4_to_ibm933db[ch + rp2->idx], \
                __builtin_expect (cp[0], L'\1')==L'\0' && ch != '\0')) \
          { \
            /* This is an illegal character.  */ \
            STANDARD_TO_LOOP_ERR_HANDLER (4); \
          } \
        else \
          { \
            /* Switch to double-byte mode first if necessary.  The SO \
               byte is stored and `curcs' updated before the room for \
               the character itself is checked.  */ \
            if (curcs == sb) \
              { \
                if (__glibc_unlikely (outptr + 1 > outend)) \
                  { \
                    result = __GCONV_FULL_OUTPUT; \
                    break; \
                  } \
                *outptr++ = SO; \
                curcs = db; \
              } \
            \
            if (__glibc_unlikely (outptr + 2 > outend)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            *outptr++ = cp[0]; \
            *outptr++ = cp[1]; \
          } \
      } \
    else \
      { \
        /* Switch back to single-byte mode first if necessary.  The SI \
           byte is stored and `curcs' updated before the room for the \
           character itself is checked.  */ \
        if (curcs == db) \
          { \
            if (__glibc_unlikely (outptr + 1 > outend)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            *outptr++ = SI; \
            curcs = sb; \
          } \
        \
        if (__glibc_unlikely (outptr + 1 > outend)) \
          { \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        *outptr++ = sbconv; \
      } \
    \
    /* Now that we wrote the output increment the input pointer.  */ \
    inptr += 4; \
  }
#define LOOP_NEED_FLAGS
/* Same state plumbing as for the other direction.  In addition
   REINIT_PARAMS reloads `curcs' from the state word.
   NOTE(review): where REINIT_PARAMS is expanded is decided by
   iconv/loop.c, which is not visible here -- confirm there.  */
#define EXTRA_LOOP_DECLS , int *curcsp
#define INIT_PARAMS int curcs = *curcsp & ~7
#define REINIT_PARAMS curcs = *curcsp & ~7
#define UPDATE_PARAMS *curcsp = curcs
#include <iconv/loop.c>

/* Now define the toplevel functions.  */
#include <iconv/skeleton.c>