1 /* Conversion from and to IBM1364. 2 Copyright (C) 2005-2022 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, see 17 <https://www.gnu.org/licenses/>. */ 18 19 #include <dlfcn.h> 20 #include <stdint.h> 21 #include <wchar.h> 22 #include <byteswap.h> 23 24 #ifndef CHARSET_NAME 25 /* This is really the IBM1364 converter, not another module sharing 26 the code. */ 27 # define DATA_HEADER "ibm1364.h" 28 # define CHARSET_NAME "IBM1364//" 29 # define FROM_LOOP from_ibm1364 30 # define TO_LOOP to_ibm1364 31 # define SB_TO_UCS4 __ibm1364sb_to_ucs4 32 # define DB_TO_UCS4_IDX __ibm1364db_to_ucs4_idx 33 # define DB_TO_UCS4 __ibm1364db_to_ucs4 34 # define UCS4_TO_SB_IDX __ucs4_to_ibm1364sb_idx 35 # define UCS4_TO_SB __ucs4_to_ibm1364sb 36 # define UCS4_TO_DB_IDX __ucs4_to_ibm1364db_idx 37 # define UCS4_TO_DB __ucs4_to_ibm1364db 38 # define UCS_LIMIT 0xffff 39 #endif 40 41 42 #include DATA_HEADER 43 44 /* The shift sequences for this charset (it does not use ESC). */ 45 #define SI 0x0F /* Shift In, host code to turn DBCS off. */ 46 #define SO 0x0E /* Shift Out, host code to turn DBCS on. */ 47 48 /* Definitions used in the body of the `gconv' function. */ 49 #define MIN_NEEDED_FROM 1 50 #define MAX_NEEDED_FROM 2 51 #define MIN_NEEDED_TO 4 52 #ifdef HAS_COMBINED 53 # define MAX_NEEDED_TO 8 54 #else 55 # define MAX_NEEDED_TO 4 56 #endif 57 #define ONE_DIRECTION 0 58 #define PREPARE_LOOP \ 59 int save_curcs; \ 60 int *curcsp = &data->__statep->__count; 61 #define EXTRA_LOOP_ARGS , curcsp 62 63 /* Definitions of initialization and destructor function. */ 64 #define DEFINE_INIT 1 65 #define DEFINE_FINI 1 66 67 68 /* Since this is a stateful encoding we have to provide code which resets 69 the output state to the initial state. This has to be done during the 70 flushing. */ 71 #define EMIT_SHIFT_TO_INIT \ 72 if ((data->__statep->__count & ~7) != sb) \ 73 { \ 74 if (FROM_DIRECTION) \ 75 data->__statep->__count &= 7; \ 76 else \ 77 { \ 78 /* We are not in the initial state. To switch back we have \ 79 to emit `SI'. */ \ 80 if (__glibc_unlikely (outbuf >= outend)) \ 81 /* We don't have enough room in the output buffer. */ \ 82 status = __GCONV_FULL_OUTPUT; \ 83 else \ 84 { \ 85 /* Write out the shift sequence. */ \ 86 *outbuf++ = SI; \ 87 data->__statep->__count &= 7; \ 88 } \ 89 } \ 90 } 91 92 93 /* Since we might have to reset input pointer we must be able to save 94 and retore the state. */ 95 #define SAVE_RESET_STATE(Save) \ 96 if (Save) \ 97 save_curcs = *curcsp; \ 98 else \ 99 *curcsp = save_curcs 100 101 102 /* Current codeset type. */ 103 enum 104 { 105 sb = 0, 106 db = 64 107 }; 108 109 110 /* Subroutine to write out converted UCS4 from IBM-13XX. */ 111 #ifdef HAS_COMBINED 112 # define SUB_COMBINED_UCS_FROM_IBM13XX \ 113 { \ 114 if (res != UCS_LIMIT || ch < __TO_UCS4_COMBINED_MIN \ 115 || ch > __TO_UCS4_COMBINED_MAX) \ 116 { \ 117 put32 (outptr, res); \ 118 outptr += 4; \ 119 } \ 120 else \ 121 { \ 122 /* This is a combined character. Make sure we have room. */ \ 123 if (__glibc_unlikely (outptr + 8 > outend)) \ 124 { \ 125 result = __GCONV_FULL_OUTPUT; \ 126 break; \ 127 } \ 128 \ 129 const struct divide *cmbp \ 130 = &DB_TO_UCS4_COMB[ch - __TO_UCS4_COMBINED_MIN]; \ 131 assert (cmbp->res1 != 0 && cmbp->res2 != 0); \ 132 \ 133 put32 (outptr, cmbp->res1); \ 134 outptr += 4; \ 135 put32 (outptr, cmbp->res2); \ 136 outptr += 4; \ 137 } \ 138 } 139 #else 140 # define SUB_COMBINED_UCS_FROM_IBM13XX \ 141 { \ 142 put32 (outptr, res); \ 143 outptr += 4; \ 144 } 145 #endif /* HAS_COMBINED */ 146 147 148 /* First, define the conversion function from IBM-13XX to UCS4. */ 149 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM 150 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM 151 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO 152 #define MAX_NEEDED_OUTPUT MAX_NEEDED_TO 153 #define LOOPFCT FROM_LOOP 154 #define BODY \ 155 { \ 156 uint32_t ch = *inptr; \ 157 \ 158 if (__builtin_expect (ch, 0) == SO) \ 159 { \ 160 /* Shift OUT, change to DBCS converter (redundant escape okay). */ \ 161 curcs = db; \ 162 ++inptr; \ 163 continue; \ 164 } \ 165 if (__builtin_expect (ch, 0) == SI) \ 166 { \ 167 /* Shift IN, change to SBCS converter (redundant escape okay). */ \ 168 curcs = sb; \ 169 ++inptr; \ 170 continue; \ 171 } \ 172 \ 173 if (curcs == sb) \ 174 { \ 175 /* Use the IBM13XX table for single byte. */ \ 176 uint32_t res = SB_TO_UCS4[ch]; \ 177 if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0') \ 178 { \ 179 /* This is an illegal character. */ \ 180 if (! ignore_errors_p ()) \ 181 { \ 182 result = __GCONV_ILLEGAL_INPUT; \ 183 break; \ 184 } \ 185 ++*irreversible; \ 186 } \ 187 else \ 188 { \ 189 put32 (outptr, res); \ 190 outptr += 4; \ 191 } \ 192 ++inptr; \ 193 } \ 194 else \ 195 { \ 196 assert (curcs == db); \ 197 \ 198 if (__glibc_unlikely (inptr + 1 >= inend)) \ 199 { \ 200 /* The second character is not available. Store the \ 201 intermediate result. */ \ 202 result = __GCONV_INCOMPLETE_INPUT; \ 203 break; \ 204 } \ 205 \ 206 ch = (ch * 0x100) + inptr[1]; \ 207 \ 208 /* Use the IBM1364 table for double byte. */ \ 209 const struct gap *rp2 = DB_TO_UCS4_IDX; \ 210 while (ch > rp2->end) \ 211 ++rp2; \ 212 \ 213 uint32_t res; \ 214 if (__builtin_expect (rp2->start == 0xffff, 0) \ 215 || __builtin_expect (ch < rp2->start, 0) \ 216 || (res = DB_TO_UCS4[ch + rp2->idx], \ 217 __builtin_expect (res, L'\1') == L'\0' && ch != '\0')) \ 218 { \ 219 /* This is an illegal character. */ \ 220 if (! ignore_errors_p ()) \ 221 { \ 222 result = __GCONV_ILLEGAL_INPUT; \ 223 break; \ 224 } \ 225 ++*irreversible; \ 226 } \ 227 else \ 228 { \ 229 SUB_COMBINED_UCS_FROM_IBM13XX; \ 230 } \ 231 inptr += 2; \ 232 } \ 233 } 234 #define LOOP_NEED_FLAGS 235 #define EXTRA_LOOP_DECLS , int *curcsp 236 #define INIT_PARAMS int curcs = *curcsp & ~7 237 #define UPDATE_PARAMS *curcsp = curcs 238 #include <iconv/loop.c> 239 240 241 /* Subroutine to convert two UCS4 codes to IBM-13XX. */ 242 #ifdef HAS_COMBINED 243 # define SUB_COMBINED_UCS_TO_IBM13XX \ 244 { \ 245 const struct combine *cmbp = UCS4_COMB_TO_DB; \ 246 while (cmbp->res1 < ch) \ 247 ++cmbp; \ 248 /* XXX if last char is beginning of combining store in state */ \ 249 if (cmbp->res1 == ch && inptr + 4 < inend) \ 250 { \ 251 /* See if input is part of a combined character. */ \ 252 uint32_t ch_next = get32 (inptr + 4); \ 253 while (cmbp->res2 != ch_next) \ 254 { \ 255 ++cmbp; \ 256 if (cmbp->res1 != ch) \ 257 goto not_combined; \ 258 } \ 259 \ 260 /* It is a combined character. First make sure we are in \ 261 double byte mode. */ \ 262 if (curcs == sb) \ 263 { \ 264 /* We know there is room for at least one byte. */ \ 265 *outptr++ = SO; \ 266 curcs = db; \ 267 } \ 268 \ 269 if (__glibc_unlikely (outptr + 2 > outend)) \ 270 { \ 271 result = __GCONV_FULL_OUTPUT; \ 272 break; \ 273 } \ 274 *outptr++ = cmbp->ch[0]; \ 275 *outptr++ = cmbp->ch[1]; \ 276 inptr += 8; \ 277 continue; \ 278 \ 279 not_combined:; \ 280 } \ 281 } 282 #else 283 # define SUB_COMBINED_UCS_TO_IBM13XX 284 #endif /* HAS_COMBINED */ 285 286 287 /* Next, define the other direction. */ 288 #define MIN_NEEDED_INPUT MIN_NEEDED_TO 289 #define MAX_NEEDED_INPUT MAX_NEEDED_TO 290 #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM 291 #define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM 292 #define LOOPFCT TO_LOOP 293 #define BODY \ 294 { \ 295 uint32_t ch = get32 (inptr); \ 296 \ 297 if (__glibc_unlikely (ch >= UCS_LIMIT)) \ 298 { \ 299 UNICODE_TAG_HANDLER (ch, 4); \ 300 \ 301 if (! ignore_errors_p ()) \ 302 { \ 303 result = __GCONV_ILLEGAL_INPUT; \ 304 break; \ 305 } \ 306 ++*irreversible; \ 307 inptr += 4; \ 308 continue; \ 309 } \ 310 \ 311 SUB_COMBINED_UCS_TO_IBM13XX; \ 312 \ 313 const struct gap *rp1 = UCS4_TO_SB_IDX; \ 314 while (ch > rp1->end) \ 315 ++rp1; \ 316 \ 317 /* Use the UCS4 table for single byte. */ \ 318 const char *cp; \ 319 if (__builtin_expect (ch < rp1->start, 0) \ 320 || (cp = UCS4_TO_SB[ch + rp1->idx], \ 321 __builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0')) \ 322 { \ 323 /* Use the UCS4 table for double byte. */ \ 324 const struct gap *rp2 = UCS4_TO_DB_IDX; \ 325 while (ch > rp2->end) \ 326 ++rp2; \ 327 \ 328 if (__builtin_expect (ch < rp2->start, 0) \ 329 || (cp = UCS4_TO_DB[ch + rp2->idx], \ 330 __builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0')) \ 331 { \ 332 /* This is an illegal character. */ \ 333 if (! ignore_errors_p ()) \ 334 { \ 335 result = __GCONV_ILLEGAL_INPUT; \ 336 break; \ 337 } \ 338 ++*irreversible; \ 339 } \ 340 else \ 341 { \ 342 if (curcs == sb) \ 343 { \ 344 /* We know there is room for at least one byte. */ \ 345 *outptr++ = SO; \ 346 curcs = db; \ 347 } \ 348 \ 349 if (__glibc_unlikely (outptr + 2 > outend)) \ 350 { \ 351 result = __GCONV_FULL_OUTPUT; \ 352 break; \ 353 } \ 354 *outptr++ = cp[0]; \ 355 *outptr++ = cp[1]; \ 356 } \ 357 } \ 358 else \ 359 { \ 360 if (__glibc_unlikely (curcs == db)) \ 361 { \ 362 /* We know there is room for at least one byte. */ \ 363 *outptr++ = SI; \ 364 curcs = sb; \ 365 \ 366 if (__glibc_unlikely (outptr >= outend)) \ 367 { \ 368 result = __GCONV_FULL_OUTPUT; \ 369 break; \ 370 } \ 371 } \ 372 \ 373 *outptr++ = cp[0]; \ 374 } \ 375 \ 376 /* Now that we wrote the output increment the input pointer. */ \ 377 inptr += 4; \ 378 } 379 #define LOOP_NEED_FLAGS 380 #define EXTRA_LOOP_DECLS , int *curcsp 381 #define INIT_PARAMS int curcs = *curcsp & ~7 382 #define REINIT_PARAMS curcs = *curcsp & ~7 383 #define UPDATE_PARAMS *curcsp = curcs 384 #include <iconv/loop.c> 385 386 /* Now define the toplevel functions. */ 387 #include <iconv/skeleton.c> 388