1 /* Conversion from and to TSCII. 2 Copyright (C) 2002-2022 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, see 17 <https://www.gnu.org/licenses/>. */ 18 19 #include <dlfcn.h> 20 #include <stdint.h> 21 #include <gconv.h> 22 #include <assert.h> 23 24 /* TSCII is an 8-bit encoding consisting of: 25 0x00..0x7F: ASCII 26 0x80..0x90, 0x95..0x9F, 0xAB..0xFE: 27 Tamil letters and glyphs 28 0xA1..0xA5, 0xAA: Tamil combining letters (after the base character) 29 0xA6..0xA8: Tamil combining letters (before the base character) 30 0x91..0x94: Punctuation 31 0xA9: Symbols 32 */ 33 34 /* Definitions used in the body of the `gconv' function. */ 35 #define CHARSET_NAME "TSCII//" 36 #define FROM_LOOP from_tscii 37 #define TO_LOOP to_tscii 38 #define DEFINE_INIT 1 39 #define DEFINE_FINI 1 40 #define ONE_DIRECTION 0 41 #define FROM_LOOP_MIN_NEEDED_FROM 1 42 #define FROM_LOOP_MAX_NEEDED_FROM 2 43 #define FROM_LOOP_MIN_NEEDED_TO 4 44 #define FROM_LOOP_MAX_NEEDED_TO 16 45 #define TO_LOOP_MIN_NEEDED_FROM 4 46 #define TO_LOOP_MAX_NEEDED_FROM 4 47 #define TO_LOOP_MIN_NEEDED_TO 1 48 #define TO_LOOP_MAX_NEEDED_TO 3 49 #define PREPARE_LOOP \ 50 int saved_state; \ 51 int *statep = &data->__statep->__count; 52 #define EXTRA_LOOP_ARGS , statep 53 54 55 /* Since we might have to reset input pointer we must be able to save 56 and restore the state. */ 57 #define SAVE_RESET_STATE(Save) \ 58 if (Save) \ 59 saved_state = *statep; \ 60 else \ 61 *statep = saved_state 62 63 64 /* During TSCII to UCS-4 conversion, the COUNT element of the state contains 65 the last UCS-4 character to be output, shifted by 8 bits, and an encoded 66 representation of additional UCS-4 characters to be output (if any), 67 shifted by 4 bits. This character can be: 68 0x0000 Nothing pending. 69 0x0BCD Pending VIRAMA sign. If bit 3 is set, it may be 70 omitted if followed by a vowel sign U or UU. 71 0x0BC6, 0x0BC7, 0x0BC8 Pending vowel sign. Bit 3 is set after the 72 consonant was seen. 73 Other Bit 3 always cleared. */ 74 75 /* During UCS-4 to TSCII conversion, the COUNT element of the state contains 76 the last byte (or sometimes the last two bytes) to be output, shifted by 77 3 bits. This can be: 78 0x00 Nothing pending. 79 0xB8..0xC9, 0x83..0x86 A consonant. 80 0xEC, 0x8A A consonant with VIRAMA sign (final or joining). 81 0x87, 0xC38A Two consonants combined through a VIRAMA sign. */ 82 83 /* Since this is a stateful encoding we have to provide code which resets 84 the output state to the initial state. This has to be done during the 85 flushing. */ 86 #define EMIT_SHIFT_TO_INIT \ 87 if (data->__statep->__count != 0) \ 88 { \ 89 if (FROM_DIRECTION) \ 90 { \ 91 do \ 92 { \ 93 if (__glibc_unlikely (outbuf + 4 > outend)) \ 94 { \ 95 /* We don't have enough room in the output buffer. */ \ 96 status = __GCONV_FULL_OUTPUT; \ 97 break; \ 98 } \ 99 /* Write out the pending character. */ \ 100 *((uint32_t *) outbuf) = data->__statep->__count >> 8; \ 101 outbuf += sizeof (uint32_t); \ 102 /* Retrieve the successor state. */ \ 103 data->__statep->__count = \ 104 tscii_next_state[(data->__statep->__count >> 4) & 0x0f]; \ 105 } \ 106 while (data->__statep->__count != 0); \ 107 } \ 108 else \ 109 { \ 110 uint32_t last = data->__statep->__count >> 3; \ 111 if (__glibc_unlikely (last >> 8)) \ 112 { \ 113 /* Write out the last character, two bytes. */ \ 114 if (__glibc_likely (outbuf + 2 <= outend)) \ 115 { \ 116 *outbuf++ = last & 0xff; \ 117 *outbuf++ = (last >> 8) & 0xff; \ 118 data->__statep->__count = 0; \ 119 } \ 120 else \ 121 /* We don't have enough room in the output buffer. */ \ 122 status = __GCONV_FULL_OUTPUT; \ 123 } \ 124 else \ 125 { \ 126 /* Write out the last character, a single byte. */ \ 127 if (__glibc_likely (outbuf < outend)) \ 128 { \ 129 *outbuf++ = last & 0xff; \ 130 data->__statep->__count = 0; \ 131 } \ 132 else \ 133 /* We don't have enough room in the output buffer. */ \ 134 status = __GCONV_FULL_OUTPUT; \ 135 } \ 136 } \ 137 } 138 139 140 /* First define the conversion function from TSCII to UCS-4. */ 141 142 static const uint16_t tscii_to_ucs4[128][2] = 143 { 144 { 0x0BE6, 0 }, 145 { 0x0BE7, 0 }, 146 { 0, 0 }, /* 0x82 - maps to <U0BB8><U0BCD><U0BB0><U0BC0> */ 147 { 0x0B9C, 0 }, 148 { 0x0BB7, 0 }, 149 { 0x0BB8, 0 }, 150 { 0x0BB9, 0 }, 151 { 0, 0 }, /* 0x87 - maps to <U0B95><U0BCD><U0BB7> */ 152 { 0x0B9C, 0x0BCD }, 153 { 0x0BB7, 0x0BCD }, 154 { 0, 0 }, /* 0x8a - maps to <U0BB8> and buffers <U0BCD> */ 155 { 0, 0 }, /* 0x8b - maps to <U0BB9> and buffers <U0BCD> */ 156 { 0, 0 }, /* 0x8c - maps to <U0B95><U0BCD><U0BB7><U0BCD> */ 157 { 0x0BE8, 0 }, 158 { 0x0BE9, 0 }, 159 { 0x0BEA, 0 }, 160 { 0x0BEB, 0 }, 161 { 0x2018, 0 }, 162 { 0x2019, 0 }, 163 { 0x201C, 0 }, 164 { 0x201D, 0 }, 165 { 0x0BEC, 0 }, 166 { 0x0BED, 0 }, 167 { 0x0BEE, 0 }, 168 { 0x0BEF, 0 }, 169 { 0x0B99, 0x0BC1 }, 170 { 0x0B9E, 0x0BC1 }, 171 { 0x0B99, 0x0BC2 }, 172 { 0x0B9E, 0x0BC2 }, 173 { 0x0BF0, 0 }, 174 { 0x0BF1, 0 }, 175 { 0x0BF2, 0 }, 176 { 0, 0 }, /* 0xa0 - unmapped */ 177 { 0x0BBE, 0 }, 178 { 0x0BBF, 0 }, 179 { 0x0BC0, 0 }, 180 { 0x0BC1, 0 }, 181 { 0x0BC2, 0 }, 182 { 0, 0 }, /* 0xa6 - buffers <U0BC6> */ 183 { 0, 0 }, /* 0xa7 - buffers <U0BC7> */ 184 { 0, 0 }, /* 0xa8 - buffers <U0BC8> */ 185 { 0x00A9, 0 }, 186 { 0x0BD7, 0 }, 187 { 0x0B85, 0 }, 188 { 0x0B86, 0 }, 189 { 0x0B87, 0 }, 190 { 0x0B88, 0 }, 191 { 0x0B89, 0 }, 192 { 0x0B8A, 0 }, 193 { 0x0B8E, 0 }, 194 { 0x0B8F, 0 }, 195 { 0x0B90, 0 }, 196 { 0x0B92, 0 }, 197 { 0x0B93, 0 }, 198 { 0x0B94, 0 }, 199 { 0x0B83, 0 }, 200 { 0x0B95, 0 }, 201 { 0x0B99, 0 }, 202 { 0x0B9A, 0 }, 203 { 0x0B9E, 0 }, 204 { 0x0B9F, 0 }, 205 { 0x0BA3, 0 }, 206 { 0x0BA4, 0 }, 207 { 0x0BA8, 0 }, 208 { 0x0BAA, 0 }, 209 { 0x0BAE, 0 }, 210 { 0x0BAF, 0 }, 211 { 0x0BB0, 0 }, 212 { 0x0BB2, 0 }, 213 { 0x0BB5, 0 }, 214 { 0x0BB4, 0 }, 215 { 0x0BB3, 0 }, 216 { 0x0BB1, 0 }, 217 { 0x0BA9, 0 }, 218 { 0x0B9F, 0x0BBF }, 219 { 0x0B9F, 0x0BC0 }, 220 { 0x0B95, 0x0BC1 }, 221 { 0x0B9A, 0x0BC1 }, 222 { 0x0B9F, 0x0BC1 }, 223 { 0x0BA3, 0x0BC1 }, 224 { 0x0BA4, 0x0BC1 }, 225 { 0x0BA8, 0x0BC1 }, 226 { 0x0BAA, 0x0BC1 }, 227 { 0x0BAE, 0x0BC1 }, 228 { 0x0BAF, 0x0BC1 }, 229 { 0x0BB0, 0x0BC1 }, 230 { 0x0BB2, 0x0BC1 }, 231 { 0x0BB5, 0x0BC1 }, 232 { 0x0BB4, 0x0BC1 }, 233 { 0x0BB3, 0x0BC1 }, 234 { 0x0BB1, 0x0BC1 }, 235 { 0x0BA9, 0x0BC1 }, 236 { 0x0B95, 0x0BC2 }, 237 { 0x0B9A, 0x0BC2 }, 238 { 0x0B9F, 0x0BC2 }, 239 { 0x0BA3, 0x0BC2 }, 240 { 0x0BA4, 0x0BC2 }, 241 { 0x0BA8, 0x0BC2 }, 242 { 0x0BAA, 0x0BC2 }, 243 { 0x0BAE, 0x0BC2 }, 244 { 0x0BAF, 0x0BC2 }, 245 { 0x0BB0, 0x0BC2 }, 246 { 0x0BB2, 0x0BC2 }, 247 { 0x0BB5, 0x0BC2 }, 248 { 0x0BB4, 0x0BC2 }, 249 { 0x0BB3, 0x0BC2 }, 250 { 0x0BB1, 0x0BC2 }, 251 { 0x0BA9, 0x0BC2 }, 252 { 0x0B95, 0x0BCD }, 253 { 0x0B99, 0x0BCD }, 254 { 0x0B9A, 0x0BCD }, 255 { 0x0B9E, 0x0BCD }, 256 { 0x0B9F, 0x0BCD }, 257 { 0x0BA3, 0x0BCD }, 258 { 0x0BA4, 0x0BCD }, 259 { 0x0BA8, 0x0BCD }, 260 { 0x0BAA, 0x0BCD }, 261 { 0x0BAE, 0x0BCD }, 262 { 0x0BAF, 0x0BCD }, 263 { 0x0BB0, 0x0BCD }, 264 { 0x0BB2, 0x0BCD }, 265 { 0x0BB5, 0x0BCD }, 266 { 0x0BB4, 0x0BCD }, 267 { 0x0BB3, 0x0BCD }, 268 { 0x0BB1, 0x0BCD }, 269 { 0x0BA9, 0x0BCD }, 270 { 0x0B87, 0 }, 271 { 0, 0 } /* 0xff - unmapped */ 272 }; 273 274 static const uint32_t tscii_next_state[6] = 275 { 276 /* 0 means no more pending Unicode characters. */ 277 0, 278 /* 1 means <U0BB7>. */ 279 (0x0BB7 << 8), 280 /* 2 means <U0BC0>. */ 281 (0x0BC0 << 8), 282 /* 3 means <U0BCD>. */ 283 (0x0BCD << 8), 284 /* 4 means <U0BB0><U0BC0>. */ 285 (0x0BB0 << 8) + (2 << 4), 286 /* 5 means <U0BB7><U0BCD>. */ 287 (0x0BB7 << 8) + (3 << 4) 288 }; 289 290 #define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM 291 #define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM 292 #define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO 293 #define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO 294 #define LOOPFCT FROM_LOOP 295 #define BODY \ 296 { \ 297 uint32_t ch = *inptr; \ 298 \ 299 if ((*statep >> 8) != 0) \ 300 { \ 301 /* Attempt to combine the last character with this one. */ \ 302 uint32_t last = *statep >> 8; \ 303 \ 304 if (last == 0x0BCD && (*statep & (1 << 3))) \ 305 { \ 306 if (ch == 0xa4 || ch == 0xa5) \ 307 { \ 308 ch += 0xb1d; \ 309 /* Now ch = 0x0BC1 or ch = 0x0BC2. */ \ 310 put32 (outptr, ch); \ 311 outptr += 4; \ 312 *statep = 0; \ 313 inptr++; \ 314 continue; \ 315 } \ 316 } \ 317 else if (last >= 0x0BC6 && last <= 0x0BC8) \ 318 { \ 319 if ((last == 0x0BC6 && ch == 0xa1) \ 320 || (last == 0x0BC7 && (ch == 0xa1 || ch == 0xaa))) \ 321 { \ 322 ch = last + 4 + (ch != 0xa1); \ 323 /* Now ch = 0x0BCA or ch = 0x0BCB or ch = 0x0BCC. */ \ 324 put32 (outptr, ch); \ 325 outptr += 4; \ 326 *statep = 0; \ 327 inptr++; \ 328 continue; \ 329 } \ 330 if ((ch >= 0xb8 && ch <= 0xc9) && (*statep & (1 << 3)) == 0) \ 331 { \ 332 ch = tscii_to_ucs4[ch - 0x80][0]; \ 333 put32 (outptr, ch); \ 334 outptr += 4; \ 335 *statep |= 1 << 3; \ 336 inptr++; \ 337 continue; \ 338 } \ 339 } \ 340 \ 341 do \ 342 { \ 343 /* Output the buffered character. */ \ 344 put32 (outptr, last); \ 345 outptr += 4; \ 346 /* Retrieve the successor state. */ \ 347 *statep = tscii_next_state[(*statep >> 4) & 0x0f]; \ 348 } \ 349 while (*statep != 0 && __builtin_expect (outptr + 4 <= outend, 1)); \ 350 \ 351 if (*statep != 0) \ 352 { \ 353 /* We don't have enough room in the output buffer. \ 354 Tell the caller why we terminate the loop. */ \ 355 result = __GCONV_FULL_OUTPUT; \ 356 break; \ 357 } \ 358 \ 359 continue; \ 360 } \ 361 \ 362 if (ch < 0x80) \ 363 { \ 364 /* Plain ASCII character. */ \ 365 put32 (outptr, ch); \ 366 outptr += 4; \ 367 } \ 368 else \ 369 { \ 370 /* Tamil character. */ \ 371 uint32_t u1 = tscii_to_ucs4[ch - 0x80][0]; \ 372 \ 373 if (u1 != 0) \ 374 { \ 375 uint32_t u2 = tscii_to_ucs4[ch - 0x80][1]; \ 376 \ 377 inptr++; \ 378 \ 379 put32 (outptr, u1); \ 380 outptr += 4; \ 381 \ 382 if (u2 != 0) \ 383 { \ 384 /* See whether we have room for two characters. Otherwise \ 385 store only the first character now, and put the second \ 386 one into the queue. */ \ 387 if (__glibc_unlikely (outptr + 4 > outend)) \ 388 { \ 389 *statep = u2 << 8; \ 390 result = __GCONV_FULL_OUTPUT; \ 391 break; \ 392 } \ 393 put32 (outptr, u2); \ 394 outptr += 4; \ 395 } \ 396 continue; \ 397 } \ 398 /* Special handling of a few Tamil characters. */ \ 399 else if (ch == 0xa6 || ch == 0xa7 || ch == 0xa8) \ 400 { \ 401 ch += 0x0b20; \ 402 /* Now ch = 0x0BC6 or ch = 0x0BC7 or ch = 0x0BC8. */ \ 403 *statep = ch << 8; \ 404 inptr++; \ 405 continue; \ 406 } \ 407 else if (ch == 0x8a || ch == 0x8b) \ 408 { \ 409 ch += 0x0b2e; \ 410 /* Now ch = 0x0BB8 or ch = 0x0BB9. */ \ 411 put32 (outptr, ch); \ 412 outptr += 4; \ 413 *statep = (0x0BCD << 8) + (1 << 3); \ 414 inptr++; \ 415 continue; \ 416 } \ 417 else if (ch == 0x82) \ 418 { \ 419 /* Output <U0BB8><U0BCD><U0BB0><U0BC0>, if we have room for \ 420 four characters. */ \ 421 inptr++; \ 422 put32 (outptr, 0x0BB8); \ 423 outptr += 4; \ 424 if (__glibc_unlikely (outptr + 4 > outend)) \ 425 { \ 426 *statep = (0x0BCD << 8) + (4 << 4); \ 427 result = __GCONV_FULL_OUTPUT; \ 428 break; \ 429 } \ 430 put32 (outptr, 0x0BCD); \ 431 outptr += 4; \ 432 if (__glibc_unlikely (outptr + 4 > outend)) \ 433 { \ 434 *statep = (0x0BB0 << 8) + (2 << 4); \ 435 result = __GCONV_FULL_OUTPUT; \ 436 break; \ 437 } \ 438 put32 (outptr, 0x0BB0); \ 439 outptr += 4; \ 440 if (__glibc_unlikely (outptr + 4 > outend)) \ 441 { \ 442 *statep = (0x0BC0 << 8); \ 443 result = __GCONV_FULL_OUTPUT; \ 444 break; \ 445 } \ 446 put32 (outptr, 0x0BC0); \ 447 outptr += 4; \ 448 continue; \ 449 } \ 450 else if (ch == 0x87) \ 451 { \ 452 /* Output <U0B95><U0BCD><U0BB7>, if we have room for \ 453 three characters. */ \ 454 inptr++; \ 455 put32 (outptr, 0x0B95); \ 456 outptr += 4; \ 457 if (__glibc_unlikely (outptr + 4 > outend)) \ 458 { \ 459 *statep = (0x0BCD << 8) + (1 << 4); \ 460 result = __GCONV_FULL_OUTPUT; \ 461 break; \ 462 } \ 463 put32 (outptr, 0x0BCD); \ 464 outptr += 4; \ 465 if (__glibc_unlikely (outptr + 4 > outend)) \ 466 { \ 467 *statep = (0x0BB7 << 8); \ 468 result = __GCONV_FULL_OUTPUT; \ 469 break; \ 470 } \ 471 put32 (outptr, 0x0BB7); \ 472 outptr += 4; \ 473 continue; \ 474 } \ 475 else if (ch == 0x8c) \ 476 { \ 477 /* Output <U0B95><U0BCD><U0BB7><U0BCD>, if we have room for \ 478 four characters. */ \ 479 inptr++; \ 480 put32 (outptr, 0x0B95); \ 481 outptr += 4; \ 482 if (__glibc_unlikely (outptr + 4 > outend)) \ 483 { \ 484 *statep = (0x0BCD << 8) + (5 << 4); \ 485 result = __GCONV_FULL_OUTPUT; \ 486 break; \ 487 } \ 488 put32 (outptr, 0x0BCD); \ 489 outptr += 4; \ 490 if (__glibc_unlikely (outptr + 4 > outend)) \ 491 { \ 492 *statep = (0x0BB7 << 8) + (3 << 4); \ 493 result = __GCONV_FULL_OUTPUT; \ 494 break; \ 495 } \ 496 put32 (outptr, 0x0BB7); \ 497 outptr += 4; \ 498 if (__glibc_unlikely (outptr + 4 > outend)) \ 499 { \ 500 *statep = (0x0BCD << 8); \ 501 result = __GCONV_FULL_OUTPUT; \ 502 break; \ 503 } \ 504 put32 (outptr, 0x0BCD); \ 505 outptr += 4; \ 506 continue; \ 507 } \ 508 else \ 509 { \ 510 /* This is illegal. */ \ 511 STANDARD_FROM_LOOP_ERR_HANDLER (1); \ 512 } \ 513 } \ 514 \ 515 /* Now that we wrote the output increment the input pointer. */ \ 516 inptr++; \ 517 } 518 #define LOOP_NEED_FLAGS 519 #define EXTRA_LOOP_DECLS , int *statep 520 #include <iconv/loop.c> 521 522 523 /* Next, define the other direction, from UCS-4 to TSCII. */ 524 525 static const uint8_t ucs4_to_tscii[128] = 526 { 527 0, 0, 0, 0xb7, 0, 0xab, 0xac, 0xfe, /* 0x0B80..0x0B87 */ 528 0xae, 0xaf, 0xb0, 0, 0, 0, 0xb1, 0xb2, /* 0x0B88..0x0B8F */ 529 0xb3, 0, 0xb4, 0xb5, 0xb6, 0xb8, 0, 0, /* 0x0B90..0x0B97 */ 530 0, 0xb9, 0xba, 0, 0x83, 0, 0xbb, 0xbc, /* 0x0B98..0x0B9F */ 531 0, 0, 0, 0xbd, 0xbe, 0, 0, 0, /* 0x0BA0..0x0BA7 */ 532 0xbf, 0xc9, 0xc0, 0, 0, 0, 0xc1, 0xc2, /* 0x0BA8..0x0BAF */ 533 0xc3, 0xc8, 0xc4, 0xc7, 0xc6, 0xc5, 0, 0x84, /* 0x0BB0..0x0BB7 */ 534 0x85, 0x86, 0, 0, 0, 0, 0xa1, 0xa2, /* 0x0BB8..0x0BBF */ 535 0xa3, 0xa4, 0xa5, 0, 0, 0, 0xa6, 0xa7, /* 0x0BC0..0x0BC7 */ 536 0xa8, 0, 0, 0, 0, 0, 0, 0, /* 0x0BC8..0x0BCF */ 537 0, 0, 0, 0, 0, 0, 0, 0xaa, /* 0x0BD0..0x0BD7 */ 538 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0BD8..0x0BDF */ 539 0, 0, 0, 0, 0, 0, 0x80, 0x81, /* 0x0BE0..0x0BE7 */ 540 0x8d, 0x8e, 0x8f, 0x90, 0x95, 0x96, 0x97, 0x98, /* 0x0BE8..0x0BEF */ 541 0x9d, 0x9e, 0x9f, 0, 0, 0, 0, 0, /* 0x0BF0..0x0BF7 */ 542 0, 0, 0, 0, 0, 0, 0, 0 /* 0x0BF8..0x0BFF */ 543 }; 544 545 static const uint8_t consonant_with_u[18] = 546 { 547 0xcc, 0x99, 0xcd, 0x9a, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 548 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb 549 }; 550 551 static const uint8_t consonant_with_uu[18] = 552 { 553 0xdc, 0x9b, 0xdd, 0x9c, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 554 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb 555 }; 556 557 static const uint8_t consonant_with_virama[18] = 558 { 559 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 560 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd 561 }; 562 563 #define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM 564 #define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM 565 #define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO 566 #define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO 567 #define LOOPFCT TO_LOOP 568 #define BODY \ 569 { \ 570 uint32_t ch = get32 (inptr); \ 571 \ 572 if ((*statep >> 3) != 0) \ 573 { \ 574 /* Attempt to combine the last character with this one. */ \ 575 uint32_t last = *statep >> 3; \ 576 \ 577 if (last >= 0xb8 && last <= 0xc9) \ 578 { \ 579 if (ch == 0x0BC1) \ 580 { \ 581 *outptr++ = consonant_with_u[last - 0xb8]; \ 582 *statep = 0; \ 583 inptr += 4; \ 584 continue; \ 585 } \ 586 if (ch == 0x0BC2) \ 587 { \ 588 *outptr++ = consonant_with_uu[last - 0xb8]; \ 589 *statep = 0; \ 590 inptr += 4; \ 591 continue; \ 592 } \ 593 if (ch == 0x0BC6) \ 594 { \ 595 if (__glibc_likely (outptr + 2 <= outend)) \ 596 { \ 597 *outptr++ = 0xa6; \ 598 *outptr++ = last; \ 599 *statep = 0; \ 600 inptr += 4; \ 601 continue; \ 602 } \ 603 else \ 604 { \ 605 result = __GCONV_FULL_OUTPUT; \ 606 break; \ 607 } \ 608 } \ 609 if (ch == 0x0BC7) \ 610 { \ 611 if (__glibc_likely (outptr + 2 <= outend)) \ 612 { \ 613 *outptr++ = 0xa7; \ 614 *outptr++ = last; \ 615 *statep = 0; \ 616 inptr += 4; \ 617 continue; \ 618 } \ 619 else \ 620 { \ 621 result = __GCONV_FULL_OUTPUT; \ 622 break; \ 623 } \ 624 } \ 625 if (ch == 0x0BC8) \ 626 { \ 627 if (__glibc_likely (outptr + 2 <= outend)) \ 628 { \ 629 *outptr++ = 0xa8; \ 630 *outptr++ = last; \ 631 *statep = 0; \ 632 inptr += 4; \ 633 continue; \ 634 } \ 635 else \ 636 { \ 637 result = __GCONV_FULL_OUTPUT; \ 638 break; \ 639 } \ 640 } \ 641 if (ch == 0x0BCA) \ 642 { \ 643 if (__glibc_likely (outptr + 3 <= outend)) \ 644 { \ 645 *outptr++ = 0xa6; \ 646 *outptr++ = last; \ 647 *outptr++ = 0xa1; \ 648 *statep = 0; \ 649 inptr += 4; \ 650 continue; \ 651 } \ 652 else \ 653 { \ 654 result = __GCONV_FULL_OUTPUT; \ 655 break; \ 656 } \ 657 } \ 658 if (ch == 0x0BCB) \ 659 { \ 660 if (__glibc_likely (outptr + 3 <= outend)) \ 661 { \ 662 *outptr++ = 0xa7; \ 663 *outptr++ = last; \ 664 *outptr++ = 0xa1; \ 665 *statep = 0; \ 666 inptr += 4; \ 667 continue; \ 668 } \ 669 else \ 670 { \ 671 result = __GCONV_FULL_OUTPUT; \ 672 break; \ 673 } \ 674 } \ 675 if (ch == 0x0BCC) \ 676 { \ 677 if (__glibc_likely (outptr + 3 <= outend)) \ 678 { \ 679 *outptr++ = 0xa7; \ 680 *outptr++ = last; \ 681 *outptr++ = 0xaa; \ 682 *statep = 0; \ 683 inptr += 4; \ 684 continue; \ 685 } \ 686 else \ 687 { \ 688 result = __GCONV_FULL_OUTPUT; \ 689 break; \ 690 } \ 691 } \ 692 if (ch == 0x0BCD) \ 693 { \ 694 if (last != 0xb8) \ 695 { \ 696 *outptr++ = consonant_with_virama[last - 0xb8]; \ 697 *statep = 0; \ 698 } \ 699 else \ 700 *statep = 0xec << 3; \ 701 inptr += 4; \ 702 continue; \ 703 } \ 704 if (last == 0xbc && (ch == 0x0BBF || ch == 0x0BC0)) \ 705 { \ 706 *outptr++ = ch - 0x0af5; \ 707 *statep = 0; \ 708 inptr += 4; \ 709 continue; \ 710 } \ 711 } \ 712 else if (last >= 0x83 && last <= 0x86) \ 713 { \ 714 if (last >= 0x85 && (ch == 0x0BC1 || ch == 0x0BC2)) \ 715 { \ 716 *outptr++ = last + 5; \ 717 *statep = 0; \ 718 continue; \ 719 } \ 720 if (ch == 0x0BCD) \ 721 { \ 722 if (last != 0x85) \ 723 { \ 724 *outptr++ = last + 5; \ 725 *statep = 0; \ 726 } \ 727 else \ 728 *statep = 0x8a << 3; \ 729 inptr += 4; \ 730 continue; \ 731 } \ 732 } \ 733 else if (last == 0xec) \ 734 { \ 735 if (ch == 0x0BB7) \ 736 { \ 737 *statep = 0x87 << 3; \ 738 inptr += 4; \ 739 continue; \ 740 } \ 741 } \ 742 else if (last == 0x8a) \ 743 { \ 744 if (ch == 0x0BB0) \ 745 { \ 746 *statep = 0xc38a << 3; \ 747 inptr += 4; \ 748 continue; \ 749 } \ 750 } \ 751 else if (last == 0x87) \ 752 { \ 753 if (ch == 0x0BCD) \ 754 { \ 755 *outptr++ = 0x8c; \ 756 *statep = 0; \ 757 inptr += 4; \ 758 continue; \ 759 } \ 760 } \ 761 else \ 762 { \ 763 assert (last == 0xc38a); \ 764 if (ch == 0x0BC0) \ 765 { \ 766 *outptr++ = 0x82; \ 767 *statep = 0; \ 768 inptr += 4; \ 769 continue; \ 770 } \ 771 } \ 772 \ 773 /* Output the buffered character. */ \ 774 if (__glibc_unlikely (last >> 8)) \ 775 { \ 776 if (__glibc_likely (outptr + 2 <= outend)) \ 777 { \ 778 *outptr++ = last & 0xff; \ 779 *outptr++ = (last >> 8) & 0xff; \ 780 } \ 781 else \ 782 { \ 783 result = __GCONV_FULL_OUTPUT; \ 784 break; \ 785 } \ 786 } \ 787 else \ 788 *outptr++ = last & 0xff; \ 789 *statep = 0; \ 790 continue; \ 791 } \ 792 \ 793 if (ch < 0x80) \ 794 /* Plain ASCII character. */ \ 795 *outptr++ = ch; \ 796 else if (ch >= 0x0B80 && ch <= 0x0BFF) \ 797 { \ 798 /* Tamil character. */ \ 799 uint8_t t = ucs4_to_tscii[ch - 0x0B80]; \ 800 \ 801 if (t != 0) \ 802 { \ 803 if ((t >= 0xb8 && t <= 0xc9) || (t >= 0x83 && t <= 0x86)) \ 804 *statep = (uint32_t) t << 3; \ 805 else \ 806 *outptr++ = t; \ 807 } \ 808 else if (ch >= 0x0BCA && ch <= 0x0BCC) \ 809 { \ 810 /* See whether we have room for two bytes. */ \ 811 if (__glibc_likely (outptr + 2 <= outend)) \ 812 { \ 813 *outptr++ = (ch == 0x0BCA ? 0xa6 : 0xa7); \ 814 *outptr++ = (ch != 0x0BCC ? 0xa1 : 0xaa); \ 815 } \ 816 else \ 817 { \ 818 result = __GCONV_FULL_OUTPUT; \ 819 break; \ 820 } \ 821 } \ 822 else \ 823 { \ 824 /* Illegal character. */ \ 825 STANDARD_TO_LOOP_ERR_HANDLER (4); \ 826 } \ 827 } \ 828 else if (ch == 0x00A9) \ 829 *outptr++ = ch; \ 830 else if (ch == 0x2018 || ch == 0x2019) \ 831 *outptr++ = ch - 0x1f87; \ 832 else if (ch == 0x201C || ch == 0x201D) \ 833 *outptr++ = ch - 0x1f89; \ 834 else \ 835 { \ 836 UNICODE_TAG_HANDLER (ch, 4); \ 837 \ 838 /* Illegal character. */ \ 839 STANDARD_TO_LOOP_ERR_HANDLER (4); \ 840 } \ 841 \ 842 /* Now that we wrote the output increment the input pointer. */ \ 843 inptr += 4; \ 844 } 845 #define LOOP_NEED_FLAGS 846 #define EXTRA_LOOP_DECLS , int *statep 847 #include <iconv/loop.c> 848 849 850 /* Now define the toplevel functions. */ 851 #include <iconv/skeleton.c> 852