1 /* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
2 Copyright (C) 1998-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #include <assert.h>
20 #include <dlfcn.h>
21 #include <gconv.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include "jis0201.h"
26 #include "jis0208.h"
27 #include "jis0212.h"
28 #include "gb2312.h"
29 #include "ksc5601.h"
30
/* One entry of the range table that is walked (through the pointer
   `from_idx') when a UCS4 value is converted to ISO 8859-7.  The table
   itself is not defined in this file; presumably it comes from
   iso8859-7jp.h, which is included right after this declaration.  */
struct gap
{
  /* Lowest UCS4 value belonging to the range (tested as `ch >= start').  */
  uint16_t start;
  /* Highest UCS4 value belonging to the range; the table walk advances
     while `ch > end'.  */
  uint16_t end;
  /* Offset added to `ch - 0xa0' to form the index into
     iso88597_from_ucs4.  */
  int32_t idx;
};
37
38 #include "iso8859-7jp.h"
39
/* This makes obvious what everybody knows: 0x1b is the Esc character.  */
#define ESC 0x1b

/* We provide our own initialization and destructor function (gconv_init
   and gconv_end below).  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0

/* Definitions used in the body of the `gconv' function.  */
#define FROM_LOOP from_iso2022jp_loop
#define TO_LOOP to_iso2022jp_loop
#define ONE_DIRECTION 0
/* Decoding: one input unit is between one byte (a plain character) and
   four bytes (the escape sequences `Esc $ ( C' and `Esc $ ( D'); every
   decoded character is stored as one 4-byte UCS4 value.  */
#define FROM_LOOP_MIN_NEEDED_FROM 1
#define FROM_LOOP_MAX_NEEDED_FROM 4
#define FROM_LOOP_MIN_NEEDED_TO 4
#define FROM_LOOP_MAX_NEEDED_TO 4
/* Encoding: every input character is one 4-byte UCS4 value; the output
   for it is between one byte and six bytes (for instance a 4-byte
   designation followed by a 2-byte character, or two 3-byte escape
   sequences).  */
#define TO_LOOP_MIN_NEEDED_FROM 4
#define TO_LOOP_MAX_NEEDED_FROM 4
#define TO_LOOP_MIN_NEEDED_TO 1
#define TO_LOOP_MAX_NEEDED_TO 6
#define FROM_DIRECTION (dir == from_iso2022jp)
/* Local setup: read direction and variant from the private data stored by
   gconv_init and let SETP point to the state word.  SAVE_SET is the
   scratch variable used by SAVE_RESET_STATE.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct iso2022jp_data *) step->__data)->dir; \
  enum variant var = ((struct iso2022jp_data *) step->__data)->var; \
  int save_set; \
  int *setp = &data->__statep->__count;
/* Arguments matching the EXTRA_LOOP_DECLS of both conversion loops.  */
#define EXTRA_LOOP_ARGS , var, setp
66
67
/* Direction of the transformation.  */
enum direction
{
  /* Initial value in gconv_init; it remains when none of the charset
     names matched.  */
  illegal_dir,
  /* The step's target name is one of the ISO-2022-JP names.  */
  to_iso2022jp,
  /* The step's source name is one of the ISO-2022-JP names.  */
  from_iso2022jp
};
75
/* We handle ISO-2022-JP and ISO-2022-JP-2 here.  */
enum variant
{
  /* Initial value in gconv_init; it remains when no name matched.  */
  illegal_var,
  /* Selected by the name "ISO-2022-JP//".  */
  iso2022jp,
  /* Selected by the name "ISO-2022-JP-2//"; enables the additional
     character sets, the G2 designations and the language tags.  */
  iso2022jp2
};
83
84
/* Private data of a conversion step.  It is allocated and filled in by
   gconv_init, stored in step->__data, read by PREPARE_LOOP and released
   by gconv_end.  */
struct iso2022jp_data
{
  /* Which way this step converts.  */
  enum direction dir;
  /* Which of the two encodings this step implements.  */
  enum variant var;
};
90
91
/* The COUNT element of the state keeps track of the currently selected
   character set.  The selection is stored in bits 3..5 of that word
   (see CURRENT_SEL_MASK).  The possible values are:  */
enum
{
  ASCII_set = 0,                /* Initial state, selected by `Esc ( B'.  */
  JISX0208_1978_set = 1 << 3,   /* Selected by `Esc $ @'.  */
  JISX0208_1983_set = 2 << 3,   /* Selected by `Esc $ B'.  */
  JISX0201_Roman_set = 3 << 3,  /* Selected by `Esc ( J'.  */
  JISX0201_Kana_set = 4 << 3,   /* Selected by `Esc ( I'.  */
  GB2312_set = 5 << 3,          /* Selected by `Esc $ A'.  */
  KSC5601_set = 6 << 3,         /* Selected by `Esc $ ( C'.  */
  JISX0212_set = 7 << 3,        /* Selected by `Esc $ ( D'.  */
  CURRENT_SEL_MASK = 7 << 3     /* Mask extracting the selection.  */
};
106
/* The second value stored is the designation of the G2 set.  It is kept
   in bits 6..7 of the state word (see CURRENT_ASSIGN_MASK).  The
   following values are possible:  */
enum
{
  UNSPECIFIED_set = 0,          /* No G2 set designated.  */
  ISO88591_set = 1 << 6,        /* Designated by `Esc . A'.  */
  ISO88597_set = 2 << 6,        /* Designated by `Esc . F'.  */
  CURRENT_ASSIGN_MASK = 3 << 6  /* Mask extracting the designation.  */
};
116
/* The third value, only used during conversion from Unicode to ISO-2022-JP-2,
   describes the language tag parsing status.  It is kept in bits 8..10 of
   the state word (see CURRENT_TAG_MASK).  The possible values are as
   follows.  Values >= TAG_language are temporary tag parsing states.

   The numbering is deliberate: the stable states are 0..3 (shifted), so
   that `tag >> 8' can be used as index into conversion_lists, and all
   temporary states are numerically larger, so that a single comparison
   with TAG_language recognizes them.  */
enum
{
  TAG_none = 0,               /* No language tag in effect.  */
  TAG_language = 4 << 8,      /* Temporary: tag introducer (0x01) seen.  */
  TAG_language_j = 5 << 8,    /* Temporary: `j' seen.  */
  TAG_language_ja = 1 << 8,   /* Stable: language `ja'.  */
  TAG_language_k = 6 << 8,    /* Temporary: `k' seen.  */
  TAG_language_ko = 2 << 8,   /* Stable: language `ko'.  */
  TAG_language_z = 7 << 8,    /* Temporary: `z' seen.  */
  TAG_language_zh = 3 << 8,   /* Stable: language `zh'.  */
  CURRENT_TAG_MASK = 7 << 8   /* Mask extracting the tag state.  */
};
132
133
134 extern int gconv_init (struct __gconv_step *step);
135 int
gconv_init(struct __gconv_step * step)136 gconv_init (struct __gconv_step *step)
137 {
138 /* Determine which direction. */
139 struct iso2022jp_data *new_data;
140 enum direction dir = illegal_dir;
141 enum variant var = illegal_var;
142 int result;
143
144 if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
145 {
146 dir = from_iso2022jp;
147 var = iso2022jp;
148 }
149 else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
150 {
151 dir = to_iso2022jp;
152 var = iso2022jp;
153 }
154 else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
155 {
156 dir = from_iso2022jp;
157 var = iso2022jp2;
158 }
159 else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
160 {
161 dir = to_iso2022jp;
162 var = iso2022jp2;
163 }
164
165 result = __GCONV_NOCONV;
166 if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
167 {
168 new_data
169 = (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
170
171 result = __GCONV_NOMEM;
172 if (new_data != NULL)
173 {
174 new_data->dir = dir;
175 new_data->var = var;
176 step->__data = new_data;
177
178 if (dir == from_iso2022jp)
179 {
180 step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
181 step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
182 step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
183 step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
184 }
185 else
186 {
187 step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
188 step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
189 step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
190 step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
191 }
192
193 /* Yes, this is a stateful encoding. */
194 step->__stateful = 1;
195
196 result = __GCONV_OK;
197 }
198 }
199
200 return result;
201 }
202
203
204 extern void gconv_end (struct __gconv_step *data);
205 void
gconv_end(struct __gconv_step * data)206 gconv_end (struct __gconv_step *data)
207 {
208 free (data->__data);
209 }
210
211
/* This is a stateful encoding, therefore flushing must bring the state
   back to the initial one.  Nothing happens if the selected set is ASCII
   and no G2 set is designated.  Otherwise the state word is reset (the
   low three bits are kept, the G2 designation is cleared as well).  When
   converting to ISO-2022-JP with a non-ASCII set selected, the sequence
   `Esc ( B' is written first; if the output buffer lacks the three bytes
   for it, STATUS becomes __GCONV_FULL_OUTPUT and the state is left
   unchanged.  */
#define EMIT_SHIFT_TO_INIT \
  /* Avoid warning about unused variable 'var'.  */ \
  (void) var; \
  \
  if ((data->__statep->__count & ~7) != ASCII_set) \
    { \
      /* Bytes have to be written only when we produce ISO-2022-JP and a \
         set other than ASCII is selected.  In all other cases resetting \
         the state is sufficient.  */ \
      int emit_needed \
        = (dir != from_iso2022jp \
           && (data->__statep->__count & CURRENT_SEL_MASK) != ASCII_set); \
      \
      if (emit_needed && __glibc_unlikely (outbuf + 3 > outend)) \
        /* We don't have enough room in the output buffer.  */ \
        status = __GCONV_FULL_OUTPUT; \
      else \
        { \
          if (emit_needed) \
            { \
              /* Write out the shift sequence `Esc ( B'.  */ \
              *outbuf++ = ESC; \
              *outbuf++ = '('; \
              *outbuf++ = 'B'; \
            } \
          \
          /* Back to the initial state.  Note that this also clears the \
             G2 designation.  */ \
          data->__statep->__count \
            = (data->__statep->__count & 7) | ASCII_set; \
        } \
    }
249
250
/* Since we might have to reset the input pointer we must be able to save
   and restore the state.  A nonzero SAVE copies the state word to
   SAVE_SET, zero copies SAVE_SET back into the state word.  */
#define SAVE_RESET_STATE(Save) \
  ((Save) ? (void) (save_set = *setp) : (void) (*setp = save_set))
258
259
/* First define the conversion function from ISO-2022-JP to UCS4.

   The macros of this group parameterize <iconv/loop.c>, which is included
   at the end of the group.  BODY processes one input unit:

   - An escape sequence which designates a character set only updates SET
     (or SET2 for the G2 designations of ISO-2022-JP-2) and continues with
     the next unit without producing output.
   - `Esc N' followed by a byte (ISO-2022-JP-2 only) yields one character
     of the set recorded in SET2.
   - Everything else is decoded according to SET.

   Every decoded character is stored as a 4-byte value with put32.  An ESC
   which does not introduce one of the recognized sequences is not an
   error here: it is handled like any other byte below 0x21, i.e. it is
   passed through unchanged.  */
#define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY \
  { \
    uint32_t ch = *inptr; \
    \
    /* Recognize escape sequences.  */ \
    if (__builtin_expect (ch, 0) == ESC) \
      { \
        /* We now must be prepared to read two to three more \
           characters.  If we have a match in the first character but \
           then the input buffer ends we terminate with an error since \
           we must not risk missing an escape sequence just because it \
           is not entirely in the current input buffer.  A fourth byte \
           is demanded only for `Esc $ (' in ISO-2022-JP-2, the only \
           prefix of the 4-byte sequences recognized below.  */ \
        if (__builtin_expect (inptr + 2 >= inend, 0) \
            || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '(' \
                && __builtin_expect (inptr + 3 >= inend, 0))) \
          { \
            /* Not enough input available.  */ \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        \
        if (inptr[1] == '(') \
          { \
            if (inptr[2] == 'B') \
              { \
                /* ASCII selected.  */ \
                set = ASCII_set; \
                inptr += 3; \
                continue; \
              } \
            else if (inptr[2] == 'J') \
              { \
                /* JIS X 0201 Roman selected.  */ \
                set = JISX0201_Roman_set; \
                inptr += 3; \
                continue; \
              } \
            else if (var == iso2022jp2 && inptr[2] == 'I') \
              { \
                /* JIS X 0201 Kana selected.  */ \
                set = JISX0201_Kana_set; \
                inptr += 3; \
                continue; \
              } \
          } \
        else if (inptr[1] == '$') \
          { \
            if (inptr[2] == '@') \
              { \
                /* JIS X 0208-1978 selected.  */ \
                set = JISX0208_1978_set; \
                inptr += 3; \
                continue; \
              } \
            else if (inptr[2] == 'B') \
              { \
                /* JIS X 0208-1983 selected.  */ \
                set = JISX0208_1983_set; \
                inptr += 3; \
                continue; \
              } \
            else if (var == iso2022jp2) \
              { \
                if (inptr[2] == 'A') \
                  { \
                    /* GB 2312-1980 selected.  */ \
                    set = GB2312_set; \
                    inptr += 3; \
                    continue; \
                  } \
                else if (inptr[2] == '(') \
                  { \
                    if (inptr[3] == 'C') \
                      { \
                        /* KSC 5601-1987 selected.  */ \
                        set = KSC5601_set; \
                        inptr += 4; \
                        continue; \
                      } \
                    else if (inptr[3] == 'D') \
                      { \
                        /* JIS X 0212-1990 selected.  */ \
                        set = JISX0212_set; \
                        inptr += 4; \
                        continue; \
                      } \
                  } \
              } \
          } \
        else if (var == iso2022jp2 && inptr[1] == '.') \
          { \
            if (inptr[2] == 'A') \
              { \
                /* ISO 8859-1-GR selected.  */ \
                set2 = ISO88591_set; \
                inptr += 3; \
                continue; \
              } \
            else if (inptr[2] == 'F') \
              { \
                /* ISO 8859-7-GR selected.  */ \
                set2 = ISO88597_set; \
                inptr += 3; \
                continue; \
              } \
          } \
      } \
    \
    /* `Esc N' takes the following byte from the set recorded in SET2. \
       Reading inptr[1] and inptr[2] is safe: the length test in the ESC \
       branch above has already been passed.  */ \
    if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N') \
      { \
        if (set2 == ISO88591_set) \
          { \
            ch = inptr[2] | 0x80; \
            inptr += 3; \
          } \
        else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set) \
          { \
            /* We use the table from the ISO 8859-7 module.  */ \
            if (inptr[2] < 0x20 || inptr[2] >= 0x80) \
              STANDARD_FROM_LOOP_ERR_HANDLER (1); \
            ch = iso88597_to_ucs4[inptr[2] - 0x20]; \
            if (ch == 0) \
              STANDARD_FROM_LOOP_ERR_HANDLER (3); \
            inptr += 3; \
          } \
        else \
          { \
            /* No G2 set has been designated.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
      } \
    else if (ch >= 0x80) \
      { \
        /* Bytes with the high bit set are rejected in every set.  */ \
        STANDARD_FROM_LOOP_ERR_HANDLER (1); \
      } \
    else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f)) \
      /* Almost done, just advance the input pointer.  Bytes below 0x21 \
         and the byte 0x7f are passed through whatever set is \
         selected.  */ \
      ++inptr; \
    else if (set == JISX0201_Roman_set) \
      { \
        /* Use the JIS X 0201 table.  */ \
        ch = jisx0201_to_ucs4 (ch); \
        if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        ++inptr; \
      } \
    else if (set == JISX0201_Kana_set) \
      { \
        /* Use the JIS X 0201 table.  The Kana byte is moved into the \
           upper half before the lookup.  */ \
        ch = jisx0201_to_ucs4 (ch + 0x80); \
        if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        ++inptr; \
      } \
    else \
      { \
        /* The remaining sets are handled by helper functions which get \
           the address of INPTR; NOTE(review): presumably they advance \
           it past the character on success - confirm in the charset \
           headers.  */ \
        if (set == JISX0208_1978_set || set == JISX0208_1983_set) \
          /* XXX I don't have the tables for these two old variants of \
             JIS X 0208.  Therefore I'm using the tables for JIS X \
             0208-1990.  If somebody has problems with this please \
             provide the appropriate tables.  */ \
          ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0); \
        else if (set == JISX0212_set) \
          /* Use the JIS X 0212 table.  */ \
          ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0); \
        else if (set == GB2312_set) \
          /* Use the GB 2312 table.  */ \
          ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0); \
        else \
          { \
            assert (set == KSC5601_set); \
            \
            /* Use the KSC 5601 table.  */ \
            ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0); \
          } \
        \
        /* A result of zero is treated as "more input needed".  */ \
        if (__glibc_unlikely (ch == 0)) \
          { \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        else if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
          { \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
      } \
    \
    put32 (outptr, ch); \
    outptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
/* The loop works on local copies of the two designation fields; only
   these two fields are written back to the state word.  */
#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
  int set2 = *setp & CURRENT_ASSIGN_MASK
#define UPDATE_PARAMS *setp = set | set2
#include <iconv/loop.c>
461
462
463 /* Next, define the other direction. */
464
/* The groups of character sets which are tried in turn when a character
   cannot be written with the currently selected set.  The value zero
   (`none') marks the end of a conversion list.  */
enum conversion
{
  none = 0,
  european = 1,
  japanese = 2,
  chinese = 3,
  korean = 4,
  other = 5
};

/* A datatype for conversion lists.  A list consists of up to five
   `enum conversion' values of three bits each; the element which is
   tried first occupies the least significant bits.  */
typedef unsigned int cvlist_t;

/* Build a list from its five elements, CV1 being the first one.  */
#define CVLIST(cv1, cv2, cv3, cv4, cv5) \
  (((cv5) << 12) + ((cv4) << 9) + ((cv3) << 6) + ((cv2) << 3) + (cv1))
/* The first element of the list CVL.  */
#define CVLIST_FIRST(cvl) ((cvl) & 0x7)
/* The list CVL without its first element.  */
#define CVLIST_REST(cvl) ((cvl) >> 3)

/* The preference order for each stable language tag state.  The index is
   the tag value shifted right by eight bits.  */
static const cvlist_t conversion_lists[4] =
{
  /* TAG_none */
  [0] = CVLIST (japanese, european, chinese, korean, other),
  /* TAG_language_ja */
  [1] = CVLIST (japanese, european, chinese, korean, other),
  /* TAG_language_ko */
  [2] = CVLIST (korean, european, japanese, chinese, other),
  /* TAG_language_zh */
  [3] = CVLIST (chinese, european, japanese, korean, other)
};
480
/* The conversion function from UCS4 to ISO-2022-JP.

   The macros of this group parameterize <iconv/loop.c>, which is included
   at the end of the group.  BODY reads one UCS4 value with get32 and
   proceeds in these steps:

   1. ISO-2022-JP-2 only: Unicode tag characters update the language tag
      state TAG and produce no output.
   2. Try the currently selected set SET, unless the language tag
      disagrees with that set.
   3. If that failed and no language tag is active, try the G2 set which
      is already designated (SET2), using `Esc N'.
   4. If that failed as well, switch to another set: ASCII for values up
      to 0x7f, otherwise the first set of the conversion list which
      contains the character.

   WRITTEN is the number of bytes stored for the character, zero when the
   output buffer is too small, or __UNKNOWN_10646_CHAR when the character
   cannot be represented in the set that was tried.  */
#define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT TO_LOOP
#define BODY \
  { \
    uint32_t ch; \
    size_t written; \
    \
    ch = get32 (inptr); \
    \
    if (var == iso2022jp2) \
      { \
        /* Handle Unicode tag characters (range U+E0000..U+E007F).  */ \
        if (__glibc_unlikely ((ch >> 7) == (0xe0000 >> 7))) \
          { \
            /* Only the low seven bits matter; letters are compared \
               case-insensitively.  */ \
            ch &= 0x7f; \
            if (ch >= 'A' && ch <= 'Z') \
              ch += 'a' - 'A'; \
            if (ch == 0x01) \
              tag = TAG_language; \
            else if (ch == 'j' && tag == TAG_language) \
              tag = TAG_language_j; \
            else if (ch == 'a' && tag == TAG_language_j) \
              tag = TAG_language_ja; \
            else if (ch == 'k' && tag == TAG_language) \
              tag = TAG_language_k; \
            else if (ch == 'o' && tag == TAG_language_k) \
              tag = TAG_language_ko; \
            else if (ch == 'z' && tag == TAG_language) \
              tag = TAG_language_z; \
            else if (ch == 'h' && tag == TAG_language_z) \
              tag = TAG_language_zh; \
            else if (ch == 0x7f) \
              tag = TAG_none; \
            else \
              { \
                /* Other tag characters reset the tag parsing state (if the \
                   current state is a temporary state) or are ignored (if \
                   the current state is a stable one).  */ \
                if (tag >= TAG_language) \
                  tag = TAG_none; \
              } \
            \
            inptr += 4; \
            continue; \
          } \
        \
        /* Non-tag characters reset the tag parsing state, if the current \
           state is a temporary state.  */ \
        if (__glibc_unlikely (tag >= TAG_language)) \
          tag = TAG_none; \
      } \
    \
    /* First see whether we can write the character using the currently \
       selected character set.  But ignore the selected character set if \
       the current language tag shows different preferences.  */ \
    if (set == ASCII_set) \
      { \
        /* Please note that the NUL byte is *not* matched if we are not \
           currently using the ASCII charset.  This is because we must \
           switch to the initial state whenever a NUL byte is written.  */ \
        if (ch <= 0x7f) \
          { \
            *outptr++ = ch; \
            written = 1; \
            \
            /* At the beginning of a line, G2 designation is cleared.  */ \
            if (var == iso2022jp2 && ch == 0x0a) \
              set2 = UNSPECIFIED_set; \
          } \
        else \
          written = __UNKNOWN_10646_CHAR; \
      } \
    /* ISO-2022-JP recommends to encode the newline character always in \
       ASCII since this allows a context-free interpretation of the \
       characters at the beginning of the next line.  Otherwise it would \
       have to be known whether the last line ended using ASCII or \
       JIS X 0201.  */ \
    else if (set == JISX0201_Roman_set \
             && (__builtin_expect (tag == TAG_none, 1) \
                 || tag == TAG_language_ja)) \
      { \
        unsigned char buf[1]; \
        written = ucs4_to_jisx0201 (ch, buf); \
        if (written != __UNKNOWN_10646_CHAR) \
          { \
            /* Only the printable lower half is written in this set.  */ \
            if (buf[0] > 0x20 && buf[0] < 0x80) \
              { \
                *outptr++ = buf[0]; \
                written = 1; \
              } \
            else \
              written = __UNKNOWN_10646_CHAR; \
          } \
      } \
    else if (set == JISX0201_Kana_set \
             && (__builtin_expect (tag == TAG_none, 1) \
                 || tag == TAG_language_ja)) \
      { \
        unsigned char buf[1]; \
        written = ucs4_to_jisx0201 (ch, buf); \
        if (written != __UNKNOWN_10646_CHAR) \
          { \
            /* Only the range 0xa1..0xdf is accepted here; it is written \
               with the high bit removed.  */ \
            if (buf[0] > 0xa0 && buf[0] < 0xe0) \
              { \
                *outptr++ = buf[0] - 0x80; \
                written = 1; \
              } \
            else \
              written = __UNKNOWN_10646_CHAR; \
          } \
      } \
    else \
      { \
        if ((set == JISX0208_1978_set || set == JISX0208_1983_set) \
            && (__builtin_expect (tag == TAG_none, 1) \
                || tag == TAG_language_ja)) \
          written = ucs4_to_jisx0208 (ch, outptr, outend - outptr); \
        else if (set == JISX0212_set \
                 && (__builtin_expect (tag == TAG_none, 1) \
                     || tag == TAG_language_ja)) \
          written = ucs4_to_jisx0212 (ch, outptr, outend - outptr); \
        else if (set == GB2312_set \
                 && (__builtin_expect (tag == TAG_none, 1) \
                     || tag == TAG_language_zh)) \
          written = ucs4_to_gb2312 (ch, outptr, outend - outptr); \
        else if (set == KSC5601_set \
                 && (__builtin_expect (tag == TAG_none, 1) \
                     || tag == TAG_language_ko)) \
          written = ucs4_to_ksc5601 (ch, outptr, outend - outptr); \
        else \
          written = __UNKNOWN_10646_CHAR; \
        \
        /* Zero is treated as "not enough room in the output buffer".  */ \
        if (__glibc_unlikely (written == 0)) \
          { \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        else if (written != __UNKNOWN_10646_CHAR) \
          outptr += written; \
      } \
    \
    /* Second attempt: use the G2 set which is already designated.  No \
       designation sequence is written here, only `Esc N' and the \
       character.  */ \
    if (written == __UNKNOWN_10646_CHAR \
        && __builtin_expect (tag == TAG_none, 1)) \
      { \
        if (set2 == ISO88591_set) \
          { \
            if (ch >= 0x80 && ch <= 0xff) \
              { \
                if (__glibc_unlikely (outptr + 3 > outend)) \
                  { \
                    result = __GCONV_FULL_OUTPUT; \
                    break; \
                  } \
                \
                *outptr++ = ESC; \
                *outptr++ = 'N'; \
                *outptr++ = ch & 0x7f; \
                written = 3; \
              } \
          } \
        else if (set2 == ISO88597_set) \
          { \
            if (__glibc_likely (ch < 0xffff)) \
              { \
                const struct gap *rp = from_idx; \
                \
                /* Find the first range which does not end before CH. \
                   NOTE(review): termination presumably relies on a \
                   final table entry ending at 0xffff - confirm in \
                   iso8859-7jp.h.  */ \
                while (ch > rp->end) \
                  ++rp; \
                if (ch >= rp->start) \
                  { \
                    unsigned char res = \
                      iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
                    if (res != '\0') \
                      { \
                        if (__glibc_unlikely (outptr + 3 > outend)) \
                          { \
                            result = __GCONV_FULL_OUTPUT; \
                            break; \
                          } \
                        \
                        *outptr++ = ESC; \
                        *outptr++ = 'N'; \
                        *outptr++ = res & 0x7f; \
                        written = 3; \
                      } \
                  } \
              } \
          } \
      } \
    \
    if (written == __UNKNOWN_10646_CHAR) \
      { \
        /* The attempts to use the currently selected character set \
           failed, either because the language tag changed, or because \
           the character requires a different character set, or because \
           the character is unknown. \
           The CJK character sets partially overlap when seen as subsets \
           of ISO 10646; therefore there is no single correct result. \
           We use a preference order which depends on the language tag.  */ \
        \
        if (ch <= 0x7f) \
          { \
            /* We must encode using ASCII.  First write out the \
               escape sequence.  */ \
            if (__glibc_unlikely (outptr + 3 > outend)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            \
            *outptr++ = ESC; \
            *outptr++ = '('; \
            *outptr++ = 'B'; \
            set = ASCII_set; \
            \
            /* If the character itself does not fit, the escape sequence \
               stays in the buffer and SET already reflects it.  */ \
            if (__glibc_unlikely (outptr + 1 > outend)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            *outptr++ = ch; \
            \
            /* At the beginning of a line, G2 designation is cleared.  */ \
            if (var == iso2022jp2 && ch == 0x0a) \
              set2 = UNSPECIFIED_set; \
          } \
        else \
          { \
            /* Now it becomes difficult.  We must search the other \
               character sets one by one.  Use an ordered conversion \
               list that depends on the current language tag.  */ \
            cvlist_t conversion_list; \
            unsigned char buf[2]; \
            int res = __GCONV_ILLEGAL_INPUT; \
            \
            /* Plain ISO-2022-JP tries the Japanese sets only.  */ \
            if (var == iso2022jp2) \
              conversion_list = conversion_lists[tag >> 8]; \
            else \
              conversion_list = CVLIST (japanese, 0, 0, 0, 0); \
            \
            /* Every `break' inside the switch leaves the switch only. \
               RES tells the loop condition what happened: \
               __GCONV_ILLEGAL_INPUT means "not found, try the next \
               list element".  */ \
            do \
              switch (CVLIST_FIRST (conversion_list)) \
                { \
                case european: \
                  \
                  /* Try ISO 8859-1 upper half.  */ \
                  if (ch >= 0x80 && ch <= 0xff) \
                    { \
                      if (set2 != ISO88591_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '.'; \
                          *outptr++ = 'A'; \
                          set2 = ISO88591_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 3 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = ESC; \
                      *outptr++ = 'N'; \
                      *outptr++ = ch - 0x80; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  /* Try ISO 8859-7 upper half.  */ \
                  if (__glibc_likely (ch < 0xffff)) \
                    { \
                      const struct gap *rp = from_idx; \
                      \
                      while (ch > rp->end) \
                        ++rp; \
                      if (ch >= rp->start) \
                        { \
                          unsigned char ch2 = \
                            iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
                          if (ch2 != '\0') \
                            { \
                              if (set2 != ISO88597_set) \
                                { \
                                  if (__builtin_expect (outptr + 3 > outend, \
                                                        0)) \
                                    { \
                                      res = __GCONV_FULL_OUTPUT; \
                                      break; \
                                    } \
                                  *outptr++ = ESC; \
                                  *outptr++ = '.'; \
                                  *outptr++ = 'F'; \
                                  set2 = ISO88597_set; \
                                } \
                              \
                              if (__builtin_expect (outptr + 3 > outend, 0)) \
                                { \
                                  res = __GCONV_FULL_OUTPUT; \
                                  break; \
                                } \
                              *outptr++ = ESC; \
                              *outptr++ = 'N'; \
                              *outptr++ = ch2 - 0x80; \
                              res = __GCONV_OK; \
                              break; \
                            } \
                        } \
                    } \
                  \
                  break; \
                  \
                case japanese: \
                  \
                  /* Try JIS X 0201 Roman.  */ \
                  written = ucs4_to_jisx0201 (ch, buf); \
                  if (written != __UNKNOWN_10646_CHAR \
                      && buf[0] > 0x20 && buf[0] < 0x80) \
                    { \
                      if (set != JISX0201_Roman_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '('; \
                          *outptr++ = 'J'; \
                          set = JISX0201_Roman_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 1 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  /* Try JIS X 0208.  */ \
                  written = ucs4_to_jisx0208 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != JISX0208_1983_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = 'B'; \
                          set = JISX0208_1983_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 2 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  if (__glibc_unlikely (var == iso2022jp)) \
                    /* Don't use the other Japanese character sets.  */ \
                    break; \
                  \
                  /* Try JIS X 0212.  */ \
                  written = ucs4_to_jisx0212 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != JISX0212_set) \
                        { \
                          if (__builtin_expect (outptr + 4 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = '('; \
                          *outptr++ = 'D'; \
                          set = JISX0212_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 2 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                case chinese: \
                  assert (var == iso2022jp2); \
                  \
                  /* Try GB 2312.  */ \
                  written = ucs4_to_gb2312 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != GB2312_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = 'A'; \
                          set = GB2312_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 2 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                case korean: \
                  assert (var == iso2022jp2); \
                  \
                  /* Try KSC 5601.  */ \
                  written = ucs4_to_ksc5601 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != KSC5601_set) \
                        { \
                          if (__builtin_expect (outptr + 4 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = '('; \
                          *outptr++ = 'C'; \
                          set = KSC5601_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 2 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                case other: \
                  assert (var == iso2022jp2); \
                  \
                  /* Try JIS X 0201 Kana.  This is not officially part \
                     of ISO-2022-JP-2, according to RFC 1554.  Therefore \
                     we try this only after all other attempts.  */ \
                  written = ucs4_to_jisx0201 (ch, buf); \
                  if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80) \
                    { \
                      if (set != JISX0201_Kana_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '('; \
                          *outptr++ = 'I'; \
                          set = JISX0201_Kana_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 1 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0] - 0x80; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                default: \
                  abort (); \
                } \
            while (res == __GCONV_ILLEGAL_INPUT \
                   && (conversion_list = CVLIST_REST (conversion_list)) != 0);\
            \
            /* Here we are outside of the do-while statement, so this \
               `break' is the one which ends the processing of BODY; \
               NOTE(review): it presumably leaves the conversion loop of \
               loop.c - confirm there.  */ \
            if (res == __GCONV_FULL_OUTPUT) \
              { \
                result = res; \
                break; \
              } \
            \
            /* No set of the list contains the character.  */ \
            if (res == __GCONV_ILLEGAL_INPUT) \
              { \
                STANDARD_TO_LOOP_ERR_HANDLER (4); \
              } \
          } \
      } \
    \
    /* Now that we wrote the output increment the input pointer.  */ \
    inptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
/* The loop works on local copies of the three fields of the state word;
   REINIT_PARAMS reloads them from the state word and UPDATE_PARAMS
   writes them back.  */
#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
  int set2 = *setp & CURRENT_ASSIGN_MASK; \
  int tag = *setp & CURRENT_TAG_MASK;
#define REINIT_PARAMS do \
    { \
      set = *setp & CURRENT_SEL_MASK; \
      set2 = *setp & CURRENT_ASSIGN_MASK; \
      tag = *setp & CURRENT_TAG_MASK; \
    } \
  while (0)
#define UPDATE_PARAMS *setp = set | set2 | tag
#include <iconv/loop.c>
1030
1031
1032 /* Now define the toplevel functions. */
1033 #include <iconv/skeleton.c>
1034