1 /* Conversion module for ISO-2022-JP-3.
2    Copyright (C) 1998-2022 Free Software Foundation, Inc.
3    Copyright The GNU Toolchain Authors.
4    This file is part of the GNU C Library.
5 
6    The GNU C Library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Lesser General Public
8    License as published by the Free Software Foundation; either
9    version 2.1 of the License, or (at your option) any later version.
10 
11    The GNU C Library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15 
16    You should have received a copy of the GNU Lesser General Public
17    License along with the GNU C Library; if not, see
18    <https://www.gnu.org/licenses/>.  */
19 
20 #include <assert.h>
21 #include <dlfcn.h>
22 #include <gconv.h>
23 #include <stdint.h>
24 #include <string.h>
25 
26 #include "jis0201.h"
27 #include "jis0208.h"
28 #include "jisx0213.h"
29 
30 /* This makes obvious what everybody knows: 0x1b is the Esc character.  */
31 #define ESC 0x1b
32 
33 /* Definitions used in the body of the `gconv' function.
   The *_NEEDED_* limits are byte counts.  The "from" loop reads
   ISO-2022-JP-3 bytes and stores 4-byte UCS-4 units; the "to" loop reads
   4-byte UCS-4 units and stores ISO-2022-JP-3 bytes.
   - FROM_LOOP_MAX_NEEDED_FROM is 4: the longest escape sequence the
     from loop consumes (Esc $ ( followed by O, P or Q) is four bytes.
   - FROM_LOOP_MAX_NEEDED_TO is 8: one two-byte JIS X 0213 code can
     expand into two UCS-4 characters (a combining pair).
   - TO_LOOP_MAX_NEEDED_TO is 6: a four-byte escape sequence followed by
     a two-byte character.
   NOTE(review): these names are presumably read by the included
   iconv/skeleton.c and iconv/loop.c; confirm there before changing.  */
34 #define CHARSET_NAME		"ISO-2022-JP-3//"
35 #define FROM_LOOP		from_iso2022jp3_loop
36 #define TO_LOOP			to_iso2022jp3_loop
37 #define DEFINE_INIT		1
38 #define DEFINE_FINI		1
39 #define ONE_DIRECTION		0
40 #define FROM_LOOP_MIN_NEEDED_FROM	1
41 #define FROM_LOOP_MAX_NEEDED_FROM	4
42 #define FROM_LOOP_MIN_NEEDED_TO		4
43 #define FROM_LOOP_MAX_NEEDED_TO		8
44 #define TO_LOOP_MIN_NEEDED_FROM		4
45 #define TO_LOOP_MAX_NEEDED_FROM		4
46 #define TO_LOOP_MIN_NEEDED_TO		1
47 #define TO_LOOP_MAX_NEEDED_TO		6
/* Locals declared for the conversion driver: SAVED_STATE is the slot
   used by SAVE_RESET_STATE, and STATEP points at the COUNT field of the
   conversion state.  STATEP is passed on as the extra loop argument
   (EXTRA_LOOP_ARGS here, EXTRA_LOOP_DECLS at each loop definition).  */
48 #define PREPARE_LOOP \
49   int saved_state;							      \
50   int *statep = &data->__statep->__count;
51 #define EXTRA_LOOP_ARGS		, statep
52 
53 
54 /* The COUNT element of the state keeps track of the currently selected
55    character set.  The selector is stored in bits 3..5 of COUNT and is
   extracted with CURRENT_SEL_MASK.  The numeric order of the three
   JIS X 0213 values matters: the conversion loops derive the final
   byte of the escape sequence ('O', 'P', 'Q') and the plane argument
   for jisx0213_to_ucs4 from differences between these constants.
   The possible values are:  */
56 enum
57 {
58   ASCII_set = 0,		/* Esc ( B */
59   JISX0208_1978_set = 1 << 3,	/* Esc $ @ */
60   JISX0208_1983_set = 2 << 3,	/* Esc $ B */
61   JISX0201_Roman_set = 3 << 3,	/* Esc ( J */
62   JISX0201_Kana_set = 4 << 3,	/* Esc ( I */
63   JISX0213_1_2000_set = 5 << 3,	/* Esc $ ( O */
64   JISX0213_2_set = 6 << 3,	/* Esc $ ( P */
65   JISX0213_1_2004_set = 7 << 3,	/* Esc $ ( Q */
66   CURRENT_SEL_MASK = 7 << 3	/* Mask covering all selector values.  */
67 };
68 
69 /* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
70    state also contains the last two bytes to be output, shifted by 6
71    bits, and a one-bit indicator whether they must be preceded by the
72    shift sequence, in bit 22.  During ISO-2022-JP-3 to UCS-4
73    conversion, COUNT may also contain a non-zero pending wide
74    character, shifted by six bits.  This happens for certain inputs in
75    JISX0213_1_2004_set and JISX0213_2_set if the second wide character
76    in a combining sequence cannot be written because the buffer is
77    full.  */
78 
79 /* Since this is a stateful encoding we have to provide code which resets
80    the output state to the initial state.  This has to be done during the
81    flushing.

   The macro does nothing when COUNT, ignoring its low three bits, is
   already ASCII_set.  Otherwise:
   - In the FROM direction a pending UCS-4 character (COUNT >> 6), if
     any, is stored with put32u, and the state becomes ASCII_set.
   - In the other direction the buffered two-byte character, if any, is
     written (preceded by `Esc $ B' when bit 22 of COUNT is set), then
     `Esc ( B' is written if the selected set is not ASCII, and the
     state becomes ASCII_set.
   In both directions the low three bits of COUNT are preserved.  If
   the output buffer is too small, STATUS is set to __GCONV_FULL_OUTPUT,
   nothing is written and COUNT is left unchanged.
   NOTE(review): DATA, OUTBUF, OUTEND, STATUS and FROM_DIRECTION are not
   defined in this file; presumably iconv/skeleton.c provides them.  */
82 #define EMIT_SHIFT_TO_INIT \
83   if ((data->__statep->__count & ~7) != ASCII_set)			      \
84     {									      \
85       if (FROM_DIRECTION)						      \
86 	{								      \
87 	  uint32_t ch = data->__statep->__count >> 6;			      \
88 	  /* CH is the queued second character of a combining pair.  */      \
89 	  if (__glibc_unlikely (ch != 0))				      \
90 	    {								      \
91 	      if (__glibc_likely (outbuf + 4 <= outend))		      \
92 		{							      \
93 		  /* Write out the last character.  */			      \
94 		  put32u (outbuf, ch);					      \
95 		  outbuf += 4;						      \
96 		  data->__statep->__count &= 7;				      \
97 		  data->__statep->__count |= ASCII_set;			      \
98 		}							      \
99 	      else							      \
100 		/* We don't have enough room in the output buffer.  */	      \
101 		status = __GCONV_FULL_OUTPUT;				      \
102 	    }								      \
103 	  else								      \
104 	    {								      \
105 	      data->__statep->__count &= 7;				      \
106 	      data->__statep->__count |= ASCII_set;			      \
107 	    }								      \
108 	}								      \
109       else								      \
110 	{								      \
111 	  /* We are not in the initial state.  To switch back we have	      \
112 	     to write out the buffered character and/or emit the sequence     \
113 	     `Esc ( B'.  */						      \
114 	  size_t need =							      \
115 	    (data->__statep->__count >> 6				      \
116 	     ? (data->__statep->__count >> 22 ? 3 : 0) + 2		      \
117 	     : 0)							      \
118 	    + ((data->__statep->__count & CURRENT_SEL_MASK) != ASCII_set      \
119 	       ? 3 : 0);						      \
120 	  /* NEED = buffered part (0, 2 or 5) + final escape (0 or 3).  */   \
121 	  if (__glibc_unlikely (outbuf + need > outend))		      \
122 	    /* We don't have enough room in the output buffer.  */	      \
123 	    status = __GCONV_FULL_OUTPUT;				      \
124 	  else								      \
125 	    {								      \
126 	      if (data->__statep->__count >> 6)				      \
127 		{							      \
128 		  uint32_t lasttwo = data->__statep->__count >> 6;	      \
129 		  /* Bit 16 of LASTTWO is bit 22 of COUNT.  */		      \
130 		  if (lasttwo >> 16)					      \
131 		    {							      \
132 		      /* Write out the shift sequence before the last	      \
133 			 character.  */					      \
134 		      assert ((data->__statep->__count & CURRENT_SEL_MASK)    \
135 			      == JISX0208_1983_set);			      \
136 		      *outbuf++ = ESC;					      \
137 		      *outbuf++ = '$';					      \
138 		      *outbuf++ = 'B';					      \
139 		    }							      \
140 		  /* Write out the last character.  */			      \
141 		  *outbuf++ = (lasttwo >> 8) & 0xff;			      \
142 		  *outbuf++ = lasttwo & 0xff;				      \
143 		}							      \
144 	      if ((data->__statep->__count & CURRENT_SEL_MASK) != ASCII_set)  \
145 		{							      \
146 		  /* Write out the shift sequence.  */			      \
147 		  *outbuf++ = ESC;					      \
148 		  *outbuf++ = '(';					      \
149 		  *outbuf++ = 'B';					      \
150 		}							      \
151 	      data->__statep->__count &= 7;				      \
152 	      data->__statep->__count |= ASCII_set;			      \
153 	    }								      \
154 	}								      \
155     }
156 
157 
158 /* Since we might have to reset input pointer we must be able to save
159    and restore the state.  With a non-zero SAVE the current *STATEP is
   copied into SAVED_STATE; with a zero SAVE, SAVED_STATE is copied back
   into *STATEP.  Both variables are the ones declared by PREPARE_LOOP.
   The expansion ends without a semicolon, so the invocation site must
   supply one.  */
160 #define SAVE_RESET_STATE(Save) \
161   if (Save)								      \
162     saved_state = *statep;						      \
163   else									      \
164     *statep = saved_state
165 
166 
167 /* First define the conversion function from ISO-2022-JP-3 to UCS-4.

   BODY handles one step per iteration, in this order:
   1. If a UCS-4 character is queued in SET (bits 6 and up), store it
      and clear the queue.
   2. If the next byte is Esc, require that the whole escape sequence is
      available, then switch SET for the recognized sequences.  An
      unrecognized sequence is not consumed; its Esc byte falls through
      to step 3 and is passed on like any other control character.
   3. Otherwise convert one character according to SET.  Bytes >= 0x80
      are errors; bytes below 0x21 and 0x7f are copied unchanged in
      every set.
   A two-byte JIS X 0213 code may yield two UCS-4 characters; if only
   the first fits, the second is queued in SET and the loop stops with
   __GCONV_FULL_OUTPUT.
   NOTE(review): INPTR, INEND, OUTPTR, OUTEND, RESULT and the error
   handler macros are not defined in this file; presumably iconv/loop.c
   provides them.  */
168 #define MIN_NEEDED_INPUT	FROM_LOOP_MIN_NEEDED_FROM
169 #define MAX_NEEDED_INPUT	FROM_LOOP_MAX_NEEDED_FROM
170 #define MIN_NEEDED_OUTPUT	FROM_LOOP_MIN_NEEDED_TO
171 #define MAX_NEEDED_OUTPUT	FROM_LOOP_MAX_NEEDED_TO
172 #define LOOPFCT			FROM_LOOP
173 #define BODY \
174   {									      \
175     uint32_t ch;							      \
176 									      \
177     /* Output any pending character (bits 6 and up of SET).  */	      \
178     ch = set >> 6;							      \
179     if (__glibc_unlikely (ch != 0))					      \
180       {									      \
181 	put32 (outptr, ch);						      \
182 	outptr += 4;							      \
183 	/* Remove the pending character, but preserve state bits.  */	      \
184 	set &= (1 << 6) - 1;						      \
185 	continue;							      \
186       }									      \
187 									      \
188     /* Otherwise read the next input byte.  */				      \
189     ch = *inptr;							      \
190 									      \
191     /* Recognize escape sequences.  */					      \
192     if (__glibc_unlikely (ch == ESC))					      \
193       {									      \
194 	/* We now must be prepared to read two to three more bytes.	      \
195 	   If we have a match in the first byte but then the input buffer     \
196 	   ends we terminate with an error since we must not risk missing     \
197 	   an escape sequence just because it is not entirely in the	      \
198 	   current input buffer.  */					      \
199 	if (__builtin_expect (inptr + 2 >= inend, 0)			      \
200 	    || (inptr[1] == '$' && inptr[2] == '('			      \
201 		&& __builtin_expect (inptr + 3 >= inend, 0)))		      \
202 	  {								      \
203 	    /* Not enough input available.  */				      \
204 	    result = __GCONV_INCOMPLETE_INPUT;				      \
205 	    break;							      \
206 	  }								      \
207 	/* Dispatch on the bytes following Esc.  */			      \
208 	if (inptr[1] == '(')						      \
209 	  {								      \
210 	    if (inptr[2] == 'B')					      \
211 	      {								      \
212 		/* ASCII selected.  */					      \
213 		set = ASCII_set;					      \
214 		inptr += 3;						      \
215 		continue;						      \
216 	      }								      \
217 	    else if (inptr[2] == 'J')					      \
218 	      {								      \
219 		/* JIS X 0201 Roman selected.  */			      \
220 		set = JISX0201_Roman_set;				      \
221 		inptr += 3;						      \
222 		continue;						      \
223 	      }								      \
224 	    else if (inptr[2] == 'I')					      \
225 	      {								      \
226 		/* JIS X 0201 Katakana selected.  */			      \
227 		set = JISX0201_Kana_set;				      \
228 		inptr += 3;						      \
229 		continue;						      \
230 	      }								      \
231 	  }								      \
232 	else if (inptr[1] == '$')					      \
233 	  {								      \
234 	    if (inptr[2] == '@')					      \
235 	      {								      \
236 		/* JIS X 0208-1978 selected.  */			      \
237 		set = JISX0208_1978_set;				      \
238 		inptr += 3;						      \
239 		continue;						      \
240 	      }								      \
241 	    else if (inptr[2] == 'B')					      \
242 	      {								      \
243 		/* JIS X 0208-1983 selected.  */			      \
244 		set = JISX0208_1983_set;				      \
245 		inptr += 3;						      \
246 		continue;						      \
247 	      }								      \
248 	    else if (inptr[2] == '(')					      \
249 	      {								      \
250 		if (inptr[3] == 'O' || inptr[3] == 'Q')			      \
251 		  {							      \
252 		    /* JIS X 0213 plane 1 selected.  */			      \
253 		    /* In this direction we don't need to distinguish the     \
254 		       versions from 2000 and 2004. */			      \
255 		    set = JISX0213_1_2004_set;				      \
256 		    inptr += 4;						      \
257 		    continue;						      \
258 		  }							      \
259 		else if (inptr[3] == 'P')				      \
260 		  {							      \
261 		    /* JIS X 0213 plane 2 selected.  */			      \
262 		    set = JISX0213_2_set;				      \
263 		    inptr += 4;						      \
264 		    continue;						      \
265 		  }							      \
266 	      }								      \
267 	  }								      \
268       }									      \
269     /* Nothing consumed above: convert CH (possibly Esc) as data.  */	      \
270     if (ch >= 0x80)							      \
271       {									      \
272 	STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
273       }									      \
274     else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f))		      \
275       /* Almost done, just advance the input pointer.  */		      \
276       ++inptr;								      \
277     else if (set == JISX0201_Roman_set)					      \
278       {									      \
279 	/* Use the JIS X 0201 table.  */				      \
280 	ch = jisx0201_to_ucs4 (ch);					      \
281 	if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))		      \
282 	  {								      \
283 	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
284 	  }								      \
285 	++inptr;							      \
286       }									      \
287     else if (set == JISX0201_Kana_set)					      \
288       {									      \
289 	/* Use the JIS X 0201 table.  */				      \
290 	ch = jisx0201_to_ucs4 (ch + 0x80);				      \
291 	if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))		      \
292 	  {								      \
293 	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
294 	  }								      \
295 	++inptr;							      \
296       }									      \
297     else if (set == JISX0208_1978_set || set == JISX0208_1983_set)	      \
298       {									      \
299 	/* XXX I don't have the tables for these two old variants of	      \
300 	   JIS X 0208.  Therefore I'm using the tables for JIS X	      \
301 	   0208-1990.  If somebody has problems with this please	      \
302 	   provide the appropriate tables.  */				      \
303 	ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0);		      \
304 	/* A zero result is treated as truncated input.  */		      \
305 	if (__glibc_unlikely (ch == 0))					      \
306 	  {								      \
307 	    result = __GCONV_INCOMPLETE_INPUT;				      \
308 	    break;							      \
309 	  }								      \
310 	else if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))		      \
311 	  {								      \
312 	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
313 	  }								      \
314       }									      \
315     else /* (set == JISX0213_1_2004_set || set == JISX0213_2_set) */	      \
316       {									      \
317 	if (__glibc_unlikely (inptr + 1 >= inend))			      \
318 	  {								      \
319 	    result = __GCONV_INCOMPLETE_INPUT;				      \
320 	    break;							      \
321 	  }								      \
322 	/* First argument: 0x100 + CH for plane 1, 0x200 + CH for plane 2.  */ \
323 	ch = jisx0213_to_ucs4 (						      \
324 	       ((JISX0213_1_2004_set - set + (1 << 3)) << 5) + ch,	      \
325 	       inptr[1]);						      \
326 	if (ch == 0)							      \
327 	  STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
328 	/* Results below 0x80 are 1-based indices of a combining pair.  */   \
329 	if (ch < 0x80)							      \
330 	  {								      \
331 	    /* It's a combining character.  */				      \
332 	    uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0];	      \
333 	    uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1];	      \
334 									      \
335 	    inptr += 2;							      \
336 	    /* NOTE(review): assumes loop.c guarantees room for U1.  */      \
337 	    put32 (outptr, u1);						      \
338 	    outptr += 4;						      \
339 									      \
340 	    /* See whether we have room for two characters.  */		      \
341 	    if (outptr + 4 <= outend)					      \
342 	      {								      \
343 		put32 (outptr, u2);					      \
344 		outptr += 4;						      \
345 		continue;						      \
346 	      }								      \
347 									      \
348 	    /* Otherwise store only the first character now, and	      \
349 	       put the second one into the queue.  */			      \
350 	    set |= u2 << 6;						      \
351 	    /* Tell the caller why we terminate the loop.  */		      \
352 	    result = __GCONV_FULL_OUTPUT;				      \
353 	    break;							      \
354 	  }								      \
355 	/* Ordinary two-byte character; CH is stored below.  */	      \
356 	inptr += 2;							      \
357       }									      \
358     /* Common tail: store the converted character as UCS-4.  */	      \
359     put32 (outptr, ch);							      \
360     outptr += 4;							      \
361   }
/* SET is a local copy of the whole COUNT value (selector plus any queued
   character); UPDATE_PARAMS writes it back to *STATEP.  */
362 #define LOOP_NEED_FLAGS
363 #define EXTRA_LOOP_DECLS	, int *statep
364 #define INIT_PARAMS		int set = *statep
365 #define UPDATE_PARAMS		*statep = set
366 #include <iconv/loop.c>
367 
368 
369 /* Next, define the other direction, from UCS-4 to ISO-2022-JP-3.  */
370 
371 /* Composition tables for each of the relevant combining characters.
   Each combiner (U+02E5, U+02E9, U+0300, U+0301, U+309A) owns one
   contiguous slice of comp_table_data, described by its
   COMP_TABLE_IDX_xxxx (first row) and COMP_TABLE_LEN_xxxx (row count)
   macros.  Each IDX is computed from the previous slice's IDX + LEN, so
   the slices must stay in this order.  The UCS-4 to ISO-2022-JP-3 loop
   searches the slice of the current combiner linearly for the buffered
   base character.  */
372 static const struct
373 {
374   uint16_t base;	/* Two-byte code of the base, as kept in LASTTWO.  */
375   uint16_t composed;	/* Two-byte code written for base + combiner.  */
376 } comp_table_data[] =
377 {
378 #define COMP_TABLE_IDX_02E5 0
379 #define COMP_TABLE_LEN_02E5 1
380   { 0x2b64, 0x2b65 }, /* 0x12B65 = 0x12B64 U+02E5 */
381 #define COMP_TABLE_IDX_02E9 (COMP_TABLE_IDX_02E5 + COMP_TABLE_LEN_02E5)
382 #define COMP_TABLE_LEN_02E9 1
383   { 0x2b60, 0x2b66 }, /* 0x12B66 = 0x12B60 U+02E9 */
384 #define COMP_TABLE_IDX_0300 (COMP_TABLE_IDX_02E9 + COMP_TABLE_LEN_02E9)
385 #define COMP_TABLE_LEN_0300 5
386   { 0x295c, 0x2b44 }, /* 0x12B44 = 0x1295C U+0300 */
387   { 0x2b38, 0x2b48 }, /* 0x12B48 = 0x12B38 U+0300 */
388   { 0x2b37, 0x2b4a }, /* 0x12B4A = 0x12B37 U+0300 */
389   { 0x2b30, 0x2b4c }, /* 0x12B4C = 0x12B30 U+0300 */
390   { 0x2b43, 0x2b4e }, /* 0x12B4E = 0x12B43 U+0300 */
391 #define COMP_TABLE_IDX_0301 (COMP_TABLE_IDX_0300 + COMP_TABLE_LEN_0300)
392 #define COMP_TABLE_LEN_0301 4
393   { 0x2b38, 0x2b49 }, /* 0x12B49 = 0x12B38 U+0301 */
394   { 0x2b37, 0x2b4b }, /* 0x12B4B = 0x12B37 U+0301 */
395   { 0x2b30, 0x2b4d }, /* 0x12B4D = 0x12B30 U+0301 */
396   { 0x2b43, 0x2b4f }, /* 0x12B4F = 0x12B43 U+0301 */
397 #define COMP_TABLE_IDX_309A (COMP_TABLE_IDX_0301 + COMP_TABLE_LEN_0301)
398 #define COMP_TABLE_LEN_309A 14
399   { 0x242b, 0x2477 }, /* 0x12477 = 0x1242B U+309A */
400   { 0x242d, 0x2478 }, /* 0x12478 = 0x1242D U+309A */
401   { 0x242f, 0x2479 }, /* 0x12479 = 0x1242F U+309A */
402   { 0x2431, 0x247a }, /* 0x1247A = 0x12431 U+309A */
403   { 0x2433, 0x247b }, /* 0x1247B = 0x12433 U+309A */
404   { 0x252b, 0x2577 }, /* 0x12577 = 0x1252B U+309A */
405   { 0x252d, 0x2578 }, /* 0x12578 = 0x1252D U+309A */
406   { 0x252f, 0x2579 }, /* 0x12579 = 0x1252F U+309A */
407   { 0x2531, 0x257a }, /* 0x1257A = 0x12531 U+309A */
408   { 0x2533, 0x257b }, /* 0x1257B = 0x12533 U+309A */
409   { 0x253b, 0x257c }, /* 0x1257C = 0x1253B U+309A */
410   { 0x2544, 0x257d }, /* 0x1257D = 0x12544 U+309A */
411   { 0x2548, 0x257e }, /* 0x1257E = 0x12548 U+309A */
412   { 0x2675, 0x2678 }, /* 0x12678 = 0x12675 U+309A */
413 };
414 
/* Loop body converting one UCS-4 character to ISO-2022-JP-3.

   State: SET is the currently selected character set; LASTTWO, when
   non-zero, is a two-byte character that has been accepted from the
   input but not yet written, because the next input character might
   combine with it (see comp_table_data).  Bit 16 of LASTTWO records
   that `Esc $ B' must still be written before that character.

   Each iteration proceeds in this order:
   1. If LASTTWO is set, either write the composed character (when the
      current input is a matching combiner; the input is consumed) or
      write the buffered character alone and reprocess the input.
   2. Try to encode the character in the currently selected set.
   3. Otherwise pick a set in this order: ASCII (for CH <= 0x7f),
      JIS X 0201 Roman, JIS X 0208, JIS X 0213, JIS X 0201 Katakana,
      writing the matching escape sequence first.  If none applies the
      character is reported through STANDARD_TO_LOOP_ERR_HANDLER.
   Whenever the output buffer is too small the loop stops with
   __GCONV_FULL_OUTPUT without advancing INPTR.
   NOTE(review): INPTR, OUTPTR, OUTEND, RESULT and the handler macros
   are not defined in this file; presumably iconv/loop.c provides
   them.  */
415 #define MIN_NEEDED_INPUT	TO_LOOP_MIN_NEEDED_FROM
416 #define MAX_NEEDED_INPUT	TO_LOOP_MAX_NEEDED_FROM
417 #define MIN_NEEDED_OUTPUT	TO_LOOP_MIN_NEEDED_TO
418 #define MAX_NEEDED_OUTPUT	TO_LOOP_MAX_NEEDED_TO
419 #define LOOPFCT			TO_LOOP
420 #define BODY \
421   {									      \
422     uint32_t ch = get32 (inptr);					      \
423     /* A non-zero LASTTWO is a buffered character awaiting a combiner.  */   \
424     if (lasttwo != 0)							      \
425       {									      \
426 	/* Attempt to combine the last character with this one.  */	      \
427 	unsigned int idx;						      \
428 	unsigned int len;						      \
429 									      \
430 	if (ch == 0x02e5)						      \
431 	  idx = COMP_TABLE_IDX_02E5, len = COMP_TABLE_LEN_02E5;		      \
432 	else if (ch == 0x02e9)						      \
433 	  idx = COMP_TABLE_IDX_02E9, len = COMP_TABLE_LEN_02E9;		      \
434 	else if (ch == 0x0300)						      \
435 	  idx = COMP_TABLE_IDX_0300, len = COMP_TABLE_LEN_0300;		      \
436 	else if (ch == 0x0301)						      \
437 	  idx = COMP_TABLE_IDX_0301, len = COMP_TABLE_LEN_0301;		      \
438 	else if (ch == 0x309a)						      \
439 	  idx = COMP_TABLE_IDX_309A, len = COMP_TABLE_LEN_309A;		      \
440 	else								      \
441 	  goto not_combining;						      \
442 	/* Scan this combiner's table slice for the buffered base.  */	      \
443 	do								      \
444 	  if (comp_table_data[idx].base == (uint16_t) lasttwo)		      \
445 	    break;							      \
446 	while (++idx, --len > 0);					      \
447 	/* LEN is still positive only if the scan stopped on a match.  */    \
448 	if (len > 0)							      \
449 	  {								      \
450 	    /* Output the combined character.  */			      \
451 	    /* We know the combined character is in JISX0213 plane 1,	      \
452 	       but the buffered character may have been in JISX0208 or in     \
453 	       JISX0213 plane 1.  */					      \
454 	    size_t need =						      \
455 	      (lasttwo >> 16						      \
456 	       || (set != JISX0213_1_2000_set && set != JISX0213_1_2004_set)  \
457 	       ? 4 : 0);						      \
458 									      \
459 	    if (__glibc_unlikely (outptr + need + 2 > outend))		      \
460 	      {								      \
461 		result = __GCONV_FULL_OUTPUT;				      \
462 		break;							      \
463 	      }								      \
464 	    if (need)							      \
465 	      {								      \
466 		/* But first, output the escape sequence.  */		      \
467 		*outptr++ = ESC;					      \
468 		*outptr++ = '$';					      \
469 		*outptr++ = '(';					      \
470 		*outptr++ = 'O';					      \
471 		set = JISX0213_1_2000_set;				      \
472 	      }								      \
473 	    lasttwo = comp_table_data[idx].composed;			      \
474 	    *outptr++ = (lasttwo >> 8) & 0xff;				      \
475 	    *outptr++ = lasttwo & 0xff;					      \
476 	    lasttwo = 0;						      \
477 	    inptr += 4;							      \
478 	    continue;							      \
479 	  }								      \
480 									      \
481       not_combining:							      \
482 	/* Output the buffered character.  */				      \
483 	/* We know it is in JISX0208 or in JISX0213 plane 1.  */	      \
484 	{								      \
485 	  size_t need = (lasttwo >> 16 ? 3 : 0);			      \
486 									      \
487 	  if (__glibc_unlikely (outptr + need + 2 > outend))		      \
488 	    {								      \
489 	      result = __GCONV_FULL_OUTPUT;				      \
490 	      break;							      \
491 	    }								      \
492 	  if (need)							      \
493 	    {								      \
494 	      /* But first, output the escape sequence.  */		      \
495 	      assert (set == JISX0208_1983_set);			      \
496 	      *outptr++ = ESC;						      \
497 	      *outptr++ = '$';						      \
498 	      *outptr++ = 'B';						      \
499 	    }								      \
500 	  *outptr++ = (lasttwo >> 8) & 0xff;				      \
501 	  *outptr++ = lasttwo & 0xff;					      \
502 	  lasttwo = 0;							      \
503 	  continue;							      \
504 	}								      \
505       }									      \
506 									      \
507     /* First see whether we can write the character using the currently	      \
508        selected character set.  */					      \
509     if (set == ASCII_set)						      \
510       {									      \
511 	/* Please note that the NUL byte is *not* matched if we are not	      \
512 	   currently using the ASCII charset.  This is because we must	      \
513 	   switch to the initial state whenever a NUL byte is written.  */    \
514 	if (ch <= 0x7f)							      \
515 	  {								      \
516 	    *outptr++ = ch;						      \
517 	    inptr += 4;							      \
518 	    continue;							      \
519 	  }								      \
520       }									      \
521     /* ISO-2022-JP recommends to encode the newline character always in	      \
522        ASCII since this allows a context-free interpretation of the	      \
523        characters at the beginning of the next line.  Otherwise it would      \
524        have to be known whether the last line ended using ASCII or	      \
525        JIS X 0201.  */							      \
526     else if (set == JISX0201_Roman_set)					      \
527       {									      \
528 	unsigned char buf[1];						      \
529 	if (ucs4_to_jisx0201 (ch, buf) != __UNKNOWN_10646_CHAR		      \
530 	    && buf[0] > 0x20 && buf[0] < 0x80)				      \
531 	  {								      \
532 	    *outptr++ = buf[0];						      \
533 	    inptr += 4;							      \
534 	    continue;							      \
535 	  }								      \
536       }									      \
537     else if (set == JISX0201_Kana_set)					      \
538       {									      \
539 	unsigned char buf[1];						      \
540 	if (ucs4_to_jisx0201 (ch, buf) != __UNKNOWN_10646_CHAR		      \
541 	    && buf[0] >= 0x80)						      \
542 	  {								      \
543 	    *outptr++ = buf[0] - 0x80;					      \
544 	    inptr += 4;							      \
545 	    continue;							      \
546 	  }								      \
547       }									      \
548     else if (/*set == JISX0208_1978_set || */ set == JISX0208_1983_set)	      \
549       {									      \
550 	size_t written = ucs4_to_jisx0208 (ch, outptr, outend - outptr);      \
551 	/* WRITTEN == 0 is handled below as "output buffer full".  */	      \
552 	if (written != __UNKNOWN_10646_CHAR)				      \
553 	  {								      \
554 	    uint32_t jch = ucs4_to_jisx0213 (ch);			      \
555 									      \
556 	    if (jch & 0x0080)						      \
557 	      {								      \
558 		/* A possible match in comp_table_data.  Buffer it.  */	      \
559 		lasttwo = jch & 0x7f7f;					      \
560 		inptr += 4;						      \
561 		continue;						      \
562 	      }								      \
563 	    if (__glibc_unlikely (written == 0))			      \
564 	      {								      \
565 		result = __GCONV_FULL_OUTPUT;				      \
566 		break;							      \
567 	      }								      \
568 	    else							      \
569 	      {								      \
570 	 	outptr += written;					      \
571 		inptr += 4;						      \
572 		continue;						      \
573 	     }								      \
574 	  }								      \
575       }									      \
576     else								      \
577       {									      \
578 	/* (set == JISX0213_1_2000_set || set == JISX0213_1_2004_set	      \
579 	    || set == JISX0213_2_set) */				      \
580 	uint32_t jch = ucs4_to_jisx0213 (ch);				      \
581 	/* Bit 0x8000 of JCH is tested as the plane 2 indicator.  */	      \
582 	if (jch != 0							      \
583 	    && (jch & 0x8000						      \
584 		? set == JISX0213_2_set					      \
585 		: (set == JISX0213_1_2004_set				      \
586 		   || (set == JISX0213_1_2000_set			      \
587 		       && !jisx0213_added_in_2004_p (jch)))))		      \
588 	  {								      \
589 	    if (jch & 0x0080)						      \
590 	      {								      \
591 		/* A possible match in comp_table_data.  Buffer it.  */	      \
592 									      \
593 		/* We know it's a JISX 0213 plane 1 character.  */	      \
594 		assert ((jch & 0x8000) == 0);				      \
595 									      \
596 		lasttwo = jch & 0x7f7f;					      \
597 		inptr += 4;						      \
598 		continue;						      \
599 	      }								      \
600 									      \
601 	    if (__glibc_unlikely (outptr + 1 >= outend))		      \
602 	      {								      \
603 		result = __GCONV_FULL_OUTPUT;				      \
604 		break;							      \
605 	      }								      \
606 	    *outptr++ = (jch >> 8) & 0x7f;				      \
607 	    *outptr++ = jch & 0x7f;					      \
608 	    inptr += 4;							      \
609 	    continue;							      \
610 	  }								      \
611       }									      \
612 									      \
613     /* The attempts to use the currently selected character set failed,	      \
614        either because the character requires a different character set,	      \
615        or because the character is unknown.  */				      \
616 									      \
617     if (ch <= 0x7f)							      \
618       {									      \
619 	/* We must encode using ASCII.  First write out the escape	      \
620 	   sequence.  */						      \
621 	if (__glibc_unlikely (outptr + 3 > outend))			      \
622 	  {								      \
623 	    result = __GCONV_FULL_OUTPUT;				      \
624 	    break;							      \
625 	  }								      \
626 									      \
627 	*outptr++ = ESC;						      \
628 	*outptr++ = '(';						      \
629 	*outptr++ = 'B';						      \
630 	set = ASCII_set;						      \
631 	/* The escape is out and SET updated even if CH must wait.  */	      \
632 	if (__glibc_unlikely (outptr >= outend))			      \
633 	  {								      \
634 	    result = __GCONV_FULL_OUTPUT;				      \
635 	    break;							      \
636 	  }								      \
637 	*outptr++ = ch;							      \
638       }									      \
639     else								      \
640       {									      \
641 	unsigned char buf[2];						      \
642 									      \
643 	/* Try JIS X 0201 Roman.  */					      \
644 	if (ucs4_to_jisx0201 (ch, buf) != __UNKNOWN_10646_CHAR		      \
645 	    && buf[0] > 0x20 && buf[0] < 0x80)				      \
646 	  {								      \
647 	    if (set != JISX0201_Roman_set)				      \
648 	      {								      \
649 		if (__glibc_unlikely (outptr + 3 > outend))		      \
650 		  {							      \
651 		    result = __GCONV_FULL_OUTPUT;			      \
652 		    break;						      \
653 		  }							      \
654 		*outptr++ = ESC;					      \
655 		*outptr++ = '(';					      \
656 		*outptr++ = 'J';					      \
657 		set = JISX0201_Roman_set;				      \
658 	      }								      \
659 									      \
660 	    if (__glibc_unlikely (outptr >= outend))			      \
661 	      {								      \
662 		result = __GCONV_FULL_OUTPUT;				      \
663 		break;							      \
664 	      }								      \
665 	    *outptr++ = buf[0];						      \
666 	  }								      \
667 	else								      \
668 	  {								      \
669 	    uint32_t jch = ucs4_to_jisx0213 (ch);			      \
670 									      \
671 	    /* Try JIS X 0208.  */					      \
672 	    size_t written = ucs4_to_jisx0208 (ch, buf, 2);		      \
673 	    if (written != __UNKNOWN_10646_CHAR)			      \
674 	      {								      \
675 		if (jch & 0x0080)					      \
676 		  {							      \
677 		    /* A possible match in comp_table_data.  Buffer it.  */   \
678 		    lasttwo = ((set != JISX0208_1983_set ? 1 : 0) << 16)      \
679 			      | (jch & 0x7f7f);				      \
680 		    set = JISX0208_1983_set;				      \
681 		    inptr += 4;						      \
682 		    continue;						      \
683 		  }							      \
684 		/* Emit `Esc $ B' unless JIS X 0208 is already selected.  */ \
685 		if (set != JISX0208_1983_set)				      \
686 		  {							      \
687 		    if (__glibc_unlikely (outptr + 3 > outend))		      \
688 		      {							      \
689 			result = __GCONV_FULL_OUTPUT;			      \
690 			break;						      \
691 		      }							      \
692 		    *outptr++ = ESC;					      \
693 		    *outptr++ = '$';					      \
694 		    *outptr++ = 'B';					      \
695 		    set = JISX0208_1983_set;				      \
696 		  }							      \
697 									      \
698 		if (__glibc_unlikely (outptr + 2 > outend))		      \
699 		  {							      \
700 		    result = __GCONV_FULL_OUTPUT;			      \
701 		    break;						      \
702 		  }							      \
703 		*outptr++ = buf[0];					      \
704 		*outptr++ = buf[1];					      \
705 	      }								      \
706 	    else							      \
707 	      {								      \
708 		/* Try JIS X 0213.  */					      \
709 		if (jch != 0)						      \
710 		  {							      \
711 		    int new_set =					      \
712 		      (jch & 0x8000					      \
713 		       ? JISX0213_2_set					      \
714 		       : jisx0213_added_in_2004_p (jch)			      \
715 			 ? JISX0213_1_2004_set				      \
716 			 : JISX0213_1_2000_set);			      \
717 		    /* Switch sets with Esc $ ( O, P or Q as needed.  */     \
718 		    if (set != new_set)					      \
719 		      {							      \
720 			if (__glibc_unlikely (outptr + 4 > outend))	      \
721 			  {						      \
722 			    result = __GCONV_FULL_OUTPUT;		      \
723 			    break;					      \
724 			  }						      \
725 			*outptr++ = ESC;				      \
726 			*outptr++ = '$';				      \
727 			*outptr++ = '(';				      \
728 			*outptr++ =					      \
729 			  ((new_set - JISX0213_1_2000_set) >> 3) + 'O';	      \
730 			set = new_set;					      \
731 		      }							      \
732 									      \
733 		    if (jch & 0x0080)					      \
734 		      {							      \
735 			/* A possible match in comp_table_data.		      \
736 			   Buffer it.  */				      \
737 									      \
738 			/* We know it's a JIS X 0213 plane 1 character.  */   \
739 			assert ((jch & 0x8000) == 0);			      \
740 									      \
741 			lasttwo = jch & 0x7f7f;				      \
742 			inptr += 4;					      \
743 			continue;					      \
744 		      }							      \
745 									      \
746 		    if (__glibc_unlikely (outptr + 1 >= outend))	      \
747 		      {							      \
748 			result = __GCONV_FULL_OUTPUT;			      \
749 			break;						      \
750 		      }							      \
751 		    *outptr++ = (jch >> 8) & 0x7f;			      \
752 		    *outptr++ = jch & 0x7f;				      \
753 		  }							      \
754 		else							      \
755 		  {							      \
756 		    /* Try JIS X 0201 Katakana.  This is officially not part  \
757 		       of ISO-2022-JP-3.  Therefore we try it after all other \
758 		       attempts.  */					      \
759 		    if (ucs4_to_jisx0201 (ch, buf) != __UNKNOWN_10646_CHAR    \
760 			&& buf[0] >= 0x80)				      \
761 		      {							      \
762 			if (set != JISX0201_Kana_set)			      \
763 			  {						      \
764 			    if (__builtin_expect (outptr + 3 > outend, 0))    \
765 			      {						      \
766 				result = __GCONV_FULL_OUTPUT;		      \
767 				break;					      \
768 			      }						      \
769 			    *outptr++ = ESC;				      \
770 			    *outptr++ = '(';				      \
771 			    *outptr++ = 'I';				      \
772 			    set = JISX0201_Kana_set;			      \
773 			  }						      \
774 									      \
775 			if (__glibc_unlikely (outptr >= outend))	      \
776 			  {						      \
777 			    result = __GCONV_FULL_OUTPUT;		      \
778 			    break;					      \
779 			  }						      \
780 			*outptr++ = buf[0] - 0x80;			      \
781 		      }							      \
782 		    else						      \
783 		      {							      \
784 			UNICODE_TAG_HANDLER (ch, 4);			      \
785 									      \
786 			/* Illegal character.  */			      \
787 			STANDARD_TO_LOOP_ERR_HANDLER (4);		      \
788 		      }							      \
789 		  }							      \
790 	      }								      \
791 	  }								      \
792       }									      \
793 									      \
794     /* Now that we wrote the output increment the input pointer.  */	      \
795     inptr += 4;								      \
796   }
/* COUNT is split on entry into SET (bits 3..5) and LASTTWO (bits 6 and
   up); UPDATE_PARAMS recombines them into *STATEP.  REINIT_PARAMS
   reloads both locals from *STATEP.  */
797 #define LOOP_NEED_FLAGS
798 #define EXTRA_LOOP_DECLS	, int *statep
799 #define INIT_PARAMS		int set = *statep & CURRENT_SEL_MASK;	      \
800 				uint32_t lasttwo = *statep >> 6
801 #define REINIT_PARAMS		do					      \
802 				  {					      \
803 				    set = *statep & CURRENT_SEL_MASK;	      \
804 				    lasttwo = *statep >> 6;		      \
805 				  }					      \
806 				while (0)
807 #define UPDATE_PARAMS		*statep = set | (lasttwo << 6)
808 #include <iconv/loop.c>
809 
810 
811 /* Now define the toplevel functions.  */
812 #include <iconv/skeleton.c>
813