1 /* Conversion module for ISO-2022-CN-EXT.
2    Copyright (C) 2000-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #include <dlfcn.h>
20 #include <gconv.h>
21 #include <stdint.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include "gb2312.h"
25 #include "iso-ir-165.h"
26 #include "cns11643.h"
27 #include "cns11643l1.h"
28 #include "cns11643l2.h"
29 #include <libc-diag.h>
30 
31 #include <assert.h>
32 
/* This makes obvious what everybody knows: 0x1b is the Esc character.  */
#define ESC	0x1b

/* Control sequences used by this encoding:
   - SI and SO are the single-byte shift-in and shift-out characters;
     the conversion loops below use SI to select ASCII and SO to select
     the currently designated two-byte SO character set.
   - SS2 (ESC 0x4e) and SS3 (ESC 0x4f) are two-byte single-shift
     sequences; each one applies only to the two bytes that follow it.  */
#define SI	0x0f
#define SO	0x0e
#define SS2_0	ESC
#define SS2_1	0x4e
#define SS3_0	ESC
#define SS3_1	0x4f

/* Definitions used in the body of the `gconv' function.  The FROM loop
   reads between one and four input bytes per step and always writes one
   four-byte UCS4 value; the TO loop reads one four-byte UCS4 value per
   step.
   NOTE(review): the TO loop body in this file can write a four-byte
   designation, a two-byte single shift and two character bytes for one
   input character, i.e. eight bytes, which is more than
   TO_LOOP_MAX_NEEDED_TO -- confirm how the skeleton uses that value.  */
#define CHARSET_NAME		"ISO-2022-CN-EXT//"
#define DEFINE_INIT		1
#define DEFINE_FINI		1
#define ONE_DIRECTION		0
#define FROM_LOOP		from_iso2022cn_ext_loop
#define TO_LOOP			to_iso2022cn_ext_loop
#define FROM_LOOP_MIN_NEEDED_FROM	1
#define FROM_LOOP_MAX_NEEDED_FROM	4
#define FROM_LOOP_MIN_NEEDED_TO		4
#define FROM_LOOP_MAX_NEEDED_TO		4
#define TO_LOOP_MIN_NEEDED_FROM		4
#define TO_LOOP_MAX_NEEDED_FROM		4
#define TO_LOOP_MIN_NEEDED_TO		1
#define TO_LOOP_MAX_NEEDED_TO		6
/* SAVE_SET is the scratch copy used by SAVE_RESET_STATE; SETP points at
   the __count member of the conversion state, which holds the shift
   state and is handed to both loop functions via EXTRA_LOOP_ARGS.  */
#define PREPARE_LOOP \
  int save_set;								      \
  int *setp = &data->__statep->__count;
#define EXTRA_LOOP_ARGS		, setp
65 
66 
/* The charsets GB/T 12345-90, GB 7589-87, GB/T 13131-9X, GB 7590-87,
   and GB/T 13132-9X are not registered to the best of my knowledge and
   therefore have no escape sequence assigned.  We cannot handle them
   for this reason.  Tell the implementation about this: each macro is
   the final byte of the designation sequence for that charset, and the
   value '\0' means "none", which makes every `X... != '\0' && ...' test
   in the conversion loop false.  */
#define X12345	'\0'
#define X7589	'\0'
#define X13131	'\0'
#define X7590	'\0'
#define X13132	'\0'
76 
77 
/* The COUNT element of the state keeps track of the currently selected
   character set and of the designations seen so far.  The conversion
   loops store the value shifted left by three bits.  Before that shift
   the layout is:

     bits  0-2   set selected through SO          (SO_mask)
     bits  3-4   set reachable through SS2        (SS2_mask)
     bits  5-7   set reachable through SS3        (SS3_mask)
     bits  8-10  designation for the SO set       (SO_ann)
     bits 11-12  designation for the SS2 set      (SS2_ann)
     bits 13-15  designation for the SS3 set      (SS3_ann)

   Every `_ann' value is the matching `_set' value moved up by eight
   bits; the conversion code relies on this when it compares
   `ann & ..._ann' with `used << 8'.  */
enum
{
  /* Sets selected through SO.  */
  ASCII_set = 0,
  GB2312_set = 1,
  GB12345_set = 2,
  CNS11643_1_set = 3,
  ISO_IR_165_set = 4,
  SO_mask = 0x07,

  /* Sets selected through SS2.  */
  GB7589_set = 0x08,
  GB13131_set = 0x10,
  CNS11643_2_set = 0x18,
  SS2_mask = 0x18,

  /* Sets selected through SS3.  */
  GB7590_set = 0x20,
  GB13132_set = 0x40,
  CNS11643_3_set = 0x60,
  CNS11643_4_set = 0x80,
  CNS11643_5_set = 0xa0,
  CNS11643_6_set = 0xc0,
  CNS11643_7_set = 0xe0,
  SS3_mask = 0xe0,

  /* Designations for the SO position.  */
  GB2312_ann = GB2312_set << 8,
  GB12345_ann = GB12345_set << 8,
  CNS11643_1_ann = CNS11643_1_set << 8,
  ISO_IR_165_ann = ISO_IR_165_set << 8,
  SO_ann = SO_mask << 8,

  /* Designations for the SS2 position.  */
  GB7589_ann = GB7589_set << 8,
  GB13131_ann = GB13131_set << 8,
  CNS11643_2_ann = CNS11643_2_set << 8,
  SS2_ann = SS2_mask << 8,

  /* Designations for the SS3 position.  */
  GB7590_ann = GB7590_set << 8,
  GB13132_ann = GB13132_set << 8,
  CNS11643_3_ann = CNS11643_3_set << 8,
  CNS11643_4_ann = CNS11643_4_set << 8,
  CNS11643_5_ann = CNS11643_5_set << 8,
  CNS11643_6_ann = CNS11643_6_set << 8,
  CNS11643_7_ann = CNS11643_7_set << 8,
  SS3_ann = SS3_mask << 8
};

/* All bits that describe a selected set, as opposed to a designation.  */
#define CURRENT_MASK (SO_mask | SS2_mask | SS3_mask)
125 
126 
/* This is a stateful encoding, so flushing has to bring the conversion
   state back to the initial one.  Nothing happens when the stored state
   is already zero.  When decoding it is enough to clear the state.  When
   encoding, a single SI byte is written first; if the output buffer has
   no room for it, STATUS is set to __GCONV_FULL_OUTPUT and the state is
   left untouched.  */
#define EMIT_SHIFT_TO_INIT \
  if (data->__statep->__count >> 3 != ASCII_set)			      \
    {									      \
      if (!(FROM_DIRECTION))						      \
	{								      \
	  /* Encoding direction: leaving the current state requires	      \
	     an `SI' in the output.  */					      \
	  if (__glibc_unlikely (outbuf == outend))			      \
	    /* The output buffer is full; report it and keep the state.  */  \
	    status = __GCONV_FULL_OUTPUT;				      \
	  else								      \
	    {								      \
	      /* Emit the shift and record that we are back in ASCII.  */    \
	      *outbuf++ = SI;						      \
	      if (data->__flags & __GCONV_IS_LAST)			      \
		*irreversible += 1;					      \
	      data->__statep->__count = ASCII_set << 3;			      \
	    }								      \
	}								      \
      else								      \
	/* Decoding direction: nothing to write, just forget the input	      \
	   state.  */							      \
	data->__statep->__count = ASCII_set << 3;			      \
    }
154 
155 
/* Since we might have to reset the input pointer we must be able to save
   and restore the state.  A nonzero SAVE copies the live state (*SETP)
   into SAVE_SET; a zero SAVE writes the saved copy back.  */
#define SAVE_RESET_STATE(Save) \
  if (!(Save))								      \
    *setp = save_set;							      \
  else									      \
    save_set = *setp
163 
164 
165 /* First define the conversion function from ISO-2022-CN to UCS4.  */
166 #define MIN_NEEDED_INPUT	FROM_LOOP_MIN_NEEDED_FROM
167 #define MAX_NEEDED_INPUT	FROM_LOOP_MAX_NEEDED_FROM
168 #define MIN_NEEDED_OUTPUT	FROM_LOOP_MIN_NEEDED_TO
169 #define MAX_NEEDED_OUTPUT	FROM_LOOP_MAX_NEEDED_TO
170 #define LOOPFCT			FROM_LOOP
171 #define BODY \
172   {									      \
173     uint32_t ch = *inptr;						      \
174 									      \
175     /* This is a 7bit character set, disallow all 8bit characters.  */	      \
176     if (ch > 0x7f)							      \
177       STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
178 									      \
179     /* Recognize escape sequences.  */					      \
180     if (ch == ESC)							      \
181       {									      \
182 	/* There are three kinds of escape sequences we have to handle:	      \
183 	   - those announcing the use of GB and CNS characters on the	      \
184 	     line; we can simply ignore them				      \
185 	   - the initial byte of the SS2 sequence.			      \
186 	   - the initial byte of the SS3 sequence.			      \
187 	*/								      \
188 	if (inptr + 2 > inend						      \
189 	    || (inptr[1] == '$'						      \
190 		&& (inptr + 3 > inend					      \
191 		    || (inptr[2] == ')' && inptr + 4 > inend)		      \
192 		    || (inptr[2] == '*' && inptr + 4 > inend)		      \
193 		    || (inptr[2] == '+' && inptr + 4 > inend)))		      \
194 	    || (inptr[1] == SS2_1 && inptr + 4 > inend)			      \
195 	    || (inptr[1] == SS3_1 && inptr + 4 > inend))		      \
196 	  {								      \
197 	    result = __GCONV_INCOMPLETE_INPUT;				      \
198 	    break;							      \
199 	  }								      \
200 	if (inptr[1] == '$'						      \
201 	    && ((inptr[2] == ')'					      \
202 		 && (inptr[3] == 'A'					      \
203 		     || (X12345 != '\0' && inptr[3] == X12345)		      \
204 		     || inptr[3] == 'E' || inptr[3] == 'G'))		      \
205 		|| (inptr[2] == '*'					      \
206 		    && ((X7589 != '\0' && inptr[3] == X7589)		      \
207 			|| (X13131 != '\0' && inptr[3] == X13131)	      \
208 			|| inptr[3] == 'H'))				      \
209 		|| (inptr[2] == '+'					      \
210 		    && ((X7590 != '\0' && inptr[3] == X7590)		      \
211 			|| (X13132 != '\0' && inptr[3] == X13132)	      \
212 			|| inptr[3] == 'I' || inptr[3] == 'J'		      \
213 			|| inptr[3] == 'K' || inptr[3] == 'L'		      \
214 			|| inptr[3] == 'M'))))				      \
215 	  {								      \
216 	    /* OK, we accept those character sets.  */			      \
217 	    if (inptr[3] == 'A')					      \
218 	      ann = (ann & ~SO_ann) | GB2312_ann;			      \
219 	    else if (inptr[3] == 'G')					      \
220 	      ann = (ann & ~SO_ann) | CNS11643_1_ann;			      \
221 	    else if (inptr[3] == 'E')					      \
222 	      ann = (ann & ~SO_ann) | ISO_IR_165_ann;			      \
223 	    else if (X12345 != '\0' && inptr[3] == X12345)		      \
224 	      ann = (ann & ~SO_ann) | GB12345_ann;			      \
225 	    else if (inptr[3] == 'H')					      \
226 	      ann = (ann & ~SS2_ann) | CNS11643_2_ann;			      \
227 	    else if (X7589 != '\0' && inptr[3] == X7589)		      \
228 	      ann = (ann & ~SS2_ann) | GB7589_ann;			      \
229 	    else if (X13131 != '\0' && inptr[3] == X13131)		      \
230 	      ann = (ann & ~SS2_ann) | GB13131_ann;			      \
231 	    else if (inptr[3] == 'I')					      \
232 	      ann = (ann & ~SS3_ann) | CNS11643_3_ann;			      \
233 	    else if (inptr[3] == 'J')					      \
234 	      ann = (ann & ~SS3_ann) | CNS11643_4_ann;			      \
235 	    else if (inptr[3] == 'K')					      \
236 	      ann = (ann & ~SS3_ann) | CNS11643_5_ann;			      \
237 	    else if (inptr[3] == 'L')					      \
238 	      ann = (ann & ~SS3_ann) | CNS11643_6_ann;			      \
239 	    else if (inptr[3] == 'M')					      \
240 	      ann = (ann & ~SS3_ann) | CNS11643_7_ann;			      \
241 	    else if (X7590 != '\0' && inptr[3] == X7590)		      \
242 	      ann = (ann & ~SS3_ann) | GB7590_ann;			      \
243 	    else if (X13132 != '\0' && inptr[3] == X13132)		      \
244 	      ann = (ann & ~SS3_ann) | GB13132_ann;			      \
245 	    inptr += 4;							      \
246 	    continue;							      \
247 	  }								      \
248       }									      \
249     else if (ch == SO)							      \
250       {									      \
251 	/* Switch to use GB2312, GB12345, CNS 11643 plane 1, or ISO-IR-165,   \
252 	   depending on which S0 designation came last.  The only problem     \
253 	   is what to do with faulty input files where no designator came.    \
254 	   XXX For now I'll default to use GB2312.  If this is not the	      \
255 	   best behavior (e.g., we should flag an error) let me know.  */     \
256 	++inptr;							      \
257 	if ((ann & SO_ann) != 0)					      \
258 	  switch (ann & SO_ann)						      \
259 	    {								      \
260 	    case GB2312_ann:						      \
261 	      set = GB2312_set;						      \
262 	      break;							      \
263 	    case GB12345_ann:						      \
264 	      set = GB12345_set;					      \
265 	      break;							      \
266 	    case CNS11643_1_ann:					      \
267 	      set = CNS11643_1_set;					      \
268 	      break;							      \
269 	    case ISO_IR_165_ann:					      \
270 	      set = ISO_IR_165_set;					      \
271 	      break;							      \
272 	    default:							      \
273 	      abort ();							      \
274 	    }								      \
275 	else								      \
276 	  {								      \
277 	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
278 	  }								      \
279 	continue;							      \
280       }									      \
281     else if (ch == SI)							      \
282       {									      \
283 	/* Switch to use ASCII.  */					      \
284 	++inptr;							      \
285 	set = ASCII_set;						      \
286 	continue;							      \
287       }									      \
288 									      \
289     if (ch == ESC && inptr[1] == SS2_1)					      \
290       {									      \
291 	/* This is a character from CNS 11643 plane 2.			      \
292 	   XXX We could test here whether the use of this character	      \
293 	   set was announced.						      \
294 	   XXX Currently GB7589 and GB13131 are not supported.  */	      \
295 	inptr += 2;							      \
296 	ch = cns11643l2_to_ucs4 (&inptr, 2, 0);				      \
297 	if (ch == __UNKNOWN_10646_CHAR)					      \
298 	  STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \
299       }									      \
300     /* Note that we can assume here that at least 4 bytes are available if    \
301        the first byte is ESC since otherwise the first if would have been     \
302        true.  */							      \
303     else if (ch == ESC && inptr[1] == SS3_1)				      \
304       {									      \
305 	/* This is a character from CNS 11643 plane 3 or higher.	      \
306 	   XXX Currently GB7590 and GB13132 are not supported.  */	      \
307 	unsigned char buf[3];						      \
308 	const unsigned char *tmp = buf;					      \
309 									      \
310 	buf[1] = inptr[2];						      \
311 	buf[2] = inptr[3];						      \
312 	switch (ann & SS3_ann)						      \
313 	  {								      \
314 	  case CNS11643_3_ann:						      \
315 	    buf[0] = 0x23;						      \
316 	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \
317 	    break;							      \
318 	  case CNS11643_4_ann:						      \
319 	    buf[0] = 0x24;						      \
320 	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \
321 	    break;							      \
322 	  case CNS11643_5_ann:						      \
323 	    buf[0] = 0x25;						      \
324 	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \
325 	    break;							      \
326 	  case CNS11643_6_ann:						      \
327 	    buf[0] = 0x26;						      \
328 	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \
329 	    break;							      \
330 	  case CNS11643_7_ann:						      \
331 	    buf[0] = 0x27;						      \
332 	    ch = cns11643_to_ucs4 (&tmp, 3, 0);				      \
333 	    break;							      \
334 	  default:							      \
335 	    /* XXX Currently GB7590 and GB13132 are not supported.  */	      \
336 	    ch = __UNKNOWN_10646_CHAR;					      \
337 	    break;							      \
338 	  }								      \
339 	if (ch == __UNKNOWN_10646_CHAR)					      \
340 	  {								      \
341 	    STANDARD_FROM_LOOP_ERR_HANDLER (4);				      \
342 	  }								      \
343 	assert (tmp == buf + 3);					      \
344 	inptr += 4;							      \
345       }									      \
346     else if (set == ASCII_set)						      \
347       {									      \
348 	/* Almost done, just advance the input pointer.  */		      \
349 	++inptr;							      \
350       }									      \
351     else								      \
352       {									      \
353 	/* That's pretty easy, we have a dedicated functions for this.  */    \
354 	if (inend - inptr < 2)						      \
355 	  {								      \
356 	    result = __GCONV_INCOMPLETE_INPUT;				      \
357 	    break;							      \
358 	  }								      \
359 	if (set == GB2312_set)						      \
360 	  ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0);		      \
361 	else if (set == ISO_IR_165_set)					      \
362 	  ch = isoir165_to_ucs4 (&inptr, inend - inptr);		      \
363 	else								      \
364 	  {								      \
365 	    assert (set == CNS11643_1_set);				      \
366 	    ch = cns11643l1_to_ucs4 (&inptr, inend - inptr, 0);		      \
367 	  }								      \
368 									      \
369 	if (ch == 0)							      \
370 	  {								      \
371 	    result = __GCONV_INCOMPLETE_INPUT;				      \
372 	    break;							      \
373 	  }								      \
374 	else if (ch == __UNKNOWN_10646_CHAR)				      \
375 	  {								      \
376 	    STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \
377 	  }								      \
378       }									      \
379 									      \
380     *((uint32_t *) outptr) = ch;					      \
381     outptr += sizeof (uint32_t);					      \
382   }
383 #define EXTRA_LOOP_DECLS	, int *setp
384 #define INIT_PARAMS		int set = (*setp >> 3) & CURRENT_MASK; \
385 				int ann = (*setp >> 3) & ~CURRENT_MASK
386 #define UPDATE_PARAMS		*setp = (set | ann) << 3
387 #define LOOP_NEED_FLAGS
388 #include <iconv/loop.c>
389 
390 
391 /* Next, define the other direction.  */
392 #define MIN_NEEDED_INPUT	TO_LOOP_MIN_NEEDED_FROM
393 #define MAX_NEEDED_INPUT	TO_LOOP_MAX_NEEDED_FROM
394 #define MIN_NEEDED_OUTPUT	TO_LOOP_MIN_NEEDED_TO
395 #define MAX_NEEDED_OUTPUT	TO_LOOP_MAX_NEEDED_TO
396 #define LOOPFCT			TO_LOOP
397 /* With GCC 5.3 when compiling with -Os the compiler emits a warning
398    that buf[0] and buf[1] may be used uninitialized.  This can only
399    happen in the case where tmpbuf[3] is used, and in that case the
400    write to the tmpbuf[1] and tmpbuf[2] was assured because
401    ucs4_to_cns11643 would have filled in those entries.  The difficulty
402    is in getting the compiler to see this logic because tmpbuf[0] is
403    involved in determining the code page and is the indicator that
404    tmpbuf[2] is initialized.  */
405 DIAG_PUSH_NEEDS_COMMENT;
406 DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized");
407 #define BODY \
408   {									      \
409     uint32_t ch;							      \
410     size_t written = 0;							      \
411 									      \
412     ch = *((const uint32_t *) inptr);					      \
413 									      \
414     /* First see whether we can write the character using the currently	      \
415        selected character set.  */					      \
416     if (ch < 0x80)							      \
417       {									      \
418 	if (set != ASCII_set)						      \
419 	  {								      \
420 	    *outptr++ = SI;						      \
421 	    set = ASCII_set;						      \
422 	    if (outptr == outend)					      \
423 	      {								      \
424 		result = __GCONV_FULL_OUTPUT;				      \
425 		break;							      \
426 	      }								      \
427 	  }								      \
428 									      \
429 	*outptr++ = ch;							      \
430 	written = 1;							      \
431 									      \
432 	/* At the end of the line we have to clear the `ann' flags since      \
433 	   every line must contain this information again.  */		      \
434 	if (ch == L'\n')						      \
435 	  ann = 0;							      \
436       }									      \
437     else								      \
438       {									      \
439 	unsigned char buf[2] = { 0, 0 };				      \
440 	int used;							      \
441 									      \
442 	if (set == GB2312_set || ((ann & SO_ann) != CNS11643_1_ann	      \
443 				  && (ann & SO_ann) != ISO_IR_165_ann))	      \
444 	  {								      \
445 	    written = ucs4_to_gb2312 (ch, buf, 2);			      \
446 	    used = GB2312_set;						      \
447 	  }								      \
448 	else if (set == ISO_IR_165_set || (ann & SO_ann) == ISO_IR_165_set)   \
449 	  {								      \
450 	    written = ucs4_to_isoir165 (ch, buf, 2);			      \
451 	    used = ISO_IR_165_set;					      \
452 	  }								      \
453 	else								      \
454 	  {								      \
455 	    written = ucs4_to_cns11643l1 (ch, buf, 2);			      \
456 	    used = CNS11643_1_set;					      \
457 	  }								      \
458 									      \
459 	if (written == __UNKNOWN_10646_CHAR)				      \
460 	  {								      \
461 	    /* Cannot convert it using the currently selected SO set.	      \
462 	       Next try the SS2 set.  */				      \
463 	    written = ucs4_to_cns11643l2 (ch, buf, 2);			      \
464 	    if (written != __UNKNOWN_10646_CHAR)			      \
465 	      /* Yep, that worked.  */					      \
466 	      used = CNS11643_2_set;					      \
467 	    else							      \
468 	      {								      \
469 		unsigned char tmpbuf[3];				      \
470 									      \
471 		switch (0)						      \
472 		  {							      \
473 		  default:						      \
474 		    /* Well, see whether we have to change the SO set.  */    \
475 									      \
476 		    if (used != GB2312_set)				      \
477 		      {							      \
478 			written = ucs4_to_gb2312 (ch, buf, 2);		      \
479 			if (written != __UNKNOWN_10646_CHAR)		      \
480 			  {						      \
481 			    used = GB2312_set;				      \
482 			    break;					      \
483 			  }						      \
484 		      }							      \
485 									      \
486 		    if (used != ISO_IR_165_set)				      \
487 		      {							      \
488 			written = ucs4_to_isoir165 (ch, buf, 2);	      \
489 			if (written != __UNKNOWN_10646_CHAR)		      \
490 			  {						      \
491 			    used = ISO_IR_165_set;			      \
492 			    break;					      \
493 			  }						      \
494 		      }							      \
495 									      \
496 		    if (used != CNS11643_1_set)				      \
497 		      {							      \
498 			written = ucs4_to_cns11643l1 (ch, buf, 2);	      \
499 			if (written != __UNKNOWN_10646_CHAR)		      \
500 			  {						      \
501 			    used = CNS11643_1_set;			      \
502 			    break;					      \
503 			  }						      \
504 		      }							      \
505 									      \
506 		    written = ucs4_to_cns11643 (ch, tmpbuf, 3);		      \
507 		    if (written == 3 && tmpbuf[0] >= 3 && tmpbuf[0] <= 7)     \
508 		      {							      \
509 			buf[0] = tmpbuf[1];				      \
510 			buf[1] = tmpbuf[2];				      \
511 			switch (tmpbuf[0])				      \
512 			  {						      \
513 			  case 3:					      \
514 			    used = CNS11643_3_set;			      \
515 			    break;					      \
516 			  case 4:					      \
517 			    used = CNS11643_4_set;			      \
518 			    break;					      \
519 			  case 5:					      \
520 			    used = CNS11643_5_set;			      \
521 			    break;					      \
522 			  case 6:					      \
523 			    used = CNS11643_6_set;			      \
524 			    break;					      \
525 			  case 7:					      \
526 			    used = CNS11643_7_set;			      \
527 			    break;					      \
528 			  default:					      \
529 			    abort ();					      \
530 			  }						      \
531 			written = 2;					      \
532 			break;						      \
533 		      }							      \
534 									      \
535 		    /* XXX Currently GB7590 and GB13132 are not supported.  */\
536 									      \
537 		    /* Even this does not work.  Error.  */		      \
538 		    used = ASCII_set;					      \
539 		  }							      \
540 		if (used == ASCII_set)					      \
541 		  {							      \
542 		    UNICODE_TAG_HANDLER (ch, 4);			      \
543 		    STANDARD_TO_LOOP_ERR_HANDLER (4);			      \
544 		  }							      \
545 	      }								      \
546 	  }								      \
547 	assert (written == 2);						      \
548 									      \
549 	/* See whether we have to emit an escape sequence.  */		      \
550 	if (set != used)						      \
551 	  {								      \
552 	    /* First see whether we announced that we use this		      \
553 	       character set.  */					      \
554 	    if ((used & SO_mask) != 0 && (ann & SO_ann) != (used << 8))	      \
555 	      {								      \
556 		const char *escseq;					      \
557 									      \
558 		if (outptr + 4 > outend)				      \
559 		  {							      \
560 		    result = __GCONV_FULL_OUTPUT;			      \
561 		    break;						      \
562 		  }							      \
563 									      \
564 		assert (used >= 1 && used <= 4);			      \
565 		escseq = ")A\0\0)G)E" + (used - 1) * 2;			      \
566 		*outptr++ = ESC;					      \
567 		*outptr++ = '$';					      \
568 		*outptr++ = *escseq++;					      \
569 		*outptr++ = *escseq++;					      \
570 									      \
571 		ann = (ann & ~SO_ann) | (used << 8);			      \
572 	      }								      \
573 	    else if ((used & SS2_mask) != 0 && (ann & SS2_ann) != (used << 8))\
574 	      {								      \
575 		const char *escseq;					      \
576 									      \
577 		assert (used == CNS11643_2_set); /* XXX */		      \
578 		escseq = "*H";						      \
579 		*outptr++ = ESC;					      \
580 		*outptr++ = '$';					      \
581 		*outptr++ = *escseq++;					      \
582 		*outptr++ = *escseq++;					      \
583 									      \
584 		ann = (ann & ~SS2_ann) | (used << 8);			      \
585 	      }								      \
586 	    else if ((used & SS3_mask) != 0 && (ann & SS3_ann) != (used << 8))\
587 	      {								      \
588 		const char *escseq;					      \
589 									      \
590 		assert ((used >> 5) >= 3 && (used >> 5) <= 7);		      \
591 		escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2;		      \
592 		*outptr++ = ESC;					      \
593 		*outptr++ = '$';					      \
594 		*outptr++ = *escseq++;					      \
595 		*outptr++ = *escseq++;					      \
596 									      \
597 		ann = (ann & ~SS3_ann) | (used << 8);			      \
598 	      }								      \
599 									      \
600 	    if (used == CNS11643_2_set)					      \
601 	      {								      \
602 		if (outptr + 2 > outend)				      \
603 		  {							      \
604 		    result = __GCONV_FULL_OUTPUT;			      \
605 		    break;						      \
606 		  }							      \
607 		*outptr++ = SS2_0;					      \
608 		*outptr++ = SS2_1;					      \
609 	      }								      \
610 	    else if (used >= CNS11643_3_set && used <= CNS11643_7_set)	      \
611 	      {								      \
612 		if (outptr + 2 > outend)				      \
613 		  {							      \
614 		    result = __GCONV_FULL_OUTPUT;			      \
615 		    break;						      \
616 		  }							      \
617 		*outptr++ = SS3_0;					      \
618 		*outptr++ = SS3_1;					      \
619 	      }								      \
620 	    else							      \
621 	      {								      \
622 		/* We only have to emit something if currently ASCII is	      \
623 		   selected.  Otherwise we are switching within the	      \
624 		   SO charset.  */					      \
625 		if (set == ASCII_set)					      \
626 		  {							      \
627 		    if (outptr + 1 > outend)				      \
628 		      {							      \
629 			result = __GCONV_FULL_OUTPUT;			      \
630 			break;						      \
631 		      }							      \
632 		    *outptr++ = SO;					      \
633 		  }							      \
634 	      }								      \
635 									      \
636 	    /* Always test the length here since we have used up all the      \
637 	       guaranteed output buffer slots.  */			      \
638 	    if (outptr + 2 > outend)					      \
639 	      {								      \
640 		result = __GCONV_FULL_OUTPUT;				      \
641 		break;							      \
642 	      }								      \
643 	  }								      \
644 	else if (outptr + 2 > outend)					      \
645 	  {								      \
646 	    result = __GCONV_FULL_OUTPUT;				      \
647 	    break;							      \
648 	  }								      \
649 									      \
650 	*outptr++ = buf[0];						      \
651 	*outptr++ = buf[1];						      \
652 	set = used;							      \
653       }									      \
654 									      \
655     /* Now that we wrote the output increment the input pointer.  */	      \
656     inptr += 4;								      \
657   }
658 DIAG_POP_NEEDS_COMMENT;
659 #define EXTRA_LOOP_DECLS	, int *setp
660 #define INIT_PARAMS		int set = (*setp >> 3) & CURRENT_MASK; \
661 				int ann = (*setp >> 3) & ~CURRENT_MASK
662 #define REINIT_PARAMS		do					      \
663 				  {					      \
664 				    set = (*setp >> 3) & CURRENT_MASK;	      \
665 				    ann = (*setp >> 3) & ~CURRENT_MASK;	      \
666 				  }					      \
667 				while (0)
668 #define UPDATE_PARAMS		*setp = (set | ann) << 3
669 #define LOOP_NEED_FLAGS
670 #include <iconv/loop.c>
671 
672 
673 /* Now define the toplevel functions.  */
674 #include <iconv/skeleton.c>
675