1 /* Conversion module for UTF-7.
2    Copyright (C) 2000-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 /* UTF-7 is a legacy encoding used for transmitting Unicode within the
20    ASCII character set, used primarily by mail agents.  New programs
21    are encouraged to use UTF-8 instead.
22 
23    UTF-7 is specified in RFC 2152 (and old RFC 1641, RFC 1642).  The
24    original Base64 encoding is defined in RFC 2045.  */
25 
26 #include <dlfcn.h>
27 #include <gconv.h>
28 #include <stdint.h>
29 #include <stdlib.h>
30 #include <string.h>
31 
32 
/* The UTF-7 flavours handled by this module.  gconv_init derives the
   value by counting entries while it walks the `names' table, so the
   enumerators must stay in the same order as that table.  */
enum variant
{
  /* Selected by the name "UTF-7//"; the shift character is '+'.  */
  UTF7,
  /* Selected by the name "UTF-7-IMAP//"; the shift character is '&'.  */
  UTF_7_IMAP
};
38 
/* Charset names handled by this module, stored back to back as
   NUL-terminated strings.  An empty string marks the end of the list,
   which is how gconv_init knows when to stop scanning.
   Must be in the same order as enum variant above.  */
static const char names[] =
  "UTF-7//\0"
  "UTF-7-IMAP//\0"
  "\0";
44 
45 static uint32_t
shift_character(enum variant const var)46 shift_character (enum variant const var)
47 {
48   if (var == UTF7)
49     return '+';
50   else if (var == UTF_7_IMAP)
51     return '&';
52   else
53     abort ();
54 }
55 
/* Return true if CH lies in the closed interval
   [LOWER_BOUND, UPPER_BOUND], false otherwise.  An interval whose lower
   bound exceeds its upper bound contains nothing.  */
static bool
between (uint32_t const ch,
	 uint32_t const lower_bound, uint32_t const upper_bound)
{
  if (ch < lower_bound)
    return false;

  return ch <= upper_bound;
}
62 
63 /* The set of "direct characters":
64    A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
65    FOR UTF-7-IMAP
66    A-Z a-z 0-9 ' ( ) , - . / : ? space
67    ! " # $ % + * ; < = > @ [ \ ] ^ _ ` { | } ~
68 */
69 
70 static bool
isdirect(uint32_t ch,enum variant var)71 isdirect (uint32_t ch, enum variant var)
72 {
73   if (var == UTF7)
74     return (between (ch, 'A', 'Z')
75 	    || between (ch, 'a', 'z')
76 	    || between (ch, '0', '9')
77 	    || ch == '\'' || ch == '(' || ch == ')'
78 	    || between (ch, ',', '/')
79 	    || ch == ':' || ch == '?'
80 	    || ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
81   else if (var == UTF_7_IMAP)
82     return (ch != '&' && between (ch, ' ', '~'));
83   abort ();
84 }
85 
86 
87 /* The set of "direct and optional direct characters":
88    A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr
89    (UTF-7 only)
90    ! " # $ % & * ; < = > @ [ ] ^ _ ` { | }
91 */
92 
93 static bool
isxdirect(uint32_t ch,enum variant var)94 isxdirect (uint32_t ch, enum variant var)
95 {
96   if (isdirect (ch, var))
97     return true;
98   if (var != UTF7)
99     return false;
100   return between (ch, '!', '&')
101     || ch == '*'
102     || between (ch, ';', '@')
103     || (between (ch, '[', '`') && ch != '\\')
104     || between (ch, '{', '}');
105 }
106 
107 
/* Characters that need to trigger an explicit shift back to US-ASCII
   (UTF-7 only): the base64 alphabet plus '-', the shift-back character
   itself:
   A-Z a-z 0-9 + / -
*/

static bool
needs_explicit_shift (uint32_t ch)
{
  if (ch == '+' || ch == '-')
    return true;

  /* '/' is the character right before '0', so a single range covers the
     slash and all the digits.  */
  if (between (ch, '/', '9'))
    return true;

  return between (ch, 'a', 'z') || between (ch, 'A', 'Z');
}
120 
121 
122 /* Converts a value in the range 0..63 to a base64 encoded char.  */
123 static unsigned char
base64(unsigned int i,enum variant var)124 base64 (unsigned int i, enum variant var)
125 {
126   if (i < 26)
127     return i + 'A';
128   else if (i < 52)
129     return i - 26 + 'a';
130   else if (i < 62)
131     return i - 52 + '0';
132   else if (i == 62)
133     return '+';
134   else if (i == 63 && var == UTF7)
135     return '/';
136   else if (i == 63 && var == UTF_7_IMAP)
137     return ',';
138   else
139     abort ();
140 }
141 
142 
/* Definitions used in the body of the `gconv' function.  */
/* This file provides its own gconv_init and gconv_end below.
   NOTE(review): presumably the zero values tell <iconv/skeleton.c> not
   to generate default ones -- confirm against skeleton.c.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define FROM_LOOP		from_utf7_loop
#define TO_LOOP			to_utf7_loop
/* The "from" side is UTF-7.  The decoder looks at one byte at a time;
   the encoder reserves at most six output bytes for one character.  */
#define MIN_NEEDED_FROM		1
#define MAX_NEEDED_FROM		6
/* The "to" side is read with get32 and written with put32, i.e. always
   four bytes per character.  */
#define MIN_NEEDED_TO		4
#define MAX_NEEDED_TO		4
#define ONE_DIRECTION		0
#define FROM_DIRECTION      (dir == from_utf7)
/* Locals set up for the conversion loops:
     saved_state  scratch copy of the state, used by SAVE_RESET_STATE
     statep       the conversion state of this call
     dir, var     direction and variant stored by gconv_init  */
#define PREPARE_LOOP \
  mbstate_t saved_state;						      \
  mbstate_t *statep = data->__statep;					      \
  enum direction dir = ((struct utf7_data *) step->__data)->dir;	      \
  enum variant var = ((struct utf7_data *) step->__data)->var;
/* Passed to both loop functions; matches EXTRA_LOOP_DECLS.  */
#define EXTRA_LOOP_ARGS		, statep, var
160 
161 
/* Direction of a conversion step, as determined by gconv_init.  */
enum direction
{
  /* Neither charset name of the step matched an entry of `names'.  */
  illegal_dir,
  /* The step's source charset is one of the UTF-7 variants.  */
  from_utf7,
  /* The step's target charset is one of the UTF-7 variants.  */
  to_utf7
};
168 
/* Per-step private data.  Allocated by gconv_init, stored in the step's
   __data member and released by gconv_end.  */
struct utf7_data
{
  /* Whether this step decodes from or encodes to UTF-7.  */
  enum direction dir;
  /* Which UTF-7 flavour the step deals with.  */
  enum variant var;
};
174 
/* Since we might have to reset input pointer we must be able to save
   and restore the state.  With a nonzero SAVE argument the current
   state *statep is copied into saved_state; with a zero argument the
   saved copy is written back to *statep.  Both variables are declared
   by PREPARE_LOOP.  The expansion deliberately lacks a trailing
   semicolon; the user of the macro supplies it.  */
#define SAVE_RESET_STATE(Save) \
  if (Save)								      \
    saved_state = *statep;						      \
  else									      \
    *statep = saved_state
182 
183 int
gconv_init(struct __gconv_step * step)184 gconv_init (struct __gconv_step *step)
185 {
186   /* Determine which direction.  */
187   struct utf7_data *new_data;
188   enum direction dir = illegal_dir;
189 
190   enum variant var = 0;
191   for (const char *name = names; *name != '\0';
192        name = __rawmemchr (name, '\0') + 1)
193     {
194       if (__strcasecmp (step->__from_name, name) == 0)
195 	{
196 	  dir = from_utf7;
197 	  break;
198 	}
199       else if (__strcasecmp (step->__to_name, name) == 0)
200 	{
201 	  dir = to_utf7;
202 	  break;
203 	}
204       ++var;
205     }
206 
207   if (__glibc_likely (dir != illegal_dir))
208     {
209       new_data = malloc (sizeof (*new_data));
210       if (new_data == NULL)
211 	return __GCONV_NOMEM;
212 
213       new_data->dir = dir;
214       new_data->var = var;
215       step->__data = new_data;
216 
217       if (dir == from_utf7)
218 	{
219 	  step->__min_needed_from = MIN_NEEDED_FROM;
220 	  step->__max_needed_from = MAX_NEEDED_FROM;
221 	  step->__min_needed_to = MIN_NEEDED_TO;
222 	  step->__max_needed_to = MAX_NEEDED_TO;
223 	}
224       else
225 	{
226 	  step->__min_needed_from = MIN_NEEDED_TO;
227 	  step->__max_needed_from = MAX_NEEDED_TO;
228 	  step->__min_needed_to = MIN_NEEDED_FROM;
229 	  step->__max_needed_to = MAX_NEEDED_FROM;
230 	}
231     }
232   else
233     return __GCONV_NOCONV;
234 
235   step->__stateful = 1;
236 
237   return __GCONV_OK;
238 }
239 
/* Release the private data which gconv_init attached to the step
   DATA.  */
void
gconv_end (struct __gconv_step *data)
{
  void *private_data = data->__data;

  free (private_data);
}
245 
246 
247 
/* First define the conversion function from UTF-7 to UCS4.
   The state is structured as follows:
     __count bit 2..0: zero
     __count bit 8..3: shift
     __wch: data
   Precise meaning:
     shift      data
       0         --          not inside base64 encoding
     1..32  XX..XX00..00     inside base64, (32 - shift) bits pending
   This state layout is simpler than relying on STORE_REST/UNPACK_BYTES.

   When shift = 0, __wch needs to store at most one lookahead byte (see
   __GCONV_INCOMPLETE_INPUT below).

   Outline of BODY, which looks at one input byte per iteration:

   Outside of base64 (shift == 0)
     - a byte accepted by isxdirect is written as one UCS4 character;
     - the shift character followed by '-' yields the shift character
       itself; followed by anything else it switches to base64 with
       shift = 32 and an empty accumulator.  If the shift character is
       the last byte available, the loop stops with
       __GCONV_INCOMPLETE_INPUT because the following byte is needed;
     - every other byte is invalid input.

   Inside of base64
     - every base64 digit adds six bits to the accumulator in __wch,
       below the bits which are already pending;
     - once 16 bits are complete and they are not a High Surrogate, they
       are written as one character and the surplus bits stay pending;
     - a High Surrogate is kept until the following Low Surrogate is
       complete, then the combined character is written;
     - any other byte ends the base64 section.  This is an error if
       nonzero bits are left over, if a UTF-16 unit is incomplete or,
       for UTF-7-IMAP, if the byte is not '-'.  A terminating '-' is
       consumed; any other terminator is left in place and handled by
       the next iteration outside of base64.
*/
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
#define LOOPFCT			FROM_LOOP
#define BODY \
  {									      \
    uint_fast8_t ch = *inptr;						      \
									      \
    if ((statep->__count >> 3) == 0)					      \
      {									      \
	/* base64 encoding inactive.  */				      \
	if (isxdirect (ch, var))					      \
	  {								      \
	    inptr++;							      \
	    put32 (outptr, ch);						      \
	    outptr += 4;						      \
	  }								      \
	else if (__glibc_likely (ch == shift_character (var)))		      \
	  {								      \
	    if (__glibc_unlikely (inptr + 2 > inend))			      \
	      {								      \
		/* Not enough input available.  */			      \
		result = __GCONV_INCOMPLETE_INPUT;			      \
		break;							      \
	      }								      \
	    if (inptr[1] == '-')					      \
	      {								      \
		inptr += 2;						      \
		put32 (outptr, ch);					      \
		outptr += 4;						      \
	      }								      \
	    else							      \
	      {								      \
		/* Switch into base64 mode.  */				      \
		inptr++;						      \
		statep->__count = (32 << 3);				      \
		statep->__value.__wch = 0;				      \
	      }								      \
	  }								      \
	else								      \
	  {								      \
	    /* The input is invalid.  */				      \
	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
	  }								      \
      }									      \
    else								      \
      {									      \
	/* base64 encoding active.  */					      \
	uint32_t i;							      \
	int shift;							      \
									      \
	if (ch >= 'A' && ch <= 'Z')					      \
	  i = ch - 'A';							      \
	else if (ch >= 'a' && ch <= 'z')				      \
	  i = ch - 'a' + 26;						      \
	else if (ch >= '0' && ch <= '9')				      \
	  i = ch - '0' + 52;						      \
	else if (ch == '+')						      \
	  i = 62;							      \
	else if ((var == UTF7 && ch == '/')                                   \
		  || (var == UTF_7_IMAP && ch == ','))			      \
	  i = 63;							      \
	else								      \
	  {								      \
	    /* Terminate base64 encoding.  */				      \
									      \
	    /* If accumulated data is nonzero, the input is invalid.  */      \
	    /* Also, partial UTF-16 characters are invalid.  */		      \
	    /* In IMAP variant, must be terminated by '-'.  */		      \
	    if (__glibc_unlikely (statep->__value.__wch != 0)		      \
		|| __glibc_unlikely ((statep->__count >> 3) <= 26)	      \
		|| __glibc_unlikely (var == UTF_7_IMAP && ch != '-'))	      \
	      {								      \
		STANDARD_FROM_LOOP_ERR_HANDLER ((statep->__count = 0, 1));    \
	      }								      \
									      \
	    if (ch == '-')						      \
	      inptr++;							      \
									      \
	    statep->__count = 0;					      \
	    continue;							      \
	  }								      \
									      \
	/* Concatenate the base64 integer i to the accumulator.  */	      \
	shift = (statep->__count >> 3);					      \
	if (shift > 6)							      \
	  {								      \
	    uint32_t wch;						      \
									      \
	    shift -= 6;							      \
	    wch = statep->__value.__wch | (i << shift);			      \
									      \
	    if (shift <= 16 && shift > 10)				      \
	      {								      \
		/* A UTF-16 character has just been completed.  */	      \
		uint32_t wc1 = wch >> 16;				      \
									      \
		/* UTF-16: When we see a High Surrogate, we must also decode  \
		   the following Low Surrogate.  */			      \
		if (!(wc1 >= 0xd800 && wc1 < 0xdc00))			      \
		  {							      \
		    wch = wch << 16;					      \
		    shift += 16;					      \
		    put32 (outptr, wc1);				      \
		    outptr += 4;					      \
		  }							      \
	      }								      \
	    else if (shift <= 10 && shift > 4)				      \
	      {								      \
		/* After a High Surrogate, verify that the next 16 bit	      \
		   indeed form a Low Surrogate.  */			      \
		uint32_t wc2 = wch & 0xffff;				      \
									      \
		if (! __glibc_likely (wc2 >= 0xdc00 && wc2 < 0xe000))	      \
		  {							      \
		    STANDARD_FROM_LOOP_ERR_HANDLER ((statep->__count = 0, 1));\
		  }							      \
	      }								      \
									      \
	    statep->__value.__wch = wch;				      \
	  }								      \
	else								      \
	  {								      \
	    /* A UTF-16 surrogate pair has just been completed.  */	      \
	    uint32_t wc1 = (uint32_t) statep->__value.__wch >> 16;	      \
	    uint32_t wc2 = ((uint32_t) statep->__value.__wch & 0xffff)	      \
			   | (i >> (6 - shift));			      \
									      \
	    statep->__value.__wch = (i << shift) << 26;			      \
	    shift += 26;						      \
									      \
	    assert (wc1 >= 0xd800 && wc1 < 0xdc00);			      \
	    assert (wc2 >= 0xdc00 && wc2 < 0xe000);			      \
	    put32 (outptr,						      \
		   0x10000 + ((wc1 - 0xd800) << 10) + (wc2 - 0xdc00));	      \
	    outptr += 4;						      \
	  }								      \
									      \
	statep->__count = shift << 3;					      \
									      \
	/* Now that we digested the input increment the input pointer.  */    \
	inptr++;							      \
      }									      \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS	, mbstate_t *statep, enum variant var
#include <iconv/loop.c>
410 
411 
/* Next, define the conversion from UCS4 to UTF-7.
   The state is structured as follows:
     __count bit 2..0: zero
     __count bit 4..3: shift
     __count bit 8..5: data
   Precise meaning:
     shift      data
       0         0           not inside base64 encoding
       1         0           inside base64, no pending bits
       2       XX00          inside base64, 2 bits known for next byte
       3       XXXX          inside base64, 4 bits known for next byte

   __count bit 2..0 and __wch are always zero, because this direction
   never returns __GCONV_INCOMPLETE_INPUT.

   Outline of BODY, which handles one UCS4 character per iteration:

   Outside of base64
     - a character accepted by isdirect is written as a single byte;
     - the shift character itself is written as shift character + '-';
     - any other character below 0x110000 opens a base64 section: the
       shift character is written, followed by the complete base64
       digits of the UTF-16 form of the character (a surrogate pair for
       characters from 0x10000 on).  The leftover bits are kept in the
       state;
     - larger values are passed to STANDARD_TO_LOOP_ERR_HANDLER.

   Inside of base64
     - a direct character (or '&' in the IMAP variant) closes the
       section: the pending bits, if any, are flushed as one digit,
       a '-' is written if the variant is UTF-7-IMAP or if
       needs_explicit_shift says so, then the character itself, and
       for an IMAP '&' a final '-' so that the output reads "&-";
     - any other character continues the section; its UTF-16 bits are
       appended to the pending ones and all complete digits are
       written.

   In all cases the number of bytes about to be written is computed
   first (`count') and checked against the end of the output buffer, so
   a character is either written completely or not at all.

   NOTE(review): no check for code points in the surrogate range
   0xd800..0xdfff is visible in this BODY; they take the `ch < 0x10000'
   path.  Confirm whether that is intended.
*/
#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
#define MAX_NEEDED_INPUT	MAX_NEEDED_TO
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
#define LOOPFCT			TO_LOOP
#define BODY \
  {									      \
    uint32_t ch = get32 (inptr);					      \
									      \
    if ((statep->__count & 0x18) == 0)					      \
      {									      \
	/* base64 encoding inactive */					      \
	if (isdirect (ch, var))						      \
	  {								      \
	    *outptr++ = (unsigned char) ch;				      \
	  }								      \
	else								      \
	  {								      \
	    size_t count;						      \
									      \
	    if (ch == shift_character (var))				      \
	      count = 2;						      \
	    else if (ch < 0x10000)					      \
	      count = 3;						      \
	    else if (ch < 0x110000)					      \
	      count = 6;						      \
	    else							      \
	      STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
									      \
	    if (__glibc_unlikely (outptr + count > outend))		      \
	      {								      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
									      \
	    *outptr++ = shift_character (var);				      \
	    if (ch == shift_character (var))				      \
	      *outptr++ = '-';						      \
	    else if (ch < 0x10000)					      \
	      {								      \
		*outptr++ = base64 (ch >> 10, var);			      \
		*outptr++ = base64 ((ch >> 4) & 0x3f, var);		      \
		statep->__count = ((ch & 15) << 5) | (3 << 3);		      \
	      }								      \
	    else if (ch < 0x110000)					      \
	      {								      \
		uint32_t ch1 = 0xd800 + ((ch - 0x10000) >> 10);		      \
		uint32_t ch2 = 0xdc00 + ((ch - 0x10000) & 0x3ff);	      \
									      \
		ch = (ch1 << 16) | ch2;					      \
		*outptr++ = base64 (ch >> 26, var);			      \
		*outptr++ = base64 ((ch >> 20) & 0x3f, var);		      \
		*outptr++ = base64 ((ch >> 14) & 0x3f, var);		      \
		*outptr++ = base64 ((ch >> 8) & 0x3f, var);		      \
		*outptr++ = base64 ((ch >> 2) & 0x3f, var);		      \
		statep->__count = ((ch & 3) << 7) | (2 << 3);		      \
	      }								      \
	    else							      \
	      abort ();							      \
	  }								      \
      }									      \
    else								      \
      {									      \
	/* base64 encoding active */					      \
	if ((var == UTF_7_IMAP && ch == '&') || isdirect (ch, var))	      \
	  {								      \
	    /* deactivate base64 encoding */				      \
	    size_t count;						      \
									      \
	    count = ((statep->__count & 0x18) >= 0x10)			      \
	      + (var == UTF_7_IMAP || needs_explicit_shift (ch))	      \
	      + (var == UTF_7_IMAP && ch == '&')			      \
	      + 1;							      \
	    if (__glibc_unlikely (outptr + count > outend))		      \
	      {								      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
									      \
	    if ((statep->__count & 0x18) >= 0x10)			      \
	      *outptr++ = base64 ((statep->__count >> 3) & ~3, var);	      \
	    if (var == UTF_7_IMAP || needs_explicit_shift (ch))		      \
	      *outptr++ = '-';						      \
	    *outptr++ = (unsigned char) ch;				      \
	    if (var == UTF_7_IMAP && ch == '&')				      \
	      *outptr++ = '-';						      \
	    statep->__count = 0;					      \
	  }								      \
	else								      \
	  {								      \
	    size_t count;						      \
									      \
	    if (ch < 0x10000)						      \
	      count = ((statep->__count & 0x18) >= 0x10 ? 3 : 2);	      \
	    else if (ch < 0x110000)					      \
	      count = ((statep->__count & 0x18) >= 0x18 ? 6 : 5);	      \
	    else							      \
	      STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
									      \
	    if (__glibc_unlikely (outptr + count > outend))		      \
	      {								      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
									      \
	    if (ch < 0x10000)						      \
	      {								      \
		switch ((statep->__count >> 3) & 3)			      \
		  {							      \
		  case 1:						      \
		    *outptr++ = base64 (ch >> 10, var);			      \
		    *outptr++ = base64 ((ch >> 4) & 0x3f, var);		      \
		    statep->__count = ((ch & 15) << 5) | (3 << 3);	      \
		    break;						      \
		  case 2:						      \
		    *outptr++ =						      \
		      base64 (((statep->__count >> 3) & ~3) | (ch >> 12),     \
			      var);					      \
		    *outptr++ = base64 ((ch >> 6) & 0x3f, var);		      \
		    *outptr++ = base64 (ch & 0x3f, var);		      \
		    statep->__count = (1 << 3);				      \
		    break;						      \
		  case 3:						      \
		    *outptr++ =						      \
		      base64 (((statep->__count >> 3) & ~3) | (ch >> 14),     \
			      var);					      \
		    *outptr++ = base64 ((ch >> 8) & 0x3f, var);		      \
		    *outptr++ = base64 ((ch >> 2) & 0x3f, var);		      \
		    statep->__count = ((ch & 3) << 7) | (2 << 3);	      \
		    break;						      \
		  default:						      \
		    abort ();						      \
		  }							      \
	      }								      \
	    else if (ch < 0x110000)					      \
	      {								      \
		uint32_t ch1 = 0xd800 + ((ch - 0x10000) >> 10);		      \
		uint32_t ch2 = 0xdc00 + ((ch - 0x10000) & 0x3ff);	      \
									      \
		ch = (ch1 << 16) | ch2;					      \
		switch ((statep->__count >> 3) & 3)			      \
		  {							      \
		  case 1:						      \
		    *outptr++ = base64 (ch >> 26, var);			      \
		    *outptr++ = base64 ((ch >> 20) & 0x3f, var);	      \
		    *outptr++ = base64 ((ch >> 14) & 0x3f, var);	      \
		    *outptr++ = base64 ((ch >> 8) & 0x3f, var);		      \
		    *outptr++ = base64 ((ch >> 2) & 0x3f, var);		      \
		    statep->__count = ((ch & 3) << 7) | (2 << 3);	      \
		    break;						      \
		  case 2:						      \
		    *outptr++ =						      \
		      base64 (((statep->__count >> 3) & ~3) | (ch >> 28),     \
			      var);					      \
		    *outptr++ = base64 ((ch >> 22) & 0x3f, var);	      \
		    *outptr++ = base64 ((ch >> 16) & 0x3f, var);	      \
		    *outptr++ = base64 ((ch >> 10) & 0x3f, var);	      \
		    *outptr++ = base64 ((ch >> 4) & 0x3f, var);		      \
		    statep->__count = ((ch & 15) << 5) | (3 << 3);	      \
		    break;						      \
		  case 3:						      \
		    *outptr++ =						      \
		      base64 (((statep->__count >> 3) & ~3) | (ch >> 30),     \
			      var);					      \
		    *outptr++ = base64 ((ch >> 24) & 0x3f, var);	      \
		    *outptr++ = base64 ((ch >> 18) & 0x3f, var);	      \
		    *outptr++ = base64 ((ch >> 12) & 0x3f, var);	      \
		    *outptr++ = base64 ((ch >> 6) & 0x3f, var);		      \
		    *outptr++ = base64 (ch & 0x3f, var);		      \
		    statep->__count = (1 << 3);				      \
		    break;						      \
		  default:						      \
		    abort ();						      \
		  }							      \
	      }								      \
	    else							      \
	      abort ();							      \
	  }								      \
      }									      \
									      \
    /* Now that we wrote the output increment the input pointer.  */	      \
    inptr += 4;								      \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS	, mbstate_t *statep, enum variant var
#include <iconv/loop.c>
613 
614 
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   When decoding from UTF-7 nothing is written; the state is simply
   cleared.  When encoding to UTF-7 and a base64 section is open, the
   pending bits (if any) are written as one last base64 digit followed
   by '-', which takes one or two bytes.  If the output buffer cannot
   hold them, status is set to __GCONV_FULL_OUTPUT and the state is left
   untouched.  The macro uses data, outbuf, outend, status and var from
   the code which expands it.  */
#define EMIT_SHIFT_TO_INIT \
  if (FROM_DIRECTION)							      \
    /* Nothing to emit.  */						      \
    memset (data->__statep, '\0', sizeof (mbstate_t));			      \
  else									      \
    {									      \
      /* The "to UTF-7" direction.  Flush the remaining bits and terminate    \
	 with a '-' byte.  This will guarantee correct decoding if more	      \
	 UTF-7 encoded text is added afterwards.  */			      \
      int state = data->__statep->__count;				      \
									      \
      if (state & 0x18)							      \
	{								      \
	  /* Deactivate base64 encoding.  */				      \
	  size_t count = ((state & 0x18) >= 0x10) + 1;			      \
									      \
	  if (__glibc_unlikely (outbuf + count > outend))		      \
	    /* We don't have enough room in the output buffer.  */	      \
	    status = __GCONV_FULL_OUTPUT;				      \
	  else								      \
	    {								      \
	      /* Write out the shift sequence.  */			      \
	      if ((state & 0x18) >= 0x10)				      \
		*outbuf++ = base64 ((state >> 3) & ~3, var);		      \
	      *outbuf++ = '-';						      \
									      \
	      data->__statep->__count = 0;				      \
	    }								      \
	}								      \
      else								      \
	data->__statep->__count = 0;					      \
    }
650 
651 
652 /* Now define the toplevel functions.  */
653 #include <iconv/skeleton.c>
654