1 /* Simple transformations functions.
2    Copyright (C) 1997-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #include <byteswap.h>
20 #include <dlfcn.h>
21 #include <endian.h>
22 #include <errno.h>
23 #include <gconv.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <wchar.h>
28 #include <sys/param.h>
29 #include <gconv_int.h>
30 
/* These definitions are in effect for the include of "gconv_builtin.h"
   that follows: an alias entry expands to nothing, and a transformation
   entry expands to an extern prototype for the conversion function
   named by its fifth argument.  */
#define BUILTIN_ALIAS(alias, target)
#define BUILTIN_TRANSFORMATION(from_cs, to_cs, cost, name, fct, btowc_fct, \
                               min_from, max_from, min_to, max_to)         \
  extern int fct (struct __gconv_step *, struct __gconv_step_data *,       \
                  const unsigned char **, const unsigned char *,           \
                  unsigned char **, size_t *, int, int);
37 #include "gconv_builtin.h"
38 
39 
40 #ifndef EILSEQ
41 # define EILSEQ EINVAL
42 #endif
43 
44 
/* Single byte to INTERNAL conversion that recognizes ASCII only: a byte
   below 0x80 maps to itself, any other byte yields WEOF.  STEP is not
   consulted.  */
wint_t
__gconv_btwoc_ascii (struct __gconv_step *step, unsigned char c)
{
  return c < 0x80 ? (wint_t) c : WEOF;
}
55 
56 
/* Transform from the internal, UCS4-like format to UCS4.  The two
   differ, if at all, only in endianness: the code below treats the
   internal format as host byte order, while Unicode/ISO 10646 says that
   the byte order is big endian unless some higher protocol specifies it
   differently.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs4
#define FROM_LOOP		internal_ucs4_loop
#define TO_LOOP			internal_ucs4_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0


/* Convert as many complete four-byte characters as fit into both
   buffers and advance *INPTRP and *OUTPTRP past them.  Returns
   __GCONV_EMPTY_INPUT when all input was consumed, __GCONV_FULL_OUTPUT
   when fewer than four bytes of output room remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  STEP, STEP_DATA and IRREVERSIBLE
   are not used.  On little endian hosts both buffers are accessed
   through uint32_t pointers.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order differs from UCS4: swap every 32-bit unit.  */
  uint32_t *dst32 = (uint32_t *) dst;
  size_t left = nchars;

  while (left-- > 0)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __BIG_ENDIAN
  /* Host order already is UCS4 order: a plain copy suffices.  */
  *inptrp = src + nchars * 4;
  *outptrp = __mempcpy (dst, src, nchars * 4);
#else
# error "This endianess is not supported."
#endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
114 
#if !_STRING_ARCH_unaligned
/* Same conversion and return values as internal_ucs4_loop, but on
   little endian hosts the buffers are touched one byte at a time
   instead of through uint32_t pointers.  STEP, STEP_DATA and
   IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp,
                              const unsigned char *outend,
                              size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __LITTLE_ENDIAN
  /* Reverse the four bytes of every character.  */
  const unsigned char *const stop = src + nchars * 4;

  while (src != stop)
    {
      for (int i = 0; i < 4; ++i)
        dst[i] = src[3 - i];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __BIG_ENDIAN
  /* Nothing to swap, just copy.  */
  *inptrp = src + nchars * 4;
  *outptrp = __mempcpy (dst, src, nchars * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
#endif
164 
165 
/* Finish one character whose first bytes are already stored in the
   conversion state.  The low three bits of the state's __count hold
   the number of pending bytes.  Input bytes are appended to the state
   until four are available; if the input runs out first the new count
   is recorded and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the
   character is written to *OUTPTRP in big endian order, *OUTPTRP is
   advanced by four, the pending count is cleared and __GCONV_OK is
   returned.  OUTEND is not examined; STEP and IRREVERSIBLE are not
   used.  */
static inline int
__attribute ((always_inline))
internal_ucs4_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *st = step_data->__statep;
  char *pending = st->__value.__wchb;
  size_t have = st->__count & 7;

  /* Top up the pending bytes from the input.  */
  for (; *inptrp < inend && have < 4; ++have, ++*inptrp)
    pending[have] = **inptrp;

  if (__glibc_unlikely (have < 4))
    {
      /* Still short of a full character; remember how far we got.  */
      st->__count = (st->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  unsigned char *dst = *outptrp;

#if __BYTE_ORDER == __LITTLE_ENDIAN
  for (size_t i = 0; i < 4; ++i)
    dst[i] = pending[3 - i];
#elif __BYTE_ORDER == __BIG_ENDIAN
  for (size_t i = 0; i < 4; ++i)
    dst[i] = pending[i];
#else
# error "This endianess is not supported."
#endif
  *outptrp = dst + 4;

  /* Nothing is pending any more.  */
  st->__count &= ~7;

  return __GCONV_OK;
}
213 
214 #include <iconv/skeleton.c>
215 
216 
/* Transform from UCS4 to the internal, UCS4-like format.  In contrast
   to the opposite direction the values have to be validated here.  */
#define FUNCTION_NAME		__gconv_transform_ucs4_internal
#define FROM_LOOP		ucs4_internal_loop
#define TO_LOOP			ucs4_internal_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0


/* Convert big endian UCS4 characters to the internal format for as
   long as a complete input character and four bytes of output room are
   available.  Both buffers are accessed through uint32_t pointers.

   A value above 0x7fffffff is a genuine input error (UCS4 does not
   allow it), so no transliteration is attempted:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned and
       *INPTRP / *OUTPTRP are left as they were on entry;
     - else if __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP and *OUTPTRP are set to the offending position
       and __GCONV_ILLEGAL_INPUT is returned.

   Without such an error the result is __GCONV_EMPTY_INPUT when all
   input was consumed, __GCONV_FULL_OUTPUT when fewer than four bytes
   of output room remain, and __GCONV_INCOMPLETE_INPUT otherwise.
   STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop (struct __gconv_step *step,
                    struct __gconv_step_data *step_data,
                    const unsigned char **inptrp, const unsigned char *inend,
                    unsigned char **outptrp, const unsigned char *outend,
                    size_t *irreversible)
{
  const int ignore_errors = step_data->__flags & __GCONV_IGNORE_ERRORS;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;

  for (; src + 4 <= inend && dst + 4 <= outend; src += 4)
    {
      uint32_t wc = *(const uint32_t *) src;

#if __BYTE_ORDER == __LITTLE_ENDIAN
      wc = bswap_32 (wc);
#endif

      if (__glibc_likely (wc <= 0x7fffffff))
        {
          *(uint32_t *) dst = wc;
          dst += sizeof (uint32_t);
          continue;
        }

      /* The value is too large for UCS4.  */
      if (irreversible == NULL)
        /* We are transliterating, don't try to correct anything.  */
        return __GCONV_ILLEGAL_INPUT;

      if (!ignore_errors)
        {
          *inptrp = src;
          *outptrp = dst;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Drop the character and account for it.  */
      ++*irreversible;
    }

  *inptrp = src;
  *outptrp = dst;

  /* Work out why the conversion stopped.  */
  if (src == inend)
    return __GCONV_EMPTY_INPUT;
  if (dst + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
292 
#if !_STRING_ARCH_unaligned
/* Byte-wise variant of ucs4_internal_loop: convert big endian UCS4 to
   the internal format without accessing the buffers through uint32_t
   pointers.

   Characters are processed for as long as a complete input character
   and four bytes of output room are available.  UCS4 values must not
   exceed 0x7fffffff, i.e. the most significant (first) input byte must
   not exceed 0x7f.  For an offending character:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned and
       *INPTRP / *OUTPTRP are left as they were on entry;
     - else if __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP and *OUTPTRP are set to the offending position
       and __GCONV_ILLEGAL_INPUT is returned.

   Without such an error the result is __GCONV_EMPTY_INPUT when all
   input was consumed, __GCONV_FULL_OUTPUT when fewer than four bytes
   of output room remain, and __GCONV_INCOMPLETE_INPUT otherwise.
   STEP is not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_unaligned (struct __gconv_step *step,
                              struct __gconv_step_data *step_data,
                              const unsigned char **inptrp,
                              const unsigned char *inend,
                              unsigned char **outptrp,
                              const unsigned char *outend,
                              size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4)
    {
      /* The first byte is the most significant one.  Anything above
         0x7f means the value exceeds 0x7fffffff; this is the same
         limit the aligned loop enforces (0x80 itself is invalid too).  */
      if (__glibc_unlikely (inptr[0] > 0x7f))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __LITTLE_ENDIAN
      /* Big endian input, little endian host: reverse the bytes.  */
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif
      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*outptrp + 4 > outend)
    result = __GCONV_FULL_OUTPUT;
  else
    result = __GCONV_INCOMPLETE_INPUT;

  return result;
}
#endif
361 
362 
/* Finish one big endian UCS4 character whose first bytes are already
   stored in the conversion state.  The low three bits of the state's
   __count hold the number of pending bytes.

   Input bytes are appended to the state until four are available; if
   the input runs out first, the new count is recorded and
   __GCONV_INCOMPLETE_INPUT is returned.

   A complete character whose most significant byte exceeds 0x7f lies
   above 0x7fffffff and is invalid.  Unless __GCONV_IGNORE_ERRORS is set
   in STEP_DATA->__flags, *INPTRP is moved back by the number of bytes
   consumed during this call and __GCONV_ILLEGAL_INPUT is returned,
   leaving the pending count untouched.  With __GCONV_IGNORE_ERRORS the
   character is dropped without producing output.

   A valid character is written to *OUTPTRP in host byte order and
   *OUTPTRP is advanced by four.  In both of the latter cases the
   pending count is cleared and __GCONV_OK is returned.  OUTEND is not
   examined; STEP and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4_internal_loop_single (struct __gconv_step *step,
                           struct __gconv_step_data *step_data,
                           const unsigned char **inptrp,
                           const unsigned char *inend,
                           unsigned char **outptrp,
                           const unsigned char *outend,
                           size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Byte 0 is the most significant one.  Anything above 0x7f means the
     value exceeds 0x7fffffff; this is the same limit the bulk loop
     enforces (0x80 itself is invalid too).  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[0] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Undo the input consumption of this call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __LITTLE_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#elif __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
424 
425 #include <iconv/skeleton.c>
426 
427 
/* Transform from the internal, UCS4-like format to the little endian
   form of UCS4.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs4le
#define FROM_LOOP		internal_ucs4le_loop
#define TO_LOOP			internal_ucs4le_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0


/* Convert as many complete four-byte characters as fit into both
   buffers and advance *INPTRP and *OUTPTRP past them.  Returns
   __GCONV_EMPTY_INPUT when all input was consumed, __GCONV_FULL_OUTPUT
   when fewer than four bytes of output room remain, and
   __GCONV_INCOMPLETE_INPUT otherwise.  STEP, STEP_DATA and IRREVERSIBLE
   are not used.  On big endian hosts both buffers are accessed through
   uint32_t pointers.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

#if __BYTE_ORDER == __BIG_ENDIAN
  /* Host order differs from the target order: swap every 32-bit unit.  */
  uint32_t *dst32 = (uint32_t *) dst;
  size_t left = nchars;

  while (left-- > 0)
    {
      *dst32++ = bswap_32 (*(const uint32_t *) src);
      src += 4;
    }

  *inptrp = src;
  *outptrp = (unsigned char *) dst32;
#elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Host order already is the target order: a plain copy suffices.  */
  *inptrp = src + nchars * 4;
  *outptrp = __mempcpy (dst, src, nchars * 4);
#else
# error "This endianess is not supported."
#endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*outptrp + 4 > outend)
    return __GCONV_FULL_OUTPUT;
  return __GCONV_INCOMPLETE_INPUT;
}
482 
#if !_STRING_ARCH_unaligned
/* Same conversion as internal_ucs4le_loop, but on big endian hosts the
   buffers are touched one byte at a time instead of through uint32_t
   pointers.  Returns __GCONV_EMPTY_INPUT when all input was consumed,
   __GCONV_INCOMPLETE_INPUT when fewer than four input bytes remain, and
   __GCONV_FULL_OUTPUT otherwise (in which case fewer than four bytes of
   output room are asserted to remain).  STEP, STEP_DATA and
   IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp,
                                const unsigned char *outend,
                                size_t *irreversible)
{
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;
  /* Number of whole characters both buffers can accommodate.  */
  size_t nchars = MIN (inend - src, outend - dst) / 4;

# if __BYTE_ORDER == __BIG_ENDIAN
  /* Reverse the four bytes of every character.  */
  const unsigned char *const stop = src + nchars * 4;

  while (src != stop)
    {
      for (int i = 0; i < 4; ++i)
        dst[i] = src[3 - i];
      src += 4;
      dst += 4;
    }

  *inptrp = src;
  *outptrp = dst;
# elif __BYTE_ORDER == __LITTLE_ENDIAN
  /* Nothing to swap, just copy.  */
  *inptrp = src + nchars * 4;
  *outptrp = __mempcpy (dst, src, nchars * 4);
# else
#  error "This endianess is not supported."
# endif

  /* Work out why the conversion stopped.  */
  if (*inptrp == inend)
    return __GCONV_EMPTY_INPUT;
  if (*inptrp + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
#endif
535 
536 
/* Finish one character whose first bytes are already stored in the
   conversion state.  The low three bits of the state's __count hold
   the number of pending bytes.  Input bytes are appended to the state
   until four are available; if the input runs out first the new count
   is recorded and __GCONV_INCOMPLETE_INPUT is returned.  Otherwise the
   character is written to *OUTPTRP in little endian order, *OUTPTRP is
   advanced by four, the pending count is cleared and __GCONV_OK is
   returned.  OUTEND is not examined; STEP and IRREVERSIBLE are not
   used.  */
static inline int
__attribute ((always_inline))
internal_ucs4le_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *st = step_data->__statep;
  char *pending = st->__value.__wchb;
  size_t have = st->__count & 7;

  /* Top up the pending bytes from the input.  */
  for (; *inptrp < inend && have < 4; ++have, ++*inptrp)
    pending[have] = **inptrp;

  if (__glibc_unlikely (have < 4))
    {
      /* Still short of a full character; remember how far we got.  */
      st->__count = (st->__count & ~7) | have;
      return __GCONV_INCOMPLETE_INPUT;
    }

  unsigned char *dst = *outptrp;

#if __BYTE_ORDER == __BIG_ENDIAN
  for (size_t i = 0; i < 4; ++i)
    dst[i] = pending[3 - i];
#else
  for (size_t i = 0; i < 4; ++i)
    dst[i] = pending[i];
#endif

  *outptrp = dst + 4;

  /* Nothing is pending any more.  */
  st->__count &= ~7;

  return __GCONV_OK;
}
584 
585 #include <iconv/skeleton.c>
586 
587 
/* Transform from the little endian form of UCS4 to the internal
   encoding.  */
#define FUNCTION_NAME		__gconv_transform_ucs4le_internal
#define FROM_LOOP		ucs4le_internal_loop
#define TO_LOOP			ucs4le_internal_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0


/* Convert little endian UCS4 characters to the internal format for as
   long as a complete input character and four bytes of output room are
   available.  Both buffers are accessed through uint32_t pointers.

   A value above 0x7fffffff is a genuine input error (UCS4 does not
   allow it), so no transliteration is attempted:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned and
       *INPTRP / *OUTPTRP are left as they were on entry;
     - else if __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP and *OUTPTRP are set to the offending position
       and __GCONV_ILLEGAL_INPUT is returned.

   Without such an error the result is __GCONV_EMPTY_INPUT when all
   input was consumed, __GCONV_INCOMPLETE_INPUT when fewer than four
   input bytes remain, and __GCONV_FULL_OUTPUT otherwise (in which case
   fewer than four bytes of output room are asserted to remain).  STEP
   is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop (struct __gconv_step *step,
                      struct __gconv_step_data *step_data,
                      const unsigned char **inptrp, const unsigned char *inend,
                      unsigned char **outptrp, const unsigned char *outend,
                      size_t *irreversible)
{
  const int ignore_errors = step_data->__flags & __GCONV_IGNORE_ERRORS;
  const unsigned char *src = *inptrp;
  unsigned char *dst = *outptrp;

  for (; src + 4 <= inend && dst + 4 <= outend; src += 4)
    {
      uint32_t wc = *(const uint32_t *) src;

#if __BYTE_ORDER == __BIG_ENDIAN
      wc = bswap_32 (wc);
#endif

      if (__glibc_likely (wc <= 0x7fffffff))
        {
          *(uint32_t *) dst = wc;
          dst += sizeof (uint32_t);
          continue;
        }

      /* The value is too large for UCS4.  */
      if (irreversible == NULL)
        /* We are transliterating, don't try to correct anything.  */
        return __GCONV_ILLEGAL_INPUT;

      if (!ignore_errors)
        {
          *inptrp = src;
          *outptrp = dst;
          return __GCONV_ILLEGAL_INPUT;
        }

      /* Drop the character and account for it.  */
      ++*irreversible;
    }

  *inptrp = src;
  *outptrp = dst;

  /* Work out why the conversion stopped.  */
  if (src == inend)
    return __GCONV_EMPTY_INPUT;
  if (src + 4 > inend)
    return __GCONV_INCOMPLETE_INPUT;

  assert (*outptrp + 4 > outend);
  return __GCONV_FULL_OUTPUT;
}
665 
#if !_STRING_ARCH_unaligned
/* Byte-wise variant of ucs4le_internal_loop: convert little endian
   UCS4 to the internal format without accessing the buffers through
   uint32_t pointers.

   Characters are processed for as long as a complete input character
   and four bytes of output room are available.  UCS4 values must not
   exceed 0x7fffffff, i.e. the most significant (fourth) input byte must
   not exceed 0x7f.  For an offending character:
     - if IRREVERSIBLE is NULL, __GCONV_ILLEGAL_INPUT is returned and
       *INPTRP / *OUTPTRP are left as they were on entry;
     - else if __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, the
       character is skipped and *IRREVERSIBLE is incremented;
     - otherwise *INPTRP and *OUTPTRP are set to the offending position
       and __GCONV_ILLEGAL_INPUT is returned.

   Without such an error the result is __GCONV_EMPTY_INPUT when all
   input was consumed, __GCONV_INCOMPLETE_INPUT when fewer than four
   input bytes remain, and __GCONV_FULL_OUTPUT otherwise (in which case
   fewer than four bytes of output room are asserted to remain).  STEP
   is not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_unaligned (struct __gconv_step *step,
                                struct __gconv_step_data *step_data,
                                const unsigned char **inptrp,
                                const unsigned char *inend,
                                unsigned char **outptrp,
                                const unsigned char *outend,
                                size_t *irreversible)
{
  int flags = step_data->__flags;
  const unsigned char *inptr = *inptrp;
  unsigned char *outptr = *outptrp;
  int result;

  for (; inptr + 4 <= inend && outptr + 4 <= outend; inptr += 4)
    {
      /* The last byte is the most significant one.  Anything above
         0x7f means the value exceeds 0x7fffffff; this is the same
         limit the aligned loop enforces (0x80 itself is invalid too).  */
      if (__glibc_unlikely (inptr[3] > 0x7f))
        {
          /* The value is too large.  We don't try transliteration here since
             this is not an error because of the lack of possibilities to
             represent the result.  This is a genuine bug in the input since
             UCS4 does not allow such values.  */
          if (irreversible == NULL)
            /* We are transliterating, don't try to correct anything.  */
            return __GCONV_ILLEGAL_INPUT;

          if (flags & __GCONV_IGNORE_ERRORS)
            {
              /* Just ignore this character.  */
              ++*irreversible;
              continue;
            }

          *inptrp = inptr;
          *outptrp = outptr;
          return __GCONV_ILLEGAL_INPUT;
        }

# if __BYTE_ORDER == __BIG_ENDIAN
      /* Little endian input, big endian host: reverse the bytes.  */
      outptr[3] = inptr[0];
      outptr[2] = inptr[1];
      outptr[1] = inptr[2];
      outptr[0] = inptr[3];
# else
      outptr[0] = inptr[0];
      outptr[1] = inptr[1];
      outptr[2] = inptr[2];
      outptr[3] = inptr[3];
# endif

      outptr += 4;
    }

  *inptrp = inptr;
  *outptrp = outptr;

  /* Determine the status.  */
  if (*inptrp == inend)
    result = __GCONV_EMPTY_INPUT;
  else if (*inptrp + 4 > inend)
    result = __GCONV_INCOMPLETE_INPUT;
  else
    {
      assert (*outptrp + 4 > outend);
      result = __GCONV_FULL_OUTPUT;
    }

  return result;
}
#endif
738 
739 
/* Finish one little endian UCS4 character whose first bytes are already
   stored in the conversion state.  The low three bits of the state's
   __count hold the number of pending bytes.

   Input bytes are appended to the state until four are available; if
   the input runs out first, the new count is recorded and
   __GCONV_INCOMPLETE_INPUT is returned.

   A complete character whose most significant (fourth) byte exceeds
   0x7f lies above 0x7fffffff and is invalid.  Unless
   __GCONV_IGNORE_ERRORS is set in STEP_DATA->__flags, *INPTRP is moved
   back by the number of bytes consumed during this call (the same
   handling as in ucs4_internal_loop_single) and __GCONV_ILLEGAL_INPUT
   is returned, leaving the pending count untouched.  With
   __GCONV_IGNORE_ERRORS the character is dropped without producing
   output.

   A valid character is written to *OUTPTRP in host byte order and
   *OUTPTRP is advanced by four.  In both of the latter cases the
   pending count is cleared and __GCONV_OK is returned.  OUTEND is not
   examined; STEP and IRREVERSIBLE are not used.  */
static inline int
__attribute ((always_inline))
ucs4le_internal_loop_single (struct __gconv_step *step,
                             struct __gconv_step_data *step_data,
                             const unsigned char **inptrp,
                             const unsigned char *inend,
                             unsigned char **outptrp,
                             const unsigned char *outend,
                             size_t *irreversible)
{
  mbstate_t *state = step_data->__statep;
  int flags = step_data->__flags;
  size_t cnt = state->__count & 7;

  while (*inptrp < inend && cnt < 4)
    state->__value.__wchb[cnt++] = *(*inptrp)++;

  if (__glibc_unlikely (cnt < 4))
    {
      /* Still not enough bytes.  Store the ones in the input buffer.  */
      state->__count &= ~7;
      state->__count |= cnt;

      return __GCONV_INCOMPLETE_INPUT;
    }

  /* Byte 3 is the most significant one.  Anything above 0x7f means the
     value exceeds 0x7fffffff; this is the same limit the bulk loop
     enforces (0x80 itself is invalid too).  */
  if (__builtin_expect (((unsigned char *) state->__value.__wchb)[3] > 0x7f,
                        0))
    {
      /* The value is too large.  We don't try transliteration here since
         this is not an error because of the lack of possibilities to
         represent the result.  This is a genuine bug in the input since
         UCS4 does not allow such values.  */
      if (!(flags & __GCONV_IGNORE_ERRORS))
        {
          /* Undo the input consumption of this call.  */
          *inptrp -= cnt - (state->__count & 7);
          return __GCONV_ILLEGAL_INPUT;
        }
    }
  else
    {
#if __BYTE_ORDER == __BIG_ENDIAN
      (*outptrp)[0] = state->__value.__wchb[3];
      (*outptrp)[1] = state->__value.__wchb[2];
      (*outptrp)[2] = state->__value.__wchb[1];
      (*outptrp)[3] = state->__value.__wchb[0];
#else
      (*outptrp)[0] = state->__value.__wchb[0];
      (*outptrp)[1] = state->__value.__wchb[1];
      (*outptrp)[2] = state->__value.__wchb[2];
      (*outptrp)[3] = state->__value.__wchb[3];
#endif

      *outptrp += 4;
    }

  /* Clear the state buffer.  */
  state->__count &= ~7;

  return __GCONV_OK;
}
798 
799 #include <iconv/skeleton.c>
800 
801 
/* Convert from ISO 646-IRV (ASCII) to the internal (UCS4-like) format.  */
#define FUNCTION_NAME		__gconv_transform_ascii_internal
#define FROM_LOOP		ascii_internal_loop
#define TO_LOOP			ascii_internal_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		1
#define MIN_NEEDED_FROM		1
#define MIN_NEEDED_TO		4
#define DEFINE_INIT		0
#define DEFINE_FINI		0

#define LOOPFCT			FROM_LOOP
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define BODY \
  {									      \
    if (__glibc_likely (*inptr <= 0x7f))				      \
      {									      \
	/* Every ASCII byte widens to one 32-bit unit.  */		      \
	*((uint32_t *) outptr) = *inptr;				      \
	++inptr;							      \
	outptr += sizeof (uint32_t);					      \
      }									      \
    else								      \
      {									      \
	/* The byte is outside ASCII.  Transliteration is not attempted:     \
	   the problem is not that the result cannot be represented, the     \
	   input itself is wrong because ASCII has no such values.  */	      \
	STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
      }									      \
  }
#define LOOP_NEED_FLAGS
834 #include <iconv/loop.c>
835 #include <iconv/skeleton.c>
836 
837 
/* Convert from the internal (UCS4-like) format to ISO 646-IRV (ASCII).  */
#define FUNCTION_NAME		__gconv_transform_internal_ascii
#define FROM_LOOP		internal_ascii_loop
#define TO_LOOP			internal_ascii_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		1
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		1
#define DEFINE_INIT		0
#define DEFINE_FINI		0

#define LOOPFCT			FROM_LOOP
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define BODY \
  {									      \
    uint32_t wc = *((const uint32_t *) inptr);				      \
									      \
    if (__glibc_likely (wc <= 0x7f))					      \
      {									      \
	/* The character fits into a single ASCII byte.  */		      \
	*outptr++ = wc;							      \
	inptr += sizeof (uint32_t);					      \
      }									      \
    else								      \
      {									      \
	/* Not representable: hand the character to the tag handler	      \
	   first, then to the standard error handling.  */		      \
	UNICODE_TAG_HANDLER (wc, 4);					      \
	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
      }									      \
  }
#define LOOP_NEED_FLAGS
867 #include <iconv/loop.c>
868 #include <iconv/skeleton.c>
869 
870 
/* Convert from the internal (UCS4-like) format to UTF-8.  One input
   character produces between one and six output bytes.  */
#define FUNCTION_NAME		__gconv_transform_internal_utf8
#define FROM_LOOP		internal_utf8_loop
#define TO_LOOP			internal_utf8_loop /* This is not used.  */
#define FROM_DIRECTION		1
#define ONE_DIRECTION		1
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		1
#define MAX_NEEDED_TO		6
#define DEFINE_INIT		0
#define DEFINE_FINI		0

#define LOOPFCT			FROM_LOOP
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
#define BODY \
  {									      \
    uint32_t wc = *((const uint32_t *) inptr);				      \
									      \
    if (__glibc_likely (wc < 0x80))					      \
      /* Plain ASCII is stored as a single byte.  */			      \
      *outptr++ = (unsigned char) wc;					      \
    else if (__glibc_unlikely (wc > 0x7fffffff				      \
			       || (wc >= 0xd800 && wc <= 0xdfff)))	      \
      {									      \
	/* Beyond the 31-bit range, or in 0xd800..0xdfff.  */		      \
	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
      }									      \
    else								      \
      {									      \
	unsigned char *first = outptr;					      \
	size_t len = 2;							      \
									      \
	/* A sequence of LEN bytes carries 5 * LEN + 1 payload bits; pick    \
	   the shortest one (at most six bytes) that holds WC.  */	      \
	while (len < 6 && (wc & (~(uint32_t)0 << (5 * len + 1))) != 0)	      \
	  ++len;							      \
									      \
	if (__glibc_unlikely (outptr + len > outend))			      \
	  {								      \
	    /* Too long.  */						      \
	    result = __GCONV_FULL_OUTPUT;				      \
	    break;							      \
	  }								      \
									      \
	/* Lead byte marker: LEN one bits followed by a zero bit.  */	      \
	*first = (unsigned char) (~0xff >> len);			      \
	outptr += len;							      \
									      \
	/* Fill the continuation bytes from the last one backwards, six      \
	   bits at a time.  */						      \
	for (size_t i = len - 1; i > 0; --i)				      \
	  {								      \
	    first[i] = 0x80 | (wc & 0x3f);				      \
	    wc >>= 6;							      \
	  }								      \
	/* Whatever is left belongs into the lead byte.  */		      \
	first[0] |= wc;							      \
      }									      \
									      \
    inptr += 4;								      \
  }
#define LOOP_NEED_FLAGS
930 #include <iconv/loop.c>
931 #include <iconv/skeleton.c>
932 
933 
/* Convert from UTF-8 to the internal (UCS4-like) format.

   Each iteration of BODY decodes one UTF-8 sequence starting at INPTR
   and stores the resulting value as one 32-bit word at OUTPTR.  Lead
   bytes announcing two to six bytes are recognized.  Lead bytes in the
   range 0x80..0xc1 and the bytes 0xfe/0xff, malformed trail bytes,
   overlong forms and UTF-16 surrogates all end up at the shared error
   exit.  A sequence cut off by INEND sets RESULT to
   __GCONV_INCOMPLETE_INPUT, provided the bytes seen so far are valid.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		1
#define MAX_NEEDED_FROM		6
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		utf8_internal_loop
#define TO_LOOP			utf8_internal_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_utf8_internal
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
#define BODY \
  {									      \
    /* Next input byte.  */						      \
    uint32_t ch = *inptr;						      \
									      \
    if (__glibc_likely (ch < 0x80))					      \
      {									      \
	/* One byte sequence.  CH already is the character value; it is      \
	   stored at the end of the body.  */				      \
	++inptr;							      \
      }									      \
    else								      \
      {									      \
	/* CNT is the sequence length announced by the lead byte.  I	      \
	   counts the bytes examined so far and is the argument passed	      \
	   to the error handler at ERROUT.  */				      \
	unsigned int cnt;						      \
	unsigned int i;						      \
									      \
	if (ch >= 0xc2 && ch < 0xe0)					      \
	  {								      \
	    /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \
	       otherwise the wide character could have been represented	      \
	       using a single byte.  */					      \
	    cnt = 2;							      \
	    ch &= 0x1f;							      \
	  }								      \
	else if (__glibc_likely ((ch & 0xf0) == 0xe0))			      \
	  {								      \
	    /* We expect three bytes.  */				      \
	    cnt = 3;							      \
	    ch &= 0x0f;							      \
	  }								      \
	else if (__glibc_likely ((ch & 0xf8) == 0xf0))			      \
	  {								      \
	    /* We expect four bytes.  */				      \
	    cnt = 4;							      \
	    ch &= 0x07;							      \
	  }								      \
	else if (__glibc_likely ((ch & 0xfc) == 0xf8))			      \
	  {								      \
	    /* We expect five bytes.  */				      \
	    cnt = 5;							      \
	    ch &= 0x03;							      \
	  }								      \
	else if (__glibc_likely ((ch & 0xfe) == 0xfc))			      \
	  {								      \
	    /* We expect six bytes.  */					      \
	    cnt = 6;							      \
	    ch &= 0x01;							      \
	  }								      \
	else								      \
	  {								      \
	    /* The lead byte is in 0x80..0xc1 or is 0xfe/0xff.		      \
	       Search the end of this ill-formed UTF-8 character.  This	      \
	       is the next byte with (x & 0xc0) != 0x80.  The search	      \
	       stops at INEND and after at most four trail bytes, so I	      \
	       ends up between 1 and 5.  */				      \
	    i = 0;							      \
	    do								      \
	      ++i;							      \
	    while (inptr + i < inend					      \
		   && (*(inptr + i) & 0xc0) == 0x80			      \
		   && i < 5);						      \
									      \
	  /* Shared error exit.  The two checks further down jump here	      \
	     with I already set.  */					      \
	  errout:							      \
	    STANDARD_FROM_LOOP_ERR_HANDLER (i);				      \
	  }								      \
									      \
	if (__glibc_unlikely (inptr + cnt > inend))			      \
	  {								      \
	    /* We don't have enough input.  But before we report that check   \
	       that all the bytes are correct.  */			      \
	    for (i = 1; inptr + i < inend; ++i)				      \
	      if ((inptr[i] & 0xc0) != 0x80)				      \
		break;							      \
									      \
	    if (__glibc_likely (inptr + i == inend))			      \
	      {								      \
		result = __GCONV_INCOMPLETE_INPUT;			      \
		break;							      \
	      }								      \
									      \
	    /* inptr[i] is not a continuation byte.  */			      \
	    goto errout;						      \
	  }								      \
									      \
	/* Read the possible remaining bytes.  Each trail byte		      \
	   contributes its low six bits to CH.  */			      \
	for (i = 1; i < cnt; ++i)					      \
	  {								      \
	    uint32_t byte = inptr[i];					      \
									      \
	    if ((byte & 0xc0) != 0x80)					      \
	      /* This is an illegal encoding.  */			      \
	      break;							      \
									      \
	    ch <<= 6;							      \
	    ch |= byte & 0x3f;						      \
	  }								      \
									      \
	/* If i < cnt, some trail byte was not >= 0x80, < 0xc0.		      \
	   If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could	      \
	   have been represented with fewer than cnt bytes.  */		      \
	if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)		      \
	    /* Do not accept UTF-16 surrogates.  */			      \
	    || (ch >= 0xd800 && ch <= 0xdfff))				      \
	  {								      \
	    /* This is an illegal encoding.  */				      \
	    goto errout;						      \
	  }								      \
									      \
	inptr += cnt;							      \
      }									      \
									      \
    /* Now adjust the pointers and store the result.  */		      \
    *((uint32_t *) outptr) = ch;					      \
    outptr += sizeof (uint32_t);					      \
  }
#define LOOP_NEED_FLAGS
1061 
#define STORE_REST \
  {									      \
    /* Park an incomplete multibyte sequence in STATE, folding the bytes     \
       that are present into a partial UCS4 value.  It is assumed that	      \
       the lead byte at *INPTRP is correct and announces more bytes	      \
       than the input buffer still holds.  */				      \
    wint_t ch = **inptrp;						      \
    wint_t lead_mask;							      \
    size_t total, missing;						      \
									      \
    /* Start with the number of input bytes that are present.  */	      \
    state->__count = inend - *inptrp;					      \
									      \
    assert (ch != 0xc0 && ch != 0xc1);					      \
    if (ch >= 0xc2 && ch < 0xe0)					      \
      {									      \
	/* Two bytes.  0xc0 and 0xc1 cannot occur as lead byte since	      \
	   such a character would fit into a single byte.  */		      \
	total = 2;							      \
	lead_mask = 0x1f;						      \
      }									      \
    else if (__glibc_likely ((ch & 0xf0) == 0xe0))			      \
      {									      \
	/* Three bytes.  */						      \
	total = 3;							      \
	lead_mask = 0x0f;						      \
      }									      \
    else if (__glibc_likely ((ch & 0xf8) == 0xf0))			      \
      {									      \
	/* Four bytes.  */						      \
	total = 4;							      \
	lead_mask = 0x07;						      \
      }									      \
    else if (__glibc_likely ((ch & 0xfc) == 0xf8))			      \
      {									      \
	/* Five bytes.  */						      \
	total = 5;							      \
	lead_mask = 0x03;						      \
      }									      \
    else								      \
      {									      \
	/* Everything else is taken as a six byte sequence.  */	      \
	total = 6;							      \
	lead_mask = 0x01;						      \
      }									      \
									      \
    /* Keep only the payload bits of the lead byte.  */		      \
    ch &= lead_mask;							      \
									      \
    /* The lead byte is accounted for.  Consume the trail bytes that	      \
       are available, six bits each.  */				      \
    missing = total - 1;						      \
    for (++(*inptrp); *inptrp < inend; ++(*inptrp))			      \
      {									      \
	ch = (ch << 6) | (**inptrp & 0x3f);				      \
	--missing;							      \
      }									      \
									      \
    /* Leave room for the bytes which are still to come.  */		      \
    ch <<= missing * 6;							      \
									      \
    /* The announced length of the whole sequence goes into the bits	      \
       from bit 8 upwards.  */						      \
    state->__count |= total << 8;					      \
									      \
    /* Finally remember the partial value.  */				      \
    state->__value.__wch = ch;						      \
  }
1125 
#define UNPACK_BYTES \
  {									      \
    /* Rebuild in BYTEBUF the bytes of the partial sequence recorded in      \
       STATE.  The bits of __count from bit 8 upwards give the length	      \
       of the whole sequence, the low eight bits the number of bytes	      \
       already seen, which is also stored in INLEN.  */		      \
    static const unsigned char lead_marker[5]				      \
      = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };				      \
    wint_t bits = state->__value.__wch;					      \
    size_t pos = state->__count >> 8;					      \
									      \
    inlen = state->__count & 255;					      \
									      \
    /* Marker bits of the lead byte for a sequence of POS bytes.  */	      \
    bytebuf[0] = lead_marker[pos - 2];					      \
									      \
    /* Walk from the last position towards the lead byte, peeling off	      \
       six bits per position.  Only positions below INLEN are	      \
       written.  */							      \
    do									      \
      {									      \
	--pos;								      \
	if (pos < inlen)						      \
	  bytebuf[pos] = 0x80 | (bits & 0x3f);				      \
	bits >>= 6;							      \
      }									      \
    while (pos > 1);							      \
									      \
    /* What is left over is the payload of the lead byte.  */		      \
    bytebuf[0] |= bits;							      \
  }
1146 
/* Reset the __count member of STATE to zero.  STORE_REST keeps the byte
   counts of a pending partial sequence in that member.  */
#define CLEAR_STATE \
  state->__count = 0
1149 
1150 
1151 #include <iconv/loop.c>
1152 #include <iconv/skeleton.c>
1153 
1154 
/* Convert from UCS2 to the internal (UCS4-like) format.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs2_internal_loop
#define TO_LOOP			ucs2_internal_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_ucs2_internal
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Loop body: fetch one 16-bit unit at INPTR and store it, widened to
   32 bits, at OUTPTR.  Surrogates are passed to
   STANDARD_FROM_LOOP_ERR_HANDLER instead of being stored.  */
#define BODY \
  {									      \
    const uint16_t unit = get16 (inptr);				      \
									      \
    /* The values 0xd800..0xdfff are exactly those whose top five bits	      \
       are 11011.  Surrogate characters are not valid in UCS-2 input,	      \
       so reject them.  (Catching this here is not security		      \
       relevant.)  */							      \
    if (__glibc_unlikely ((unit & 0xf800) == 0xd800))			      \
      {									      \
	STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \
      }									      \
									      \
    inptr += 2;								      \
    *((uint32_t *) outptr) = unit;					      \
    outptr += sizeof (uint32_t);					      \
  }
#define LOOP_NEED_FLAGS
1185 #include <iconv/loop.c>
1186 #include <iconv/skeleton.c>
1187 
1188 
/* Convert from the internal (UCS4-like) format to UCS2.  */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		2
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ucs2_loop
#define TO_LOOP			internal_ucs2_loop /* This is not used.  */
#define FUNCTION_NAME		__gconv_transform_internal_ucs2
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Loop body: read one 32-bit character at INPTR and store it as one
   16-bit unit at OUTPTR.  Characters of 0x10000 and above go through
   UNICODE_TAG_HANDLER and STANDARD_TO_LOOP_ERR_HANDLER.  Surrogates set
   RESULT to __GCONV_ILLEGAL_INPUT and are then either skipped (when
   ignore_errors_p is true) or end the loop.  */
#define BODY \
  {									      \
    uint32_t ucs4 = *((const uint32_t *) inptr);			      \
									      \
    if (__glibc_likely (ucs4 < 0x10000 && (ucs4 & 0xf800) != 0xd800))	      \
      {									      \
	/* Fits into 16 bits and is not a surrogate.  */		      \
	put16 (outptr, ucs4);						      \
	outptr += sizeof (uint16_t);					      \
	inptr += 4;							      \
      }									      \
    else if (ucs4 >= 0x10000)						      \
      {									      \
	/* Not representable in a single 16-bit unit.  */		      \
	UNICODE_TAG_HANDLER (ucs4, 4);					      \
	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
      }									      \
    else								      \
      {									      \
	/* Surrogate characters in UCS-4 input are not valid.		      \
	   We must catch this, because the UCS-2 output might be	      \
	   interpreted as UTF-16 by other programs.  If we let		      \
	   surrogates pass through, attackers could make a security	      \
	   hole exploit by synthesizing any desired plane 1-16		      \
	   character.  */						      \
	result = __GCONV_ILLEGAL_INPUT;					      \
	if (ignore_errors_p ())						      \
	  {								      \
	    /* Skip the character but count it.  */			      \
	    inptr += 4;							      \
	    ++*irreversible;						      \
	    continue;							      \
	  }								      \
	break;								      \
      }									      \
  }
#define LOOP_NEED_FLAGS
1235 #include <iconv/loop.c>
1236 #include <iconv/skeleton.c>
1237 
1238 
/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define FROM_DIRECTION		1
#define FROM_LOOP		ucs2reverse_internal_loop
#define TO_LOOP			ucs2reverse_internal_loop/* This is not used.*/
#define FUNCTION_NAME		__gconv_transform_ucs2reverse_internal
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Loop body: fetch one 16-bit unit at INPTR, swap its bytes and store
   the value, widened to 32 bits, at OUTPTR.  Surrogates are passed to
   STANDARD_FROM_LOOP_ERR_HANDLER, the same handler the native-endian
   UCS2 decoder uses, so that an invalid unit is reported the same way
   regardless of byte order.  */
#define BODY \
  {									      \
    uint16_t u1 = bswap_16 (get16 (inptr));				      \
									      \
    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000))			      \
      {									      \
	/* Surrogate characters in UCS-2 input are not valid.  Reject	      \
	   them.  (Catching this here is not security relevant.)  */	      \
	STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \
      }									      \
									      \
    *((uint32_t *) outptr) = u1;					      \
    outptr += sizeof (uint32_t);					      \
    inptr += 2;								      \
  }
#define LOOP_NEED_FLAGS
1276 #include <iconv/loop.c>
1277 #include <iconv/skeleton.c>
1278 
1279 
/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. */
#define DEFINE_INIT		0
#define DEFINE_FINI		0
#define MIN_NEEDED_FROM		4
#define MIN_NEEDED_TO		2
#define FROM_DIRECTION		1
#define FROM_LOOP		internal_ucs2reverse_loop
#define TO_LOOP			internal_ucs2reverse_loop/* This is not used.*/
#define FUNCTION_NAME		__gconv_transform_internal_ucs2reverse
#define ONE_DIRECTION		1

#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Loop body: read one 32-bit character at INPTR and store it as one
   byte-swapped 16-bit unit at OUTPTR.  Characters of 0x10000 and above
   go through UNICODE_TAG_HANDLER and STANDARD_TO_LOOP_ERR_HANDLER.
   Surrogates set RESULT to __GCONV_ILLEGAL_INPUT and are then either
   skipped (when ignore_errors_p is true) or end the loop.  */
#define BODY \
  {									      \
    uint32_t val = *((const uint32_t *) inptr);				      \
    if (__glibc_unlikely (val >= 0x10000))				      \
      {									      \
	UNICODE_TAG_HANDLER (val, 4);					      \
	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
      }									      \
    else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000))		      \
      {									      \
	/* Surrogate characters in UCS-4 input are not valid.		      \
	   We must catch this, because the UCS-2 output might be	      \
	   interpreted as UTF-16 by other programs.  If we let		      \
	   surrogates pass through, attackers could make a security	      \
	   hole exploit by synthesizing any desired plane 1-16		      \
	   character.  */						      \
	/* Record the error before looking at ignore_errors_p, as the	      \
	   native-endian UCS2 encoder does.  RESULT therefore stays	      \
	   __GCONV_ILLEGAL_INPUT even if the character is skipped.  */	      \
	result = __GCONV_ILLEGAL_INPUT;					      \
	if (! ignore_errors_p ())					      \
	  break;							      \
	inptr += 4;							      \
	++*irreversible;						      \
	continue;							      \
      }									      \
    else								      \
      {									      \
	put16 (outptr, bswap_16 (val));					      \
	outptr += sizeof (uint16_t);					      \
	inptr += 4;							      \
      }									      \
  }
#define LOOP_NEED_FLAGS
1327 #include <iconv/loop.c>
1328 #include <iconv/skeleton.c>
1329