1 /* Multibyte to UTF-8 conversion.
2    Copyright (C) 2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #include <assert.h>
20 #include <dlfcn.h>
21 #include <errno.h>
22 #include <gconv.h>
23 #include <uchar.h>
24 #include <wcsmbsload.h>
25 
26 #include <sysdep.h>
27 
/* Fall back to EINVAL when EILSEQ has not been defined by the headers
   included above.  */
#if !defined EILSEQ
# define EILSEQ EINVAL
#endif


/* Conversion state private to this file; mbrtoc8 substitutes it when
   the caller passes a null pointer for PS.  */
static mbstate_t state;
35 
36 size_t
mbrtoc8(char8_t * pc8,const char * s,size_t n,mbstate_t * ps)37 mbrtoc8 (char8_t *pc8, const char *s, size_t n, mbstate_t *ps)
38 {
39   /* This implementation depends on the converter invoked by mbrtowc not
40      needing to retain state in either the top most bit of ps->__count or
41      in ps->__value between invocations.  This implementation uses the
42      top most bit of ps->__count to indicate that trailing code units are
43      yet to be written and uses ps->__value to store those code units.  */
44 
45   if (ps == NULL)
46     ps = &state;
47 
48   /* If state indicates that trailing code units are yet to be written, write
49      those first regardless of whether 's' is a null pointer.  */
50   if (ps->__count & 0x80000000)
51     {
52       /* ps->__value.__wchb[3] stores the index of the next code unit to
53          write.  Code units are stored in reverse order.  */
54       size_t i = ps->__value.__wchb[3];
55       if (pc8 != NULL)
56 	{
57 	  *pc8 = ps->__value.__wchb[i];
58 	}
59       if (i == 0)
60 	{
61 	  ps->__count &= 0x7fffffff;
62 	  ps->__value.__wch = 0;
63 	}
64       else
65 	--ps->__value.__wchb[3];
66       return -3;
67     }
68 
69   if (s == NULL)
70     {
71       /* if 's' is a null pointer, behave as if a null pointer was passed for
72          'pc8', an empty string was passed for 's', and 1 passed for 'n'.  */
73       pc8 = NULL;
74       s = "";
75       n = 1;
76     }
77 
78   wchar_t wc;
79   size_t result;
80 
81   result = mbrtowc (&wc, s, n, ps);
82   if (result <= n)
83     {
84       if (wc <= 0x7F)
85 	{
86 	  if (pc8 != NULL)
87 	    *pc8 = wc;
88 	}
89       else if (wc <= 0x7FF)
90 	{
91 	  if (pc8 != NULL)
92 	    *pc8 = 0xC0 + ((wc >> 6) & 0x1F);
93 	  ps->__value.__wchb[0] = 0x80 + (wc & 0x3F);
94 	  ps->__value.__wchb[3] = 0;
95 	  ps->__count |= 0x80000000;
96 	}
97       else if (wc <= 0xFFFF)
98 	{
99 	  if (pc8 != NULL)
100 	    *pc8 = 0xE0 + ((wc >> 12) & 0x0F);
101 	  ps->__value.__wchb[1] = 0x80 + ((wc >> 6) & 0x3F);
102 	  ps->__value.__wchb[0] = 0x80 + (wc & 0x3F);
103 	  ps->__value.__wchb[3] = 1;
104 	  ps->__count |= 0x80000000;
105 	}
106       else if (wc <= 0x10FFFF)
107 	{
108 	  if (pc8 != NULL)
109 	    *pc8 = 0xF0 + ((wc >> 18) & 0x07);
110 	  ps->__value.__wchb[2] = 0x80 + ((wc >> 12) & 0x3F);
111 	  ps->__value.__wchb[1] = 0x80 + ((wc >> 6) & 0x3F);
112 	  ps->__value.__wchb[0] = 0x80 + (wc & 0x3F);
113 	  ps->__value.__wchb[3] = 2;
114 	  ps->__count |= 0x80000000;
115 	}
116     }
117   if (result == 0 && wc != 0)
118     {
119       /* mbrtowc() never returns -3.  When a MB sequence converts to multiple
120          WCs, no input is consumed when writing the subsequent WCs resulting
121          in a result of 0 even if a null character wasn't written.  */
122       result = -3;
123     }
124 
125   return result;
126 }
127