1 /* Multibyte to UTF-8 conversion.
2 Copyright (C) 2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #include <assert.h>
20 #include <dlfcn.h>
21 #include <errno.h>
22 #include <gconv.h>
23 #include <uchar.h>
24 #include <wcsmbsload.h>
25
26 #include <sysdep.h>
27
28 #ifndef EILSEQ
29 # define EILSEQ EINVAL
30 #endif
31
32
/* Conversion state used by mbrtoc8 when the caller passes a null pointer
   for PS.  It has static storage duration, so it starts out
   zero-initialized.  */
static mbstate_t state;
35
36 size_t
mbrtoc8(char8_t * pc8,const char * s,size_t n,mbstate_t * ps)37 mbrtoc8 (char8_t *pc8, const char *s, size_t n, mbstate_t *ps)
38 {
39 /* This implementation depends on the converter invoked by mbrtowc not
40 needing to retain state in either the top most bit of ps->__count or
41 in ps->__value between invocations. This implementation uses the
42 top most bit of ps->__count to indicate that trailing code units are
43 yet to be written and uses ps->__value to store those code units. */
44
45 if (ps == NULL)
46 ps = &state;
47
48 /* If state indicates that trailing code units are yet to be written, write
49 those first regardless of whether 's' is a null pointer. */
50 if (ps->__count & 0x80000000)
51 {
52 /* ps->__value.__wchb[3] stores the index of the next code unit to
53 write. Code units are stored in reverse order. */
54 size_t i = ps->__value.__wchb[3];
55 if (pc8 != NULL)
56 {
57 *pc8 = ps->__value.__wchb[i];
58 }
59 if (i == 0)
60 {
61 ps->__count &= 0x7fffffff;
62 ps->__value.__wch = 0;
63 }
64 else
65 --ps->__value.__wchb[3];
66 return -3;
67 }
68
69 if (s == NULL)
70 {
71 /* if 's' is a null pointer, behave as if a null pointer was passed for
72 'pc8', an empty string was passed for 's', and 1 passed for 'n'. */
73 pc8 = NULL;
74 s = "";
75 n = 1;
76 }
77
78 wchar_t wc;
79 size_t result;
80
81 result = mbrtowc (&wc, s, n, ps);
82 if (result <= n)
83 {
84 if (wc <= 0x7F)
85 {
86 if (pc8 != NULL)
87 *pc8 = wc;
88 }
89 else if (wc <= 0x7FF)
90 {
91 if (pc8 != NULL)
92 *pc8 = 0xC0 + ((wc >> 6) & 0x1F);
93 ps->__value.__wchb[0] = 0x80 + (wc & 0x3F);
94 ps->__value.__wchb[3] = 0;
95 ps->__count |= 0x80000000;
96 }
97 else if (wc <= 0xFFFF)
98 {
99 if (pc8 != NULL)
100 *pc8 = 0xE0 + ((wc >> 12) & 0x0F);
101 ps->__value.__wchb[1] = 0x80 + ((wc >> 6) & 0x3F);
102 ps->__value.__wchb[0] = 0x80 + (wc & 0x3F);
103 ps->__value.__wchb[3] = 1;
104 ps->__count |= 0x80000000;
105 }
106 else if (wc <= 0x10FFFF)
107 {
108 if (pc8 != NULL)
109 *pc8 = 0xF0 + ((wc >> 18) & 0x07);
110 ps->__value.__wchb[2] = 0x80 + ((wc >> 12) & 0x3F);
111 ps->__value.__wchb[1] = 0x80 + ((wc >> 6) & 0x3F);
112 ps->__value.__wchb[0] = 0x80 + (wc & 0x3F);
113 ps->__value.__wchb[3] = 2;
114 ps->__count |= 0x80000000;
115 }
116 }
117 if (result == 0 && wc != 0)
118 {
119 /* mbrtowc() never returns -3. When a MB sequence converts to multiple
120 WCs, no input is consumed when writing the subsequent WCs resulting
121 in a result of 0 even if a null character wasn't written. */
122 result = -3;
123 }
124
125 return result;
126 }
127