/* Mapping tables for EUC-KR handling.
   Copyright (C) 1998-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18 
#include <dlfcn.h>
#include <stdint.h>
#include <ksc5601.h>
22 
23 
24 static inline void
25 __attribute ((always_inline))
euckr_from_ucs4(uint32_t ch,unsigned char * cp)26 euckr_from_ucs4 (uint32_t ch, unsigned char *cp)
27 {
28   if (ch > 0x9f)
29     {
30       if (__builtin_expect (ch, 0) == 0x20a9)
31 	{
32 	  /* Half-width Korean Currency WON sign.  There is no
33              equivalent in EUC-KR.  Some mappings use \x5c because
34              this is what some old Korean ASCII variants used but this
35              is causing problems.  We map it to the FULL WIDTH WON SIGN.  */
36 	  cp[0] = '\xa3';
37 	  cp[1] = '\xdc';
38 	}
39       else if (__builtin_expect (ucs4_to_ksc5601 (ch, cp, 2), 0)
40 	  != __UNKNOWN_10646_CHAR)
41 	{
42 	  cp[0] |= 0x80;
43 	  cp[1] |= 0x80;
44 	}
45       else
46 	cp[0] = cp[1] = '\0';
47     }
48   else
49     {
50       /* There is no mapping for U005c but we nevertheless map it to
51 	 \x5c.  */
52       cp[0] = (unsigned char) ch;
53       cp[1] = '\0';
54     }
55 }
56 
57 
/* Definitions used in the body of the `gconv' function.

   FROM_LOOP and TO_LOOP are the names given (via LOOPFCT) to the two
   conversion loops defined further down in this file.

   MIN_NEEDED_FROM/MAX_NEEDED_FROM: an EUC-KR character is consumed as
   either one or two bytes by the decoding loop.
   MIN_NEEDED_TO: the other side is handled in 4-byte units (see the
   put32/get32 calls in the loop bodies).

   CHARSET_NAME, DEFINE_INIT, DEFINE_FINI and ONE_DIRECTION are not used
   in this file itself; presumably they parameterize the included
   skeleton (NOTE(review): confirm against iconv/skeleton.c).  */
#define CHARSET_NAME		"EUC-KR//"
#define FROM_LOOP		from_euc_kr
#define TO_LOOP			to_euc_kr
#define DEFINE_INIT		1
#define DEFINE_FINI		1
#define MIN_NEEDED_FROM		1
#define MAX_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define ONE_DIRECTION		0
68 
69 
70 /* First define the conversion function from EUC-KR to UCS4.  */
71 #define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
72 #define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
73 #define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
74 #define LOOPFCT			FROM_LOOP
75 #define BODY \
76   {									      \
77     uint32_t ch = *inptr;						      \
78 									      \
79     if (ch <= 0x9f)							      \
80       ++inptr;								      \
81     else if (__glibc_unlikely (ch == 0xa0))				      \
82       {									      \
83 	/* This is illegal.  */						      \
84 	STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
85       }									      \
86     else								      \
87       {									      \
88 	/* Two-byte character.  First test whether the next byte	      \
89 	   is also available.  */					      \
90 	ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0x80);		      \
91 	if (__glibc_unlikely (ch == 0))					      \
92 	  {								      \
93 	    /* The second byte is not available.  */			      \
94 	    result = __GCONV_INCOMPLETE_INPUT;				      \
95 	    break;							      \
96 	  }								      \
97 	if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))		      \
98 	  /* This is an illegal character.  */				      \
99 	  STANDARD_FROM_LOOP_ERR_HANDLER (2);				      \
100       }									      \
101 									      \
102     put32 (outptr, ch);							      \
103     outptr += 4;							      \
104   }
105 #define LOOP_NEED_FLAGS
106 #define ONEBYTE_BODY \
107   {									      \
108     if (c <= 0x9f)							      \
109       return c;								      \
110     else								      \
111       return WEOF;							      \
112   }
113 #include <iconv/loop.c>
114 
115 
116 /* Next, define the other direction.  */
117 #define MIN_NEEDED_INPUT	MIN_NEEDED_TO
118 #define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
119 #define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
120 #define LOOPFCT			TO_LOOP
121 #define BODY \
122   {									      \
123     uint32_t ch = get32 (inptr);					      \
124     unsigned char cp[2];						      \
125 									      \
126     /* Decomposing Hangul syllables not available in KS C 5601 into	      \
127        Jamos should be considered either here or in euckr_from_ucs4() */      \
128     euckr_from_ucs4 (ch, cp);						      \
129 									      \
130     if (__builtin_expect (cp[0], '\1') == '\0' && ch != 0)		      \
131       {									      \
132 	UNICODE_TAG_HANDLER (ch, 4);					      \
133 									      \
134 	/* Illegal character.  */					      \
135 	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
136       }									      \
137 									      \
138     *outptr++ = cp[0];							      \
139     /* Now test for a possible second byte and write this if possible.  */    \
140     if (cp[1] != '\0')							      \
141       {									      \
142 	if (__glibc_unlikely (outptr >= outend))			      \
143 	  {								      \
144 	    /* The result does not fit into the buffer.  */		      \
145 	    --outptr;							      \
146 	    result = __GCONV_FULL_OUTPUT;				      \
147 	    break;							      \
148 	  }								      \
149 	*outptr++ = cp[1];						      \
150       }									      \
151 									      \
152     inptr += 4;								      \
153   }
154 #define LOOP_NEED_FLAGS
155 #include <iconv/loop.c>
156 
157 
158 /* Now define the toplevel functions.  */
159 #include <iconv/skeleton.c>
160