1 /* Conversion routines for EUC-CN handling (tables come from <gb2312.h>).
2    Copyright (C) 1998-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #include <dlfcn.h>
20 #include <gb2312.h>
21 #include <stdint.h>
22 
23 /* Definitions used in the body of the `gconv' function.

   Some of these are referenced further down in this file; the rest are
   presumably consumed by <iconv/loop.c> and <iconv/skeleton.c>, which
   are included below (their contents are not visible here -- confirm
   the exact semantics there).

   CHARSET_NAME       The charset name string, "EUC-CN//".
   FROM_LOOP,         Names given (through LOOPFCT) to the two conversion
   TO_LOOP            loops defined below: EUC-CN to UCS4 and UCS4 to
		      EUC-CN respectively.
   MIN_NEEDED_FROM,   1 and 2: the EUC-CN to UCS4 loop body consumes
   MAX_NEEDED_FROM    either one or two input bytes per character.
   MIN_NEEDED_TO      4: the UCS4 side is read and written four bytes
		      at a time.
   DEFINE_INIT,       Switches for the included skeleton.
   DEFINE_FINI,       NOTE(review): their meaning is defined in
   ONE_DIRECTION      <iconv/skeleton.c>, not in this file.  */
24 #define CHARSET_NAME		"EUC-CN//"
25 #define FROM_LOOP		from_euc_cn
26 #define TO_LOOP			to_euc_cn
27 #define DEFINE_INIT		1
28 #define DEFINE_FINI		1
29 #define MIN_NEEDED_FROM		1
30 #define MAX_NEEDED_FROM		2
31 #define MIN_NEEDED_TO		4
32 #define ONE_DIRECTION		0
33 
34 
35 /* First define the conversion function from EUC-CN to UCS4.

   BODY is the per-character step; it is presumably expanded inside the
   conversion loop of <iconv/loop.c> (included below, not visible here).
   It relies on names supplied by that context: inptr, inend, outptr,
   result, put32 and STANDARD_FROM_LOOP_ERR_HANDLER.

   For each character BODY:
     - passes a byte <= 0x7f through unchanged, consuming one input byte;
     - hands a lead byte that is <= 0xa0 (other than 0x8e and 0x8f) or
       > 0xfe to STANDARD_FROM_LOOP_ERR_HANDLER (1);
     - otherwise requires a second byte: if none is available it sets
       result to __GCONV_INCOMPLETE_INPUT and breaks; if the second byte
       is < 0xa1 it invokes STANDARD_FROM_LOOP_ERR_HANDLER (1);
     - converts the byte pair with gb2312_to_ucs4 (offset 0x80) and, when
       that yields __UNKNOWN_10646_CHAR, invokes
       STANDARD_FROM_LOOP_ERR_HANDLER (2); on success two input bytes
       are consumed;
     - finally stores the resulting value with put32 and advances outptr
       by four bytes.

   NOTE(review): the error-handler macro is assumed not to fall through
   to the statements that follow it (e.g. by using break/continue) --
   confirm in <iconv/loop.c>.
   NOTE(review): lead bytes 0x8e and 0x8f are explicitly exempted from
   the first range check; whether gb2312_to_ucs4 then rejects them is
   not visible in this file.  */
36 #define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
37 #define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
38 #define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
39 #define LOOPFCT			FROM_LOOP
40 #define BODY \
41   {									      \
42     uint32_t ch = *inptr;						      \
43 									      \
44     if (ch <= 0x7f)							      \
45       ++inptr;								      \
46     else								      \
47       if ((__builtin_expect (ch <= 0xa0, 0) && ch != 0x8e && ch != 0x8f)      \
48 	  || __builtin_expect (ch > 0xfe, 0))				      \
49 	{								      \
50 	  /* This lead byte is illegal.  */				      \
51 	  STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
52 	}								      \
53       else								      \
54 	{								      \
55 	  /* Two-byte character.  First test whether the second byte	      \
56 	     is also available.  */					      \
57 	  const unsigned char *endp;					      \
58 									      \
59 	  if (__glibc_unlikely (inptr + 1 >= inend))			      \
60 	    {								      \
61 	      /* The second byte is not available yet.  Report		      \
62 		 incomplete input.  */					      \
63 	      result = __GCONV_INCOMPLETE_INPUT;			      \
64 	      break;							      \
65 	    }								      \
66 									      \
67 	  ch = inptr[1];						      \
68 									      \
69 	  /* All second bytes of a multibyte character must be >= 0xa1. */    \
70 	  if (__glibc_unlikely (ch < 0xa1))				      \
71 	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
72 									      \
73 	  /* This is code set 1: GB 2312-80.  */			      \
74 	  endp = inptr;							      \
75 									      \
76 	  ch = gb2312_to_ucs4 (&endp, 2, 0x80);				      \
77 	  if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR))		      \
78 	    {								      \
79 	      /* This is an illegal character.  */			      \
80 	      STANDARD_FROM_LOOP_ERR_HANDLER (2);			      \
81 	    }								      \
82 									      \
83 	  inptr += 2;							      \
84 	}								      \
85 									      \
86     put32 (outptr, ch);							      \
87     outptr += 4;							      \
88   }
89 #define LOOP_NEED_FLAGS
90 #define ONEBYTE_BODY \
91   {									      \
92     /* A single byte maps to itself only below 0x80.  */		      \
93     if (c >= 0x80)							      \
94       return WEOF;							      \
95     return c;								      \
96   }
97 #include <iconv/loop.c>
98 
99 
100 /* Next, define the other direction.

   BODY converts one UCS4 character to EUC-CN; it is presumably expanded
   inside the conversion loop of <iconv/loop.c> (included below, not
   visible here).  It relies on names supplied by that context: inptr,
   outptr, outend, result, get32, UNICODE_TAG_HANDLER and
   STANDARD_TO_LOOP_ERR_HANDLER.

   For each character BODY:
     - reads a 32-bit value with get32;
     - writes a value <= 0x7f as a single output byte;
     - otherwise calls ucs4_to_gb2312 with the remaining output space
       (outend - outptr) and inspects its result:
	 * 0 is treated as "out of space": result is set to
	   __GCONV_FULL_OUTPUT and the body breaks;
	 * __UNKNOWN_10646_CHAR leads to UNICODE_TAG_HANDLER (ch, 4)
	   followed by STANDARD_TO_LOOP_ERR_HANDLER (4);
	 * for any other result 0x80 is added to each of the two bytes
	   at outptr and outptr advances by two;
     - finally advances inptr by four bytes.

   NOTE(review): adding 0x80 in place assumes ucs4_to_gb2312 has already
   stored the two GB 2312 bytes at outptr -- confirm in <gb2312.h>.
   NOTE(review): the handler macros are assumed not to fall through to
   the statements that follow them -- confirm in <iconv/loop.c>.  */
101 #define MIN_NEEDED_INPUT	MIN_NEEDED_TO
102 #define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
103 #define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
104 #define LOOPFCT			TO_LOOP
105 #define BODY \
106   {									      \
107     uint32_t ch = get32 (inptr);					      \
108 									      \
109     if (ch <= L'\x7f')							      \
110       /* It's plain ASCII.  */						      \
111       *outptr++ = (unsigned char) ch;					      \
112     else								      \
113       {									      \
114 	size_t found;							      \
115 									      \
116 	found = ucs4_to_gb2312 (ch, outptr, outend - outptr);		      \
117 	if (__builtin_expect (found, 1) != 0)				      \
118 	  {								      \
119 	    if (__builtin_expect (found, 0) == __UNKNOWN_10646_CHAR)	      \
120 	      {								      \
121 		UNICODE_TAG_HANDLER (ch, 4);				      \
122 									      \
123 		/* Illegal character.  */				      \
124 		STANDARD_TO_LOOP_ERR_HANDLER (4);			      \
125 	      }								      \
126 									      \
127 	    /* It's a GB 2312 character, adjust it for EUC-CN.  */	      \
128 	    *outptr++ += 0x80;						      \
129 	    *outptr++ += 0x80;						      \
130 	  }								      \
131 	else								      \
132 	  {								      \
133 	    /* We ran out of space.  */					      \
134 	    result = __GCONV_FULL_OUTPUT;				      \
135 	    break;							      \
136 	  }								      \
137       }									      \
138     inptr += 4;								      \
139   }
140 #define LOOP_NEED_FLAGS
141 #include <iconv/loop.c>
142 
143 
144 /* Now define the toplevel functions.  */
145 #include <iconv/skeleton.c>
146