1 /* Conversion from and to IBM1364.
2    Copyright (C) 2005-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #include <dlfcn.h>
20 #include <stdint.h>
21 #include <wchar.h>
22 #include <byteswap.h>
23 
24 #ifndef CHARSET_NAME
25 /* This is really the IBM1364 converter, not another module sharing
26    the code.  The defaults below are only installed when CHARSET_NAME
      has not been defined before this point, so a file that defines it
      first (presumably together with the other names -- confirm in the
      sharing modules) keeps its own values.

      DATA_HEADER is the file pulled in by the `#include DATA_HEADER'
      line that follows this block.  FROM_LOOP and TO_LOOP become the
      names of the two conversion loops (via LOOPFCT).  The
      SB_TO_UCS4/DB_TO_UCS4 and UCS4_TO_SB/UCS4_TO_DB names, with their
      _IDX companions, are the lookup tables indexed by those loops.
      UCS_LIMIT is the value at or above which the to-direction loop
      rejects a UCS4 code; when HAS_COMBINED is defined the
      from-direction code also compares table results against it.  */
27 # define DATA_HEADER	"ibm1364.h"
28 # define CHARSET_NAME	"IBM1364//"
29 # define FROM_LOOP	from_ibm1364
30 # define TO_LOOP	to_ibm1364
31 # define SB_TO_UCS4	__ibm1364sb_to_ucs4
32 # define DB_TO_UCS4_IDX	__ibm1364db_to_ucs4_idx
33 # define DB_TO_UCS4	__ibm1364db_to_ucs4
34 # define UCS4_TO_SB_IDX	__ucs4_to_ibm1364sb_idx
35 # define UCS4_TO_SB	__ucs4_to_ibm1364sb
36 # define UCS4_TO_DB_IDX	__ucs4_to_ibm1364db_idx
37 # define UCS4_TO_DB	__ucs4_to_ibm1364db
38 # define UCS_LIMIT	0xffff
39 #endif
40 
41 
42 #include DATA_HEADER
43 
44 /* The shift sequences for this charset (it does not use ESC).
      Each is a single byte.  When decoding, SO selects the double-byte
      table and SI the single-byte table, and the shift byte itself
      produces no output.  When encoding, the same bytes are written to
      the output whenever the mode has to change, and SI is written by
      EMIT_SHIFT_TO_INIT to return to the initial state.  */
45 #define SI 		0x0F  /* Shift In, host code to turn DBCS off.  */
46 #define SO 		0x0E  /* Shift Out, host code to turn DBCS on.  */
47 
48 /* Definitions used in the body of the `gconv' function.

      MIN_NEEDED_FROM/MAX_NEEDED_FROM: a character on the IBM-13XX side
      takes one or two bytes.  MIN_NEEDED_TO/MAX_NEEDED_TO: the UCS4 side
      takes four bytes, or up to eight when HAS_COMBINED is defined,
      because one double-byte code may then expand to two UCS4 values.

      PREPARE_LOOP declares save_curcs (scratch used by SAVE_RESET_STATE)
      and curcsp, a pointer to the __count member of the conversion
      state; EXTRA_LOOP_ARGS appends curcsp to the argument list of the
      loop functions, matching their EXTRA_LOOP_DECLS parameter.
      ONE_DIRECTION is 0; it is presumably read by <iconv/skeleton.c> to
      enable both directions -- confirm there.  */
49 #define MIN_NEEDED_FROM	1
50 #define MAX_NEEDED_FROM	2
51 #define MIN_NEEDED_TO	4
52 #ifdef HAS_COMBINED
53 # define MAX_NEEDED_TO	8
54 #else
55 # define MAX_NEEDED_TO	4
56 #endif
57 #define ONE_DIRECTION	0
58 #define PREPARE_LOOP \
59   int save_curcs;							      \
60   int *curcsp = &data->__statep->__count;
61 #define EXTRA_LOOP_ARGS		, curcsp
62 
63 /* Definitions of initialization and destructor function.
      Both switches are set to 1 and nothing else in this file reads
      them; presumably <iconv/skeleton.c>, included at the end, uses
      them to generate default init/fini functions -- confirm there.  */
64 #define DEFINE_INIT	1
65 #define DEFINE_FINI	1
66 
67 
68 /* Since this is a stateful encoding we have to provide code which resets
69    the output state to the initial state.  This has to be done during the
70    flushing.

      The current mode is kept in the bits of __count above the low
      three.  If those bits differ from `sb':
        - in the FROM direction (decoding) the mode bits are simply
          cleared, nothing is written;
        - otherwise an SI byte is written to outbuf and the mode bits
          are cleared, or, if outbuf has reached outend, status is set
          to __GCONV_FULL_OUTPUT and the state is left untouched.
      The low three bits of __count are preserved in every case.  The
      macro relies on data, outbuf, outend, status and FROM_DIRECTION
      being available at the point of expansion; none of them is defined
      in this file.  */
71 #define EMIT_SHIFT_TO_INIT \
72   if ((data->__statep->__count & ~7) != sb)				      \
73     {									      \
74       if (FROM_DIRECTION)						      \
75 	data->__statep->__count &= 7;					      \
76       else								      \
77 	{								      \
78 	  /* We are not in the initial state.  To switch back we have	      \
79 	     to emit `SI'.  */						      \
80 	  if (__glibc_unlikely (outbuf >= outend))			      \
81 	    /* We don't have enough room in the output buffer.  */	      \
82 	    status = __GCONV_FULL_OUTPUT;				      \
83 	  else								      \
84 	    {								      \
85 	      /* Write out the shift sequence.  */			      \
86 	      *outbuf++ = SI;						      \
87 	      data->__statep->__count &= 7;				      \
88 	    }								      \
89 	}								      \
90     }
91 
92 
93 /* Since we might have to reset input pointer we must be able to save
94    and restore the state.
      With a non-zero argument the current value of *curcsp is copied
      into save_curcs; with a zero argument that saved value is written
      back to *curcsp.  Both variables come from PREPARE_LOOP.  The
      expansion intentionally has no trailing semicolon; the user of the
      macro supplies it.  */
95 #define SAVE_RESET_STATE(Save) \
96   if (Save)								      \
97     save_curcs = *curcsp;						      \
98   else									      \
99     *curcsp = save_curcs
100 
101 
102 /* Current codeset type.
       sb: single-byte mode, which is also the initial state (value 0).
       db: double-byte mode, entered by SO and left by SI.
       Both values have their low three bits clear, so they survive the
       `& ~7' masks applied to __count when the mode is loaded and are
       untouched by the `& 7' masks used when the mode is reset.  */
103 enum
104 {
105   sb = 0,
106   db = 64
107 };
108 
109 
110 /* Subroutine to write out converted UCS4 from IBM-13XX.
       Expanded inside the double-byte branch of the from-direction BODY,
       so it uses that scope's ch (the two-byte input code), res (its
       table result), outptr, outend and result.

       With HAS_COMBINED: when res equals UCS_LIMIT and ch lies within
       [__TO_UCS4_COMBINED_MIN, __TO_UCS4_COMBINED_MAX], the code maps to
       two UCS4 values taken from DB_TO_UCS4_COMB[ch - MIN]; both are
       written (8 bytes) after checking that they fit, otherwise result
       becomes __GCONV_FULL_OUTPUT and the `break' leaves the enclosing
       construct at the expansion site without consuming the input.  In
       every other case the single value res is written (4 bytes).

       Without HAS_COMBINED only the single-value path exists.  */
111 #ifdef HAS_COMBINED
112 # define SUB_COMBINED_UCS_FROM_IBM13XX \
113   {									      \
114     if (res != UCS_LIMIT || ch < __TO_UCS4_COMBINED_MIN			      \
115 	|| ch > __TO_UCS4_COMBINED_MAX)					      \
116       {									      \
117 	put32 (outptr, res);						      \
118 	outptr += 4;							      \
119       }									      \
120     else								      \
121       {									      \
122 	/* This is a combined character.  Make sure we have room.  */	      \
123 	if (__glibc_unlikely (outptr + 8 > outend))			      \
124 	  {								      \
125 	    result = __GCONV_FULL_OUTPUT;				      \
126 	    break;							      \
127 	  }								      \
128 									      \
129 	const struct divide *cmbp					      \
130 	  = &DB_TO_UCS4_COMB[ch - __TO_UCS4_COMBINED_MIN];		      \
131 	assert (cmbp->res1 != 0 && cmbp->res2 != 0);			      \
132 									      \
133 	put32 (outptr, cmbp->res1);					      \
134 	outptr += 4;							      \
135 	put32 (outptr, cmbp->res2);					      \
136 	outptr += 4;							      \
137       }									      \
138   }
139 #else
140 # define SUB_COMBINED_UCS_FROM_IBM13XX \
141   {									      \
142     put32 (outptr, res);						      \
143     outptr += 4;							      \
144   }
145 #endif /* HAS_COMBINED */
146 
147 
148 /* First, define the conversion function from IBM-13XX to UCS4.

       BODY is the per-character step handed to <iconv/loop.c>.  It uses
       inptr, inend, outptr, outend, result, irreversible and
       ignore_errors_p, none of which are defined in this file
       (presumably supplied by loop.c); `break' and `continue' act on
       the construct loop.c wraps around BODY.

       One step does the following:
         - An SO or SI byte only sets curcs to db or sb and is consumed.
           A shift to the mode that is already active is accepted.
         - In sb mode the byte indexes SB_TO_UCS4.  A zero result for a
           non-zero byte is illegal input; otherwise the result is
           written as one UCS4 value and one byte is consumed.
         - In db mode two bytes are required; if the second one is not
           available result becomes __GCONV_INCOMPLETE_INPUT.  The two
           bytes are combined with the first as the high byte, the
           matching DB_TO_UCS4_IDX range is searched linearly, and
           DB_TO_UCS4 is indexed with ch + idx.  A range whose start is
           0xffff, a code below the range start, or a zero result for a
           non-zero code is illegal input; otherwise
           SUB_COMBINED_UCS_FROM_IBM13XX writes the output.  Two bytes
           are consumed.
         - On illegal input the step stops with __GCONV_ILLEGAL_INPUT
           unless ignore_errors_p () is true, in which case
           *irreversible is incremented and the input is skipped.

       INIT_PARAMS loads curcs from *curcsp with the low three bits
       masked off; UPDATE_PARAMS stores curcs back into *curcsp.  */
149 #define MIN_NEEDED_INPUT  	MIN_NEEDED_FROM
150 #define MAX_NEEDED_INPUT  	MAX_NEEDED_FROM
151 #define MIN_NEEDED_OUTPUT 	MIN_NEEDED_TO
152 #define MAX_NEEDED_OUTPUT 	MAX_NEEDED_TO
153 #define LOOPFCT 		FROM_LOOP
154 #define BODY \
155   {									      \
156     uint32_t ch = *inptr;						      \
157 									      \
158     if (__builtin_expect (ch, 0) == SO)					      \
159       {									      \
160 	/* Shift OUT, change to DBCS converter (redundant escape okay).  */   \
161 	curcs = db;							      \
162 	++inptr;							      \
163 	continue;							      \
164       }									      \
165     if (__builtin_expect (ch, 0) == SI)					      \
166       {									      \
167 	/* Shift IN, change to SBCS converter (redundant escape okay).  */    \
168 	curcs = sb;							      \
169 	++inptr;							      \
170 	continue;							      \
171       }									      \
172 									      \
173     if (curcs == sb)							      \
174       {									      \
175 	/* Use the IBM13XX table for single byte.  */			      \
176 	uint32_t res = SB_TO_UCS4[ch];				      \
177 	if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0')	      \
178 	  {								      \
179 	    /* This is an illegal character.  */			      \
180 	    if (! ignore_errors_p ())					      \
181 	      {								      \
182 		result = __GCONV_ILLEGAL_INPUT;				      \
183 		break;							      \
184 	      }								      \
185 	    ++*irreversible;						      \
186 	  }								      \
187 	else								      \
188 	  {								      \
189 	    put32 (outptr, res);					      \
190 	    outptr += 4;						      \
191 	  }								      \
192 	++inptr;							      \
193       }									      \
194     else								      \
195       {									      \
196 	assert (curcs == db);						      \
197 									      \
198 	if (__glibc_unlikely (inptr + 1 >= inend))			      \
199 	  {								      \
200 	    /* The second character is not available.  Store the	      \
201 	       intermediate result.  */					      \
202 	    result = __GCONV_INCOMPLETE_INPUT;				      \
203 	    break;							      \
204 	  }								      \
205 									      \
206 	ch = (ch * 0x100) + inptr[1];					      \
207 									      \
208 	/* Use the IBM1364 table for double byte.  */			      \
209 	const struct gap *rp2 = DB_TO_UCS4_IDX;				      \
210 	while (ch > rp2->end)						      \
211 	  ++rp2;							      \
212 									      \
213 	uint32_t res;							      \
214 	if (__builtin_expect (rp2->start == 0xffff, 0)			      \
215 	    || __builtin_expect (ch < rp2->start, 0)			      \
216 	    || (res = DB_TO_UCS4[ch + rp2->idx],			      \
217 		__builtin_expect (res, L'\1') == L'\0' && ch != '\0'))	      \
218 	  {								      \
219 	    /* This is an illegal character.  */			      \
220 	    if (! ignore_errors_p ())					      \
221 	      {								      \
222 		result = __GCONV_ILLEGAL_INPUT;				      \
223 		break;							      \
224 	      }								      \
225 	    ++*irreversible;						      \
226 	  }								      \
227 	else								      \
228 	  {								      \
229 	    SUB_COMBINED_UCS_FROM_IBM13XX;				      \
230 	  }								      \
231 	inptr += 2;							      \
232       }									      \
233   }
234 #define LOOP_NEED_FLAGS
235 #define EXTRA_LOOP_DECLS	, int *curcsp
236 #define INIT_PARAMS		int curcs = *curcsp & ~7
237 #define UPDATE_PARAMS		*curcsp = curcs
238 #include <iconv/loop.c>
239 
240 
241 /* Subroutine to convert two UCS4 codes to IBM-13XX.
       Expanded inside the to-direction BODY, so it relies on that
       scope's ch (the current UCS4 value), inptr, inend, outptr, outend,
       curcs and result; its `break' and `continue' act on the construct
       that surrounds BODY.

       UCS4_COMB_TO_DB is scanned linearly for the first entry whose res1
       is not below ch (the table is presumably sorted by res1 and ends
       with a large sentinel -- confirm in the data header).  If ch
       starts a combination and a complete second UCS4 value is available
       in the input, the entries sharing this res1 are searched for one
       whose res2 equals that second value.  On a match SO is emitted if
       the output is still in single-byte mode, the two-byte code
       cmbp->ch is written, and both input values (8 bytes) are consumed.
       If the output has no room for the two bytes, result becomes
       __GCONV_FULL_OUTPUT and no input is consumed.  Without a match
       control falls through to the regular single-value lookup.

       The second value is read only when all four of its bytes lie
       before inend.  The earlier test `inptr + 4 < inend' was also true
       when just one to three bytes of a truncated value followed, and
       get32 then read up to three bytes beyond the end of the input.

       Without HAS_COMBINED the macro expands to nothing.  */
242 #ifdef HAS_COMBINED
243 # define SUB_COMBINED_UCS_TO_IBM13XX \
244   {									      \
245     const struct combine *cmbp = UCS4_COMB_TO_DB;			      \
246     while (cmbp->res1 < ch)						      \
247       ++cmbp;								      \
248     /* XXX if last char is beginning of combining store in state */	      \
249     if (cmbp->res1 == ch && inptr + 8 <= inend)				      \
250       {									      \
251 	/* See if input is part of a combined character.  */		      \
252 	uint32_t ch_next = get32 (inptr + 4);				      \
253 	while (cmbp->res2 != ch_next)					      \
254 	  {								      \
255 	    ++cmbp;							      \
256 	    if (cmbp->res1 != ch)					      \
257 	      goto not_combined;					      \
258 	  }								      \
259 									      \
260 	/* It is a combined character.  First make sure we are in	      \
261 	   double byte mode.  */					      \
262 	if (curcs == sb)						      \
263 	  {								      \
264 	    /* We know there is room for at least one byte.  */		      \
265 	    *outptr++ = SO;						      \
266 	    curcs = db;							      \
267 	  }								      \
268 									      \
269 	if (__glibc_unlikely (outptr + 2 > outend))			      \
270 	  {								      \
271 	    result = __GCONV_FULL_OUTPUT;				      \
272 	    break;							      \
273 	  }								      \
274 	*outptr++ = cmbp->ch[0];					      \
275 	*outptr++ = cmbp->ch[1];					      \
276 	inptr += 8;							      \
277 	continue;							      \
278 									      \
279       not_combined:;							      \
280       }									      \
281   }
282 #else
283 # define SUB_COMBINED_UCS_TO_IBM13XX
284 #endif /* HAS_COMBINED */
285 
286 
287 /* Next, define the other direction.

       BODY is the per-character step from UCS4 to IBM-13XX handed to
       <iconv/loop.c>.  It uses inptr, inend, outptr, outend, result,
       irreversible, ignore_errors_p, get32 and UNICODE_TAG_HANDLER, none
       of which are defined in this file.

       One step does the following:
         - A value at or above UCS_LIMIT is first passed to
           UNICODE_TAG_HANDLER (presumably it skips Unicode tag
           characters -- confirm where it is defined) and is otherwise
           illegal input.
         - SUB_COMBINED_UCS_TO_IBM13XX gets a chance to encode the value
           together with the following one; it is empty unless
           HAS_COMBINED is defined.
         - The value is looked up in the single-byte table through the
           UCS4_TO_SB_IDX ranges.  If it is outside the matching range,
           or the entry is zero for a non-zero value, the double-byte
           table is tried the same way through UCS4_TO_DB_IDX.  If that
           fails too, the value is illegal input.
         - A double-byte result is written as two bytes, preceded by SO
           when the output is still in single-byte mode.  A single-byte
           result is written as one byte, preceded by SI when the output
           is in double-byte mode.  If the bytes after the shift do not
           fit, result becomes __GCONV_FULL_OUTPUT; the shift byte
           already written and the changed mode are kept and the input
           value is not consumed.
         - On illegal input the step stops with __GCONV_ILLEGAL_INPUT
           unless ignore_errors_p () is true, in which case
           *irreversible is incremented and the four input bytes are
           skipped.

       INIT_PARAMS and REINIT_PARAMS load curcs from *curcsp with the low
       three bits masked off; UPDATE_PARAMS stores curcs back.  */
288 #define MIN_NEEDED_INPUT	MIN_NEEDED_TO
289 #define MAX_NEEDED_INPUT  	MAX_NEEDED_TO
290 #define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
291 #define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
292 #define LOOPFCT			TO_LOOP
293 #define BODY \
294   {									      \
295     uint32_t ch = get32 (inptr);					      \
296 									      \
297     if (__glibc_unlikely (ch >= UCS_LIMIT))				      \
298       {									      \
299 	UNICODE_TAG_HANDLER (ch, 4);					      \
300 									      \
301 	if (! ignore_errors_p ())					      \
302 	  {								      \
303 	    result = __GCONV_ILLEGAL_INPUT;				      \
304 	    break;							      \
305 	  }								      \
306 	++*irreversible;						      \
307 	inptr += 4;							      \
308 	continue;							      \
309       }									      \
310 									      \
311     SUB_COMBINED_UCS_TO_IBM13XX;					      \
312 									      \
313     const struct gap *rp1 = UCS4_TO_SB_IDX;				      \
314     while (ch > rp1->end)						      \
315       ++rp1;								      \
316 									      \
317     /* Use the UCS4 table for single byte.  */				      \
318     const char *cp;							      \
319     if (__builtin_expect (ch < rp1->start, 0)				      \
320 	|| (cp = UCS4_TO_SB[ch + rp1->idx],				      \
321 	    __builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0'))	      \
322       {									      \
323 	/* Use the UCS4 table for double byte.  */			      \
324 	const struct gap *rp2 = UCS4_TO_DB_IDX;				      \
325 	while (ch > rp2->end)						      \
326 	  ++rp2;							      \
327 									      \
328 	if (__builtin_expect (ch < rp2->start, 0)			      \
329 	    || (cp = UCS4_TO_DB[ch + rp2->idx],				      \
330 		__builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0'))      \
331 	  {								      \
332 	    /* This is an illegal character.  */			      \
333 	    if (! ignore_errors_p ())					      \
334 	      {								      \
335 		result = __GCONV_ILLEGAL_INPUT;				      \
336 		break;							      \
337 	      }								      \
338 	    ++*irreversible;						      \
339 	  }								      \
340 	else								      \
341 	  {								      \
342 	    if (curcs == sb)						      \
343 	      {								      \
344 		/* We know there is room for at least one byte.  */	      \
345 		*outptr++ = SO;						      \
346 		curcs = db;						      \
347 	      }								      \
348 									      \
349 	    if (__glibc_unlikely (outptr + 2 > outend))			      \
350 	      {								      \
351 		result = __GCONV_FULL_OUTPUT;				      \
352 		break;							      \
353 	      }								      \
354 	    *outptr++ = cp[0];						      \
355 	    *outptr++ = cp[1];						      \
356 	  }								      \
357       }									      \
358     else								      \
359       {									      \
360 	if (__glibc_unlikely (curcs == db))				      \
361 	  {								      \
362 	    /* We know there is room for at least one byte.  */		      \
363 	    *outptr++ = SI;						      \
364 	    curcs = sb;							      \
365 									      \
366 	    if (__glibc_unlikely (outptr >= outend))			      \
367 	      {								      \
368 		result = __GCONV_FULL_OUTPUT;				      \
369 		break;							      \
370 	      }								      \
371 	  }								      \
372 									      \
373 	*outptr++ = cp[0];						      \
374       }									      \
375 									      \
376     /* Now that we wrote the output increment the input pointer.  */	      \
377     inptr += 4;								      \
378   }
379 #define LOOP_NEED_FLAGS
380 #define EXTRA_LOOP_DECLS	, int *curcsp
381 #define INIT_PARAMS		int curcs = *curcsp & ~7
382 #define REINIT_PARAMS		curcs = *curcsp & ~7
383 #define UPDATE_PARAMS		*curcsp = curcs
384 #include <iconv/loop.c>
385 
386 /* Now define the toplevel functions.  */
387 #include <iconv/skeleton.c>
388