1 /* Conversion from and to TSCII.
2    Copyright (C) 2002-2022 Free Software Foundation, Inc.
3    This file is part of the GNU C Library.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Lesser General Public
7    License as published by the Free Software Foundation; either
8    version 2.1 of the License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Lesser General Public License for more details.
14 
15    You should have received a copy of the GNU Lesser General Public
16    License along with the GNU C Library; if not, see
17    <https://www.gnu.org/licenses/>.  */
18 
19 #include <dlfcn.h>
20 #include <stdint.h>
21 #include <gconv.h>
22 #include <assert.h>
23 
24 /* TSCII is an 8-bit encoding consisting of:
25    0x00..0x7F:       ASCII
26    0x80..0x90, 0x95..0x9F, 0xAB..0xFE:
27                      Tamil letters and glyphs
28    0xA1..0xA5, 0xAA: Tamil combining letters (after the base character)
29    0xA6..0xA8:       Tamil combining letters (before the base character)
30    0x91..0x94:       Punctuation
31    0xA9:             Symbols
32 */
33 
/* Parameters for the generic `gconv' function body.  */
#define CHARSET_NAME		"TSCII//"
#define FROM_LOOP		from_tscii
#define TO_LOOP			to_tscii
#define DEFINE_INIT		1
#define DEFINE_FINI		1
#define ONE_DIRECTION		0

/* TSCII -> UCS-4: input is counted in TSCII bytes, output in bytes of
   UCS-4.  The largest expansion of one input byte is four UCS-4
   characters (byte 0x82), i.e. 16 output bytes.  */
#define FROM_LOOP_MIN_NEEDED_FROM	1
#define FROM_LOOP_MAX_NEEDED_FROM	2
#define FROM_LOOP_MIN_NEEDED_TO		4
#define FROM_LOOP_MAX_NEEDED_TO		16

/* UCS-4 -> TSCII: every input character is 4 bytes wide; a single
   conversion step writes between one and three TSCII bytes.  */
#define TO_LOOP_MIN_NEEDED_FROM		4
#define TO_LOOP_MAX_NEEDED_FROM		4
#define TO_LOOP_MIN_NEEDED_TO		1
#define TO_LOOP_MAX_NEEDED_TO		3

/* Locals set up before the conversion loops run: STATEP addresses the
   __count member of the conversion state, SAVED_STATE is the slot used
   by SAVE_RESET_STATE.  STATEP is handed to both loop functions as an
   extra argument.  */
#define PREPARE_LOOP \
  int *statep = &data->__statep->__count;				      \
  int saved_state;
#define EXTRA_LOOP_ARGS		, statep
53 
54 
/* The input pointer may have to be reset, so the conversion state must
   be recoverable as well.  A nonzero SAVE copies the current state into
   SAVED_STATE; a zero SAVE puts the saved value back into the state.  */
#define SAVE_RESET_STATE(Save) \
  if (!(Save))								      \
    *statep = saved_state;						      \
  else									      \
    saved_state = *statep
62 
63 
64 /* During TSCII to UCS-4 conversion, the COUNT element of the state contains
65    the last UCS-4 character to be output, shifted by 8 bits, and an encoded
66    representation of additional UCS-4 characters to be output (if any),
67    shifted by 4 bits.  This character can be:
68      0x0000                   Nothing pending.
69      0x0BCD                   Pending VIRAMA sign. If bit 3 is set, it may be
70                               omitted if followed by a vowel sign U or UU.
71      0x0BC6, 0x0BC7, 0x0BC8   Pending vowel sign.  Bit 3 is set after the
72                               consonant was seen.
73      Other                    Bit 3 always cleared.  */
74 
75 /* During UCS-4 to TSCII conversion, the COUNT element of the state contains
76    the last byte (or sometimes the last two bytes) to be output, shifted by
77    3 bits. This can be:
78      0x00                     Nothing pending.
79      0xB8..0xC9, 0x83..0x86   A consonant.
80      0xEC, 0x8A               A consonant with VIRAMA sign (final or joining).
81      0x87, 0xC38A             Two consonants combined through a VIRAMA sign. */
82 
/* TSCII conversion keeps state in both directions, so flushing has to
   write out whatever is still buffered there and bring the state back to
   zero.  If the output buffer cannot take the buffered data, STATUS is
   set to __GCONV_FULL_OUTPUT and the part not yet written remains in the
   state.  */
#define EMIT_SHIFT_TO_INIT \
  if (data->__statep->__count != 0)					      \
    {									      \
      if (FROM_DIRECTION)						      \
	{								      \
	  /* TSCII -> UCS-4: drain the queued UCS-4 characters.  */	      \
	  for (;;)							      \
	    {								      \
	      if (__glibc_unlikely (outbuf + 4 > outend))		      \
		{							      \
		  /* No room left for another UCS-4 character.  */	      \
		  status = __GCONV_FULL_OUTPUT;				      \
		  break;						      \
		}							      \
	      /* Bits 8 and up hold the character to write now.  */	      \
	      *((uint32_t *) outbuf) = data->__statep->__count >> 8;	      \
	      outbuf += sizeof (uint32_t);				      \
	      /* Bits 4..7 select what is queued behind it.  */		      \
	      data->__statep->__count =					      \
		tscii_next_state[(data->__statep->__count >> 4) & 0x0f];      \
	      if (data->__statep->__count == 0)				      \
		break;							      \
	    }								      \
	}								      \
      else								      \
	{								      \
	  /* UCS-4 -> TSCII: one or two bytes are buffered, low byte	      \
	     first.  */							      \
	  uint32_t pending = data->__statep->__count >> 3;		      \
	  int width = (pending >> 8) != 0 ? 2 : 1;			      \
	  if (__glibc_likely (outbuf + width <= outend))		      \
	    {								      \
	      *outbuf++ = pending & 0xff;				      \
	      if (width == 2)						      \
		*outbuf++ = (pending >> 8) & 0xff;			      \
	      data->__statep->__count = 0;				      \
	    }								      \
	  else								      \
	    /* We don't have enough room in the output buffer.  */	      \
	    status = __GCONV_FULL_OUTPUT;				      \
	}								      \
    }
138 
139 
/* First define the conversion function from TSCII to UCS-4.  */

/* Forward table for the bytes 0x80..0xFF, indexed by (byte - 0x80).
   Each row holds the first and, where there is one, the second UCS-4
   character for that byte; a second element of 0 means there is none.
   Rows that are { 0, 0 } are not converted through this table: they are
   either unmapped or expanded by dedicated code in the conversion
   loop.  */
static const uint16_t tscii_to_ucs4[128][2] =
  {
    /* 0x80..0x8f */
    [0x80 - 0x80] = { 0x0BE6, 0 },
    [0x81 - 0x80] = { 0x0BE7, 0 },
    [0x82 - 0x80] = { 0, 0 },	/* maps to <U0BB8><U0BCD><U0BB0><U0BC0> */
    [0x83 - 0x80] = { 0x0B9C, 0 },
    [0x84 - 0x80] = { 0x0BB7, 0 },
    [0x85 - 0x80] = { 0x0BB8, 0 },
    [0x86 - 0x80] = { 0x0BB9, 0 },
    [0x87 - 0x80] = { 0, 0 },	/* maps to <U0B95><U0BCD><U0BB7> */
    [0x88 - 0x80] = { 0x0B9C, 0x0BCD },
    [0x89 - 0x80] = { 0x0BB7, 0x0BCD },
    [0x8a - 0x80] = { 0, 0 },	/* maps to <U0BB8> and buffers <U0BCD> */
    [0x8b - 0x80] = { 0, 0 },	/* maps to <U0BB9> and buffers <U0BCD> */
    [0x8c - 0x80] = { 0, 0 },	/* maps to <U0B95><U0BCD><U0BB7><U0BCD> */
    [0x8d - 0x80] = { 0x0BE8, 0 },
    [0x8e - 0x80] = { 0x0BE9, 0 },
    [0x8f - 0x80] = { 0x0BEA, 0 },

    /* 0x90..0x9f */
    [0x90 - 0x80] = { 0x0BEB, 0 },
    [0x91 - 0x80] = { 0x2018, 0 },
    [0x92 - 0x80] = { 0x2019, 0 },
    [0x93 - 0x80] = { 0x201C, 0 },
    [0x94 - 0x80] = { 0x201D, 0 },
    [0x95 - 0x80] = { 0x0BEC, 0 },
    [0x96 - 0x80] = { 0x0BED, 0 },
    [0x97 - 0x80] = { 0x0BEE, 0 },
    [0x98 - 0x80] = { 0x0BEF, 0 },
    [0x99 - 0x80] = { 0x0B99, 0x0BC1 },
    [0x9a - 0x80] = { 0x0B9E, 0x0BC1 },
    [0x9b - 0x80] = { 0x0B99, 0x0BC2 },
    [0x9c - 0x80] = { 0x0B9E, 0x0BC2 },
    [0x9d - 0x80] = { 0x0BF0, 0 },
    [0x9e - 0x80] = { 0x0BF1, 0 },
    [0x9f - 0x80] = { 0x0BF2, 0 },

    /* 0xa0..0xaf */
    [0xa0 - 0x80] = { 0, 0 },	/* unmapped */
    [0xa1 - 0x80] = { 0x0BBE, 0 },
    [0xa2 - 0x80] = { 0x0BBF, 0 },
    [0xa3 - 0x80] = { 0x0BC0, 0 },
    [0xa4 - 0x80] = { 0x0BC1, 0 },
    [0xa5 - 0x80] = { 0x0BC2, 0 },
    [0xa6 - 0x80] = { 0, 0 },	/* buffers <U0BC6> */
    [0xa7 - 0x80] = { 0, 0 },	/* buffers <U0BC7> */
    [0xa8 - 0x80] = { 0, 0 },	/* buffers <U0BC8> */
    [0xa9 - 0x80] = { 0x00A9, 0 },
    [0xaa - 0x80] = { 0x0BD7, 0 },
    [0xab - 0x80] = { 0x0B85, 0 },
    [0xac - 0x80] = { 0x0B86, 0 },
    [0xad - 0x80] = { 0x0B87, 0 },
    [0xae - 0x80] = { 0x0B88, 0 },
    [0xaf - 0x80] = { 0x0B89, 0 },

    /* 0xb0..0xb7 */
    [0xb0 - 0x80] = { 0x0B8A, 0 },
    [0xb1 - 0x80] = { 0x0B8E, 0 },
    [0xb2 - 0x80] = { 0x0B8F, 0 },
    [0xb3 - 0x80] = { 0x0B90, 0 },
    [0xb4 - 0x80] = { 0x0B92, 0 },
    [0xb5 - 0x80] = { 0x0B93, 0 },
    [0xb6 - 0x80] = { 0x0B94, 0 },
    [0xb7 - 0x80] = { 0x0B83, 0 },

    /* 0xb8..0xc9: the eighteen consonants the conversion loops treat as
       combinable.  */
    [0xb8 - 0x80] = { 0x0B95, 0 },
    [0xb9 - 0x80] = { 0x0B99, 0 },
    [0xba - 0x80] = { 0x0B9A, 0 },
    [0xbb - 0x80] = { 0x0B9E, 0 },
    [0xbc - 0x80] = { 0x0B9F, 0 },
    [0xbd - 0x80] = { 0x0BA3, 0 },
    [0xbe - 0x80] = { 0x0BA4, 0 },
    [0xbf - 0x80] = { 0x0BA8, 0 },
    [0xc0 - 0x80] = { 0x0BAA, 0 },
    [0xc1 - 0x80] = { 0x0BAE, 0 },
    [0xc2 - 0x80] = { 0x0BAF, 0 },
    [0xc3 - 0x80] = { 0x0BB0, 0 },
    [0xc4 - 0x80] = { 0x0BB2, 0 },
    [0xc5 - 0x80] = { 0x0BB5, 0 },
    [0xc6 - 0x80] = { 0x0BB4, 0 },
    [0xc7 - 0x80] = { 0x0BB3, 0 },
    [0xc8 - 0x80] = { 0x0BB1, 0 },
    [0xc9 - 0x80] = { 0x0BA9, 0 },

    /* 0xca..0xcb */
    [0xca - 0x80] = { 0x0B9F, 0x0BBF },
    [0xcb - 0x80] = { 0x0B9F, 0x0BC0 },

    /* 0xcc..0xdb: consonant followed by <U0BC1>.  */
    [0xcc - 0x80] = { 0x0B95, 0x0BC1 },
    [0xcd - 0x80] = { 0x0B9A, 0x0BC1 },
    [0xce - 0x80] = { 0x0B9F, 0x0BC1 },
    [0xcf - 0x80] = { 0x0BA3, 0x0BC1 },
    [0xd0 - 0x80] = { 0x0BA4, 0x0BC1 },
    [0xd1 - 0x80] = { 0x0BA8, 0x0BC1 },
    [0xd2 - 0x80] = { 0x0BAA, 0x0BC1 },
    [0xd3 - 0x80] = { 0x0BAE, 0x0BC1 },
    [0xd4 - 0x80] = { 0x0BAF, 0x0BC1 },
    [0xd5 - 0x80] = { 0x0BB0, 0x0BC1 },
    [0xd6 - 0x80] = { 0x0BB2, 0x0BC1 },
    [0xd7 - 0x80] = { 0x0BB5, 0x0BC1 },
    [0xd8 - 0x80] = { 0x0BB4, 0x0BC1 },
    [0xd9 - 0x80] = { 0x0BB3, 0x0BC1 },
    [0xda - 0x80] = { 0x0BB1, 0x0BC1 },
    [0xdb - 0x80] = { 0x0BA9, 0x0BC1 },

    /* 0xdc..0xeb: consonant followed by <U0BC2>.  */
    [0xdc - 0x80] = { 0x0B95, 0x0BC2 },
    [0xdd - 0x80] = { 0x0B9A, 0x0BC2 },
    [0xde - 0x80] = { 0x0B9F, 0x0BC2 },
    [0xdf - 0x80] = { 0x0BA3, 0x0BC2 },
    [0xe0 - 0x80] = { 0x0BA4, 0x0BC2 },
    [0xe1 - 0x80] = { 0x0BA8, 0x0BC2 },
    [0xe2 - 0x80] = { 0x0BAA, 0x0BC2 },
    [0xe3 - 0x80] = { 0x0BAE, 0x0BC2 },
    [0xe4 - 0x80] = { 0x0BAF, 0x0BC2 },
    [0xe5 - 0x80] = { 0x0BB0, 0x0BC2 },
    [0xe6 - 0x80] = { 0x0BB2, 0x0BC2 },
    [0xe7 - 0x80] = { 0x0BB5, 0x0BC2 },
    [0xe8 - 0x80] = { 0x0BB4, 0x0BC2 },
    [0xe9 - 0x80] = { 0x0BB3, 0x0BC2 },
    [0xea - 0x80] = { 0x0BB1, 0x0BC2 },
    [0xeb - 0x80] = { 0x0BA9, 0x0BC2 },

    /* 0xec..0xfd: consonant followed by <U0BCD>.  */
    [0xec - 0x80] = { 0x0B95, 0x0BCD },
    [0xed - 0x80] = { 0x0B99, 0x0BCD },
    [0xee - 0x80] = { 0x0B9A, 0x0BCD },
    [0xef - 0x80] = { 0x0B9E, 0x0BCD },
    [0xf0 - 0x80] = { 0x0B9F, 0x0BCD },
    [0xf1 - 0x80] = { 0x0BA3, 0x0BCD },
    [0xf2 - 0x80] = { 0x0BA4, 0x0BCD },
    [0xf3 - 0x80] = { 0x0BA8, 0x0BCD },
    [0xf4 - 0x80] = { 0x0BAA, 0x0BCD },
    [0xf5 - 0x80] = { 0x0BAE, 0x0BCD },
    [0xf6 - 0x80] = { 0x0BAF, 0x0BCD },
    [0xf7 - 0x80] = { 0x0BB0, 0x0BCD },
    [0xf8 - 0x80] = { 0x0BB2, 0x0BCD },
    [0xf9 - 0x80] = { 0x0BB5, 0x0BCD },
    [0xfa - 0x80] = { 0x0BB4, 0x0BCD },
    [0xfb - 0x80] = { 0x0BB3, 0x0BCD },
    [0xfc - 0x80] = { 0x0BB1, 0x0BCD },
    [0xfd - 0x80] = { 0x0BA9, 0x0BCD },

    /* 0xfe..0xff */
    [0xfe - 0x80] = { 0x0B87, 0 },
    [0xff - 0x80] = { 0, 0 }	/* unmapped */
  };
273 
/* Successor states for the TSCII -> UCS-4 direction.  The index is the
   value kept in bits 4..7 of the state; the entry is the state to switch
   to once the character currently in bits 8 and up has been written.  */
static const uint32_t tscii_next_state[6] =
  {
    [0] = 0,				/* nothing more is pending */
    [1] = 0x0BB7 << 8,			/* <U0BB7> */
    [2] = 0x0BC0 << 8,			/* <U0BC0> */
    [3] = 0x0BCD << 8,			/* <U0BCD> */
    [4] = (0x0BB0 << 8) | (2 << 4),	/* <U0BB0><U0BC0> */
    [5] = (0x0BB7 << 8) | (3 << 4)	/* <U0BB7><U0BCD> */
  };
289 
/* Loop body for the TSCII -> UCS-4 direction.

   One pass looks at the input byte at INPTR and at the state in *STATEP:

   - If the state holds a pending UCS-4 character (bits 8 and up are
     nonzero), the byte is first tried as a continuation of it.  When it
     cannot be combined, the pending character and everything queued
     behind it (selected by bits 4..7 through tscii_next_state) is
     written out, the byte is NOT consumed, and the next pass converts it
     from a clean state.

   - Otherwise the byte is converted directly: ASCII is copied, bytes
     with an entry in tscii_to_ucs4 produce one or two characters, and a
     handful of bytes are expanded or buffered by dedicated code.

   Whenever a multi-character expansion does not fit into the output
   buffer, the input byte is consumed, the characters that do not fit are
   recorded in the state, and the loop is left with
   __GCONV_FULL_OUTPUT.  */
#define MIN_NEEDED_INPUT	FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT			FROM_LOOP
#define BODY \
  {									      \
    uint32_t ch = *inptr;						      \
									      \
    if ((*statep >> 8) != 0)						      \
      {									      \
	/* Attempt to combine the last character with this one.  */	      \
	uint32_t last = *statep >> 8;					      \
									      \
	if (last == 0x0BCD && (*statep & (1 << 3)))			      \
	  {								      \
	    /* An omittable VIRAMA is pending: a following vowel sign U	      \
	       or UU replaces it.  */					      \
	    if (ch == 0xa4 || ch == 0xa5)				      \
	      {								      \
		ch += 0xb1d;						      \
		/* Now ch = 0x0BC1 or ch = 0x0BC2.  */			      \
		put32 (outptr, ch);					      \
		outptr += 4;						      \
		*statep = 0;						      \
		inptr++;						      \
		continue;						      \
	      }								      \
	  }								      \
	else if (last >= 0x0BC6 && last <= 0x0BC8)			      \
	  {								      \
	    /* A vowel sign written before its consonant is pending.  It    \
	       may merge with a following 0xa1 or 0xaa into a two-part	      \
	       vowel sign.  */						      \
	    if ((last == 0x0BC6 && ch == 0xa1)				      \
		|| (last == 0x0BC7 && (ch == 0xa1 || ch == 0xaa)))	      \
	      {								      \
		ch = last + 4 + (ch != 0xa1);				      \
		/* Now ch = 0x0BCA or ch = 0x0BCB or ch = 0x0BCC.  */	      \
		put32 (outptr, ch);					      \
		outptr += 4;						      \
		*statep = 0;						      \
		inptr++;						      \
		continue;						      \
	      }								      \
	    /* The first consonant after the vowel sign is written at	      \
	       once; bit 3 records that it has been seen while the vowel     \
	       sign stays pending.  */					      \
	    if ((ch >= 0xb8 && ch <= 0xc9) && (*statep & (1 << 3)) == 0)      \
	      {								      \
		ch = tscii_to_ucs4[ch - 0x80][0];			      \
		put32 (outptr, ch);					      \
		outptr += 4;						      \
		*statep |= 1 << 3;					      \
		inptr++;						      \
		continue;						      \
	      }								      \
	  }								      \
									      \
	do								      \
	  {								      \
	    /* Output the buffered character.  */			      \
	    put32 (outptr, last);					      \
	    outptr += 4;						      \
	    /* Retrieve the successor state.  */			      \
	    *statep = tscii_next_state[(*statep >> 4) & 0x0f];		      \
	    /* The successor state carries the next queued character;	      \
	       reload it so that the next pass does not write the same	      \
	       character again.  */					      \
	    last = *statep >> 8;					      \
	  }								      \
	while (*statep != 0 && __builtin_expect (outptr + 4 <= outend, 1));   \
									      \
	if (*statep != 0)						      \
	  {								      \
	    /* We don't have enough room in the output buffer.		      \
	       Tell the caller why we terminate the loop.  */		      \
	    result = __GCONV_FULL_OUTPUT;				      \
	    break;							      \
	  }								      \
									      \
	/* The state is clear now; the current byte is converted by the	      \
	   next pass.  */						      \
	continue;							      \
      }									      \
									      \
    if (ch < 0x80)							      \
      {									      \
	/* Plain ASCII character.  */					      \
	put32 (outptr, ch);						      \
	outptr += 4;							      \
      }									      \
    else								      \
      {									      \
	/* Tamil character.  */						      \
	uint32_t u1 = tscii_to_ucs4[ch - 0x80][0];			      \
									      \
	if (u1 != 0)							      \
	  {								      \
	    uint32_t u2 = tscii_to_ucs4[ch - 0x80][1];			      \
									      \
	    inptr++;							      \
									      \
	    put32 (outptr, u1);						      \
	    outptr += 4;						      \
									      \
	    if (u2 != 0)						      \
	      {								      \
		/* See whether we have room for two characters.  Otherwise    \
		   store only the first character now, and put the second     \
		   one into the queue.  */				      \
		if (__glibc_unlikely (outptr + 4 > outend))		      \
		  {							      \
		    *statep = u2 << 8;					      \
		    result = __GCONV_FULL_OUTPUT;			      \
		    break;						      \
		  }							      \
		put32 (outptr, u2);					      \
		outptr += 4;						      \
	      }								      \
	    continue;							      \
	  }								      \
	/* Special handling of a few Tamil characters.  */		      \
	else if (ch == 0xa6 || ch == 0xa7 || ch == 0xa8)		      \
	  {								      \
	    /* Vowel sign preceding its consonant: only buffer it.  */	      \
	    ch += 0x0b20;						      \
	    /* Now ch = 0x0BC6 or ch = 0x0BC7 or ch = 0x0BC8.  */	      \
	    *statep = ch << 8;						      \
	    inptr++;							      \
	    continue;							      \
	  }								      \
	else if (ch == 0x8a || ch == 0x8b)				      \
	  {								      \
	    /* Write the consonant and buffer an omittable VIRAMA.  */	      \
	    ch += 0x0b2e;						      \
	    /* Now ch = 0x0BB8 or ch = 0x0BB9.  */			      \
	    put32 (outptr, ch);						      \
	    outptr += 4;						      \
	    *statep = (0x0BCD << 8) + (1 << 3);				      \
	    inptr++;							      \
	    continue;							      \
	  }								      \
	else if (ch == 0x82)						      \
	  {								      \
	    /* Output <U0BB8><U0BCD><U0BB0><U0BC0>, if we have room for	      \
	       four characters.  */					      \
	    inptr++;							      \
	    put32 (outptr, 0x0BB8);					      \
	    outptr += 4;						      \
	    if (__glibc_unlikely (outptr + 4 > outend))			      \
	      {								      \
		*statep = (0x0BCD << 8) + (4 << 4);			      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	    put32 (outptr, 0x0BCD);					      \
	    outptr += 4;						      \
	    if (__glibc_unlikely (outptr + 4 > outend))			      \
	      {								      \
		*statep = (0x0BB0 << 8) + (2 << 4);			      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	    put32 (outptr, 0x0BB0);					      \
	    outptr += 4;						      \
	    if (__glibc_unlikely (outptr + 4 > outend))			      \
	      {								      \
		*statep = (0x0BC0 << 8);				      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	    put32 (outptr, 0x0BC0);					      \
	    outptr += 4;						      \
	    continue;							      \
	  }								      \
	else if (ch == 0x87)						      \
	  {								      \
	    /* Output <U0B95><U0BCD><U0BB7>, if we have room for	      \
	       three characters.  */					      \
	    inptr++;							      \
	    put32 (outptr, 0x0B95);					      \
	    outptr += 4;						      \
	    if (__glibc_unlikely (outptr + 4 > outend))			      \
	      {								      \
		*statep = (0x0BCD << 8) + (1 << 4);			      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	    put32 (outptr, 0x0BCD);					      \
	    outptr += 4;						      \
	    if (__glibc_unlikely (outptr + 4 > outend))			      \
	      {								      \
		*statep = (0x0BB7 << 8);				      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	    put32 (outptr, 0x0BB7);					      \
	    outptr += 4;						      \
	    continue;							      \
	  }								      \
	else if (ch == 0x8c)						      \
	  {								      \
	    /* Output <U0B95><U0BCD><U0BB7><U0BCD>, if we have room for	      \
	       four characters.  */					      \
	    inptr++;							      \
	    put32 (outptr, 0x0B95);					      \
	    outptr += 4;						      \
	    if (__glibc_unlikely (outptr + 4 > outend))			      \
	      {								      \
		*statep = (0x0BCD << 8) + (5 << 4);			      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	    put32 (outptr, 0x0BCD);					      \
	    outptr += 4;						      \
	    if (__glibc_unlikely (outptr + 4 > outend))			      \
	      {								      \
		*statep = (0x0BB7 << 8) + (3 << 4);			      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	    put32 (outptr, 0x0BB7);					      \
	    outptr += 4;						      \
	    if (__glibc_unlikely (outptr + 4 > outend))			      \
	      {								      \
		*statep = (0x0BCD << 8);				      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	    put32 (outptr, 0x0BCD);					      \
	    outptr += 4;						      \
	    continue;							      \
	  }								      \
	else								      \
	  {								      \
	    /* This is illegal.  */					      \
	    STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
	  }								      \
      }									      \
									      \
    /* Now that we wrote the output increment the input pointer.  */	      \
    inptr++;								      \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS	, int *statep
520 #include <iconv/loop.c>
521 
522 
/* Next, define the other direction, from UCS-4 to TSCII.  */

/* Reverse table for U+0B80..U+0BFF, indexed by (code point - 0x0B80).
   The entry is the TSCII byte for that code point; every code point not
   listed here has the entry 0, which the conversion loop treats as
   "no direct single-byte mapping".  */
static const uint8_t ucs4_to_tscii[128] =
  {
    /* 0x0B80..0x0B8F */
    [0x0B83 - 0x0B80] = 0xb7,
    [0x0B85 - 0x0B80] = 0xab,
    [0x0B86 - 0x0B80] = 0xac,
    [0x0B87 - 0x0B80] = 0xfe,
    [0x0B88 - 0x0B80] = 0xae,
    [0x0B89 - 0x0B80] = 0xaf,
    [0x0B8A - 0x0B80] = 0xb0,
    [0x0B8E - 0x0B80] = 0xb1,
    [0x0B8F - 0x0B80] = 0xb2,

    /* 0x0B90..0x0B9F */
    [0x0B90 - 0x0B80] = 0xb3,
    [0x0B92 - 0x0B80] = 0xb4,
    [0x0B93 - 0x0B80] = 0xb5,
    [0x0B94 - 0x0B80] = 0xb6,
    [0x0B95 - 0x0B80] = 0xb8,
    [0x0B99 - 0x0B80] = 0xb9,
    [0x0B9A - 0x0B80] = 0xba,
    [0x0B9C - 0x0B80] = 0x83,
    [0x0B9E - 0x0B80] = 0xbb,
    [0x0B9F - 0x0B80] = 0xbc,

    /* 0x0BA0..0x0BAF */
    [0x0BA3 - 0x0B80] = 0xbd,
    [0x0BA4 - 0x0B80] = 0xbe,
    [0x0BA8 - 0x0B80] = 0xbf,
    [0x0BA9 - 0x0B80] = 0xc9,
    [0x0BAA - 0x0B80] = 0xc0,
    [0x0BAE - 0x0B80] = 0xc1,
    [0x0BAF - 0x0B80] = 0xc2,

    /* 0x0BB0..0x0BBF */
    [0x0BB0 - 0x0B80] = 0xc3,
    [0x0BB1 - 0x0B80] = 0xc8,
    [0x0BB2 - 0x0B80] = 0xc4,
    [0x0BB3 - 0x0B80] = 0xc7,
    [0x0BB4 - 0x0B80] = 0xc6,
    [0x0BB5 - 0x0B80] = 0xc5,
    [0x0BB7 - 0x0B80] = 0x84,
    [0x0BB8 - 0x0B80] = 0x85,
    [0x0BB9 - 0x0B80] = 0x86,
    [0x0BBE - 0x0B80] = 0xa1,
    [0x0BBF - 0x0B80] = 0xa2,

    /* 0x0BC0..0x0BCF */
    [0x0BC0 - 0x0B80] = 0xa3,
    [0x0BC1 - 0x0B80] = 0xa4,
    [0x0BC2 - 0x0B80] = 0xa5,
    [0x0BC6 - 0x0B80] = 0xa6,
    [0x0BC7 - 0x0B80] = 0xa7,
    [0x0BC8 - 0x0B80] = 0xa8,

    /* 0x0BD0..0x0BDF */
    [0x0BD7 - 0x0B80] = 0xaa,

    /* 0x0BE0..0x0BEF */
    [0x0BE6 - 0x0B80] = 0x80,
    [0x0BE7 - 0x0B80] = 0x81,
    [0x0BE8 - 0x0B80] = 0x8d,
    [0x0BE9 - 0x0B80] = 0x8e,
    [0x0BEA - 0x0B80] = 0x8f,
    [0x0BEB - 0x0B80] = 0x90,
    [0x0BEC - 0x0B80] = 0x95,
    [0x0BED - 0x0B80] = 0x96,
    [0x0BEE - 0x0B80] = 0x97,
    [0x0BEF - 0x0B80] = 0x98,

    /* 0x0BF0..0x0BFF */
    [0x0BF0 - 0x0B80] = 0x9d,
    [0x0BF1 - 0x0B80] = 0x9e,
    [0x0BF2 - 0x0B80] = 0x9f
  };
544 
/* Indexed by (consonant byte - 0xb8) for the consonants 0xb8..0xc9:
   the single TSCII byte written when that consonant is followed by
   <U0BC1>.  */
static const uint8_t consonant_with_u[18] =
  {
    0xcc, 0x99, 0xcd, 0x9a, 0xce, 0xcf,	/* 0xb8..0xbd */
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5,	/* 0xbe..0xc3 */
    0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb	/* 0xc4..0xc9 */
  };
550 
/* Indexed by (consonant byte - 0xb8) for the consonants 0xb8..0xc9:
   the single TSCII byte written when that consonant is followed by
   <U0BC2>.  */
static const uint8_t consonant_with_uu[18] =
  {
    0xdc, 0x9b, 0xdd, 0x9c, 0xde, 0xdf,	/* 0xb8..0xbd */
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5,	/* 0xbe..0xc3 */
    0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb	/* 0xc4..0xc9 */
  };
556 
/* Indexed by (consonant byte - 0xb8) for the consonants 0xb8..0xc9:
   the single TSCII byte written when that consonant is followed by
   <U0BCD>.  */
static const uint8_t consonant_with_virama[18] =
  {
    0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,	/* 0xb8..0xbd */
    0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,	/* 0xbe..0xc3 */
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd	/* 0xc4..0xc9 */
  };
562 
/* Loop body for the UCS-4 -> TSCII direction.

   One pass looks at the UCS-4 character at INPTR and at the state in
   *STATEP.  The state (shifted right by 3 bits) is the TSCII byte, or
   pair of bytes, that has been converted but not yet written because a
   following character may still merge with it into a different TSCII
   byte sequence.  When the current character cannot be merged, the
   buffered bytes are written, the character is NOT consumed, and the
   next pass converts it from a clean state.  */
#define MIN_NEEDED_INPUT	TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT	TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT			TO_LOOP
#define BODY \
  {									      \
    uint32_t ch = get32 (inptr);					      \
									      \
    if ((*statep >> 3) != 0)						      \
      {									      \
	/* Something is buffered; see whether CH combines with it.  */	      \
	uint32_t last = *statep >> 3;					      \
									      \
	if (last >= 0xb8 && last <= 0xc9)				      \
	  {								      \
	    /* Consonant + vowel sign U or UU: one precomposed byte.  */      \
	    if (ch == 0x0BC1 || ch == 0x0BC2)				      \
	      {								      \
		*outptr++ = (ch == 0x0BC1				      \
			     ? consonant_with_u[last - 0xb8]		      \
			     : consonant_with_uu[last - 0xb8]);		      \
		*statep = 0;						      \
		inptr += 4;						      \
		continue;						      \
	      }								      \
	    /* Consonant + <U0BC6>, <U0BC7> or <U0BC8>: the sign byte	      \
	       (0xa6, 0xa7 or 0xa8) goes in front of the consonant.  */	      \
	    if (ch >= 0x0BC6 && ch <= 0x0BC8)				      \
	      {								      \
		if (__glibc_unlikely (outptr + 2 > outend))		      \
		  {							      \
		    result = __GCONV_FULL_OUTPUT;			      \
		    break;						      \
		  }							      \
		*outptr++ = ch - 0x0b20;				      \
		*outptr++ = last;					      \
		*statep = 0;						      \
		inptr += 4;						      \
		continue;						      \
	      }								      \
	    /* Consonant + <U0BCA>, <U0BCB> or <U0BCC>: one byte in front   \
	       of the consonant and one behind it.  */			      \
	    if (ch >= 0x0BCA && ch <= 0x0BCC)				      \
	      {								      \
		if (__glibc_unlikely (outptr + 3 > outend))		      \
		  {							      \
		    result = __GCONV_FULL_OUTPUT;			      \
		    break;						      \
		  }							      \
		*outptr++ = (ch == 0x0BCA ? 0xa6 : 0xa7);		      \
		*outptr++ = last;					      \
		*outptr++ = (ch != 0x0BCC ? 0xa1 : 0xaa);		      \
		*statep = 0;						      \
		inptr += 4;						      \
		continue;						      \
	      }								      \
	    /* Consonant + VIRAMA.  For 0xb8 the result 0xec stays	      \
	       buffered, since it may still join with <U0BB7>.  */	      \
	    if (ch == 0x0BCD)						      \
	      {								      \
		if (last == 0xb8)					      \
		  *statep = 0xec << 3;					      \
		else							      \
		  {							      \
		    *outptr++ = consonant_with_virama[last - 0xb8];	      \
		    *statep = 0;					      \
		  }							      \
		inptr += 4;						      \
		continue;						      \
	      }								      \
	    /* 0xbc + <U0BBF> or <U0BC0>: precomposed 0xca or 0xcb.  */	      \
	    if (last == 0xbc && (ch == 0x0BBF || ch == 0x0BC0))		      \
	      {								      \
		*outptr++ = ch - 0x0af5;				      \
		*statep = 0;						      \
		inptr += 4;						      \
		continue;						      \
	      }								      \
	  }								      \
	else if (last >= 0x83 && last <= 0x86)				      \
	  {								      \
	    if (last >= 0x85 && (ch == 0x0BC1 || ch == 0x0BC2))		      \
	      {								      \
		/* Write byte LAST + 5 (0x8a or 0x8b) in place of the	      \
		   plain consonant.  CH is deliberately not consumed:	      \
		   with the state cleared, the next pass converts the	      \
		   vowel sign on its own.  */				      \
		*outptr++ = last + 5;					      \
		*statep = 0;						      \
		continue;						      \
	      }								      \
	    if (ch == 0x0BCD)						      \
	      {								      \
		/* For 0x85 the result 0x8a stays buffered, since it may     \
		   still join with <U0BB0>.  */				      \
		if (last == 0x85)					      \
		  *statep = 0x8a << 3;					      \
		else							      \
		  {							      \
		    *outptr++ = last + 5;				      \
		    *statep = 0;					      \
		  }							      \
		inptr += 4;						      \
		continue;						      \
	      }								      \
	  }								      \
	else if (last == 0xec)						      \
	  {								      \
	    if (ch == 0x0BB7)						      \
	      {								      \
		*statep = 0x87 << 3;					      \
		inptr += 4;						      \
		continue;						      \
	      }								      \
	  }								      \
	else if (last == 0x8a)						      \
	  {								      \
	    if (ch == 0x0BB0)						      \
	      {								      \
		/* Two bytes are buffered now: 0x8a, then 0xc3.  */	      \
		*statep = 0xc38a << 3;					      \
		inptr += 4;						      \
		continue;						      \
	      }								      \
	  }								      \
	else if (last == 0x87)						      \
	  {								      \
	    if (ch == 0x0BCD)						      \
	      {								      \
		*outptr++ = 0x8c;					      \
		*statep = 0;						      \
		inptr += 4;						      \
		continue;						      \
	      }								      \
	  }								      \
	else								      \
	  {								      \
	    assert (last == 0xc38a);					      \
	    if (ch == 0x0BC0)						      \
	      {								      \
		*outptr++ = 0x82;					      \
		*statep = 0;						      \
		inptr += 4;						      \
		continue;						      \
	      }								      \
	  }								      \
									      \
	/* No combination applies: write the buffered byte(s), low byte      \
	   first, and let the next pass convert CH.  */			      \
	if (__glibc_unlikely ((last >> 8) != 0))			      \
	  {								      \
	    if (__glibc_unlikely (outptr + 2 > outend))			      \
	      {								      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	    *outptr++ = last & 0xff;					      \
	    *outptr++ = (last >> 8) & 0xff;				      \
	  }								      \
	else								      \
	  *outptr++ = last & 0xff;					      \
	*statep = 0;							      \
	continue;							      \
      }									      \
									      \
    if (ch < 0x80)							      \
      /* Plain ASCII character.  */					      \
      *outptr++ = ch;							      \
    else if (ch >= 0x0B80 && ch <= 0x0BFF)				      \
      {									      \
	/* Tamil character.  */						      \
	uint8_t t = ucs4_to_tscii[ch - 0x0B80];				      \
									      \
	if (t != 0)							      \
	  {								      \
	    /* Bytes that can start a combination are only buffered;	      \
	       everything else is written right away.  */		      \
	    if ((t >= 0xb8 && t <= 0xc9) || (t >= 0x83 && t <= 0x86))	      \
	      *statep = (uint32_t) t << 3;				      \
	    else							      \
	      *outptr++ = t;						      \
	  }								      \
	else if (ch >= 0x0BCA && ch <= 0x0BCC)				      \
	  {								      \
	    /* Two-part vowel sign without a buffered consonant: needs	      \
	       room for two bytes.  */					      \
	    if (__glibc_unlikely (outptr + 2 > outend))			      \
	      {								      \
		result = __GCONV_FULL_OUTPUT;				      \
		break;							      \
	      }								      \
	    *outptr++ = (ch == 0x0BCA ? 0xa6 : 0xa7);			      \
	    *outptr++ = (ch != 0x0BCC ? 0xa1 : 0xaa);			      \
	  }								      \
	else								      \
	  {								      \
	    /* Illegal character.  */					      \
	    STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
	  }								      \
      }									      \
    else if (ch == 0x00A9)						      \
      *outptr++ = ch;							      \
    else if (ch == 0x2018 || ch == 0x2019)				      \
      /* Becomes 0x91 or 0x92.  */					      \
      *outptr++ = ch - 0x1f87;						      \
    else if (ch == 0x201C || ch == 0x201D)				      \
      /* Becomes 0x93 or 0x94.  */					      \
      *outptr++ = ch - 0x1f89;						      \
    else								      \
      {									      \
	UNICODE_TAG_HANDLER (ch, 4);					      \
									      \
	/* Illegal character.  */					      \
	STANDARD_TO_LOOP_ERR_HANDLER (4);				      \
      }									      \
									      \
    /* Now that we wrote the output increment the input pointer.  */	      \
    inptr += 4;								      \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS	, int *statep
847 #include <iconv/loop.c>
848 
849 
850 /* Now define the toplevel functions.  */
851 #include <iconv/skeleton.c>
852