1 /* Conversion from and to IBM930. 2 Copyright (C) 2000-2022 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, see 17 <https://www.gnu.org/licenses/>. */ 18 19 /* IBM930 is designed for the representation of Japanese Katakana/Kanji 20 using a stateful EBCDIC encoding scheme. It is also known as 21 CCSID 930 or CP930. See: 22 https://www-01.ibm.com/software/globalization/ccsid/ccsid930.html */ 23 24 #include <dlfcn.h> 25 #include <stdint.h> 26 #include <wchar.h> 27 #include <byteswap.h> 28 #include "ibm930.h" 29 30 /* The shift sequences for this charset (it does not use ESC). */ 31 #define SI 0x0F /* Shift In, host code to turn DBCS off. */ 32 #define SO 0x0E /* Shift Out, host code to turn DBCS on. */ 33 34 /* Definitions used in the body of the `gconv' function. */ 35 #define CHARSET_NAME "IBM930//" 36 #define FROM_LOOP from_ibm930 37 #define TO_LOOP to_ibm930 38 #define ONE_DIRECTION 0 39 #define FROM_LOOP_MIN_NEEDED_FROM 1 40 #define FROM_LOOP_MAX_NEEDED_FROM 2 41 #define FROM_LOOP_MIN_NEEDED_TO 4 42 #define FROM_LOOP_MAX_NEEDED_TO 4 43 #define TO_LOOP_MIN_NEEDED_FROM 4 44 #define TO_LOOP_MAX_NEEDED_FROM 4 45 #define TO_LOOP_MIN_NEEDED_TO 1 46 #define TO_LOOP_MAX_NEEDED_TO 3 47 #define PREPARE_LOOP \ 48 int save_curcs; \ 49 int *curcsp = &data->__statep->__count; 50 #define EXTRA_LOOP_ARGS , curcsp 51 52 /* Definitions of initialization and destructor function. */ 53 #define DEFINE_INIT 1 54 #define DEFINE_FINI 1 55 56 57 /* Since this is a stateful encoding we have to provide code which resets 58 the output state to the initial state. This has to be done during the 59 flushing. */ 60 #define EMIT_SHIFT_TO_INIT \ 61 if ((data->__statep->__count & ~7) != sb) \ 62 { \ 63 if (FROM_DIRECTION) \ 64 data->__statep->__count &= 7; \ 65 else \ 66 { \ 67 /* We are not in the initial state. To switch back we have \ 68 to emit `SI'. */ \ 69 if (__glibc_unlikely (outbuf >= outend)) \ 70 /* We don't have enough room in the output buffer. */ \ 71 status = __GCONV_FULL_OUTPUT; \ 72 else \ 73 { \ 74 /* Write out the shift sequence. */ \ 75 *outbuf++ = SI; \ 76 data->__statep->__count &= 7; \ 77 } \ 78 } \ 79 } 80 81 82 /* Since we might have to reset input pointer we must be able to save 83 and retore the state. */ 84 #define SAVE_RESET_STATE(Save) \ 85 if (Save) \ 86 save_curcs = *curcsp; \ 87 else \ 88 *curcsp = save_curcs 89 90 91 /* Current codeset type. */ 92 enum 93 { 94 sb = 0, 95 db = 64 96 }; 97 98 99 /* First, define the conversion function from IBM-930 to UCS4. */ 100 #define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM 101 #define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM 102 #define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO 103 #define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO 104 #define LOOPFCT FROM_LOOP 105 #define BODY \ 106 { \ 107 uint32_t ch = *inptr; \ 108 uint32_t res; \ 109 \ 110 if (__builtin_expect (ch, 0) == SO) \ 111 { \ 112 /* Shift OUT, change to DBCS converter (redundant escape okay). */ \ 113 curcs = db; \ 114 ++inptr; \ 115 continue; \ 116 } \ 117 else if (__builtin_expect (ch, 0) == SI) \ 118 { \ 119 /* Shift IN, change to SBCS converter (redundant escape okay). */ \ 120 curcs = sb; \ 121 ++inptr; \ 122 continue; \ 123 } \ 124 \ 125 if (curcs == sb) \ 126 { \ 127 /* Use the IBM930 table for single byte. */ \ 128 res = __ibm930sb_to_ucs4[ch]; \ 129 if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0') \ 130 { \ 131 /* This is an illegal character. */ \ 132 STANDARD_FROM_LOOP_ERR_HANDLER (1); \ 133 } \ 134 else \ 135 { \ 136 put32 (outptr, res); \ 137 outptr += 4; \ 138 } \ 139 ++inptr; \ 140 } \ 141 else \ 142 { \ 143 /* Use the IBM930 table for double byte. */ \ 144 const struct gap *rp2 = __ibm930db_to_ucs4_idx; \ 145 \ 146 assert (curcs == db); \ 147 \ 148 if (__glibc_unlikely (inptr + 1 >= inend)) \ 149 { \ 150 /* The second character is not available. Store the \ 151 intermediate result. */ \ 152 result = __GCONV_INCOMPLETE_INPUT; \ 153 break; \ 154 } \ 155 \ 156 ch = (ch * 0x100) + inptr[1]; \ 157 while (ch > rp2->end) \ 158 ++rp2; \ 159 \ 160 if (__builtin_expect (rp2->start == 0xffff, 0) \ 161 || __builtin_expect (ch < rp2->start, 0) \ 162 || (res = __ibm930db_to_ucs4[ch + rp2->idx], \ 163 __builtin_expect (res, L'\1') == L'\0' && ch != '\0')) \ 164 { \ 165 /* This is an illegal character. */ \ 166 STANDARD_FROM_LOOP_ERR_HANDLER (2); \ 167 } \ 168 else \ 169 { \ 170 put32 (outptr, res); \ 171 outptr += 4; \ 172 } \ 173 inptr += 2; \ 174 } \ 175 } 176 #define LOOP_NEED_FLAGS 177 #define EXTRA_LOOP_DECLS , int *curcsp 178 #define INIT_PARAMS int curcs = *curcsp & ~7 179 #define UPDATE_PARAMS *curcsp = curcs 180 #include <iconv/loop.c> 181 182 /* Next, define the other direction. */ 183 #define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM 184 #define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM 185 #define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO 186 #define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO 187 #define LOOPFCT TO_LOOP 188 #define BODY \ 189 { \ 190 uint32_t ch = get32 (inptr); \ 191 const struct gap *rp1 = __ucs4_to_ibm930sb_idx; \ 192 const struct gap *rp2 = __ucs4_to_ibm930db_idx; \ 193 \ 194 if (__glibc_unlikely (ch >= 0xffff)) \ 195 { \ 196 UNICODE_TAG_HANDLER (ch, 4); \ 197 \ 198 STANDARD_TO_LOOP_ERR_HANDLER (4); \ 199 } \ 200 \ 201 while (ch > rp1->end) \ 202 ++rp1; \ 203 \ 204 /* Use the UCS4 table for single byte. */ \ 205 unsigned char sbconv; \ 206 if (__builtin_expect (ch < rp1->start, 0) \ 207 || (sbconv = __ucs4_to_ibm930sb[ch + rp1->idx], \ 208 __builtin_expect (sbconv, L'\1') == L'\0' && ch != '\0')) \ 209 { \ 210 /* Use the UCS4 table for double byte. */ \ 211 while (ch > rp2->end) \ 212 ++rp2; \ 213 \ 214 const char *cp; \ 215 if (__builtin_expect (ch < rp2->start, 0) \ 216 || (cp = __ucs4_to_ibm930db[ch + rp2->idx], \ 217 __builtin_expect (cp[0], L'\1')== L'\0' && ch != '\0')) \ 218 { \ 219 /* This is an illegal character. */ \ 220 STANDARD_TO_LOOP_ERR_HANDLER (4); \ 221 } \ 222 else \ 223 { \ 224 if (curcs == sb) \ 225 { \ 226 if (__glibc_unlikely (outptr + 1 > outend)) \ 227 { \ 228 result = __GCONV_FULL_OUTPUT; \ 229 break; \ 230 } \ 231 *outptr++ = SO; \ 232 curcs = db; \ 233 } \ 234 \ 235 if (__glibc_unlikely (outptr + 2 > outend)) \ 236 { \ 237 result = __GCONV_FULL_OUTPUT; \ 238 break; \ 239 } \ 240 *outptr++ = cp[0]; \ 241 *outptr++ = cp[1]; \ 242 } \ 243 } \ 244 else \ 245 { \ 246 if (curcs == db) \ 247 { \ 248 if (__glibc_unlikely (outptr + 1 > outend)) \ 249 { \ 250 result = __GCONV_FULL_OUTPUT; \ 251 break; \ 252 } \ 253 *outptr++ = SI; \ 254 curcs = sb; \ 255 } \ 256 \ 257 if (__glibc_unlikely (outptr + 1 > outend)) \ 258 { \ 259 result = __GCONV_FULL_OUTPUT; \ 260 break; \ 261 } \ 262 if (ch == 0x7e) \ 263 *outptr++ = 0xa1; \ 264 else if (ch == 0x5c) \ 265 *outptr++ = 0x5b; \ 266 else \ 267 *outptr++ = sbconv; \ 268 } \ 269 \ 270 /* Now that we wrote the output increment the input pointer. */ \ 271 inptr += 4; \ 272 } 273 #define LOOP_NEED_FLAGS 274 #define EXTRA_LOOP_DECLS , int *curcsp 275 #define INIT_PARAMS int curcs = *curcsp & ~7 276 #define REINIT_PARAMS curcs = *curcsp & ~7 277 #define UPDATE_PARAMS *curcsp = curcs 278 #include <iconv/loop.c> 279 280 /* Now define the toplevel functions. */ 281 #include <iconv/skeleton.c> 282