/* Simple transformations functions - s390 version.
   Copyright (C) 2016-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18 
19 #if defined HAVE_S390_VX_ASM_SUPPORT
20 # include <ifunc-resolve.h>
21 
22 # if defined HAVE_S390_VX_GCC_SUPPORT
23 #  define ASM_CLOBBER_VR(NR) , NR
24 # else
25 #  define ASM_CLOBBER_VR(NR)
26 # endif
27 
28 # define ICONV_C_NAME(NAME) __##NAME##_c
29 # define ICONV_VX_NAME(NAME) __##NAME##_vx
30 # ifdef HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
31 /* We support z13 instructions by default -> Just use the vector variant.  */
32 #  define ICONV_VX_IFUNC(FUNC) strong_alias (ICONV_VX_NAME (FUNC), FUNC)
33 # else
34 /* We have to use ifunc to determine if z13 instructions are supported.  */
35 #  define ICONV_VX_IFUNC(FUNC)						\
36   s390_libc_ifunc_expr (ICONV_C_NAME (FUNC), FUNC,			\
37 			(hwcap & HWCAP_S390_VX)				\
38 			? ICONV_VX_NAME (FUNC)				\
39 			: ICONV_C_NAME (FUNC)				\
40 			)
41 # endif
42 # define ICONV_VX_SINGLE(NAME)						\
43   static __typeof (NAME##_single) __##NAME##_vx_single __attribute__((alias(#NAME "_single")));
44 
45 /* Generate the transformations which are used, if the target machine does not
46    support vector instructions.  */
47 # define __gconv_transform_ascii_internal		\
48   ICONV_C_NAME (__gconv_transform_ascii_internal)
49 # define __gconv_transform_internal_ascii		\
50   ICONV_C_NAME (__gconv_transform_internal_ascii)
51 # define __gconv_transform_internal_ucs4le		\
52   ICONV_C_NAME (__gconv_transform_internal_ucs4le)
53 # define __gconv_transform_ucs4_internal		\
54   ICONV_C_NAME (__gconv_transform_ucs4_internal)
55 # define __gconv_transform_ucs4le_internal		\
56   ICONV_C_NAME (__gconv_transform_ucs4le_internal)
57 # define __gconv_transform_ucs2_internal		\
58   ICONV_C_NAME (__gconv_transform_ucs2_internal)
59 # define __gconv_transform_ucs2reverse_internal		\
60   ICONV_C_NAME (__gconv_transform_ucs2reverse_internal)
61 # define __gconv_transform_internal_ucs2		\
62   ICONV_C_NAME (__gconv_transform_internal_ucs2)
63 # define __gconv_transform_internal_ucs2reverse		\
64   ICONV_C_NAME (__gconv_transform_internal_ucs2reverse)
65 
66 
67 # include <iconv/gconv_simple.c>
68 
69 # undef __gconv_transform_ascii_internal
70 # undef __gconv_transform_internal_ascii
71 # undef __gconv_transform_internal_ucs4le
72 # undef __gconv_transform_ucs4_internal
73 # undef __gconv_transform_ucs4le_internal
74 # undef __gconv_transform_ucs2_internal
75 # undef __gconv_transform_ucs2reverse_internal
76 # undef __gconv_transform_internal_ucs2
77 # undef __gconv_transform_internal_ucs2reverse
78 
/* Now define the functions with vector support.  */

/* CONVERT_32BIT_SIZE_T (REG) is pasted at the start of the inline-asm
   sequences below.  If __s390x__ is defined it emits nothing; otherwise it
   emits an llgfr which zero-extends the 32-bit value in operand REG to
   64 bit, as the asm code uses 64-bit instructions on that operand.  */
# if defined __s390x__
#  define CONVERT_32BIT_SIZE_T(REG)
# else
#  define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t"
# endif
85 
86 /* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
87 # define DEFINE_INIT		0
88 # define DEFINE_FINI		0
89 # define MIN_NEEDED_FROM	1
90 # define MIN_NEEDED_TO		4
91 # define FROM_DIRECTION		1
92 # define FROM_LOOP		ICONV_VX_NAME (ascii_internal_loop)
93 # define TO_LOOP		ICONV_VX_NAME (ascii_internal_loop) /* This is not used.  */
94 # define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ascii_internal)
95 # define ONE_DIRECTION		1
96 
97 # define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
98 # define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
99 # define LOOPFCT		FROM_LOOP
100 # define BODY_ORIG_ERROR						\
101     /* The value is too large.  We don't try transliteration here since \
102        this is not an error because of the lack of possibilities to	\
103        represent the result.  This is a genuine bug in the input since	\
104        ASCII does not allow such values.  */				\
105     STANDARD_FROM_LOOP_ERR_HANDLER (1);
106 
107 # define BODY_ORIG							\
108   {									\
109     if (__glibc_unlikely (*inptr > '\x7f'))				\
110       {									\
111 	BODY_ORIG_ERROR							\
112       }									\
113     else								\
114       {									\
115 	/* It's an one byte sequence.  */				\
116 	*((uint32_t *) outptr) = *inptr++;				\
117 	outptr += sizeof (uint32_t);					\
118       }									\
119   }
120 # define BODY								\
121   {									\
122     size_t len = inend - inptr;						\
123     if (len > (outend - outptr) / 4)					\
124       len = (outend - outptr) / 4;					\
125     size_t loop_count, tmp;						\
126     __asm__ volatile (".machine push\n\t"				\
127 		      ".machine \"z13\"\n\t"				\
128 		      ".machinemode \"zarch_nohighgprs\"\n\t"		\
129 		      CONVERT_32BIT_SIZE_T ([R_LEN])			\
130 		      "    vrepib %%v30,0x7f\n\t" /* For compare > 0x7f.  */ \
131 		      "    srlg %[R_LI],%[R_LEN],4\n\t"			\
132 		      "    vrepib %%v31,0x20\n\t"			\
133 		      "    clgije %[R_LI],0,1f\n\t"			\
134 		      "0:  \n\t" /* Handle 16-byte blocks.  */		\
135 		      "    vl %%v16,0(%[R_IN])\n\t"			\
136 		      /* Checking for values > 0x7f.  */		\
137 		      "    vstrcbs %%v17,%%v16,%%v30,%%v31\n\t"		\
138 		      "    jno 10f\n\t"					\
139 		      /* Enlarge to UCS4.  */				\
140 		      "    vuplhb %%v17,%%v16\n\t"			\
141 		      "    vupllb %%v18,%%v16\n\t"			\
142 		      "    vuplhh %%v19,%%v17\n\t"			\
143 		      "    vupllh %%v20,%%v17\n\t"			\
144 		      "    vuplhh %%v21,%%v18\n\t"			\
145 		      "    vupllh %%v22,%%v18\n\t"			\
146 		      /* Store 64bytes to buf_out.  */			\
147 		      "    vstm %%v19,%%v22,0(%[R_OUT])\n\t"		\
148 		      "    la %[R_IN],16(%[R_IN])\n\t"			\
149 		      "    la %[R_OUT],64(%[R_OUT])\n\t"		\
150 		      "    brctg %[R_LI],0b\n\t"			\
151 		      "    lghi %[R_LI],15\n\t"				\
152 		      "    ngr %[R_LEN],%[R_LI]\n\t"			\
153 		      "    je 20f\n\t" /* Jump away if no remaining bytes.  */ \
154 		      /* Handle remaining bytes.  */			\
155 		      "1: aghik %[R_LI],%[R_LEN],-1\n\t"		\
156 		      "    jl 20f\n\t" /* Jump away if no remaining bytes.  */ \
157 		      "    vll %%v16,%[R_LI],0(%[R_IN])\n\t"		\
158 		      /* Checking for values > 0x7f.  */		\
159 		      "    vstrcbs %%v17,%%v16,%%v30,%%v31\n\t"		\
160 		      "    vlgvb %[R_TMP],%%v17,7\n\t"			\
161 		      "    clr %[R_TMP],%[R_LI]\n\t"			\
162 		      "    locrh %[R_TMP],%[R_LEN]\n\t"			\
163 		      "    locghih %[R_LEN],0\n\t"			\
164 		      "    j 12f\n\t"					\
165 		      "10:\n\t"						\
166 		      /* Found a value > 0x7f.				\
167 			 Store the preceding chars.  */			\
168 		      "    vlgvb %[R_TMP],%%v17,7\n\t"			\
169 		      "12: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
170 		      "    sllk %[R_TMP],%[R_TMP],2\n\t"		\
171 		      "    ahi %[R_TMP],-1\n\t"				\
172 		      "    jl 20f\n\t"					\
173 		      "    lgr %[R_LI],%[R_TMP]\n\t"			\
174 		      "    vuplhb %%v17,%%v16\n\t"			\
175 		      "    vuplhh %%v19,%%v17\n\t"			\
176 		      "    vstl %%v19,%[R_LI],0(%[R_OUT])\n\t"		\
177 		      "    ahi %[R_LI],-16\n\t"				\
178 		      "    jl 11f\n\t"					\
179 		      "    vupllh %%v20,%%v17\n\t"			\
180 		      "    vstl %%v20,%[R_LI],16(%[R_OUT])\n\t"		\
181 		      "    ahi %[R_LI],-16\n\t"				\
182 		      "    jl 11f\n\t"					\
183 		      "    vupllb %%v18,%%v16\n\t"			\
184 		      "    vuplhh %%v21,%%v18\n\t"			\
185 		      "    vstl %%v21,%[R_LI],32(%[R_OUT])\n\t"		\
186 		      "    ahi %[R_LI],-16\n\t"				\
187 		      "    jl 11f\n\t"					\
188 		      "    vupllh %%v22,%%v18\n\t"			\
189 		      "    vstl %%v22,%[R_LI],48(%[R_OUT])\n\t"		\
190 		      "11:\n\t"						\
191 		      "    la %[R_OUT],1(%[R_TMP],%[R_OUT])\n\t"	\
192 		      "20:\n\t"						\
193 		      ".machine pop"					\
194 		      : /* outputs */ [R_OUT] "+a" (outptr)		\
195 			, [R_IN] "+a" (inptr)				\
196 			, [R_LEN] "+d" (len)				\
197 			, [R_LI] "=d" (loop_count)			\
198 			, [R_TMP] "=a" (tmp)				\
199 		      : /* inputs */					\
200 		      : /* clobber list*/ "memory", "cc"		\
201 			ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
202 			ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
203 			ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21")	\
204 			ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v30")	\
205 			ASM_CLOBBER_VR ("v31")				\
206 		      );						\
207     if (len > 0)							\
208       {									\
209 	/* Found an invalid character at the next input byte.  */	\
210 	BODY_ORIG_ERROR							\
211       }									\
212   }
213 
214 # define LOOP_NEED_FLAGS
215 # include <iconv/loop.c>
216 # include <iconv/skeleton.c>
217 # undef BODY_ORIG
218 # undef BODY_ORIG_ERROR
219 ICONV_VX_IFUNC (__gconv_transform_ascii_internal)
220 
221 /* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
222 # define DEFINE_INIT		0
223 # define DEFINE_FINI		0
224 # define MIN_NEEDED_FROM	4
225 # define MIN_NEEDED_TO		1
226 # define FROM_DIRECTION		1
227 # define FROM_LOOP		ICONV_VX_NAME (internal_ascii_loop)
228 # define TO_LOOP		ICONV_VX_NAME (internal_ascii_loop) /* This is not used.  */
229 # define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_internal_ascii)
230 # define ONE_DIRECTION		1
231 
232 # define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
233 # define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
234 # define LOOPFCT		FROM_LOOP
235 # define BODY_ORIG_ERROR						\
236   UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4);			\
237   STANDARD_TO_LOOP_ERR_HANDLER (4);
238 
239 # define BODY_ORIG							\
240   {									\
241     if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f))		\
242       {									\
243 	BODY_ORIG_ERROR							\
244       }									\
245     else								\
246       {									\
247 	/* It's an one byte sequence.  */				\
248 	*outptr++ = *((const uint32_t *) inptr);			\
249 	inptr += sizeof (uint32_t);					\
250       }									\
251   }
252 # define BODY								\
253   {									\
254     size_t len = (inend - inptr) / 4;					\
255     if (len > outend - outptr)						\
256       len = outend - outptr;						\
257     size_t loop_count, tmp, tmp2;					\
258     __asm__ volatile (".machine push\n\t"				\
259 		      ".machine \"z13\"\n\t"				\
260 		      ".machinemode \"zarch_nohighgprs\"\n\t"		\
261 		      CONVERT_32BIT_SIZE_T ([R_LEN])			\
262 		      /* Setup to check for ch > 0x7f.  */		\
263 		      "    vzero %%v21\n\t"				\
264 		      "    srlg %[R_LI],%[R_LEN],4\n\t"			\
265 		      "    vleih %%v21,8192,0\n\t"  /* element 0:   >  */ \
266 		      "    vleih %%v21,-8192,2\n\t" /* element 1: =<>  */ \
267 		      "    vleif %%v20,127,0\n\t"   /* element 0: 127  */ \
268 		      "    lghi %[R_TMP],0\n\t"				\
269 		      "    clgije %[R_LI],0,1f\n\t"			\
270 		      "0:\n\t"						\
271 		      "    vlm %%v16,%%v19,0(%[R_IN])\n\t"		\
272 		      /* Shorten to byte values.  */			\
273 		      "    vpkf %%v23,%%v16,%%v17\n\t"			\
274 		      "    vpkf %%v24,%%v18,%%v19\n\t"			\
275 		      "    vpkh %%v23,%%v23,%%v24\n\t"			\
276 		      /* Checking for values > 0x7f.  */		\
277 		      "    vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"		\
278 		      "    jno 10f\n\t"					\
279 		      "    vstrcfs %%v22,%%v17,%%v20,%%v21\n\t"		\
280 		      "    jno 11f\n\t"					\
281 		      "    vstrcfs %%v22,%%v18,%%v20,%%v21\n\t"		\
282 		      "    jno 12f\n\t"					\
283 		      "    vstrcfs %%v22,%%v19,%%v20,%%v21\n\t"		\
284 		      "    jno 13f\n\t"					\
285 		      /* Store 16bytes to outptr.  */			\
286 		      "    vst %%v23,0(%[R_OUT])\n\t"			\
287 		      "    la %[R_IN],64(%[R_IN])\n\t"			\
288 		      "    la %[R_OUT],16(%[R_OUT])\n\t"		\
289 		      "    brctg %[R_LI],0b\n\t"			\
290 		      "    lghi %[R_LI],15\n\t"				\
291 		      "    ngr %[R_LEN],%[R_LI]\n\t"			\
292 		      "    je 20f\n\t" /* Jump away if no remaining bytes.  */ \
293 		      /* Handle remaining bytes.  */			\
294 		      "1: sllg %[R_LI],%[R_LEN],2\n\t"			\
295 		      "    aghi %[R_LI],-1\n\t"				\
296 		      "    jl 20f\n\t" /* Jump away if no remaining bytes.  */ \
297 		      /* Load remaining 1...63 bytes.  */		\
298 		      "    vll %%v16,%[R_LI],0(%[R_IN])\n\t"		\
299 		      "    ahi %[R_LI],-16\n\t"				\
300 		      "    jl 2f\n\t"					\
301 		      "    vll %%v17,%[R_LI],16(%[R_IN])\n\t"		\
302 		      "    ahi %[R_LI],-16\n\t"				\
303 		      "    jl 2f\n\t"					\
304 		      "    vll %%v18,%[R_LI],32(%[R_IN])\n\t"		\
305 		      "    ahi %[R_LI],-16\n\t"				\
306 		      "    jl 2f\n\t"					\
307 		      "    vll %%v19,%[R_LI],48(%[R_IN])\n\t"		\
308 		      "2:\n\t"						\
309 		      /* Shorten to byte values.  */			\
310 		      "    vpkf %%v23,%%v16,%%v17\n\t"			\
311 		      "    vpkf %%v24,%%v18,%%v19\n\t"			\
312 		      "    vpkh %%v23,%%v23,%%v24\n\t"			\
313 		      "    sllg %[R_LI],%[R_LEN],2\n\t"			\
314 		      "    aghi %[R_LI],-16\n\t"			\
315 		      "    jl 3f\n\t" /* v16 is not fully loaded.  */	\
316 		      "    vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"		\
317 		      "    jno 10f\n\t"					\
318 		      "    aghi %[R_LI],-16\n\t"			\
319 		      "    jl 4f\n\t" /* v17 is not fully loaded.  */	\
320 		      "    vstrcfs %%v22,%%v17,%%v20,%%v21\n\t"		\
321 		      "    jno 11f\n\t"					\
322 		      "    aghi %[R_LI],-16\n\t"			\
323 		      "    jl 5f\n\t" /* v18 is not fully loaded.  */	\
324 		      "    vstrcfs %%v22,%%v18,%%v20,%%v21\n\t"		\
325 		      "    jno 12f\n\t"					\
326 		      "    aghi %[R_LI],-16\n\t"			\
327 		      /* v19 is not fully loaded. */			\
328 		      "    lghi %[R_TMP],12\n\t"			\
329 		      "    vstrcfs %%v22,%%v19,%%v20,%%v21\n\t"		\
330 		      "6: vlgvb %[R_I],%%v22,7\n\t"			\
331 		      "    aghi %[R_LI],16\n\t"				\
332 		      "    clrjl %[R_I],%[R_LI],14f\n\t"		\
333 		      "    lgr %[R_I],%[R_LEN]\n\t"			\
334 		      "    lghi %[R_LEN],0\n\t"				\
335 		      "    j 15f\n\t"					\
336 		      "3: vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"		\
337 		      "    j 6b\n\t"					\
338 		      "4: vstrcfs %%v22,%%v17,%%v20,%%v21\n\t"		\
339 		      "    lghi %[R_TMP],4\n\t"				\
340 		      "    j 6b\n\t"					\
341 		      "5: vstrcfs %%v22,%%v17,%%v20,%%v21\n\t"		\
342 		      "    lghi %[R_TMP],8\n\t"				\
343 		      "    j 6b\n\t"					\
344 		      /* Found a value > 0x7f.  */			\
345 		      "13: ahi %[R_TMP],4\n\t"				\
346 		      "12: ahi %[R_TMP],4\n\t"				\
347 		      "11: ahi %[R_TMP],4\n\t"				\
348 		      "10: vlgvb %[R_I],%%v22,7\n\t"			\
349 		      "14: srlg %[R_I],%[R_I],2\n\t"			\
350 		      "    agr %[R_I],%[R_TMP]\n\t"			\
351 		      "    je 20f\n\t"					\
352 		      /* Store characters before invalid one...  */	\
353 		      "15: aghi %[R_I],-1\n\t"				\
354 		      "    vstl %%v23,%[R_I],0(%[R_OUT])\n\t"		\
355 		      /* ... and update pointers.  */			\
356 		      "    la %[R_OUT],1(%[R_I],%[R_OUT])\n\t"		\
357 		      "    sllg %[R_I],%[R_I],2\n\t"			\
358 		      "    la %[R_IN],4(%[R_I],%[R_IN])\n\t"		\
359 		      "20:\n\t"						\
360 		      ".machine pop"					\
361 		      : /* outputs */ [R_OUT] "+a" (outptr)		\
362 			, [R_IN] "+a" (inptr)				\
363 			, [R_LEN] "+d" (len)				\
364 			, [R_LI] "=d" (loop_count)			\
365 			, [R_I] "=a" (tmp2)				\
366 			, [R_TMP] "=d" (tmp)				\
367 		      : /* inputs */					\
368 		      : /* clobber list*/ "memory", "cc"		\
369 			ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
370 			ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
371 			ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21")	\
372 			ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23")	\
373 			ASM_CLOBBER_VR ("v24")				\
374 		      );						\
375     if (len > 0)							\
376       {									\
377 	/* Found an invalid character > 0x7f at next character.  */	\
378 	BODY_ORIG_ERROR							\
379       }									\
380   }
381 # define LOOP_NEED_FLAGS
382 # include <iconv/loop.c>
383 # include <iconv/skeleton.c>
384 # undef BODY_ORIG
385 # undef BODY_ORIG_ERROR
ICONV_VX_IFUNC(__gconv_transform_internal_ascii)386 ICONV_VX_IFUNC (__gconv_transform_internal_ascii)
387 
388 
389 /* Convert from internal UCS4 to UCS4 little endian form.  */
390 # define DEFINE_INIT		0
391 # define DEFINE_FINI		0
392 # define MIN_NEEDED_FROM	4
393 # define MIN_NEEDED_TO		4
394 # define FROM_DIRECTION		1
395 # define FROM_LOOP		ICONV_VX_NAME (internal_ucs4le_loop)
396 # define TO_LOOP		ICONV_VX_NAME (internal_ucs4le_loop) /* This is not used.  */
397 # define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_internal_ucs4le)
398 # define ONE_DIRECTION		0
399 
400 static inline int
401 __attribute ((always_inline))
402 ICONV_VX_NAME (internal_ucs4le_loop) (struct __gconv_step *step,
403 				      struct __gconv_step_data *step_data,
404 				      const unsigned char **inptrp,
405 				      const unsigned char *inend,
406 				      unsigned char **outptrp,
407 				      const unsigned char *outend,
408 				      size_t *irreversible)
409 {
410   const unsigned char *inptr = *inptrp;
411   unsigned char *outptr = *outptrp;
412   int result;
413   size_t len = MIN (inend - inptr, outend - outptr) / 4;
414   size_t loop_count;
415   __asm__ volatile (".machine push\n\t"
416 		    ".machine \"z13\"\n\t"
417 		    ".machinemode \"zarch_nohighgprs\"\n\t"
418 		    CONVERT_32BIT_SIZE_T ([R_LEN])
419 		    "    bras %[R_LI],1f\n\t"
420 		    /* Vector permute mask:  */
421 		    "    .long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t"
422 		    "1:  vl %%v20,0(%[R_LI])\n\t"
423 		    /* Process 64byte (16char) blocks.  */
424 		    "    srlg %[R_LI],%[R_LEN],4\n\t"
425 		    "    clgije %[R_LI],0,10f\n\t"
426 		    "0:  vlm %%v16,%%v19,0(%[R_IN])\n\t"
427 		    "    vperm %%v16,%%v16,%%v16,%%v20\n\t"
428 		    "    vperm %%v17,%%v17,%%v17,%%v20\n\t"
429 		    "    vperm %%v18,%%v18,%%v18,%%v20\n\t"
430 		    "    vperm %%v19,%%v19,%%v19,%%v20\n\t"
431 		    "    vstm %%v16,%%v19,0(%[R_OUT])\n\t"
432 		    "    la %[R_IN],64(%[R_IN])\n\t"
433 		    "    la %[R_OUT],64(%[R_OUT])\n\t"
434 		    "    brctg %[R_LI],0b\n\t"
435 		    "    llgfr %[R_LEN],%[R_LEN]\n\t"
436 		    "    nilf %[R_LEN],15\n\t"
437 		    /* Process 16byte (4char) blocks.  */
438 		    "10: srlg %[R_LI],%[R_LEN],2\n\t"
439 		    "    clgije %[R_LI],0,20f\n\t"
440 		    "11: vl %%v16,0(%[R_IN])\n\t"
441 		    "    vperm %%v16,%%v16,%%v16,%%v20\n\t"
442 		    "    vst %%v16,0(%[R_OUT])\n\t"
443 		    "    la %[R_IN],16(%[R_IN])\n\t"
444 		    "    la %[R_OUT],16(%[R_OUT])\n\t"
445 		    "    brctg %[R_LI],11b\n\t"
446 		    "    nill %[R_LEN],3\n\t"
447 		    /* Process <16bytes.  */
448 		    "20: sll %[R_LEN],2\n\t"
449 		    "    ahi %[R_LEN],-1\n\t"
450 		    "    jl 30f\n\t"
451 		    "    vll %%v16,%[R_LEN],0(%[R_IN])\n\t"
452 		    "    vperm %%v16,%%v16,%%v16,%%v20\n\t"
453 		    "    vstl %%v16,%[R_LEN],0(%[R_OUT])\n\t"
454 		    "    la %[R_IN],1(%[R_LEN],%[R_IN])\n\t"
455 		    "    la %[R_OUT],1(%[R_LEN],%[R_OUT])\n\t"
456 		    "30: \n\t"
457 		    ".machine pop"
458 		    : /* outputs */ [R_OUT] "+a" (outptr)
459 		      , [R_IN] "+a" (inptr)
460 		      , [R_LI] "=a" (loop_count)
461 		      , [R_LEN] "+a" (len)
462 		    : /* inputs */
463 		    : /* clobber list*/ "memory", "cc"
464 		      ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")
465 		      ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")
466 		      ASM_CLOBBER_VR ("v20")
467 		    );
468   *inptrp = inptr;
469   *outptrp = outptr;
470 
471   /* Determine the status.  */
472   if (*inptrp == inend)
473     result = __GCONV_EMPTY_INPUT;
474   else if (*outptrp + 4 > outend)
475     result = __GCONV_FULL_OUTPUT;
476   else
477     result = __GCONV_INCOMPLETE_INPUT;
478 
479   return result;
480 }
481 
482 ICONV_VX_SINGLE (internal_ucs4le_loop)
483 # include <iconv/skeleton.c>
ICONV_VX_IFUNC(__gconv_transform_internal_ucs4le)484 ICONV_VX_IFUNC (__gconv_transform_internal_ucs4le)
485 
486 
487 /* Transform from UCS4 to the internal, UCS4-like format.  Unlike
488    for the other direction we have to check for correct values here.  */
489 # define DEFINE_INIT		0
490 # define DEFINE_FINI		0
491 # define MIN_NEEDED_FROM	4
492 # define MIN_NEEDED_TO		4
493 # define FROM_DIRECTION		1
494 # define FROM_LOOP		ICONV_VX_NAME (ucs4_internal_loop)
495 # define TO_LOOP		ICONV_VX_NAME (ucs4_internal_loop) /* This is not used.  */
496 # define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ucs4_internal)
497 # define ONE_DIRECTION		0
498 
499 
500 static inline int
501 __attribute ((always_inline))
502 ICONV_VX_NAME (ucs4_internal_loop) (struct __gconv_step *step,
503 				    struct __gconv_step_data *step_data,
504 				    const unsigned char **inptrp,
505 				    const unsigned char *inend,
506 				    unsigned char **outptrp,
507 				    const unsigned char *outend,
508 				    size_t *irreversible)
509 {
510   int flags = step_data->__flags;
511   const unsigned char *inptr = *inptrp;
512   unsigned char *outptr = *outptrp;
513   int result;
514   size_t len, loop_count;
515   do
516     {
517       len = MIN (inend - inptr, outend - outptr) / 4;
518       __asm__ volatile (".machine push\n\t"
519 			".machine \"z13\"\n\t"
520 			".machinemode \"zarch_nohighgprs\"\n\t"
521 			CONVERT_32BIT_SIZE_T ([R_LEN])
522 			/* Setup to check for ch > 0x7fffffff.  */
523 			"    larl %[R_LI],9f\n\t"
524 			"    vlm %%v20,%%v21,0(%[R_LI])\n\t"
525 			"    srlg %[R_LI],%[R_LEN],2\n\t"
526 			"    clgije %[R_LI],0,1f\n\t"
527 			/* Process 16byte (4char) blocks.  */
528 			"0:  vl %%v16,0(%[R_IN])\n\t"
529 			"    vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"
530 			"    jno 10f\n\t"
531 			"    vst %%v16,0(%[R_OUT])\n\t"
532 			"    la %[R_IN],16(%[R_IN])\n\t"
533 			"    la %[R_OUT],16(%[R_OUT])\n\t"
534 			"    brctg %[R_LI],0b\n\t"
535 			"    llgfr %[R_LEN],%[R_LEN]\n\t"
536 			"    nilf %[R_LEN],3\n\t"
537 			/* Process <16bytes.  */
538 			"1:  sll %[R_LEN],2\n\t"
539 			"    ahik %[R_LI],%[R_LEN],-1\n\t"
540 			"    jl 20f\n\t" /* No further bytes available.  */
541 			"    vll %%v16,%[R_LI],0(%[R_IN])\n\t"
542 			"    vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"
543 			"    vlgvb %[R_LI],%%v22,7\n\t"
544 			"    clr %[R_LI],%[R_LEN]\n\t"
545 			"    locgrhe %[R_LI],%[R_LEN]\n\t"
546 			"    locghihe %[R_LEN],0\n\t"
547 			"    j 11f\n\t"
548 			/* v20: Vector string range compare values.  */
549 			"9:  .long 0x7fffffff,0x0,0x0,0x0\n\t"
550 			/* v21: Vector string range compare control-bits.
551 			   element 0: >; element 1: =<> (always true)  */
552 			"    .long 0x20000000,0xE0000000,0x0,0x0\n\t"
553 			/* Found a value > 0x7fffffff.  */
554 			"10: vlgvb %[R_LI],%%v22,7\n\t"
555 			/* Store characters before invalid one.  */
556 			"11: aghi %[R_LI],-1\n\t"
557 			"    jl 20f\n\t"
558 			"    vstl %%v16,%[R_LI],0(%[R_OUT])\n\t"
559 			"    la %[R_IN],1(%[R_LI],%[R_IN])\n\t"
560 			"    la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t"
561 			"20:\n\t"
562 			".machine pop"
563 			: /* outputs */ [R_OUT] "+a" (outptr)
564 			  , [R_IN] "+a" (inptr)
565 			  , [R_LI] "=a" (loop_count)
566 			  , [R_LEN] "+d" (len)
567 			: /* inputs */
568 			: /* clobber list*/ "memory", "cc"
569 			  ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v20")
570 			  ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22")
571 			);
572       if (len > 0)
573 	{
574 	  /* The value is too large.  We don't try transliteration here since
575 	     this is not an error because of the lack of possibilities to
576 	     represent the result.  This is a genuine bug in the input since
577 	     UCS4 does not allow such values.  */
578 	  if (irreversible == NULL)
579 	    /* We are transliterating, don't try to correct anything.  */
580 	    return __GCONV_ILLEGAL_INPUT;
581 
582 	  if (flags & __GCONV_IGNORE_ERRORS)
583 	    {
584 	      /* Just ignore this character.  */
585 	      ++*irreversible;
586 	      inptr += 4;
587 	      continue;
588 	    }
589 
590 	  *inptrp = inptr;
591 	  *outptrp = outptr;
592 	  return __GCONV_ILLEGAL_INPUT;
593 	}
594     }
595   while (len > 0);
596 
597   *inptrp = inptr;
598   *outptrp = outptr;
599 
600   /* Determine the status.  */
601   if (*inptrp == inend)
602     result = __GCONV_EMPTY_INPUT;
603   else if (*outptrp + 4 > outend)
604     result = __GCONV_FULL_OUTPUT;
605   else
606     result = __GCONV_INCOMPLETE_INPUT;
607 
608   return result;
609 }
610 
611 ICONV_VX_SINGLE (ucs4_internal_loop)
612 # include <iconv/skeleton.c>
ICONV_VX_IFUNC(__gconv_transform_ucs4_internal)613 ICONV_VX_IFUNC (__gconv_transform_ucs4_internal)
614 
615 
616 /* Transform from UCS4-LE to the internal encoding.  */
617 # define DEFINE_INIT		0
618 # define DEFINE_FINI		0
619 # define MIN_NEEDED_FROM	4
620 # define MIN_NEEDED_TO		4
621 # define FROM_DIRECTION		1
622 # define FROM_LOOP		ICONV_VX_NAME (ucs4le_internal_loop)
623 # define TO_LOOP		ICONV_VX_NAME (ucs4le_internal_loop) /* This is not used.  */
624 # define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ucs4le_internal)
625 # define ONE_DIRECTION		0
626 
627 static inline int
628 __attribute ((always_inline))
629 ICONV_VX_NAME (ucs4le_internal_loop) (struct __gconv_step *step,
630 				      struct __gconv_step_data *step_data,
631 				      const unsigned char **inptrp,
632 				      const unsigned char *inend,
633 				      unsigned char **outptrp,
634 				      const unsigned char *outend,
635 				      size_t *irreversible)
636 {
637   int flags = step_data->__flags;
638   const unsigned char *inptr = *inptrp;
639   unsigned char *outptr = *outptrp;
640   int result;
641   size_t len, loop_count;
642   do
643     {
644       len = MIN (inend - inptr, outend - outptr) / 4;
645       __asm__ volatile (".machine push\n\t"
646 			".machine \"z13\"\n\t"
647 			".machinemode \"zarch_nohighgprs\"\n\t"
648 			CONVERT_32BIT_SIZE_T ([R_LEN])
649 			/* Setup to check for ch > 0x7fffffff.  */
650 			"    larl %[R_LI],9f\n\t"
651 			"    vlm %%v20,%%v22,0(%[R_LI])\n\t"
652 			"    srlg %[R_LI],%[R_LEN],2\n\t"
653 			"    clgije %[R_LI],0,1f\n\t"
654 			/* Process 16byte (4char) blocks.  */
655 			"0:  vl %%v16,0(%[R_IN])\n\t"
656 			"    vperm %%v16,%%v16,%%v16,%%v22\n\t"
657 			"    vstrcfs %%v23,%%v16,%%v20,%%v21\n\t"
658 			"    jno 10f\n\t"
659 			"    vst %%v16,0(%[R_OUT])\n\t"
660 			"    la %[R_IN],16(%[R_IN])\n\t"
661 			"    la %[R_OUT],16(%[R_OUT])\n\t"
662 			"    brctg %[R_LI],0b\n\t"
663 			"    llgfr %[R_LEN],%[R_LEN]\n\t"
664 			"    nilf %[R_LEN],3\n\t"
665 			/* Process <16bytes.  */
666 			"1:  sll %[R_LEN],2\n\t"
667 			"    ahik %[R_LI],%[R_LEN],-1\n\t"
668 			"    jl 20f\n\t" /* No further bytes available.  */
669 			"    vll %%v16,%[R_LI],0(%[R_IN])\n\t"
670 			"    vperm %%v16,%%v16,%%v16,%%v22\n\t"
671 			"    vstrcfs %%v23,%%v16,%%v20,%%v21\n\t"
672 			"    vlgvb %[R_LI],%%v23,7\n\t"
673 			"    clr %[R_LI],%[R_LEN]\n\t"
674 			"    locgrhe %[R_LI],%[R_LEN]\n\t"
675 			"    locghihe %[R_LEN],0\n\t"
676 			"    j 11f\n\t"
677 			/* v20: Vector string range compare values.  */
678 			"9: .long 0x7fffffff,0x0,0x0,0x0\n\t"
679 			/* v21: Vector string range compare control-bits.
680 			   element 0: >; element 1: =<> (always true)  */
681 			"    .long 0x20000000,0xE0000000,0x0,0x0\n\t"
682 			/* v22: Vector permute mask.  */
683 			"    .long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t"
684 			/* Found a value > 0x7fffffff.  */
685 			"10: vlgvb %[R_LI],%%v23,7\n\t"
686 			/* Store characters before invalid one.  */
687 			"11: aghi %[R_LI],-1\n\t"
688 			"    jl 20f\n\t"
689 			"    vstl %%v16,%[R_LI],0(%[R_OUT])\n\t"
690 			"    la %[R_IN],1(%[R_LI],%[R_IN])\n\t"
691 			"    la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t"
692 			"20:\n\t"
693 			".machine pop"
694 			: /* outputs */ [R_OUT] "+a" (outptr)
695 			  , [R_IN] "+a" (inptr)
696 			  , [R_LI] "=a" (loop_count)
697 			  , [R_LEN] "+d" (len)
698 			: /* inputs */
699 			: /* clobber list*/ "memory", "cc"
700 			  ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v20")
701 			  ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22")
702 			  ASM_CLOBBER_VR ("v23")
703 			);
704       if (len > 0)
705 	{
706 	  /* The value is too large.  We don't try transliteration here since
707 	     this is not an error because of the lack of possibilities to
708 	     represent the result.  This is a genuine bug in the input since
709 	     UCS4 does not allow such values.  */
710 	  if (irreversible == NULL)
711 	    /* We are transliterating, don't try to correct anything.  */
712 	    return __GCONV_ILLEGAL_INPUT;
713 
714 	  if (flags & __GCONV_IGNORE_ERRORS)
715 	    {
716 	      /* Just ignore this character.  */
717 	      ++*irreversible;
718 	      inptr += 4;
719 	      continue;
720 	    }
721 
722 	  *inptrp = inptr;
723 	  *outptrp = outptr;
724 	  return __GCONV_ILLEGAL_INPUT;
725 	}
726     }
727   while (len > 0);
728 
729   *inptrp = inptr;
730   *outptrp = outptr;
731 
732   /* Determine the status.  */
733   if (*inptrp == inend)
734     result = __GCONV_EMPTY_INPUT;
735   else if (*inptrp + 4 > inend)
736     result = __GCONV_INCOMPLETE_INPUT;
737   else
738     {
739       assert (*outptrp + 4 > outend);
740       result = __GCONV_FULL_OUTPUT;
741     }
742 
743   return result;
744 }
745 ICONV_VX_SINGLE (ucs4le_internal_loop)
746 # include <iconv/skeleton.c>
747 ICONV_VX_IFUNC (__gconv_transform_ucs4le_internal)
748 
749 /* Convert from UCS2 to the internal (UCS4-like) format.  */
750 # define DEFINE_INIT		0
751 # define DEFINE_FINI		0
752 # define MIN_NEEDED_FROM	2
753 # define MIN_NEEDED_TO		4
754 # define FROM_DIRECTION		1
755 # define FROM_LOOP		ICONV_VX_NAME (ucs2_internal_loop)
756 # define TO_LOOP		ICONV_VX_NAME (ucs2_internal_loop) /* This is not used.  */
757 # define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ucs2_internal)
758 # define ONE_DIRECTION		1
759 
760 # define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
761 # define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
762 # define LOOPFCT		FROM_LOOP
763 # define BODY_ORIG_ERROR						\
764   /* Surrogate characters in UCS-2 input are not valid.  Reject		\
765      them.  (Catching this here is not security relevant.)  */		\
766   STANDARD_FROM_LOOP_ERR_HANDLER (2);
767 # define BODY_ORIG							\
768   {									\
769     uint16_t u1 = get16 (inptr);					\
770 									\
771     if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000))			\
772       {									\
773 	BODY_ORIG_ERROR							\
774       }									\
775 									\
776     *((uint32_t *) outptr) = u1;					\
777     outptr += sizeof (uint32_t);					\
778     inptr += 2;								\
779   }
/* Vector implementation of the loop body.
   LEN is the number of characters that both the remaining input and the
   remaining output can hold.  The inline assembly first converts LEN / 8
   full blocks of 16 input bytes and then the remaining (less than 8)
   characters with length-limited vector loads and stores.  Every chunk is
   checked with vstrchs for characters in the range 0xd800..0xdfff and
   enlarged from 16-bit to 32-bit elements with vuplhh/vupllh.
   When such a character is found, only the characters in front of it are
   stored, INPTR and OUTPTR are advanced past them and LEN is left
   non-zero, so that the C code after the asm statement runs
   BODY_ORIG_ERROR for the offending character.  */
# define BODY								\
  {									\
    size_t len, tmp, tmp2;						\
    /* Number of characters that fit into both the remaining input	\
       (2 bytes each) and the remaining output (4 bytes each).  */	\
    len = MIN ((inend - inptr) / 2, (outend - outptr) / 4);		\
    __asm__ volatile (".machine push\n\t"				\
		      ".machine \"z13\"\n\t"				\
		      ".machinemode \"zarch_nohighgprs\"\n\t"		\
		      CONVERT_32BIT_SIZE_T ([R_LEN])			\
		      /* Setup to check for ch >= 0xd800 && ch < 0xe000.  */ \
		      "    larl %[R_TMP],9f\n\t"			\
		      "    vlm %%v20,%%v21,0(%[R_TMP])\n\t"		\
		      /* R_TMP = number of full 8-character blocks.  */	\
		      "    srlg %[R_TMP],%[R_LEN],3\n\t"		\
		      "    clgije %[R_TMP],0,1f\n\t"			\
		      /* Process 16byte (8char) blocks.  */		\
		      "0:  vl %%v16,0(%[R_IN])\n\t"			\
		      "    vstrchs %%v19,%%v16,%%v20,%%v21\n\t"		\
		      /* Enlarge UCS2 to UCS4.  */			\
		      "    vuplhh %%v17,%%v16\n\t"			\
		      "    vupllh %%v18,%%v16\n\t"			\
		      "    jno 10f\n\t"					\
		      /* Store 32bytes to buf_out.  */			\
		      "    vstm %%v17,%%v18,0(%[R_OUT])\n\t"		\
		      "    la %[R_IN],16(%[R_IN])\n\t"			\
		      "    la %[R_OUT],32(%[R_OUT])\n\t"		\
		      "    brctg %[R_TMP],0b\n\t"			\
		      /* Keep only the number of leftover characters.  */ \
		      "    llgfr %[R_LEN],%[R_LEN]\n\t"			\
		      "    nilf %[R_LEN],7\n\t"				\
		      /* Process <16bytes.  */				\
		      "1:  sll %[R_LEN],1\n\t"				\
		      "    ahik %[R_TMP],%[R_LEN],-1\n\t"		\
		      "    jl 20f\n\t" /* No further bytes available.  */ \
		      "    vll %%v16,%[R_TMP],0(%[R_IN])\n\t"		\
		      "    vstrchs %%v19,%%v16,%%v20,%%v21\n\t"		\
		      /* Enlarge UCS2 to UCS4.  */			\
		      "    vuplhh %%v17,%%v16\n\t"			\
		      "    vupllh %%v18,%%v16\n\t"			\
		      /* R_TMP = byte index reported by vstrchs, limited	\
			 to the number of available bytes (R_LEN).  R_LEN	\
			 is set to zero if the index is not below the	\
			 number of available bytes.  */			\
		      "    vlgvb %[R_TMP],%%v19,7\n\t"			\
		      "    clr %[R_TMP],%[R_LEN]\n\t"			\
		      "    locgrhe %[R_TMP],%[R_LEN]\n\t"		\
		      "    locghihe %[R_LEN],0\n\t"			\
		      "    j 11f\n\t"					\
		      /* v20: Vector string range compare values.  */	\
		      "9:  .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
		      /* v21: Vector string range compare control-bits.	\
			 element 0: >=; element 1: <  */		\
		      "    .short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
		      /* Found an element: ch >= 0xd800 && ch < 0xe000  */ \
		      "10: vlgvb %[R_TMP],%%v19,7\n\t"			\
		      /* Consume R_TMP input bytes and store the	\
			 corresponding 2 * R_TMP output bytes, taken	\
			 from v17 and, beyond 16 bytes, from v18.  */	\
		      "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
		      "    sll %[R_TMP],1\n\t"				\
		      "    lgr %[R_TMP2],%[R_TMP]\n\t"			\
		      "    ahi %[R_TMP],-1\n\t"				\
		      "    jl 20f\n\t"					\
		      "    vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t"		\
		      "    ahi %[R_TMP],-16\n\t"			\
		      "    jl 19f\n\t"					\
		      "    vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t"	\
		      "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t"	\
		      "20: \n\t"					\
		      ".machine pop"					\
		      : /* outputs */ [R_OUT] "+a" (outptr)		\
			, [R_IN] "+a" (inptr)				\
			, [R_TMP] "=a" (tmp)				\
			, [R_TMP2] "=a" (tmp2)				\
			, [R_LEN] "+d" (len)				\
		      : /* inputs */					\
		      : /* clobber list*/ "memory", "cc"		\
			ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
			ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
			ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21")	\
		      );						\
    /* A non-zero LEN means that the assembly stopped in front of a	\
       character in the range 0xd800..0xdfff.  */			\
    if (len > 0)							\
      {									\
	/* Found an invalid character at next input-char.  */		\
	BODY_ORIG_ERROR							\
      }									\
  }
857 
/* Expand the generic loop and skeleton templates with the macros defined
   above; this creates the vector variant of the conversion.  */
# define LOOP_NEED_FLAGS
# include <iconv/loop.c>
# include <iconv/skeleton.c>
/* BODY_ORIG and BODY_ORIG_ERROR are helpers of this file; undefine them so
   that the next transformation can define its own versions.  */
# undef BODY_ORIG
# undef BODY_ORIG_ERROR
/* Bind __gconv_transform_ucs2_internal to the vector variant, either
   directly or via an ifunc that selects between the C and the vector
   variant, depending on the build configuration.  */
ICONV_VX_IFUNC (__gconv_transform_ucs2_internal)
864 
/* Convert from UCS2 in other endianness to the internal (UCS4-like) format.
   Settings of the vector variant: an input character needs 2 bytes, an
   output character needs 4 bytes.  */
# define DEFINE_INIT		0
# define DEFINE_FINI		0
# define MIN_NEEDED_FROM	2
# define MIN_NEEDED_TO		4
# define FROM_DIRECTION		1
# define FROM_LOOP		ICONV_VX_NAME (ucs2reverse_internal_loop)
# define TO_LOOP		ICONV_VX_NAME (ucs2reverse_internal_loop) /* This is not used.  */
# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ucs2reverse_internal)
# define ONE_DIRECTION		1

/* Only one direction is provided, so the loop uses the FROM settings.  */
# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
# define LOOPFCT		FROM_LOOP
/* Error path for an invalid (surrogate) UCS-2 input character, used by
   both BODY_ORIG and BODY.  If errors are ignored, the 2-byte character is
   skipped and counted in *IRREVERSIBLE; otherwise the loop is left with
   RESULT set to __GCONV_ILLEGAL_INPUT.  */
# define BODY_ORIG_ERROR						\
  /* Surrogate characters in UCS-2 input are not valid.  Reject		\
     them.  (Catching this here is not security relevant.)  */		\
  if (ignore_errors_p ())						\
    {									\
      /* Skip the character and go on with the next one.  */		\
      ++*irreversible;							\
      inptr += 2;							\
      continue;								\
    }									\
  result = __GCONV_ILLEGAL_INPUT;					\
  break;
890 
/* Scalar conversion step: read one byte-swapped UCS-2 character at INPTR
   and write it as one 32-bit character at OUTPTR.  Characters that lie in
   the surrogate range 0xd800..0xdfff after the byte swap are handed to
   BODY_ORIG_ERROR instead.  */
# define BODY_ORIG							\
  {									\
    uint16_t swapped = bswap_16 (get16 (inptr));			\
									\
    if (__glibc_unlikely (swapped >= 0xd800 && swapped < 0xe000))	\
      {									\
	BODY_ORIG_ERROR							\
      }									\
									\
    /* Widen the 16-bit value to a 32-bit output character.  */		\
    *((uint32_t *) outptr) = swapped;					\
    inptr += 2;								\
    outptr += sizeof (uint32_t);					\
  }
/* Vector implementation of the loop body.
   LEN is the number of characters that both the remaining input and the
   remaining output can hold.  The inline assembly first converts LEN / 8
   full blocks of 16 input bytes and then the remaining (less than 8)
   characters with length-limited vector loads and stores.  In every chunk
   the two bytes of each character are swapped with vperm (mask in v22),
   the result is checked with vstrchs for characters in the range
   0xd800..0xdfff and enlarged from 16-bit to 32-bit elements with
   vuplhh/vupllh.
   When such a character is found, only the characters in front of it are
   stored, INPTR and OUTPTR are advanced past them and LEN is left
   non-zero, so that the C code after the asm statement runs
   BODY_ORIG_ERROR for the offending character.  */
# define BODY								\
  {									\
    size_t len, tmp, tmp2;						\
    /* Number of characters that fit into both the remaining input	\
       (2 bytes each) and the remaining output (4 bytes each).  */	\
    len = MIN ((inend - inptr) / 2, (outend - outptr) / 4);		\
    __asm__ volatile (".machine push\n\t"				\
		      ".machine \"z13\"\n\t"				\
		      ".machinemode \"zarch_nohighgprs\"\n\t"		\
		      CONVERT_32BIT_SIZE_T ([R_LEN])			\
		      /* Setup to check for ch >= 0xd800 && ch < 0xe000.  */ \
		      "    larl %[R_TMP],9f\n\t"			\
		      "    vlm %%v20,%%v22,0(%[R_TMP])\n\t"		\
		      /* R_TMP = number of full 8-character blocks.  */	\
		      "    srlg %[R_TMP],%[R_LEN],3\n\t"		\
		      "    clgije %[R_TMP],0,1f\n\t"			\
		      /* Process 16byte (8char) blocks.  */		\
		      "0:  vl %%v16,0(%[R_IN])\n\t"			\
		      /* Swap the bytes of every character.  */		\
		      "    vperm %%v16,%%v16,%%v16,%%v22\n\t"		\
		      "    vstrchs %%v19,%%v16,%%v20,%%v21\n\t"		\
		      /* Enlarge UCS2 to UCS4.  */			\
		      "    vuplhh %%v17,%%v16\n\t"			\
		      "    vupllh %%v18,%%v16\n\t"			\
		      "    jno 10f\n\t"					\
		      /* Store 32bytes to buf_out.  */			\
		      "    vstm %%v17,%%v18,0(%[R_OUT])\n\t"		\
		      "    la %[R_IN],16(%[R_IN])\n\t"			\
		      "    la %[R_OUT],32(%[R_OUT])\n\t"		\
		      "    brctg %[R_TMP],0b\n\t"			\
		      /* Keep only the number of leftover characters.  */ \
		      "    llgfr %[R_LEN],%[R_LEN]\n\t"			\
		      "    nilf %[R_LEN],7\n\t"				\
		      /* Process <16bytes.  */				\
		      "1:  sll %[R_LEN],1\n\t"				\
		      "    ahik %[R_TMP],%[R_LEN],-1\n\t"		\
		      "    jl 20f\n\t" /* No further bytes available.  */ \
		      "    vll %%v16,%[R_TMP],0(%[R_IN])\n\t"		\
		      /* Swap the bytes of every character.  */		\
		      "    vperm %%v16,%%v16,%%v16,%%v22\n\t"		\
		      "    vstrchs %%v19,%%v16,%%v20,%%v21\n\t"		\
		      /* Enlarge UCS2 to UCS4.  */			\
		      "    vuplhh %%v17,%%v16\n\t"			\
		      "    vupllh %%v18,%%v16\n\t"			\
		      /* R_TMP = byte index reported by vstrchs, limited	\
			 to the number of available bytes (R_LEN).  R_LEN	\
			 is set to zero if the index is not below the	\
			 number of available bytes.  */			\
		      "    vlgvb %[R_TMP],%%v19,7\n\t"			\
		      "    clr %[R_TMP],%[R_LEN]\n\t"			\
		      "    locgrhe %[R_TMP],%[R_LEN]\n\t"		\
		      "    locghihe %[R_LEN],0\n\t"			\
		      "    j 11f\n\t"					\
		      /* v20: Vector string range compare values.  */	\
		      "9:  .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
		      /* v21: Vector string range compare control-bits.	\
			 element 0: >=; element 1: <  */		\
		      "    .short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
		      /* v22: Vector permute mask.  */			\
		      "    .short 0x0100,0x0302,0x0504,0x0706\n\t"	\
		      "    .short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t"	\
		      /* Found an element: ch >= 0xd800 && ch < 0xe000  */ \
		      "10: vlgvb %[R_TMP],%%v19,7\n\t"			\
		      /* Consume R_TMP input bytes and store the	\
			 corresponding 2 * R_TMP output bytes, taken	\
			 from v17 and, beyond 16 bytes, from v18.  */	\
		      "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
		      "    sll %[R_TMP],1\n\t"				\
		      "    lgr %[R_TMP2],%[R_TMP]\n\t"			\
		      "    ahi %[R_TMP],-1\n\t"				\
		      "    jl 20f\n\t"					\
		      "    vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t"		\
		      "    ahi %[R_TMP],-16\n\t"			\
		      "    jl 19f\n\t"					\
		      "    vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t"	\
		      "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t"	\
		      "20: \n\t"					\
		      ".machine pop"					\
		      : /* outputs */ [R_OUT] "+a" (outptr)		\
			, [R_IN] "+a" (inptr)				\
			, [R_TMP] "=a" (tmp)				\
			, [R_TMP2] "=a" (tmp2)				\
			, [R_LEN] "+d" (len)				\
		      : /* inputs */					\
		      : /* clobber list*/ "memory", "cc"		\
			ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
			ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
			ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21")	\
			ASM_CLOBBER_VR ("v22")				\
		      );						\
    /* A non-zero LEN means that the assembly stopped in front of a	\
       character in the range 0xd800..0xdfff.  */			\
    if (len > 0)							\
      {									\
	/* Found an invalid character at next input-char.  */		\
	BODY_ORIG_ERROR							\
      }									\
  }
/* Expand the generic loop and skeleton templates with the macros defined
   above; this creates the vector variant of the conversion.  */
# define LOOP_NEED_FLAGS
# include <iconv/loop.c>
# include <iconv/skeleton.c>
/* BODY_ORIG and BODY_ORIG_ERROR are helpers of this file; undefine them so
   that the next transformation can define its own versions.  */
# undef BODY_ORIG
# undef BODY_ORIG_ERROR
/* Bind __gconv_transform_ucs2reverse_internal to the vector variant,
   either directly or via an ifunc that selects between the C and the
   vector variant, depending on the build configuration.  */
ICONV_VX_IFUNC (__gconv_transform_ucs2reverse_internal)
993 
/* Convert from the internal (UCS4-like) format to UCS2.
   Settings of the vector variant: an input character needs 4 bytes, an
   output character needs 2 bytes.  */
# define DEFINE_INIT		0
# define DEFINE_FINI		0
# define MIN_NEEDED_FROM	4
# define MIN_NEEDED_TO		2
# define FROM_DIRECTION		1
# define FROM_LOOP		ICONV_VX_NAME (internal_ucs2_loop)
# define TO_LOOP		ICONV_VX_NAME (internal_ucs2_loop) /* This is not used.  */
# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_internal_ucs2)
# define ONE_DIRECTION		1

/* Only one direction is provided, so the loop uses the FROM settings.  */
# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
# define LOOPFCT		FROM_LOOP
/* Scalar conversion step: read one 32-bit internal character at INPTR and
   write it as one UCS-2 character at OUTPTR.  Values of 0x10000 and above
   go through the tag handler and the standard to-loop error handler;
   surrogates (0xd800..0xdfff) are rejected, or skipped and counted in
   *IRREVERSIBLE if errors are ignored.  */
# define BODY_ORIG							\
  {									\
    uint32_t ucs4 = *((const uint32_t *) inptr);			\
									\
    if (__glibc_unlikely (ucs4 >= 0x10000))				\
      {									\
	/* Does not fit into a single 16-bit UCS-2 character.  */	\
	UNICODE_TAG_HANDLER (ucs4, 4);					\
	STANDARD_TO_LOOP_ERR_HANDLER (4);				\
      }									\
    else if (__glibc_unlikely (ucs4 >= 0xd800 && ucs4 < 0xe000))	\
      {									\
	/* Surrogate characters in UCS-4 input are not valid.		\
	   We must catch this, because the UCS-2 output might be	\
	   interpreted as UTF-16 by other programs.  If we let		\
	   surrogates pass through, attackers could make a security	\
	   hole exploit by synthesizing any desired plane 1-16		\
	   character.  */						\
	result = __GCONV_ILLEGAL_INPUT;					\
	if (ignore_errors_p ())						\
	  {								\
	    /* Skip the character; RESULT keeps the value set above.  */ \
	    ++*irreversible;						\
	    inptr += 4;							\
	    continue;							\
	  }								\
	break;								\
      }									\
    else								\
      {									\
	put16 (outptr, ucs4);						\
	inptr += 4;							\
	outptr += sizeof (uint16_t);					\
      }									\
  }
/* Vector implementation of the loop body.
   If less than 32 input bytes or less than 16 output bytes are left, one
   character is converted by BODY_ORIG.  Otherwise the inline assembly
   converts LOOP_COUNT blocks of 32 input bytes into 16 output bytes each:
   - vpkf shortens the 32-bit elements of v16/v17 to 16-bit elements;
   - vstrcfs with v20/v21 searches each input vector for a character
     >= 0xd800;
   - if one is found, a second vstrcfs with v22/v23 checks whether all
     characters of that vector are in the range 0xe000..0xffff, in which
     case the vector is accepted.
   Otherwise only the characters in front of the first character outside
   of 0xe000..0xffff are stored, INPTR and OUTPTR are advanced past them
   and the asm statement is left with LOOP_COUNT != 0.  That character is
   then processed by BODY_ORIG, which does the error and tag handling.  */
# define BODY								\
  {									\
    if (__builtin_expect (inend - inptr < 32, 1)			\
	|| outend - outptr < 16)					\
      /* Convert remaining bytes with c code.  */			\
      BODY_ORIG								\
    else								\
      {									\
	/* Convert in 32 byte blocks.  */				\
	size_t loop_count = (inend - inptr) / 32;			\
	size_t tmp, tmp2;						\
	/* Do not produce more 16 byte blocks than the output can	\
	   hold.  */							\
	if (loop_count > (outend - outptr) / 16)			\
	  loop_count = (outend - outptr) / 16;				\
	__asm__ volatile (".machine push\n\t"				\
			  ".machine \"z13\"\n\t"			\
			  ".machinemode \"zarch_nohighgprs\"\n\t"	\
			  CONVERT_32BIT_SIZE_T ([R_LI])			\
			  /* Load the compare values and control bits	\
			     stored at label 3 into v20-v23.  */	\
			  "    larl %[R_I],3f\n\t"			\
			  "    vlm %%v20,%%v23,0(%[R_I])\n\t"		\
			  "0:  \n\t"					\
			  "    vlm %%v16,%%v17,0(%[R_IN])\n\t"		\
			  /* Shorten UCS4 to UCS2.  */			\
			  "    vpkf %%v18,%%v16,%%v17\n\t"		\
			  "    vstrcfs %%v19,%%v16,%%v20,%%v21\n\t"	\
			  "    jno 11f\n\t"				\
			  "1:  vstrcfs %%v19,%%v17,%%v20,%%v21\n\t"	\
			  "    jno 10f\n\t"				\
			  /* Store 16bytes to buf_out.  */		\
			  "2:  vst %%v18,0(%[R_OUT])\n\t"		\
			  "    la %[R_IN],32(%[R_IN])\n\t"		\
			  "    la %[R_OUT],16(%[R_OUT])\n\t"		\
			  "    brctg %[R_LI],0b\n\t"			\
			  "    j 20f\n\t"				\
			  /* Setup to check for ch >= 0xd800. (v20, v21)  */ \
			  "3:  .long 0xd800,0xd800,0x0,0x0\n\t"		\
			  "    .long 0xa0000000,0xa0000000,0x0,0x0\n\t"	\
			  /* Setup to check for ch >= 0xe000		\
			     && ch < 0x10000. (v22,v23)  */		\
			  "    .long 0xe000,0x10000,0x0,0x0\n\t"	\
			  "    .long 0xa0000000,0x40000000,0x0,0x0\n\t"	\
			  /* v16 contains only valid chars. Check in v17: \
			     ch >= 0xe000 && ch <= 0xffff.  */		\
			  "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t"	\
			  "    jo 2b\n\t" /* All ch's in this range, proceed.   */ \
			  /* The stop position is in the second input	\
			     vector: byte offset 16 within the block.  */ \
			  "    lghi %[R_TMP],16\n\t"			\
			  "    j 12f\n\t"				\
			  /* Maybe v16 contains invalid chars.		\
			     Check ch >= 0xe000 && ch <= 0xffff.  */	\
			  "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t"	\
			  "    jo 1b\n\t" /* All ch's in this range, proceed.   */ \
			  "    lghi %[R_TMP],0\n\t"			\
			  /* R_I = byte offset within the 32 byte block	\
			     of the character that stopped the loop.	\
			     Consume R_I input bytes and store the	\
			     R_I / 2 output bytes in front of it.  */	\
			  "12: vlgvb %[R_I],%%v19,7\n\t"		\
			  "    agr %[R_I],%[R_TMP]\n\t"			\
			  "    la %[R_IN],0(%[R_I],%[R_IN])\n\t"	\
			  "    srl %[R_I],1\n\t"			\
			  "    ahi %[R_I],-1\n\t"			\
			  "    jl 20f\n\t"				\
			  "    vstl %%v18,%[R_I],0(%[R_OUT])\n\t"	\
			  "    la %[R_OUT],1(%[R_I],%[R_OUT])\n\t"	\
			  "20:\n\t"					\
			  ".machine pop"				\
			  : /* outputs */ [R_OUT] "+a" (outptr)		\
			    , [R_IN] "+a" (inptr)			\
			    , [R_LI] "+d" (loop_count)			\
			    , [R_I] "=a" (tmp2)				\
			    , [R_TMP] "=d" (tmp)			\
			  : /* inputs */				\
			  : /* clobber list*/ "memory", "cc"		\
			    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
			    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
			    ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
			    ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \
			  );						\
	if (loop_count > 0)						\
	  {								\
	    /* The vector loop stopped early.  Let the C code process	\
	       the next character; it reports an error if that		\
	       character is invalid.  */				\
	    BODY_ORIG							\
	  }								\
      }									\
  }
1119 #define LOOP_NEED_FLAGS
1120 #include <iconv/loop.c>
1121 #include <iconv/skeleton.c>
1122 # undef BODY_ORIG
1123 ICONV_VX_IFUNC (__gconv_transform_internal_ucs2)
1124 
/* Convert from the internal (UCS4-like) format to UCS2 in other endianness.
   Settings of the vector variant: an input character needs 4 bytes, an
   output character needs 2 bytes.  */
# define DEFINE_INIT		0
# define DEFINE_FINI		0
# define MIN_NEEDED_FROM	4
# define MIN_NEEDED_TO		2
# define FROM_DIRECTION		1
# define FROM_LOOP		ICONV_VX_NAME (internal_ucs2reverse_loop)
# define TO_LOOP		ICONV_VX_NAME (internal_ucs2reverse_loop) /* This is not used.  */
# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_internal_ucs2reverse)
# define ONE_DIRECTION		1

/* Only one direction is provided, so the loop uses the FROM settings.  */
# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
# define LOOPFCT		FROM_LOOP
/* Scalar conversion step: read one 32-bit internal character at INPTR and
   write it as one byte-swapped UCS-2 character at OUTPTR.  Values of
   0x10000 and above go through the tag handler and the standard to-loop
   error handler; surrogates (0xd800..0xdfff) are rejected, or skipped and
   counted in *IRREVERSIBLE if errors are ignored.  */
# define BODY_ORIG							\
  {									\
    uint32_t ucs4 = *((const uint32_t *) inptr);			\
    if (__glibc_unlikely (ucs4 >= 0x10000))				\
      {									\
	/* Does not fit into a single 16-bit UCS-2 character.  */	\
	UNICODE_TAG_HANDLER (ucs4, 4);					\
	STANDARD_TO_LOOP_ERR_HANDLER (4);				\
      }									\
    else if (__glibc_unlikely (ucs4 >= 0xd800 && ucs4 < 0xe000))	\
      {									\
	/* Surrogate characters in UCS-4 input are not valid.		\
	   We must catch this, because the UCS-2 output might be	\
	   interpreted as UTF-16 by other programs.  If we let		\
	   surrogates pass through, attackers could make a security	\
	   hole exploit by synthesizing any desired plane 1-16		\
	   character.  */						\
	if (ignore_errors_p ())						\
	  {								\
	    /* Skip the character and go on with the next one.  */	\
	    ++*irreversible;						\
	    inptr += 4;							\
	    continue;							\
	  }								\
	result = __GCONV_ILLEGAL_INPUT;					\
	break;								\
      }									\
    else								\
      {									\
	put16 (outptr, bswap_16 (ucs4));				\
	inptr += 4;							\
	outptr += sizeof (uint16_t);					\
      }									\
  }
/* Vector implementation of the loop body.
   If less than 32 input bytes or less than 16 output bytes are left, one
   character is converted by BODY_ORIG.  Otherwise the inline assembly
   converts LOOP_COUNT blocks of 32 input bytes into 16 output bytes each:
   - vpkf shortens the 32-bit elements of v16/v17 to 16-bit elements and
     vperm (mask in v24) swaps the two bytes of each of them;
   - vstrcfs with v20/v21 searches each input vector for a character
     >= 0xd800;
   - if one is found, a second vstrcfs with v22/v23 checks whether all
     characters of that vector are in the range 0xe000..0xffff, in which
     case the vector is accepted.
   Otherwise only the characters in front of the first character outside
   of 0xe000..0xffff are stored, INPTR and OUTPTR are advanced past them
   and the asm statement is left with LOOP_COUNT != 0.  That character is
   then processed by BODY_ORIG, which does the error and tag handling.  */
# define BODY								\
  {									\
    if (__builtin_expect (inend - inptr < 32, 1)			\
	|| outend - outptr < 16)					\
      /* Convert remaining bytes with c code.  */			\
      BODY_ORIG								\
    else								\
      {									\
	/* Convert in 32 byte blocks.  */				\
	size_t loop_count = (inend - inptr) / 32;			\
	size_t tmp, tmp2;						\
	/* Do not produce more 16 byte blocks than the output can	\
	   hold.  */							\
	if (loop_count > (outend - outptr) / 16)			\
	  loop_count = (outend - outptr) / 16;				\
	__asm__ volatile (".machine push\n\t"				\
			  ".machine \"z13\"\n\t"			\
			  ".machinemode \"zarch_nohighgprs\"\n\t"	\
			  CONVERT_32BIT_SIZE_T ([R_LI])			\
			  /* Load the compare values, control bits and	\
			     permute mask stored at label 3 into	\
			     v20-v24.  */				\
			  "    larl %[R_I],3f\n\t"			\
			  "    vlm %%v20,%%v24,0(%[R_I])\n\t"		\
			  "0:  \n\t"					\
			  "    vlm %%v16,%%v17,0(%[R_IN])\n\t"		\
			  /* Shorten UCS4 to UCS2 and byteswap.  */	\
			  "    vpkf %%v18,%%v16,%%v17\n\t"		\
			  "    vperm %%v18,%%v18,%%v18,%%v24\n\t"	\
			  "    vstrcfs %%v19,%%v16,%%v20,%%v21\n\t"	\
			  "    jno 11f\n\t"				\
			  "1:  vstrcfs %%v19,%%v17,%%v20,%%v21\n\t"	\
			  "    jno 10f\n\t"				\
			  /* Store 16bytes to buf_out.  */		\
			  "2: vst %%v18,0(%[R_OUT])\n\t"		\
			  "    la %[R_IN],32(%[R_IN])\n\t"		\
			  "    la %[R_OUT],16(%[R_OUT])\n\t"		\
			  "    brctg %[R_LI],0b\n\t"			\
			  "    j 20f\n\t"				\
			  /* Setup to check for ch >= 0xd800. (v20, v21)  */ \
			  "3: .long 0xd800,0xd800,0x0,0x0\n\t"		\
			  "    .long 0xa0000000,0xa0000000,0x0,0x0\n\t"	\
			  /* Setup to check for ch >= 0xe000		\
			     && ch < 0x10000. (v22,v23)  */		\
			  "    .long 0xe000,0x10000,0x0,0x0\n\t"	\
			  "    .long 0xa0000000,0x40000000,0x0,0x0\n\t"	\
			  /* Vector permute mask (v24)  */		\
			  "    .short 0x0100,0x0302,0x0504,0x0706\n\t"	\
			  "    .short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t"	\
			  /* v16 contains only valid chars. Check in v17: \
			     ch >= 0xe000 && ch <= 0xffff.  */		\
			  "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t"	\
			  "    jo 2b\n\t" /* All ch's in this range, proceed.  */ \
			  /* The stop position is in the second input	\
			     vector: byte offset 16 within the block.  */ \
			  "    lghi %[R_TMP],16\n\t"			\
			  "    j 12f\n\t"				\
			  /* Maybe v16 contains invalid chars.		\
			     Check ch >= 0xe000 && ch <= 0xffff.  */	\
			  "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t"	\
			  "    jo 1b\n\t" /* All ch's in this range, proceed.  */ \
			  "    lghi %[R_TMP],0\n\t"			\
			  /* R_I = byte offset within the 32 byte block	\
			     of the character that stopped the loop.	\
			     Consume R_I input bytes and store the	\
			     R_I / 2 output bytes in front of it.  */	\
			  "12: vlgvb %[R_I],%%v19,7\n\t"		\
			  "    agr %[R_I],%[R_TMP]\n\t"			\
			  "    la %[R_IN],0(%[R_I],%[R_IN])\n\t"	\
			  "    srl %[R_I],1\n\t"			\
			  "    ahi %[R_I],-1\n\t"			\
			  "    jl 20f\n\t"				\
			  "    vstl %%v18,%[R_I],0(%[R_OUT])\n\t"	\
			  "    la %[R_OUT],1(%[R_I],%[R_OUT])\n\t"	\
			  "20:\n\t"					\
			  ".machine pop"				\
			  : /* outputs */ [R_OUT] "+a" (outptr)		\
			    , [R_IN] "+a" (inptr)			\
			    , [R_LI] "+d" (loop_count)			\
			    , [R_I] "=a" (tmp2)				\
			    , [R_TMP] "=d" (tmp)			\
			  : /* inputs */				\
			  : /* clobber list*/ "memory", "cc"		\
			    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
			    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
			    ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
			    ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \
			    ASM_CLOBBER_VR ("v24")			\
			  );						\
	if (loop_count > 0)						\
	  {								\
	    /* The vector loop stopped early.  Let the C code process	\
	       the next character; it reports an error if that		\
	       character is invalid.  */				\
	    BODY_ORIG							\
	  }								\
      }									\
  }
1256 #define LOOP_NEED_FLAGS
1257 #include <iconv/loop.c>
1258 #include <iconv/skeleton.c>
1259 # undef BODY_ORIG
1260 ICONV_VX_IFUNC (__gconv_transform_internal_ucs2reverse)
1261 
1262 
1263 #else
/* Generate the internal transformations without ifunc if the build
   environment lacks vector support.  Instead simply include the common
   version.  */
1266 # include <iconv/gconv_simple.c>
1267 #endif /* !defined HAVE_S390_VX_ASM_SUPPORT */
1268