/* memset with SSE2
   Copyright (C) 2010-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

#include <sysdep.h>
#include "asm-syntax.h"

/* Syntax: GAS AT&T (source, destination), run through the C
   preprocessor.  Target: 32-bit x86; all three arguments are read
   from the stack.

   Register roles inside __memset_sse2:
     EAX   fill byte replicated into all four bytes
     ECX   number of bytes still to be written
     EDX   destination cursor
     XMM0  fill byte replicated into all sixteen bytes
     EBX   scratch (PIC base, jump target, cache size); the caller's
	   value is saved by ENTRANCE in PIC builds and by explicit
	   PUSH/POP pairs otherwise.  */

/* Unwind annotations for a 4 byte push/pop of REG.  */
#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Stack offsets of the arguments.  PARMS skips the return address
   and, in PIC builds, the EBX slot pushed by ENTRANCE.  */
#define DEST		PARMS
#define CHR		DEST+4
#define LEN		CHR+4
/* The return value is the original destination pointer.  */
#define SETRTNVAL	movl DEST(%esp), %eax

#ifdef PIC
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
/* RETURN is used in the middle of the function: the code that
   follows it still runs with EBX pushed, so the unwind state is put
   back after the ret.  */
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    SETUP_PIC_REG(bx);						\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX. Go.  */	\
    _CET_NOTRACK jmp *%ebx
#else
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    add		%ecx, %edx;					\
    _CET_NOTRACK jmp *TABLE(,%ecx,4)
#endif

/* Both forms of BRANCH_TO_JMPTBL_ENTRY add ECX to EDX before
   jumping, so every table target sees EDX pointing one byte past the
   end of the region that is still to be filled and stores with
   negative offsets.  */

	.section .text.sse2,"ax",@progbits
#if defined SHARED && IS_IN (libc)
/* Checking entry point.  Branches to __chk_fail when the fourth
   stack argument is below the third one (unsigned compare);
   otherwise execution falls through into __memset_sse2, as there is
   no return instruction here.  */
ENTRY (__memset_chk_sse2)
	movl	12(%esp), %eax
	cmpl	%eax, 16(%esp)
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END (__memset_chk_sse2)
#endif
ENTRY (__memset_sse2)
	ENTRANCE

	movl	LEN(%esp), %ecx
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	movl	DEST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

/* ECX < 32: dispatch straight to the handler for that exact length.  */
L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
/* Indexed by the byte count, 0 through 31.  */
L(table_less_32bytes):
	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
	.popsection

/* Tail handlers for fewer than 32 bytes.  Each chain falls through
   4 bytes at a time; the four chains differ only in how the last
   0 to 3 bytes are written.  */
	ALIGN (4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
/* ECX >= 32 (reached with jae).  Nothing has aligned EDX yet; its
   alignment is tested below.  */
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  Cover the head with one
   unaligned 16 byte store, then round EDX up to the next 16 byte
   boundary and shrink ECX by the number of bytes skipped.  */
L(not_aligned_16):
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	/* EAX = old EDX - new EDX, i.e. minus the bytes skipped.  */
	sub	%edx, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the 4 byte pattern.  */
	movd	%xmm0, %eax

	ALIGN (4)
/* EDX is 16 byte aligned.  */
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* ECX >= 128 and EDX is 16 byte aligned.  Load the shared cache size
   into EBX and choose a store strategy from the length.  */
L(128bytesormore):
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)


	/* The shared cache size is no longer needed on this path.  Undo
	   the PUSH above under exactly the condition that performed it,
	   so that the stack stays balanced whichever of
	   SHARED_CACHE_SIZE and DATA_CACHE_SIZE is defined.
	   RESTORE_EBX_STATE later re-creates the "EBX pushed" unwind
	   state for the non-temporal path, which is assembled further
	   down but is entered from before this POP.  */
#if defined SHARED_CACHE_SIZE || !defined PIC
	POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
#else
# define RESTORE_EBX_STATE
#endif
#ifdef DATA_CACHE_SIZE
	cmp	$DATA_CACHE_SIZE, %ecx
#else
# ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
	cmp	__x86_data_cache_size, %ecx
# endif
#endif

	jae	L(128bytes_L2_normal)
	/* ECX is biased by -128 for the loop below, so that the borrow
	   from each "sub $128" tells whether another full 128 byte chunk
	   remains.  The movdqa and lea instructions leave the flags
	   untouched between the sub and the conditional jump.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Remove the bias: ECX is now the tail length, 0 through 127.  */
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* Length is at least the data cache size but below the shared cache
   size: 128 byte chunks with prefetch ahead of the stores.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	RESTORE_EBX_STATE
/* Length is at least the shared cache size (EBX).  The first EBX
   bytes are written with ordinary stores, the rest with non-temporal
   stores.  */
L(128bytesormore_nt_start):
	sub	%ebx, %ecx
	ALIGN (4)
/* NOTE(review): this loop consumes EBX in 128 byte steps and drops
   whatever is left once EBX falls below 128; it presumably relies on
   the shared cache size being a multiple of 128 -- confirm against
   the code that sets __x86_shared_cache_size.  */
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Issued once after the run of non-temporal stores.  */
	sfence
L(shared_cache_loop_end):
	/* Same condition as the PUSH at L(128bytesormore).  */
#if defined SHARED_CACHE_SIZE || !defined PIC
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
/* Indexed by the number of tail bytes, 0 through 127.  */
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Tail handlers for 0 through 127 bytes starting at a 16 byte
   aligned address.  EDX was aligned before the tail length N was
   added to it, so EDX - N, EDX - N + 16, ... are aligned and movdqa
   is safe.  Each chain falls through 16 bytes at a time and ends by
   writing the last N mod 16 bytes with narrower stores.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last return in the function: no unwind state to re-establish.  */
	RETURN_END

END (__memset_sse2)

#endif