/* memset with SSE2 and REP string.
   Copyright (C) 2010-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* Overview (32-bit x86, AT&T syntax, preprocessed with cpp).

   __memset_sse2_rep reads three stack arguments at 4-byte spacing,
   named DEST, CHR and LEN below, fills LEN bytes at DEST with the low
   byte of CHR and returns DEST in EAX (see SETRTNVAL).

   Register roles inside __memset_sse2_rep:
     EAX   the fill byte replicated into all four bytes.
     XMM0  the fill byte replicated into all sixteen bytes.
     ECX   number of bytes still to be stored.
     EDX   current destination pointer.  BRANCH_TO_JMPTBL_ENTRY adds
	   ECX to it, so every jump-table target sees EDX pointing one
	   past the last byte to store and uses negative offsets.
     EDI   pushed and used as scratch only once 128 or more bytes
	   remain.
     EBX   pushed by ENTRANCE in PIC builds, where it is needed to
	   address the jump tables.

   Strategy:
     LEN < 32	  dispatch through L(table_less_32bytes) to a chain of
		  4-byte stores plus a 0-3 byte tail.
     LEN >= 32	  if EDX is not 16-byte aligned, do one unaligned
		  16-byte store and round EDX up to the next boundary.
		  Then, with N bytes remaining:
       N < 128	  dispatch through L(table_16_128bytes) to a chain of
		  aligned 16-byte stores plus a 0-15 byte tail.
       N >= 128	  compare N with 15/16 of the data cache size.
		  Below it: unrolled loop of aligned 16-byte stores,
		  128 bytes per step, then L(table_16_128bytes) for
		  the rest.  At or above it: rep stosl, then a 0-3
		  byte tail.  */

#if IS_IN (libc)

#include <sysdep.h>
#include "asm-syntax.h"

/* Unwind bookkeeping matching a 4-byte push of REG.  */
#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

/* Unwind bookkeeping matching a 4-byte pop of REG.  */
#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

/* Push or pop REG and record the change in the unwind info.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offsets of the arguments from ESP once ENTRANCE has run.  PARMS is
   chosen per configuration below.  These offsets are only valid while
   nothing besides what ENTRANCE pushed is on the stack, which is why
   every path pops EDI before using SETRTNVAL.  */
#define DEST		PARMS
#define CHR		DEST+4
#define LEN		CHR+4
/* Load the return value, i.e. the DEST argument, into EAX.  */
#define SETRTNVAL	movl DEST(%esp), %eax

#ifdef PIC
/* PIC build: EBX is saved on entry and restored before each ret.
   RETURN is for returns that are followed by more code: after the ret
   it re-applies CFI_PUSH (%ebx) so that the unwind info for the
   following code again describes EBX as saved on the stack.
   RETURN_END omits that step and is used for the final return.
   Jump-table entries are stored as offsets from the table base.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.   */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    SETUP_PIC_REG(bx);						\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX. Go.  */	\
    _CET_NOTRACK jmp *%ebx
#else
/* Non-PIC build: nothing is saved on entry, the first argument sits
   just above the return address, and jump-table entries are stored
   as absolute addresses.  */
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    add		%ecx, %edx;					\
    _CET_NOTRACK jmp *TABLE(,%ecx,4)
#endif

	.section .text.sse2,"ax",@progbits
#if defined SHARED && IS_IN (libc)
/* Checked entry point.  It loads the stack slot at 12(%esp), which is
   the slot __memset_sse2_rep reads as LEN (nothing has been pushed
   yet at this point), and branches to __chk_fail when the slot at
   16(%esp) is below it as an unsigned value.
   NOTE(review): 16(%esp) is presumably the size of the destination
   object; confirm against the callers.
   The passing path contains no ret or jmp, so control continues into
   the __memset_sse2_rep entry that follows.  */
ENTRY (__memset_chk_sse2_rep)
	movl	12(%esp), %eax
	cmpl	%eax, 16(%esp)
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END (__memset_chk_sse2_rep)
#endif
/* Main entry point.  See the overview at the top of the file for the
   register roles and the overall strategy.  */
ENTRY (__memset_sse2_rep)
	ENTRANCE

	movl	LEN(%esp), %ecx
	/* EAX = fill byte, zero-extended; then copy it into AH so that
	   AX holds the byte twice.  */
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	movl	DEST(%esp), %edx
	/* Unsigned compare: 32 bytes or more take the SSE2 paths.  */
	cmp	$32, %ecx
	jae	L(32bytesormore)

/* ECX is 0..31 here.  The branch adds ECX to EDX and uses ECX as the
   index into the table.  */
L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))


/* One entry per length 0..31; entry N is the address (or, for PIC,
   the table-relative offset) of L(write_Nbytes).  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less_32bytes):
	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
	.popsection

/* Store chains for lengths below 32.  EDX points one past the last
   byte to store.  Entering at L(write_Nbytes) stores the N bytes
   ending at EDX: each label stores 4 bytes and falls through to the
   label for N - 4, and the last label of a chain stores the final
   N mod 4 bytes.  */

/* Lengths that are a multiple of 4.  */
	ALIGN (4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

/* Lengths of the form 4k + 1: the tail is one byte.  */
	ALIGN (4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* Lengths of the form 4k + 2: the tail is one 2-byte store.  */
	ALIGN (4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

/* Lengths of the form 4k + 3: the tail is a 2-byte store followed by
   a 1-byte store.  */
	ALIGN (4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
/* ECX >= 32 (reached through jae).  EDX is the unmodified DEST and
   has no alignment guarantee yet; that is tested just below.  */
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  Store 16 bytes at the
   unaligned start, then round EDX up to the next 16-byte boundary
   and reduce ECX by the 1..15 bytes that were skipped.  The skipped
   bytes are covered by the unaligned store.  */
L(not_aligned_16):
	movdqu	%xmm0, (%edx)
	/* EAX is borrowed to hold the old pointer.  */
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	/* EAX = old EDX - new EDX, a negative byte count.  */
	sub	%edx, %eax
	add	%eax, %ecx
	/* Put the 4-byte pattern back into EAX.  */
	movd	%xmm0, %eax

/* EDX is 16-byte aligned from here on.  */
	ALIGN (4)
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

/* ECX is below 128: dispatch on it.  The branch adds ECX to EDX.  */
L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

/* ECX >= 128.  EDI is saved and used as scratch; both ways out of
   this part pop it again before dispatching or returning.  */
	ALIGN (4)
L(128bytesormore):
	PUSH (%edi)
	/* Load the data cache size into EBX.  With a compile-time
	   DATA_CACHE_SIZE, or in a non-PIC build, EBX is saved around
	   its use.  In a PIC build without DATA_CACHE_SIZE, EBX was
	   already saved by ENTRANCE and is simply overwritten.  */
#ifdef DATA_CACHE_SIZE
	PUSH (%ebx)
	mov	$DATA_CACHE_SIZE, %ebx
#else
# ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_data_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_data_cache_size, %ebx
# endif
#endif
	/* EDI = size - size / 16, i.e. 15/16 of the cache size.  */
	mov	%ebx, %edi
	shr	$4, %ebx
	sub	%ebx, %edi
#if defined DATA_CACHE_SIZE || !defined PIC
	POP (%ebx)
#endif
/*
 * Lengths at or above the threshold in EDI use the rep stosl path.
 * Rationale given by the original authors: when the data size
 * approaches the end of the L1 cache, fast string will prefetch and
 * combine data efficiently.
 */
	cmp	%edi, %ecx
	jae	L(128bytesormore_endof_L1)
	/* Bias ECX by -128 so that, inside the loop, a borrow from
	   "sub $128" means fewer than 128 bytes remain after the chunk
	   being stored.  */
	subl	$128, %ecx
/* Loop body, unrolled twice; each half stores 128 aligned bytes.
   EDX is advanced with lea because lea leaves the flags of the
   preceding sub intact for the conditional branch.  */
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

/* Loop exit.  Restore EDI, undo the bias so that ECX is the true
   remainder (0..127), and dispatch on it.  */
L(128bytesless_normal):
	POP (%edi)
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	/* Unwind annotation only: the code below is entered with EDI
	   still pushed, so re-assert that state after the POP above.  */
	CFI_PUSH (%edi)
	ALIGN (4)
/* rep stosl path.  EDI = destination, ECX = number of whole dwords,
   EDX = number of trailing bytes (0..3).  */
L(128bytesormore_endof_L1):
	mov	%edx, %edi
	mov	%ecx, %edx
	shr	$2, %ecx
	and	$3, %edx
	rep	stosl
	/* This jz consumes the zero flag set by "and $3, %edx" above;
	   it relies on rep stosl leaving the flags untouched.  */
	jz	L(copy_page_by_rep_exit)
	/* EDX is 1, 2 or 3.  */
	cmp	$2, %edx
	jb	L(copy_page_by_rep_left_1)
	movw	%ax, (%edi)
	add	$2, %edi
	sub	$2, %edx
	jz	L(copy_page_by_rep_exit)
L(copy_page_by_rep_left_1):
	movb	%al, (%edi)
L(copy_page_by_rep_exit):
	POP (%edi)
	SETRTNVAL
	RETURN

/* One entry per remainder 0..127; entry N is the address (or, for
   PIC, the table-relative offset) of L(aligned_16_Nbytes).  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Store chains for remainders 0..127 once EDX has been 16-byte
   aligned.  EDX points one past the last byte to store.  Entering at
   L(aligned_16_Nbytes) stores the N bytes ending at EDX: the branch
   added N to an aligned pointer, so EDX - N is 16-byte aligned, which
   is what the movdqa stores at offsets -N, -(N - 16), ... depend on.
   Each label stores 16 bytes and falls through to the label for
   N - 16; the last label of a chain stores the final N mod 16 bytes
   with 8-, 4-, 2- and 1-byte stores.  There is one chain per value of
   N mod 16.  */

/* N mod 16 == 0: no tail.  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

/* N mod 16 == 1: tail = 1.  */
	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 2: tail = 2.  */
	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 3: tail = 2 + 1.  */
	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 4: tail = 4.  */
	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 5: tail = 4 + 1.  */
	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 6: tail = 4 + 2.  */
	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 7: tail = 4 + 2 + 1.  */
	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 8: tail = 8 (low half of XMM0).  */
	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 9: tail = 8 + 1.  */
	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 10: tail = 8 + 2.  */
	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 11: tail = 8 + 2 + 1.  */
	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 12: tail = 8 + 4.  */
	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 13: tail = 8 + 4 + 1.  */
	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 14: tail = 8 + 4 + 2.  */
	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

/* N mod 16 == 15: tail = 8 + 4 + 2 + 1.  This is the last return in
   the function, so it uses RETURN_END: no code follows that would
   need the unwind state re-asserted.  */
	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN_END

END (__memset_sse2_rep)

#endif