/* PLT trampolines.  x86-64 version.
   Copyright (C) 2009-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#ifndef SECTION
# define SECTION(p)     p
#endif

        .section SECTION(.text),"ax",@progbits
#ifdef _dl_runtime_resolve

# undef REGISTER_SAVE_AREA
# undef LOCAL_STORAGE_AREA
# undef BASE

# if (STATE_SAVE_ALIGNMENT % 16) != 0
#  error STATE_SAVE_ALIGNMENT must be a multiple of 16
# endif

# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
#  error STATE_SAVE_OFFSET must be a multiple of STATE_SAVE_ALIGNMENT
# endif

# if DL_RUNTIME_RESOLVE_REALIGN_STACK
/* Local stack area before jumping to function address: RBX.  */
#  define LOCAL_STORAGE_AREA    8
#  define BASE                  rbx
#  ifdef USE_FXSAVE
/* Use fxsave to save XMM registers.  */
#   define REGISTER_SAVE_AREA   (512 + STATE_SAVE_OFFSET)
#   if (REGISTER_SAVE_AREA % 16) != 0
#    error REGISTER_SAVE_AREA must be a multiple of 16
#   endif
#  endif
# else
#  ifndef USE_FXSAVE
#   error USE_FXSAVE must be defined
#  endif
/* Use fxsave to save XMM registers.  */
#  define REGISTER_SAVE_AREA    (512 + STATE_SAVE_OFFSET + 8)
/* Local stack area before jumping to function address: all saved
   registers.  */
#  define LOCAL_STORAGE_AREA    REGISTER_SAVE_AREA
#  define BASE                  rsp
#  if (REGISTER_SAVE_AREA % 16) != 8
#   error REGISTER_SAVE_AREA must be an odd multiple of 8
#  endif
# endif

        .globl _dl_runtime_resolve
        .hidden _dl_runtime_resolve
        .type _dl_runtime_resolve, @function
        .align 16
        cfi_startproc
_dl_runtime_resolve:
        cfi_adjust_cfa_offset(16)       # Incorporate PLT
        _CET_ENDBR
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
#  if LOCAL_STORAGE_AREA != 8
#   error LOCAL_STORAGE_AREA must be 8
#  endif
        pushq %rbx                      # push decrements %rsp by 8.
        cfi_adjust_cfa_offset(8)
        cfi_rel_offset(%rbx, 0)
        mov %RSP_LP, %RBX_LP
        cfi_def_cfa_register(%rbx)
        and $-STATE_SAVE_ALIGNMENT, %RSP_LP
# endif
# ifdef REGISTER_SAVE_AREA
        sub $REGISTER_SAVE_AREA, %RSP_LP
#  if !DL_RUNTIME_RESOLVE_REALIGN_STACK
        cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
#  endif
# else
        # Allocate stack space of the required size to save the state.
#  if IS_IN (rtld)
        sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  else
        sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  endif
# endif
        # Preserve registers otherwise clobbered.
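        /* A minimal sketch of the scratch area just allocated, assuming
           the REGISTER_SAVE_* and STATE_SAVE_OFFSET values supplied by
           the file that includes this header:

                %rsp + STATE_SAVE_OFFSET   fxsave/xsave state image
                %rsp + REGISTER_SAVE_*     integer registers %rax, %rcx,
                                           %rdx, %rsi, %rdi, %r8, %r9
                                           (below STATE_SAVE_OFFSET)

           The stores below fill the integer slots; the fxsave/xsave
           instruction further down fills the state image.  */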
        movq %rax, REGISTER_SAVE_RAX(%rsp)
        movq %rcx, REGISTER_SAVE_RCX(%rsp)
        movq %rdx, REGISTER_SAVE_RDX(%rsp)
        movq %rsi, REGISTER_SAVE_RSI(%rsp)
        movq %rdi, REGISTER_SAVE_RDI(%rsp)
        movq %r8, REGISTER_SAVE_R8(%rsp)
        movq %r9, REGISTER_SAVE_R9(%rsp)
# ifdef USE_FXSAVE
        fxsave STATE_SAVE_OFFSET(%rsp)
# else
        movl $STATE_SAVE_MASK, %eax
        xorl %edx, %edx
        # Clear the XSAVE Header.
#  ifdef USE_XSAVE
        movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
#  endif
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
#  ifdef USE_XSAVE
        xsave STATE_SAVE_OFFSET(%rsp)
#  else
        xsavec STATE_SAVE_OFFSET(%rsp)
#  endif
# endif
        # Copy args pushed by PLT in register.
        # %rdi: link_map, %rsi: reloc_index
        mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
        mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
        call _dl_fixup          # Call resolver.
        mov %RAX_LP, %R11_LP    # Save return value.
        # Get register content back.
# ifdef USE_FXSAVE
        fxrstor STATE_SAVE_OFFSET(%rsp)
# else
        movl $STATE_SAVE_MASK, %eax
        xorl %edx, %edx
        xrstor STATE_SAVE_OFFSET(%rsp)
# endif
        movq REGISTER_SAVE_R9(%rsp), %r9
        movq REGISTER_SAVE_R8(%rsp), %r8
        movq REGISTER_SAVE_RDI(%rsp), %rdi
        movq REGISTER_SAVE_RSI(%rsp), %rsi
        movq REGISTER_SAVE_RDX(%rsp), %rdx
        movq REGISTER_SAVE_RCX(%rsp), %rcx
        movq REGISTER_SAVE_RAX(%rsp), %rax
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
        mov %RBX_LP, %RSP_LP
        cfi_def_cfa_register(%rsp)
        movq (%rsp), %rbx
        cfi_restore(%rbx)
# endif
        # Adjust stack (PLT did 2 pushes).
        add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
        cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
        jmp *%r11               # Jump to function address.
        cfi_endproc
        .size _dl_runtime_resolve, .-_dl_runtime_resolve
#endif


#if !defined PROF && defined _dl_runtime_profile
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
#  error LR_VECTOR_OFFSET must be a multiple of VEC_SIZE
# endif

        .globl _dl_runtime_profile
        .hidden _dl_runtime_profile
        .type _dl_runtime_profile, @function
        .align 16
_dl_runtime_profile:
        cfi_startproc
        cfi_adjust_cfa_offset(16)       # Incorporate PLT
        _CET_ENDBR
        /* The La_x86_64_regs data structure pointed to by the
           fourth parameter must be VEC_SIZE-byte aligned.  This must
           be explicitly enforced.  We have to set up a dynamically
           sized stack frame.  %rbx points to the top half, which
           has a fixed size and preserves the original stack pointer.  */

        sub $32, %RSP_LP        # Allocate the local storage.
        cfi_adjust_cfa_offset(32)
        movq %rbx, (%rsp)
        cfi_rel_offset(%rbx, 0)

        /* On the stack:
                56(%rbx)        parameter #1
                48(%rbx)        return address

                40(%rbx)        reloc index
                32(%rbx)        link_map

                24(%rbx)        La_x86_64_regs pointer
                16(%rbx)        framesize
                 8(%rbx)        rax
                  (%rbx)        rbx
        */

        movq %rax, 8(%rsp)
        mov %RSP_LP, %RBX_LP
        cfi_def_cfa_register(%rbx)

        /* Actively align the La_x86_64_regs structure.  */
        and $-VEC_SIZE, %RSP_LP
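        /* Illustrative only: with VEC_SIZE == 32 (the AVX case), the
           "and $-VEC_SIZE" above clears the low five bits of %rsp, so
           the La_x86_64_regs block allocated just below is 32-byte
           aligned and the aligned VMOVA accesses used on it cannot
           fault.  */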
        /* sizeof(La_x86_64_regs).  Need extra space for 8 SSE
           registers to detect if any xmm0-xmm7 registers are changed
           by an audit module.  */
        sub $(LR_SIZE + XMM_SIZE*8), %RSP_LP
        movq %rsp, 24(%rbx)

        /* Fill the La_x86_64_regs structure.  */
        movq %rdx, LR_RDX_OFFSET(%rsp)
        movq %r8, LR_R8_OFFSET(%rsp)
        movq %r9, LR_R9_OFFSET(%rsp)
        movq %rcx, LR_RCX_OFFSET(%rsp)
        movq %rsi, LR_RSI_OFFSET(%rsp)
        movq %rdi, LR_RDI_OFFSET(%rsp)
        movq %rbp, LR_RBP_OFFSET(%rsp)

        lea 48(%rbx), %RAX_LP
        movq %rax, LR_RSP_OFFSET(%rsp)

        /* We always store the XMM registers even if AVX is available.
           This is to provide backward binary compatibility for existing
           audit modules.  */
        VMOVA %xmm0, (LR_XMM_OFFSET + XMM_SIZE*0)(%rsp)
        VMOVA %xmm1, (LR_XMM_OFFSET + XMM_SIZE*1)(%rsp)
        VMOVA %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
        VMOVA %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
        VMOVA %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
        VMOVA %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
        VMOVA %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
        VMOVA %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# ifdef RESTORE_AVX
        /* This is to support AVX audit modules.  */
        VMOVA %VEC(0), (LR_VECTOR_OFFSET + VECTOR_SIZE*0)(%rsp)
        VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE*1)(%rsp)
        VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
        VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
        VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
        VMOVA %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
        VMOVA %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
        VMOVA %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)

        /* Save xmm0-xmm7 registers to detect if any of them are
           changed by an audit module.  */
        vmovdqa %xmm0, (LR_SIZE + XMM_SIZE*0)(%rsp)
        vmovdqa %xmm1, (LR_SIZE + XMM_SIZE*1)(%rsp)
        vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
        vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
        vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
        vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
        vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
        vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
# endif

        mov %RSP_LP, %RCX_LP    # La_x86_64_regs pointer to %rcx.
        mov 48(%rbx), %RDX_LP   # Load return address if needed.
        mov 40(%rbx), %RSI_LP   # Copy args pushed by PLT in register.
        mov 32(%rbx), %RDI_LP   # %rdi: link_map, %rsi: reloc_index
        lea 16(%rbx), %R8_LP    # Address of framesize
        call _dl_profile_fixup  # Call resolver.

        mov %RAX_LP, %R11_LP    # Save return value.

        movq 8(%rbx), %rax      # Get back register content.
        movq LR_RDX_OFFSET(%rsp), %rdx
        movq LR_R8_OFFSET(%rsp), %r8
        movq LR_R9_OFFSET(%rsp), %r9

        VMOVA (LR_XMM_OFFSET + XMM_SIZE*0)(%rsp), %xmm0
        VMOVA (LR_XMM_OFFSET + XMM_SIZE*1)(%rsp), %xmm1
        VMOVA (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
        VMOVA (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
        VMOVA (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
        VMOVA (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
        VMOVA (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
        VMOVA (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

# ifdef RESTORE_AVX
        /* Check if any xmm0-xmm7 registers are changed by the audit
           module.  */
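        /* The tests below rely on vpcmpeqb producing 0xff in every
           byte position that matches; vpmovmskb then collects the 16
           byte sign bits, so an unchanged register yields 0xffff and
           "incw %si" wraps it to zero, setting ZF.  ZF set therefore
           means the audit module left the corresponding xmm register
           untouched and the full-width vector saved above can be
           restored.  */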
        vpcmpeqb (LR_SIZE)(%rsp), %xmm0, %xmm8
        vpmovmskb %xmm8, %esi
        incw %si
        je 2f
        vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
        vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)

1:      vpcmpeqb (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
        vpmovmskb %xmm8, %esi
        incw %si
        je 2f
        vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
        vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)

1:      vpcmpeqb (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
        vpmovmskb %xmm8, %esi
        incw %si
        je 2f
        vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
        vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)

1:      vpcmpeqb (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
        vpmovmskb %xmm8, %esi
        incw %si
        je 2f
        vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
        vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)

1:      vpcmpeqb (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
        vpmovmskb %xmm8, %esi
        incw %si
        je 2f
        vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
        vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)

1:      vpcmpeqb (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
        vpmovmskb %xmm8, %esi
        incw %si
        je 2f
        vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
        vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)

1:      vpcmpeqb (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
        vpmovmskb %xmm8, %esi
        incw %si
        je 2f
        vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
        vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)

1:      vpcmpeqb (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
        vpmovmskb %xmm8, %esi
        incw %si
        je 2f
        vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
        vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

1:
# endif

        mov 16(%rbx), %RCX_LP   # Anything in framesize?
        test %RCX_LP, %RCX_LP
        jns 3f

        /* There's nothing in the frame size, so there
           will be no call to _dl_audit_pltexit.  */

        /* Get back register content.  */
        movq LR_RCX_OFFSET(%rsp), %rcx
        movq LR_RSI_OFFSET(%rsp), %rsi
        movq LR_RDI_OFFSET(%rsp), %rdi

        mov %RBX_LP, %RSP_LP
        movq (%rsp), %rbx
        cfi_restore(%rbx)
        cfi_def_cfa_register(%rsp)

        add $48, %RSP_LP        # Adjust the stack to the return value
                                # (eats the reloc index and link_map).
        cfi_adjust_cfa_offset(-48)
        jmp *%r11               # Jump to function address.

3:
        cfi_adjust_cfa_offset(48)
        cfi_rel_offset(%rbx, 0)
        cfi_def_cfa_register(%rbx)

        /* At this point we need to prepare a new stack for the function
           which has to be called.  We copy the original stack to a
           temporary buffer of the size specified by the 'framesize'
           returned from _dl_profile_fixup.  */

        lea LR_RSP_OFFSET(%rbx), %RSI_LP        # stack
        add $8, %RCX_LP
        and $-16, %RCX_LP
        sub %RCX_LP, %RSP_LP
        mov %RSP_LP, %RDI_LP
        rep movsb

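        /* After "rep movsb", %rdi has advanced to one byte past the end
           of the copied block, which is exactly the La_x86_64_regs area
           recorded at 24(%rbx).  The 24/32/40 displacements below are
           therefore the lr_rcx/lr_rsi/lr_rdi slots of that structure
           (they correspond to LR_RCX_OFFSET, LR_RSI_OFFSET and
           LR_RDI_OFFSET in the current La_x86_64_regs layout).  */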
        movq 24(%rdi), %rcx     # Get back register content.
        movq 32(%rdi), %rsi
        movq 40(%rdi), %rdi

        call *%r11

        mov 24(%rbx), %RSP_LP   # Drop the copied stack content.

        /* Now we have to prepare the La_x86_64_retval structure for
           _dl_audit_pltexit.  %rsp now points to the La_x86_64_regs
           structure, so we just need to allocate sizeof(La_x86_64_retval)
           bytes on the stack, since the alignment has already been taken
           care of.  */
# ifdef RESTORE_AVX
        /* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
           registers to detect if xmm0/xmm1 registers are changed
           by an audit module.  Since %rsp is aligned to VEC_SIZE, we
           need to make sure that the address of La_x86_64_retval +
           LRV_VECTOR0_OFFSET is aligned to VEC_SIZE.  */
#  define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
#  define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
#  if LRV_MISALIGNED == 0
        sub $LRV_SPACE, %RSP_LP
#  else
        sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
#  endif
# else
        sub $LRV_SIZE, %RSP_LP  # sizeof(La_x86_64_retval)
# endif
        mov %RSP_LP, %RCX_LP    # La_x86_64_retval argument to %rcx.

        /* Fill in the La_x86_64_retval structure.  */
        movq %rax, LRV_RAX_OFFSET(%rcx)
        movq %rdx, LRV_RDX_OFFSET(%rcx)

        VMOVA %xmm0, LRV_XMM0_OFFSET(%rcx)
        VMOVA %xmm1, LRV_XMM1_OFFSET(%rcx)

# ifdef RESTORE_AVX
        /* This is to support AVX audit modules.  */
        VMOVA %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
        VMOVA %VEC(1), LRV_VECTOR1_OFFSET(%rcx)

        /* Save xmm0/xmm1 registers to detect if they are changed
           by an audit module.  */
        vmovdqa %xmm0, (LRV_SIZE + XMM_SIZE*0)(%rcx)
        vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE*1)(%rcx)
# endif

        fstpt LRV_ST0_OFFSET(%rcx)
        fstpt LRV_ST1_OFFSET(%rcx)

        movq 24(%rbx), %rdx     # La_x86_64_regs argument to %rdx.
        movq 40(%rbx), %rsi     # Copy args pushed by PLT in register.
        movq 32(%rbx), %rdi     # %rdi: link_map, %rsi: reloc_index
        call _dl_audit_pltexit

        /* Restore return registers.  */
        movq LRV_RAX_OFFSET(%rsp), %rax
        movq LRV_RDX_OFFSET(%rsp), %rdx

        VMOVA LRV_XMM0_OFFSET(%rsp), %xmm0
        VMOVA LRV_XMM1_OFFSET(%rsp), %xmm1

# ifdef RESTORE_AVX
        /* Check if xmm0/xmm1 registers are changed by the audit
           module.  */
        vpcmpeqb (LRV_SIZE)(%rsp), %xmm0, %xmm2
        vpmovmskb %xmm2, %esi
        incw %si
        jne 1f
        VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0)

1:      vpcmpeqb (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
        vpmovmskb %xmm2, %esi
        incw %si
        jne 1f
        VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1)

1:
# endif

        fldt LRV_ST1_OFFSET(%rsp)
        fldt LRV_ST0_OFFSET(%rsp)

        mov %RBX_LP, %RSP_LP
        movq (%rsp), %rbx
        cfi_restore(%rbx)
        cfi_def_cfa_register(%rsp)

        add $48, %RSP_LP        # Adjust the stack to the return value
                                # (eats the reloc index and link_map).
        cfi_adjust_cfa_offset(-48)
        retq

        cfi_endproc
        .size _dl_runtime_profile, .-_dl_runtime_profile
#endif