/* Function sincos vectorized with SSE2.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

	.text

/* Two-lane sincos entry point whose body is produced entirely by
   WRAPPER_IMPL_SSE2_fFF.  That macro is not defined in this file
   (presumably it comes from svml_d_wrapper_impl.h).  */
ENTRY (_ZGVbN2vl8l8_sincos)
WRAPPER_IMPL_SSE2_fFF sincos
END (_ZGVbN2vl8l8_sincos)
libmvec_hidden_def (_ZGVbN2vl8l8_sincos)

/* SSE2 ISA version as wrapper to scalar (for vector
   function declared with #pragma omp declare simd notinbranch).

   The macro expands to a complete function body that evaluates a
   two-lane vector call by invoking the scalar CALLEE once per lane.

   Register contract, as used by the code below:
     In:   %xmm0  two doubles, the per-lane arguments
	   %xmm1  two pointers, per-lane destination for the value the
		  callee writes through its first pointer argument
	   %xmm2  two pointers, per-lane destination for the value the
		  callee writes through its second pointer argument
	   Pointers are 64 bits wide in the LP64 path and 32 bits wide
	   (packed in the low half of the register) in the ILP32 path.
     Out:  nothing in registers; four doubles are stored through the
	   pointers taken from %xmm1 and %xmm2.

   Each call hands the callee addresses of stack temporaries; the
   results are copied to the caller-supplied pointers only after both
   calls have returned.

   Only SSE2 instructions may be used here: this is the baseline
   variant and can run on processors without SSE4.1.  */
.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
#ifndef __ILP32__
	/* LP64 frame, offsets from %rsp after the adjustment:
	      0  first-output result, lane 0
	      8  first-output result, lane 1
	     16  second-output result, lane 0
	     24  second-output result, lane 1
	     32  saved %xmm1 (two 64-bit pointers)
	     48  saved %xmm2 (two 64-bit pointers)
	     64  saved %xmm0 (two doubles)
	     80  padding
	   88 bytes plus the 8-byte return address keep %rsp 16-byte
	   aligned for the movaps/movdqa stores and for the calls.  */
	subq	$88, %rsp
	cfi_adjust_cfa_offset(88)
	movaps	%xmm0, 64(%rsp)
	lea	(%rsp), %rdi
	movdqa	%xmm1, 32(%rdi)
	lea	16(%rsp), %rsi
	movdqa	%xmm2, 32(%rsi)
	/* Lane 0: the scalar argument is already in the low half of
	   %xmm0; results go to 0(%rsp) and 16(%rsp).  */
	call	JUMPTARGET(\callee)
	/* Lane 1: results go to 8(%rsp) and 24(%rsp).  */
	movsd	72(%rsp), %xmm0
	lea	8(%rsp), %rdi
	lea	24(%rsp), %rsi
	call	JUMPTARGET(\callee)
	/* Reload the four destination pointers ...  */
	movq	32(%rsp), %rdx
	movq	48(%rsp), %rsi
	movq	40(%rsp), %r8
	movq	56(%rsp), %r10
	/* ... and the four results ...  */
	movq	(%rsp), %rax
	movq	16(%rsp), %rcx
	movq	8(%rsp), %rdi
	movq	24(%rsp), %r9
	/* ... then scatter them.  */
	movq	%rax, (%rdx)
	movq	%rcx, (%rsi)
	movq	%rdi, (%r8)
	movq	%r9, (%r10)
	addq	$88, %rsp
	cfi_adjust_cfa_offset(-88)
	ret
#else
	/* ILP32 frame, offsets from %esp after the adjustment:
	      0  saved %xmm0 (two doubles)
	     16  saved %xmm2 (two 32-bit pointers in the low 8 bytes)
	     32  saved %xmm1 (two 32-bit pointers in the low 8 bytes)
	     48  first-output result, lane 0
	     56  first-output result, lane 1
	     64  second-output result, lane 0
	     72  second-output result, lane 1
	     80  padding
	   %rbp and %rbx are callee-saved, so they keep the addresses of
	   the two result areas alive across the first call.  */
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	pushq	%rbx
	.cfi_def_cfa_offset 24
	.cfi_offset 3, -24
	subl	$88, %esp
	.cfi_def_cfa_offset 112
	leal	64(%rsp), %esi
	movaps	%xmm1, 32(%esp)
	leal	48(%rsp), %edi
	movaps	%xmm2, 16(%esp)
	movq	%rsi, %rbp
	movq	%rdi, %rbx
	movaps	%xmm0, (%esp)
	/* Lane 0: results go to 48(%esp) and 64(%esp).  */
	call	JUMPTARGET(\callee)
	/* Lane 1: the callee takes a scalar, so load just that double
	   instead of a 16-byte vector that would also pull in part of
	   the saved %xmm2.  Results go to 56(%esp) and 72(%esp).  */
	movsd	8(%esp), %xmm0
	leal	8(%rbp), %esi
	leal	8(%rbx), %edi
	call	JUMPTARGET(\callee)
	movdqa	32(%esp), %xmm1
	movsd	48(%esp), %xmm0
	/* %rax = both first-output pointers: lane 0 in bits 0-31,
	   lane 1 in bits 32-63.  */
	movq	%xmm1, %rax
	movdqa	16(%esp), %xmm2
	movsd	%xmm0, (%eax)
	movsd	56(%esp), %xmm0
	/* Bring the lane-1 pointer down into %eax.  A plain shift is
	   used because pextrd is an SSE4.1 instruction.  */
	shrq	$32, %rax
	movsd	%xmm0, (%eax)
	movsd	64(%esp), %xmm0
	/* Same again for the second-output pointers.  */
	movq	%xmm2, %rax
	movsd	%xmm0, (%eax)
	movsd	72(%esp), %xmm0
	shrq	$32, %rax
	movsd	%xmm0, (%eax)
	addl	$88, %esp
	.cfi_def_cfa_offset 24
	popq	%rbx
	.cfi_def_cfa_offset 16
	popq	%rbp
	.cfi_def_cfa_offset 8
	ret
#endif
.endm

/* Two-lane sincos entry point taking the argument vector in %xmm0 and
   per-lane destination pointers in %xmm1 and %xmm2.  */
ENTRY (_ZGVbN2vvv_sincos)
WRAPPER_IMPL_SSE2_fFF_vvv sincos
END (_ZGVbN2vvv_sincos)

#ifndef USE_MULTIARCH
 libmvec_hidden_def (_ZGVbN2vvv_sincos)
#endif