/* Function sincosf vectorized with SSE2.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_s_wrapper_impl.h"

	.text
/* _ZGVbN4vl4l4_sincosf (x, sin_out, cos_out): 4-lane single-precision
   sincos where the results are written to two linear float arrays.
   Implemented entirely by the shared WRAPPER_IMPL_SSE2_fFF macro from
   svml_s_wrapper_impl.h, which loops scalar sincosf over the lanes.  */
ENTRY (_ZGVbN4vl4l4_sincosf)
WRAPPER_IMPL_SSE2_fFF sincosf
END (_ZGVbN4vl4l4_sincosf)
libmvec_hidden_def (_ZGVbN4vl4l4_sincosf)

/* SSE2 ISA version as wrapper to scalar (for vector
   function declared with #pragma omp declare simd notinbranch).

   "vvv" variant: the sin/cos destinations arrive as VECTORS OF
   POINTERS rather than as two array bases, so after calling the
   scalar \callee per lane the results must be scattered through
   each individual pointer.  */
.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
#ifndef __ILP32__
	/* LP64: pointers are 64-bit, so the four sin result pointers
	   arrive in %xmm1/%xmm2 and the four cos result pointers in
	   %xmm3/%xmm4 (two pointers per register).

	   120-byte frame layout (on entry %rsp % 16 == 8, so the
	   subtraction leaves %rsp 16-byte aligned for movaps/movdqa
	   and for the calls below):
	      0(%rsp).. 15(%rsp): four float sin results (scratch)
	     16(%rsp).. 31(%rsp): four float cos results (scratch)
	     32(%rsp).. 63(%rsp): %xmm1/%xmm2 = sin result pointers
	     64(%rsp).. 95(%rsp): %xmm3/%xmm4 = cos result pointers
	     96(%rsp)..111(%rsp): %xmm0 = input vector x.  */
	subq	$120, %rsp
	cfi_adjust_cfa_offset(120)
	movaps	%xmm0, 96(%rsp)
	/* %rdi/%rsi double as the spill bases here AND as the two
	   pointer arguments (sin, cos) for the lane-0 call below.  */
	lea	(%rsp), %rdi
	movdqa	%xmm1, 32(%rdi)
	lea	16(%rsp), %rsi
	movdqa	%xmm2, 32(%rsi)
	movdqa	%xmm3, 48(%rsi)
	movdqa	%xmm4, 64(%rsi)
	/* Lane 0: \callee (x[0], 0(%rsp), 16(%rsp)).  %xmm0 still holds
	   the whole input vector; the scalar callee uses the low single
	   (lanes 1-3 below are loaded with movss accordingly).  */
	call	JUMPTARGET(\callee)
	/* Lane 1: \callee (x[1], 4(%rsp), 20(%rsp)).  */
	movss	100(%rsp), %xmm0
	lea	4(%rsp), %rdi
	lea	20(%rsp), %rsi
	call	JUMPTARGET(\callee)
	/* Lane 2: \callee (x[2], 8(%rsp), 24(%rsp)).  */
	movss	104(%rsp), %xmm0
	lea	8(%rsp), %rdi
	lea	24(%rsp), %rsi
	call	JUMPTARGET(\callee)
	/* Lane 3: \callee (x[3], 12(%rsp), 28(%rsp)).  */
	movss	108(%rsp), %xmm0
	lea	12(%rsp), %rdi
	lea	28(%rsp), %rsi
	call	JUMPTARGET(\callee)
	/* Scatter: reload the eight destination pointers from the spill
	   area and store each lane's sin result (0..12(%rsp)) and cos
	   result (16..28(%rsp)) through its own pointer.  Loads and
	   stores are interleaved to keep all values in scratch regs.  */
	movq	32(%rsp), %rdx
	movq	40(%rsp), %rsi
	movq	48(%rsp), %r8
	movq	56(%rsp), %r10
	movl	(%rsp), %eax
	movl	4(%rsp), %ecx
	movl	8(%rsp), %edi
	movl	12(%rsp), %r9d
	movl	%eax, (%rdx)
	movl	%ecx, (%rsi)
	movq	64(%rsp), %rax
	movq	72(%rsp), %rcx
	movl	%edi, (%r8)
	movl	%r9d, (%r10)
	movq	80(%rsp), %rdi
	movq	88(%rsp), %r9
	movl	16(%rsp), %r11d
	movl	20(%rsp), %edx
	movl	24(%rsp), %esi
	movl	28(%rsp), %r8d
	movl	%r11d, (%rax)
	movl	%edx, (%rcx)
	movl	%esi, (%rdi)
	movl	%r8d, (%r9)
	addq	$120, %rsp
	cfi_adjust_cfa_offset(-120)
	ret
#else
	/* ILP32 (x32): pointers are 32-bit, so all four sin result
	   pointers arrive packed in %xmm1 and all four cos result
	   pointers in %xmm2.

	   Frame layout (88 bytes below the saved %rbp/%rbx; the two
	   pushes plus the subtraction leave %rsp 16-byte aligned):
	      0(%esp)..15(%esp): %xmm1 = sin result pointers
	     16(%esp)..31(%esp): %xmm2 = cos result pointers
	     32(%esp)..47(%esp): %xmm0 = input vector x
	     48(%esp)..63(%esp): four float sin results (scratch)
	     64(%esp)..79(%esp): four float cos results (scratch)

	   Callee-saved %rbx/%rbp hold the sin/cos scratch bases so
	   they survive the four calls.  */
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	pushq	%rbx
	.cfi_def_cfa_offset 24
	.cfi_offset 3, -24
	subl	$88, %esp
	.cfi_def_cfa_offset 112
	leal	64(%rsp), %esi
	movaps	%xmm1, (%esp)
	leal	48(%rsp), %edi
	movaps	%xmm2, 16(%esp)
	movq	%rsi, %rbp
	movq	%rdi, %rbx
	movaps	%xmm0, 32(%esp)
	/* Lane 0: \callee (x[0], 48(%esp), 64(%esp)).  */
	call	JUMPTARGET(\callee)
	/* Lane 1: \callee (x[1], %rbx+4, %rbp+4).  */
	movups	36(%esp), %xmm0
	leal	4(%rbp), %esi
	leal	4(%rbx), %edi
	call	JUMPTARGET(\callee)
	/* Lane 2: \callee (x[2], %rbx+8, %rbp+8).  */
	movups	40(%esp), %xmm0
	leal	8(%rbp), %esi
	leal	8(%rbx), %edi
	call	JUMPTARGET(\callee)
	/* Lane 3: \callee (x[3], %rbx+12, %rbp+12).  */
	movups	44(%esp), %xmm0
	leal	12(%rbp), %esi
	leal	12(%rbx), %edi
	call	JUMPTARGET(\callee)
	/* Scatter: %xmm4 = spilled sin pointer vector, %xmm7 = spilled
	   cos pointer vector.  Even lanes (0, 2) are fetched with movq
	   from the spill area (only the low 32 bits are dereferenced);
	   odd lanes (1, 3) are extracted with pextrd.  */
	movq	(%esp), %rax
	movss	48(%esp), %xmm0
	movdqa	(%esp), %xmm4
	movdqa	16(%esp), %xmm7
	movss	%xmm0, (%eax)
	movss	52(%esp), %xmm0
	pextrd	$1, %xmm4, %eax
	movss	%xmm0, (%eax)
	movq	8(%esp), %rax
	movss	56(%esp), %xmm0
	movss	%xmm0, (%eax)
	movss	60(%esp), %xmm0
	pextrd	$3, %xmm4, %eax
	movss	%xmm0, (%eax)
	movq	16(%esp), %rax
	movss	64(%esp), %xmm0
	movss	%xmm0, (%eax)
	movss	68(%esp), %xmm0
	pextrd	$1, %xmm7, %eax
	movss	%xmm0, (%eax)
	movq	24(%esp), %rax
	movss	72(%esp), %xmm0
	movss	%xmm0, (%eax)
	movss	76(%esp), %xmm0
	pextrd	$3, %xmm7, %eax
	movss	%xmm0, (%eax)
	addl	$88, %esp
	.cfi_def_cfa_offset 24
	popq	%rbx
	.cfi_def_cfa_offset 16
	popq	%rbp
	.cfi_def_cfa_offset 8
	ret
#endif
.endm

/* _ZGVbN4vvv_sincosf: pointer-vector variant (see macro above).  */
ENTRY (_ZGVbN4vvv_sincosf)
WRAPPER_IMPL_SSE2_fFF_vvv sincosf
END (_ZGVbN4vvv_sincosf)

/* In the multiarch build the hidden alias is provided by the
   ifunc-selected implementation instead.  */
#ifndef USE_MULTIARCH
 libmvec_hidden_def (_ZGVbN4vvv_sincosf)
#endif