/* Function sincos vectorized with AVX2, wrapper version.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

	.text
ENTRY (_ZGVdN4vl8l8_sincos)
WRAPPER_IMPL_AVX_fFF _ZGVbN2vl8l8_sincos
END (_ZGVdN4vl8l8_sincos)
libmvec_hidden_def (_ZGVdN4vl8l8_sincos)

/* AVX2 ISA version as wrapper to SSE ISA version (for vector
   function declared with #pragma omp declare simd notinbranch).  */
.macro WRAPPER_IMPL_AVX2_fFF_vvv callee
#ifndef __ILP32__
	pushq	%rbp
	cfi_adjust_cfa_offset (8)
	cfi_rel_offset (%rbp, 0)
	movq	%rsp, %rbp
	cfi_def_cfa_register (%rbp)
	andq	$-32, %rsp
	subq	$160, %rsp
	vmovupd	%ymm0, 128(%rsp)
	lea	(%rsp), %rdi
	vmovdqu	%ymm1, 64(%rdi)
	vmovdqu	%ymm2, 96(%rdi)
	lea	32(%rsp), %rsi
	vzeroupper
	call	HIDDEN_JUMPTARGET(\callee)
	vmovupd	144(%rsp), %xmm0
	lea	16(%rsp), %rdi
	lea	48(%rsp), %rsi
	call	HIDDEN_JUMPTARGET(\callee)
	movq	64(%rsp), %rdx
	movq	96(%rsp), %rsi
	movq	72(%rsp), %r8
	movq	104(%rsp), %r10
	movq	(%rsp), %rax
	movq	32(%rsp), %rcx
	movq	8(%rsp), %rdi
	movq	40(%rsp), %r9
	movq	%rax, (%rdx)
	movq	%rcx, (%rsi)
	movq	80(%rsp), %rax
	movq	112(%rsp), %rcx
	movq	%rdi, (%r8)
	movq	%r9, (%r10)
	movq	88(%rsp), %rdi
	movq	120(%rsp), %r9
	movq	16(%rsp), %r11
	movq	48(%rsp), %rdx
	movq	24(%rsp), %rsi
	movq	56(%rsp), %r8
	movq	%r11, (%rax)
	movq	%rdx, (%rcx)
	movq	%rsi, (%rdi)
	movq	%r8, (%r9)
	movq	%rbp, %rsp
	cfi_def_cfa_register (%rsp)
	popq	%rbp
	cfi_adjust_cfa_offset (-8)
	cfi_restore (%rbp)
	ret
#else
	leal	8(%rsp), %r10d
	.cfi_def_cfa 10, 0
	andl	$-32, %esp
	pushq	-8(%r10d)
	pushq	%rbp
	.cfi_escape 0x10,0x6,0x2,0x76,0
	movl	%esp, %ebp
	pushq	%r12
	leal	-80(%rbp), %esi
	pushq	%r10
	.cfi_escape 0xf,0x3,0x76,0x70,0x6
	.cfi_escape 0x10,0xc,0x2,0x76,0x78
	leal	-112(%rbp), %edi
	movq	%rsi, %r12
	pushq	%rbx
	.cfi_escape 0x10,0x3,0x2,0x76,0x68
	movq	%rdi, %rbx
	subl	$152, %esp
	vmovaps	%xmm1, -128(%ebp)
	vmovaps	%xmm2, -144(%ebp)
	vmovapd	%ymm0, -176(%ebp)
	vzeroupper
	call	HIDDEN_JUMPTARGET(\callee)
	leal	16(%r12), %esi
	vmovapd	-160(%ebp), %xmm0
	leal	16(%rbx), %edi
	call	HIDDEN_JUMPTARGET(\callee)
	movq	-128(%ebp), %rax
	vmovsd	-112(%ebp), %xmm0
	vmovdqa	-128(%ebp), %xmm5
	vmovdqa	-144(%ebp), %xmm1
	vmovsd	%xmm0, (%eax)
	vmovsd	-104(%ebp), %xmm0
	vpextrd	$1, %xmm5, %eax
	vmovsd	%xmm0, (%eax)
	movq	-120(%ebp), %rax
	vmovsd	-96(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	vmovsd	-88(%ebp), %xmm0
	vpextrd	$3, %xmm5, %eax
	vmovsd	%xmm0, (%eax)
	movq	-144(%ebp), %rax
	vmovsd	-80(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	vmovsd	-72(%ebp), %xmm0
	vpextrd	$1, %xmm1, %eax
	vmovsd	%xmm0, (%eax)
	movq	-136(%ebp), %rax
	vmovsd	-64(%ebp), %xmm0
	vmovsd	%xmm0, (%eax)
	vmovsd	-56(%ebp), %xmm0
	vpextrd	$3, %xmm1, %eax
	vmovsd	%xmm0, (%eax)
	addl	$152, %esp
	popq	%rbx
	popq	%r10
	.cfi_def_cfa 10, 0
	popq	%r12
	popq	%rbp
	leal	-8(%r10), %esp
	.cfi_def_cfa 7, 8
	ret
#endif
.endm

ENTRY (_ZGVdN4vvv_sincos)
WRAPPER_IMPL_AVX2_fFF_vvv _ZGVbN2vl8l8_sincos
END (_ZGVdN4vvv_sincos)

#ifndef USE_MULTIARCH
 libmvec_hidden_def (_ZGVdN4vvv_sincos)
#endif
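
/* Usage sketch: with glibc's <math.h> declaring sincos through
   "#pragma omp declare simd" (see bits/math-vector.h) and a build along
   the lines of "gcc -O2 -mavx2 -ffast-math -fopenmp-simd", a scalar loop
   such as the one below may be vectorized into 4-wide calls to
   _ZGVdN4vl8l8_sincos or _ZGVdN4vvv_sincos, depending on the GCC version
   and the exact declaration it picks up.  The function name and build
   flags are illustrative only, not part of this interface:

     #define _GNU_SOURCE          // for sincos
     #include <math.h>
     #include <stddef.h>

     void
     vec_sincos (const double *x, double *s, double *c, size_t n)
     {
       // Each group of four iterations may become one AVX2 libmvec call.
     #pragma omp simd
       for (size_t i = 0; i < n; i++)
         sincos (x[i], &s[i], &c[i]);
     }
   */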