/* Function sincos vectorized with SSE2.
   Copyright (C) 2014-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "svml_d_wrapper_impl.h"

	.text
ENTRY (_ZGVbN2vl8l8_sincos)
WRAPPER_IMPL_SSE2_fFF sincos
END (_ZGVbN2vl8l8_sincos)
libmvec_hidden_def (_ZGVbN2vl8l8_sincos)

/* SSE2 ISA version as wrapper to scalar (for vector
   function declared with #pragma omp declare simd notinbranch).  */
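/* Illustrative sketch only (an assumption for reference, not taken from this
   file): a scalar prototype that, declared as below, the compiler maps onto
   the _ZGVbN2vvv_sincos variant implemented by this wrapper:

       #pragma omp declare simd notinbranch
       void sincos (double x, double *sinp, double *cosp);

   With a vector length of 2, %xmm0 carries the two double arguments and
   %xmm1/%xmm2 carry the two sine and two cosine result pointers; the macro
   below spills them, calls the scalar callee twice, and scatters the results
   through the saved pointers.  */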
.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
#ifndef __ILP32__
        subq      $88, %rsp
        cfi_adjust_cfa_offset(88)
        /* Spill the vector of inputs and both vectors of result pointers,
           then call the scalar callee on element 0, storing its sine and
           cosine results at 0(%rsp) and 16(%rsp).  */
        movaps    %xmm0, 64(%rsp)
        lea       (%rsp), %rdi
        movdqa    %xmm1, 32(%rdi)
        lea       16(%rsp), %rsi
        movdqa    %xmm2, 32(%rsi)
        call      JUMPTARGET(\callee)
        /* Reload element 1 and compute it into 8(%rsp) and 24(%rsp).  */
        movsd     72(%rsp), %xmm0
        lea       8(%rsp), %rdi
        lea       24(%rsp), %rsi
        call      JUMPTARGET(\callee)
        /* Scatter the four results through the saved result pointers.  */
        movq      32(%rsp), %rdx
        movq      48(%rsp), %rsi
        movq      40(%rsp), %r8
        movq      56(%rsp), %r10
        movq      (%rsp), %rax
        movq      16(%rsp), %rcx
        movq      8(%rsp), %rdi
        movq      24(%rsp), %r9
        movq      %rax, (%rdx)
        movq      %rcx, (%rsi)
        movq      %rdi, (%r8)
        movq      %r9, (%r10)
        addq      $88, %rsp
        cfi_adjust_cfa_offset(-88)
        ret
#else
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        pushq   %rbx
        .cfi_def_cfa_offset 24
        .cfi_offset 3, -24
        subl    $88, %esp
        .cfi_def_cfa_offset 112
        /* x32: spill the inputs and the 32-bit result pointers, keep the
           stack addresses of the element-0 results in %rbx/%rbp, and call
           the scalar callee on element 0.  */
        leal    64(%rsp), %esi
        movaps  %xmm1, 32(%esp)
        leal    48(%rsp), %edi
        movaps  %xmm2, 16(%esp)
        movq    %rsi, %rbp
        movq    %rdi, %rbx
        movaps  %xmm0, (%esp)
        call    JUMPTARGET(\callee)
        /* Reload element 1 and compute it into the adjacent stack slots.  */
        movupd  8(%esp), %xmm0
        leal    8(%rbp), %esi
        leal    8(%rbx), %edi
        call    JUMPTARGET(\callee)
        /* Scatter the results through the saved 32-bit result pointers.  */
        movdqa  32(%esp), %xmm1
        movsd   48(%esp), %xmm0
        movq    %xmm1, %rax
        movdqa  16(%esp), %xmm2
        movsd   %xmm0, (%eax)
        movsd   56(%esp), %xmm0
        pextrd  $1, %xmm1, %eax
        movsd   %xmm0, (%eax)
        movsd   64(%esp), %xmm0
        movq    %xmm2, %rax
        movsd   %xmm0, (%eax)
        movsd   72(%esp), %xmm0
        pextrd  $1, %xmm2, %eax
        movsd   %xmm0, (%eax)
        addl    $88, %esp
        .cfi_def_cfa_offset 24
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
#endif
.endm

ENTRY (_ZGVbN2vvv_sincos)
WRAPPER_IMPL_SSE2_fFF_vvv sincos
END (_ZGVbN2vvv_sincos)

#ifndef USE_MULTIARCH
 libmvec_hidden_def (_ZGVbN2vvv_sincos)
#endif