1/* Function sincosf vectorized with SSE2.
2   Copyright (C) 2014-2022 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20#include "svml_s_wrapper_impl.h"
21
	.text
/* 4-lane single-precision sincos, "vl4l4" variant: one input vector in
   %xmm0 plus two linear output arrays (sin, cos) passed by pointer.
   Implemented via the generic SSE2 wrapper macro, which loops the
   scalar sincosf over the four lanes.  */
ENTRY (_ZGVbN4vl4l4_sincosf)
WRAPPER_IMPL_SSE2_fFF sincosf
END (_ZGVbN4vl4l4_sincosf)
libmvec_hidden_def (_ZGVbN4vl4l4_sincosf)
27
/* SSE2 ISA version as wrapper to scalar (for vector
   function declared with #pragma omp declare simd notinbranch).

   "vvv" variant of vectorized sincosf: the sin and cos destinations
   arrive as vectors of per-lane POINTERS (not as two linear arrays).
   The wrapper calls the scalar \callee once per lane, collecting the
   results in a stack scratch area, then scatters each result through
   its lane pointer.  */
.macro WRAPPER_IMPL_SSE2_fFF_vvv callee
#ifndef __ILP32__
        /* LP64: pointers are 64-bit, so the eight destination pointers
           (4 sin + 4 cos) arrive two per register in %xmm1-%xmm4.
           Frame layout (8-byte return address + 120 = 128 keeps %rsp
           16-byte aligned at each call):
             0(%rsp)..12(%rsp)   scalar sin results
             16(%rsp)..28(%rsp)  scalar cos results
             32(%rsp)..56(%rsp)  sin destination pointers (%xmm1,%xmm2)
             64(%rsp)..88(%rsp)  cos destination pointers (%xmm3,%xmm4)
             96(%rsp)..108(%rsp) saved input vector (%xmm0).  */
        subq      $120, %rsp
        cfi_adjust_cfa_offset(120)
        movaps    %xmm0, 96(%rsp)       /* Save the four inputs.  */
        lea       (%rsp), %rdi          /* Lane 0 sin result slot.  */
        movdqa    %xmm1, 32(%rdi)       /* sin ptrs 0-1 -> 32(%rsp).  */
        lea       16(%rsp), %rsi        /* Lane 0 cos result slot.  */
        movdqa    %xmm2, 32(%rsi)       /* sin ptrs 2-3 -> 48(%rsp).  */
        movdqa    %xmm3, 48(%rsi)       /* cos ptrs 0-1 -> 64(%rsp).  */
        movdqa    %xmm4, 64(%rsi)       /* cos ptrs 2-3 -> 80(%rsp).  */
        call      JUMPTARGET(\callee)   /* \callee(x0, %rdi, %rsi); %xmm0
                                           still holds lane 0 here.  */
        movss     100(%rsp), %xmm0      /* Lane 1 input.  */
        lea       4(%rsp), %rdi
        lea       20(%rsp), %rsi
        call      JUMPTARGET(\callee)
        movss     104(%rsp), %xmm0      /* Lane 2 input.  */
        lea       8(%rsp), %rdi
        lea       24(%rsp), %rsi
        call      JUMPTARGET(\callee)
        movss     108(%rsp), %xmm0      /* Lane 3 input.  */
        lea       12(%rsp), %rdi
        lea       28(%rsp), %rsi
        call      JUMPTARGET(\callee)
        /* Scatter: reload the eight lane pointers and the eight scalar
           results, then store each result through its pointer.  Loads
           are batched to overlap with the dependent stores.  */
        movq      32(%rsp), %rdx        /* sin ptr 0.  */
        movq      40(%rsp), %rsi        /* sin ptr 1.  */
        movq      48(%rsp), %r8         /* sin ptr 2.  */
        movq      56(%rsp), %r10        /* sin ptr 3.  */
        movl      (%rsp), %eax          /* sin(x0).  */
        movl      4(%rsp), %ecx         /* sin(x1).  */
        movl      8(%rsp), %edi         /* sin(x2).  */
        movl      12(%rsp), %r9d        /* sin(x3).  */
        movl      %eax, (%rdx)
        movl      %ecx, (%rsi)
        movq      64(%rsp), %rax        /* cos ptr 0.  */
        movq      72(%rsp), %rcx        /* cos ptr 1.  */
        movl      %edi, (%r8)
        movl      %r9d, (%r10)
        movq      80(%rsp), %rdi        /* cos ptr 2.  */
        movq      88(%rsp), %r9         /* cos ptr 3.  */
        movl      16(%rsp), %r11d       /* cos(x0).  */
        movl      20(%rsp), %edx        /* cos(x1).  */
        movl      24(%rsp), %esi        /* cos(x2).  */
        movl      28(%rsp), %r8d       /* cos(x3).  */
        movl      %r11d, (%rax)
        movl      %edx, (%rcx)
        movl      %esi, (%rdi)
        movl      %r8d, (%r9)
        addq      $120, %rsp
        cfi_adjust_cfa_offset(-120)
        ret
#else
        /* x32 (ILP32): pointers are 32-bit, so all four sin pointers fit
           in %xmm1 and all four cos pointers in %xmm2.  Callee-saved
           %rbp/%rbx keep the result-area bases live across the calls.
           Frame: 8 (ret) + 8 (%rbp) + 8 (%rbx) + 88 = 112, so %rsp stays
           16-byte aligned at each call.  Layout:
             0(%esp)..12(%esp)   sin destination pointers (%xmm1)
             16(%esp)..28(%esp)  cos destination pointers (%xmm2)
             32(%esp)..44(%esp)  saved input vector (%xmm0)
             48(%esp)..60(%esp)  scalar sin results
             64(%esp)..76(%esp)  scalar cos results.  */
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset 6, -16
        pushq   %rbx
        .cfi_def_cfa_offset 24
        .cfi_offset 3, -24
        subl    $88, %esp
        .cfi_def_cfa_offset 112
        leal    64(%rsp), %esi          /* Lane 0 cos result slot.  */
        movaps  %xmm1, (%esp)           /* Spill sin pointers.  */
        leal    48(%rsp), %edi          /* Lane 0 sin result slot.  */
        movaps  %xmm2, 16(%esp)         /* Spill cos pointers.  */
        movq    %rsi, %rbp              /* cos results base, call-safe.  */
        movq    %rdi, %rbx              /* sin results base, call-safe.  */
        movaps  %xmm0, 32(%esp)         /* Save inputs; %xmm0 still holds
                                           lane 0 for the first call.  */
        call    JUMPTARGET(\callee)
        movups  36(%esp), %xmm0         /* Lane 1 input in low element.  */
        leal    4(%rbp), %esi
        leal    4(%rbx), %edi
        call    JUMPTARGET(\callee)
        movups  40(%esp), %xmm0         /* Lane 2 input.  */
        leal    8(%rbp), %esi
        leal    8(%rbx), %edi
        call    JUMPTARGET(\callee)
        movups  44(%esp), %xmm0         /* Lane 3 input.  */
        leal    12(%rbp), %esi
        leal    12(%rbx), %edi
        call    JUMPTARGET(\callee)
        /* Scatter: even lanes reload their 32-bit pointer from the stack
           (movq loads two pointers, only %eax is used); odd lanes are
           extracted from %xmm4/%xmm7 with pextrd.
           NOTE(review): pextrd is an SSE4.1 instruction, not SSE2,
           despite this macro's name — confirm the x32 baseline ISA.  */
        movq    (%esp), %rax            /* sin ptr 0.  */
        movss   48(%esp), %xmm0         /* sin(x0).  */
        movdqa  (%esp), %xmm4           /* All four sin pointers.  */
        movdqa  16(%esp), %xmm7         /* All four cos pointers.  */
        movss   %xmm0, (%eax)
        movss   52(%esp), %xmm0         /* sin(x1).  */
        pextrd  $1, %xmm4, %eax         /* sin ptr 1.  */
        movss   %xmm0, (%eax)
        movq    8(%esp), %rax           /* sin ptr 2.  */
        movss   56(%esp), %xmm0         /* sin(x2).  */
        movss   %xmm0, (%eax)
        movss   60(%esp), %xmm0         /* sin(x3).  */
        pextrd  $3, %xmm4, %eax         /* sin ptr 3.  */
        movss   %xmm0, (%eax)
        movq    16(%esp), %rax          /* cos ptr 0.  */
        movss   64(%esp), %xmm0         /* cos(x0).  */
        movss   %xmm0, (%eax)
        movss   68(%esp), %xmm0         /* cos(x1).  */
        pextrd  $1, %xmm7, %eax         /* cos ptr 1.  */
        movss   %xmm0, (%eax)
        movq    24(%esp), %rax          /* cos ptr 2.  */
        movss   72(%esp), %xmm0         /* cos(x2).  */
        movss   %xmm0, (%eax)
        movss   76(%esp), %xmm0         /* cos(x3).  */
        pextrd  $3, %xmm7, %eax         /* cos ptr 3.  */
        movss   %xmm0, (%eax)
        addl    $88, %esp
        .cfi_def_cfa_offset 24
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        ret
#endif
.endm
145
/* 4-lane single-precision sincos, "vvv" variant: inputs in %xmm0 and
   per-lane destination pointers in vector registers (see the macro
   above for the LP64 vs. x32 register layouts).  */
ENTRY (_ZGVbN4vvv_sincosf)
WRAPPER_IMPL_SSE2_fFF_vvv sincosf
END (_ZGVbN4vvv_sincosf)

#ifndef USE_MULTIARCH
 libmvec_hidden_def (_ZGVbN4vvv_sincosf)
#endif
153