/* x86-64 __mpn_lshift --
   Copyright (C) 2007-2022 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */

#include "sysdep.h"
#include "asm-syntax.h"

/* Register aliases used throughout this file (AT&T operand order).  */
#define rp	%rdi	/* destination limb pointer */
#define up	%rsi	/* source limb pointer */
#define n	%rdx	/* limb count */
#define cnt	%cl	/* shift count in bits; the code below spells %cl directly */

	.text
/* __mpn_lshift -- shift a multi-limb number left by a bit count.

   In:   rp  (%rdi)  destination, receives n 64-bit limbs
	 up  (%rsi)  source, n 64-bit limbs
	 n   (%rdx)  number of limbs
	 cnt (%cl)   shift count in bits
   Out:  %rax = the bits shifted out of the most significant source limb,
	 i.e. that limb >> (64 - cnt), built with xor + shld.
   Uses: %r8-%r11 as a rotating window of source limbs; rp, up and n are
	 modified.  No stack is used and no other function is called.

   Strategy: both pointers are first advanced to the most significant
   limb and the operand is walked from the top limb down to limb 0.
   Each result limb is "shld %cl, lower, limb", which shifts limb left
   and fills the vacated low bits from the top of the next lower limb;
   the lowest limb gets a plain shl, so zeros are shifted in.  The main
   loop is unrolled four times; n mod 4 selects the entry point
   (L(top), L(10), L(01) or L(00)) and L(end) always produces the last
   three limbs.  n == 1 and n == 2 are finished in L(le1) and L(le2).

   NOTE(review): n == 0 is not checked; it would take the L(b00) path
   and touch limbs outside the operands.  Presumably callers guarantee
   n >= 1 and 1 <= cnt <= 63, and the C prototype is
   mp_limb_t __mpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int)
   -- confirm against the GMP mpn_lshift contract.  */
ENTRY (__mpn_lshift)
	lea	-8(rp,n,8), rp		/* rp = address of most significant result limb */
	lea	-8(up,n,8), up		/* up = address of most significant source limb */

	mov	%edx, %eax		/* low 32 bits of n are enough for n mod 4 */
	and	$3, %eax		/* eax = n mod 4 */
	jne	L(nb00)
L(b00):	/* n mod 4 == 0: n = 4, 8, 12, ... */
	mov	(up), %r10		/* top limb */
	mov	-8(up), %r11
	xor	%eax, %eax
	shld	%cl, %r10, %rax		/* return value: bits shifted out of the top limb */
	mov	-16(up), %r8
	lea	24(rp), rp		/* bias rp so that -24(rp) at L(00) is the top limb */
	sub	$4, n
	jmp	L(00)

L(nb00):/* n mod 4 != 0 */
	cmp	$2, %eax
	jae	L(nb01)			/* n mod 4 is 2 or 3 */
	/* n mod 4 == 1: n = 1, 5, 9, ... */
L(b01):	mov	(up), %r9		/* top limb */
	xor	%eax, %eax
	shld	%cl, %r9, %rax		/* return value: bits shifted out of the top limb */
	sub	$2, n
	jb	L(le1)			/* borrow: n was 1 */
	mov	-8(up), %r10
	mov	-16(up), %r11
	lea	-8(up), up		/* bias up and rp to match the offsets used at L(01) */
	lea	16(rp), rp
	jmp	L(01)
L(le1):	shl	%cl, %r9		/* single limb: zeros are shifted in */
	mov	%r9, (rp)
	ret

L(nb01):/* n mod 4 is 2 or 3; flags are still those of the cmp $2 above */
	jne	L(b11)
	/* n mod 4 == 2: n = 2, 6, 10, ... */
L(b10):	mov	(up), %r8		/* top limb */
	mov	-8(up), %r9
	xor	%eax, %eax
	shld	%cl, %r8, %rax		/* return value: bits shifted out of the top limb */
	sub	$3, n
	jb	L(le2)			/* borrow: n was 2 */
	mov	-16(up), %r10
	lea	-16(up), up		/* bias up and rp to match the offsets used at L(10) */
	lea	8(rp), rp
	jmp	L(10)
L(le2):	shld	%cl, %r9, %r8		/* high result limb takes its low bits from r9 */
	mov	%r8, (rp)
	shl	%cl, %r9		/* low result limb: zeros are shifted in */
	mov	%r9, -8(rp)
	ret

	.p2align 4		/* performance critical! */
L(b11):	/* n mod 4 == 3: n = 3, 7, 11, ... */
	mov	(up), %r11		/* top limb */
	mov	-8(up), %r8
	xor	%eax, %eax
	shld	%cl, %r11, %rax		/* return value: bits shifted out of the top limb */
	mov	-16(up), %r9
	lea	-24(up), up
	sub	$4, n
	jb	L(end)			/* borrow: n was 3, only the tail is needed */

	.p2align 4
	/* Main loop: four result limbs per iteration.  The registers rotate
	   in the order r11, r8, r9, r10; each step shifts one limb, loads a
	   limb that a later step needs and stores the finished limb.  */
L(top):	shld	%cl, %r8, %r11
	mov	(up), %r10
	mov	%r11, (rp)
L(10):	shld	%cl, %r9, %r8
	mov	-8(up), %r11
	mov	%r8, -8(rp)
L(01):	shld	%cl, %r10, %r9
	mov	-16(up), %r8
	mov	%r9, -16(rp)
L(00):	shld	%cl, %r11, %r10
	mov	-24(up), %r9
	mov	%r10, -24(rp)
	add	$-32, up		/* step both pointers down by four limbs */
	lea	-32(rp), rp
	sub	$4, n
	jnc	L(top)			/* loop until the subtraction borrows */

	/* Tail: r11, r8 and r9 hold the three least significant source
	   limbs, with r9 being limb 0.  */
L(end):	shld	%cl, %r8, %r11
	mov	%r11, (rp)
	shld	%cl, %r9, %r8
	mov	%r8, -8(rp)
	shl	%cl, %r9		/* lowest limb: zeros are shifted in */
	mov	%r9, -16(rp)
	ret
END (__mpn_lshift)