/* x86-64 __mpn_rshift -- shift an n-word operand right by cnt bits.
   Copyright (C) 2007-2022 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */

#include "sysdep.h"
#include "asm-syntax.h"

/* Symbolic names for the argument registers used throughout this file.
   rp and up are used as base registers for stores and loads respectively,
   n is a 64-bit counter, and cnt is the 8-bit register consumed by the
   shr/shrd instructions as their shift count.  */
#define rp	%rdi
#define up	%rsi
#define n	%rdx
#define cnt	%cl

	.text
/* __mpn_rshift: shift the n 64-bit words at up right by cnt bits and store
   the n result words at rp, working from the lowest address upward.

   In:   rp  (%rdi)  destination pointer
	 up  (%rsi)  source pointer
	 n   (%rdx)  number of words
	 cnt (%cl)   shift count
   Out:  %rax holds the low cnt bits of up[0] (the bits shifted out) in its
	 top cnt bits; all other bits are zero.
   Clobbers: %rax, %rdx, %rsi, %rdi, %r8-%r11 and the flags.  The stack is
   not touched.

   NOTE(review): n == 0 is not handled; it would take the L(b00) path and
   read and write past the operands.  Presumably callers guarantee n >= 1
   and 1 <= cnt <= 63, as the GMP manual requires for mpn_rshift -- confirm.

   Structure: n is classified by n mod 4 (only the low 32 bits of n are
   examined, which is sufficient for the low two bits).  Each class preloads
   three source words, adjusts up (and rp where needed) and joins the
   four-stage loop at the stage that matches, so the fixed displacements
   inside the loop address the right words.  Every stage merges two adjacent
   source words held in registers with shrd, loads a later source word and
   stores one result word.  n is pre-decremented on each entry path so that
   the "sub $4, n" at the loop bottom borrows once only the final three
   words are left; L(end) stores those.  The topmost word is shifted with
   plain shr, so zeros enter from above.  */
ENTRY (__mpn_rshift)
	mov	%edx, %eax
	and	$3, %eax		/* eax = n mod 4 */
	jne	L(nb00)

L(b00):	/* n = 4, 8, 12, ... */
	mov	(up), %r10		/* r10 = up[0] */
	mov	8(up), %r11		/* r11 = up[1] */
	xor	%eax, %eax
	shrd	%cl, %r10, %rax		/* return value: bits leaving up[0] */
	mov	16(up), %r8		/* r8 = up[2] */
	lea	8(up), up		/* 16(up) at L(00) is then up[3] */
	lea	-24(rp), rp		/* 24(rp) at L(00) is then rp[0] */
	sub	$4, n
	jmp	L(00)

L(nb00):	/* n mod 4 != 0 */
	cmp	$2, %eax		/* flags are reused by jne at L(nb01) */
	jae	L(nb01)
L(b01):	/* n = 1, 5, 9, ... */
	mov	(up), %r9		/* r9 = up[0] */
	xor	%eax, %eax
	shrd	%cl, %r9, %rax		/* return value: bits leaving up[0] */
	sub	$2, n
	jb	L(le1)			/* borrow: n was 1 */
	mov	8(up), %r10		/* r10 = up[1] */
	mov	16(up), %r11		/* r11 = up[2] */
	lea	16(up), up		/* 8(up) at L(01) is then up[3] */
	lea	-16(rp), rp		/* 16(rp) at L(01) is then rp[0] */
	jmp	L(01)
L(le1):	/* n = 1: the only word, zeros shift in from above */
	shr	%cl, %r9
	mov	%r9, (rp)
	ret

L(nb01):	/* n mod 4 is 2 or 3 */
	jne	L(b11)			/* not equal to 2, so it is 3 */
L(b10):	/* n = 2, 6, 10, ... */
	mov	(up), %r8		/* r8 = up[0] */
	mov	8(up), %r9		/* r9 = up[1] */
	xor	%eax, %eax
	shrd	%cl, %r8, %rax		/* return value: bits leaving up[0] */
	sub	$3, n
	jb	L(le2)			/* borrow: n was 2 */
	mov	16(up), %r10		/* r10 = up[2] */
	lea	24(up), up		/* (up) at L(10) is then up[3] */
	lea	-8(rp), rp		/* 8(rp) at L(10) is then rp[0] */
	jmp	L(10)
L(le2):	/* n = 2: two words, no loop needed */
	shrd	%cl, %r9, %r8		/* rp[0] = up[0] merged with up[1] */
	mov	%r8, (rp)
	shr	%cl, %r9		/* rp[1] = up[1] with zeros on top */
	mov	%r9, 8(rp)
	ret

	.p2align 4
L(b11):	/* n = 3, 7, 11, ... */
	mov	(up), %r11		/* r11 = up[0] */
	mov	8(up), %r8		/* r8 = up[1] */
	xor	%eax, %eax
	shrd	%cl, %r11, %rax		/* return value: bits leaving up[0] */
	mov	16(up), %r9		/* r9 = up[2] */
	lea	32(up), up		/* -8(up) at L(top) is then up[3] */
	sub	$4, n
	jb	L(end)			/* borrow: n was 3 */

	/* Main loop: four result words per iteration.  The source words
	   rotate through r11, r8, r9, r10 in ascending address order.  */
	.p2align 4
L(top):	shrd	%cl, %r8, %r11
	mov	-8(up), %r10
	mov	%r11, (rp)
L(10):	shrd	%cl, %r9, %r8
	mov	(up), %r11
	mov	%r8, 8(rp)
L(01):	shrd	%cl, %r10, %r9
	mov	8(up), %r8
	mov	%r9, 16(rp)
L(00):	shrd	%cl, %r11, %r10
	mov	16(up), %r9
	mov	%r10, 24(rp)
	add	$32, up
	lea	32(rp), rp
	sub	$4, n			/* borrow ends the loop */
	jnc	L(top)

	/* Tail: r11, r8, r9 hold the last three source words.  */
L(end):	shrd	%cl, %r8, %r11
	mov	%r11, (rp)
	shrd	%cl, %r9, %r8
	mov	%r8, 8(rp)
	shr	%cl, %r9		/* topmost word: zeros shift in */
	mov	%r9, 16(rp)
	ret
END (__mpn_rshift)