/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
 *
 * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
 *
 * License:
 * This code can be distributed under the terms of the GNU General Public
 * License (GPL) Version 2 provided that the above header down to and
 * including this sentence is retained in full.
 */

/*
 * Lookup tables defined outside this file.  The round macro in this file
 * indexes each one as four consecutive sub-tables of 4-byte entries
 * (TAB, TAB+1024, TAB+2048, TAB+3072), each selected by one byte.
 * aes_enc_blk uses crypto_ft_tab for its regular rounds and crypto_fl_tab
 * for its last round; aes_dec_blk uses crypto_it_tab and crypto_il_tab the
 * same way.
 */
.extern crypto_ft_tab
.extern crypto_it_tab
.extern crypto_fl_tab
.extern crypto_il_tab

.text

#include <asm/asm-offsets.h>

/*
 * Register aliases.
 *
 * The macros in this file receive register names as arguments and select an
 * operand width by pasting a suffix onto the name (for example r5 ## E), so
 * every width that gets used needs an alias of its own:
 *
 *   Rn   64-bit register
 *   RnE  low 32 bits
 *   RnX  low 16 bits
 *   RnH  bits 8..15
 *   RnL  bits 0..7
 *
 * Only R1..R4 provide the X/H/L forms; R5..R7 provide the 64-bit and E
 * forms; R8..R11 are 64-bit only.
 */
#define R1	%rax
#define R1E	%eax
#define R1X	%ax
#define R1H	%ah
#define R1L	%al
#define R2	%rbx
#define R2E	%ebx
#define R2X	%bx
#define R2H	%bh
#define R2L	%bl
#define R3	%rcx
#define R3E	%ecx
#define R3X	%cx
#define R3H	%ch
#define R3L	%cl
#define R4	%rdx
#define R4E	%edx
#define R4X	%dx
#define R4H	%dh
#define R4L	%dl
#define R5	%rsi
#define R5E	%esi
#define R6	%rdi
#define R6E	%edi
#define R7	%rbp
#define R7E	%ebp
#define R8	%r8
#define R9	%r9
#define R10	%r10
#define R11	%r11

/*
 * prologue(FUNC,KEY,B128,B192,r1,...,r11)
 *
 * Emits the global function label FUNC (8-byte aligned, ELF type @function)
 * followed by the entry sequence:
 *
 *   - copy r1 to r2 and r3 to r4 (64-bit moves; entry() uses this to keep
 *     %rbx in %r8 and %rbp in %r9);
 *   - set r9 = r8 + KEY + 48;
 *   - copy r10 to r11, because the low half of r10 is reloaded further down;
 *   - load four 32-bit words from (r7), 4(r7), 8(r7), 12(r7) into the E
 *     forms of r5, r1, r6, r7; r7 is loaded last because it is also the
 *     pointer being read through;
 *   - load the 32-bit value at 480(r8) into r10 ## E;
 *   - XOR the four words at -48(r9)..-36(r9), i.e. KEY+0(r8)..KEY+12(r8),
 *     into the loaded words;
 *   - compare the loaded value with 24: below jumps to B128 with r9
 *     unchanged, equal jumps to B192 with r9 advanced by 32, anything
 *     larger falls through with r9 advanced by 64.
 *
 * leaq does not modify the flags, so the je that follows the first leaq
 * still tests the result of the cmpl.
 *
 * NOTE(review): 480(r8) is presumably the key length in bytes and KEY the
 * offset of the round-key array inside the object r8 points to; neither
 * layout is visible in this file - confirm against the C side.
 */
#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
	.global	FUNC;			\
	.type	FUNC,@function;		\
	.align	8;			\
FUNC:	movq	r1,r2;			\
	movq	r3,r4;			\
	leaq	KEY+48(r8),r9;		\
	movq	r10,r11;		\
	movl	(r7),r5 ## E;		\
	movl	4(r7),r1 ## E;		\
	movl	8(r7),r6 ## E;		\
	movl	12(r7),r7 ## E;		\
	movl	480(r8),r10 ## E;	\
	xorl	-48(r9),r5 ## E;	\
	xorl	-44(r9),r1 ## E;	\
	xorl	-40(r9),r6 ## E;	\
	xorl	-36(r9),r7 ## E;	\
	cmpl	$24,r10 ## E;		\
	jb	B128;			\
	leaq	32(r9),r9;		\
	je	B192;			\
	leaq	32(r9),r9;

/*
 * epilogue(r1,...,r9)
 *
 * Exit sequence: copy r1 to r2 and r3 to r4 (64-bit moves), store the low
 * 32 bits of r5, r6, r7 and r8 to (r9), 4(r9), 8(r9) and 12(r9), then ret.
 */
#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
	movq	r1,r2;			\
	movq	r3,r4;			\
	movl	r5 ## E,(r9);		\
	movl	r6 ## E,4(r9);		\
	movl	r7 ## E,8(r9);		\
	movl	r8 ## E,12(r9);		\
	ret;

/*
 * round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd)
 *
 * One table-driven round.  TAB is addressed as four sub-tables of 4-byte
 * entries, T0 = TAB, T1 = TAB+1024, T2 = TAB+2048, T3 = TAB+3072, each
 * indexed by one zero-extended byte.  With b0(x)..b3(x) denoting the bytes
 * of the 32-bit value held in x on entry (b0 least significant), the macro
 * computes
 *
 *   r5 = T0[b0(r1)] ^ T1[b1(r2)] ^ T2[b2(r3)] ^ T3[b3(r4)]
 *   r6 = T0[b0(r2)] ^ T1[b1(r3)] ^ T2[b2(r4)] ^ T3[b3(r1)]
 *   r3 = T0[b0(r3)] ^ T1[b1(r4)] ^ T2[b2(r1)] ^ T3[b3(r2)]
 *   r4 = T0[b0(r4)] ^ T1[b1(r1)] ^ T2[b2(r2)] ^ T3[b3(r3)]
 *
 * and additionally XORs the 32-bit words at OFFSET, OFFSET+4, OFFSET+8 and
 * OFFSET+12 from r8 into ra, rb, rc and rd.
 *
 * The key XORs are interleaved with the lookups: ra and rb are XORed after
 * r5 and r6 have been loaded with their first table word, rc and rd near
 * the end, after r3 and r4 have been loaded with theirs.  Every use in this
 * file passes r5/r6 (in either order) as ra/rb and r3/r4 (in either order)
 * as rc/rd, which this ordering relies on.
 *
 * Register use:
 *   r1..r4  inputs; need the E, X, H and L aliases and are also used as
 *           64-bit index registers.  r3 and r4 are overwritten with
 *           results; r1 and r2 are left holding leftover index bytes.
 *   r5, r6  outputs; also used as 64-bit index registers.
 *   r7      scratch index register.
 *   r8      pointer the OFFSET displacements are relative to.
 * The flags are modified (roll, shrl, xorl).
 *
 * To reach bytes 2 and 3 of an input through the H/L aliases, the low 16
 * bits of r4 are copied into r2 (and those of r3 into r1), the destination
 * is rotated by 16 and the source is shifted right by 16.  This keeps all
 * four bytes of every input reachable without further registers.  The
 * instruction order matters; do not reorder.
 */
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
	movzbl	r2 ## H,r5 ## E;	\
	movzbl	r2 ## L,r6 ## E;	\
	movl	TAB+1024(,r5,4),r5 ## E;\
	movw	r4 ## X,r2 ## X;	\
	movl	TAB(,r6,4),r6 ## E;	\
	roll	$16,r2 ## E;		\
	shrl	$16,r4 ## E;		\
	movzbl	r4 ## H,r7 ## E;	\
	movzbl	r4 ## L,r4 ## E;	\
	xorl	OFFSET(r8),ra ## E;	\
	xorl	OFFSET+4(r8),rb ## E;	\
	xorl	TAB+3072(,r7,4),r5 ## E;\
	xorl	TAB+2048(,r4,4),r6 ## E;\
	movzbl	r1 ## L,r7 ## E;	\
	movzbl	r1 ## H,r4 ## E;	\
	movl	TAB+1024(,r4,4),r4 ## E;\
	movw	r3 ## X,r1 ## X;	\
	roll	$16,r1 ## E;		\
	shrl	$16,r3 ## E;		\
	xorl	TAB(,r7,4),r5 ## E;	\
	movzbl	r3 ## H,r7 ## E;	\
	movzbl	r3 ## L,r3 ## E;	\
	xorl	TAB+3072(,r7,4),r4 ## E;\
	xorl	TAB+2048(,r3,4),r5 ## E;\
	movzbl	r1 ## H,r7 ## E;	\
	movzbl	r1 ## L,r3 ## E;	\
	shrl	$16,r1 ## E;		\
	xorl	TAB+3072(,r7,4),r6 ## E;\
	movl	TAB+2048(,r3,4),r3 ## E;\
	movzbl	r1 ## H,r7 ## E;	\
	movzbl	r1 ## L,r1 ## E;	\
	xorl	TAB+1024(,r7,4),r6 ## E;\
	xorl	TAB(,r1,4),r3 ## E;	\
	movzbl	r2 ## H,r1 ## E;	\
	movzbl	r2 ## L,r7 ## E;	\
	shrl	$16,r2 ## E;		\
	xorl	TAB+3072(,r1,4),r3 ## E;\
	xorl	TAB+2048(,r7,4),r4 ## E;\
	movzbl	r2 ## H,r1 ## E;	\
	movzbl	r2 ## L,r2 ## E;	\
	xorl	OFFSET+8(r8),rc ## E;	\
	xorl	OFFSET+12(r8),rd ## E;	\
	xorl	TAB+1024(,r1,4),r3 ## E;\
	xorl	TAB(,r2,4),r4 ## E;

/*
 * move_regs(r1,r2,r3,r4): 32-bit register copies, r3 into r1 and r4 into r2.
 */
#define move_regs(r1,r2,r3,r4) \
	movl	r3 ## E,r1 ## E;	\
	movl	r4 ## E,r2 ## E;

/*
 * entry(FUNC,KEY,B128,B192): prologue() with the register assignment used
 * by both functions in this file.
 *
 *   %rbx -> %r8, %rbp -> %r9  64-bit copies; the return macro copies them
 *                             back
 *   %rdi                      base pointer for KEY and for the value at 480
 *   %r10                      receives %rdi + KEY + 48 (+32 or +64)
 *   %rsi -> %r11              copy kept because %esi is reloaded from
 *                             480(%rdi)
 *   %rdx                      pointer to the four 32-bit input words
 *   %eax,%ebx,%ecx,%edx       the four input words after the initial XOR
 */
#define entry(FUNC,KEY,B128,B192) \
	prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)

/*
 * return: epilogue() with the register assignment used by both functions in
 * this file.  Copies %r8 back to %rbx and %r9 back to %rbp, stores %esi,
 * %edi, %ecx and %edx to 0, 4, 8 and 12 bytes past %r11, and executes ret.
 */
#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)

/*
 * encrypt_round(TAB,OFFSET): one round() with inputs R1,R2,R3,R4, scratch
 * R7 and key pointer R10; the four key words at OFFSET(R10) are XORed into
 * R5,R6,R3,R4.  round() leaves its results in R5,R6,R3,R4; move_regs() then
 * copies R5,R6 into R1,R2 so the next round again finds its inputs in
 * R1..R4.
 */
#define encrypt_round(TAB,OFFSET) \
	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
	move_regs(R1,R2,R5,R6)

/*
 * encrypt_final(TAB,OFFSET): the same round() invocation as encrypt_round
 * but without the trailing move_regs(), so the results stay in R5,R6,R3,R4,
 * which are the registers the return macro stores.
 */
#define encrypt_final(TAB,OFFSET) \
	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)

/*
 * decrypt_round(TAB,OFFSET): one round() with the input pairs swapped
 * (R2,R1,R4,R3 instead of R1,R2,R3,R4) and the first two result registers
 * swapped (R6,R5); scratch is R7 and the key pointer R10.  The four key
 * words at OFFSET(R10) are still XORed into R5,R6,R3,R4 in that order.
 * The results end up in the register set R5,R6,R3,R4; move_regs() then
 * copies R5,R6 into R1,R2 for the next round.
 */
#define decrypt_round(TAB,OFFSET) \
	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
	move_regs(R1,R2,R5,R6)

/*
 * decrypt_final(TAB,OFFSET): the same round() invocation as decrypt_round
 * but without the trailing move_regs(), so the results stay in R5,R6,R3,R4,
 * which are the registers the return macro stores.
 */
#define decrypt_final(TAB,OFFSET) \
	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)

/*
 * void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 *
 * Arguments arrive in %rdi (tfm), %rsi (out) and %rdx (in).  Sixteen bytes
 * are read from in as four 32-bit words, XORed with the four key words at
 * 0(%rdi), run through the rounds below and stored to out.
 *
 * The 32-bit value at 480(%rdi) picks the entry point: a value above 24
 * runs all 13 regular rounds, exactly 24 starts at .Lenc192 (11 rounds),
 * below 24 starts at .Lenc128 (9 rounds); the crypto_fl_tab round always
 * follows.  The prologue advances the key pointer %r10 by 64, 32 or 0 bytes
 * respectively, so the same OFFSET constants serve all three paths.
 *
 * %rbx and %rbp are used as working registers but are kept in %r8/%r9 and
 * restored before ret.  %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11 and the
 * flags are clobbered.
 *
 * The branch targets use the .L prefix so that they stay out of the symbol
 * table, and .size records the extent of the function symbol.
 *
 * NOTE(review): round keys and the value at offset 480 are read directly
 * from the first argument; confirm this matches the layout of the object
 * the C caller actually passes.
 */

	entry(aes_enc_blk,0,.Lenc128,.Lenc192)
	encrypt_round(crypto_ft_tab,-96)
	encrypt_round(crypto_ft_tab,-80)
.Lenc192:	encrypt_round(crypto_ft_tab,-64)
	encrypt_round(crypto_ft_tab,-48)
.Lenc128:	encrypt_round(crypto_ft_tab,-32)
	encrypt_round(crypto_ft_tab,-16)
	encrypt_round(crypto_ft_tab,  0)
	encrypt_round(crypto_ft_tab, 16)
	encrypt_round(crypto_ft_tab, 32)
	encrypt_round(crypto_ft_tab, 48)
	encrypt_round(crypto_ft_tab, 64)
	encrypt_round(crypto_ft_tab, 80)
	encrypt_round(crypto_ft_tab, 96)
	encrypt_final(crypto_fl_tab,112)
	return
	.size	aes_enc_blk, .-aes_enc_blk

/*
 * void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 *
 * Arguments arrive in %rdi (tfm), %rsi (out) and %rdx (in).  Sixteen bytes
 * are read from in as four 32-bit words, XORed with the four key words at
 * 240(%rdi), run through the rounds below and stored to out.
 *
 * The 32-bit value at 480(%rdi) picks the entry point: a value above 24
 * runs all 13 regular rounds, exactly 24 starts at .Ldec192 (11 rounds),
 * below 24 starts at .Ldec128 (9 rounds); the crypto_il_tab round always
 * follows.  The prologue advances the key pointer %r10 by 64, 32 or 0 bytes
 * respectively, so the same OFFSET constants serve all three paths.
 *
 * %rbx and %rbp are used as working registers but are kept in %r8/%r9 and
 * restored before ret.  %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11 and the
 * flags are clobbered.
 *
 * The branch targets use the .L prefix so that they stay out of the symbol
 * table, and .size records the extent of the function symbol.
 *
 * NOTE(review): round keys at offset 240 and the value at offset 480 are
 * read directly from the first argument; confirm this matches the layout
 * of the object the C caller actually passes.
 */

	entry(aes_dec_blk,240,.Ldec128,.Ldec192)
	decrypt_round(crypto_it_tab,-96)
	decrypt_round(crypto_it_tab,-80)
.Ldec192:	decrypt_round(crypto_it_tab,-64)
	decrypt_round(crypto_it_tab,-48)
.Ldec128:	decrypt_round(crypto_it_tab,-32)
	decrypt_round(crypto_it_tab,-16)
	decrypt_round(crypto_it_tab,  0)
	decrypt_round(crypto_it_tab, 16)
	decrypt_round(crypto_it_tab, 32)
	decrypt_round(crypto_it_tab, 48)
	decrypt_round(crypto_it_tab, 64)
	decrypt_round(crypto_it_tab, 80)
	decrypt_round(crypto_it_tab, 96)
	decrypt_final(crypto_il_tab,112)
	return
	.size	aes_dec_blk, .-aes_dec_blk
