1 /*
2  * Copyright (C) 2017 Denys Vlasenko
3  *
4  * Licensed under GPLv2, see file LICENSE in this source tree.
5  */
6 
7 /* This AES implementation is derived from tiny-AES128-C code,
8  * which was put by its author into public domain:
9  *
10  * tiny-AES128-C/unlicense.txt, Dec 8, 2014
11  * """
12  * This is free and unencumbered software released into the public domain.
13  *
14  * Anyone is free to copy, modify, publish, use, compile, sell, or
15  * distribute this software, either in source code form or as a compiled
16  * binary, for any purpose, commercial or non-commercial, and by any
17  * means.
18  *
19  * In jurisdictions that recognize copyright laws, the author or authors
20  * of this software dedicate any and all copyright interest in the
21  * software to the public domain. We make this dedication for the benefit
22  * of the public at large and to the detriment of our heirs and
23  * successors. We intend this dedication to be an overt act of
24  * relinquishment in perpetuity of all present and future rights to this
25  * software under copyright law.
26  *
27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
30  * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
31  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
32  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
33  * OTHER DEALINGS IN THE SOFTWARE.
34  * """
35  */
36 /* Note that only original tiny-AES128-C code is public domain.
37  * The derived code in this file has been expanded to also implement aes192
38  * and aes256 and use more efficient word-sized operations in many places,
39  * and put under GPLv2 license.
40  */
41 #include "tls.h"
42 
// AES forward substitution box, indexed by the input byte.
// The table is const so that it can live in read-only storage rather than RAM.
// (The values could instead be generated at run time, trading ROM for RAM,
// which can be useful in embedded bootloaders where ROM is scarce.)
static const uint8_t sbox[256] = {
	/* 0x00 */ 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
	/* 0x10 */ 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
	/* 0x20 */ 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
	/* 0x30 */ 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
	/* 0x40 */ 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
	/* 0x50 */ 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
	/* 0x60 */ 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
	/* 0x70 */ 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
	/* 0x80 */ 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
	/* 0x90 */ 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
	/* 0xa0 */ 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
	/* 0xb0 */ 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
	/* 0xc0 */ 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
	/* 0xd0 */ 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
	/* 0xe0 */ 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
	/* 0xf0 */ 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};
80 
// AES inverse substitution box, indexed by the input byte.
// Used by InvSubBytes() during decryption.
static const uint8_t rsbox[256] = {
	/* 0x00 */ 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
	/* 0x10 */ 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
	/* 0x20 */ 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
	/* 0x30 */ 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
	/* 0x40 */ 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
	/* 0x50 */ 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
	/* 0x60 */ 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
	/* 0x70 */ 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
	/* 0x80 */ 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
	/* 0x90 */ 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
	/* 0xa0 */ 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
	/* 0xb0 */ 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
	/* 0xc0 */ 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
	/* 0xd0 */ 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
	/* 0xe0 */ 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
	/* 0xf0 */ 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
};
115 
116 // SubWord() is a function that takes a four-byte input word and
117 // applies the S-box to each of the four bytes to produce an output word.
Subword(uint32_t x)118 static uint32_t Subword(uint32_t x)
119 {
120 	return (sbox[(x >> 24)      ] << 24)
121 	|      (sbox[(x >> 16) & 255] << 16)
122 	|      (sbox[(x >> 8 ) & 255] << 8 )
123 	|      (sbox[(x      ) & 255]      );
124 }
125 
126 // This function produces Nb(Nr+1) round keys.
127 // The round keys are used in each round to decrypt the states.
KeyExpansion(uint32_t * RoundKey,const void * key,unsigned key_len)128 static int KeyExpansion(uint32_t *RoundKey, const void *key, unsigned key_len)
129 {
130 	// The round constant word array, Rcon[i], contains the values given by
131 	// x to th e power (i-1) being powers of x (x is denoted as {02}) in the field GF(2^8).
132 	// Note that i starts at 2, not 0.
133 	static const uint8_t Rcon[] ALIGN1 = {
134 		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
135 	//..... 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6,...
136 	// but aes256 only uses values up to 0x36
137 	};
138 	int rounds, words_key, words_RoundKey;
139 	int i, j, k;
140 
141 	// key_len 16: aes128, rounds 10, words_key 4, words_RoundKey 44
142 	// key_len 24: aes192, rounds 12, words_key 6, words_RoundKey 52
143 	// key_len 32: aes256, rounds 14, words_key 8, words_RoundKey 60
144 	words_key = key_len / 4;
145 	rounds = 6 + (key_len / 4);
146 	words_RoundKey = 28 + key_len;
147 
148 	// The first round key is the key itself.
149 	for (i = 0; i < words_key; i++)
150 		RoundKey[i] = get_unaligned_be32((uint32_t*)key + i);
151 	// i == words_key now
152 
153 	// All other round keys are found from the previous round keys.
154 	j = k = 0;
155 	for (; i < words_RoundKey; i++) {
156 		uint32_t tempa;
157 
158 		tempa = RoundKey[i - 1];
159 		if (j == 0) {
160 			// RotWord(): rotates the 4 bytes in a word to the left once.
161 			tempa = (tempa << 8) | (tempa >> 24);
162 			tempa = Subword(tempa);
163 			tempa ^= (uint32_t)Rcon[k] << 24;
164 		} else if (words_key > 6 && j == 4) {
165 			tempa = Subword(tempa);
166 		}
167 		RoundKey[i] = RoundKey[i - words_key] ^ tempa;
168 		j++;
169 		if (j == words_key) {
170 			j = 0;
171 			k++;
172 		}
173 	}
174 	return rounds;
175 }
176 
// XORs one round key into the state.
// RoundKeys points to four 32-bit words; each word covers four consecutive
// state entries, most significant byte first.
static void AddRoundKey(unsigned astate[16], const uint32_t *RoundKeys)
{
	unsigned *cell = astate;
	int word;

	for (word = 0; word < 4; word++) {
		uint32_t k = RoundKeys[word];
		int shift;

		// Bytes are taken from bits 31..24 down to bits 7..0.
		for (shift = 24; shift >= 0; shift -= 8)
			*cell++ ^= (k >> shift) & 255;
	}
}
191 
192 // The SubBytes Function Substitutes the values in the
193 // state matrix with values in an S-box.
SubBytes(unsigned astate[16])194 static void SubBytes(unsigned astate[16])
195 {
196 	int i;
197 
198 	for (i = 0; i < 16; i++)
199 		astate[i] = sbox[astate[i]];
200 }
201 
// Our code actually stores "columns" (in aes encryption terminology)
// of state in rows: the first 4 elements are "row 0, col 0", "row 1, col 0",
// "row 2, col 0", "row 3, col 0". The fifth element is "row 0, col 1",
// and so on.
// ASTATE(col,row) addresses one cell of that layout. It expands to an
// access to an array named astate, which must be in scope where it is used.
#define ASTATE(col,row) astate[(col)*4 + (row)]
207 
// ShiftRows(): rotates each row of the state to the left.
// The rotation distance equals the (zero-based) row number, so row 0 is
// left alone, row 1 moves by one column, row 2 by two and row 3 by three.
// The cell at (col,row) lives at astate[col*4 + row].
static void ShiftRows(unsigned astate[16])
{
	int row;

	for (row = 1; row < 4; row++) {
		unsigned rotated[4];
		int col;

		// New column "col" receives what was "row" columns to its right.
		for (col = 0; col < 4; col++)
			rotated[col] = astate[((col + row) & 3) * 4 + row];
		for (col = 0; col < 4; col++)
			astate[col * 4 + row] = rotated[col];
	}
}
233 
// MixColumns(): mixes the four entries of every state column.
// With a, b, c, d being one column, the new column is
//   2a ^ 3b ^  c ^  d
//    a ^ 2b ^ 3c ^  d
//    a ^  b ^ 2c ^ 3d
//   3a ^  b ^  c ^ 2d
// where the products are GF(2^8) multiplications ("2v" is v << 1,
// "3v" is v ^ (v << 1)), reduced modulo the polynomial 0x11b.
static void MixColumns(unsigned astate[16])
{
	unsigned *col;

	for (col = astate; col != astate + 16; col += 4) {
		unsigned a = col[0];
		unsigned b = col[1];
		unsigned c = col[2];
		unsigned d = col[3];
		unsigned sum = a ^ b ^ c ^ d;
		unsigned mixed[4];
		int k;

		// "sum ^ v" drops v's own term; the doubled pair supplies
		// the 2* and 3* factors.
		mixed[0] = sum ^ a ^ ((a ^ b) << 1);
		mixed[1] = sum ^ b ^ ((b ^ c) << 1);
		mixed[2] = sum ^ c ^ ((c ^ d) << 1);
		mixed[3] = sum ^ d ^ ((d ^ a) << 1);

		// Doubling may have set bit 8: for byte inputs (v >> 8) is 0 or 1,
		// so the mask is either 0 or 0x11b, and the XOR clears bit 8.
		for (k = 0; k < 4; k++) {
			unsigned v = mixed[k];
			col[k] = v ^ ((0u - (v >> 8)) & 0x11b);
		}
	}
}
257 
258 // The SubBytes Function Substitutes the values in the
259 // state matrix with values in an S-box.
InvSubBytes(unsigned astate[16])260 static void InvSubBytes(unsigned astate[16])
261 {
262 	int i;
263 
264 	for (i = 0; i < 16; i++)
265 		astate[i] = rsbox[astate[i]];
266 }
267 
// InvShiftRows(): rotates each row of the state to the right.
// The rotation distance equals the (zero-based) row number, so row 0 is
// left alone, row 1 moves by one column, row 2 by two and row 3 by three.
// The cell at (col,row) lives at astate[col*4 + row].
static void InvShiftRows(unsigned astate[16])
{
	int row;

	for (row = 1; row < 4; row++) {
		unsigned rotated[4];
		int col;

		// New column "col" receives what was "row" columns to its left
		// (the "+ 4" keeps the index non-negative before masking).
		for (col = 0; col < 4; col++)
			rotated[col] = astate[((col + 4 - row) & 3) * 4 + row];
		for (col = 0; col < 4; col++)
			astate[col * 4 + row] = rotated[col];
	}
}
290 
Multiply(unsigned x)291 static ALWAYS_INLINE unsigned Multiply(unsigned x)
292 {
293 	unsigned y;
294 
295 	y = x >> 8;
296 	return (x ^ y ^ (y << 1) ^ (y << 3) ^ (y << 4)) & 255;
297 }
298 
// InvMixColumns(): undoes MixColumns() on every state column.
// With a, b, c, d being one column, the new column is
//   14a ^ 11b ^ 13c ^  9d
//    9a ^ 14b ^ 11c ^ 13d
//   13a ^  9b ^ 14c ^ 11d
//   11a ^ 13b ^  9c ^ 14d
// The products are built as unreduced XORs of shifted inputs
// (9 = 8+1, 11 = 8+2+1, 13 = 8+4+1, 14 = 8+4+2) and then reduced
// to a byte by Multiply().
static void InvMixColumns(unsigned astate[16])
{
	unsigned *col;

	for (col = astate; col != astate + 16; col += 4) {
		unsigned a = col[0];
		unsigned b = col[1];
		unsigned c = col[2];
		unsigned d = col[3];
		// Every coefficient contains the "8" term, so it is shared.
		unsigned s8 = (a ^ b ^ c ^ d) << 3;
		unsigned a2 = a << 1, a4 = a << 2;
		unsigned b2 = b << 1, b4 = b << 2;
		unsigned c2 = c << 1, c4 = c << 2;
		unsigned d2 = d << 1, d4 = d << 2;

		col[0] = Multiply(s8 ^ a2 ^ a4 ^ b ^ b2 ^ c ^ c4 ^ d);
		col[1] = Multiply(s8 ^ a ^ b2 ^ b4 ^ c ^ c2 ^ d ^ d4);
		col[2] = Multiply(s8 ^ a ^ a4 ^ b ^ c2 ^ c4 ^ d ^ d2);
		col[3] = Multiply(s8 ^ a ^ a2 ^ b ^ b4 ^ c ^ d2 ^ d4);
	}
}
328 
aes_encrypt_1(struct tls_aes * aes,unsigned astate[16])329 static void aes_encrypt_1(struct tls_aes *aes, unsigned astate[16])
330 {
331 	unsigned rounds = aes->rounds;
332 	const uint32_t *RoundKey = aes->key;
333 
334 	for (;;) {
335 		AddRoundKey(astate, RoundKey);
336 		RoundKey += 4;
337 		SubBytes(astate);
338 		ShiftRows(astate);
339 		if (--rounds == 0)
340 			break;
341 		MixColumns(astate);
342 	}
343 	AddRoundKey(astate, RoundKey);
344 }
345 
aes_setkey(struct tls_aes * aes,const void * key,unsigned key_len)346 void FAST_FUNC aes_setkey(struct tls_aes *aes, const void *key, unsigned key_len)
347 {
348 	aes->rounds = KeyExpansion(aes->key, key, key_len);
349 }
350 
aes_encrypt_one_block(struct tls_aes * aes,const void * data,void * dst)351 void FAST_FUNC aes_encrypt_one_block(struct tls_aes *aes, const void *data, void *dst)
352 {
353 	unsigned astate[16];
354 	unsigned i;
355 
356 	const uint8_t *pt = data;
357 	uint8_t *ct = dst;
358 
359 	for (i = 0; i < 16; i++)
360 		astate[i] = pt[i];
361 	aes_encrypt_1(aes, astate);
362 	for (i = 0; i < 16; i++)
363 		ct[i] = astate[i];
364 }
365 
aes_cbc_encrypt(struct tls_aes * aes,void * iv,const void * data,size_t len,void * dst)366 void FAST_FUNC aes_cbc_encrypt(struct tls_aes *aes, void *iv, const void *data, size_t len, void *dst)
367 {
368 	uint8_t iv2[16];
369 
370 	const uint8_t *pt = data;
371 	uint8_t *ct = dst;
372 
373 	memcpy(iv2, iv, 16);
374 	while (len > 0) {
375 		{
376 			/* almost aes_encrypt_one_block(rounds, RoundKey, pt, ct);
377 			 * but xor'ing of IV with plaintext[] is combined
378 			 * with plaintext[] -> astate[]
379 			 */
380 			int i;
381 			unsigned astate[16];
382 			for (i = 0; i < 16; i++)
383 				astate[i] = pt[i] ^ iv2[i];
384 			aes_encrypt_1(aes, astate);
385 			for (i = 0; i < 16; i++)
386 				iv2[i] = ct[i] = astate[i];
387 		}
388 		ct += 16;
389 		pt += 16;
390 		len -= 16;
391 	}
392 }
393 
aes_decrypt_1(struct tls_aes * aes,unsigned astate[16])394 static void aes_decrypt_1(struct tls_aes *aes, unsigned astate[16])
395 {
396 	unsigned rounds = aes->rounds;
397 	const uint32_t *RoundKey = aes->key;
398 
399 	RoundKey += rounds * 4;
400 	AddRoundKey(astate, RoundKey);
401 	for (;;) {
402 		InvShiftRows(astate);
403 		InvSubBytes(astate);
404 		RoundKey -= 4;
405 		AddRoundKey(astate, RoundKey);
406 		if (--rounds == 0)
407 			break;
408 		InvMixColumns(astate);
409 	}
410 }
411 
#if 0 //UNUSED
// Decrypts a single 16-byte block (no chaining).
// Reads 16 bytes of ciphertext from data and writes 16 bytes to dst.
// Compiled out: nothing in this file calls it.
static void aes_decrypt_one_block(struct tls_aes *aes, const void *data, void *dst)
{
	unsigned astate[16];
	unsigned i;

	const uint8_t *ct = data;
	uint8_t *pt = dst;

	for (i = 0; i < 16; i++)
		astate[i] = ct[i];
	aes_decrypt_1(aes, astate);
	for (i = 0; i < 16; i++)
		pt[i] = astate[i];
}
#endif
430 
aes_cbc_decrypt(struct tls_aes * aes,void * iv,const void * data,size_t len,void * dst)431 void FAST_FUNC aes_cbc_decrypt(struct tls_aes *aes, void *iv, const void *data, size_t len, void *dst)
432 {
433 	uint8_t iv2[16];
434 	uint8_t iv3[16];
435 	uint8_t *ivbuf;
436 	uint8_t *ivnext;
437 
438 	const uint8_t *ct = data;
439 	uint8_t *pt = dst;
440 
441 	ivbuf = memcpy(iv2, iv, 16);
442 	while (len) {
443 		ivnext = (ivbuf==iv2) ? iv3 : iv2;
444 		{
445 			/* almost aes_decrypt_one_block(rounds, RoundKey, ct, pt)
446 			 * but xor'ing of ivbuf is combined with astate[] -> plaintext[]
447 			 */
448 			int i;
449 			unsigned astate[16];
450 			for (i = 0; i < 16; i++)
451 				ivnext[i] = astate[i] = ct[i];
452 			aes_decrypt_1(aes, astate);
453 			for (i = 0; i < 16; i++)
454 				pt[i] = astate[i] ^ ivbuf[i];
455 		}
456 		ivbuf = ivnext;
457 		ct += 16;
458 		pt += 16;
459 		len -= 16;
460 	}
461 }
462