1 /*
2  * Copyright (C) 2018 Denys Vlasenko
3  *
4  * Licensed under GPLv2, see file LICENSE in this source tree.
5  */
6 
7 #include "tls.h"
8 
9 typedef uint8_t byte;
10 typedef uint32_t word32;
11 #define XMEMSET memset
12 #define XMEMCPY memcpy
13 
14 /* from wolfssl-3.15.3/wolfcrypt/src/aes.c */
15 
16 #ifdef UNUSED
/* Write the 64-bit big-endian *bit* length of a sz-byte buffer into
 * buf[0..7], as required for the final GHASH length block.
 * Dead code (compiled out via #ifdef UNUSED): aesgcm_GHASH() now folds
 * the two lengths in directly with word stores instead.
 * NOTE(review): the uint32_t stores assume buf is 4-byte aligned (true
 * for the original scratch[] caller, which is ALIGNED_long) and type-pun
 * a byte buffer through uint32_t* — OK under busybox's usual
 * no-strict-aliasing build assumptions, but not portable C.
 */
static ALWAYS_INLINE void FlattenSzInBits(byte* buf, word32 sz)
{
    /* Multiply the sz by 8 */
//bbox: these sizes are never even close to 2^32/8
//    word32 szHi = (sz >> (8*sizeof(sz) - 3));
    sz <<= 3;

    /* copy over the words of the sz into the destination buffer */
    /* high 32 bits of the bit count are always zero (see bbox note above) */
//    buf[0] = (szHi >> 24) & 0xff;
//    buf[1] = (szHi >> 16) & 0xff;
//    buf[2] = (szHi >>  8) & 0xff;
//    buf[3] = szHi & 0xff;
    *(uint32_t*)(buf + 0) = 0;
    /* low 32 bits, stored big-endian as GCM requires */
//    buf[4] = (sz >> 24) & 0xff;
//    buf[5] = (sz >> 16) & 0xff;
//    buf[6] = (sz >>  8) & 0xff;
//    buf[7] = sz & 0xff;
    *(uint32_t*)(buf + 4) = SWAP_BE32(sz);
}
36 #endif
37 
/* One GCM field step: shift the 128-bit value x right by one bit, with
 * reduction. GCM treats the block as bit-reflected, so this "right shift"
 * is multiplication by the polynomial variable in GF(2^128); when a bit
 * carries out of the last byte (x[15] & 1), the field polynomial
 * x^128 + x^7 + x^2 + x + 1 is folded back in by xoring 0xE1 into the
 * top byte (0xE1 = bits 7,2,1,0 — the reflected low terms).
 * Caller guarantees x is long-aligned; the word-at-a-time variants below
 * depend on that.
 * NOTE(review): the word paths access the byte array through
 * unsigned long* (the 'l' macro) — type-punning that is fine under
 * busybox's usual build assumptions but not strictly conforming C.
 */
static void RIGHTSHIFTX(byte* x)
{
#define l ((unsigned long*)x)
#if 0

    // Generic byte-at-a-time algorithm
    int i;
    byte carryIn = (x[15] & 0x01) ? 0xE1 : 0;
    for (i = 0; i < AES_BLOCK_SIZE; i++) {
        byte carryOut = (x[i] << 7); // zero, or 0x80
        x[i] = (x[i] >> 1) ^ carryIn;
        carryIn = carryOut;
    }

#elif BB_BIG_ENDIAN

    // Big-endian can shift-right in larger than byte chunks
    // (we use the fact that 'x' is long-aligned)
    // 0xE1 is placed in the most significant byte of the first word,
    // i.e. at the same position byte-at-a-time code would put it
    unsigned long carryIn = (x[15] & 0x01)
        ? ((unsigned long)0xE1 << (LONG_BIT-8))
        : 0;
# if ULONG_MAX <= 0xffffffff
    int i;
    for (i = 0; i < AES_BLOCK_SIZE/sizeof(long); i++) {
        unsigned long carryOut = l[i] << (LONG_BIT-1); // zero, or 0x800..00
        l[i] = (l[i] >> 1) ^ carryIn;
        carryIn = carryOut;
    }
# else
    // 64-bit code: need to process only 2 words
    unsigned long carryOut = l[0] << (LONG_BIT-1); // zero, or 0x800..00
    l[0] = (l[0] >> 1) ^ carryIn;
    l[1] = (l[1] >> 1) ^ carryOut;
# endif

#else /* LITTLE_ENDIAN */

    // In order to use word-sized ops, little-endian needs to byteswap.
    // On x86, code size increase is ~10 bytes compared to byte-by-byte.
    unsigned long carryIn = (x[15] & 0x01)
        ? ((unsigned long)0xE1 << (LONG_BIT-8))
        : 0;
# if ULONG_MAX <= 0xffffffff
    int i;
    for (i = 0; i < AES_BLOCK_SIZE/sizeof(long); i++) {
        unsigned long ti = SWAP_BE32(l[i]);
        unsigned long carryOut = ti << (LONG_BIT-1); // zero, or 0x800..00
        ti = (ti >> 1) ^ carryIn;
        l[i] = SWAP_BE32(ti);
        carryIn = carryOut;
    }
# else
    // 64-bit code: need to process only 2 words
    unsigned long tt = SWAP_BE64(l[0]);
    unsigned long carryOut = tt << (LONG_BIT-1); // zero, or 0x800..00
    tt = (tt >> 1) ^ carryIn; l[0] = SWAP_BE64(tt);
    tt = SWAP_BE64(l[1]);
    tt = (tt >> 1) ^ carryOut; l[1] = SWAP_BE64(tt);
# endif

#endif /* LITTLE_ENDIAN */
#undef l
}
101 
102 // Caller guarantees X is aligned
/* Galois-field multiply for GHASH: X = X * Y in GF(2^128), GCM bit order.
 * Shift-and-add over every bit of Y, msb first within each byte.
 * Caller guarantees X is long-aligned (RIGHTSHIFTX and the aligned xor
 * helper require it). X doubles as the running "V" value from the
 * reference algorithm and is overwritten with the product; Y is the
 * hash subkey and is left untouched.
 */
static void GMULT(byte* X, byte* Y)
{
    byte Z[AES_BLOCK_SIZE] ALIGNED_long;
    int i;

    XMEMSET(Z, 0, AES_BLOCK_SIZE);
    for (i = 0; i < AES_BLOCK_SIZE; i++) {
        unsigned mask;
        /* walk the bits of Y[i] from 0x80 down to 0x01 */
        for (mask = 0x80; mask != 0; mask >>= 1) {
            if (Y[i] & mask) {
                /* Z ^= V (X is shifted in place instead of copying to V) */
                xorbuf_aligned_AES_BLOCK_SIZE(Z, X);
            }
            RIGHTSHIFTX(X);
        }
    }
    XMEMCPY(X, Z, AES_BLOCK_SIZE);
}
125 
126 //bbox:
127 // for TLS AES-GCM, a (which is AAD) is always 13 bytes long, and bbox code provides
128 // extra 3 zeroed bytes, making it a[16], or a[AES_BLOCK_SIZE].
129 // Resulting auth tag in s[] is also always AES_BLOCK_SIZE bytes.
130 //
131 // This allows some simplifications.
132 #define aSz 13
133 #define sSz AES_BLOCK_SIZE
/* GHASH (NIST SP 800-38D): computes s = GHASH_H(A || C || len(A)||len(C)),
 * the raw authentication value for AES-GCM.
 * h: the hash subkey H (was aes->H in wolfSSL).
 * a: the AAD — per the bbox contract above, always 13 bytes padded with
 *    3 zero bytes to a full AES_BLOCK_SIZE block (so the AAD loop and
 *    partial-block handling of the original are folded into one block).
 * c: the ciphertext, cSz bytes, any length, not necessarily aligned.
 * s: output, always AES_BLOCK_SIZE bytes.
 */
void FAST_FUNC aesgcm_GHASH(byte* h,
    const byte* a, //unsigned aSz,
    const byte* c, unsigned cSz,
    byte* s //, unsigned sSz
)
{
    byte x[AES_BLOCK_SIZE] ALIGNED_long;
//    byte scratch[AES_BLOCK_SIZE] ALIGNED_long;
    unsigned blocks, partial;
    //was: byte* h = aes->H;

    //XMEMSET(x, 0, AES_BLOCK_SIZE);

    /* Hash in A, the Additional Authentication Data */
    /* (a is exactly one zero-padded block, so x starts at zero and the
     * first "x ^= a" collapses to a plain copy) */
//    if (aSz != 0 && a != NULL) {
//        blocks = aSz / AES_BLOCK_SIZE;
//        partial = aSz % AES_BLOCK_SIZE;
//        while (blocks--) {
            //xorbuf(x, a, AES_BLOCK_SIZE);
            XMEMCPY(x, a, AES_BLOCK_SIZE);// memcpy(x,a) = memset(x,0)+xorbuf(x,a)
            GMULT(x, h);
//            a += AES_BLOCK_SIZE;
//        }
//        if (partial != 0) {
//            XMEMSET(scratch, 0, AES_BLOCK_SIZE);
//            XMEMCPY(scratch, a, partial);
//            xorbuf(x, scratch, AES_BLOCK_SIZE);
//            GMULT(x, h);
//        }
//    }

    /* Hash in C, the Ciphertext */
    if (cSz != 0 /*&& c != NULL*/) {
        blocks = cSz / AES_BLOCK_SIZE;
        partial = cSz % AES_BLOCK_SIZE;
        while (blocks--) {
            if (BB_UNALIGNED_MEMACCESS_OK) // c is not guaranteed to be aligned
                xorbuf_aligned_AES_BLOCK_SIZE(x, c);
            else
                xorbuf(x, c, AES_BLOCK_SIZE);
            GMULT(x, h);
            c += AES_BLOCK_SIZE;
        }
        if (partial != 0) {
            /* trailing bytes of x stay unchanged = xor with zero padding */
            //XMEMSET(scratch, 0, AES_BLOCK_SIZE);
            //XMEMCPY(scratch, c, partial);
            //xorbuf(x, scratch, AES_BLOCK_SIZE);
            xorbuf(x, c, partial);//same result as above
            GMULT(x, h);
        }
    }

    /* Hash in the lengths of A and C in bits */
    /* The length block is [len(A) in bits, 64-bit BE][len(C) in bits,
     * 64-bit BE]; the high 32 bits of each are always zero here, so only
     * words 1 and 3 need xoring. The cast is safe: x is ALIGNED_long. */
    //FlattenSzInBits(&scratch[0], aSz);
    //FlattenSzInBits(&scratch[8], cSz);
    //xorbuf_aligned_AES_BLOCK_SIZE(x, scratch);
    // simpler:
#define P32(v) ((uint32_t*)v)
  //P32(x)[0] ^= 0;
    P32(x)[1] ^= SWAP_BE32(aSz * 8);
  //P32(x)[2] ^= 0;
    P32(x)[3] ^= SWAP_BE32(cSz * 8);
#undef P32

    GMULT(x, h);

    /* Copy the result into s. */
    XMEMCPY(s, x, sSz);
}
203