1 /*
2  *
3  *      Copyright (c) 1993 Ning and David Mosberger.
4 
5  This is based on code originally written by Bas Laarhoven (bas@vimec.nl)
6  and David L. Brown, Jr., and incorporates improvements suggested by
7  Kai Harrekilde-Petersen.
8 
9  This program is free software; you can redistribute it and/or
10  modify it under the terms of the GNU General Public License as
11  published by the Free Software Foundation; either version 2, or (at
12  your option) any later version.
13 
14  This program is distributed in the hope that it will be useful, but
15  WITHOUT ANY WARRANTY; without even the implied warranty of
16  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  General Public License for more details.
18 
19  You should have received a copy of the GNU General Public License
20  along with this program; see the file COPYING.  If not, write to
21  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
22  USA.
23 
24  *
25  * $Source: /homes/cvs/ftape-stacked/ftape/lowlevel/ftape-ecc.c,v $
26  * $Revision: 1.3 $
27  * $Date: 1997/10/05 19:18:10 $
28  *
29  *      This file contains the Reed-Solomon error correction code
30  *      for the QIC-40/80 floppy-tape driver for Linux.
31  */
32 
33 #include <linux/ftape.h>
34 
35 #include "../lowlevel/ftape-tracing.h"
36 #include "../lowlevel/ftape-ecc.h"
37 
38 /* Machines that are big-endian should define macro BIG_ENDIAN.
39  * Unfortunately, there doesn't appear to be a standard include file
40  * that works for all OSs.
41  */
42 
43 #if defined(__sparc__) || defined(__hppa)
44 #define BIG_ENDIAN
45 #endif				/* __sparc__ || __hppa */
46 
47 #if defined(__mips__)
48 #error Find a smart way to determine the Endianness of the MIPS CPU
49 #endif
50 
51 /* Notice: to minimize the potential for confusion, we use r to
52  *         denote the independent variable of the polynomials in the
53  *         Galois Field GF(2^8).  We reserve x for polynomials that
54  *         that have coefficients in GF(2^8).
55  *
56  * The Galois Field in which coefficient arithmetic is performed are
57  * the polynomials over Z_2 (i.e., 0 and 1) modulo the irreducible
58  * polynomial f(r), where f(r)=r^8 + r^7 + r^2 + r + 1.  A polynomial
59  * is represented as a byte with the MSB as the coefficient of r^7 and
60  * the LSB as the coefficient of r^0.  For example, the binary
61  * representation of f(x) is 0x187 (of course, this doesn't fit into 8
62  * bits).  In this field, the polynomial r is a primitive element.
63  * That is, r^i with i in 0,...,255 enumerates all elements in the
64  * field.
65  *
66  * The generator polynomial for the QIC-80 ECC is
67  *
68  *      g(x) = x^3 + r^105*x^2 + r^105*x + 1
69  *
70  * which can be factored into:
71  *
72  *      g(x) = (x-r^-1)(x-r^0)(x-r^1)
73  *
74  * the byte representation of the coefficients are:
75  *
76  *      r^105 = 0xc0
77  *      r^-1  = 0xc3
78  *      r^0   = 0x01
79  *      r^1   = 0x02
80  *
81  * Notice that r^-1 = r^254 as exponent arithmetic is performed
82  * modulo 2^8-1 = 255.
83  *
84  * For more information on Galois Fields and Reed-Solomon codes, refer
85  * to any good book.  I found _An Introduction to Error Correcting
86  * Codes with Applications_ by S. A. Vanstone and P. C. van Oorschot
87  * to be a good introduction into the former.  _CODING THEORY: The
88  * Essentials_ I found very useful for its concise description of
89  * Reed-Solomon encoding/decoding.
90  *
91  */
92 
93 typedef __u8 Matrix[3][3];
94 
95 /*
96  * gfpow[] is defined such that gfpow[i] returns r^i if
97  * i is in the range [0..255].
98  */
99 static const __u8 gfpow[] =
100 {
101 	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
102 	0x87, 0x89, 0x95, 0xad, 0xdd, 0x3d, 0x7a, 0xf4,
103 	0x6f, 0xde, 0x3b, 0x76, 0xec, 0x5f, 0xbe, 0xfb,
104 	0x71, 0xe2, 0x43, 0x86, 0x8b, 0x91, 0xa5, 0xcd,
105 	0x1d, 0x3a, 0x74, 0xe8, 0x57, 0xae, 0xdb, 0x31,
106 	0x62, 0xc4, 0x0f, 0x1e, 0x3c, 0x78, 0xf0, 0x67,
107 	0xce, 0x1b, 0x36, 0x6c, 0xd8, 0x37, 0x6e, 0xdc,
108 	0x3f, 0x7e, 0xfc, 0x7f, 0xfe, 0x7b, 0xf6, 0x6b,
109 	0xd6, 0x2b, 0x56, 0xac, 0xdf, 0x39, 0x72, 0xe4,
110 	0x4f, 0x9e, 0xbb, 0xf1, 0x65, 0xca, 0x13, 0x26,
111 	0x4c, 0x98, 0xb7, 0xe9, 0x55, 0xaa, 0xd3, 0x21,
112 	0x42, 0x84, 0x8f, 0x99, 0xb5, 0xed, 0x5d, 0xba,
113 	0xf3, 0x61, 0xc2, 0x03, 0x06, 0x0c, 0x18, 0x30,
114 	0x60, 0xc0, 0x07, 0x0e, 0x1c, 0x38, 0x70, 0xe0,
115 	0x47, 0x8e, 0x9b, 0xb1, 0xe5, 0x4d, 0x9a, 0xb3,
116 	0xe1, 0x45, 0x8a, 0x93, 0xa1, 0xc5, 0x0d, 0x1a,
117 	0x34, 0x68, 0xd0, 0x27, 0x4e, 0x9c, 0xbf, 0xf9,
118 	0x75, 0xea, 0x53, 0xa6, 0xcb, 0x11, 0x22, 0x44,
119 	0x88, 0x97, 0xa9, 0xd5, 0x2d, 0x5a, 0xb4, 0xef,
120 	0x59, 0xb2, 0xe3, 0x41, 0x82, 0x83, 0x81, 0x85,
121 	0x8d, 0x9d, 0xbd, 0xfd, 0x7d, 0xfa, 0x73, 0xe6,
122 	0x4b, 0x96, 0xab, 0xd1, 0x25, 0x4a, 0x94, 0xaf,
123 	0xd9, 0x35, 0x6a, 0xd4, 0x2f, 0x5e, 0xbc, 0xff,
124 	0x79, 0xf2, 0x63, 0xc6, 0x0b, 0x16, 0x2c, 0x58,
125 	0xb0, 0xe7, 0x49, 0x92, 0xa3, 0xc1, 0x05, 0x0a,
126 	0x14, 0x28, 0x50, 0xa0, 0xc7, 0x09, 0x12, 0x24,
127 	0x48, 0x90, 0xa7, 0xc9, 0x15, 0x2a, 0x54, 0xa8,
128 	0xd7, 0x29, 0x52, 0xa4, 0xcf, 0x19, 0x32, 0x64,
129 	0xc8, 0x17, 0x2e, 0x5c, 0xb8, 0xf7, 0x69, 0xd2,
130 	0x23, 0x46, 0x8c, 0x9f, 0xb9, 0xf5, 0x6d, 0xda,
131 	0x33, 0x66, 0xcc, 0x1f, 0x3e, 0x7c, 0xf8, 0x77,
132 	0xee, 0x5b, 0xb6, 0xeb, 0x51, 0xa2, 0xc3, 0x01
133 };
134 
135 /*
136  * This is a log table.  That is, gflog[r^i] returns i (modulo f(r)).
137  * gflog[0] is undefined and the first element is therefore not valid.
138  */
139 static const __u8 gflog[256] =
140 {
141 	0xff, 0x00, 0x01, 0x63, 0x02, 0xc6, 0x64, 0x6a,
142 	0x03, 0xcd, 0xc7, 0xbc, 0x65, 0x7e, 0x6b, 0x2a,
143 	0x04, 0x8d, 0xce, 0x4e, 0xc8, 0xd4, 0xbd, 0xe1,
144 	0x66, 0xdd, 0x7f, 0x31, 0x6c, 0x20, 0x2b, 0xf3,
145 	0x05, 0x57, 0x8e, 0xe8, 0xcf, 0xac, 0x4f, 0x83,
146 	0xc9, 0xd9, 0xd5, 0x41, 0xbe, 0x94, 0xe2, 0xb4,
147 	0x67, 0x27, 0xde, 0xf0, 0x80, 0xb1, 0x32, 0x35,
148 	0x6d, 0x45, 0x21, 0x12, 0x2c, 0x0d, 0xf4, 0x38,
149 	0x06, 0x9b, 0x58, 0x1a, 0x8f, 0x79, 0xe9, 0x70,
150 	0xd0, 0xc2, 0xad, 0xa8, 0x50, 0x75, 0x84, 0x48,
151 	0xca, 0xfc, 0xda, 0x8a, 0xd6, 0x54, 0x42, 0x24,
152 	0xbf, 0x98, 0x95, 0xf9, 0xe3, 0x5e, 0xb5, 0x15,
153 	0x68, 0x61, 0x28, 0xba, 0xdf, 0x4c, 0xf1, 0x2f,
154 	0x81, 0xe6, 0xb2, 0x3f, 0x33, 0xee, 0x36, 0x10,
155 	0x6e, 0x18, 0x46, 0xa6, 0x22, 0x88, 0x13, 0xf7,
156 	0x2d, 0xb8, 0x0e, 0x3d, 0xf5, 0xa4, 0x39, 0x3b,
157 	0x07, 0x9e, 0x9c, 0x9d, 0x59, 0x9f, 0x1b, 0x08,
158 	0x90, 0x09, 0x7a, 0x1c, 0xea, 0xa0, 0x71, 0x5a,
159 	0xd1, 0x1d, 0xc3, 0x7b, 0xae, 0x0a, 0xa9, 0x91,
160 	0x51, 0x5b, 0x76, 0x72, 0x85, 0xa1, 0x49, 0xeb,
161 	0xcb, 0x7c, 0xfd, 0xc4, 0xdb, 0x1e, 0x8b, 0xd2,
162 	0xd7, 0x92, 0x55, 0xaa, 0x43, 0x0b, 0x25, 0xaf,
163 	0xc0, 0x73, 0x99, 0x77, 0x96, 0x5c, 0xfa, 0x52,
164 	0xe4, 0xec, 0x5f, 0x4a, 0xb6, 0xa2, 0x16, 0x86,
165 	0x69, 0xc5, 0x62, 0xfe, 0x29, 0x7d, 0xbb, 0xcc,
166 	0xe0, 0xd3, 0x4d, 0x8c, 0xf2, 0x1f, 0x30, 0xdc,
167 	0x82, 0xab, 0xe7, 0x56, 0xb3, 0x93, 0x40, 0xd8,
168 	0x34, 0xb0, 0xef, 0x26, 0x37, 0x0c, 0x11, 0x44,
169 	0x6f, 0x78, 0x19, 0x9a, 0x47, 0x74, 0xa7, 0xc1,
170 	0x23, 0x53, 0x89, 0xfb, 0x14, 0x5d, 0xf8, 0x97,
171 	0x2e, 0x4b, 0xb9, 0x60, 0x0f, 0xed, 0x3e, 0xe5,
172 	0xf6, 0x87, 0xa5, 0x17, 0x3a, 0xa3, 0x3c, 0xb7
173 };
174 
175 /* This is a multiplication table for the factor 0xc0 (i.e., r^105 (mod f(r)).
176  * gfmul_c0[f] returns r^105 * f(r) (modulo f(r)).
177  */
178 static const __u8 gfmul_c0[256] =
179 {
180 	0x00, 0xc0, 0x07, 0xc7, 0x0e, 0xce, 0x09, 0xc9,
181 	0x1c, 0xdc, 0x1b, 0xdb, 0x12, 0xd2, 0x15, 0xd5,
182 	0x38, 0xf8, 0x3f, 0xff, 0x36, 0xf6, 0x31, 0xf1,
183 	0x24, 0xe4, 0x23, 0xe3, 0x2a, 0xea, 0x2d, 0xed,
184 	0x70, 0xb0, 0x77, 0xb7, 0x7e, 0xbe, 0x79, 0xb9,
185 	0x6c, 0xac, 0x6b, 0xab, 0x62, 0xa2, 0x65, 0xa5,
186 	0x48, 0x88, 0x4f, 0x8f, 0x46, 0x86, 0x41, 0x81,
187 	0x54, 0x94, 0x53, 0x93, 0x5a, 0x9a, 0x5d, 0x9d,
188 	0xe0, 0x20, 0xe7, 0x27, 0xee, 0x2e, 0xe9, 0x29,
189 	0xfc, 0x3c, 0xfb, 0x3b, 0xf2, 0x32, 0xf5, 0x35,
190 	0xd8, 0x18, 0xdf, 0x1f, 0xd6, 0x16, 0xd1, 0x11,
191 	0xc4, 0x04, 0xc3, 0x03, 0xca, 0x0a, 0xcd, 0x0d,
192 	0x90, 0x50, 0x97, 0x57, 0x9e, 0x5e, 0x99, 0x59,
193 	0x8c, 0x4c, 0x8b, 0x4b, 0x82, 0x42, 0x85, 0x45,
194 	0xa8, 0x68, 0xaf, 0x6f, 0xa6, 0x66, 0xa1, 0x61,
195 	0xb4, 0x74, 0xb3, 0x73, 0xba, 0x7a, 0xbd, 0x7d,
196 	0x47, 0x87, 0x40, 0x80, 0x49, 0x89, 0x4e, 0x8e,
197 	0x5b, 0x9b, 0x5c, 0x9c, 0x55, 0x95, 0x52, 0x92,
198 	0x7f, 0xbf, 0x78, 0xb8, 0x71, 0xb1, 0x76, 0xb6,
199 	0x63, 0xa3, 0x64, 0xa4, 0x6d, 0xad, 0x6a, 0xaa,
200 	0x37, 0xf7, 0x30, 0xf0, 0x39, 0xf9, 0x3e, 0xfe,
201 	0x2b, 0xeb, 0x2c, 0xec, 0x25, 0xe5, 0x22, 0xe2,
202 	0x0f, 0xcf, 0x08, 0xc8, 0x01, 0xc1, 0x06, 0xc6,
203 	0x13, 0xd3, 0x14, 0xd4, 0x1d, 0xdd, 0x1a, 0xda,
204 	0xa7, 0x67, 0xa0, 0x60, 0xa9, 0x69, 0xae, 0x6e,
205 	0xbb, 0x7b, 0xbc, 0x7c, 0xb5, 0x75, 0xb2, 0x72,
206 	0x9f, 0x5f, 0x98, 0x58, 0x91, 0x51, 0x96, 0x56,
207 	0x83, 0x43, 0x84, 0x44, 0x8d, 0x4d, 0x8a, 0x4a,
208 	0xd7, 0x17, 0xd0, 0x10, 0xd9, 0x19, 0xde, 0x1e,
209 	0xcb, 0x0b, 0xcc, 0x0c, 0xc5, 0x05, 0xc2, 0x02,
210 	0xef, 0x2f, 0xe8, 0x28, 0xe1, 0x21, 0xe6, 0x26,
211 	0xf3, 0x33, 0xf4, 0x34, 0xfd, 0x3d, 0xfa, 0x3a
212 };
213 
214 
215 /* Returns V modulo 255 provided V is in the range -255,-254,...,509.
216  */
mod255(int v)217 static inline __u8 mod255(int v)
218 {
219 	if (v > 0) {
220 		if (v < 255) {
221 			return v;
222 		} else {
223 			return v - 255;
224 		}
225 	} else {
226 		return v + 255;
227 	}
228 }
229 
230 
231 /* Add two numbers in the field.  Addition in this field is equivalent
232  * to a bit-wise exclusive OR operation---subtraction is therefore
233  * identical to addition.
234  */
gfadd(__u8 a,__u8 b)235 static inline __u8 gfadd(__u8 a, __u8 b)
236 {
237 	return a ^ b;
238 }
239 
240 
241 /* Add two vectors of numbers in the field.  Each byte in A and B gets
242  * added individually.
243  */
gfadd_long(unsigned long a,unsigned long b)244 static inline unsigned long gfadd_long(unsigned long a, unsigned long b)
245 {
246 	return a ^ b;
247 }
248 
249 
250 /* Multiply two numbers in the field:
251  */
gfmul(__u8 a,__u8 b)252 static inline __u8 gfmul(__u8 a, __u8 b)
253 {
254 	if (a && b) {
255 		return gfpow[mod255(gflog[a] + gflog[b])];
256 	} else {
257 		return 0;
258 	}
259 }
260 
261 
262 /* Just like gfmul, except we have already looked up the log of the
263  * second number.
264  */
gfmul_exp(__u8 a,int b)265 static inline __u8 gfmul_exp(__u8 a, int b)
266 {
267 	if (a) {
268 		return gfpow[mod255(gflog[a] + b)];
269 	} else {
270 		return 0;
271 	}
272 }
273 
274 
275 /* Just like gfmul_exp, except that A is a vector of numbers.  That
276  * is, each byte in A gets multiplied by gfpow[mod255(B)].
277  */
gfmul_exp_long(unsigned long a,int b)278 static inline unsigned long gfmul_exp_long(unsigned long a, int b)
279 {
280 	__u8 t;
281 
282 	if (sizeof(long) == 4) {
283 		return (
284 		((t = (__u32)a >> 24 & 0xff) ?
285 		 (((__u32) gfpow[mod255(gflog[t] + b)]) << 24) : 0) |
286 		((t = (__u32)a >> 16 & 0xff) ?
287 		 (((__u32) gfpow[mod255(gflog[t] + b)]) << 16) : 0) |
288 		((t = (__u32)a >> 8 & 0xff) ?
289 		 (((__u32) gfpow[mod255(gflog[t] + b)]) << 8) : 0) |
290 		((t = (__u32)a >> 0 & 0xff) ?
291 		 (((__u32) gfpow[mod255(gflog[t] + b)]) << 0) : 0));
292 	} else if (sizeof(long) == 8) {
293 		return (
294 		((t = (__u64)a >> 56 & 0xff) ?
295 		 (((__u64) gfpow[mod255(gflog[t] + b)]) << 56) : 0) |
296 		((t = (__u64)a >> 48 & 0xff) ?
297 		 (((__u64) gfpow[mod255(gflog[t] + b)]) << 48) : 0) |
298 		((t = (__u64)a >> 40 & 0xff) ?
299 		 (((__u64) gfpow[mod255(gflog[t] + b)]) << 40) : 0) |
300 		((t = (__u64)a >> 32 & 0xff) ?
301 		 (((__u64) gfpow[mod255(gflog[t] + b)]) << 32) : 0) |
302 		((t = (__u64)a >> 24 & 0xff) ?
303 		 (((__u64) gfpow[mod255(gflog[t] + b)]) << 24) : 0) |
304 		((t = (__u64)a >> 16 & 0xff) ?
305 		 (((__u64) gfpow[mod255(gflog[t] + b)]) << 16) : 0) |
306 		((t = (__u64)a >> 8 & 0xff) ?
307 		 (((__u64) gfpow[mod255(gflog[t] + b)]) << 8) : 0) |
308 		((t = (__u64)a >> 0 & 0xff) ?
309 		 (((__u64) gfpow[mod255(gflog[t] + b)]) << 0) : 0));
310 	} else {
311 		TRACE_FUN(ft_t_any);
312 		TRACE_ABORT(-1, ft_t_err, "Error: size of long is %d bytes",
313 			    (int)sizeof(long));
314 	}
315 }
316 
317 
318 /* Divide two numbers in the field.  Returns a/b (modulo f(x)).
319  */
gfdiv(__u8 a,__u8 b)320 static inline __u8 gfdiv(__u8 a, __u8 b)
321 {
322 	if (!b) {
323 		TRACE_FUN(ft_t_any);
324 		TRACE_ABORT(0xff, ft_t_bug, "Error: division by zero");
325 	} else if (a == 0) {
326 		return 0;
327 	} else {
328 		return gfpow[mod255(gflog[a] - gflog[b])];
329 	}
330 }
331 
332 
333 /* The following functions return the inverse of the matrix of the
334  * linear system that needs to be solved to determine the error
335  * magnitudes.  The first deals with matrices of rank 3, while the
336  * second deals with matrices of rank 2.  The error indices are passed
337  * in arguments L0,..,L2 (0=first sector, 31=last sector).  The error
338  * indices must be sorted in ascending order, i.e., L0<L1<L2.
339  *
340  * The linear system that needs to be solved for the error magnitudes
341  * is A * b = s, where s is the known vector of syndromes, b is the
342  * vector of error magnitudes and A in the ORDER=3 case:
343  *
344  *    A_3 = {{1/r^L[0], 1/r^L[1], 1/r^L[2]},
345  *          {        1,        1,        1},
346  *          { r^L[0], r^L[1], r^L[2]}}
347  */
gfinv3(__u8 l0,__u8 l1,__u8 l2,Matrix Ainv)348 static inline int gfinv3(__u8 l0,
349 			 __u8 l1,
350 			 __u8 l2,
351 			 Matrix Ainv)
352 {
353 	__u8 det;
354 	__u8 t20, t10, t21, t12, t01, t02;
355 	int log_det;
356 
357 	/* compute some intermediate results: */
358 	t20 = gfpow[l2 - l0];	        /* t20 = r^l2/r^l0 */
359 	t10 = gfpow[l1 - l0];	        /* t10 = r^l1/r^l0 */
360 	t21 = gfpow[l2 - l1];	        /* t21 = r^l2/r^l1 */
361 	t12 = gfpow[l1 - l2 + 255];	/* t12 = r^l1/r^l2 */
362 	t01 = gfpow[l0 - l1 + 255];	/* t01 = r^l0/r^l1 */
363 	t02 = gfpow[l0 - l2 + 255];	/* t02 = r^l0/r^l2 */
364 	/* Calculate the determinant of matrix A_3^-1 (sometimes
365 	 * called the Vandermonde determinant):
366 	 */
367 	det = gfadd(t20, gfadd(t10, gfadd(t21, gfadd(t12, gfadd(t01, t02)))));
368 	if (!det) {
369 		TRACE_FUN(ft_t_any);
370 		TRACE_ABORT(0, ft_t_err,
371 			   "Inversion failed (3 CRC errors, >0 CRC failures)");
372 	}
373 	log_det = 255 - gflog[det];
374 
375 	/* Now, calculate all of the coefficients:
376 	 */
377 	Ainv[0][0]= gfmul_exp(gfadd(gfpow[l1], gfpow[l2]), log_det);
378 	Ainv[0][1]= gfmul_exp(gfadd(t21, t12), log_det);
379 	Ainv[0][2]= gfmul_exp(gfadd(gfpow[255 - l1], gfpow[255 - l2]),log_det);
380 
381 	Ainv[1][0]= gfmul_exp(gfadd(gfpow[l0], gfpow[l2]), log_det);
382 	Ainv[1][1]= gfmul_exp(gfadd(t20, t02), log_det);
383 	Ainv[1][2]= gfmul_exp(gfadd(gfpow[255 - l0], gfpow[255 - l2]),log_det);
384 
385 	Ainv[2][0]= gfmul_exp(gfadd(gfpow[l0], gfpow[l1]), log_det);
386 	Ainv[2][1]= gfmul_exp(gfadd(t10, t01), log_det);
387 	Ainv[2][2]= gfmul_exp(gfadd(gfpow[255 - l0], gfpow[255 - l1]),log_det);
388 
389 	return 1;
390 }
391 
392 
gfinv2(__u8 l0,__u8 l1,Matrix Ainv)393 static inline int gfinv2(__u8 l0, __u8 l1, Matrix Ainv)
394 {
395 	__u8 det;
396 	__u8 t1, t2;
397 	int log_det;
398 
399 	t1 = gfpow[255 - l0];
400 	t2 = gfpow[255 - l1];
401 	det = gfadd(t1, t2);
402 	if (!det) {
403 		TRACE_FUN(ft_t_any);
404 		TRACE_ABORT(0, ft_t_err,
405 			   "Inversion failed (2 CRC errors, >0 CRC failures)");
406 	}
407 	log_det = 255 - gflog[det];
408 
409 	/* Now, calculate all of the coefficients:
410 	 */
411 	Ainv[0][0] = Ainv[1][0] = gfpow[log_det];
412 
413 	Ainv[0][1] = gfmul_exp(t2, log_det);
414 	Ainv[1][1] = gfmul_exp(t1, log_det);
415 
416 	return 1;
417 }
418 
419 
420 /* Multiply matrix A by vector S and return result in vector B.  M is
421  * assumed to be of order NxN, S and B of order Nx1.
422  */
gfmat_mul(int n,Matrix A,__u8 * s,__u8 * b)423 static inline void gfmat_mul(int n, Matrix A,
424 			     __u8 *s, __u8 *b)
425 {
426 	int i, j;
427 	__u8 dot_prod;
428 
429 	for (i = 0; i < n; ++i) {
430 		dot_prod = 0;
431 		for (j = 0; j < n; ++j) {
432 			dot_prod = gfadd(dot_prod, gfmul(A[i][j], s[j]));
433 		}
434 		b[i] = dot_prod;
435 	}
436 }
437 
438 
439 
440 /* The Reed Solomon ECC codes are computed over the N-th byte of each
441  * block, where N=SECTOR_SIZE.  There are up to 29 blocks of data, and
442  * 3 blocks of ECC.  The blocks are stored contiguously in memory.  A
443  * segment, consequently, is assumed to have at least 4 blocks: one or
444  * more data blocks plus three ECC blocks.
445  *
446  * Notice: In QIC-80 speak, a CRC error is a sector with an incorrect
447  *         CRC.  A CRC failure is a sector with incorrect data, but
448  *         a valid CRC.  In the error control literature, the former
449  *         is usually called "erasure", the latter "error."
450  */
451 /* Compute the parity bytes for C columns of data, where C is the
452  * number of bytes that fit into a long integer.  We use a linear
453  * feed-back register to do this.  The parity bytes P[0], P[STRIDE],
454  * P[2*STRIDE] are computed such that:
455  *
456  *              x^k * p(x) + m(x) = 0 (modulo g(x))
457  *
458  * where k = NBLOCKS,
459  *       p(x) = P[0] + P[STRIDE]*x + P[2*STRIDE]*x^2, and
460  *       m(x) = sum_{i=0}^k m_i*x^i.
461  *       m_i = DATA[i*SECTOR_SIZE]
462  */
set_parity(unsigned long * data,int nblocks,unsigned long * p,int stride)463 static inline void set_parity(unsigned long *data,
464 			      int nblocks,
465 			      unsigned long *p,
466 			      int stride)
467 {
468 	unsigned long p0, p1, p2, t1, t2, *end;
469 
470 	end = data + nblocks * (FT_SECTOR_SIZE / sizeof(long));
471 	p0 = p1 = p2 = 0;
472 	while (data < end) {
473 		/* The new parity bytes p0_i, p1_i, p2_i are computed
474 		 * from the old values p0_{i-1}, p1_{i-1}, p2_{i-1}
475 		 * recursively as:
476 		 *
477 		 *        p0_i = p1_{i-1} + r^105 * (m_{i-1} - p0_{i-1})
478 		 *        p1_i = p2_{i-1} + r^105 * (m_{i-1} - p0_{i-1})
479 		 *        p2_i =                    (m_{i-1} - p0_{i-1})
480 		 *
481 		 * With the initial condition: p0_0 = p1_0 = p2_0 = 0.
482 		 */
483 		t1 = gfadd_long(*data, p0);
484 		/*
485 		 * Multiply each byte in t1 by 0xc0:
486 		 */
487 		if (sizeof(long) == 4) {
488 			t2= (((__u32) gfmul_c0[(__u32)t1 >> 24 & 0xff]) << 24 |
489 			     ((__u32) gfmul_c0[(__u32)t1 >> 16 & 0xff]) << 16 |
490 			     ((__u32) gfmul_c0[(__u32)t1 >>  8 & 0xff]) <<  8 |
491 			     ((__u32) gfmul_c0[(__u32)t1 >>  0 & 0xff]) <<  0);
492 		} else if (sizeof(long) == 8) {
493 			t2= (((__u64) gfmul_c0[(__u64)t1 >> 56 & 0xff]) << 56 |
494 			     ((__u64) gfmul_c0[(__u64)t1 >> 48 & 0xff]) << 48 |
495 			     ((__u64) gfmul_c0[(__u64)t1 >> 40 & 0xff]) << 40 |
496 			     ((__u64) gfmul_c0[(__u64)t1 >> 32 & 0xff]) << 32 |
497 			     ((__u64) gfmul_c0[(__u64)t1 >> 24 & 0xff]) << 24 |
498 			     ((__u64) gfmul_c0[(__u64)t1 >> 16 & 0xff]) << 16 |
499 			     ((__u64) gfmul_c0[(__u64)t1 >>  8 & 0xff]) <<  8 |
500 			     ((__u64) gfmul_c0[(__u64)t1 >>  0 & 0xff]) <<  0);
501 		} else {
502 			TRACE_FUN(ft_t_any);
503 			TRACE(ft_t_err, "Error: long is of size %d",
504 			      (int) sizeof(long));
505 			TRACE_EXIT;
506 		}
507 		p0 = gfadd_long(t2, p1);
508 		p1 = gfadd_long(t2, p2);
509 		p2 = t1;
510 		data += FT_SECTOR_SIZE / sizeof(long);
511 	}
512 	*p = p0;
513 	p += stride;
514 	*p = p1;
515 	p += stride;
516 	*p = p2;
517 	return;
518 }
519 
520 
521 /* Compute the 3 syndrome values.  DATA should point to the first byte
522  * of the column for which the syndromes are desired.  The syndromes
523  * are computed over the first NBLOCKS of rows.  The three bytes will
524  * be placed in S[0], S[1], and S[2].
525  *
526  * S[i] is the value of the "message" polynomial m(x) evaluated at the
527  * i-th root of the generator polynomial g(x).
528  *
529  * As g(x)=(x-r^-1)(x-1)(x-r^1) we evaluate the message polynomial at
530  * x=r^-1 to get S[0], at x=r^0=1 to get S[1], and at x=r to get S[2].
531  * This could be done directly and efficiently via the Horner scheme.
532  * However, it would require multiplication tables for the factors
533  * r^-1 (0xc3) and r (0x02).  The following scheme does not require
534  * any multiplication tables beyond what's needed for set_parity()
535  * anyway and is slightly faster if there are no errors and slightly
536  * slower if there are errors.  The latter is hopefully the infrequent
537  * case.
538  *
539  * To understand the alternative algorithm, notice that set_parity(m,
540  * k, p) computes parity bytes such that:
541  *
542  *      x^k * p(x) = m(x) (modulo g(x)).
543  *
544  * That is, to evaluate m(r^m), where r^m is a root of g(x), we can
545  * simply evaluate (r^m)^k*p(r^m).  Also, notice that p is 0 if and
546  * only if s is zero.  That is, if all parity bytes are 0, we know
547  * there is no error in the data and consequently there is no need to
548  * compute s(x) at all!  In all other cases, we compute s(x) from p(x)
549  * by evaluating (r^m)^k*p(r^m) for m=-1, m=0, and m=1.  The p(x)
550  * polynomial is evaluated via the Horner scheme.
551  */
compute_syndromes(unsigned long * data,int nblocks,unsigned long * s)552 static int compute_syndromes(unsigned long *data, int nblocks, unsigned long *s)
553 {
554 	unsigned long p[3];
555 
556 	set_parity(data, nblocks, p, 1);
557 	if (p[0] | p[1] | p[2]) {
558 		/* Some of the checked columns do not have a zero
559 		 * syndrome.  For simplicity, we compute the syndromes
560 		 * for all columns that we have computed the
561 		 * remainders for.
562 		 */
563 		s[0] = gfmul_exp_long(
564 			gfadd_long(p[0],
565 				   gfmul_exp_long(
566 					   gfadd_long(p[1],
567 						      gfmul_exp_long(p[2], -1)),
568 					   -1)),
569 			-nblocks);
570 		s[1] = gfadd_long(gfadd_long(p[2], p[1]), p[0]);
571 		s[2] = gfmul_exp_long(
572 			gfadd_long(p[0],
573 				   gfmul_exp_long(
574 					   gfadd_long(p[1],
575 						      gfmul_exp_long(p[2], 1)),
576 					   1)),
577 			nblocks);
578 		return 0;
579 	} else {
580 		return 1;
581 	}
582 }
583 
584 
585 /* Correct the block in the column pointed to by DATA.  There are NBAD
586  * CRC errors and their indices are in BAD_LOC[0], up to
587  * BAD_LOC[NBAD-1].  If NBAD>1, Ainv holds the inverse of the matrix
588  * of the linear system that needs to be solved to determine the error
589  * magnitudes.  S[0], S[1], and S[2] are the syndrome values.  If row
590  * j gets corrected, then bit j will be set in CORRECTION_MAP.
591  */
correct_block(__u8 * data,int nblocks,int nbad,int * bad_loc,Matrix Ainv,__u8 * s,SectorMap * correction_map)592 static inline int correct_block(__u8 *data, int nblocks,
593 				int nbad, int *bad_loc, Matrix Ainv,
594 				__u8 *s,
595 				SectorMap * correction_map)
596 {
597 	int ncorrected = 0;
598 	int i;
599 	__u8 t1, t2;
600 	__u8 c0, c1, c2;	/* check bytes */
601 	__u8 error_mag[3], log_error_mag;
602 	__u8 *dp, l, e;
603 	TRACE_FUN(ft_t_any);
604 
605 	switch (nbad) {
606 	case 0:
607 		/* might have a CRC failure: */
608 		if (s[0] == 0) {
609 			/* more than one error */
610 			TRACE_ABORT(-1, ft_t_err,
611 				 "ECC failed (0 CRC errors, >1 CRC failures)");
612 		}
613 		t1 = gfdiv(s[1], s[0]);
614 		if ((bad_loc[nbad++] = gflog[t1]) >= nblocks) {
615 			TRACE(ft_t_err,
616 			      "ECC failed (0 CRC errors, >1 CRC failures)");
617 			TRACE_ABORT(-1, ft_t_err,
618 				  "attempt to correct data at %d", bad_loc[0]);
619 		}
620 		error_mag[0] = s[1];
621 		break;
622 	case 1:
623 		t1 = gfadd(gfmul_exp(s[1], bad_loc[0]), s[2]);
624 		t2 = gfadd(gfmul_exp(s[0], bad_loc[0]), s[1]);
625 		if (t1 == 0 && t2 == 0) {
626 			/* one erasure, no error: */
627 			Ainv[0][0] = gfpow[bad_loc[0]];
628 		} else if (t1 == 0 || t2 == 0) {
629 			/* one erasure and more than one error: */
630 			TRACE_ABORT(-1, ft_t_err,
631 				    "ECC failed (1 erasure, >1 error)");
632 		} else {
633 			/* one erasure, one error: */
634 			if ((bad_loc[nbad++] = gflog[gfdiv(t1, t2)])
635 			    >= nblocks) {
636 				TRACE(ft_t_err, "ECC failed "
637 				      "(1 CRC errors, >1 CRC failures)");
638 				TRACE_ABORT(-1, ft_t_err,
639 					    "attempt to correct data at %d",
640 					    bad_loc[1]);
641 			}
642 			if (!gfinv2(bad_loc[0], bad_loc[1], Ainv)) {
643 				/* inversion failed---must have more
644                                  *  than one error
645 				 */
646 				TRACE_EXIT -1;
647 			}
648 		}
649 		/* FALL THROUGH TO ERROR MAGNITUDE COMPUTATION:
650 		 */
651 	case 2:
652 	case 3:
653 		/* compute error magnitudes: */
654 		gfmat_mul(nbad, Ainv, s, error_mag);
655 		break;
656 
657 	default:
658 		TRACE_ABORT(-1, ft_t_err,
659 			    "Internal Error: number of CRC errors > 3");
660 	}
661 
662 	/* Perform correction by adding ERROR_MAG[i] to the byte at
663 	 * offset BAD_LOC[i].  Also add the value of the computed
664 	 * error polynomial to the syndrome values.  If the correction
665 	 * was successful, the resulting check bytes should be zero
666 	 * (i.e., the corrected data is a valid code word).
667 	 */
668 	c0 = s[0];
669 	c1 = s[1];
670 	c2 = s[2];
671 	for (i = 0; i < nbad; ++i) {
672 		e = error_mag[i];
673 		if (e) {
674 			/* correct the byte at offset L by magnitude E: */
675 			l = bad_loc[i];
676 			dp = &data[l * FT_SECTOR_SIZE];
677 			*dp = gfadd(*dp, e);
678 			*correction_map |= 1 << l;
679 			++ncorrected;
680 
681 			log_error_mag = gflog[e];
682 			c0 = gfadd(c0, gfpow[mod255(log_error_mag - l)]);
683 			c1 = gfadd(c1, e);
684 			c2 = gfadd(c2, gfpow[mod255(log_error_mag + l)]);
685 		}
686 	}
687 	if (c0 || c1 || c2) {
688 		TRACE_ABORT(-1, ft_t_err,
689 			    "ECC self-check failed, too many errors");
690 	}
691 	TRACE_EXIT ncorrected;
692 }
693 
694 
695 #if defined(ECC_SANITY_CHECK) || defined(ECC_PARANOID)
696 
697 /* Perform a sanity check on the computed parity bytes:
698  */
sanity_check(unsigned long * data,int nblocks)699 static int sanity_check(unsigned long *data, int nblocks)
700 {
701 	TRACE_FUN(ft_t_any);
702 	unsigned long s[3];
703 
704 	if (!compute_syndromes(data, nblocks, s)) {
705 		TRACE_ABORT(0, ft_bug,
706 			    "Internal Error: syndrome self-check failed");
707 	}
708 	TRACE_EXIT 1;
709 }
710 
711 #endif /* defined(ECC_SANITY_CHECK) || defined(ECC_PARANOID) */
712 
713 /* Compute the parity for an entire segment of data.
714  */
ftape_ecc_set_segment_parity(struct memory_segment * mseg)715 int ftape_ecc_set_segment_parity(struct memory_segment *mseg)
716 {
717 	int i;
718 	__u8 *parity_bytes;
719 
720 	parity_bytes = &mseg->data[(mseg->blocks - 3) * FT_SECTOR_SIZE];
721 	for (i = 0; i < FT_SECTOR_SIZE; i += sizeof(long)) {
722 		set_parity((unsigned long *) &mseg->data[i], mseg->blocks - 3,
723 			   (unsigned long *) &parity_bytes[i],
724 			   FT_SECTOR_SIZE / sizeof(long));
725 #ifdef ECC_PARANOID
726 		if (!sanity_check((unsigned long *) &mseg->data[i],
727 				   mseg->blocks)) {
728 			return -1;
729 		}
730 #endif				/* ECC_PARANOID */
731 	}
732 	return 0;
733 }
734 
735 
736 /* Checks and corrects (if possible) the segment MSEG.  Returns one of
737  * ECC_OK, ECC_CORRECTED, and ECC_FAILED.
738  */
ftape_ecc_correct_data(struct memory_segment * mseg)739 int ftape_ecc_correct_data(struct memory_segment *mseg)
740 {
741 	int col, i, result;
742 	int ncorrected = 0;
743 	int nerasures = 0;	/* # of erasures (CRC errors) */
744 	int erasure_loc[3];	/* erasure locations */
745 	unsigned long ss[3];
746 	__u8 s[3];
747 	Matrix Ainv;
748 	TRACE_FUN(ft_t_flow);
749 
750 	mseg->corrected = 0;
751 
752 	/* find first column that has non-zero syndromes: */
753 	for (col = 0; col < FT_SECTOR_SIZE; col += sizeof(long)) {
754 		if (!compute_syndromes((unsigned long *) &mseg->data[col],
755 				       mseg->blocks, ss)) {
756 			/* something is wrong---have to fix things */
757 			break;
758 		}
759 	}
760 	if (col >= FT_SECTOR_SIZE) {
761 		/* all syndromes are ok, therefore nothing to correct */
762 		TRACE_EXIT ECC_OK;
763 	}
764 	/* count the number of CRC errors if there were any: */
765 	if (mseg->read_bad) {
766 		for (i = 0; i < mseg->blocks; i++) {
767 			if (BAD_CHECK(mseg->read_bad, i)) {
768 				if (nerasures >= 3) {
769 					/* this is too much for ECC */
770 					TRACE_ABORT(ECC_FAILED, ft_t_err,
771 						"ECC failed (>3 CRC errors)");
772 				}	/* if */
773 				erasure_loc[nerasures++] = i;
774 			}
775 		}
776 	}
777 	/*
778 	 * If there are at least 2 CRC errors, determine inverse of matrix
779 	 * of linear system to be solved:
780 	 */
781 	switch (nerasures) {
782 	case 2:
783 		if (!gfinv2(erasure_loc[0], erasure_loc[1], Ainv)) {
784 			TRACE_EXIT ECC_FAILED;
785 		}
786 		break;
787 	case 3:
788 		if (!gfinv3(erasure_loc[0], erasure_loc[1],
789 			    erasure_loc[2], Ainv)) {
790 			TRACE_EXIT ECC_FAILED;
791 		}
792 		break;
793 	default:
794 		/* this is not an error condition... */
795 		break;
796 	}
797 
798 	do {
799 		for (i = 0; i < sizeof(long); ++i) {
800 			s[0] = ss[0];
801 			s[1] = ss[1];
802 			s[2] = ss[2];
803 			if (s[0] | s[1] | s[2]) {
804 #ifdef BIG_ENDIAN
805 				result = correct_block(
806 					&mseg->data[col + sizeof(long) - 1 - i],
807 					mseg->blocks,
808 					nerasures,
809 					erasure_loc,
810 					Ainv,
811 					s,
812 					&mseg->corrected);
813 #else
814 				result = correct_block(&mseg->data[col + i],
815 						       mseg->blocks,
816 						       nerasures,
817 						       erasure_loc,
818 						       Ainv,
819 						       s,
820 						       &mseg->corrected);
821 #endif
822 				if (result < 0) {
823 					TRACE_EXIT ECC_FAILED;
824 				}
825 				ncorrected += result;
826 			}
827 			ss[0] >>= 8;
828 			ss[1] >>= 8;
829 			ss[2] >>= 8;
830 		}
831 
832 #ifdef ECC_SANITY_CHECK
833 		if (!sanity_check((unsigned long *) &mseg->data[col],
834 				  mseg->blocks)) {
835 			TRACE_EXIT ECC_FAILED;
836 		}
837 #endif				/* ECC_SANITY_CHECK */
838 
839 		/* find next column with non-zero syndromes: */
840 		while ((col += sizeof(long)) < FT_SECTOR_SIZE) {
841 			if (!compute_syndromes((unsigned long *)
842 				    &mseg->data[col], mseg->blocks, ss)) {
843 				/* something is wrong---have to fix things */
844 				break;
845 			}
846 		}
847 	} while (col < FT_SECTOR_SIZE);
848 	if (ncorrected && nerasures == 0) {
849 		TRACE(ft_t_warn, "block contained error not caught by CRC");
850 	}
851 	TRACE((ncorrected > 0) ? ft_t_noise : ft_t_any, "number of corrections: %d", ncorrected);
852 	TRACE_EXIT ncorrected ? ECC_CORRECTED : ECC_OK;
853 }
854