1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * 842 Software Compression
4  *
5  * Copyright (C) 2015 Dan Streetman, IBM Corp
6  *
7  * See 842.h for details of the 842 compressed format.
8  */
9 
/* Prefix every pr_*() message emitted from this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
/*
 * Not referenced anywhere in this file; presumably consumed by one of the
 * headers included below -- TODO confirm.
 */
#define MODULE_NAME "842_compress"
12 
13 #include <linux/hashtable.h>
14 
15 #include "842.h"
16 #include "842_debugfs.h"
17 
/*
 * Sizes, expressed as a number of bits, handed to DECLARE_HASHTABLE() for the
 * 8-, 4- and 2-byte lookup tables in struct sw842_param.
 */
#define SW842_HASHTABLE8_BITS	(10)
#define SW842_HASHTABLE4_BITS	(11)
#define SW842_HASHTABLE2_BITS	(10)
21 
/* By default, we allow compressing input buffers of any length, but we must
 * use the non-standard "short data" template so the decompressor can correctly
 * reproduce the uncompressed data buffer at the right length.  However the
 * hardware 842 decompressor will not recognize the "short data" template, and
 * will fail to decompress any compressed buffer containing it (I have no idea
 * why anyone would want to use software to compress and hardware to decompress
 * but that's beside the point).  This parameter forces the compression
 * function to simply reject any input buffer that isn't a multiple of 8 bytes
 * long, instead of using the "short data" template, so that all compressed
 * buffers produced by this function will be decompressible by the 842 hardware
 * decompressor.  Unless you have a specific need for that, leave this disabled
 * so that any length buffer can be compressed.
 *
 * Exposed as the module parameter "strict" with mode 0644.
 */
static bool sw842_strict;
module_param_named(strict, sw842_strict, bool, 0644);
37 
/*
 * Table of the templates the compressor may emit.  Each row holds the four
 * operations that together describe one 8-byte input block, followed in
 * column 4 by the template code that add_template() writes to the output.
 *
 * The trailing comment on each row is the size in bits of the template's
 * parameters.  Rows are sorted by that size, and process_next() uses the first
 * row that matches, so cheaper encodings are preferred.  The last row (D8)
 * is the fallback that is used when no other row matches.
 */
static u8 comp_ops[OPS_MAX][5] = { /* params size in bits */
	{ I8, N0, N0, N0, 0x19 }, /* 8 */
	{ I4, I4, N0, N0, 0x18 }, /* 18 */
	{ I4, I2, I2, N0, 0x17 }, /* 25 */
	{ I2, I2, I4, N0, 0x13 }, /* 25 */
	{ I2, I2, I2, I2, 0x12 }, /* 32 */
	{ I4, I2, D2, N0, 0x16 }, /* 33 */
	{ I4, D2, I2, N0, 0x15 }, /* 33 */
	{ I2, D2, I4, N0, 0x0e }, /* 33 */
	{ D2, I2, I4, N0, 0x09 }, /* 33 */
	{ I2, I2, I2, D2, 0x11 }, /* 40 */
	{ I2, I2, D2, I2, 0x10 }, /* 40 */
	{ I2, D2, I2, I2, 0x0d }, /* 40 */
	{ D2, I2, I2, I2, 0x08 }, /* 40 */
	{ I4, D4, N0, N0, 0x14 }, /* 41 */
	{ D4, I4, N0, N0, 0x04 }, /* 41 */
	{ I2, I2, D4, N0, 0x0f }, /* 48 */
	{ I2, D2, I2, D2, 0x0c }, /* 48 */
	{ I2, D4, I2, N0, 0x0b }, /* 48 */
	{ D2, I2, I2, D2, 0x07 }, /* 48 */
	{ D2, I2, D2, I2, 0x06 }, /* 48 */
	{ D4, I2, I2, N0, 0x03 }, /* 48 */
	{ I2, D2, D4, N0, 0x0a }, /* 56 */
	{ D2, I2, D4, N0, 0x05 }, /* 56 */
	{ D4, I2, D2, N0, 0x02 }, /* 56 */
	{ D4, D2, I2, N0, 0x01 }, /* 56 */
	{ D8, N0, N0, N0, 0x00 }, /* 64 */
};
66 
/* Hash table entry remembering one previously seen 8-byte value. */
struct sw842_hlist_node8 {
	struct hlist_node node;	/* linkage into htable8 */
	u64 data;		/* the 8-byte value this entry stands for */
	u8 index;		/* position of this entry in the node8[] array */
};
72 
/* Hash table entry remembering one previously seen 4-byte value. */
struct sw842_hlist_node4 {
	struct hlist_node node;	/* linkage into htable4 */
	u32 data;		/* the 4-byte value this entry stands for */
	u16 index;		/* position of this entry in the node4[] array */
};
78 
/* Hash table entry remembering one previously seen 2-byte value. */
struct sw842_hlist_node2 {
	struct hlist_node node;	/* linkage into htable2 */
	u16 data;		/* the 2-byte value this entry stands for */
	u8 index;		/* position of this entry in the node2[] array */
};
84 
/*
 * Special values stored in the indexN[] fields of struct sw842_param in place
 * of a real (non-negative) node index:
 *  - INDEX_NOT_FOUND: the hash table was searched and holds no match
 *  - INDEX_NOT_CHECKED: the hash table has not been searched yet for the
 *    current block (set by process_next(), resolved lazily by check_index())
 */
#define INDEX_NOT_FOUND		(-1)
#define INDEX_NOT_CHECKED	(-2)
87 
/*
 * Complete state of one compression run.  sw842_compress() places this
 * structure in the working memory handed in by its caller.
 */
struct sw842_param {
	u8 *in;		/* current read position in the input buffer */
	u8 *instart;	/* start of the input buffer */
	u64 ilen;	/* input bytes not yet consumed */
	u8 *out;	/* output byte currently being filled */
	u64 olen;	/* output bytes still available starting at out */
	u8 bit;		/* number of bits already used in *out */
	/* current 8-byte input block, read big-endian as 1x8, 2x4 and 4x2 bytes */
	u64 data8[1];
	u32 data4[2];
	u16 data2[4];
	/* lookup result for each dataN value: a node index or INDEX_NOT_* */
	int index8[1];
	int index4[2];
	int index2[4];
	/* hash tables keyed by data value, one per data width */
	DECLARE_HASHTABLE(htable8, SW842_HASHTABLE8_BITS);
	DECLARE_HASHTABLE(htable4, SW842_HASHTABLE4_BITS);
	DECLARE_HASHTABLE(htable2, SW842_HASHTABLE2_BITS);
	/*
	 * Backing storage for the hash table entries; update_hashtables()
	 * picks the slot to overwrite from the current input position.
	 */
	struct sw842_hlist_node8 node8[1 << I8_BITS];
	struct sw842_hlist_node4 node4[1 << I4_BITS];
	struct sw842_hlist_node2 node2[1 << I2_BITS];
};
108 
/*
 * Read a @b-bit (16, 32 or 64) big-endian value from the input at byte offset
 * @o from the current position (p)->in and return it in CPU byte order.  The
 * read goes through get_unaligned(), so no alignment of the input is assumed.
 */
#define get_input_data(p, o, b)						\
	be##b##_to_cpu(get_unaligned((__be##b *)((p)->in + (o))))
111 
/*
 * Reset the @b-byte (2, 4 or 8) hash table of @p to empty and initialize
 * every entry of the matching node array: each node records its own array
 * position in .index, gets zeroed data and an unhashed list node.
 */
#define init_hashtable_nodes(p, b)	do {			\
	int _i;							\
	hash_init((p)->htable##b);				\
	for (_i = 0; _i < ARRAY_SIZE((p)->node##b); _i++) {	\
		(p)->node##b[_i].index = _i;			\
		(p)->node##b[_i].data = 0;			\
		INIT_HLIST_NODE(&(p)->node##b[_i].node);	\
	}							\
} while (0)
121 
/*
 * Search the @b-byte (2, 4 or 8) hash table of @p for an entry whose data
 * equals (p)->data<b>[n].  The result is stored in (p)->index<b>[n]: the
 * index of the first matching node, or INDEX_NOT_FOUND if there is none.
 *
 * Evaluates to true if a match was found, false otherwise.
 *
 * @p is parenthesized on every use so that any pointer expression can be
 * passed safely, matching check_index() and replace_hash().
 */
#define find_index(p, b, n)	({					\
	struct sw842_hlist_node##b *_n;					\
	(p)->index##b[n] = INDEX_NOT_FOUND;				\
	hash_for_each_possible((p)->htable##b, _n, node,		\
			       (p)->data##b[n]) {			\
		if ((p)->data##b[n] == _n->data) {			\
			(p)->index##b[n] = _n->index;			\
			break;						\
		}							\
	}								\
	(p)->index##b[n] >= 0;						\
})
133 
/*
 * Evaluate to true if (p)->data<b>[n] has a match in the @b-byte hash table.
 * The table is searched with find_index() only while the cached result is
 * still INDEX_NOT_CHECKED; otherwise the cached (p)->index<b>[n] is reused.
 */
#define check_index(p, b, n)			\
	((p)->index##b[n] == INDEX_NOT_CHECKED	\
	 ? find_index(p, b, n)			\
	 : (p)->index##b[n] >= 0)
138 
/*
 * Reuse node slot (@i + @d) of the @b-byte node array for the value currently
 * held in (p)->data<b>[d]: the node is removed from the hash table, given the
 * new data, and added back under the new key.
 */
#define replace_hash(p, b, i, d)	do {				\
	struct sw842_hlist_node##b *_n = &(p)->node##b[(i)+(d)];	\
	hash_del(&_n->node);						\
	_n->data = (p)->data##b[d];					\
	pr_debug("add hash index%x %x pos %x data %lx\n", b,		\
		 (unsigned int)_n->index,				\
		 (unsigned int)((p)->in - (p)->instart),		\
		 (unsigned long)_n->data);				\
	hash_add((p)->htable##b, &_n->node, _n->data);			\
} while (0)
149 
150 static u8 bmask[8] = { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe };
151 
152 static int add_bits(struct sw842_param *p, u64 d, u8 n);
153 
/*
 * Emit the low @n bits of @d as two consecutive add_bits() calls: first the
 * upper (@n - @s) bits, then the lower @s bits.
 *
 * Returns 0 on success, -EINVAL if @n does not exceed @s, or the error
 * reported by add_bits().
 */
static int __split_add_bits(struct sw842_param *p, u64 d, u8 n, u8 s)
{
	int err;

	/* nothing would be left for the upper part */
	if (n <= s)
		return -EINVAL;

	err = add_bits(p, d >> s, n - s);
	if (!err)
		err = add_bits(p, d & GENMASK_ULL(s - 1, 0), s);

	return err;
}
166 
/*
 * Append the low @n bits of @d to the output, most significant bit first,
 * starting at bit offset p->bit inside the byte at p->out.  Afterwards
 * p->out, p->olen and p->bit describe the new end of the output.
 *
 * Returns 0 on success, -EINVAL if @n is larger than 64, or -ENOSPC if the
 * bits do not fit into the remaining p->olen output bytes.
 */
static int add_bits(struct sw842_param *p, u64 d, u8 n)
{
	int used = p->bit;			/* bits taken in *out so far */
	int bits = used + n;			/* bits taken after this call */
	int nbytes = DIV_ROUND_UP(bits, 8);	/* output bytes touched */
	int shift = round_up(bits, 8) - bits;	/* left-align to a byte edge */
	u8 *out = p->out;
	u64 head;

	pr_debug("add %u bits %lx\n", (unsigned char)n, (unsigned long)d);

	if (n > 64)
		return -EINVAL;

	/*
	 * Write in two steps if the value would span more than 8 bytes (only
	 * possible for n == 64 with p->bit > 0), or if the wide store used
	 * below would run past the end of the output buffer although the
	 * bits themselves might still fit.
	 */
	if (bits > 64)
		return __split_add_bits(p, d, n, 32);
	if (p->olen < 8 && bits > 32 && bits <= 56)
		return __split_add_bits(p, d, n, 16);
	if (p->olen < 4 && bits > 16 && bits <= 24)
		return __split_add_bits(p, d, n, 8);

	if (nbytes > p->olen)
		return -ENOSPC;

	/* keep the bits that are already in the current byte */
	head = *out & bmask[used];
	d <<= shift;

	switch (nbytes) {
	case 0:
	case 1:
		*out = head | d;
		break;
	case 2:
		put_unaligned(cpu_to_be16(head << 8 | d), (__be16 *)out);
		break;
	case 3:
		/* 3 bytes of payload, stored with a 4-byte write */
		put_unaligned(cpu_to_be32(head << 24 | d << 8), (__be32 *)out);
		break;
	case 4:
		put_unaligned(cpu_to_be32(head << 24 | d), (__be32 *)out);
		break;
	case 5:
		/* 5 to 7 bytes of payload are stored with an 8-byte write */
		put_unaligned(cpu_to_be64(head << 56 | d << 24), (__be64 *)out);
		break;
	case 6:
		put_unaligned(cpu_to_be64(head << 56 | d << 16), (__be64 *)out);
		break;
	case 7:
		put_unaligned(cpu_to_be64(head << 56 | d << 8), (__be64 *)out);
		break;
	default:
		put_unaligned(cpu_to_be64(head << 56 | d), (__be64 *)out);
		break;
	}

	/* step over the bytes that are now full */
	p->out += bits / 8;
	p->olen -= bits / 8;
	p->bit = bits % 8;

	return 0;
}
221 
/*
 * Emit template number @c of comp_ops[] for the current 8-byte block: first
 * the template code, then the parameter of each of its operations (either an
 * index found in the hash tables or the literal data).
 *
 * The p->dataN fields, and the p->indexN fields for every index operation of
 * the template, must already be set for the current block.
 *
 * Returns 0 on success, -EINVAL if @c is out of range or the template is
 * malformed, or the error reported by add_bits().
 */
static int add_template(struct sw842_param *p, u8 c)
{
	int ret, i, b = 0;	/* b: bytes of the block covered so far */
	u8 *t;
	bool inv = false;

	if (c >= OPS_MAX)
		return -EINVAL;

	/* only index the table once @c is known to be in range */
	t = comp_ops[c];

	pr_debug("template %x\n", t[4]);

	ret = add_bits(p, t[4], OP_BITS);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		pr_debug("op %x\n", t[i]);

		switch (t[i] & OP_AMOUNT) {
		case OP_AMOUNT_8:
			/* an 8-byte op must be the first op of the template */
			if (b)
				inv = true;
			else if (t[i] & OP_ACTION_INDEX)
				ret = add_bits(p, p->index8[0], I8_BITS);
			else if (t[i] & OP_ACTION_DATA)
				ret = add_bits(p, p->data8[0], 64);
			else
				inv = true;
			break;
		case OP_AMOUNT_4:
			/*
			 * literal data may also start at offset 2; that value
			 * is not cached in data4[], so read it from the input
			 */
			if (b == 2 && (t[i] & OP_ACTION_DATA))
				ret = add_bits(p, get_input_data(p, 2, 32), 32);
			else if (b != 0 && b != 4)
				inv = true;
			else if (t[i] & OP_ACTION_INDEX)
				ret = add_bits(p, p->index4[b >> 2], I4_BITS);
			else if (t[i] & OP_ACTION_DATA)
				ret = add_bits(p, p->data4[b >> 2], 32);
			else
				inv = true;
			break;
		case OP_AMOUNT_2:
			/*
			 * An invalid offset must not be used to index
			 * index2[]/data2[] or to emit any bits.
			 */
			if (b != 0 && b != 2 && b != 4 && b != 6)
				inv = true;
			else if (t[i] & OP_ACTION_INDEX)
				ret = add_bits(p, p->index2[b >> 1], I2_BITS);
			else if (t[i] & OP_ACTION_DATA)
				ret = add_bits(p, p->data2[b >> 1], 16);
			else
				inv = true;
			break;
		case OP_AMOUNT_0:
			/* a no-op is only valid once all 8 bytes are covered */
			inv = (b != 8) || !(t[i] & OP_ACTION_NOOP);
			break;
		default:
			inv = true;
			break;
		}

		if (ret)
			return ret;

		if (inv) {
			pr_err("Invalid templ %x op %d : %x %x %x %x\n",
			       c, i, t[0], t[1], t[2], t[3]);
			return -EINVAL;
		}

		b += t[i] & OP_AMOUNT;
	}

	if (b != 8) {
		pr_err("Invalid template %x len %x : %x %x %x %x\n",
		       c, b, t[0], t[1], t[2], t[3]);
		return -EINVAL;
	}

	if (sw842_template_counts)
		atomic_inc(&template_count[t[4]]);

	return 0;
}
304 
/*
 * Emit a repeat template telling the decompressor to repeat the previous
 * 8-byte block @r times.
 *
 * Returns 0 on success, -EINVAL if @r is 0 or too large to be encoded, or
 * the error reported by add_bits().
 */
static int add_repeat_template(struct sw842_param *p, u8 r)
{
	int err;

	if (r == 0)
		return -EINVAL;

	/* the encoded repeat parameter is 0-based */
	r--;
	if (r > REPEAT_BITS_MAX)
		return -EINVAL;

	err = add_bits(p, OP_REPEAT, OP_BITS);
	if (err)
		return err;

	err = add_bits(p, r, REPEAT_BITS);
	if (err)
		return err;

	if (sw842_template_counts)
		atomic_inc(&template_repeat_count);

	return 0;
}
326 
/*
 * Emit a "short data" template: the template code, the byte count @b, and
 * then @b literal bytes taken from the current input position.
 *
 * Returns 0 on success, -EINVAL if @b is 0 or larger than
 * SHORT_DATA_BITS_MAX, or the error reported by add_bits().
 */
static int add_short_data_template(struct sw842_param *p, u8 b)
{
	const u8 *src;
	int err;

	if (b == 0 || b > SHORT_DATA_BITS_MAX)
		return -EINVAL;

	err = add_bits(p, OP_SHORT_DATA, OP_BITS);
	if (err)
		return err;

	err = add_bits(p, b, SHORT_DATA_BITS);
	if (err)
		return err;

	/* copy the literal bytes one at a time */
	for (src = p->in; src < p->in + b; src++) {
		err = add_bits(p, *src, 8);
		if (err)
			return err;
	}

	if (sw842_template_counts)
		atomic_inc(&template_short_data_count);

	return 0;
}
353 
add_zeros_template(struct sw842_param * p)354 static int add_zeros_template(struct sw842_param *p)
355 {
356 	int ret = add_bits(p, OP_ZEROS, OP_BITS);
357 
358 	if (ret)
359 		return ret;
360 
361 	if (sw842_template_counts)
362 		atomic_inc(&template_zeros_count);
363 
364 	return 0;
365 }
366 
add_end_template(struct sw842_param * p)367 static int add_end_template(struct sw842_param *p)
368 {
369 	int ret = add_bits(p, OP_END, OP_BITS);
370 
371 	if (ret)
372 		return ret;
373 
374 	if (sw842_template_counts)
375 		atomic_inc(&template_end_count);
376 
377 	return 0;
378 }
379 
check_template(struct sw842_param * p,u8 c)380 static bool check_template(struct sw842_param *p, u8 c)
381 {
382 	u8 *t = comp_ops[c];
383 	int i, match, b = 0;
384 
385 	if (c >= OPS_MAX)
386 		return false;
387 
388 	for (i = 0; i < 4; i++) {
389 		if (t[i] & OP_ACTION_INDEX) {
390 			if (t[i] & OP_AMOUNT_2)
391 				match = check_index(p, 2, b >> 1);
392 			else if (t[i] & OP_AMOUNT_4)
393 				match = check_index(p, 4, b >> 2);
394 			else if (t[i] & OP_AMOUNT_8)
395 				match = check_index(p, 8, 0);
396 			else
397 				return false;
398 			if (!match)
399 				return false;
400 		}
401 
402 		b += t[i] & OP_AMOUNT;
403 	}
404 
405 	return true;
406 }
407 
get_next_data(struct sw842_param * p)408 static void get_next_data(struct sw842_param *p)
409 {
410 	p->data8[0] = get_input_data(p, 0, 64);
411 	p->data4[0] = get_input_data(p, 0, 32);
412 	p->data4[1] = get_input_data(p, 4, 32);
413 	p->data2[0] = get_input_data(p, 0, 16);
414 	p->data2[1] = get_input_data(p, 2, 16);
415 	p->data2[2] = get_input_data(p, 4, 16);
416 	p->data2[3] = get_input_data(p, 6, 16);
417 }
418 
419 /* update the hashtable entries.
420  * only call this after finding/adding the current template
421  * the dataN fields for the current 8 byte block must be already updated
422  */
update_hashtables(struct sw842_param * p)423 static void update_hashtables(struct sw842_param *p)
424 {
425 	u64 pos = p->in - p->instart;
426 	u64 n8 = (pos >> 3) % (1 << I8_BITS);
427 	u64 n4 = (pos >> 2) % (1 << I4_BITS);
428 	u64 n2 = (pos >> 1) % (1 << I2_BITS);
429 
430 	replace_hash(p, 8, n8, 0);
431 	replace_hash(p, 4, n4, 0);
432 	replace_hash(p, 4, n4, 1);
433 	replace_hash(p, 2, n2, 0);
434 	replace_hash(p, 2, n2, 1);
435 	replace_hash(p, 2, n2, 2);
436 	replace_hash(p, 2, n2, 3);
437 }
438 
439 /* find the next template to use, and add it
440  * the p->dataN fields must already be set for the current 8 byte block
441  */
process_next(struct sw842_param * p)442 static int process_next(struct sw842_param *p)
443 {
444 	int ret, i;
445 
446 	p->index8[0] = INDEX_NOT_CHECKED;
447 	p->index4[0] = INDEX_NOT_CHECKED;
448 	p->index4[1] = INDEX_NOT_CHECKED;
449 	p->index2[0] = INDEX_NOT_CHECKED;
450 	p->index2[1] = INDEX_NOT_CHECKED;
451 	p->index2[2] = INDEX_NOT_CHECKED;
452 	p->index2[3] = INDEX_NOT_CHECKED;
453 
454 	/* check up to OPS_MAX - 1; last op is our fallback */
455 	for (i = 0; i < OPS_MAX - 1; i++) {
456 		if (check_template(p, i))
457 			break;
458 	}
459 
460 	ret = add_template(p, i);
461 	if (ret)
462 		return ret;
463 
464 	return 0;
465 }
466 
467 /**
468  * sw842_compress
469  *
470  * Compress the uncompressed buffer of length @ilen at @in to the output buffer
471  * @out, using no more than @olen bytes, using the 842 compression format.
472  *
473  * Returns: 0 on success, error on failure.  The @olen parameter
474  * will contain the number of output bytes written on success, or
475  * 0 on error.
476  */
sw842_compress(const u8 * in,unsigned int ilen,u8 * out,unsigned int * olen,void * wmem)477 int sw842_compress(const u8 *in, unsigned int ilen,
478 		   u8 *out, unsigned int *olen, void *wmem)
479 {
480 	struct sw842_param *p = (struct sw842_param *)wmem;
481 	int ret;
482 	u64 last, next, pad, total;
483 	u8 repeat_count = 0;
484 	u32 crc;
485 
486 	BUILD_BUG_ON(sizeof(*p) > SW842_MEM_COMPRESS);
487 
488 	init_hashtable_nodes(p, 8);
489 	init_hashtable_nodes(p, 4);
490 	init_hashtable_nodes(p, 2);
491 
492 	p->in = (u8 *)in;
493 	p->instart = p->in;
494 	p->ilen = ilen;
495 	p->out = out;
496 	p->olen = *olen;
497 	p->bit = 0;
498 
499 	total = p->olen;
500 
501 	*olen = 0;
502 
503 	/* if using strict mode, we can only compress a multiple of 8 */
504 	if (sw842_strict && (ilen % 8)) {
505 		pr_err("Using strict mode, can't compress len %d\n", ilen);
506 		return -EINVAL;
507 	}
508 
509 	/* let's compress at least 8 bytes, mkay? */
510 	if (unlikely(ilen < 8))
511 		goto skip_comp;
512 
513 	/* make initial 'last' different so we don't match the first time */
514 	last = ~get_unaligned((u64 *)p->in);
515 
516 	while (p->ilen > 7) {
517 		next = get_unaligned((u64 *)p->in);
518 
519 		/* must get the next data, as we need to update the hashtable
520 		 * entries with the new data every time
521 		 */
522 		get_next_data(p);
523 
524 		/* we don't care about endianness in last or next;
525 		 * we're just comparing 8 bytes to another 8 bytes,
526 		 * they're both the same endianness
527 		 */
528 		if (next == last) {
529 			/* repeat count bits are 0-based, so we stop at +1 */
530 			if (++repeat_count <= REPEAT_BITS_MAX)
531 				goto repeat;
532 		}
533 		if (repeat_count) {
534 			ret = add_repeat_template(p, repeat_count);
535 			repeat_count = 0;
536 			if (next == last) /* reached max repeat bits */
537 				goto repeat;
538 		}
539 
540 		if (next == 0)
541 			ret = add_zeros_template(p);
542 		else
543 			ret = process_next(p);
544 
545 		if (ret)
546 			return ret;
547 
548 repeat:
549 		last = next;
550 		update_hashtables(p);
551 		p->in += 8;
552 		p->ilen -= 8;
553 	}
554 
555 	if (repeat_count) {
556 		ret = add_repeat_template(p, repeat_count);
557 		if (ret)
558 			return ret;
559 	}
560 
561 skip_comp:
562 	if (p->ilen > 0) {
563 		ret = add_short_data_template(p, p->ilen);
564 		if (ret)
565 			return ret;
566 
567 		p->in += p->ilen;
568 		p->ilen = 0;
569 	}
570 
571 	ret = add_end_template(p);
572 	if (ret)
573 		return ret;
574 
575 	/*
576 	 * crc(0:31) is appended to target data starting with the next
577 	 * bit after End of stream template.
578 	 * nx842 calculates CRC for data in big-endian format. So doing
579 	 * same here so that sw842 decompression can be used for both
580 	 * compressed data.
581 	 */
582 	crc = crc32_be(0, in, ilen);
583 	ret = add_bits(p, crc, CRC_BITS);
584 	if (ret)
585 		return ret;
586 
587 	if (p->bit) {
588 		p->out++;
589 		p->olen--;
590 		p->bit = 0;
591 	}
592 
593 	/* pad compressed length to multiple of 8 */
594 	pad = (8 - ((total - p->olen) % 8)) % 8;
595 	if (pad) {
596 		if (pad > p->olen) /* we were so close! */
597 			return -ENOSPC;
598 		memset(p->out, 0, pad);
599 		p->out += pad;
600 		p->olen -= pad;
601 	}
602 
603 	if (unlikely((total - p->olen) > UINT_MAX))
604 		return -ENOSPC;
605 
606 	*olen = total - p->olen;
607 
608 	return 0;
609 }
610 EXPORT_SYMBOL_GPL(sw842_compress);
611 
sw842_init(void)612 static int __init sw842_init(void)
613 {
614 	if (sw842_template_counts)
615 		sw842_debugfs_create();
616 
617 	return 0;
618 }
619 module_init(sw842_init);
620 
sw842_exit(void)621 static void __exit sw842_exit(void)
622 {
623 	if (sw842_template_counts)
624 		sw842_debugfs_remove();
625 }
626 module_exit(sw842_exit);
627 
628 MODULE_LICENSE("GPL");
629 MODULE_DESCRIPTION("Software 842 Compressor");
630 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
631