1 /* Linux driver for Philips webcam
   Decompression for chipset version 2 and 3
3    (C) 2004-2006  Luc Saillard (luc@saillard.org)
4 
5    NOTE: this version of pwc is an unofficial (modified) release of pwc & pcwx
6    driver and thus may have bugs that are not present in the original version.
7    Please send bug reports and support requests to <luc@saillard.org>.
8    The decompression routines have been implemented by reverse-engineering the
9    Nemosoft binary pwcx module. Caveat emptor.
10 
11    This program is free software; you can redistribute it and/or modify
12    it under the terms of the GNU General Public License as published by
13    the Free Software Foundation; either version 2 of the License, or
14    (at your option) any later version.
15 
16    This program is distributed in the hope that it will be useful,
17    but WITHOUT ANY WARRANTY; without even the implied warranty of
18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19    GNU General Public License for more details.
20 
21    You should have received a copy of the GNU General Public License
22    along with this program; if not, write to the Free Software
23    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24 
25 */
26 
27 #include "pwc-timon.h"
28 #include "pwc-kiara.h"
29 #include "pwc-dec23.h"
30 #include <media/pwc-ioctl.h>
31 
32 #include <linux/string.h>
33 #include <linux/slab.h>
34 
/*
 * USE_LOOKUP_TABLE_TO_CLAMP
 *   0: clamp with plain C comparisons:  { a<0 ? 0 : (a>255 ? 255 : a) }
 *   1: clamp through a lookup table, faster on CPUs with a big cache (intel)
 */
#define USE_LOOKUP_TABLE_TO_CLAMP	1

/*
 * UNROLL_LOOP_FOR_COPY
 *   0: copy the decoded blocks with loops: smaller code, but a little slower
 *   1: copy the decoded blocks with unrolled code; gcc produces somewhat
 *      faster code this way (perhaps only valid for intel class processors).
 *      The unrolled copies index the clamp table directly, so enabling this
 *      option automatically enables USE_LOOKUP_TABLE_TO_CLAMP.
 */
#define UNROLL_LOOP_FOR_COPY		1

#if UNROLL_LOOP_FOR_COPY
# undef USE_LOOKUP_TABLE_TO_CLAMP
# define USE_LOOKUP_TABLE_TO_CLAMP 1
#endif

/*
 * ENABLE_BAYER_DECODER
 *   0: the bayer decoder is not built (saves some space)
 *   1: the bayer decoder is built and can be used
 */
#define ENABLE_BAYER_DECODER 0
60 
build_subblock_pattern(struct pwc_dec23_private * pdec)61 static void build_subblock_pattern(struct pwc_dec23_private *pdec)
62 {
63 	static const unsigned int initial_values[12] = {
64 		-0x526500, -0x221200, 0x221200, 0x526500,
65 			   -0x3de200, 0x3de200,
66 		-0x6db480, -0x2d5d00, 0x2d5d00, 0x6db480,
67 			   -0x12c200, 0x12c200
68 
69 	};
70 	static const unsigned int values_derivated[12] = {
71 		0xa4ca, 0x4424, -0x4424, -0xa4ca,
72 			0x7bc4, -0x7bc4,
73 		0xdb69, 0x5aba, -0x5aba, -0xdb69,
74 			0x2584, -0x2584
75 	};
76 	unsigned int temp_values[12];
77 	int i, j;
78 
79 	memcpy(temp_values, initial_values, sizeof(initial_values));
80 	for (i = 0; i < 256; i++) {
81 		for (j = 0; j < 12; j++) {
82 			pdec->table_subblock[i][j] = temp_values[j];
83 			temp_values[j] += values_derivated[j];
84 		}
85 	}
86 }
87 
build_bit_powermask_table(struct pwc_dec23_private * pdec)88 static void build_bit_powermask_table(struct pwc_dec23_private *pdec)
89 {
90 	unsigned char *p;
91 	unsigned int bit, byte, mask, val;
92 	unsigned int bitpower = 1;
93 
94 	for (bit = 0; bit < 8; bit++) {
95 		mask = bitpower - 1;
96 		p = pdec->table_bitpowermask[bit];
97 		for (byte = 0; byte < 256; byte++) {
98 			val = (byte & mask);
99 			if (byte & bitpower)
100 				val = -val;
101 			*p++ = val;
102 		}
103 		bitpower<<=1;
104 	}
105 }
106 
107 
/*
 * Expand one ROM table into the two per-compression-mode lookup tables.
 *
 * romtable: 16 compression modes x 8 words; each word packs six 3-bit fields.
 * p0004:    for each mode, 8 groups of 128 bytes. Group j, position k holds
 *           m * (1 << bit) + 0x80 at offset k + 0x10 * (m - 1) for m = 1..4,
 *           and -m * (1 << bit) + 0x80 at offset k + 0x40 + 0x10 * (m - 1).
 * p8004:    for each mode, 8 groups of 16 byte pairs. Pair k of group j is
 *           { 8, bit } for k == 0 and { j - bit, bit } otherwise.
 *
 * 'bit' is 1 for position 0; for the other positions it is the 3-bit field
 * of romtable[mode][j] selected by field_shift[k]. All stores go to
 * unsigned char, so values are kept modulo 256.
 */
static void build_table_color(const unsigned int romtable[16][8],
			      unsigned char p0004[16][1024],
			      unsigned char p8004[16][256])
{
	/* Right shift that brings the field used by position k down to bit 0
	 * (entry 0 is unused: position 0 does not read the ROM word). */
	static const unsigned char field_shift[16] = {
		0, 15, 15, 12, 12, 12, 9, 9, 9, 9, 6, 6, 6, 3, 3, 0
	};
	int mode, j, k, m;

	/* We have 16 compression tables */
	for (mode = 0; mode < 16; mode++) {
		for (j = 0; j < 8; j++) {
			const unsigned int word = romtable[mode][j];
			unsigned char *levels = &p0004[mode][j * 128];
			unsigned char *pairs = &p8004[mode][j * 32];

			for (k = 0; k < 16; k++) {
				int bit, pw;

				if (k == 0)
					bit = 1;
				else
					bit = (word >> field_shift[k]) & 7;

				pairs[2 * k] = (k == 0) ? 8 : j - bit;
				pairs[2 * k + 1] = bit;

				pw = 1 << bit;
				for (m = 1; m <= 4; m++) {
					levels[k + 0x10 * (m - 1)] = (m * pw) + 0x80;
					levels[k + 0x40 + 0x10 * (m - 1)] = (-m * pw) + 0x80;
				}
			}
		}
	}
}
158 
159 /*
160  *
161  */
fill_table_dc00_d800(struct pwc_dec23_private * pdec)162 static void fill_table_dc00_d800(struct pwc_dec23_private *pdec)
163 {
164 #define SCALEBITS 15
165 #define ONE_HALF  (1UL << (SCALEBITS - 1))
166 	int i;
167 	unsigned int offset1 = ONE_HALF;
168 	unsigned int offset2 = 0x0000;
169 
170 	for (i=0; i<256; i++) {
171 		pdec->table_dc00[i] = offset1 & ~(ONE_HALF);
172 		pdec->table_d800[i] = offset2;
173 
174 		offset1 += 0x7bc4;
175 		offset2 += 0x7bc4;
176 	}
177 }
178 
179 /*
180  * To decode the stream:
181  *   if look_bits(2) == 0:	# op == 2 in the lookup table
182  *      skip_bits(2)
183  *      end of the stream
184  *   elif look_bits(3) == 7:	# op == 1 in the lookup table
185  *      skip_bits(3)
186  *      yyyy = get_bits(4)
187  *      xxxx = get_bits(8)
188  *   else:			# op == 0 in the lookup table
189  *      skip_bits(x)
190  *
 * To speed up processing, we build a lookup table indexed by the next 6 bits of the stream.
192  *
193  * struct {
194  *   unsigned char op;	    // operation to execute
195  *   unsigned char bits;    // bits use to perform operation
196  *   unsigned char offset1; // offset to add to access in the table_0004 % 16
197  *   unsigned char offset2; // offset to add to access in the table_0004
198  * }
199  *
200  * How to build this table ?
201  *   op == 2 when (i%4)==0
202  *   op == 1 when (i%8)==7
203  *   op == 0 otherwise
204  *
205  */
static const unsigned char hash_table_ops[64*4] = {
	/* One 4-byte entry per 6-bit stream prefix:
	 * op, bits, offset1, offset2               index */
	2, 0, 0, 0x00,				/*  0 */
	0, 3, 1, 0x00,				/*  1 */
	0, 4, 1, 0x10,				/*  2 */
	0, 6, 1, 0x30,				/*  3 */
	2, 0, 0, 0x00,				/*  4 */
	0, 3, 1, 0x40,				/*  5 */
	0, 5, 1, 0x20,				/*  6 */
	1, 0, 0, 0x00,				/*  7 */

	2, 0, 0, 0x00,				/*  8 */
	0, 3, 1, 0x00,				/*  9 */
	0, 4, 1, 0x50,				/* 10 */
	0, 5, 2, 0x00,				/* 11 */
	2, 0, 0, 0x00,				/* 12 */
	0, 3, 1, 0x40,				/* 13 */
	0, 5, 3, 0x00,				/* 14 */
	1, 0, 0, 0x00,				/* 15 */

	2, 0, 0, 0x00,				/* 16 */
	0, 3, 1, 0x00,				/* 17 */
	0, 4, 1, 0x10,				/* 18 */
	0, 6, 2, 0x10,				/* 19 */
	2, 0, 0, 0x00,				/* 20 */
	0, 3, 1, 0x40,				/* 21 */
	0, 5, 1, 0x60,				/* 22 */
	1, 0, 0, 0x00,				/* 23 */

	2, 0, 0, 0x00,				/* 24 */
	0, 3, 1, 0x00,				/* 25 */
	0, 4, 1, 0x50,				/* 26 */
	0, 5, 2, 0x40,				/* 27 */
	2, 0, 0, 0x00,				/* 28 */
	0, 3, 1, 0x40,				/* 29 */
	0, 5, 3, 0x40,				/* 30 */
	1, 0, 0, 0x00,				/* 31 */

	2, 0, 0, 0x00,				/* 32 */
	0, 3, 1, 0x00,				/* 33 */
	0, 4, 1, 0x10,				/* 34 */
	0, 6, 1, 0x70,				/* 35 */
	2, 0, 0, 0x00,				/* 36 */
	0, 3, 1, 0x40,				/* 37 */
	0, 5, 1, 0x20,				/* 38 */
	1, 0, 0, 0x00,				/* 39 */

	2, 0, 0, 0x00,				/* 40 */
	0, 3, 1, 0x00,				/* 41 */
	0, 4, 1, 0x50,				/* 42 */
	0, 5, 2, 0x00,				/* 43 */
	2, 0, 0, 0x00,				/* 44 */
	0, 3, 1, 0x40,				/* 45 */
	0, 5, 3, 0x00,				/* 46 */
	1, 0, 0, 0x00,				/* 47 */

	2, 0, 0, 0x00,				/* 48 */
	0, 3, 1, 0x00,				/* 49 */
	0, 4, 1, 0x10,				/* 50 */
	0, 6, 2, 0x50,				/* 51 */
	2, 0, 0, 0x00,				/* 52 */
	0, 3, 1, 0x40,				/* 53 */
	0, 5, 1, 0x60,				/* 54 */
	1, 0, 0, 0x00,				/* 55 */

	2, 0, 0, 0x00,				/* 56 */
	0, 3, 1, 0x00,				/* 57 */
	0, 4, 1, 0x50,				/* 58 */
	0, 5, 2, 0x40,				/* 59 */
	2, 0, 0, 0x00,				/* 60 */
	0, 3, 1, 0x40,				/* 61 */
	0, 5, 3, 0x40,				/* 62 */
	1, 0, 0, 0x00				/* 63 */
};
272 
/*
 * MulIdx[offset][i]: for each of the 16 pattern offsets, the column of a
 * table_subblock row that decode_block() adds to temp_colors[i].
 * Each pattern is laid out below as the 4x4 block it applies to.
 */
static const unsigned int MulIdx[16][16] = {
	{	/* 0 */
		0, 0, 0, 0,
		0, 0, 0, 0,
		0, 0, 0, 0,
		0, 0, 0, 0,
	},
	{	/* 1 */
		0, 1, 2, 3,
		0, 1, 2, 3,
		0, 1, 2, 3,
		0, 1, 2, 3,
	},
	{	/* 2 */
		0, 0, 0, 0,
		1, 1, 1, 1,
		2, 2, 2, 2,
		3, 3, 3, 3,
	},
	{	/* 3 */
		4, 4, 4, 4,
		5, 5, 5, 5,
		5, 5, 5, 5,
		4, 4, 4, 4,
	},
	{	/* 4 */
		6, 7, 8, 9,
		7, 10, 11, 8,
		8, 11, 10, 7,
		9, 8, 7, 6,
	},
	{	/* 5 */
		4, 5, 5, 4,
		4, 5, 5, 4,
		4, 5, 5, 4,
		4, 5, 5, 4,
	},
	{	/* 6 */
		1, 3, 0, 2,
		1, 3, 0, 2,
		1, 3, 0, 2,
		1, 3, 0, 2,
	},
	{	/* 7 */
		0, 3, 3, 0,
		1, 2, 2, 1,
		2, 1, 1, 2,
		3, 0, 0, 3,
	},
	{	/* 8 */
		0, 1, 2, 3,
		3, 2, 1, 0,
		3, 2, 1, 0,
		0, 1, 2, 3,
	},
	{	/* 9 */
		1, 1, 1, 1,
		3, 3, 3, 3,
		0, 0, 0, 0,
		2, 2, 2, 2,
	},
	{	/* 10 */
		7, 10, 11, 8,
		9, 8, 7, 6,
		6, 7, 8, 9,
		8, 11, 10, 7,
	},
	{	/* 11 */
		4, 5, 5, 4,
		5, 4, 4, 5,
		5, 4, 4, 5,
		4, 5, 5, 4,
	},
	{	/* 12 */
		7, 9, 6, 8,
		10, 8, 7, 11,
		11, 7, 8, 10,
		8, 6, 9, 7,
	},
	{	/* 13 */
		1, 3, 0, 2,
		2, 0, 3, 1,
		2, 0, 3, 1,
		1, 3, 0, 2,
	},
	{	/* 14 */
		1, 2, 2, 1,
		3, 0, 0, 3,
		0, 3, 3, 0,
		2, 1, 1, 2,
	},
	{	/* 15 */
		10, 8, 7, 11,
		8, 6, 9, 7,
		7, 9, 6, 8,
		11, 7, 8, 10
	}
};
294 
/*
 * CLAMP(x): saturate x to the range [0, 255].
 *
 * Table variant: pwc_crop_table holds MAX_OUTER_CROP_VALUE zeros, then the
 * identity ramp 0..255, then MAX_OUTER_CROP_VALUE entries of 255 (it is
 * filled by pwc_dec23_init()). The lookup is only valid for x in
 * [-MAX_OUTER_CROP_VALUE, 255 + MAX_OUTER_CROP_VALUE - 1]; no range check
 * is done.
 *
 * Comparison variant: x is evaluated more than once, so it must not have
 * side effects.
 */
#if USE_LOOKUP_TABLE_TO_CLAMP
#define MAX_OUTER_CROP_VALUE	(512)
static unsigned char pwc_crop_table[256 + 2*MAX_OUTER_CROP_VALUE];
#define CLAMP(x) (pwc_crop_table[MAX_OUTER_CROP_VALUE+(x)])
#else
/* Every use of the argument is parenthesized so any expression is safe. */
#define CLAMP(x) ((x) > 255 ? 255 : ((x) < 0 ? 0 : (x)))
#endif
302 
303 
304 /* If the type or the command change, we rebuild the lookup table */
pwc_dec23_init(struct pwc_device * pwc,int type,unsigned char * cmd)305 int pwc_dec23_init(struct pwc_device *pwc, int type, unsigned char *cmd)
306 {
307 	int flags, version, shift, i;
308 	struct pwc_dec23_private *pdec;
309 
310 	if (pwc->decompress_data == NULL) {
311 		pdec = kmalloc(sizeof(struct pwc_dec23_private), GFP_KERNEL);
312 		if (pdec == NULL)
313 			return -ENOMEM;
314 		pwc->decompress_data = pdec;
315 	}
316 	pdec = pwc->decompress_data;
317 
318 	if (DEVICE_USE_CODEC3(type)) {
319 		flags = cmd[2] & 0x18;
320 		if (flags == 8)
321 			pdec->nbits = 7;	/* More bits, mean more bits to encode the stream, but better quality */
322 		else if (flags == 0x10)
323 			pdec->nbits = 8;
324 		else
325 			pdec->nbits = 6;
326 
327 		version = cmd[2] >> 5;
328 		build_table_color(KiaraRomTable[version][0], pdec->table_0004_pass1, pdec->table_8004_pass1);
329 		build_table_color(KiaraRomTable[version][1], pdec->table_0004_pass2, pdec->table_8004_pass2);
330 
331 	} else {
332 
333 		flags = cmd[2] & 6;
334 		if (flags == 2)
335 			pdec->nbits = 7;
336 		else if (flags == 4)
337 			pdec->nbits = 8;
338 		else
339 			pdec->nbits = 6;
340 
341 		version = cmd[2] >> 3;
342 		build_table_color(TimonRomTable[version][0], pdec->table_0004_pass1, pdec->table_8004_pass1);
343 		build_table_color(TimonRomTable[version][1], pdec->table_0004_pass2, pdec->table_8004_pass2);
344 	}
345 
346 	/* Informations can be coded on a variable number of bits but never less than 8 */
347 	shift = 8 - pdec->nbits;
348 	pdec->scalebits = SCALEBITS - shift;
349 	pdec->nbitsmask = 0xFF >> shift;
350 
351 	fill_table_dc00_d800(pdec);
352 	build_subblock_pattern(pdec);
353 	build_bit_powermask_table(pdec);
354 
355 #if USE_LOOKUP_TABLE_TO_CLAMP
356 	/* Build the static table to clamp value [0-255] */
357 	for (i=0;i<MAX_OUTER_CROP_VALUE;i++)
358 		pwc_crop_table[i] = 0;
359 	for (i=0; i<256; i++)
360 		pwc_crop_table[MAX_OUTER_CROP_VALUE+i] = i;
361 	for (i=0; i<MAX_OUTER_CROP_VALUE; i++)
362 		pwc_crop_table[MAX_OUTER_CROP_VALUE+256+i] = 255;
363 #endif
364 
365 	return 0;
366 }
367 
/*
 * Copy a decoded 4x4 block to the Y plane buffer.
 *
 * src:            16 values in row-major order
 * dst:            top-left byte of the block in the destination plane
 * bytes_per_line: distance in bytes between two destination rows
 * scalebits:      right shift applied to each value before clamping
 *
 * NOTE(review): the table lookup does no range check; this assumes every
 * shifted value stays inside the clamp table.
 */
static void copy_image_block_Y(const int *src, unsigned char *dst, unsigned int bytes_per_line, unsigned int scalebits)
{
#if UNROLL_LOOP_FOR_COPY
	const unsigned char *lut = pwc_crop_table+MAX_OUTER_CROP_VALUE;
	unsigned char *row0 = dst;
	unsigned char *row1 = dst + bytes_per_line;
	unsigned char *row2 = dst + bytes_per_line*2;
	unsigned char *row3 = dst + bytes_per_line*3;

	row0[0] = lut[src[0] >> scalebits];
	row0[1] = lut[src[1] >> scalebits];
	row0[2] = lut[src[2] >> scalebits];
	row0[3] = lut[src[3] >> scalebits];

	row1[0] = lut[src[4] >> scalebits];
	row1[1] = lut[src[5] >> scalebits];
	row1[2] = lut[src[6] >> scalebits];
	row1[3] = lut[src[7] >> scalebits];

	row2[0] = lut[src[8] >> scalebits];
	row2[1] = lut[src[9] >> scalebits];
	row2[2] = lut[src[10] >> scalebits];
	row2[3] = lut[src[11] >> scalebits];

	row3[0] = lut[src[12] >> scalebits];
	row3[1] = lut[src[13] >> scalebits];
	row3[2] = lut[src[14] >> scalebits];
	row3[3] = lut[src[15] >> scalebits];
#else
	unsigned int row, col;

	for (row = 0; row < 4; row++) {
		unsigned char *out = dst + bytes_per_line*row;

		for (col = 0; col < 4; col++)
			out[col] = CLAMP(src[row*4 + col] >> scalebits);
	}
#endif
}
420 
/*
 * Copy a decoded 4x4 block to a CrCb plane buffer as two rows of 8 bytes.
 *
 * First output row:  src[0], src[4], src[1], src[5], ... (rows 0 and 1 of
 *                    the block interleaved, row 0 first)
 * Second output row: src[12], src[8], src[13], src[9], ... (rows 3 and 2 of
 *                    the block interleaved, row 3 first)
 *
 * bytes_per_line is the distance between the two output rows; each value is
 * shifted right by scalebits and clamped before being stored.
 */
static void copy_image_block_CrCb(const int *src, unsigned char *dst, unsigned int bytes_per_line, unsigned int scalebits)
{
#if UNROLL_LOOP_FOR_COPY
	const unsigned char *lut = pwc_crop_table+MAX_OUTER_CROP_VALUE;
	unsigned char *first = dst;
	unsigned char *second = dst + bytes_per_line;

	first[0] = lut[src[0] >> scalebits];
	first[1] = lut[src[4] >> scalebits];
	first[2] = lut[src[1] >> scalebits];
	first[3] = lut[src[5] >> scalebits];
	first[4] = lut[src[2] >> scalebits];
	first[5] = lut[src[6] >> scalebits];
	first[6] = lut[src[3] >> scalebits];
	first[7] = lut[src[7] >> scalebits];

	second[0] = lut[src[12] >> scalebits];
	second[1] = lut[src[8] >> scalebits];
	second[2] = lut[src[13] >> scalebits];
	second[3] = lut[src[9] >> scalebits];
	second[4] = lut[src[14] >> scalebits];
	second[5] = lut[src[10] >> scalebits];
	second[6] = lut[src[15] >> scalebits];
	second[7] = lut[src[11] >> scalebits];
#else
	unsigned int i;
	unsigned char *first = dst;
	unsigned char *second = dst + bytes_per_line;

	for (i = 0; i < 4; i++) {
		first[2*i] = CLAMP(src[i] >> scalebits);
		first[2*i + 1] = CLAMP(src[4 + i] >> scalebits);
	}
	for (i = 0; i < 4; i++) {
		second[2*i] = CLAMP(src[12 + i] >> scalebits);
		second[2*i + 1] = CLAMP(src[8 + i] >> scalebits);
	}
#endif
}
469 
#if ENABLE_BAYER_DECODER
/*
 * Copy 16 decoded green values as two rows of 8 samples. Within a row the
 * samples go to every other byte (offsets 0, 2, ... 14); the second row
 * starts bytes_per_line after the first.
 *
 * Format: 8x2 pixels
 *   . G . G . G . G . G . G . G
 *   . . . . . . . . . . . . . .
 *   . G . G . G . G . G . G . G
 *   . . . . . . . . . . . . . .
 *   or
 *   . . . . . . . . . . . . . .
 *   G . G . G . G . G . G . G .
 *   . . . . . . . . . . . . . .
 *   G . G . G . G . G . G . G .
 */
static void copy_image_block_Green(const int *src, unsigned char *dst, unsigned int bytes_per_line, unsigned int scalebits)
{
#if UNROLL_LOOP_FOR_COPY
	const unsigned char *lut = pwc_crop_table+MAX_OUTER_CROP_VALUE;
	unsigned char *first = dst;
	unsigned char *second = dst + bytes_per_line;

	first[0] = lut[src[0] >> scalebits];
	first[2] = lut[src[1] >> scalebits];
	first[4] = lut[src[2] >> scalebits];
	first[6] = lut[src[3] >> scalebits];
	first[8] = lut[src[4] >> scalebits];
	first[10] = lut[src[5] >> scalebits];
	first[12] = lut[src[6] >> scalebits];
	first[14] = lut[src[7] >> scalebits];

	second[0] = lut[src[8] >> scalebits];
	second[2] = lut[src[9] >> scalebits];
	second[4] = lut[src[10] >> scalebits];
	second[6] = lut[src[11] >> scalebits];
	second[8] = lut[src[12] >> scalebits];
	second[10] = lut[src[13] >> scalebits];
	second[12] = lut[src[14] >> scalebits];
	second[14] = lut[src[15] >> scalebits];
#else
	unsigned int row, i;

	for (row = 0; row < 2; row++) {
		unsigned char *out = dst + bytes_per_line*row;

		for (i = 0; i < 8; i++)
			out[i*2] = CLAMP(src[row*8 + i] >> scalebits);
	}
#endif
}
#endif
524 
#if ENABLE_BAYER_DECODER
/*
 * Copy a decoded 4x4 block of red/blue values into four output rows.
 * Rows 0 and 2 are written at byte offsets 0, 2, 4, 6; rows 1 and 3 at
 * byte offsets 1, 3, 5, 7. Rows are bytes_per_line apart.
 *
 * Format: 4x4 pixels
 *   R . R . R . R
 *   . B . B . B .
 *   R . R . R . R
 *   . B . B . B .
 */
static void copy_image_block_RedBlue(const int *src, unsigned char *dst, unsigned int bytes_per_line, unsigned int scalebits)
{
#if UNROLL_LOOP_FOR_COPY
	const unsigned char *lut = pwc_crop_table+MAX_OUTER_CROP_VALUE;
	unsigned char *row0 = dst;
	unsigned char *row1 = dst + bytes_per_line;
	unsigned char *row2 = dst + bytes_per_line*2;
	unsigned char *row3 = dst + bytes_per_line*3;

	row0[0] = lut[src[0] >> scalebits];
	row0[2] = lut[src[1] >> scalebits];
	row0[4] = lut[src[2] >> scalebits];
	row0[6] = lut[src[3] >> scalebits];

	row1[1] = lut[src[4] >> scalebits];
	row1[3] = lut[src[5] >> scalebits];
	row1[5] = lut[src[6] >> scalebits];
	row1[7] = lut[src[7] >> scalebits];

	row2[0] = lut[src[8] >> scalebits];
	row2[2] = lut[src[9] >> scalebits];
	row2[4] = lut[src[10] >> scalebits];
	row2[6] = lut[src[11] >> scalebits];

	row3[1] = lut[src[12] >> scalebits];
	row3[3] = lut[src[13] >> scalebits];
	row3[5] = lut[src[14] >> scalebits];
	row3[7] = lut[src[15] >> scalebits];
#else
	unsigned int row, i;

	for (row = 0; row < 4; row++) {
		unsigned char *out = dst + bytes_per_line*row;
		/* odd rows are shifted one byte to the right */
		unsigned int phase = row & 1;

		for (i = 0; i < 4; i++)
			out[i*2 + phase] = CLAMP(src[row*4 + i] >> scalebits);
	}
#endif
}
#endif
586 
/*
 * To manage the stream, we keep bits in a register (pdec->reservoir); the
 * next bit of the stream is always the least significant bit of the
 * reservoir and new bytes are inserted above the bits already present.
 *
 * fill_nbits(pdec, n):        read bytes from pdec->stream until the
 *                             reservoir holds at least n bits
 * skip_nbits(pdec, n):        discard n bits from the reservoir
 * get_nbits(pdec, n, res):    fill the reservoir, store the first n bits in
 *                             res and discard them from the reservoir
 * __get_nbits(pdec, n, res):  faster version of get_nbits(), but assumes that
 *                             the reservoir already contains at least n
 *                             bits; the bits returned are discarded
 * look_nbits(pdec, n):        value of the first n bits, nothing discarded
 *
 * The statement macros expand to a single statement without a trailing
 * semicolon, so they behave like a function call in an unbraced if/else.
 * No check is done against the end of the input buffer.
 */
#define fill_nbits(pdec, nbits_wanted) do { \
	while ((pdec)->nbits_in_reservoir < (nbits_wanted)) { \
		/* widen before shifting so the shift is done on an unsigned value */ \
		(pdec)->reservoir |= (unsigned int)(*((pdec)->stream)++) << ((pdec)->nbits_in_reservoir); \
		(pdec)->nbits_in_reservoir += 8; \
	} \
} while (0)

#define skip_nbits(pdec, nbits_to_skip) do { \
	(pdec)->reservoir >>= (nbits_to_skip); \
	(pdec)->nbits_in_reservoir -= (nbits_to_skip); \
} while (0)

#define get_nbits(pdec, nbits_wanted, result) do { \
	fill_nbits(pdec, nbits_wanted); \
	(result) = ((pdec)->reservoir) & ((1U << (nbits_wanted)) - 1); \
	skip_nbits(pdec, nbits_wanted); \
} while (0)

#define __get_nbits(pdec, nbits_wanted, result) do { \
	(result) = ((pdec)->reservoir) & ((1U << (nbits_wanted)) - 1); \
	skip_nbits(pdec, nbits_wanted); \
} while (0)

#define look_nbits(pdec, nbits_wanted) \
	(((pdec)->reservoir) & ((1U << (nbits_wanted)) - 1))
622 
623 /*
624  * Decode a 4x4 pixel block
625  */
decode_block(struct pwc_dec23_private * pdec,const unsigned char * ptable0004,const unsigned char * ptable8004)626 static void decode_block(struct pwc_dec23_private *pdec,
627 			 const unsigned char *ptable0004,
628 			 const unsigned char *ptable8004)
629 {
630 	unsigned int primary_color;
631 	unsigned int channel_v, offset1, op;
632 	int i;
633 
634 	fill_nbits(pdec, 16);
635 	__get_nbits(pdec, pdec->nbits, primary_color);
636 
637 	if (look_nbits(pdec,2) == 0) {
638 		skip_nbits(pdec, 2);
639 		/* Very simple, the color is the same for all pixels of the square */
640 		for (i = 0; i < 16; i++)
641 			pdec->temp_colors[i] = pdec->table_dc00[primary_color];
642 
643 		return;
644 	}
645 
646 	/* This block is encoded with small pattern */
647 	for (i = 0; i < 16; i++)
648 		pdec->temp_colors[i] = pdec->table_d800[primary_color];
649 
650 	__get_nbits(pdec, 3, channel_v);
651 	channel_v = ((channel_v & 1) << 2) | (channel_v & 2) | ((channel_v & 4) >> 2);
652 
653 	ptable0004 += (channel_v * 128);
654 	ptable8004 += (channel_v * 32);
655 
656 	offset1 = 0;
657 	do
658 	{
659 		unsigned int htable_idx, rows = 0;
660 		const unsigned int *block;
661 
662 		/* [  zzzz y x x ]
663 		 *     xx == 00 :=> end of the block def, remove the two bits from the stream
664 		 *    yxx == 111
665 		 *    yxx == any other value
666 		 *
667 		 */
668 		fill_nbits(pdec, 16);
669 		htable_idx = look_nbits(pdec, 6);
670 		op = hash_table_ops[htable_idx * 4];
671 
672 		if (op == 2) {
673 			skip_nbits(pdec, 2);
674 
675 		} else if (op == 1) {
676 			/* 15bits [ xxxx xxxx yyyy 111 ]
677 			 * yyy => offset in the table8004
678 			 * xxx => offset in the tabled004 (tree)
679 			 */
680 			unsigned int mask, shift;
681 			unsigned int nbits, col1;
682 			unsigned int yyyy;
683 
684 			skip_nbits(pdec, 3);
685 			/* offset1 += yyyy */
686 			__get_nbits(pdec, 4, yyyy);
687 			offset1 += 1 + yyyy;
688 			offset1 &= 0x0F;
689 			nbits = ptable8004[offset1 * 2];
690 
691 			/* col1 = xxxx xxxx */
692 			__get_nbits(pdec, nbits+1, col1);
693 
694 			/* Bit mask table */
695 			mask = pdec->table_bitpowermask[nbits][col1];
696 			shift = ptable8004[offset1 * 2 + 1];
697 			rows = ((mask << shift) + 0x80) & 0xFF;
698 
699 			block = pdec->table_subblock[rows];
700 			for (i = 0; i < 16; i++)
701 				pdec->temp_colors[i] += block[MulIdx[offset1][i]];
702 
703 		} else {
704 			/* op == 0
705 			 * offset1 is coded on 3 bits
706 			 */
707 			unsigned int shift;
708 
709 			offset1 += hash_table_ops [htable_idx * 4 + 2];
710 			offset1 &= 0x0F;
711 
712 			rows = ptable0004[offset1 + hash_table_ops [htable_idx * 4 + 3]];
713 			block = pdec->table_subblock[rows];
714 			for (i = 0; i < 16; i++)
715 				pdec->temp_colors[i] += block[MulIdx[offset1][i]];
716 
717 			shift = hash_table_ops[htable_idx * 4 + 1];
718 			skip_nbits(pdec, shift);
719 		}
720 
721 	} while (op != 2);
722 
723 }
724 
DecompressBand23(struct pwc_dec23_private * pdec,const unsigned char * rawyuv,unsigned char * planar_y,unsigned char * planar_u,unsigned char * planar_v,unsigned int compressed_image_width,unsigned int real_image_width)725 static void DecompressBand23(struct pwc_dec23_private *pdec,
726 			     const unsigned char *rawyuv,
727 			     unsigned char *planar_y,
728 			     unsigned char *planar_u,
729 			     unsigned char *planar_v,
730 			     unsigned int   compressed_image_width,
731 			     unsigned int   real_image_width)
732 {
733 	int compression_index, nblocks;
734 	const unsigned char *ptable0004;
735 	const unsigned char *ptable8004;
736 
737 	pdec->reservoir = 0;
738 	pdec->nbits_in_reservoir = 0;
739 	pdec->stream = rawyuv + 1;	/* The first byte of the stream is skipped */
740 
741 	get_nbits(pdec, 4, compression_index);
742 
743 	/* pass 1: uncompress Y component */
744 	nblocks = compressed_image_width / 4;
745 
746 	ptable0004 = pdec->table_0004_pass1[compression_index];
747 	ptable8004 = pdec->table_8004_pass1[compression_index];
748 
749 	/* Each block decode a square of 4x4 */
750 	while (nblocks) {
751 		decode_block(pdec, ptable0004, ptable8004);
752 		copy_image_block_Y(pdec->temp_colors, planar_y, real_image_width, pdec->scalebits);
753 		planar_y += 4;
754 		nblocks--;
755 	}
756 
757 	/* pass 2: uncompress UV component */
758 	nblocks = compressed_image_width / 8;
759 
760 	ptable0004 = pdec->table_0004_pass2[compression_index];
761 	ptable8004 = pdec->table_8004_pass2[compression_index];
762 
763 	/* Each block decode a square of 4x4 */
764 	while (nblocks) {
765 		decode_block(pdec, ptable0004, ptable8004);
766 		copy_image_block_CrCb(pdec->temp_colors, planar_u, real_image_width/2, pdec->scalebits);
767 
768 		decode_block(pdec, ptable0004, ptable8004);
769 		copy_image_block_CrCb(pdec->temp_colors, planar_v, real_image_width/2, pdec->scalebits);
770 
771 		planar_v += 8;
772 		planar_u += 8;
773 		nblocks -= 2;
774 	}
775 
776 }
777 
#if ENABLE_BAYER_DECODER
/*
 * Decompress one band into a raw Bayer buffer.
 *
 * Size need to be a multiple of 8 in width
 *
 * Return a block of four line encoded like this:
 *
 *   G R G R G R G R G R G R G R G R
 *   B G B G B G B G B G B G B G B G
 *   G R G R G R G R G R G R G R G R
 *   B G B G B G B G B G B G B G B G
 *
 * pdec:                   decoder state; its bit reservoir is reset here
 * rawyuv:                 compressed band; the first byte is skipped and the
 *                         next 4 bits select the compression tables
 * rgbbayer:               destination of the band
 * compressed_image_width: width of the image coded in the stream
 * real_image_width:       width of the output image, used as line stride
 */
static void DecompressBandBayer(struct pwc_dec23_private *pdec,
				const unsigned char *rawyuv,
				unsigned char *rgbbayer,
				unsigned int   compressed_image_width,
				unsigned int   real_image_width)
{
	int compression_index, nblocks;
	const unsigned char *ptable0004;
	const unsigned char *ptable8004;
	unsigned char *dest;

	pdec->reservoir = 0;
	pdec->nbits_in_reservoir = 0;
	pdec->stream = rawyuv + 1;	/* The first byte of the stream is skipped */

	get_nbits(pdec, 4, compression_index);

	/* pass 1: uncompress RB component */
	nblocks = compressed_image_width / 4;

	ptable0004 = pdec->table_0004_pass1[compression_index];
	ptable8004 = pdec->table_8004_pass1[compression_index];
	dest = rgbbayer;

	/*
	 * Each block decode a square of 4x4, spread over 8 output bytes per
	 * row. The block must be written at 'dest', the cursor that advances
	 * with every block: writing at 'rgbbayer' would put every block on
	 * top of the first one.
	 */
	while (nblocks) {
		decode_block(pdec, ptable0004, ptable8004);
		copy_image_block_RedBlue(pdec->temp_colors, dest, real_image_width, pdec->scalebits);
		dest += 8;
		nblocks--;
	}

	/* pass 2: uncompress G component */
	nblocks = compressed_image_width / 8;

	ptable0004 = pdec->table_0004_pass2[compression_index];
	ptable8004 = pdec->table_8004_pass2[compression_index];

	/*
	 * Each iteration decodes two green blocks and accounts for two blocks;
	 * testing for at least two remaining blocks keeps the loop finite when
	 * the block count is odd.
	 */
	while (nblocks >= 2) {
		decode_block(pdec, ptable0004, ptable8004);
		copy_image_block_Green(pdec->temp_colors, rgbbayer+1, real_image_width, pdec->scalebits);

		decode_block(pdec, ptable0004, ptable8004);
		copy_image_block_Green(pdec->temp_colors, rgbbayer+real_image_width, real_image_width, pdec->scalebits);

		rgbbayer += 16;
		nblocks -= 2;
	}
}
#endif
841 
842 
843 /**
844  *
845  * Uncompress a pwc23 buffer.
846  *
847  * pwc.view: size of the image wanted
848  * pwc.image: size of the image returned by the camera
849  * pwc.offset: (x,y) to displayer image in the view
850  *
851  * src: raw data
852  * dst: image output
853  * flags: PWCX_FLAG_PLANAR or PWCX_FLAG_BAYER
854  */
pwc_dec23_decompress(const struct pwc_device * pwc,const void * src,void * dst,int flags)855 void pwc_dec23_decompress(const struct pwc_device *pwc,
856 			  const void *src,
857 			  void *dst,
858 			  int flags)
859 {
860 	int bandlines_left, stride, bytes_per_block;
861 
862 	bandlines_left = pwc->image.y / 4;
863 	bytes_per_block = pwc->view.x * 4;
864 
865 	if (flags & PWCX_FLAG_BAYER) {
866 #if ENABLE_BAYER_DECODER
867 		/* RGB Bayer format */
868 		unsigned char *rgbout;
869 
870 		stride = pwc->view.x * pwc->offset.y;
871 		rgbout = dst + stride + pwc->offset.x;
872 
873 
874 		while (bandlines_left--) {
875 
876 			DecompressBandBayer(pwc->decompress_data,
877 					    src,
878 					    rgbout,
879 					    pwc->image.x, pwc->view.x);
880 
881 			src += pwc->vbandlength;
882 			rgbout += bytes_per_block;
883 
884 		}
885 #else
886 		memset(dst, 0, pwc->view.x * pwc->view.y);
887 #endif
888 
889 	} else {
890 		/* YUV420P image format */
891 		unsigned char *pout_planar_y;
892 		unsigned char *pout_planar_u;
893 		unsigned char *pout_planar_v;
894 		unsigned int   plane_size;
895 
896 		plane_size = pwc->view.x * pwc->view.y;
897 
898 		/* offset in Y plane */
899 		stride = pwc->view.x * pwc->offset.y;
900 		pout_planar_y = dst + stride + pwc->offset.x;
901 
902 		/* offsets in U/V planes */
903 		stride = (pwc->view.x * pwc->offset.y) / 4 + pwc->offset.x / 2;
904 		pout_planar_u = dst + plane_size + stride;
905 		pout_planar_v = dst + plane_size + plane_size / 4 + stride;
906 
907 		while (bandlines_left--) {
908 
909 			DecompressBand23(pwc->decompress_data,
910 					 src,
911 					 pout_planar_y, pout_planar_u, pout_planar_v,
912 					 pwc->image.x, pwc->view.x);
913 			src += pwc->vbandlength;
914 			pout_planar_y += bytes_per_block;
915 			pout_planar_u += pwc->view.x;
916 			pout_planar_v += pwc->view.x;
917 
918 		}
919 
920 	}
921 
922 }
923 
/* Teardown hook of the decompressor: intentionally empty, it does nothing. */
void pwc_dec23_exit(void)
{
}
929 
930 /**
931  * Allocate a private structure used by lookup table.
932  * You must call kfree() to free the memory allocated.
933  */
pwc_dec23_alloc(struct pwc_device * pwc)934 int pwc_dec23_alloc(struct pwc_device *pwc)
935 {
936 	pwc->decompress_data = kmalloc(sizeof(struct pwc_dec23_private), GFP_KERNEL);
937 	if (pwc->decompress_data == NULL)
938 		return -ENOMEM;
939 	return 0;
940 }
941 
942 /* vim: set cino= formatoptions=croql cindent shiftwidth=8 tabstop=8: */
943